mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-22 07:42:16 +00:00
Add six tests covering add_entry with wait_type::applied and wait_type::committed for three snapshot scenarios affected in the previous commit: 1. Snapshot at the entry's index (wait_for_entry, term_for returns snapshot term). 2. Snapshot past the entry's index (wait_for_entry, term_for returns nullopt). 3. Follower's waiter is resolved via drop_waiters when a snapshot is loaded. Without the fix in the previous commit, 4 of 6 tests fail: all 3 wait_type::applied tests and the wait_type::committed drop_waiters test. The remaining two tests pass because the changes don't affect them. We don't write tests covering the scenarios when add_entry should still throw commit_status_unknown (that is when the entry's term doesn't match the snapshot's term) because: - these tests would be very complicated, - a bug that would make these tests fail should also make the nemesis tests fail, as there would be an issue with linearizability.
386 lines
16 KiB
C++
386 lines
16 KiB
C++
#include <fmt/std.h>
|
|
#include "raft/raft.hh"
|
|
#include "replication.hh"
|
|
#include "utils/error_injection.hh"
|
|
#include <seastar/util/defer.hh>
|
|
|
|
#ifdef SEASTAR_DEBUG
|
|
// Increase tick time to allow debug to process messages
|
|
const auto tick_delay = 200ms;
|
|
#else
|
|
const auto tick_delay = 100ms;
|
|
#endif
|
|
|
|
// The word "default" means "usually used by the tests here".
|
|
template <typename clock_type = std::chrono::steady_clock>
|
|
static raft_cluster<clock_type> get_default_cluster(test_case test_config) {
|
|
return raft_cluster<clock_type>{
|
|
std::move(test_config),
|
|
::apply_changes,
|
|
0,
|
|
0,
|
|
0, false, tick_delay, rpc_config{}
|
|
};
|
|
}
|
|
|
|
SEASTAR_THREAD_TEST_CASE(test_check_abort_on_client_api) {
|
|
raft_cluster<std::chrono::steady_clock> cluster(
|
|
test_case { .nodes = 1 },
|
|
[](raft::server_id id, const std::vector<raft::command_cref>& commands, lw_shared_ptr<hasher_int> hasher) {
|
|
return 0;
|
|
},
|
|
0,
|
|
0,
|
|
0, false, tick_delay, rpc_config{});
|
|
cluster.start_all().get();
|
|
|
|
cluster.stop_server(0, "test crash").get();
|
|
|
|
auto check_error = [](const raft::stopped_error& e) {
|
|
return sstring(e.what()) == sstring("Raft instance is stopped, reason: \"test crash\"");
|
|
};
|
|
BOOST_CHECK_EXCEPTION(cluster.add_entries(1, 0).get(), raft::stopped_error, check_error);
|
|
BOOST_CHECK_EXCEPTION(cluster.get_server(0).modify_config({}, {to_raft_id(0)}, nullptr).get(), raft::stopped_error, check_error);
|
|
BOOST_CHECK_EXCEPTION(cluster.get_server(0).read_barrier(nullptr).get(), raft::stopped_error, check_error);
|
|
BOOST_CHECK_EXCEPTION(cluster.get_server(0).set_configuration({}, nullptr).get(), raft::stopped_error, check_error);
|
|
}
|
|
|
|
SEASTAR_THREAD_TEST_CASE(test_release_memory_if_add_entry_throws) {
|
|
#ifndef SCYLLA_ENABLE_ERROR_INJECTION
|
|
std::cerr << "Skipping test as it depends on error injection. Please run in mode where it's enabled (debug,dev).\n";
|
|
#else
|
|
const size_t command_size = sizeof(size_t);
|
|
test_case test_config {
|
|
.nodes = 1,
|
|
.config = std::vector<raft::server::configuration>({
|
|
raft::server::configuration {
|
|
.snapshot_threshold_log_size = 0,
|
|
.snapshot_trailing_size = 0,
|
|
.max_log_size = command_size,
|
|
.max_command_size = command_size
|
|
}
|
|
})
|
|
};
|
|
auto cluster = get_default_cluster(std::move(test_config));
|
|
cluster.start_all().get();
|
|
auto stop = defer([&cluster] { cluster.stop_all().get(); });
|
|
|
|
utils::get_local_injector().enable("fsm::add_entry/test-failure", true);
|
|
auto check_error = [](const std::runtime_error& e) {
|
|
return sstring(e.what()) == sstring("fsm::add_entry/test-failure");
|
|
};
|
|
BOOST_CHECK_EXCEPTION(cluster.add_entries(1, 0).get(), std::runtime_error, check_error);
|
|
|
|
// we would block forever if the memory wasn't released
|
|
// when the exception was thrown from the first add_entry
|
|
cluster.add_entries(1, 0).get();
|
|
cluster.read(read_value{0, 1}).get();
|
|
#endif
|
|
}
|
|
|
|
// A simple test verifying the most basic properties of `wait_for_state_change`:
|
|
// * Triggering the passed abort_source will abort the operation.
|
|
// The future will be resolved.
|
|
// * The future will contain an exception, and its type will be `raft::request_aborted`.
|
|
// Reproduces SCYLLADB-665.
|
|
SEASTAR_THREAD_TEST_CASE(test_aborting_wait_for_state_change) {
|
|
auto cluster = get_default_cluster(test_case{ .nodes = 1 });
|
|
cluster.start_all().get();
|
|
auto stop = defer([&cluster] { cluster.stop_all().get(); });
|
|
|
|
auto& server = cluster.get_server(0);
|
|
server.wait_for_leader(nullptr).get();
|
|
|
|
abort_source as;
|
|
// Note that this future cannot resolve immediately.
|
|
// In particular, the leader election we awaited above cannot
|
|
// influence it since the promises corresponding to
|
|
// waiting for a leader and state change are resolved
|
|
// within the same call, one after the other
|
|
// (cf. server_impl::process_fsm_output).
|
|
future<> fut_default_ex = server.wait_for_state_change(&as);
|
|
as.request_abort();
|
|
BOOST_CHECK_THROW((void) fut_default_ex.get(), raft::request_aborted);
|
|
}
|
|
|
|
static void test_func_on_aborted_server_aux(
|
|
std::function<future<>(raft::server&, abort_source*)> func,
|
|
const raft::server::configuration& config = raft::server::configuration{})
|
|
{
|
|
const size_t node_count = 2;
|
|
auto test_config = test_case {
|
|
.nodes = node_count,
|
|
.config = std::vector<raft::server::configuration>(node_count, config)
|
|
};
|
|
auto cluster = get_default_cluster(std::move(test_config));
|
|
|
|
constexpr std::string_view error_message = "some unfunny error message";
|
|
auto check_default_message = [] (const raft::stopped_error& e) {
|
|
return std::string_view(e.what()) == "Raft instance is stopped";
|
|
};
|
|
auto check_error_message = [&error_message] (const raft::stopped_error& e) {
|
|
return std::string_view(e.what()) == fmt::format("Raft instance is stopped, reason: \"{}\"", error_message);
|
|
};
|
|
|
|
/* Case 1. Default error message */ {
|
|
auto& s1 = cluster.get_server(0);
|
|
s1.start().get();
|
|
s1.abort().get();
|
|
|
|
abort_source as;
|
|
|
|
// Regardless of the state of the passed abort_source, we should get raft::stopped_error.
|
|
BOOST_CHECK_EXCEPTION((void) func(s1, nullptr).get(), raft::stopped_error, check_default_message);
|
|
BOOST_CHECK_EXCEPTION((void) func(s1, &as).get(), raft::stopped_error, check_default_message);
|
|
as.request_abort();
|
|
BOOST_CHECK_EXCEPTION((void) func(s1, &as).get(), raft::stopped_error, check_default_message);
|
|
}
|
|
|
|
/* Case 2. Custom error message */ {
|
|
auto& s2 = cluster.get_server(1);
|
|
s2.start().get();
|
|
s2.abort(sstring(error_message)).get();
|
|
|
|
abort_source as;
|
|
|
|
// The same checks as above: we just verify that the error message is what we want.
|
|
BOOST_CHECK_EXCEPTION((void) func(s2, nullptr).get(), raft::stopped_error, check_error_message);
|
|
BOOST_CHECK_EXCEPTION((void) func(s2, &as).get(), raft::stopped_error, check_error_message);
|
|
as.request_abort();
|
|
BOOST_CHECK_EXCEPTION((void) func(s2, &as).get(), raft::stopped_error, check_error_message);
|
|
}
|
|
}
|
|
|
|
static void test_add_entry_on_aborted_server_aux(const bool enable_forwarding) {
|
|
raft::server::configuration config { .enable_forwarding = enable_forwarding };
|
|
int val = 0;
|
|
auto add_entry = [&val] (raft::server& server, abort_source* as) {
|
|
return server.add_entry(create_command(val++), raft::wait_type::committed, as);
|
|
};
|
|
test_func_on_aborted_server_aux(add_entry, config);
|
|
}
|
|
|
|
static void test_modify_config_on_aborted_server_aux(const bool enable_forwarding) {
|
|
raft::server::configuration config { .enable_forwarding = enable_forwarding };
|
|
auto modify_config = [] (raft::server& server, abort_source* as) {
|
|
return server.modify_config({}, {}, as);
|
|
};
|
|
test_func_on_aborted_server_aux(modify_config, config);
|
|
}
|
|
|
|
// Reproducers of SCYLLADB-841: After raft::server had been aborted, both
|
|
// add_entry and modify_config used to return raft::not_a_leader with
|
|
// a null ID when forwarding was disabled.
|
|
//
|
|
// We verify that that's not the case. Furthermore, we check that
|
|
// raft::stopped_error is preferred over raft::request_aborted
|
|
// if both exceptions apply. That's a more natural choice.
|
|
SEASTAR_THREAD_TEST_CASE(test_add_entry_on_aborted_server_disabled_forwarding) {
|
|
test_add_entry_on_aborted_server_aux(false);
|
|
}
|
|
SEASTAR_THREAD_TEST_CASE(test_add_entry_on_aborted_server_enabled_forwarding) {
|
|
test_add_entry_on_aborted_server_aux(true);
|
|
}
|
|
SEASTAR_THREAD_TEST_CASE(test_modify_config_on_aborted_server_disabled_forwarding) {
|
|
test_modify_config_on_aborted_server_aux(false);
|
|
}
|
|
SEASTAR_THREAD_TEST_CASE(test_modify_config_on_aborted_server_enabled_forwarding) {
|
|
test_modify_config_on_aborted_server_aux(true);
|
|
}
|
|
|
|
// A call to raft::server::wait_for_leader should complete with
|
|
// raft::stopped_error if the server has been aborted, regardless
|
|
// of the state of the passed abort_source.
|
|
// Reproducer of SCYLLADB-841.
|
|
SEASTAR_THREAD_TEST_CASE(test_wait_for_leader_on_aborted_server) {
|
|
test_func_on_aborted_server_aux(&raft::server::wait_for_leader);
|
|
}
|
|
|
|
// A call to raft::server::wait_for_state_change should complete with
|
|
// raft::stopped_error if the server has been aborted, regardless
|
|
// of the state of the passed abort_source.
|
|
// Reproducer of SCYLLADB-841.
|
|
SEASTAR_THREAD_TEST_CASE(test_wait_for_state_change_on_aborted_server) {
|
|
test_func_on_aborted_server_aux(&raft::server::wait_for_state_change);
|
|
}
|
|
|
|
// Auxiliary function for testing add_entry behavior when a snapshot that
|
|
// includes the entry being added is taken before wait_for_entry runs.
|
|
//
|
|
// Uses a 1-node cluster with aggressive snapshotting and an error injection
|
|
// point that pauses add_entry after the entry is added to the log but before
|
|
// wait_for_entry checks its status. During the pause, the entry is committed,
|
|
// applied, and a snapshot is taken.
|
|
//
|
|
// If `advance_snapshot_past_entry` is true, a second entry is added so the
|
|
// snapshot moves past the first entry's index, fully truncating it from the
|
|
// log (term_for returns nullopt). Otherwise the snapshot is taken at the
|
|
// entry's index (term_for returns the snapshot's term).
|
|
//
|
|
// In both cases, wait_for_entry should succeed for both wait types, since
|
|
// the snapshot's term matching the entry's term proves the entry was committed
|
|
// and included in the snapshot.
|
|
static void test_add_entry_load_snapshot_before_wait_aux(raft::wait_type type, bool advance_snapshot_past_entry) {
|
|
#ifndef SCYLLA_ENABLE_ERROR_INJECTION
|
|
std::cerr << "Skipping test as it depends on error injection. Please run in mode where it's enabled (debug,dev).\n";
|
|
return;
|
|
#endif
|
|
const size_t command_size = sizeof(size_t);
|
|
test_case test_config {
|
|
.nodes = 1,
|
|
.config = std::vector<raft::server::configuration>({
|
|
raft::server::configuration {
|
|
// Snapshot after every entry; truncate aggressively.
|
|
.snapshot_threshold = 1,
|
|
.snapshot_threshold_log_size = 1,
|
|
.snapshot_trailing = 0,
|
|
.snapshot_trailing_size = 0,
|
|
.max_log_size = 10 * (command_size + sizeof(raft::log_entry)),
|
|
.enable_forwarding = false,
|
|
.max_command_size = command_size
|
|
}
|
|
})
|
|
};
|
|
// apply_entries must be greater than the number of entries added
|
|
// during the test, otherwise the state machine's done promise fires
|
|
// prematurely.
|
|
auto cluster = raft_cluster<std::chrono::steady_clock>{
|
|
std::move(test_config),
|
|
::apply_changes,
|
|
100, // apply_entries
|
|
0,
|
|
0, false, tick_delay, rpc_config{}
|
|
};
|
|
cluster.start_all().get();
|
|
auto stop = defer([&cluster] { cluster.stop_all().get(); });
|
|
|
|
cluster.add_entries(5, 0).get();
|
|
|
|
// one_shot: only the first add_entry is paused; the second one
|
|
// (if used) bypasses the injection.
|
|
utils::get_local_injector().enable("block_raft_add_entry_before_wait_for_entry", true);
|
|
|
|
auto& server = cluster.get_server(0);
|
|
auto fut = server.add_entry(create_command(42), type, nullptr);
|
|
|
|
// Wait for add_entry(42) to reach the injection point.
|
|
while (utils::get_local_injector().is_enabled("block_raft_add_entry_before_wait_for_entry")) {
|
|
seastar::thread::yield();
|
|
}
|
|
|
|
// Wait for the entry to be applied.
|
|
server.read_barrier(nullptr).get();
|
|
|
|
if (advance_snapshot_past_entry) {
|
|
// Add another entry so the snapshot moves past the first entry,
|
|
// fully truncating it from the log (term_for returns nullopt).
|
|
// The injection is one-shot and already consumed, so this goes through.
|
|
server.add_entry(create_command(43), raft::wait_type::applied, nullptr).get();
|
|
}
|
|
|
|
// Take a snapshot, truncating the entry from the log.
|
|
server.trigger_snapshot(nullptr).get();
|
|
|
|
// Unblock wait_for_entry.
|
|
utils::get_local_injector().receive_message("block_raft_add_entry_before_wait_for_entry");
|
|
|
|
// Both wait types should succeed: the snapshot's term matches the entry's
|
|
// term, proving the entry was committed and included in the snapshot.
|
|
BOOST_CHECK_NO_THROW(fut.get());
|
|
}
|
|
|
|
// Snapshot at the entry's index: term_for(eid.idx) returns the snapshot's term.
|
|
// Tests wait_for_entry site where the removed `applied` check used to throw
|
|
// commit_status_unknown.
|
|
SEASTAR_THREAD_TEST_CASE(test_add_entry_applied_load_snapshot_at_entry) {
|
|
test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::applied, false);
|
|
}
|
|
|
|
SEASTAR_THREAD_TEST_CASE(test_add_entry_committed_load_snapshot_at_entry) {
|
|
test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::committed, false);
|
|
}
|
|
|
|
// Snapshot past the entry's index: term_for(eid.idx) returns nullopt.
|
|
// Tests the `!term` branch in wait_for_entry where `snap_term == eid.term`
|
|
// now succeeds for both wait types.
|
|
SEASTAR_THREAD_TEST_CASE(test_add_entry_applied_load_snapshot_past_entry) {
|
|
test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::applied, true);
|
|
}
|
|
|
|
SEASTAR_THREAD_TEST_CASE(test_add_entry_committed_load_snapshot_past_entry) {
|
|
test_add_entry_load_snapshot_before_wait_aux(raft::wait_type::committed, true);
|
|
}
|
|
|
|
// Auxiliary function for testing add_entry behavior when a follower receives
|
|
// the entry via a snapshot (load_snapshot) instead of applying it locally.
|
|
//
|
|
// Setup: 3-node cluster. Node 1 (follower) is blocked from receiving
|
|
// messages from the leader (node 0), but can still send to it. Node 1
|
|
// forwards add_entry to the leader, which commits the entry (with node 2),
|
|
// applies it, and takes a snapshot. When node 1 is reconnected, the leader
|
|
// sends a snapshot (since the log entries are truncated). Node 1 loads the
|
|
// snapshot via load_snapshot(), which calls drop_waiters(). The pending
|
|
// waiter for the forwarded entry is resolved successfully because the
|
|
// snapshot's term matches the entry's term.
|
|
static void test_add_entry_wait_resolved_via_drop_waiters_aux(raft::wait_type type) {
|
|
const size_t command_size = sizeof(size_t);
|
|
raft::server::configuration srv_config {
|
|
.snapshot_threshold = 1,
|
|
.snapshot_threshold_log_size = 1,
|
|
.snapshot_trailing = 0,
|
|
.snapshot_trailing_size = 0,
|
|
.max_log_size = 10 * (command_size + sizeof(raft::log_entry)),
|
|
.max_command_size = command_size
|
|
};
|
|
test_case test_config {
|
|
.nodes = 3,
|
|
.config = std::vector<raft::server::configuration>({srv_config, srv_config, srv_config})
|
|
};
|
|
// apply_entries must be greater than the number of entries added
|
|
// during the test, otherwise the state machine's done promise fires
|
|
// prematurely.
|
|
auto cluster = raft_cluster<std::chrono::steady_clock>{
|
|
std::move(test_config),
|
|
::apply_changes,
|
|
100, // apply_entries
|
|
0,
|
|
0, false, tick_delay, rpc_config{}
|
|
};
|
|
cluster.start_all().get();
|
|
auto stop = defer([&cluster] { cluster.stop_all().get(); });
|
|
|
|
// Add a few entries so all nodes are caught up.
|
|
cluster.add_entries(5, 0).get();
|
|
|
|
// Block node 1 from receiving messages from node 0 (leader).
|
|
// Node 1 can still send to node 0 (forwarding works).
|
|
cluster.block_receive(1, 0);
|
|
|
|
// Node 1 forwards add_entry to node 0. Node 0 commits (with node 2),
|
|
// applies, and takes a snapshot. Node 1 registers a waiter but never
|
|
// receives the entry via append entries.
|
|
auto& follower = cluster.get_server(1);
|
|
auto fut = follower.add_entry(create_command(42), type, nullptr);
|
|
|
|
// Wait for the leader to commit, apply, and snapshot the entry.
|
|
auto& leader = cluster.get_server(0);
|
|
leader.read_barrier(nullptr).get();
|
|
leader.trigger_snapshot(nullptr).get();
|
|
|
|
// Reconnect node 1. The leader will send a snapshot since the log
|
|
// entries are truncated (snapshot_trailing = 0).
|
|
cluster.connect_all();
|
|
|
|
// drop_waiters resolves the waiter successfully since the snapshot's
|
|
// term matches the entry's term, proving it was committed.
|
|
BOOST_CHECK_NO_THROW(fut.get());
|
|
}
|
|
|
|
SEASTAR_THREAD_TEST_CASE(test_add_entry_applied_wait_resolved_via_drop_waiters) {
|
|
test_add_entry_wait_resolved_via_drop_waiters_aux(raft::wait_type::applied);
|
|
}
|
|
|
|
SEASTAR_THREAD_TEST_CASE(test_add_entry_committed_wait_resolved_via_drop_waiters) {
|
|
test_add_entry_wait_resolved_via_drop_waiters_aux(raft::wait_type::committed);
|
|
}
|