raft: separate group0 server start from in-memory state machine enablement

Split start_server_for_group0 so it only starts the raft server and
replays the log (applying mutations to system tables), without loading
the state into memory. A new enable_group0_state_machine() method is
added which callers must invoke explicitly after all dependencies
(CDC generation service, non-system schemas, etc.) are available.

This prepares for moving setup_group0_if_exist earlier in the startup
sequence so the raft log can be replayed before non-system keyspaces
are loaded, while deferring the in-memory state loading until after
all dependencies are initialized.
This commit is contained in:
Gleb Natapov
2026-06-01 14:05:41 +03:00
parent 3613d9a07d
commit 492a75ffbb
4 changed files with 23 additions and 4 deletions

View File

@@ -2375,6 +2375,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
// Need to do it before allowing incoming messaging service connections since
// storage proxy's and migration manager's verbs may access group0.
group0_service.setup_group0_if_exist(sys_ks.local(), ss.local(), qp.local(), mm.local()).get();
group0_service.enable_group0_state_machine().get();
}
// The call to setup_group0_if_exist() above guarantees that, if group0 is

View File

@@ -453,13 +453,12 @@ future<> raft_group0::start_server_for_group0(raft::group_id group0_id, service:
auto& persistence = srv_for_group0.persistence;
auto& server = *srv_for_group0.server;
co_await with_scheduling_group(_sg, [this, &srv_for_group0, group0_id] (this auto self) -> future<> {
auto& state_machine = dynamic_cast<group0_state_machine&>(srv_for_group0.state_machine);
_group0_sm = &dynamic_cast<group0_state_machine&>(srv_for_group0.state_machine);
co_await _raft_gr.start_server_for_group(std::move(srv_for_group0));
// Set _group0 immediately after the server is registered in _raft_gr._servers.
// This ensures abort_and_drain()/destroy() can find and clean up the server
// even if enable_in_memory_state_machine() or later steps throw.
// even if enable_group0_state_machine() or later steps throw.
_group0.emplace<raft::group_id>(group0_id);
co_await state_machine.enable_in_memory_state_machine();
});
// Fix for scylladb/scylladb#16683:
@@ -478,6 +477,11 @@ future<> raft_group0::start_server_for_group0(raft::group_id group0_id, service:
}
}
// Enables the in-memory state machine of the group 0 server.
//
// The state machine pointer (_group0_sm) is recorded by
// start_server_for_group0(); calling this function before that has
// happened is a programming error and trips the assertion below.
future<> raft_group0::enable_group0_state_machine() {
    SCYLLA_ASSERT(_group0_sm);
    // Bind the checked pointer to a reference so the call below reads as
    // an operation on the state machine itself.
    auto& state_machine = *_group0_sm;
    co_await state_machine.enable_in_memory_state_machine();
}
future<> raft_group0::leadership_monitor_fiber() {
try {
auto sub = _abort_source.subscribe([&] () noexcept {
@@ -517,7 +521,9 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
auto group0_id = raft::group_id{co_await sys_ks.get_raft_group0_id()};
if (group0_id) {
// Group 0 ID present means we've already joined group 0 before.
co_return co_await start_server_for_group0(group0_id, ss, qp, mm);
co_await start_server_for_group0(group0_id, ss, qp, mm);
co_await enable_group0_state_machine();
co_return;
}
raft::server* server = nullptr;
@@ -586,6 +592,7 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
[] { std::raise(SIGSTOP); });
co_await start_server_for_group0(group0_id, ss, qp, mm);
co_await enable_group0_state_machine();
server = &_raft_gr.group0();
// FIXME if we crash now or after getting added to the config but before storing group 0 ID,
// we'll end with a bootstrapped server that possibly added some entries, but we won't remember that we have such a server

View File

@@ -27,6 +27,7 @@ namespace service {
extern const char* const raft_upgrade_doc;
class migration_manager;
class group0_state_machine;
class raft_group0_client;
class storage_service;
@@ -105,6 +106,7 @@ class raft_group0 {
gms::feature_service& _feat;
raft_group0_client& _client;
seastar::scheduling_group _sg;
group0_state_machine* _group0_sm = nullptr;
// Status of leader discovery. Initially there is no group 0,
// and the variant contains no state. During initial cluster
@@ -198,6 +200,14 @@ public:
//
future<> setup_group0_if_exist(db::system_keyspace&, service::storage_service& ss, cql3::query_processor& qp, service::migration_manager& mm);
// Enable the in-memory state machine for group 0.
// This loads the persisted state (topology, schema, etc.) into memory.
// Must be called after all dependencies of reload_state() are initialized
// (e.g. CDC generation service, non-system keyspace schemas).
//
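// Precondition: joined_group0(). The implementation asserts that
// start_server_for_group0() has already recorded the group 0 state machine.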
future<> enable_group0_state_machine();
// Check whether the given Raft server is a member of group 0 configuration
// according to our current knowledge.
//

View File

@@ -1127,6 +1127,7 @@ private:
if (!group0_service.maintenance_mode() && _sys_ks.local().bootstrap_complete()) {
group0_service.setup_group0_if_exist(_sys_ks.local(), _ss.local(), _qp.local(), _mm.local()).get();
group0_service.enable_group0_state_machine().get();
}
_groups_manager.invoke_on_all([](service::strong_consistency::groups_manager& m) {
return m.start();