diff --git a/main.cc b/main.cc index d56c154f5c..78a1f53284 100644 --- a/main.cc +++ b/main.cc @@ -2375,6 +2375,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl // Need to do it before allowing incoming messaging service connections since // storage proxy's and migration manager's verbs may access group0. group0_service.setup_group0_if_exist(sys_ks.local(), ss.local(), qp.local(), mm.local()).get(); + group0_service.enable_group0_state_machine().get(); } // The call to setup_group0_if_exists() above guarantees that, if group0 is diff --git a/service/raft/raft_group0.cc b/service/raft/raft_group0.cc index b52490a47f..99a3e45aa7 100644 --- a/service/raft/raft_group0.cc +++ b/service/raft/raft_group0.cc @@ -453,13 +453,12 @@ future<> raft_group0::start_server_for_group0(raft::group_id group0_id, service: auto& persistence = srv_for_group0.persistence; auto& server = *srv_for_group0.server; co_await with_scheduling_group(_sg, [this, &srv_for_group0, group0_id] (this auto self) -> future<> { - auto& state_machine = dynamic_cast(srv_for_group0.state_machine); + _group0_sm = &dynamic_cast(srv_for_group0.state_machine); co_await _raft_gr.start_server_for_group(std::move(srv_for_group0)); // Set _group0 immediately after the server is registered in _raft_gr._servers. // This ensures abort_and_drain()/destroy() can find and clean up the server - // even if enable_in_memory_state_machine() or later steps throw. + // even if enable_group0_state_machine() or later steps throw. 
_group0.emplace(group0_id); - co_await state_machine.enable_in_memory_state_machine(); }); // Fix for scylladb/scylladb#16683: @@ -478,6 +477,17 @@ future<> raft_group0::start_server_for_group0(raft::group_id group0_id, service: } } +// Enable the in-memory state machine recorded by start_server_for_group0(). +// The call is wrapped in with_scheduling_group(_sg, ...) so that it runs under +// the same scheduling group as the server start in start_server_for_group0(). +future<> raft_group0::enable_group0_state_machine() { + SCYLLA_ASSERT(_group0_sm); + co_await with_scheduling_group(_sg, [this] { + // Non-coroutine lambda: it just returns the future, so no lifetime extension is needed. + return _group0_sm->enable_in_memory_state_machine(); + }); +} + future<> raft_group0::leadership_monitor_fiber() { try { auto sub = _abort_source.subscribe([&] () noexcept { @@ -517,7 +527,9 @@ future<> raft_group0::join_group0(std::vector seeds, shared_p auto group0_id = raft::group_id{co_await sys_ks.get_raft_group0_id()}; if (group0_id) { // Group 0 ID present means we've already joined group 0 before. - co_return co_await start_server_for_group0(group0_id, ss, qp, mm); + co_await start_server_for_group0(group0_id, ss, qp, mm); + co_await enable_group0_state_machine(); + co_return; } raft::server* server = nullptr; @@ -586,6 +598,7 @@ future<> raft_group0::join_group0(std::vector seeds, shared_p [] { std::raise(SIGSTOP); }); co_await start_server_for_group0(group0_id, ss, qp, mm); + co_await enable_group0_state_machine(); server = &_raft_gr.group0(); // FIXME if we crash now or after getting added to the config but before storing group 0 ID, // we'll end with a bootstrapped server that possibly added some entries, but we won't remember that we have such a server diff --git a/service/raft/raft_group0.hh b/service/raft/raft_group0.hh index 6947948d9c..458643f748 100644 --- a/service/raft/raft_group0.hh +++ b/service/raft/raft_group0.hh @@ -27,6 +27,7 @@ namespace service { extern const char* const raft_upgrade_doc; class migration_manager; +class group0_state_machine; class raft_group0_client; class storage_service; @@ -105,6 +106,7 @@ class raft_group0 { gms::feature_service& _feat; raft_group0_client& _client; seastar::scheduling_group _sg; + group0_state_machine* _group0_sm = nullptr; // Status of leader discovery. 
Initially there is no group 0, // and the variant contains no state. During initial cluster @@ -198,6 +200,14 @@ public: // future<> setup_group0_if_exist(db::system_keyspace&, service::storage_service& ss, cql3::query_processor& qp, service::migration_manager& mm); + // Enable the in-memory state machine for group 0. + // This loads the persisted state (topology, schema, etc.) into memory. + // Must be called after all dependencies of reload_state() are initialized + // (e.g. CDC generation service, non-system keyspace schemas). + // + // Precondition: joined_group0(). + future<> enable_group0_state_machine(); + // Check whether the given Raft server is a member of group 0 configuration // according to our current knowledge. // diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index e2ac86f7c8..4a85c8e896 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -1127,6 +1127,7 @@ private: if (!group0_service.maintenance_mode() && _sys_ks.local().bootstrap_complete()) { group0_service.setup_group0_if_exist(_sys_ks.local(), _ss.local(), _qp.local(), _mm.local()).get(); + group0_service.enable_group0_state_machine().get(); } _groups_manager.invoke_on_all([](service::strong_consistency::groups_manager& m) { return m.start();