group0: stop group0 before draining storage service during shutdown
Currently the storage service is drained while group0 is still active. Draining stops the commitlogs, so after this point no more writes are possible; however, if group0 is still active it may try to apply commands that attempt writes, and those writes fail, causing group0 state machine errors. This is benign since we are shutting down anyway, but it is better to fix the shutdown order to keep the logs clean. Fixes scylladb/scylladb#19665
This commit is contained in:
committed by
Kamil Braun
parent
a4ff0aae47
commit
af83c5e53e
5
main.cc
5
main.cc
@@ -1944,11 +1944,6 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
ss.local().uninit_address_map().get();
|
||||
});
|
||||
|
||||
// Need to make sure storage service does not use group0 before running group0_service.abort()
|
||||
auto stop_group0_usage_in_storage_service = defer_verbose_shutdown("group 0 usage in local storage", [&ss] {
|
||||
ss.local().wait_for_group0_stop().get();
|
||||
});
|
||||
|
||||
// Setup group0 early in case the node is bootstrapped already and the group exists.
|
||||
// Need to do it before allowing incoming messaging service connections since
|
||||
// storage proxy's and migration manager's verbs may access group0.
|
||||
|
||||
@@ -1096,12 +1096,15 @@ future<> storage_service::sstable_cleanup_fiber(raft::server& server, sharded<se
|
||||
break;
|
||||
}
|
||||
rtlogger.debug("cleanup flag cleared");
|
||||
} catch (const seastar::abort_requested_exception &) {
|
||||
} catch (const seastar::abort_requested_exception&) {
|
||||
rtlogger.info("cleanup fiber aborted");
|
||||
break;
|
||||
} catch (raft::request_aborted&) {
|
||||
rtlogger.info("cleanup fiber aborted");
|
||||
break;
|
||||
} catch (const seastar::broken_condition_variable&) {
|
||||
rtlogger.info("cleanup fiber aborted");
|
||||
break;
|
||||
} catch (...) {
|
||||
rtlogger.error("cleanup fiber got an error: {}", std::current_exception());
|
||||
err = true;
|
||||
@@ -4601,6 +4604,8 @@ future<> storage_service::drain() {
|
||||
future<> storage_service::do_drain() {
|
||||
co_await stop_transport();
|
||||
|
||||
co_await wait_for_group0_stop();
|
||||
|
||||
co_await tracing::tracing::tracing_instance().invoke_on_all(&tracing::tracing::shutdown);
|
||||
|
||||
co_await get_batchlog_manager().invoke_on_all([] (auto& bm) {
|
||||
|
||||
Reference in New Issue
Block a user