From b7bc2d89e60211b505d3620eff674be660e0b1f9 Mon Sep 17 00:00:00 2001 From: Andrzej Jackowski Date: Thu, 16 Apr 2026 17:24:53 +0200 Subject: [PATCH] audit: move audit construction before maintenance socket During graceful shutdown, deferred stops run in reverse order of construction. When the audit service was constructed after the maintenance socket, audit was destroyed first. A DML query still in-flight on the maintenance socket could then bypass auditing entirely. Move construction as early as possible so the audit service outlives the maintenance socket on the defer stack, and to maximise the window in which attempts to use audit before storage is ready are caught with on_internal_error_noexcept. Refs SCYLLADB-1615 --- main.cc | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/main.cc b/main.cc index 33cd206691..b81beb2dfd 100644 --- a/main.cc +++ b/main.cc @@ -1810,6 +1810,18 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl utils::get_local_injector().inject("stop_after_starting_migration_manager", [] { std::raise(SIGSTOP); }); + // Audit must be constructed before the maintenance socket so + // that on shutdown (reverse destruction order) the audit service + // outlives the maintenance socket and in-flight queries can + // still reach audit::inspect() safely. + checkpoint(stop_signal, "starting audit service"); + audit::audit::start_audit(*cfg, token_metadata, qp, mm).handle_exception([&] (auto&& e) { + startlog.error("audit start failed: {}", e); + }).get(); + auto audit_stop = defer([] { + audit::audit::stop_audit().get(); + }); + // XXX: stop_raft has to happen before query_processor and migration_manager // is stopped, since some groups keep using the query // processor until are stopped inside stop_raft. @@ -2357,14 +2369,6 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl startlog.info("Verifying that all of the tablet keyspaces use rack list replication factors"); db.local().check_rack_list_everywhere(cfg->enforce_rack_list()); - checkpoint(stop_signal, "starting audit service"); - audit::audit::start_audit(*cfg, token_metadata, qp, mm).handle_exception([&] (auto&& e) { - startlog.error("audit start failed: {}", e); - }).get(); - auto audit_stop = defer([] { - audit::audit::stop_audit().get(); - }); - // The table-based audit backend needs Raft (via join_cluster) // to create its keyspace and table. checkpoint(stop_signal, "starting audit storage");