From 498807724988c779dcd2f2b49f61dc8eb88d85e8 Mon Sep 17 00:00:00 2001
From: Piotr Smaron
Date: Fri, 17 Apr 2026 15:05:29 +0200
Subject: [PATCH] transport: move requests_serving decrement to after response is sent

The requests_serving metric was decremented right after query processing
completed, but before the response was written to the client. This means
requests whose responses were queued in the write pipeline were no longer
counted as in-flight, understating the actual load.

Move the decrement into the 'leave' defer block, which fires after the
response is fully sent via _ready_to_respond. This makes the shedding
check (max_concurrent_requests_per_shard) more accurate: requests that
have finished processing but are still waiting in the response queue now
correctly count toward the in-flight limit.
---
 transport/server.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/transport/server.cc b/transport/server.cc
index 064f71f59e..8e66a4ed52 100644
--- a/transport/server.cc
+++ b/transport/server.cc
@@ -1001,8 +1001,6 @@ future<foreign_ptr<std::unique_ptr<cql_server::response>>>
         auto stop_trace = defer([&] {
             tracing::stop_foreground(trace_state);
         });
-        --_server._stats.requests_serving;
-
         return seastar::futurize_invoke([&] () {
             if (f.failed()) {
                 return make_exception_future<foreign_ptr<std::unique_ptr<cql_server::response>>>(std::move(f).get_exception());
@@ -1240,6 +1238,7 @@ future<> cql_server::connection::process_request() {
 
                 _pending_requests_gate.enter();
                 auto leave = defer([this] {
+                    --_server._stats.requests_serving;
                     _shedding_timer.cancel();
                     _shed_incoming_requests = false;
                     _pending_requests_gate.leave();