From 498807724988c779dcd2f2b49f61dc8eb88d85e8 Mon Sep 17 00:00:00 2001
From: Piotr Smaron <piotr.smaron@scylladb.com>
Date: Fri, 17 Apr 2026 15:05:29 +0200
Subject: [PATCH] transport: move requests_serving decrement to after response
 is sent

The requests_serving metric was decremented right after query processing
completed, but before the response was written to the client. This meant
requests whose responses were queued in the write pipeline were no longer
counted as in-flight, understating the actual load.

Move the decrement into the 'leave' defer block, which fires after the
response is fully sent via _ready_to_respond. This makes the shedding
check (max_concurrent_requests_per_shard) more accurate: requests that
have finished processing but are still waiting in the response queue now
correctly count toward the in-flight limit.
---
 transport/server.cc | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/transport/server.cc b/transport/server.cc
index 064f71f59e..8e66a4ed52 100644
--- a/transport/server.cc
+++ b/transport/server.cc
@@ -1001,8 +1001,6 @@ future<foreign_ptr<std::unique_ptr<cql_server::response>>>
         auto stop_trace = defer([&] {
             tracing::stop_foreground(trace_state);
         });
-        --_server._stats.requests_serving;
-
         return seastar::futurize_invoke([&] () {
             if (f.failed()) {
                 return make_exception_future<foreign_ptr<std::unique_ptr<cql_server::response>>>(std::move(f).get_exception());
@@ -1240,6 +1238,7 @@ future<> cql_server::connection::process_request() {
 
             _pending_requests_gate.enter();
             auto leave = defer([this] {
+                --_server._stats.requests_serving;
                 _shedding_timer.cancel();
                 _shed_incoming_requests = false;
                 _pending_requests_gate.leave();