From a86928caa16498ccab66bf35bcda4ca1399b1433 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Paszkowski?= <lukasz.paszkowski@scylladb.com>
Date: Sat, 28 Mar 2026 20:40:33 +0100
Subject: [PATCH 1/2] alternator: check concurrency limit before memory
 acquisition

The concurrency limit check in the Alternator server was positioned after
memory acquisition (get_units), request body reading (read_entire_stream),
signature verification, and decompression. This allowed an unbounded
number of requests to pile up, each consuming memory before being
rejected. The pile-up exhausted LSA memory and caused logalloc::bad_alloc
errors that cascaded into Raft applier and topology coordinator failures,
breaking subsequent operations.

Without this fix, test_limit_concurrent_requests on a 1GB node produces
50 logalloc::bad_alloc errors and cascading failures: reads from
system.scylla_local fail, the Raft applier fiber stops, the topology
coordinator stops, and all subsequent CreateTable operations fail with
InternalServerError (500). With this fix, the cascade is eliminated --
admitted requests may still cause LSA pressure on a memory-constrained
node, but the server remains functional.

Move the concurrency check to right after the content-length early-out,
before any memory acquisition or I/O. This mirrors the CQL transport,
which correctly checks concurrency before memory acquisition
(transport/server.cc).

The concurrency check was originally added in 1b8c946ad7 (Sep 2020)
*before* memory acquisition, which at the time lived inside with_gate
(after the concurrency gate). The ordering was inverted by f41dac2a3a
(Mar 2021, "avoid large contiguous allocation for request body"), which
moved get_units() earlier in the function to reserve memory before
reading the newly-introduced content stream -- but inadvertently also
moved it before the concurrency check. c3593462a4 (Mar 2025) further
worsened the situation by adding ungzip/deflate decompression steps and
a 16MB fallback reservation for requests without Content-Length -- all
before the concurrency check -- greatly increasing the memory consumed
by requests that would ultimately be rejected.
---
 alternator/server.cc | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/alternator/server.cc b/alternator/server.cc
index 3133f15360..e74ea15054 100644
--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -699,6 +699,17 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
         // for such a size.
         co_return api_error::payload_too_large(fmt::format("Request content length limit of {} bytes exceeded", request_content_length_limit));
     }
+    // Check the concurrency limit early, before acquiring memory and
+    // reading the request body, to avoid piling up memory from excess
+    // requests that will be rejected anyway. This mirrors the CQL
+    // transport which also checks concurrency before memory acquisition
+    // (transport/server.cc).
+    if (_pending_requests.get_count() >= _max_concurrent_requests) {
+        _executor._stats.requests_shed++;
+        co_return api_error::request_limit_exceeded(format("too many in-flight requests (configured via max_concurrent_requests_per_shard): {}", _pending_requests.get_count()));
+    }
+    _pending_requests.enter();
+    auto leave = defer([this] () noexcept { _pending_requests.leave(); });
     // JSON parsing can allocate up to roughly 2x the size of the raw
     // document, + a couple of bytes for maintenance.
     // If the Content-Length of the request is not available, we assume
@@ -760,12 +771,6 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
         _executor._stats.unsupported_operations++;
         co_return api_error::unknown_operation(fmt::format("Unsupported operation {}", op));
     }
-    if (_pending_requests.get_count() >= _max_concurrent_requests) {
-        _executor._stats.requests_shed++;
-        co_return api_error::request_limit_exceeded(format("too many in-flight requests (configured via max_concurrent_requests_per_shard): {}", _pending_requests.get_count()));
-    }
-    _pending_requests.enter();
-    auto leave = defer([this] () noexcept { _pending_requests.leave(); });
     executor::client_state client_state(service::client_state::external_tag(),
         _auth_service, &_sl_controller, _timeout_config.current_values(), req->get_client_address());
     if (!username.empty()) {

From b8e3ef0c64b5a12930546d0d110cb89e7d9b8cc8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C5=81ukasz=20Paszkowski?= <lukasz.paszkowski@scylladb.com>
Date: Sat, 28 Mar 2026 20:40:33 +0100
Subject: [PATCH 2/2] test: reduce concurrent-request-limit test pressure to
 avoid LSA exhaustion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The test_limit_concurrent_requests dtest uses concurrent CreateTable
requests to verify Alternator's concurrency limiting.  Each admitted
CreateTable triggers Raft consensus, schema mutations, and memtable
flushes—all of which consume LSA memory.  On the 1 GB test node
(2 SMP × 512 MB), the original settings (limit=5, 25 threads) created
enough flush pressure to exhaust the LSA emergency reserve, producing
logalloc::bad_alloc errors in the node log.  The test was always
marginal under these settings and became flaky as new system tables
increased baseline LSA usage over time.

Lower concurrent_requests_limit from 5 to 3 and the thread multiplier
from 5 to 2 (6 threads total).  The thread count is still twice the
limit, which is sufficient to reliably trigger RequestLimitExceeded,
while keeping flush pressure within what 512 MB per shard can sustain.
---
 test/cluster/dtest/alternator_tests.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/cluster/dtest/alternator_tests.py b/test/cluster/dtest/alternator_tests.py
index e59349ec97..a823835b7f 100644
--- a/test/cluster/dtest/alternator_tests.py
+++ b/test/cluster/dtest/alternator_tests.py
@@ -481,12 +481,14 @@ class TesterAlternator(BaseAlternator):
         2) Issue Alternator 'heavy' requests concurrently (create-table)
         3) wait for RequestLimitExceeded error response.
         """
-        concurrent_requests_limit = 5
+        # Keep the limit low to avoid exhausting LSA memory on the 1GB test node
+        # when multiple CreateTable requests (Raft + schema + flush) run concurrently.
+        concurrent_requests_limit = 3
         extra_config = {"max_concurrent_requests_per_shard": concurrent_requests_limit, "num_tokens": 1}
         self.prepare_dynamodb_cluster(num_of_nodes=1, extra_config=extra_config)
         node1 = self.cluster.nodelist()[0]
         create_tables_threads = []
-        for tables_num in range(concurrent_requests_limit * 5):
+        for tables_num in range(concurrent_requests_limit * 2):
             create_tables_threads.append(self.run_create_table_thread())
 
         @retrying(num_attempts=150, sleep_time=0.2, allowed_exceptions=ConcurrencyLimitNotExceededError, message="Running create-table request")