diff --git a/alternator/server.cc b/alternator/server.cc index 3133f15360..e74ea15054 100644 --- a/alternator/server.cc +++ b/alternator/server.cc @@ -699,6 +699,17 @@ future server::handle_api_request(std::unique_ptr // for such a size. co_return api_error::payload_too_large(fmt::format("Request content length limit of {} bytes exceeded", request_content_length_limit)); } + // Check the concurrency limit early, before acquiring memory and + // reading the request body, to avoid piling up memory from excess + // requests that will be rejected anyway. This mirrors the CQL + // transport which also checks concurrency before memory acquisition + // (transport/server.cc). + if (_pending_requests.get_count() >= _max_concurrent_requests) { + _executor._stats.requests_shed++; + co_return api_error::request_limit_exceeded(format("too many in-flight requests (configured via max_concurrent_requests_per_shard): {}", _pending_requests.get_count())); + } + _pending_requests.enter(); + auto leave = defer([this] () noexcept { _pending_requests.leave(); }); // JSON parsing can allocate up to roughly 2x the size of the raw // document, + a couple of bytes for maintenance. // If the Content-Length of the request is not available, we assume @@ -760,12 +771,6 @@ future server::handle_api_request(std::unique_ptr _executor._stats.unsupported_operations++; co_return api_error::unknown_operation(fmt::format("Unsupported operation {}", op)); } - if (_pending_requests.get_count() >= _max_concurrent_requests) { - _executor._stats.requests_shed++; - co_return api_error::request_limit_exceeded(format("too many in-flight requests (configured via max_concurrent_requests_per_shard): {}", _pending_requests.get_count())); - } - _pending_requests.enter(); - auto leave = defer([this] () noexcept { _pending_requests.leave(); }); executor::client_state client_state(service::client_state::external_tag(), _auth_service, &_sl_controller, _timeout_config.current_values(), req->get_client_address()); if (!username.empty()) { diff --git a/test/cluster/dtest/alternator_tests.py b/test/cluster/dtest/alternator_tests.py index e59349ec97..a823835b7f 100644 --- a/test/cluster/dtest/alternator_tests.py +++ b/test/cluster/dtest/alternator_tests.py @@ -481,12 +481,14 @@ class TesterAlternator(BaseAlternator): 2) Issue Alternator 'heavy' requests concurrently (create-table) 3) wait for RequestLimitExceeded error response. """ - concurrent_requests_limit = 5 + # Keep the limit low to avoid exhausting LSA memory on the 1GB test node + # when multiple CreateTable requests (Raft + schema + flush) run concurrently. + concurrent_requests_limit = 3 extra_config = {"max_concurrent_requests_per_shard": concurrent_requests_limit, "num_tokens": 1} self.prepare_dynamodb_cluster(num_of_nodes=1, extra_config=extra_config) node1 = self.cluster.nodelist()[0] create_tables_threads = [] - for tables_num in range(concurrent_requests_limit * 5): + for tables_num in range(concurrent_requests_limit * 2): create_tables_threads.append(self.run_create_table_thread()) @retrying(num_attempts=150, sleep_time=0.2, allowed_exceptions=ConcurrencyLimitNotExceededError, message="Running create-table request")