diff --git a/alternator/server.cc b/alternator/server.cc
index 3133f15360..e74ea15054 100644
--- a/alternator/server.cc
+++ b/alternator/server.cc
@@ -699,6 +699,17 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
         // for such a size.
         co_return api_error::payload_too_large(fmt::format("Request content length limit of {} bytes exceeded", request_content_length_limit));
     }
+    // Check the concurrency limit early, before acquiring memory and
+    // reading the request body, to avoid piling up memory from excess
+    // requests that will be rejected anyway. This mirrors the CQL
+    // transport, which also checks concurrency before acquiring memory
+    // (transport/server.cc).
+    if (_pending_requests.get_count() >= _max_concurrent_requests) {
+        _executor._stats.requests_shed++;
+        co_return api_error::request_limit_exceeded(format("too many in-flight requests (configured via max_concurrent_requests_per_shard): {}", _pending_requests.get_count()));
+    }
+    _pending_requests.enter();
+    auto leave = defer([this] () noexcept { _pending_requests.leave(); });
     // JSON parsing can allocate up to roughly 2x the size of the raw
     // document, + a couple of bytes for maintenance.
     // If the Content-Length of the request is not available, we assume
@@ -760,12 +771,6 @@ future<executor::request_return_type> server::handle_api_request(std::unique_ptr
         _executor._stats.unsupported_operations++;
         co_return api_error::unknown_operation(fmt::format("Unsupported operation {}", op));
     }
-    if (_pending_requests.get_count() >= _max_concurrent_requests) {
-        _executor._stats.requests_shed++;
-        co_return api_error::request_limit_exceeded(format("too many in-flight requests (configured via max_concurrent_requests_per_shard): {}", _pending_requests.get_count()));
-    }
-    _pending_requests.enter();
-    auto leave = defer([this] () noexcept { _pending_requests.leave(); });
     executor::client_state client_state(service::client_state::external_tag(),
         _auth_service, &_sl_controller, _timeout_config.current_values(), req->get_client_address());
     if (!username.empty()) {
diff --git a/test/cluster/dtest/alternator_tests.py b/test/cluster/dtest/alternator_tests.py
index e59349ec97..a823835b7f 100644
--- a/test/cluster/dtest/alternator_tests.py
+++ b/test/cluster/dtest/alternator_tests.py
@@ -481,12 +481,14 @@ class TesterAlternator(BaseAlternator):
         2) Issue Alternator 'heavy' requests concurrently (create-table)
         3) wait for RequestLimitExceeded error response.
         """
-        concurrent_requests_limit = 5
+        # Keep the limit low to avoid exhausting LSA memory on the 1GB test node
+        # when multiple CreateTable requests (Raft + schema + flush) run concurrently.
+        concurrent_requests_limit = 3
         extra_config = {"max_concurrent_requests_per_shard": concurrent_requests_limit, "num_tokens": 1}
         self.prepare_dynamodb_cluster(num_of_nodes=1, extra_config=extra_config)
         node1 = self.cluster.nodelist()[0]
         create_tables_threads = []
-        for tables_num in range(concurrent_requests_limit * 5):
+        for tables_num in range(concurrent_requests_limit * 2):
             create_tables_threads.append(self.run_create_table_thread())
 
         @retrying(num_attempts=150, sleep_time=0.2, allowed_exceptions=ConcurrencyLimitNotExceededError, message="Running create-table request")