diff --git a/alternator/error.hh b/alternator/error.hh index a063d59399..b507649d22 100644 --- a/alternator/error.hh +++ b/alternator/error.hh @@ -94,6 +94,9 @@ public: static api_error internal(std::string msg) { return api_error("InternalServerError", std::move(msg), http::reply::status_type::internal_server_error); } + static api_error payload_too_large(std::string msg) { + return api_error("PayloadTooLarge", std::move(msg), status_type::payload_too_large); + } // Provide the "std::exception" interface, to make it easier to print this // exception in log messages. Note that this function is *not* used to diff --git a/alternator/server.cc b/alternator/server.cc index ddfc0b2786..4a4ae64b75 100644 --- a/alternator/server.cc +++ b/alternator/server.cc @@ -462,25 +462,81 @@ static tracing::trace_state_ptr maybe_trace_query(service::client_state& client_ return trace_state; } +// This read_entire_stream() is similar to Seastar's read_entire_stream() +// which reads the given content_stream until its end into non-contiguous +// memory. The difference is that this implementation takes an extra length +// limit, and throws an error if we read more than this limit. +// This length-limited variant would not have been needed if Seastar's HTTP +// server's set_content_length_limit() worked in every case, but unfortunately +// it does not - it only works if the request has a Content-Length header (see +// issue #8196). In contrast this function can limit the request's length no +// matter how it's encoded. We need this limit to protect Alternator from +// oversized requests that can deplete memory. +static future +read_entire_stream(input_stream& inp, size_t length_limit) { + chunked_content ret; + // We try to read length_limit + 1 bytes, so that we can throw an + // exception if we managed to read more than length_limit. 
+ ssize_t remain = length_limit + 1; + do { + temporary_buffer buf = co_await inp.read_up_to(remain); + if (buf.empty()) { + break; + } + remain -= buf.size(); + ret.push_back(std::move(buf)); + } while (remain > 0); + // If we read the full length_limit + 1 bytes, we went over the limit: + if (remain <= 0) { + // By throwing an error here, we may send a reply (the error message) + // without having read the full request body. Seastar's httpd will + // realize that we have not read the entire content stream, and + // correctly mark the connection unreusable, i.e., close it. + // This means we are currently exposed to issue #12166 (caused by + // Seastar issue 1325), where the client may get an RST instead of + // a FIN, and may rarely get a "Connection reset by peer" before + // reading the error we send. + throw api_error::payload_too_large(fmt::format("Request content length limit of {} bytes exceeded", length_limit)); + } + co_return ret; +} + future server::handle_api_request(std::unique_ptr req) { _executor._stats.total_operations++; sstring target = req->get_header("X-Amz-Target"); // target is DynamoDB API version followed by a dot '.' and operation type (e.g. CreateTable) auto dot = target.find('.'); std::string_view op = (dot == sstring::npos) ? std::string_view() : std::string_view(target).substr(dot+1); + if (req->content_length > request_content_length_limit) { + // If we have a Content-Length header and know the request will be too + // long, we don't need to wait for read_entire_stream() below to + // discover it. And we definitely mustn't try to get_units() below + // for such a size. + co_return api_error::payload_too_large(fmt::format("Request content length limit of {} bytes exceeded", request_content_length_limit)); + } // JSON parsing can allocate up to roughly 2x the size of the raw // document, + a couple of bytes for maintenance. - // TODO: consider the case where req->content_length is missing. 
Maybe - // we need to take the content_length_limit and return some of the units - // when we finish read_content_and_verify_signature? - size_t mem_estimate = req->content_length * 2 + 8000; + // If the Content-Length of the request is not available, we assume + // the largest possible request (request_content_length_limit, i.e., 16 MB) + // and after reading the request we return_units() the excess. + size_t mem_estimate = (req->content_length ? req->content_length : request_content_length_limit) * 2 + 8000; auto units_fut = get_units(*_memory_limiter, mem_estimate); if (_memory_limiter->waiters()) { ++_executor._stats.requests_blocked_memory; } auto units = co_await std::move(units_fut); SCYLLA_ASSERT(req->content_stream); - chunked_content content = co_await util::read_entire_stream(*req->content_stream); + chunked_content content = co_await read_entire_stream(*req->content_stream, request_content_length_limit); + // If the request had no Content-Length, we reserved too many units + // so need to return some + if (req->content_length == 0) { + size_t content_length = 0; + for (const auto& chunk : content) { + content_length += chunk.size(); + } + size_t new_mem_estimate = content_length * 2 + 8000; + units.return_units(mem_estimate - new_mem_estimate); + } auto username = co_await verify_signature(*req, content); // As long as the system_clients_entry object is alive, this request will // be visible in the "system.clients" virtual table. 
When requested, this @@ -659,14 +715,12 @@ future<> server::init(net::inet_address addr, std::optional port, std: if (port) { set_routes(_http_server._routes); - _http_server.set_content_length_limit(server::content_length_limit); _http_server.set_content_streaming(true); _http_server.listen(socket_address{addr, *port}).get(); _enabled_servers.push_back(std::ref(_http_server)); } if (https_port) { set_routes(_https_server._routes); - _https_server.set_content_length_limit(server::content_length_limit); _https_server.set_content_streaming(true); if (this_shard_id() == 0) { diff --git a/alternator/server.hh b/alternator/server.hh index 2e2ef595fb..d966f24095 100644 --- a/alternator/server.hh +++ b/alternator/server.hh @@ -28,7 +28,11 @@ namespace alternator { using chunked_content = rjson::chunked_content; class server : public peering_sharded_service { - static constexpr size_t content_length_limit = 16*MB; + // The maximum size of a request body that Alternator will accept, + // in bytes. This is a safety measure to prevent Alternator from + // running out of memory when a client sends a very large request. + // DynamoDB also has the same limit set to 16 MB. + static constexpr size_t request_content_length_limit = 16*MB; using alternator_callback = std::function(executor&, executor::client_state&, tracing::trace_state_ptr, service_permit, rjson::value, std::unique_ptr)>; using alternator_callbacks_map = std::unordered_map; diff --git a/test/alternator/test_manual_requests.py b/test/alternator/test_manual_requests.py index c509cb3230..d7526faec9 100644 --- a/test/alternator/test_manual_requests.py +++ b/test/alternator/test_manual_requests.py @@ -110,8 +110,15 @@ def test_too_large_request(dynamodb, test_table): # 1. An over-long request is rejected no matter if it is sent using a # Content-Length header or chunked encoding (reproduces issue #8196). # 2. 
The client should be able to recognize this error as a 413 error, not -# some I/O error like broken pipe (reproduces issue #8195). -@pytest.mark.xfail(reason="issue #8196, #12166") +# some I/O error like broken pipe. Reproduces issue #8195 and, rarely, +# #12166. +# +# Because issue #12166 is still open, currently we can (in theory, but +# almost never in practice) get a "connection reset by peer" instead of a +# clean 413 reply if the packets get reordered and the RST arrives before +# the reply. So we accept this failure mode too to avoid test flakiness. +# When #12166 is fixed, we should stop allowing that failure mode. + def test_too_large_request_chunked(dynamodb, test_table): if Version(urllib3.__version__) < Version('1.26'): pytest.skip("urllib3 before 1.26.0 threw broken pipe and did not read response and cause issue #8195. Fixed by pull request urllib3/urllib3#1524") @@ -121,20 +128,32 @@ def test_too_large_request_chunked(dynamodb, test_table): '{"TableName": "' + test_table.name + '", ' + spaces + '"Item": {"p": {"S": "x"}, "c": {"S": "x"}}}') def generator(s): yield s - response = requests.post(req.url, headers=req.headers, data=generator(req.body), verify=False) + try: + response = requests.post(req.url, headers=req.headers, data=generator(req.body), verify=False) + # Until #12166 is fixed, we need this except. See comment above why. + except requests.exceptions.ConnectionError as e: + return # In issue #8196, Alternator did not recognize the request is too long # because it uses chunked encoding instead of Content-Length, so the # request succeeded, and the status_code was 200 instead of 413. assert response.status_code == 413 -@pytest.mark.xfail(reason="issue #12166. 
Note that only fails very rairly, will usually xpass") -def test_too_large_request_content_length(dynamodb, test_table): +# 17 MB is enough to verify that Alternator set a 16 MB limit on request +# length (as DynamoDB does), but let's also try a bigger size, which +# at some point during the development caused us to reserve too much memory +# and hang. +@pytest.mark.parametrize("mb", [17, 50]) +def test_too_large_request_content_length(dynamodb, test_table, mb): if Version(urllib3.__version__) < Version('1.26'): pytest.skip("urllib3 before 1.26.0 threw broken pipe and did not read response and cause issue #8195. Fixed by pull request urllib3/urllib3#1524") - spaces = ' ' * (17 * 1024 * 1024) + spaces = ' ' * (mb * 1024 * 1024) req = get_signed_request(dynamodb, 'PutItem', '{"TableName": "' + test_table.name + '", ' + spaces + '"Item": {"p": {"S": "x"}, "c": {"S": "x"}}}') - response = requests.post(req.url, headers=req.headers, data=req.body, verify=False) + try: + response = requests.post(req.url, headers=req.headers, data=req.body, verify=False) + # Until #12166 is fixed, we need this except. See comment above why. + except requests.exceptions.ConnectionError as e: + return # In issue #8195, Alternator closed the connection early, causing the # library to incorrectly throw an exception (Broken Pipe) instead noticing # the error code 413 which the server did send.