Following DynamoDB, Alternator also places a 16 MB limit on the size of a request. Such a limit is necessary to avoid running out of memory - because the AWS message authentication protocol requires reading the entire request into memory before its signature can be verified. Our implementation of this limit used the content_length_limit feature of Seastar's HTTP server. However, this Seastar feature is incomplete - it only works when the request uses the Content-Length header, and doesn't do anything if the request doesn't have a Content-Length (it may use chunked encoding, or have no length at all). So malicious users can cause Scylla to OOM by sending a huge request without a Content-Length. So in this patch we stop using the incomplete Seastar feature, and implement the length limit in Scylla in a way that works correctly with or without Content-Length: We read from the input stream and if we go over 16 MB, we generate an error. Because we dropped Seastar's protection against a long Content-Length, we also need to fix a piece of code which used Content-Length to reserve some semaphore units to prevent reading many large requests in parallel. We fix two problems in the code: 1. If Content-Length is over the limit, we shouldn't attempt to reserve semaphore units - this should just be a Payload Too Large error. 2. If Content-Length is missing, the existing code did nothing and only had a TODO saying that it should do something. In this patch we implement what was suggested in that TODO: We temporarily reserve the whole 16 MB limit, and after reading the actual request, we return part of the reservation according to the real request size. That last fix is important, because typically the largest requests will be BatchWriteItem where a well-written client would want to use chunked encoding, not Content-Length, to avoid materializing the entire request up-front. For such clients, the memory-use semaphore did nothing, and now it does the right thing.
Note that this patch does *not* solve problem #12166, which existed with Seastar's length-limiting implementation and still exists in the new in-Scylla length-limiting implementation: The fact that we send an error response in the middle of the request and then close the connection, while the client continues to send the request, can lead to an RST being sent by the server kernel. Usually this will be fine - well-written client libraries will be able to read the response before the RST. But even with a well-written library, with some rare timings the client may get the RST before the response, and will miss the response, and get an empty or partial response or "connection reset by peer". This issue existed before this patch, and still exists, but is probably of minor impact. Fixes #8196 Signed-off-by: Nadav Har'El <nyh@scylladb.com> Closes scylladb/scylladb#23434
112 lines
4.6 KiB
C++
112 lines
4.6 KiB
C++
/*
|
|
* Copyright 2019-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <seastar/http/httpd.hh>
|
|
#include "seastarx.hh"
|
|
#include "utils/rjson.hh"
|
|
|
|
namespace alternator {
|
|
|
|
// api_error contains a DynamoDB error message to be returned to the user.
|
|
// It can be returned by value (see executor::request_return_type) or thrown.
|
|
// The DynamoDB's error messages are described in detail in
|
|
// https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Programming.Errors.html
|
|
// An error message has an HTTP code (almost always 400), a type, e.g.,
|
|
// "ResourceNotFoundException", and a human readable message.
|
|
// Eventually alternator::api_handler will convert a returned or thrown
|
|
// api_error into a JSON object, and that is returned to the user.
|
|
class api_error final : public std::exception {
|
|
public:
|
|
using status_type = http::reply::status_type;
|
|
status_type _http_code;
|
|
std::string _type;
|
|
std::string _msg;
|
|
// Additional data attached to the error, null value if not set. It's wrapped in copyable_value
|
|
// class because copy constructor is required for exception classes otherwise it won't compile
|
|
// (despite that its use may be optimized away).
|
|
rjson::copyable_value _extra_fields;
|
|
api_error(std::string type, std::string msg, status_type http_code = status_type::bad_request,
|
|
rjson::value extra_fields = rjson::null_value())
|
|
: _http_code(std::move(http_code))
|
|
, _type(std::move(type))
|
|
, _msg(std::move(msg))
|
|
, _extra_fields(std::move(extra_fields))
|
|
{ }
|
|
|
|
// Factory functions for some common types of DynamoDB API errors
|
|
static api_error validation(std::string msg) {
|
|
return api_error("ValidationException", std::move(msg));
|
|
}
|
|
static api_error resource_not_found(std::string msg) {
|
|
return api_error("ResourceNotFoundException", std::move(msg));
|
|
}
|
|
static api_error resource_in_use(std::string msg) {
|
|
return api_error("ResourceInUseException", std::move(msg));
|
|
}
|
|
static api_error invalid_signature(std::string msg) {
|
|
return api_error("InvalidSignatureException", std::move(msg));
|
|
}
|
|
static api_error missing_authentication_token(std::string msg) {
|
|
return api_error("MissingAuthenticationTokenException", std::move(msg));
|
|
}
|
|
static api_error unrecognized_client(std::string msg) {
|
|
return api_error("UnrecognizedClientException", std::move(msg));
|
|
}
|
|
static api_error unknown_operation(std::string msg) {
|
|
return api_error("UnknownOperationException", std::move(msg));
|
|
}
|
|
static api_error access_denied(std::string msg) {
|
|
return api_error("AccessDeniedException", std::move(msg));
|
|
}
|
|
static api_error conditional_check_failed(std::string msg, rjson::value&& item) {
|
|
if (!item.IsNull()) {
|
|
auto tmp = rjson::empty_object();
|
|
rjson::add(tmp, "Item", std::move(item));
|
|
item = std::move(tmp);
|
|
}
|
|
return api_error("ConditionalCheckFailedException", std::move(msg), status_type::bad_request, std::move(item));
|
|
}
|
|
static api_error expired_iterator(std::string msg) {
|
|
return api_error("ExpiredIteratorException", std::move(msg));
|
|
}
|
|
static api_error trimmed_data_access_exception(std::string msg) {
|
|
return api_error("TrimmedDataAccessException", std::move(msg));
|
|
}
|
|
static api_error request_limit_exceeded(std::string msg) {
|
|
return api_error("RequestLimitExceeded", std::move(msg));
|
|
}
|
|
static api_error serialization(std::string msg) {
|
|
return api_error("SerializationException", std::move(msg));
|
|
}
|
|
static api_error table_not_found(std::string msg) {
|
|
return api_error("TableNotFoundException", std::move(msg));
|
|
}
|
|
static api_error limit_exceeded(std::string msg) {
|
|
return api_error("LimitExceededException", std::move(msg));
|
|
}
|
|
static api_error internal(std::string msg) {
|
|
return api_error("InternalServerError", std::move(msg), http::reply::status_type::internal_server_error);
|
|
}
|
|
static api_error payload_too_large(std::string msg) {
|
|
return api_error("PayloadTooLarge", std::move(msg), status_type::payload_too_large);
|
|
}
|
|
|
|
// Provide the "std::exception" interface, to make it easier to print this
|
|
// exception in log messages. Note that this function is *not* used to
|
|
// format the error to send it back to the client - server.cc has
|
|
// generate_error_reply() to format an api_error as the DynamoDB protocol
|
|
// requires.
|
|
virtual const char* what() const noexcept override;
|
|
mutable std::string _what_string;
|
|
};
|
|
|
|
}
|
|
|