From 32afcdbaf0bc2d1aa2db6b82d7eaecad70fe6ba5 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Mon, 17 Nov 2025 19:38:28 +0200 Subject: [PATCH] test/alternator: enable, and add, tests for gzip'ed requests After the previous patch implemented support in Alternator for gzip-compressed requests ("Content-Encoding: gzip"), here we enable an existing xfail-ing test for this feature, and also add more tests for more cases: * A test for longer compressed requests, or a short compressed request which expands to a longer request. Since the decompression uses small buffers, this test reaches additional code paths. * Check for various cases of a malformed gzip'ed request, and also an attempt to use an unsupported Content-Encoding. DynamoDB returns error 500 for both cases, so we want to test that we do too - and not silently ignore such errors. * Check that two concatenated gzip'ed streams are a valid request, and check that garbage at the end of the gzip - or a missing character at the end of the gzip - is recognized as an error. Signed-off-by: Nadav Har'El --- test/alternator/test_compressed_request.py | 197 ++++++++++++++++++++- test/alternator/test_manual_requests.py | 1 - 2 files changed, 196 insertions(+), 2 deletions(-) diff --git a/test/alternator/test_compressed_request.py b/test/alternator/test_compressed_request.py index 75cf8742f3..68d82bf40d 100644 --- a/test/alternator/test_compressed_request.py +++ b/test/alternator/test_compressed_request.py @@ -30,9 +30,13 @@ import boto3 import botocore +import gzip +import requests import pytest +from contextlib import contextmanager from .util import random_string +from .test_manual_requests import get_signed_request # The compressed_req fixture is like the dynamodb fixture - providing a # connection to a DynamDB-API server. 
But the unique feature of compressed_req @@ -72,7 +76,6 @@ def compressed_req(dynamodb): ret.meta.client.close() # A basic test for a compressed request, using PutItem and GetItem -@pytest.mark.xfail(reason='issue #5041') def test_compressed_request(test_table_s, compressed_req): tab = compressed_req.Table(test_table_s.name) p = random_string() @@ -81,3 +84,195 @@ tab.put_item(Item=item) got_item = tab.get_item(Key={'p': p}, ConsistentRead=True)['Item'] assert got_item == item + +# Test a longer PutItem request. Our decompression implementation wants to +# decompress it in pieces, to avoid one long contiguous allocation of the +# output. So this test will check this code path. +def test_long_compressed_request(test_table_s, compressed_req): + tab = compressed_req.Table(test_table_s.name) + p = random_string() + x = random_string() + # First, make the request compress very well so the compressed request + # will be very short, but the uncompressed output is long and may + # be split across multiple output buffers. + long = p + 'x'*10000 + item = {'p': p, 'x': x, 'long': long} + tab.put_item(Item=item) + got_item = tab.get_item(Key={'p': p}, ConsistentRead=True)['Item'] + assert got_item == item + # Now try a request that doesn't compress as well. Our implementation + # may need to split both input and output buffer boundaries. + long = random_string(5000)*2 + item = {'p': p, 'x': x, 'long': long} + tab.put_item(Item=item) + got_item = tab.get_item(Key={'p': p}, ConsistentRead=True)['Item'] + assert got_item == item + +# The tests above configured boto3 to compress its requests so we could +# test them. We now want to test unusual scenarios - including corrupt +# compressed requests that should fail, or requests compressed in a non- +# traditional way but still should work. 
We can't test these scenarios +# using boto3, and need to construct the requests on our own using functions +# from test_manual_requests.py. + +# Test the error when we send a wrong Content-Encoding header. The only +# Content-Encoding supported by DynamoDB is "gzip" - see the +# tests earlier in this file for tests of successful compression with +# a valid Content-Encoding. +def test_garbage_content_encoding(dynamodb, test_table): + p = random_string() + payload = '{"TableName": "' + test_table.name + '", "Item": {"p": {"S": "' + p + '"}, "c": {"S": "x"}}}' + req = get_signed_request(dynamodb, 'PutItem', payload) + # Add a bad Content-Encoding header. The request signature is still valid, + # but the server will not know how to decode the request. + headers = dict(req.headers) + headers.update({'Content-Encoding': 'garbage'}) + r = requests.post(req.url, headers=headers, data=req.body, verify=False) + assert r.status_code == 500 + # Check the PutItem request really wasn't done + assert 'Item' not in test_table.get_item(Key={'p': p, 'c': 'x'}, ConsistentRead=True) + +# If Content-Encoding is "gzip" but the content is *not* valid gzip encoded, +# DynamoDB returns an InternalServerError. I'm not sure this is the most +# appropriate error to return (among other things it suggests that the +# broken request is retryable), but this is what DynamoDB does so Alternator +# should too. +def test_broken_gzip_content(dynamodb, test_table): + p = random_string() + payload = '{"TableName": "' + test_table.name + '", "Item": {"p": {"S": "' + p + '"}, "c": {"S": "x"}}}' + req = get_signed_request(dynamodb, 'PutItem', payload) + # Add a Content-Encoding header suggesting this is gzipped content. + # Of course it isn't - it's a valid uncompressed request. 
The server + # should fail decompressing it and return a 500 error + headers = dict(req.headers) + headers.update({'Content-Encoding': 'gzip'}) + r = requests.post(req.url, headers=headers, data=req.body, verify=False) + assert r.status_code == 500 + # Check the PutItem request really wasn't done + assert 'Item' not in test_table.get_item(Key={'p': p, 'c': 'x'}, ConsistentRead=True) + +# Test a valid gziped request created manually (without boto3's help). The +# fact that this test passes a sanity test preparing us for the next tests +# where we change the compressed stream and see what happens. +def test_gzip_request_valid(dynamodb, test_table): + p = random_string() + v = random_string() + payload = '{"TableName": "' + test_table.name + '", "Item": {"p": {"S": "' + p + '"}, "c": {"S": "x"}, "v": {"S": "' + v + '"}}}' + # Compress the payload. The new "payload" will be bytes instead of a + # string - this is perfectly fine. + payload = gzip.compress(payload.encode('utf-8')) + req = get_signed_request(dynamodb, 'PutItem', payload) + # Need to tell the server with a Content-Encoding header that the + # payload is compressed: + headers = dict(req.headers) + headers.update({'Content-Encoding': 'gzip'}) + r = requests.post(req.url, headers=headers, data=req.body, verify=False) + assert r.status_code == 200 + got = test_table.get_item(Key={'p': p, 'c': 'x'}, ConsistentRead=True)['Item'] + assert got == {'p': p, 'c': 'x', 'v': v} + +# Same test as test_gzip_request_valid() but compress the payload in two +# pieces, concatenating the two gzip outputs. This isn't something users +# will typically do, but is allowed according to the gzip standard so we +# want to support it. 
+def test_gzip_request_two_gzips(dynamodb, test_table): + p = random_string() + v = random_string() + payload = '{"TableName": "' + test_table.name + '", "Item": {"p": {"S": "' + p + '"}, "c": {"S": "x"}, "v": {"S": "' + v + '"}}}' + # Compress the payload in two halves - first compress the first 10 + # characters, then the rest, and concatenate the two resulting gzips. + payload = gzip.compress(payload[:10].encode('utf-8')) + gzip.compress(payload[10:].encode('utf-8')) + req = get_signed_request(dynamodb, 'PutItem', payload) + headers = dict(req.headers) + headers.update({'Content-Encoding': 'gzip'}) + r = requests.post(req.url, headers=headers, data=req.body, verify=False) + assert r.status_code == 200 + got = test_table.get_item(Key={'p': p, 'c': 'x'}, ConsistentRead=True)['Item'] + assert got == {'p': p, 'c': 'x', 'v': v} + +# Same test as test_gzip_request_valid() but add extra junk - not another +# valid gzip - following the valid gzip string. This should be an error, +# the extra junk should not be just silently ignored. +# Strangely, although we see in other tests for bad gzip that DynamoDB +# returns an error, in this specific case it doesn't. I consider this a +# bug so this is one of the rare tests with the dynamodb_bug marker. 
+def test_gzip_request_with_extra_junk(dynamodb, test_table, dynamodb_bug): + p = random_string() + payload = '{"TableName": "' + test_table.name + '", "Item": {"p": {"S": "' + p + '"}, "c": {"S": "x"}}}' + payload = gzip.compress(payload.encode('utf-8')) + # Add junk - which isn't a second valid gzip - at the end of the payload + payload += b'junk' + req = get_signed_request(dynamodb, 'PutItem', payload) + headers = dict(req.headers) + headers.update({'Content-Encoding': 'gzip'}) + r = requests.post(req.url, headers=headers, data=req.body, verify=False) + assert r.status_code == 500 + assert 'Item' not in test_table.get_item(Key={'p': p, 'c': 'x'}, ConsistentRead=True) + +# Same test as test_gzip_request_valid() but remove one character from the +# end of compressed payload, so it is missing the proper ending marker. +# Decompression should fail and generate an error. +def test_gzip_request_with_missing_character(dynamodb, test_table): + p = random_string() + payload = '{"TableName": "' + test_table.name + '", "Item": {"p": {"S": "' + p + '"}, "c": {"S": "x"}}}' + payload = gzip.compress(payload.encode('utf-8')) + # Remove the one last character from the compressed payload + payload = payload[:-1] + req = get_signed_request(dynamodb, 'PutItem', payload) + headers = dict(req.headers) + headers.update({'Content-Encoding': 'gzip'}) + r = requests.post(req.url, headers=headers, data=req.body, verify=False) + assert r.status_code == 500 + assert 'Item' not in test_table.get_item(Key={'p': p, 'c': 'x'}, ConsistentRead=True) + +# We put a limit (request_content_length_limit = 16 MB) on the size of the +# request. If the request is compressed, even if the compressed request +# is tiny we should still limit the size of the uncompressed request. +# Let's try a 20 MB that compresses extremely well to a tiny string, but +# should still be rejected as an oversized request. 
+# +# This test is marked scylla_only because DynamoDB does NOT limit the +# uncompressed size of a compressed request: +# DynamoDB does (as we check in test_manual_requests.py) limit non-compressed +# request sizes, but these limits are not enforced for compressed requests - +# I checked that even a 1 GB (!) pre-compression-size request is accepted +# (but a 10 GB pre-compression-size request is silently rejected with 500 +# error). It is debatable whether this should be considered a DynamoDB bug - +# or a "feature". I would claim it's a bug - there is no reason why a huge +# well-compressible request should be allowed while not allowed when not +# compressed. A counter-argument is that the biggest reason why a large +# request is not allowed is the signature algorithm - we need to read +# the entire request to verify its validity and only then can we start acting +# on it. But with a compressed request we validate the small compressed +# version's validity, so we can then - at least in theory - read the +# uncompressed request in a streaming fashion, without ever reading the +# entire request into memory. +def test_gzip_request_oversized(dynamodb, test_table, scylla_only): + # Take a legal PutItem payload and add a lot of spaces to make it very + # long, but it's highly compressible so the compressed payload will be + # very small. The server should still reject the oversized uncompressed + # content. 
+ long_len = 20*1024*1024 + p = random_string() + payload = '{"TableName": "' + test_table.name + '", "Item": {"p": {"S": "' + p + '"}, "c": {"S": "x"}}}' + payload = payload[:-1] + ' '*long_len + payload[-1] + payload = gzip.compress(payload.encode('utf-8')) + # the compressed payload is very small, it won't cause the 413 error + assert len(payload) < 16*1024*1024 + req = get_signed_request(dynamodb, 'PutItem', payload) + headers = dict(req.headers) + headers.update({'Content-Encoding': 'gzip'}) + r = requests.post(req.url, headers=headers, data=req.body, verify=False) + assert r.status_code == 413 + assert 'Item' not in test_table.get_item(Key={'p': p, 'c': 'x'}, ConsistentRead=True) + +# An empty string is NOT a valid gzip, so if we try to pass it off as +# a gzip'ed request, the result should be a 500 error like all other +# invalid gzip content. +def test_gzip_request_empty(dynamodb): + # pass the empty string '' as an (incorrect) compressed payload + req = get_signed_request(dynamodb, 'PutItem', '') + headers = dict(req.headers) + headers.update({'Content-Encoding': 'gzip'}) + r = requests.post(req.url, headers=headers, data=req.body, verify=False) + assert r.status_code == 500 diff --git a/test/alternator/test_manual_requests.py b/test/alternator/test_manual_requests.py index 1e6b4fd081..cb25f93e56 100644 --- a/test/alternator/test_manual_requests.py +++ b/test/alternator/test_manual_requests.py @@ -7,7 +7,6 @@ import base64 import json - import pytest import requests import urllib3