Files
scylladb/test/alternator/test_limits.py
Andrei Chekun 93b9b85c12 [test.py] Refactor alternator, nodetool, rest_api
Make alternator, nodetool and rest_api test directories as python packages.
Move scylla-gdb to scylla_gdb and make it python package.
2024-06-13 13:56:10 +02:00

762 lines
42 KiB
Python

# Copyright 2021-present ScyllaDB
#
# SPDX-License-Identifier: AGPL-3.0-or-later
#############################################################################
# Tests for various limits, which did not fit naturally into other test files
#############################################################################
import pytest
from botocore.exceptions import ClientError
from test.alternator.util import random_string, new_test_table, full_query
from test.alternator.test_gsi import assert_index_query
#############################################################################
# The following tests check the limits on attribute name lengths.
# According to the DynamoDB documentation, attribute names are usually
# limited to 64K bytes, and the only exceptions are:
# 1. Secondary index partition/sort key names are limited to 255 characters.
# 2. In LSI, attributes listed for projection.
# We'll test all these cases below in several separate tests.
# We found additional exceptions - the base-table key names are also limited
# to 255 bytes, and the expiration-time column given to UpdateTimeToLive is
# also limited to 255 characters. We test the last fact in a different test
# file: test_ttl.py::test_update_ttl_errors.
# Attribute length test 1: non-key attribute names below 64KB are usable in
# PutItem, UpdateItem, GetItem, and also in various expressions (condition,
# update and projection) and their archaic pre-expression alternatives.
def test_limit_attribute_length_nonkey_good(test_table_s):
    """Check that a very long (but still legal) non-key attribute name works.

    The name is written, read back and updated through PutItem, GetItem and
    UpdateItem, using both expressions (projection, update, condition) and
    their pre-expression alternatives (AttributesToGet, AttributeUpdates).
    """
    p = random_string()
    key = {'p': p}
    # Drop the last character of the 1024-fold repetition to obtain the
    # name used below as the longest legal one.
    long_name = (random_string(64)*1024)[:-1]
    names = {'#name': long_name}

    def fetch(**extra):
        # Consistent read of the test item, optionally with a projection.
        return test_table_s.get_item(Key=key, ConsistentRead=True, **extra)['Item']

    def whole_item(value):
        # The complete item expected while long_name holds `value`.
        return {'p': p, long_name: value, 'another': 2 }

    # PutItem with the long name, then read it back in three ways:
    test_table_s.put_item(Item={'p': p, long_name: 1, 'another': 2 })
    assert fetch() == whole_item(1)
    assert fetch(ProjectionExpression='#name',
                 ExpressionAttributeNames=names) == {long_name: 1}
    assert fetch(AttributesToGet=[long_name]) == {long_name: 1}

    # UpdateItem with the pre-expression AttributeUpdates:
    test_table_s.update_item(
        Key=key,
        AttributeUpdates={long_name: {'Value': 2, 'Action': 'PUT'}})
    assert fetch() == whole_item(2)

    # UpdateExpression assigning to the long name:
    test_table_s.update_item(
        Key=key,
        UpdateExpression='SET #name = :val',
        ExpressionAttributeNames=names,
        ExpressionAttributeValues={':val': 3})
    assert fetch() == whole_item(3)

    # UpdateExpression that also reads the long name:
    test_table_s.update_item(
        Key=key,
        UpdateExpression='SET #name = #name+:val',
        ExpressionAttributeNames=names,
        ExpressionAttributeValues={':val': 1})
    assert fetch() == whole_item(4)

    # The same update, now guarded by a ConditionExpression on the long name:
    test_table_s.update_item(
        Key=key,
        UpdateExpression='SET #name = #name+:val',
        ConditionExpression='#name = :oldval',
        ExpressionAttributeNames=names,
        ExpressionAttributeValues={':val': 1, ':oldval': 4})
    assert fetch() == whole_item(5)
# Attribute length test 2: attribute names 64KB or above generate an error
# in the aforementioned cases. Note that contrary to what the DynamoDB
# documentation suggests, the length 64KB itself is not allowed - 65535
# (which we tested above) is the last accepted size.
# Reproduces issue #9169.
@pytest.mark.xfail(reason="issue #9169: attribute name limits not enforced")
def test_limit_attribute_length_nonkey_bad(test_table_s):
    """Check that an over-long non-key attribute name is rejected.

    Each request below refers to the over-long name in a different way (item
    attribute, projection, AttributesToGet, AttributeUpdates, update
    expression, condition expression) and is expected to fail with a
    ValidationException mentioning "Attribute name".
    """
    p = random_string()
    key = {'p': p}
    too_long_name = random_string(64)*1024
    names = {'#name': too_long_name}

    # PutItem with the name as an item attribute:
    with pytest.raises(ClientError, match='ValidationException.*Attribute name'):
        test_table_s.put_item(Item={'p': p, too_long_name: 1})

    # GetItem projecting the name through an expression:
    with pytest.raises(ClientError, match='ValidationException.*Attribute name'):
        test_table_s.get_item(
            Key=key,
            ProjectionExpression='#name',
            ExpressionAttributeNames=names)

    # GetItem projecting the name through AttributesToGet:
    with pytest.raises(ClientError, match='ValidationException.*Attribute name'):
        test_table_s.get_item(Key=key, AttributesToGet=[too_long_name])

    # UpdateItem through AttributeUpdates:
    with pytest.raises(ClientError, match='ValidationException.*Attribute name'):
        test_table_s.update_item(
            Key=key,
            AttributeUpdates={too_long_name: {'Value': 2, 'Action': 'PUT'}})

    # UpdateItem assigning to the name in an UpdateExpression:
    with pytest.raises(ClientError, match='ValidationException.*Attribute name'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET #name = :val',
            ExpressionAttributeNames=names,
            ExpressionAttributeValues={':val': 3})

    # UpdateItem where only the ConditionExpression mentions the name:
    with pytest.raises(ClientError, match='ValidationException.*Attribute name'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET a = :val',
            ConditionExpression='#name = :val',
            ExpressionAttributeNames=names,
            ExpressionAttributeValues={':val': 1})
# Attribute length test 3: Test that *key* (hash and range) attribute names
# up to 255 characters are allowed. In the test below we'll see that larger
# sizes aren't allowed.
def test_limit_attribute_length_key_good(dynamodb):
    """Create a table whose hash and range key names are both built with
    random_string(255), then write an item and read it back."""
    hash_name = random_string(255)
    range_name = random_string(255)
    key_schema = [
        { 'AttributeName': hash_name, 'KeyType': 'HASH' },
        { 'AttributeName': range_name, 'KeyType': 'RANGE' },
    ]
    attribute_definitions = [
        { 'AttributeName': hash_name, 'AttributeType': 'S' },
        { 'AttributeName': range_name, 'AttributeType': 'S' },
    ]
    with new_test_table(dynamodb,
                        KeySchema=key_schema,
                        AttributeDefinitions=attribute_definitions) as table:
        item = {hash_name: 'hi', range_name: 'ho', 'another': 2 }
        table.put_item(Item=item)
        stored = table.get_item(Key={hash_name: 'hi', range_name: 'ho'},
                                ConsistentRead=True)['Item']
        assert stored == item
# Attribute length test 4: Test that *key* attribute names more than 255
# characters are not allowed - not for hash key and not for range key.
# Strangely, this limitation is not explicitly mentioned in the DynamoDB
# documentation - which only mentions that SI keys are limited to 255 bytes,
# but forgets to mention base-table keys.
# Reproduces issue #9169.
@pytest.mark.xfail(reason="issue #9169: attribute name limits not enforced")
def test_limit_attribute_length_key_bad(dynamodb):
    """Check that table creation fails when a key attribute name is too long.

    Two cases are tried with a name built by random_string(256): the name
    used as the hash key, and the name used as the range key next to a
    short, valid hash key 'x'. Both are expected to raise a
    ValidationException whose message mentions "length".
    """
    too_long_name = random_string(256)
    # Case 1: the too-long name is the hash key.
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with new_test_table(dynamodb,
                KeySchema=[ { 'AttributeName': too_long_name, 'KeyType': 'HASH' } ],
                AttributeDefinitions=[ { 'AttributeName': too_long_name, 'AttributeType': 'S' } ]):
            pass
    # Case 2: the too-long name is the range key. The hash and range keys
    # must be two separate KeySchema elements: writing both inside a single
    # dict literal repeats the 'AttributeName' and 'KeyType' keys, so only
    # the last pair survives and the hash key is silently dropped from the
    # request.
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with new_test_table(dynamodb,
                KeySchema=[ { 'AttributeName': 'x', 'KeyType': 'HASH' },
                            { 'AttributeName': too_long_name, 'KeyType': 'RANGE' } ],
                AttributeDefinitions=[ { 'AttributeName': too_long_name, 'AttributeType': 'S' },
                                       { 'AttributeName': 'x', 'AttributeType': 'S' } ]):
            pass
# Attribute length tests 5,6: similar to the above tests for the 255-byte
# limit on base-table key names, here we check that the same limit also
# applies to key columns in GSI and LSI.
def test_limit_attribute_length_gsi_lsi_good(dynamodb):
    """Check that names built with random_string(255) work as GSI/LSI keys.

    The base table, a GSI and an LSI all use such names for their key
    attributes. One item is written and then read back from the base table
    and through both indexes.
    """
    base_hash = random_string(255)
    base_range = random_string(255)
    gsi_hash = random_string(255)
    index_range = random_string(255)   # range key of both the GSI and the LSI

    def key_schema(hash_name, range_name):
        return [
            { 'AttributeName': hash_name, 'KeyType': 'HASH' },
            { 'AttributeName': range_name, 'KeyType': 'RANGE' },
        ]

    def equals(value):
        # A KeyConditions entry requiring equality with `value`.
        return {'AttributeValueList': [value], 'ComparisonOperator': 'EQ'}

    attribute_definitions = [
        { 'AttributeName': name, 'AttributeType': 'S' }
        for name in (base_hash, base_range, gsi_hash, index_range)
    ]
    gsi = {
        'IndexName': 'gsi',
        'KeySchema': key_schema(gsi_hash, index_range),
        'Projection': { 'ProjectionType': 'ALL' },
    }
    lsi = {
        'IndexName': 'lsi',
        'KeySchema': key_schema(base_hash, index_range),
        'Projection': { 'ProjectionType': 'ALL' },
    }
    item = {base_hash: 'hi', base_range: 'ho', gsi_hash: 'dog', index_range: 'cat' }
    with new_test_table(dynamodb,
                        KeySchema=key_schema(base_hash, base_range),
                        AttributeDefinitions=attribute_definitions,
                        GlobalSecondaryIndexes=[gsi],
                        LocalSecondaryIndexes=[lsi]) as table:
        table.put_item(Item=item)
        stored = table.get_item(Key={base_hash: 'hi', base_range: 'ho'},
                                ConsistentRead=True)['Item']
        assert stored == item
        # Verify the content through the indexes. The LSI can be read with
        # ConsistentRead, while the GSI might need retries to find the
        # content, so it is checked with assert_index_query:
        lsi_result = full_query(
            table, IndexName='lsi', ConsistentRead=True,
            KeyConditions={
                base_hash: equals('hi'),
                index_range: equals('cat'),
            })
        assert lsi_result == [item]
        assert_index_query(
            table, 'gsi', [item],
            KeyConditions={
                gsi_hash: equals('dog'),
                index_range: equals('cat'),
            })
# Reproduces issue #9169.
@pytest.mark.xfail(reason="issue #9169: attribute name limits not enforced")
def test_limit_attribute_length_gsi_lsi_bad(dynamodb):
    """Check that a name built with random_string(256) is rejected as a GSI
    hash key and as an LSI range key when the table is created."""
    too_long_name = random_string(256)

    def create_table(**indexes):
        # Base table with short key names 'a'/'b'; the too-long name is
        # only declared as an attribute and used by the index under test.
        return new_test_table(dynamodb,
            KeySchema=[ { 'AttributeName': 'a', 'KeyType': 'HASH' },
                        { 'AttributeName': 'b', 'KeyType': 'RANGE' } ],
            AttributeDefinitions=[
                { 'AttributeName': 'a', 'AttributeType': 'S' },
                { 'AttributeName': 'b', 'AttributeType': 'S' },
                { 'AttributeName': too_long_name, 'AttributeType': 'S' } ],
            **indexes)

    gsi = {
        'IndexName': 'gsi',
        'KeySchema': [
            { 'AttributeName': too_long_name, 'KeyType': 'HASH' },
        ],
        'Projection': { 'ProjectionType': 'ALL' },
    }
    lsi = {
        'IndexName': 'lsi',
        'KeySchema': [
            { 'AttributeName': 'a', 'KeyType': 'HASH' },
            { 'AttributeName': too_long_name, 'KeyType': 'RANGE' },
        ],
        'Projection': { 'ProjectionType': 'ALL' },
    }
    # GSI whose hash key has the too-long name:
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with create_table(GlobalSecondaryIndexes=[gsi]) as table:
            pass
    # LSI whose range key has the too-long name:
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with create_table(LocalSecondaryIndexes=[lsi]) as table:
            pass
# Attribute length tests 7,8: In an LSI, projected attribute names are also
# limited to 255 bytes. This is explicitly mentioned in the DynamoDB
# documentation. For GSI this is also true (but not explicitly mentioned).
# This limitation applies only to attributes *explicitly* projected by name -
# attributes projected as part of ALL can be bigger (up to the usual 64KB
# limit).
# Reproduces issue #9169.
@pytest.mark.xfail(reason="issue #9169: attribute name limits not enforced")
def test_limit_attribute_length_gsi_lsi_projection_bad(dynamodb):
    """Check that a name built with random_string(256) is rejected when it
    is listed in NonKeyAttributes of an INCLUDE projection, for a GSI and
    for an LSI."""
    too_long_name = random_string(256)

    def create_table(**indexes):
        # All key attributes ('a', 'b', 'c') have short names; only the
        # projected attribute of the index under test is too long.
        return new_test_table(dynamodb,
            KeySchema=[ { 'AttributeName': 'a', 'KeyType': 'HASH' },
                        { 'AttributeName': 'b', 'KeyType': 'RANGE' } ],
            AttributeDefinitions=[
                { 'AttributeName': 'a', 'AttributeType': 'S' },
                { 'AttributeName': 'b', 'AttributeType': 'S' },
                { 'AttributeName': 'c', 'AttributeType': 'S' } ],
            **indexes)

    def include_projection():
        return { 'ProjectionType': 'INCLUDE',
                 'NonKeyAttributes': [too_long_name]}

    gsi = {
        'IndexName': 'gsi',
        'KeySchema': [
            { 'AttributeName': 'c', 'KeyType': 'HASH' },
        ],
        'Projection': include_projection(),
    }
    lsi = {
        'IndexName': 'lsi',
        'KeySchema': [
            { 'AttributeName': 'a', 'KeyType': 'HASH' },
            { 'AttributeName': 'c', 'KeyType': 'RANGE' },
        ],
        'Projection': include_projection(),
    }
    # GSI explicitly projecting the too-long name:
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with create_table(GlobalSecondaryIndexes=[gsi]) as table:
            pass
    # LSI explicitly projecting the too-long name:
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with create_table(LocalSecondaryIndexes=[lsi]) as table:
            pass
# Above we tested asking to project a specific column which has very long
# name, and failed the table creation. Here we show that a GSI/LSI which
# projects ALL, and has some attribute names with >255 but lower than the
# normal attribute name limit of 64KB, gets projected fine.
def test_limit_attribute_length_gsi_lsi_projection_all(dynamodb):
    """Check that an attribute named with random_string(256) is projected
    into a GSI and an LSI that both use ProjectionType=ALL.

    The item is written once and then read from the base table, from the
    GSI and from the LSI; all three reads must return the full item.
    """
    too_long_name = random_string(256)
    key_schema = [
        { 'AttributeName': 'a', 'KeyType': 'HASH' },
        { 'AttributeName': 'b', 'KeyType': 'RANGE' },
    ]
    attribute_definitions = [
        { 'AttributeName': name, 'AttributeType': 'S' }
        for name in ('a', 'b', 'c')
    ]
    gsi = {
        'IndexName': 'gsi',
        'KeySchema': [
            { 'AttributeName': 'c', 'KeyType': 'HASH' },
        ],
        'Projection': { 'ProjectionType': 'ALL' },
    }
    lsi = {
        'IndexName': 'lsi',
        'KeySchema': [
            { 'AttributeName': 'a', 'KeyType': 'HASH' },
            { 'AttributeName': 'c', 'KeyType': 'RANGE' },
        ],
        'Projection': { 'ProjectionType': 'ALL' },
    }
    with new_test_table(dynamodb,
                        KeySchema=key_schema,
                        AttributeDefinitions=attribute_definitions,
                        GlobalSecondaryIndexes=[gsi],
                        LocalSecondaryIndexes=[lsi]) as table:
        # Adding a non-key attribute with a long name is not a problem,
        # even though ProjectionType=ALL implicitly copies it to the GSI
        # and the LSI.
        item = {'a': 'hi', 'b': 'ho', 'c': 'dog', too_long_name: 'cat' }
        table.put_item(Item=item)
        stored = table.get_item(Key={'a': 'hi', 'b': 'ho'},
                                ConsistentRead=True)['Item']
        assert stored == item
        # The LSI can use ConsistentRead:
        lsi_result = full_query(
            table, IndexName='lsi', ConsistentRead=True,
            KeyConditions={
                'a': {'AttributeValueList': ['hi'], 'ComparisonOperator': 'EQ'},
                'c': {'AttributeValueList': ['dog'], 'ComparisonOperator': 'EQ'},
            })
        # The GSI cannot use ConsistentRead, so the read may need to be
        # retried - assert_index_query is reused for that:
        assert_index_query(
            table, 'gsi', [dict(item)],
            KeyConditions={'c': {'AttributeValueList': ['dog'],
                                 'ComparisonOperator': 'EQ'}})
        assert lsi_result == [item]
#############################################################################
# The following tests test various limits of expressions
# (ProjectionExpression, ConditionExpression, UpdateExpression and
# FilterExpression) as documented in
# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html
# The maximum string length of any of the expression parameters is 4 KB.
# Check that the length 4096 is allowed, 4097 isn't - on all four expression
# types.
def test_limit_expression_len(test_table_s):
    """Check the expression length limit on all four expression types.

    For ProjectionExpression, UpdateExpression, ConditionExpression and
    FilterExpression, a 4096-character expression must be accepted and a
    4097-character one must fail with a ValidationException naming the
    offending parameter.
    """
    p = random_string()
    key = {'p': p}
    string4096 = 'x'*4096
    string4097 = 'x'*4097
    # Padding that brings each short expression below to exactly 4096
    # characters; adding one more space makes it 4097.
    spaces4085 = ' '*4085
    p_equals = {'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}

    def stored():
        return test_table_s.get_item(Key=key, ConsistentRead=True)['Item']

    # ProjectionExpression:
    test_table_s.get_item(Key=key, ProjectionExpression=string4096)
    with pytest.raises(ClientError, match='ValidationException.*ProjectionExpression'):
        test_table_s.get_item(Key=key, ProjectionExpression=string4097)

    # UpdateExpression:
    test_table_s.update_item(
        Key=key,
        UpdateExpression=f'SET{spaces4085}a = :val',
        ExpressionAttributeValues={':val': 1})
    assert stored() == {'p': p, 'a': 1}
    with pytest.raises(ClientError, match='ValidationException.*UpdateExpression'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression=f'SET {spaces4085}a = :val',
            ExpressionAttributeValues={':val': 1})

    # ConditionExpression:
    test_table_s.update_item(
        Key=key,
        UpdateExpression='SET a = :newval',
        ExpressionAttributeValues={':newval': 2, ':oldval': 1},
        ConditionExpression=f'a{spaces4085} = :oldval')
    assert stored() == {'p': p, 'a': 2}
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET a = :newval',
            ExpressionAttributeValues={':newval': 3, ':oldval': 2},
            ConditionExpression=f'a {spaces4085} = :oldval')

    # FilterExpression:
    filtered = full_query(
        test_table_s, ConsistentRead=True,
        KeyConditions=p_equals,
        FilterExpression=f'a{spaces4085} = :theval',
        ExpressionAttributeValues={':theval': 2})
    assert filtered == [{'p': p, 'a': 2}]
    with pytest.raises(ClientError, match='ValidationException.*FilterExpression'):
        full_query(
            test_table_s, ConsistentRead=True,
            KeyConditions=p_equals,
            FilterExpression=f'a {spaces4085} = :theval',
            ExpressionAttributeValues={':theval': 2})
# The previous test (test_limit_expression_len) makes the 4096-byte length
# limit of expressions appear very benign - so what if we accept a 10,000-byte
# expression? Issue #14473 shows one potential harm of long expressions:
# A long expression can also be deeply nested, and recursive algorithms for
# parsing or handling these expressions can cause Scylla to crash. The
# following tests test_limit_expression_len_crash*() used to crash Scylla
# before the expression length limit was enforced (issue #14473).
# These tests use ConditionExpression to demonstrate the problem.
def test_limit_expression_len_crash1(test_table_s):
    """An over-long, deeply nested chain of "and (" conditions must be
    rejected with an "expression size" ValidationException."""
    # a<b and (a<b and (a<b and (a<b and (a<b and (...))))):
    nesting = 20000
    condition = "".join(["a<b ", "and (a<b "*nesting, ")"*nesting])
    p = random_string()
    key = {'p': p}
    test_table_s.update_item(
        Key=key,
        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
                          'b': {'Value': 2, 'Action': 'PUT'}})
    # For this expression longer than 4096 bytes, DynamoDB produces the
    # error "Invalid ConditionExpression: Expression size has exceeded the
    # maximum allowed size; expression size: 200004". Scylla used to crash
    # here (after very deep recursion) instead of returning a clean error.
    with pytest.raises(ClientError, match='ValidationException.*expression size'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :val',
            ConditionExpression=condition,
            ExpressionAttributeValues={':val': 1})
def test_limit_expression_len_crash2(test_table_s):
    """An over-long condition wrapped in 20000 pairs of parentheses must be
    rejected with an "expression size" ValidationException."""
    # (((((((((((((((a<b)))))))))))))))
    nesting = 20000
    condition = "".join(["("*nesting, "a<b", ")"*nesting])
    p = random_string()
    key = {'p': p}
    test_table_s.update_item(
        Key=key,
        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
                          'b': {'Value': 2, 'Action': 'PUT'}})
    with pytest.raises(ClientError, match='ValidationException.*expression size'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :val',
            ConditionExpression=condition,
            ExpressionAttributeValues={':val': 1})
def test_limit_expression_len_crash3(test_table_s):
    """A condition of 15000 open parentheses must be rejected for its size,
    with an "expression size" ValidationException."""
    # ((((((((((((((((((((((((((( - a syntax error
    condition = "("*15000
    p = random_string()
    key = {'p': p}
    test_table_s.update_item(
        Key=key,
        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
                          'b': {'Value': 2, 'Action': 'PUT'}})
    # The expression is also a syntax error, but its excessive length is
    # what should be detected and reported first.
    with pytest.raises(ClientError, match='ValidationException.*expression size'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :val',
            ConditionExpression=condition,
            ExpressionAttributeValues={':val': 1})
def test_limit_expression_len_crash4(test_table_s):
    """An over-long condition prefixed by 20000 "not" operators must be
    rejected with an "expression size" ValidationException."""
    # not not not not not ... not a<b
    nesting = 20000
    condition = "".join(["not "*nesting, "a<b"])
    p = random_string()
    key = {'p': p}
    test_table_s.update_item(
        Key=key,
        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
                          'b': {'Value': 2, 'Action': 'PUT'}})
    with pytest.raises(ClientError, match='ValidationException.*expression size'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :val',
            ConditionExpression=condition,
            ExpressionAttributeValues={':val': 1})
def test_limit_expression_len_crash5(test_table_s):
    """An over-long condition with 20000 nested function calls must be
    rejected with an "expression size" ValidationException."""
    # a < f(f(f(f(...(b)))))
    nesting = 20000
    condition = "".join(["a < ", "f("*nesting, "b", ")"*nesting])
    p = random_string()
    key = {'p': p}
    test_table_s.update_item(
        Key=key,
        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
                          'b': {'Value': 2, 'Action': 'PUT'}})
    with pytest.raises(ClientError, match='ValidationException.*expression size'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :val',
            ConditionExpression=condition,
            ExpressionAttributeValues={':val': 1})
# The above tests test_limit_expression_len_crash* checked various cases
# where very long (>4096 bytes) and very deeply nested expressions caused
# Scylla to crash. We now need to check that expressions within the
# allowed length (4096 bytes), even if deeply nested, work fine.
def test_deeply_nested_expression_1(test_table_s):
    """A condition of 4096 open parentheses must be rejected with a
    ValidationException that mentions ConditionExpression."""
    # ((((((((((((((((((((((((((( - a syntax error
    condition = "(" * 4096
    p = random_string()
    key = {'p': p}
    test_table_s.update_item(
        Key=key,
        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
                          'b': {'Value': 2, 'Action': 'PUT'}})
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :val',
            ConditionExpression=condition,
            ExpressionAttributeValues={':val': 1})
# Continuing the above test, check that Alternator prints a normal "syntax
# error" for just "(((" but an "expression nested too deeply" error for 4000
# parentheses. This is a Scylla-only test - DynamoDB doesn't make
# this distinction, and the specific error message is not important.
# But I wanted to test that Alternator's error messages are as designed.
def test_deeply_nested_expression_1a(test_table_s, scylla_only):
    """Check the two different error messages produced for unbalanced open
    parentheses: "syntax error" for three of them and "expression nested
    too deeply" for 4000 of them."""
    p = random_string()
    key = {'p': p}
    # A short unbalanced expression is an ordinary syntax error:
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression.*syntax error'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :x',
            ExpressionAttributeValues={':x': 1},
            ConditionExpression='(((')
    # A very deep one is reported as nested too deeply:
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression.*expression nested too deeply'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :x',
            ExpressionAttributeValues={':x': 1},
            ConditionExpression='('*4000)
# Even if an expression is shorter than 4096 bytes, DynamoDB can reject
# if it has more than 300 "operators" - in the expression below we fit
# 909 operators ("<" and "or") under 4096 bytes. DynamoDB rejects this
# case with the message "Invalid ConditionExpression: The expression
# contains too many operators; operator count: 301". Scylla currently
# doesn't have this specific limit, but it rejects this expression because
# it exceeds nesting depth MAX_DEPTH. The important thing is that the
# expression is rejected cleanly, without crashing as it used to happen
# on longer expressions.
def test_deeply_nested_expression_2(test_table_s):
    """Check nested "or (" conditions shorter than 4096 bytes: a nesting
    depth of 149 must be accepted and applied, a depth of 454 must be
    rejected with a ValidationException mentioning ConditionExpression."""
    # a<b or (a<b or (a<b or (a<b or (...)))):
    def nested_or(depth):
        return "a<b " + "or (a<b "*depth +")"*depth

    p = random_string()
    key = {'p': p}
    test_table_s.update_item(
        Key=key,
        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
                          'b': {'Value': 2, 'Action': 'PUT'}})
    # With depth=149 the expression has a total of 299 "<" and "or"
    # operators, so it should work. Importantly, parentheses and spaces are
    # *not* counted among the "operators", only the "<" and "or".
    shallow = nested_or(149)
    test_table_s.update_item(
        Key=key,
        UpdateExpression='SET z = :val',
        ConditionExpression=shallow,
        ExpressionAttributeValues={':val': 1})
    stored = test_table_s.get_item(Key=key, ConsistentRead=True)['Item']
    assert stored['z'] == 1
    # depth=454 is still below 4096 bytes, but is rejected by DynamoDB
    # because it has more than 300 operators, and by Scylla because
    # 454 > MAX_DEPTH.
    deep = nested_or(454)
    assert len(deep) < 4096
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :val',
            ConditionExpression=deep,
            ExpressionAttributeValues={':val': 1})
# Another deeply-recursive expression, (((((((((a<b))))))))), that used to
# crash Scylla when the expression was very long but shouldn't crash it
# for expressions shorter than the 4096-byte limit.
# Currently, DynamoDB and Scylla reject this case with different reasons -
# DynamoDB complains that "Invalid ConditionExpression: The expression has
# redundant parentheses", and Scylla stops parsing after recursing too
# deeply (MAX_DEPTH) and reports "Failed parsing ConditionExpression".
# The really important thing is Scylla doesn't crash on this expression (as
# it used to before implementing MAX_DEPTH).
def test_deeply_nested_expression_3(test_table_s):
    """A condition wrapped in 2046 pairs of parentheses, shorter than 4096
    bytes, must be rejected with a ValidationException mentioning
    ConditionExpression."""
    # (((((((((((((((a<b)))))))))))))))
    nesting = 2046
    condition = "".join(["("*nesting, "a<b", ")"*nesting])
    assert len(condition) < 4096
    p = random_string()
    key = {'p': p}
    test_table_s.update_item(
        Key=key,
        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
                          'b': {'Value': 2, 'Action': 'PUT'}})
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :val',
            ConditionExpression=condition,
            ExpressionAttributeValues={':val': 2})
# Another example of a deeply-nested expression which is shorter than
# the 4096-byte limit, but has more than 300 operators (it has 1000 "NOT"
# operators), so DynamoDB rejects it and Scylla rejects it because the
# recursion is deeper than MAX_DEPTH. Of course the more interesting
# observation is that it doesn't crash Scylla during parsing.
def test_deeply_nested_expression_4(test_table_s):
    """A condition prefixed by 1000 "not" operators, shorter than 4096
    bytes, must be rejected with a ValidationException mentioning
    ConditionExpression."""
    # not not not not ... not not a<b
    # The count is even, so logically the condition is equivalent to a<b.
    nesting = 1000
    condition = "".join(["not "*nesting, "a<b"])
    assert len(condition) < 4096
    p = random_string()
    key = {'p': p}
    test_table_s.update_item(
        Key=key,
        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
                          'b': {'Value': 2, 'Action': 'PUT'}})
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :val',
            ConditionExpression=condition,
            ExpressionAttributeValues={':val': 2})
# Another example of a deeply-nested expression shorter than the limit of
# 4096 bytes, a < f(f(f(f(...(b))))). DynamoDB apparently doesn't count
# function calls as "operations", so it isn't limited to 300 nested calls.
# But it should print the correct error message, and obviously not crash.
def test_deeply_nested_expression_5(test_table_s):
    """Conditions with deeply nested function calls, shorter than 4096
    bytes, must be rejected with a ValidationException mentioning
    ConditionExpression - both for an unknown function f() and for the
    existing function size()."""
    def nested_call(opening, depth):
        # "a < " followed by `depth` nested calls around the operand b.
        return "a < " + opening*depth + "b" + ")"*depth

    p = random_string()
    key = {'p': p}
    test_table_s.update_item(
        Key=key,
        AttributeUpdates={'a': {'Value': 1, 'Action': 'PUT'},
                          'b': {'Value': 2, 'Action': 'PUT'}})

    # a < f(f(f(f(...(b)))))
    unknown_function = nested_call("f(", 1355)
    assert len(unknown_function) < 4096
    # DynamoDB prints: "Invalid ConditionExpression: Invalid function name;
    # function: f". Scylla fails parsing this expression because the depth
    # exceeds MAX_DEPTH. We think this is fine.
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :val',
            ConditionExpression=unknown_function,
            ExpressionAttributeValues={':val': 2})

    # a < size(size(size(size(...size(b)))))
    # Here the function exists, but the innermost size returns an integer,
    # which the surrounding size() doesn't like (it's not defined on an
    # int). In Scylla it is still rejected because of MAX_DEPTH.
    known_function = nested_call("size(", 680)
    assert len(known_function) < 4096
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        test_table_s.update_item(
            Key=key,
            UpdateExpression='SET z = :val',
            ConditionExpression=known_function,
            ExpressionAttributeValues={':val': 2})
# TODO: additional expression limits documented in DynamoDB's documentation
# that we should test here:
# * a limit on the length of attribute or value references (#name or :val) -
# the reference together with the first character (# or :) is limited to
# 255 bytes.
# * the sum of length of ExpressionAttributeValues and ExpressionAttributeNames
# is limited to 2MB (not a very interesting limit...)
#############################################################################
# DynamoDB documentation says that the sort key must be between 1 and 1024
# bytes in length. We already test (test_item.py::test_update_item_empty_key)
# that 0 bytes are not allowed, so here we want to verify that 1024 is
# indeed the limit - i.e., 1024 is allowed, 1025 isn't. This is true for
# both strings and bytes (and for bytes, it is the actual bytes - not their
# base64 encoding - that is counted).
# We may decide that this test never needs to pass on Alternator, because
# we may adopt a different limit. In this case we'll need to document this
# decision. In any case, Alternator must have some key-length limits (the
# internal implementation limits key length to 64 KB), so the test after this
# one should pass.
@pytest.mark.xfail(reason="issue #10347: sort key limits not enforced")
def test_limit_sort_key_len_1024(test_table_ss, test_table_sb):
    """Check that a sort key of length 1024 is accepted and one of length
    1025 is rejected, for a string sort key and for a bytes sort key."""
    p = random_string()
    # Each case is (table, accepted sort key, rejected sort key). The length
    # of a bytes value is its real length - not the length of its base64
    # encoding.
    cases = [
        (test_table_ss, 'x'*1024, 'x'*1025),
        (test_table_sb, bytearray([123]*1024), bytearray([123]*1025)),
    ]
    for table, accepted, rejected in cases:
        # A sort key of length 1024 is fine:
        key = {'p': p, 'c': accepted}
        table.put_item(Item=key)
        assert table.get_item(Key=key, ConsistentRead=True)['Item'] == key
        # But a sort key of length 1025 is forbidden - in both write and
        # read. DynamoDB's message says "Aggregated size of all range keys
        # has exceeded the size limit of 1024 bytes". It's not clear what
        # "all range keys" refers to, as there can be only one; this is
        # investigated further in test_limit_sort_key_len_lsi().
        key = {'p': p, 'c': rejected}
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.put_item(Item=key)
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.get_item(Key=key, ConsistentRead=True)
# This is a variant of the above test, where we don't insist that the
# sort key length limit must be exactly 1024 bytes as in DynamoDB, but
# that it be *at least* 1024. I.e., we verify that 1024-byte sort keys
# are allowed, while very long keys that surpass Scylla's low-level
# key-length limit (64 KB) are forbidden with an appropriate error message
# and not an "internal server error". This test should pass even if
# Alternator decides to adopt a different sort-key-length limit from
# DynamoDB. We do have to adopt *some* limit because the internal Scylla
# implementation has a 64 KB limit on key lengths.
@pytest.mark.xfail(reason="issue #10347: sort key limits not enforced")
def test_limit_sort_key_len(test_table_ss, test_table_sb):
    """Check that a sort key of length 1024 is accepted and one of length
    65537 is rejected with a ValidationException, for a string sort key and
    for a bytes sort key."""
    p = random_string()

    def check(table, accepted, rejected):
        # A sort key of length 1024 is fine:
        key = {'p': p, 'c': accepted}
        table.put_item(Item=key)
        assert table.get_item(Key=key, ConsistentRead=True)['Item'] == key
        # A sort key of length 64 KB + 1 is forbidden - it obviously
        # exceeds DynamoDB's limit (1024 bytes), but also exceeds Scylla's
        # internal limit on key length (64 KB). We expect to get a
        # reasonable error on request validation - not some "internal
        # server error".
        key = {'p': p, 'c': rejected}
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.put_item(Item=key)
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.get_item(Key=key, ConsistentRead=True)

    # String sort key:
    check(test_table_ss, 'x'*1024, 'x'*65537)
    # The same limits are true for the bytes type. The length of a bytes
    # array is its real length - not the length of its base64 encoding.
    check(test_table_sb, bytearray([123]*1024), bytearray([123]*65537))
# As mentioned above, DynamoDB's error about sort key length exceeding the
# 1024 byte limit says that "Aggregated size of all range keys has exceeded
# the size limit of 1024 bytes". This is an odd message, considering that
# there can only be one range key... So there is a question whether when we
# have an LSI and several of the item's attributes become range keys (of
# different tables), perhaps their *total* length is limited. It turns out
# the answer is no. We can write an item with two 1024-byte attributes, where
# one is the base table's sort key and the other is an LSI's sort key.
# DynamoDB's error message appears to be nothing more than a mistake.
def test_limit_sort_key_len_lsi(dynamodb):
    # The base table is keyed on (a, b) and the LSI on (a, c), so in a
    # single item both 'b' and 'c' serve as sort keys - one of the base
    # table and one of the index.
    base_key_schema = [
        {'AttributeName': 'a', 'KeyType': 'HASH'},
        {'AttributeName': 'b', 'KeyType': 'RANGE'},
    ]
    string_attributes = [
        {'AttributeName': name, 'AttributeType': 'S'}
        for name in ('a', 'b', 'c')
    ]
    lsi_definition = {
        'IndexName': 'lsi',
        'KeySchema': [
            {'AttributeName': 'a', 'KeyType': 'HASH'},
            {'AttributeName': 'c', 'KeyType': 'RANGE'},
        ],
        'Projection': {'ProjectionType': 'ALL'},
    }
    with new_test_table(dynamodb,
                        KeySchema=base_key_schema,
                        AttributeDefinitions=string_attributes,
                        LocalSecondaryIndexes=[lsi_definition]) as table:
        # Each sort key is 1024 bytes on its own, so together they are
        # 2048 bytes. The write must still be accepted.
        base_sort = 'x' * 1024
        index_sort = 'y' * 1024
        item = {'a': 'hello', 'b': base_sort, 'c': index_sort}
        table.put_item(Item=item)
        # The item is readable through the base table...
        from_base = table.get_item(Key={'a': 'hello', 'b': base_sort},
                                   ConsistentRead=True)
        assert from_base['Item'] == item
        # ... and also through the LSI, using the LSI's own sort key.
        lsi_conditions = {
            'a': {'AttributeValueList': ['hello'],
                  'ComparisonOperator': 'EQ'},
            'c': {'AttributeValueList': [index_sort],
                  'ComparisonOperator': 'EQ'},
        }
        from_index = table.query(IndexName='lsi',
                                 KeyConditions=lsi_conditions,
                                 ConsistentRead=True)
        assert from_index['Items'] == [item]
# DynamoDB documentation says that the partition key must be between 1 and 2048
# bytes in length. We already test (test_item.py::test_update_item_empty_key)
# that 0 bytes are not allowed, so here we want to verify that 2048 is
# indeed the limit - i.e., 2048 is allowed, 2049 isn't. This is true for
# both strings and bytes (and for bytes, it is the actual bytes - not their
# base64 encoding - that is counted).
# We may decide that this test never needs to pass on Alternator, because
# we may adopt a different limit. In this case we'll need to document this
# decision. In any case, Alternator must have some key-length limits (the
# internal implementation limits key length to 64 KB), so even if this test
# won't pass, the one after it should pass.
@pytest.mark.xfail(reason="issue #10347: partition key limits not enforced")
def test_limit_partition_key_len_2048(test_table_s, test_table_b):
    # Run the same scenario against a table with a string partition key and
    # a table with a bytes partition key. For bytes, the length that counts
    # is the real length of the array - not the length of its base64
    # encoding.
    variants = [
        (test_table_s, lambda length: 'x' * length),
        (test_table_b, lambda length: bytearray([123] * length)),
    ]
    for table, make_partition_key in variants:
        # A partition key of exactly 2048 bytes is fine - the item can be
        # written and read back:
        item = {'p': make_partition_key(2048), 'z': 'hello'}
        table.put_item(Item=item)
        response = table.get_item(Key={'p': make_partition_key(2048)},
                                  ConsistentRead=True)
        assert response['Item'] == item
        # But one byte more - 2049 - is forbidden, in both write and read,
        # and must be reported as a ValidationException mentioning a limit.
        too_long = {'p': make_partition_key(2049)}
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.put_item(Item=too_long)
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.get_item(Key=too_long, ConsistentRead=True)
# This is a variant of the above test, where we don't insist that the
# partition key length limit must be exactly 2048 bytes as in DynamoDB, but
# that it be *at least* 2048. I.e., we verify that 2048-byte partition keys
# are allowed, while very long keys that surpass Scylla's low-level
# key-length limit (64 KB) are forbidden with an appropriate error message
# and not an "internal server error". This test should pass even if
# Alternator decides to adopt a different partition-key-length limit from
# DynamoDB. We do have to adopt *some* limit because the internal Scylla
# implementation has a 64 KB limit on key lengths.
@pytest.mark.xfail(reason="issue #10347: partition key limits not enforced")
def test_limit_partition_key_len(test_table_s, test_table_b):
    # Run the same scenario against a table with a string partition key and
    # a table with a bytes partition key. For bytes, the length that counts
    # is the real length of the array - not the length of its base64
    # encoding.
    variants = [
        (test_table_s, lambda length: 'x' * length),
        (test_table_b, lambda length: bytearray([123] * length)),
    ]
    for table, make_partition_key in variants:
        # A partition key of 2048 bytes is fine - the item can be written
        # and read back:
        item = {'p': make_partition_key(2048), 'z': 'hello'}
        table.put_item(Item=item)
        response = table.get_item(Key={'p': make_partition_key(2048)},
                                  ConsistentRead=True)
        assert response['Item'] == item
        # A partition key of 64 KB + 1 is forbidden: it obviously exceeds
        # DynamoDB's limit (2048 bytes), and also exceeds Scylla's internal
        # limit on key length (64 KB). We expect a reasonable error from
        # request validation - not some "internal server error" - on both
        # the write path and the read path.
        oversized = {'p': make_partition_key(65537)}
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.put_item(Item=oversized)
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.get_item(Key=oversized, ConsistentRead=True)