Make alternator, nodetool and rest_api test directories as python packages. Move scylla-gdb to scylla_gdb and make it python package.
762 lines
42 KiB
Python
762 lines
42 KiB
Python
# Copyright 2021-present ScyllaDB
|
|
#
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
#############################################################################
|
|
# Tests for various limits, which did not fit naturally into other test files
|
|
#############################################################################
|
|
|
|
import pytest
|
|
from botocore.exceptions import ClientError
|
|
|
|
from test.alternator.util import random_string, new_test_table, full_query
|
|
from test.alternator.test_gsi import assert_index_query
|
|
|
|
|
|
#############################################################################
|
|
# The following tests check the limits on attribute name lengths.
|
|
# According to the DynamoDB documentation, attribute names are usually
|
|
# limited to 64K bytes, and the only exceptions are:
|
|
# 1. Secondary index partition/sort key names are limited to 255 characters.
|
|
# 2. In LSI, attributes listed for projection.
|
|
# We'll test all these cases below in several separate tests.
|
|
# We found additional exceptions - the base-table key names are also limited
|
|
# to 255 bytes, and the expiration-time column given to UpdateTimeToLive is
|
|
# also limited to 255 characters. We test the last fact in a different test
|
|
# file: test_ttl.py::test_update_ttl_errors.
|
|
|
|
# Attribute length test 1: non-key attribute names below 64KB are usable in
|
|
# PutItem, UpdateItem, GetItem, and also in various expressions (condition,
|
|
# update and projection) and their archaic pre-expression alternatives.
|
|
def test_limit_attribute_length_nonkey_good(test_table_s):
    """A 65535-character non-key attribute name is usable everywhere.

    Exercises PutItem, GetItem (plain, ProjectionExpression and the older
    AttributesToGet) and UpdateItem (AttributeUpdates, UpdateExpression,
    and UpdateExpression combined with ConditionExpression).
    """
    p = random_string()
    # 64*1024 characters is one too many; dropping the last character
    # gives the longest name this test expects to be accepted.
    long_name = (random_string(64) * 1024)[:-1]
    names = {'#name': long_name}

    def stored_item(**extra):
        # Strongly-consistent read of the item under test.
        return test_table_s.get_item(Key={'p': p}, ConsistentRead=True, **extra)['Item']

    def item_with(value):
        # The full item we expect once long_name holds `value`.
        return {'p': p, long_name: value, 'another': 2}

    # Write the item with the legal long name and read it back:
    test_table_s.put_item(Item=item_with(1))
    assert stored_item() == item_with(1)

    # Both projection flavours can name the long attribute:
    assert stored_item(ProjectionExpression='#name',
                       ExpressionAttributeNames=names) == {long_name: 1}
    assert stored_item(AttributesToGet=[long_name]) == {long_name: 1}

    # Old-style AttributeUpdates:
    test_table_s.update_item(
        Key={'p': p},
        AttributeUpdates={long_name: {'Value': 2, 'Action': 'PUT'}})
    assert stored_item() == item_with(2)

    # UpdateExpression assigning a constant:
    test_table_s.update_item(
        Key={'p': p},
        UpdateExpression='SET #name = :val',
        ExpressionAttributeNames=names,
        ExpressionAttributeValues={':val': 3})
    assert stored_item() == item_with(3)

    # UpdateExpression reading the long attribute on the right-hand side:
    test_table_s.update_item(
        Key={'p': p},
        UpdateExpression='SET #name = #name+:val',
        ExpressionAttributeNames=names,
        ExpressionAttributeValues={':val': 1})
    assert stored_item() == item_with(4)

    # The same update, now guarded by a ConditionExpression on the long name:
    test_table_s.update_item(
        Key={'p': p},
        UpdateExpression='SET #name = #name+:val',
        ConditionExpression='#name = :oldval',
        ExpressionAttributeNames=names,
        ExpressionAttributeValues={':val': 1, ':oldval': 4})
    assert stored_item() == item_with(5)
|
|
|
|
# Attribute length test 2: attribute names 64KB or above generate an error
|
|
# in the aforementioned cases. Note that contrary to what the DynamoDB
|
|
# documentation suggests, the length 64KB itself is not allowed - 65535
|
|
# (which we tested above) is the last accepted size.
|
|
# Reproduces issue #9169.
|
|
@pytest.mark.xfail(reason="issue #9169: attribute name limits not enforced")
def test_limit_attribute_length_nonkey_bad(test_table_s):
    """A 65536-character non-key attribute name is rejected everywhere.

    Each request below uses the too-long name in a different place
    (item, projection, AttributesToGet, AttributeUpdates, update
    expression, condition expression) and must fail with a
    ValidationException mentioning "Attribute name".
    """
    p = random_string()
    too_long_name = random_string(64) * 1024
    names = {'#name': too_long_name}

    # The requests are listed in the order they must be attempted.
    offending_requests = [
        lambda: test_table_s.put_item(Item={'p': p, too_long_name: 1}),
        lambda: test_table_s.get_item(
            Key={'p': p}, ProjectionExpression='#name',
            ExpressionAttributeNames=names),
        lambda: test_table_s.get_item(
            Key={'p': p}, AttributesToGet=[too_long_name]),
        lambda: test_table_s.update_item(
            Key={'p': p},
            AttributeUpdates={too_long_name: {'Value': 2, 'Action': 'PUT'}}),
        lambda: test_table_s.update_item(
            Key={'p': p}, UpdateExpression='SET #name = :val',
            ExpressionAttributeNames=names,
            ExpressionAttributeValues={':val': 3}),
        lambda: test_table_s.update_item(
            Key={'p': p}, UpdateExpression='SET a = :val',
            ConditionExpression='#name = :val',
            ExpressionAttributeNames=names,
            ExpressionAttributeValues={':val': 1}),
    ]
    for send in offending_requests:
        with pytest.raises(ClientError, match='ValidationException.*Attribute name'):
            send()
|
|
|
|
# Attribute length test 3: Test that *key* (hash and range) attribute names
|
|
# up to 255 characters are allowed. In the test below we'll see that larger
|
|
# sizes aren't allowed.
|
|
def test_limit_attribute_length_key_good(dynamodb):
    """Hash and range key names of exactly 255 characters are accepted.

    Creates a table whose two key attributes have 255-character names,
    then writes one item and reads it back.
    """
    hash_name = random_string(255)
    range_name = random_string(255)
    key_schema = [
        {'AttributeName': hash_name, 'KeyType': 'HASH'},
        {'AttributeName': range_name, 'KeyType': 'RANGE'},
    ]
    attribute_definitions = [
        {'AttributeName': hash_name, 'AttributeType': 'S'},
        {'AttributeName': range_name, 'AttributeType': 'S'},
    ]
    with new_test_table(dynamodb,
                        KeySchema=key_schema,
                        AttributeDefinitions=attribute_definitions) as table:
        table.put_item(Item={hash_name: 'hi', range_name: 'ho', 'another': 2})
        fetched = table.get_item(Key={hash_name: 'hi', range_name: 'ho'},
                                 ConsistentRead=True)['Item']
        assert fetched == {hash_name: 'hi', range_name: 'ho', 'another': 2}
|
|
|
|
# Attribute length test 4: Test that *key* attribute names more than 255
|
|
# characters are not allowed - not for hash key and not for range key.
|
|
# Strangely, this limitation is not explicitly mentioned in the DynamoDB
|
|
# documentation - which only mentions that SI keys are limited to 255 bytes,
|
|
# but forgets to mention base-table keys.
|
|
# Reproduces issue #9169.
|
|
@pytest.mark.xfail(reason="issue #9169: attribute name limits not enforced")
def test_limit_attribute_length_key_bad(dynamodb):
    """Key attribute names longer than 255 characters are rejected.

    Table creation must fail with a ValidationException mentioning
    "length" both when the 256-character name is the hash key and when
    it is the range key of an otherwise valid hash+range schema.
    """
    too_long_name = random_string(256)

    # Case 1: the too-long name is the (only) hash key.
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with new_test_table(dynamodb,
                KeySchema=[{'AttributeName': too_long_name, 'KeyType': 'HASH'}],
                AttributeDefinitions=[
                    {'AttributeName': too_long_name, 'AttributeType': 'S'}]):
            pass

    # Case 2: the too-long name is the range key. The hash and range
    # entries must be two separate dicts: merging them into one dict
    # literal repeats the 'AttributeName'/'KeyType' keys, Python keeps only
    # the last value of each, and the schema silently degenerates to a
    # lone RANGE key with no HASH key.
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with new_test_table(dynamodb,
                KeySchema=[
                    {'AttributeName': 'x', 'KeyType': 'HASH'},
                    {'AttributeName': too_long_name, 'KeyType': 'RANGE'}],
                AttributeDefinitions=[
                    {'AttributeName': too_long_name, 'AttributeType': 'S'},
                    {'AttributeName': 'x', 'AttributeType': 'S'}]):
            pass
|
|
|
|
# Attribute length tests 5,6: similar as the above tests for the 255-byte
|
|
# limit for base table length, here we check that the same limit also applies
|
|
# to key columns in GSI and LSI.
|
|
def test_limit_attribute_length_gsi_lsi_good(dynamodb):
    """255-character names work as key attributes of a GSI and an LSI.

    The base table, the GSI and the LSI all use 255-character key names.
    One item is written and then read back from the base table and
    through both indexes.
    """
    base_hash = random_string(255)
    base_range = random_string(255)
    index_hash = random_string(255)
    index_range = random_string(255)

    def hash_and_range(hash_name, range_name):
        # A two-element KeySchema list.
        return [{'AttributeName': hash_name, 'KeyType': 'HASH'},
                {'AttributeName': range_name, 'KeyType': 'RANGE'}]

    def project_all_index(index_name, hash_name, range_name):
        # Index definition projecting every attribute.
        return {'IndexName': index_name,
                'KeySchema': hash_and_range(hash_name, range_name),
                'Projection': {'ProjectionType': 'ALL'}}

    def equals(value):
        # Old-style KeyConditions equality clause.
        return {'AttributeValueList': [value], 'ComparisonOperator': 'EQ'}

    def the_item():
        # A fresh copy of the single item stored by this test.
        return {base_hash: 'hi', base_range: 'ho',
                index_hash: 'dog', index_range: 'cat'}

    with new_test_table(dynamodb,
            KeySchema=hash_and_range(base_hash, base_range),
            AttributeDefinitions=[
                {'AttributeName': name, 'AttributeType': 'S'}
                for name in (base_hash, base_range, index_hash, index_range)],
            GlobalSecondaryIndexes=[
                project_all_index('gsi', index_hash, index_range)],
            LocalSecondaryIndexes=[
                project_all_index('lsi', base_hash, index_range)]) as table:
        table.put_item(Item=the_item())
        fetched = table.get_item(Key={base_hash: 'hi', base_range: 'ho'},
                                 ConsistentRead=True)['Item']
        assert fetched == the_item()
        # Verify the content through the indexes. The LSI can be read
        # with ConsistentRead, while the GSI read may have to be retried,
        # which is what assert_index_query is used for.
        assert full_query(table, IndexName='lsi', ConsistentRead=True,
                          KeyConditions={
                              base_hash: equals('hi'),
                              index_range: equals('cat'),
                          }) == [the_item()]
        assert_index_query(table, 'gsi',
                           [the_item()],
                           KeyConditions={
                               index_hash: equals('dog'),
                               index_range: equals('cat'),
                           })
|
|
|
|
# Reproduces issue #9169.
|
|
@pytest.mark.xfail(reason="issue #9169: attribute name limits not enforced")
def test_limit_attribute_length_gsi_lsi_bad(dynamodb):
    """A 256-character name is rejected as a GSI or LSI key attribute.

    Two table creations are attempted on the same base schema (keys 'a'
    and 'b'): one with a GSI hashed on the too-long name, one with an LSI
    ranged on it. Both must fail with a ValidationException mentioning
    "length".
    """
    too_long_name = random_string(256)

    def base_key_schema():
        return [{'AttributeName': 'a', 'KeyType': 'HASH'},
                {'AttributeName': 'b', 'KeyType': 'RANGE'}]

    def attribute_definitions():
        return [{'AttributeName': name, 'AttributeType': 'S'}
                for name in ('a', 'b', too_long_name)]

    # GSI whose hash key has the too-long name:
    gsi = {'IndexName': 'gsi',
           'KeySchema': [{'AttributeName': too_long_name, 'KeyType': 'HASH'}],
           'Projection': {'ProjectionType': 'ALL'}}
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with new_test_table(dynamodb,
                            KeySchema=base_key_schema(),
                            AttributeDefinitions=attribute_definitions(),
                            GlobalSecondaryIndexes=[gsi]):
            pass

    # LSI whose range key has the too-long name:
    lsi = {'IndexName': 'lsi',
           'KeySchema': [{'AttributeName': 'a', 'KeyType': 'HASH'},
                         {'AttributeName': too_long_name, 'KeyType': 'RANGE'}],
           'Projection': {'ProjectionType': 'ALL'}}
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with new_test_table(dynamodb,
                            KeySchema=base_key_schema(),
                            AttributeDefinitions=attribute_definitions(),
                            LocalSecondaryIndexes=[lsi]):
            pass
|
|
|
|
# Attribute length tests 7,8: In an LSI, projected attribute names are also
|
|
# limited to 255 bytes. This is explicitly mentioned in the DynamoDB
|
|
# documentation. For GSI this is also true (but not explicitly mentioned).
|
|
# This limitation is only true to attributes *explicitly* projected by name -
|
|
# attributes projected as part of ALL can be bigger (up to the usual 64KB
|
|
# limit).
|
|
# Reproduces issue #9169.
|
|
@pytest.mark.xfail(reason="issue #9169: attribute name limits not enforced")
def test_limit_attribute_length_gsi_lsi_projection_bad(dynamodb):
    """A 256-character name cannot be explicitly projected into an index.

    Creating a table whose GSI, or whose LSI, lists the too-long name in
    an INCLUDE projection's NonKeyAttributes must fail with a
    ValidationException mentioning "length".
    """
    too_long_name = random_string(256)

    def base_key_schema():
        return [{'AttributeName': 'a', 'KeyType': 'HASH'},
                {'AttributeName': 'b', 'KeyType': 'RANGE'}]

    def attribute_definitions():
        return [{'AttributeName': name, 'AttributeType': 'S'}
                for name in ('a', 'b', 'c')]

    def include_too_long():
        # Projection that names the too-long attribute explicitly.
        return {'ProjectionType': 'INCLUDE',
                'NonKeyAttributes': [too_long_name]}

    # GSI case:
    gsi = {'IndexName': 'gsi',
           'KeySchema': [{'AttributeName': 'c', 'KeyType': 'HASH'}],
           'Projection': include_too_long()}
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with new_test_table(dynamodb,
                            KeySchema=base_key_schema(),
                            AttributeDefinitions=attribute_definitions(),
                            GlobalSecondaryIndexes=[gsi]):
            pass

    # LSI case:
    lsi = {'IndexName': 'lsi',
           'KeySchema': [{'AttributeName': 'a', 'KeyType': 'HASH'},
                         {'AttributeName': 'c', 'KeyType': 'RANGE'}],
           'Projection': include_too_long()}
    with pytest.raises(ClientError, match='ValidationException.*length'):
        with new_test_table(dynamodb,
                            KeySchema=base_key_schema(),
                            AttributeDefinitions=attribute_definitions(),
                            LocalSecondaryIndexes=[lsi]):
            pass
|
|
|
|
# Above we tested asking to project a specific column which has very long
|
|
# name, and failed the table creation. Here we show that a GSI/LSI which
|
|
# projects ALL, and has some attribute names with >255 but lower than the
|
|
# normal attribute name limit of 64KB, gets projected fine.
|
|
def test_limit_attribute_length_gsi_lsi_projection_all(dynamodb):
    """A >255-character non-key name is projected fine by ProjectionType=ALL.

    The table has a GSI and an LSI that both project ALL. An item with a
    256-character non-key attribute name is written, then read back from
    the base table and through both indexes.
    """
    too_long_name = random_string(256)

    def equals(value):
        # Old-style KeyConditions equality clause.
        return {'AttributeValueList': [value], 'ComparisonOperator': 'EQ'}

    def the_item():
        # A fresh copy of the single item stored by this test.
        return {'a': 'hi', 'b': 'ho', 'c': 'dog', too_long_name: 'cat'}

    gsi = {'IndexName': 'gsi',
           'KeySchema': [{'AttributeName': 'c', 'KeyType': 'HASH'}],
           'Projection': {'ProjectionType': 'ALL'}}
    lsi = {'IndexName': 'lsi',
           'KeySchema': [{'AttributeName': 'a', 'KeyType': 'HASH'},
                         {'AttributeName': 'c', 'KeyType': 'RANGE'}],
           'Projection': {'ProjectionType': 'ALL'}}
    with new_test_table(dynamodb,
            KeySchema=[{'AttributeName': 'a', 'KeyType': 'HASH'},
                       {'AttributeName': 'b', 'KeyType': 'RANGE'}],
            AttributeDefinitions=[
                {'AttributeName': name, 'AttributeType': 'S'}
                for name in ('a', 'b', 'c')],
            GlobalSecondaryIndexes=[gsi],
            LocalSecondaryIndexes=[lsi]) as table:
        # Adding a non-key attribute with a >255 byte name is not a
        # problem, even though ProjectionType=ALL implicitly copies it to
        # the GSI and the LSI.
        table.put_item(Item=the_item())
        fetched = table.get_item(Key={'a': 'hi', 'b': 'ho'},
                                 ConsistentRead=True)['Item']
        assert fetched == the_item()
        # A GSI cannot use ConsistentRead, so the read may need retrying;
        # assert_index_query is reused for that.
        assert_index_query(table, 'gsi',
                           [the_item()],
                           KeyConditions={'c': equals('dog')})
        # The LSI can use ConsistentRead:
        assert full_query(table, IndexName='lsi', ConsistentRead=True,
                          KeyConditions={
                              'a': equals('hi'),
                              'c': equals('dog'),
                          }) == [the_item()]
|
|
|
|
#############################################################################
|
|
# The following tests test various limits of expressions
|
|
# (ProjectionExpression, ConditionExpression, UpdateExpression and
|
|
# FilterExpression) as documented in
|
|
# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html
|
|
|
|
# The maximum string length of any of the expression parameters is 4 KB.
|
|
# Check that the length 4096 is allowed, 4097 isn't - on all four expression
|
|
# types.
|
|
def test_limit_expression_len(test_table_s):
    """Expressions of 4096 characters are accepted, 4097 are rejected.

    Checked for ProjectionExpression, UpdateExpression,
    ConditionExpression and FilterExpression. The valid expressions are
    padded with 4085 spaces to reach exactly 4096 characters; one more
    space makes them too long.
    """
    p = random_string()

    def current_item():
        return test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']

    # ProjectionExpression:
    test_table_s.get_item(Key={'p': p}, ProjectionExpression='x' * 4096)
    with pytest.raises(ClientError, match='ValidationException.*ProjectionExpression'):
        test_table_s.get_item(Key={'p': p}, ProjectionExpression='x' * 4097)

    padding = ' ' * 4085

    # UpdateExpression:
    test_table_s.update_item(
        Key={'p': p},
        UpdateExpression=f'SET{padding}a = :val',
        ExpressionAttributeValues={':val': 1})
    assert current_item() == {'p': p, 'a': 1}
    with pytest.raises(ClientError, match='ValidationException.*UpdateExpression'):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression=f'SET {padding}a = :val',
            ExpressionAttributeValues={':val': 1})

    # ConditionExpression:
    test_table_s.update_item(
        Key={'p': p},
        UpdateExpression='SET a = :newval',
        ExpressionAttributeValues={':newval': 2, ':oldval': 1},
        ConditionExpression=f'a{padding} = :oldval')
    assert current_item() == {'p': p, 'a': 2}
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression='SET a = :newval',
            ExpressionAttributeValues={':newval': 3, ':oldval': 2},
            ConditionExpression=f'a {padding} = :oldval')

    # FilterExpression:
    def filtered_query(filter_expression):
        return full_query(
            test_table_s, ConsistentRead=True,
            KeyConditions={'p': {'AttributeValueList': [p],
                                 'ComparisonOperator': 'EQ'}},
            FilterExpression=filter_expression,
            ExpressionAttributeValues={':theval': 2})

    assert filtered_query(f'a{padding} = :theval') == [{'p': p, 'a': 2}]
    with pytest.raises(ClientError, match='ValidationException.*FilterExpression'):
        filtered_query(f'a {padding} = :theval')
|
|
|
|
# The previous test (test_limit_expression_len) makes the 4096-byte length
|
|
# limit of expressions appear very benign - so what if we accept a 10,000-byte
|
|
# expression? Issue #14473 shows one potential harm of long expressions:
|
|
# A long expression can also be deeply nested, and recursive algorithms for
|
|
# parsing or handling these expressions can cause Scylla to crash. The
|
|
# following tests test_limit_expression_len_crash*() used to crash Scylla
|
|
# before the expression length limit was enforced (issue #14473).
|
|
# These tests use ConditionExpression to demonstrate the problem.
|
|
def test_limit_expression_len_crash1(test_table_s):
    """Over-long 'a<b and (a<b and (...))' condition gets a clean error."""
    p = random_string()
    test_table_s.update_item(
        Key={'p': p},
        AttributeUpdates={name: {'Value': value, 'Action': 'PUT'}
                          for name, value in (('a', 1), ('b', 2))})
    nesting = 20000
    # a<b and (a<b and (a<b and (a<b and (...)))), 200004 characters long.
    oversized = ''.join(["a<b ", "and (a<b " * nesting, ")" * nesting])
    # For this expression longer than 4096 bytes, DynamoDB produces the
    # error "Invalid ConditionExpression: Expression size has exceeded the
    # maximum allowed size; expression size: 200004". Scylla used to crash
    # here (after very deep recursion) instead of returning a clean error.
    with pytest.raises(ClientError, match='ValidationException.*expression size'):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression='SET z = :val',
            ConditionExpression=oversized,
            ExpressionAttributeValues={':val': 1})
|
|
|
|
def test_limit_expression_len_crash2(test_table_s):
    """Over-long '((((a<b))))' condition is rejected for its size."""
    p = random_string()
    test_table_s.update_item(
        Key={'p': p},
        AttributeUpdates={name: {'Value': value, 'Action': 'PUT'}
                          for name, value in (('a', 1), ('b', 2))})
    nesting = 20000
    # (((((((((((((((a<b))))))))))))))) with 20000 parenthesis pairs.
    oversized = ''.join(["(" * nesting, "a<b", ")" * nesting])
    with pytest.raises(ClientError, match='ValidationException.*expression size'):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression='SET z = :val',
            ConditionExpression=oversized,
            ExpressionAttributeValues={':val': 1})
|
|
|
|
def test_limit_expression_len_crash3(test_table_s):
    """Over-long run of '(' is reported as too long, not as a syntax error."""
    p = random_string()
    test_table_s.update_item(
        Key={'p': p},
        AttributeUpdates={name: {'Value': value, 'Action': 'PUT'}
                          for name, value in (('a', 1), ('b', 2))})
    # ((((((((((((((((((((((((((( - a syntax error, 15000 characters long.
    oversized = "(" * 15000
    # Although this expression is a syntax error, the fact that it is too
    # long should be recognized first.
    with pytest.raises(ClientError, match='ValidationException.*expression size'):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression='SET z = :val',
            ConditionExpression=oversized,
            ExpressionAttributeValues={':val': 1})
|
|
|
|
def test_limit_expression_len_crash4(test_table_s):
    """Over-long 'not not ... not a<b' condition is rejected for its size."""
    p = random_string()
    test_table_s.update_item(
        Key={'p': p},
        AttributeUpdates={name: {'Value': value, 'Action': 'PUT'}
                          for name, value in (('a', 1), ('b', 2))})
    nesting = 20000
    # not not not not not ... not a<b
    oversized = ''.join(["not " * nesting, "a<b"])
    with pytest.raises(ClientError, match='ValidationException.*expression size'):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression='SET z = :val',
            ConditionExpression=oversized,
            ExpressionAttributeValues={':val': 1})
|
|
|
|
def test_limit_expression_len_crash5(test_table_s):
    """Over-long 'a < f(f(f(...(b))))' condition is rejected for its size."""
    p = random_string()
    test_table_s.update_item(
        Key={'p': p},
        AttributeUpdates={name: {'Value': value, 'Action': 'PUT'}
                          for name, value in (('a', 1), ('b', 2))})
    nesting = 20000
    # a < f(f(f(f(...(b)))))
    oversized = ''.join(["a < ", "f(" * nesting, "b", ")" * nesting])
    with pytest.raises(ClientError, match='ValidationException.*expression size'):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression='SET z = :val',
            ConditionExpression=oversized,
            ExpressionAttributeValues={':val': 1})
|
|
|
|
# The above tests test_limit_expression_len_crash* checked various cases
|
|
# where very long (>4096 bytes) and very deeply nested expressions caused
|
|
# Scylla to crash. We now need to check that expressions in the
|
|
# allowed length (4096 bytes), even if deeply nested, work fine.
|
|
def test_deeply_nested_expression_1(test_table_s):
    """4096 opening parentheses are rejected as an invalid ConditionExpression.

    The expression is exactly at the allowed length, so it must be
    refused through the ConditionExpression validation error rather than
    the expression-size one.
    """
    p = random_string()
    test_table_s.update_item(
        Key={'p': p},
        AttributeUpdates={name: {'Value': value, 'Action': 'PUT'}
                          for name, value in (('a', 1), ('b', 2))})
    # ((((((((((((((((((((((((((( - a syntax error
    unbalanced = "(" * 4096
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression='SET z = :val',
            ConditionExpression=unbalanced,
            ExpressionAttributeValues={':val': 1})
|
|
|
|
# Continuing the above test, check that Alternator prints a normal "syntax
|
|
# error" for just "(((" but an "expression nested too deeply" for 4000
|
|
# parentheses. This is a Scylla-only test - DynamoDB doesn't make
|
|
# this distinction, and the specific error message is not important.
|
|
# But I wanted to test that Alternator's error messages are as designed.
|
|
def test_deeply_nested_expression_1a(test_table_s, scylla_only):
    """Alternator distinguishes a plain syntax error from excessive nesting.

    '(((' must be reported as a "syntax error", while 4000 opening
    parentheses must be reported as "expression nested too deeply".
    Scylla-only: the exact wording is Alternator's own.
    """
    p = random_string()

    def update_with_condition(condition):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression='SET z = :x',
            ExpressionAttributeValues={':x': 1},
            ConditionExpression=condition)

    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression.*syntax error'):
        update_with_condition('(((')
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression.*expression nested too deeply'):
        update_with_condition('(' * 4000)
|
|
|
|
# Even if an expression is shorter than 4096 bytes, DynamoDB can reject
|
|
# if it has more than 300 "operators" - in the expression below we fit
|
|
# 909 operators ("<" and "or") under 4096 bytes. DynamoDB rejects this
|
|
# case with the message "Invalid ConditionExpression: The expression
|
|
# contains too many operators; operator count: 301". Scylla currently
|
|
# doesn't have this specific limit, but it rejects this expression because
|
|
# it exceeds nesting depth MAX_DEPTH. The important thing is that the
|
|
# expression is rejected cleanly, without crashing as it used to happen
|
|
# on longer expressions.
|
|
def test_deeply_nested_expression_2(test_table_s):
    """Nested 'a<b or (a<b or (...))': depth 149 works, depth 454 is rejected.

    Both expressions are shorter than 4096 characters. The deeper one
    must be refused with a ConditionExpression validation error.
    """
    p = random_string()
    test_table_s.update_item(
        Key={'p': p},
        AttributeUpdates={name: {'Value': value, 'Action': 'PUT'}
                          for name, value in (('a', 1), ('b', 2))})

    def nested_or(levels):
        # a<b or (a<b or (a<b or (a<b or (...)))):
        return "a<b " + "or (a<b " * levels + ")" * levels

    def update_if(condition):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression='SET z = :val',
            ConditionExpression=condition,
            ExpressionAttributeValues={':val': 1})

    # 149 levels give a total of 299 "<" and "or" operators, so this
    # should work. Importantly, parentheses and spaces are *not* counted
    # among the "operators", only the "<" and "or".
    update_if(nested_or(149))
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 1

    # 454 levels is still below 4096 bytes, but rejected by DynamoDB
    # because it has more than 300 operators, and by Scylla because
    # 454 > MAX_DEPTH.
    too_deep = nested_or(454)
    assert len(too_deep) < 4096
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        update_if(too_deep)
|
|
|
|
# Another deeply-recursive expression, (((((((((a<b))))))))), that used to
|
|
# crash Scylla when the expression was very long but shouldn't crash it
|
|
# for expressions shorter than the 4096-byte limit.
|
|
# Currently, DynamoDB and Scylla reject this case with different reasons -
|
|
# DynamoDB complains that "Invalid ConditionExpression: The expression has
|
|
# redundant parentheses", and Scylla stops parsing after recursing too
|
|
# deeply (MAX_DEPTH) and reports "Failed parsing ConditionExpression".
|
|
# The really important thing is Scylla doesn't crash on this expression (as
|
|
# it used to before implementing MAX_DEPTH).
|
|
def test_deeply_nested_expression_3(test_table_s):
    """'((((a<b))))' with 2046 parenthesis pairs is rejected cleanly.

    The expression is shorter than 4096 characters and must produce a
    ConditionExpression validation error.
    """
    p = random_string()
    test_table_s.update_item(
        Key={'p': p},
        AttributeUpdates={name: {'Value': value, 'Action': 'PUT'}
                          for name, value in (('a', 1), ('b', 2))})
    pairs = 2046
    # (((((((((((((((a<b)))))))))))))))
    wrapped = ''.join(["(" * pairs, "a<b", ")" * pairs])
    assert len(wrapped) < 4096
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression='SET z = :val',
            ConditionExpression=wrapped,
            ExpressionAttributeValues={':val': 2})
|
|
|
|
# Another example of a deeply-nested expression which is shorter than
|
|
# the 4096-byte limit, but has more than 300 operators (it has 1000 "NOT"
|
|
# operators), so DynamoDB rejects it and Scylla rejects it because the
|
|
# recursion is deeper than MAX_DEPTH. Of course the more interesting
|
|
# observation is that it doesn't crash Scylla during parsing.
|
|
def test_deeply_nested_expression_4(test_table_s):
    """1000 chained 'not' operators before 'a<b' are rejected cleanly.

    The expression is shorter than 4096 characters and must produce a
    ConditionExpression validation error.
    """
    p = random_string()
    test_table_s.update_item(
        Key={'p': p},
        AttributeUpdates={name: {'Value': value, 'Action': 'PUT'}
                          for name, value in (('a', 1), ('b', 2))})
    # An even count, so the condition is logically equivalent to just a<b.
    negations = 1000
    # not not not not ... not not a<b
    negated = ''.join(["not " * negations, "a<b"])
    assert len(negated) < 4096
    with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
        test_table_s.update_item(
            Key={'p': p},
            UpdateExpression='SET z = :val',
            ConditionExpression=negated,
            ExpressionAttributeValues={':val': 2})
|
|
|
|
# Another example of a deeply-nested expression shorter than the limit of
|
|
# 4096 bytes, a < f(f(f(f(...(b))))). DynamoDB apparently doesn't count
|
|
# function calls as "operations", so it isn't limited to 300 nested calls.
|
|
# But it should print the correct error message, and obviously not crash.
|
|
def test_deeply_nested_expression_5(test_table_s):
    """Deeply nested function calls under 4096 characters are rejected cleanly.

    Two variants are tried: a non-existent function 'f' nested 1355 times
    and the real function 'size' nested 680 times. Each must produce a
    ConditionExpression validation error.
    """
    p = random_string()
    test_table_s.update_item(
        Key={'p': p},
        AttributeUpdates={name: {'Value': value, 'Action': 'PUT'}
                          for name, value in (('a', 1), ('b', 2))})

    def nested_call(function_name, levels):
        # a < fn(fn(fn(fn(...(b)))))
        return "a < " + (function_name + "(") * levels + "b" + ")" * levels

    def expect_rejected(condition):
        assert len(condition) < 4096
        with pytest.raises(ClientError, match='ValidationException.*ConditionExpression'):
            test_table_s.update_item(
                Key={'p': p},
                UpdateExpression='SET z = :val',
                ConditionExpression=condition,
                ExpressionAttributeValues={':val': 2})

    # a < f(f(f(f(...(b)))))
    # DynamoDB prints: "Invalid ConditionExpression: Invalid function name;
    # function: f". Scylla fails parsing this expression because the depth
    # exceeds MAX_DEPTH. We think this is fine.
    expect_rejected(nested_call("f", 1355))

    # a < size(size(size(size(...size(b)))))
    # Here the function exists, but the innermost size returns an integer,
    # which the surrounding size() doesn't like (it's not defined on an
    # int). In Scylla it is still rejected because of MAX_DEPTH.
    expect_rejected(nested_call("size", 680))
|
|
|
|
# TODO: additional expression limits documented in DynamoDB's documentation
|
|
# that we should test here:
|
|
# * a limit on the length of attribute or value references (#name or :val) -
|
|
# the reference together with the first character (# or :) is limited to
|
|
# 255 bytes.
|
|
# * the sum of length of ExpressionAttributeValues and ExpressionAttributeNames
|
|
# is limited to 2MB (not a very interesting limit...)
|
|
|
|
#############################################################################
|
|
|
|
# DynamoDB documentation says that the sort key must be between 1 and 1024
|
|
# bytes in length. We already test (test_item.py::test_update_item_empty_key)
|
|
# that 0 bytes are not allowed, so here we want to verify that 1024 is
|
|
# indeed the limit - i.e., 1024 is allowed, 1025 isn't. This is true for
|
|
# both strings and bytes (and for bytes, it is the actual bytes - not their
|
|
# base64 encoding - that is counted).
|
|
# We may decide that this test never needs to pass on Alternator, because
|
|
# we may adopt a different limit. In this case we'll need to document this
|
|
# decision. In any case, Alternator must have some key-length limits (the
|
|
# internal implementation limits key length to 64 KB), so the test after this
|
|
# one should pass.
|
|
@pytest.mark.xfail(reason="issue #10347: sort key limits not enforced")
def test_limit_sort_key_len_1024(test_table_ss, test_table_sb):
    """Sort keys of 1024 bytes are accepted and 1025 bytes are rejected.

    Checked first on the string-sort-key table and then on the
    bytes-sort-key table, for both writes (PutItem) and reads (GetItem).
    """
    p = random_string()
    # Each entry pairs a table with a function producing a sort-key value
    # of a requested length. For bytes, the length counted is that of the
    # raw byte array, not of its base64 encoding.
    cases = (
        (test_table_ss, lambda length: 'x' * length),
        (test_table_sb, lambda length: bytearray([123] * length)),
    )
    for table, sort_key_of_length in cases:
        # A sort key of length 1024 is fine:
        allowed = {'p': p, 'c': sort_key_of_length(1024)}
        table.put_item(Item=allowed)
        assert table.get_item(Key=allowed, ConsistentRead=True)['Item'] == allowed
        # But a sort key of length 1025 is forbidden - in both read and
        # write. DynamoDB's message says "Aggregated size of all range
        # keys has exceeded the size limit of 1024 bytes". It's not clear
        # what "all range keys" refers to, as there can be only one.
        forbidden = {'p': p, 'c': sort_key_of_length(1025)}
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.put_item(Item=forbidden)
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.get_item(Key=forbidden, ConsistentRead=True)
|
|
|
|
# This is a variant of the above test, where we don't insist that the
|
|
# sort key length limit must be exactly 1024 bytes as in DynamoDB, but
|
|
# that it be *at least* 1024. I.e., we verify that 1024-byte sort keys
|
|
# are allowed, while very long keys that surpass Scylla's low-level
|
|
# key-length limit (64 KB) are forbidden with an appropriate error message
|
|
# and not an "internal server error". This test should pass even if
|
|
# Alternator decides to adopt a different sort-key-length limit from
|
|
# DynamoDB. We do have to adopt *some* limit because the internal Scylla
|
|
# implementation has a 64 KB limit on key lengths.
|
|
@pytest.mark.xfail(reason="issue #10347: sort key limits not enforced")
def test_limit_sort_key_len(test_table_ss, test_table_sb):
    partition = random_string()
    # Run the same scenario once with a string sort key and once with a
    # bytes sort key. For bytes, the length that counts is the real length
    # of the array - not the length of its base64 encoding.
    cases = [
        (test_table_ss, lambda length: 'x' * length),
        (test_table_sb, lambda length: bytearray([123] * length)),
    ]
    for table, make_sort_key in cases:
        # A 1024-byte sort key must be accepted.
        allowed = {'p': partition, 'c': make_sort_key(1024)}
        table.put_item(Item=allowed)
        stored = table.get_item(Key=allowed, ConsistentRead=True)['Item']
        assert stored == allowed
        # A sort key of 64 KB + 1 bytes obviously exceeds DynamoDB's limit
        # (1024 bytes), but also exceeds Scylla's internal limit on key
        # length (64 KB). We expect a reasonable error on request
        # validation - not some "internal server error".
        rejected = {'p': partition, 'c': make_sort_key(65537)}
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.put_item(Item=rejected)
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.get_item(Key=rejected, ConsistentRead=True)
|
|
|
|
# As mentioned above, DynamoDB's error about sort key length exceeding the
|
|
# 1024 byte limit says that "Aggregated size of all range keys has exceeded
|
|
# the size limit of 1024 bytes". This is an odd message, considering that
|
|
# there can only be one range key... So there is a question whether when we
|
|
# have an LSI and several of the item's attributes become range keys (of
|
|
# different tables), perhaps their *total* length is limited. It turns out
|
|
# the answer is no. We can write an item with two 1024-byte attributes, where
|
|
# one is the base table's sort key and the other is an LSI's sort key.
|
|
# DynamoDB's error message appears to be nothing more than a mistake.
|
|
def test_limit_sort_key_len_lsi(dynamodb):
    # Base table: hash key 'a', range key 'b'.
    base_key_schema = [
        {'AttributeName': 'a', 'KeyType': 'HASH'},
        {'AttributeName': 'b', 'KeyType': 'RANGE'},
    ]
    # All three attributes that serve as a key somewhere are strings.
    attribute_definitions = [
        {'AttributeName': name, 'AttributeType': 'S'} for name in ('a', 'b', 'c')
    ]
    # The LSI shares the hash key 'a' but uses 'c' as its range key, so a
    # single item carries two different range-key attributes.
    local_indexes = [
        {
            'IndexName': 'lsi',
            'KeySchema': [
                {'AttributeName': 'a', 'KeyType': 'HASH'},
                {'AttributeName': 'c', 'KeyType': 'RANGE'},
            ],
            'Projection': {'ProjectionType': 'ALL'},
        },
    ]
    with new_test_table(dynamodb,
                        KeySchema=base_key_schema,
                        AttributeDefinitions=attribute_definitions,
                        LocalSecondaryIndexes=local_indexes) as table:
        # Both range-key attributes are 1024 bytes long, 2048 bytes in
        # total; the write must be accepted.
        item = {'a': 'hello', 'b': 'x'*1024, 'c': 'y'*1024}
        table.put_item(Item=item)
        # The item is readable through the base table...
        base_result = table.get_item(Key={'a': 'hello', 'b': 'x'*1024},
                                     ConsistentRead=True)
        assert base_result['Item'] == item
        # ...and through the LSI, using the other 1024-byte range key.
        lsi_conditions = {
            'a': {'AttributeValueList': ['hello'], 'ComparisonOperator': 'EQ'},
            'c': {'AttributeValueList': ['y'*1024], 'ComparisonOperator': 'EQ'},
        }
        lsi_result = table.query(IndexName='lsi', KeyConditions=lsi_conditions,
                                 ConsistentRead=True)
        assert lsi_result['Items'] == [item]
|
|
|
|
# DynamoDB documentation says that the partition key must be between 1 and 2048
|
|
# bytes in length. We already test (test_item.py::test_update_item_empty_key)
|
|
# that 0 bytes are not allowed, so here we want to verify that 2048 is
|
|
# indeed the limit - i.e., 2048 is allowed, 2049 isn't. This is true for
|
|
# both strings and bytes (and for bytes, it is the actual bytes - not their
|
|
# base64 encoding - that is counted).
|
|
# We may decide that this test never needs to pass on Alternator, because
|
|
# we may adopt a different limit. In this case we'll need to document this
|
|
# decision. In any case, Alternator must have some key-length limits (the
|
|
# internal implementation limits key length to 64 KB), so even if this test
|
|
# won't pass, the one after it should pass.
|
|
@pytest.mark.xfail(reason="issue #10347: sort key limits not enforced")
def test_limit_partition_key_len_2048(test_table_s, test_table_b):
    # The string and bytes variants differ only in the table used and in
    # how a partition key of a given length is built. For bytes, the length
    # that counts is the real length of the array - not the length of its
    # base64 encoding.
    cases = [
        (test_table_s, lambda length: 'x' * length),
        (test_table_b, lambda length: bytearray([123] * length)),
    ]
    for table, make_partition_key in cases:
        # A partition key of exactly 2048 bytes is fine: the item can be
        # written and then read back by a freshly built, equal key.
        item = {'p': make_partition_key(2048), 'z': 'hello'}
        table.put_item(Item=item)
        stored = table.get_item(Key={'p': make_partition_key(2048)},
                                ConsistentRead=True)['Item']
        assert stored == item
        # But a partition key of 2049 bytes is forbidden - in both write
        # and read.
        rejected = {'p': make_partition_key(2049)}
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.put_item(Item=rejected)
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.get_item(Key=rejected, ConsistentRead=True)
|
|
|
|
# This is a variant of the above test, where we don't insist that the
|
|
# partition key length limit must be exactly 2048 bytes as in DynamoDB, but
|
|
# that it be *at least* 2048. I.e., we verify that 2048-byte partition keys
|
|
# are allowed, while very long keys that surpass Scylla's low-level
|
|
# key-length limit (64 KB) are forbidden with an appropriate error message
|
|
# and not an "internal server error". This test should pass even if
|
|
# Alternator decides to adopt a different partition-key-length limit from
|
|
# DynamoDB. We do have to adopt *some* limit because the internal Scylla
|
|
# implementation has a 64 KB limit on key lengths.
|
|
@pytest.mark.xfail(reason="issue #10347: sort key limits not enforced")
def test_limit_partition_key_len(test_table_s, test_table_b):
    # Run the same scenario once with a string partition key and once with
    # a bytes partition key. For bytes, the length that counts is the real
    # length of the array - not the length of its base64 encoding.
    cases = [
        (test_table_s, lambda length: 'x' * length),
        (test_table_b, lambda length: bytearray([123] * length)),
    ]
    for table, make_partition_key in cases:
        # A 2048-byte partition key must be accepted; read the item back
        # with a freshly built, equal key.
        item = {'p': make_partition_key(2048), 'z': 'hello'}
        table.put_item(Item=item)
        stored = table.get_item(Key={'p': make_partition_key(2048)},
                                ConsistentRead=True)['Item']
        assert stored == item
        # A partition key of 64 KB + 1 bytes obviously exceeds DynamoDB's
        # limit (2048 bytes), but also exceeds Scylla's internal limit on
        # key length (64 KB). We expect a reasonable error on request
        # validation - not some "internal server error".
        rejected = {'p': make_partition_key(65537)}
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.put_item(Item=rejected)
        with pytest.raises(ClientError, match='ValidationException.*limit'):
            table.get_item(Key=rejected, ConsistentRead=True)
|