Files
scylladb/test/alternator/test_projection_expression.py
Szymon Malewski 7ed38155a3 test/alternator: use test_table_ss instead of test_table in expressions related tests.
This patch includes minor refactoring of expressions related tests (#22494) - use `test_table_ss` instead of `test_table`.
2025-09-28 04:06:00 +02:00

381 lines
24 KiB
Python

# Copyright 2019-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
# Tests for the various operations (GetItem, Query, Scan) with a
# ProjectionExpression parameter.
#
# ProjectionExpression is an expansion of the legacy AttributesToGet
# parameter. Both parameters request that only a subset of the attributes
# be fetched for each item, instead of all of them. But while AttributesToGet
# was limited to top-level attributes, ProjectionExpression can request also
# nested attributes.
import pytest
from botocore.exceptions import ClientError
from test.alternator.util import random_string, full_scan, full_query, multiset
# Basic test for ProjectionExpression, requesting only top-level attributes.
# Result should include the selected attributes only - if one wants the key
# attributes as well, one needs to select them explicitly. When no key
# attributes are selected, an item may have *none* of the selected
# attributes, and returned as an empty item.
def test_projection_expression_toplevel(test_table_ss):
    # Store a single item with a two-part key (p, c) and two regular
    # attributes, then read it back through several projections.
    p = random_string()
    c = random_string()
    item = {'p': p, 'c': c, 'a': 'hello', 'b': 'hi'}
    test_table_ss.put_item(Item=item)
    projections = (
        ['a'],            # a single non-key attribute
        ['c', 'a'],       # the sort key together with a non-key attribute
        ['p', 'c'],       # the complete key
        ['nonexistent'],  # an attribute the item does not have
    )
    for wanted in projections:
        response = test_table_ss.get_item(
            Key={'p': p, 'c': c},
            ProjectionExpression=",".join(wanted),
            ConsistentRead=True)
        got_item = response['Item']
        # Only the requested attributes that the item really has should
        # come back - possibly none at all.
        expected_item = {name: item[name] for name in wanted if name in item}
        assert expected_item == got_item
# Various simple tests for ProjectionExpression's syntax, using only top-level
# attributes.
def test_projection_expression_toplevel_syntax(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hello', 'b': 'hi'})

    def project(expression, **options):
        # Read our item back with the given ProjectionExpression (plus any
        # extra GetItem parameters) and return the projected Item.
        response = test_table_s.get_item(
            Key={'p': p}, ConsistentRead=True,
            ProjectionExpression=expression, **options)
        return response['Item']

    only_a = {'a': 'hello'}
    both = {'a': 'hello', 'b': 'hi'}
    assert project('a') == only_a
    assert project('#name', ExpressionAttributeNames={'#name': 'a'}) == only_a
    assert project('a,b') == both
    # Whitespace around the names and the comma is accepted:
    assert project(' a , b ') == both

    # A reference that is missing from ExpressionAttributeNames, and an
    # entry of ExpressionAttributeNames that the expression never uses,
    # are both errors:
    for names in ({'#wrong': 'a'}, {'#name': 'a', '#unused': 'b'}):
        with pytest.raises(ClientError, match='ValidationException'):
            project('#name', ExpressionAttributeNames=names)

    invalid_expressions = (
        # The same top-level attribute may not be fetched twice (in
        # general, two listed attributes may not overlap). The error looks
        # like "Invalid ProjectionExpression: Two document paths overlap
        # with each other; must remove or rewrite one of these paths; path
        # one: [a], path two: [a]".
        'a,a',
        # A comma must have an attribute on both of its sides - anything
        # else is a syntax error:
        'a,',
        ',a',
        'a,,b',
        # An empty ProjectionExpression is not allowed either. DynamoDB
        # recognizes its syntax, but then writes: "Invalid
        # ProjectionExpression: The expression can not be empty".
        '',
    )
    for expression in invalid_expressions:
        with pytest.raises(ClientError, match='ValidationException'):
            project(expression)
# The following two tests are similar to test_projection_expression_toplevel()
# which tested the GetItem operation - but these test Scan and Query.
# Both test ProjectionExpression with only top-level attributes.
def test_projection_expression_scan(filled_test_table):
    table, items = filled_test_table
    projections = (
        ['another'],       # non-key attribute only (one item lacks it!)
        ['c', 'another'],  # the sort key together with a non-key attribute
        ['p', 'c'],        # the complete key
        ['nonexistent'],   # an attribute that no item has
    )
    for wanted in projections:
        got_items = full_scan(table, ProjectionExpression=",".join(wanted))
        # Each stored item contributes exactly the requested attributes it
        # has, which may be an empty item.
        expected_items = []
        for stored in items:
            expected_items.append(
                {name: stored[name] for name in wanted if name in stored})
        assert multiset(expected_items) == multiset(got_items)
def test_projection_expression_query(test_table_ss):
    # Fill one partition with ten items, then query it through several
    # projections.
    p = random_string()
    items = []
    for i in range(10):
        items.append({'p': p, 'c': str(i), 'a': str(i*10), 'b': str(i*100)})
    with test_table_ss.batch_writer() as batch:
        for item in items:
            batch.put_item(item)
    projections = (
        ['a'],            # non-key attribute only
        ['c', 'a'],       # the sort key together with a non-key attribute
        ['p', 'c'],       # the complete key
        ['nonexistent'],  # an attribute that no item has
    )
    for wanted in projections:
        got_items = full_query(
            test_table_ss,
            KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}},
            ProjectionExpression=",".join(wanted))
        expected_items = [
            {name: stored[name] for name in wanted if name in stored}
            for stored in items
        ]
        assert multiset(expected_items) == multiset(got_items)
# The previous tests all fetched only top-level attributes. They could all
# be written using AttributesToGet instead of ProjectionExpression (and,
# in fact, we do have similar tests with AttributesToGet in other files),
# but the previous tests checked that the alternative syntax works correctly.
# The following test checks fetching more elaborate attribute paths from
# nested documents.
def test_projection_expression_path(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={
        'p': p,
        'a': {'b': [2, 4, {'x': 'hi', 'y': 'yo'}], 'c': 5},
        'b': 'hello'
    })

    def lookup(partition, expression):
        # GetItem for the given partition key with a ProjectionExpression;
        # returns the whole response.
        return test_table_s.get_item(
            Key={'p': partition}, ConsistentRead=True,
            ProjectionExpression=expression)

    def project(expression):
        # The projected Item of the item written above.
        return lookup(p, expression)['Item']

    cases = [
        # Fetching the entire nested document "a" works, of course:
        ('a', {'a': {'b': [2, 4, {'x': 'hi', 'y': 'yo'}], 'c': 5}}),
        # Fetching a.b returns only the content of b - but still wrapped
        # in the a dictionary:
        ('a.b', {'a': {'b': [2, 4, {'x': 'hi', 'y': 'yo'}]}}),
        # Likewise, a.b[0] comes back as a one-element array inside the
        # dictionary. Note that [0] is the first element of an array.
        ('a.b[0]', {'a': {'b': [2]}}),
        ('a.b[2]', {'a': {'b': [{'x': 'hi', 'y': 'yo'}]}}),
        ('a.b[2].y', {'a': {'b': [{'y': 'yo'}]}}),
        # Reading any kind of non-existent attribute returns an empty
        # item: a missing top-level attribute, an index beyond the end of
        # an array, a missing dictionary member, and paths beginning with
        # a non-existent prefix.
        ('x', {}),
        ('a.b[3]', {}),
        ('a.x', {}),
        ('a.x.y', {}),
        ('a.b[3].x', {}),
        # Indexing a dictionary as an array, an array as a dictionary, or
        # an integer as either, also yields an empty item.
        ('a.b.x', {}),
        ('a[0]', {}),
        ('a.b[0].x', {}),
        ('a.b[0][0]', {}),
        # Several paths may be read at once - the results are merged into
        # one object structured the same way as the original item:
        ('a.b[0],a.b[1]', {'a': {'b': [2, 4]}}),
        ('a.b[0],a.c', {'a': {'b': [2], 'c': 5}}),
        ('a.c,b', {'a': {'c': 5}, 'b': 'hello'}),
        # Paths that are not available are silently skipped (just as they
        # produced an empty item when requested alone):
        ('a.x, a.b[0], x, a.b[3].x', {'a': {'b': [2]}}),
    ]
    for expression, expected in cases:
        assert project(expression) == expected

    rejected = (
        # The same path may not be read more than once. DynamoDB's error
        # looks like: "Invalid ProjectionExpression: Two document paths
        # overlap with each other; must remove or rewrite one of these
        # paths; path one: [a, b, [0]], path two: [a, b, [0]]".
        'a.b[0],a.b[0]',
        # Two paths "overlap" when the content of one contains the content
        # of the other, so "a" together with "a.b[0]" is refused as well.
        'a,a.b[0]',
    )
    for expression in rejected:
        with pytest.raises(ClientError, match='ValidationException'):
            project(expression)

    # Projecting a non-existent attribute of an existing item gave an empty
    # Item above. When the item itself does not exist, the response has no
    # Item entry at all:
    absent = random_string()
    for expression in ('x', 'a.x', 'a[0]'):
        assert 'Item' not in lookup(absent, expression)
# Above in test_projection_expression_toplevel_syntax() we tested how
# name references (#name) work in top-level attributes. In the following
# two tests we test how they work in more elaborate paths:
# 1. Multiple path components can make multiple references, e.g., "#a.#b"
# 2. Conversely, a single reference, e.g., "#a", is always a single path
# component. Even if "#a" is "a.b", this refers to the literal attribute
# "a.b" - with a dot in its name - and not to the b element in a.
def test_projection_expression_path_references(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'b': 1, 'c': 2}, 'b': 'hi'})

    def get(expression, **options):
        # GetItem on our item with the given ProjectionExpression and any
        # extra parameters; returns the whole response.
        return test_table_s.get_item(
            Key={'p': p}, ConsistentRead=True,
            ProjectionExpression=expression, **options)

    expected = {'a': {'b': 1}}
    # The literal path first, then the same path with the first component,
    # the second component, and finally both replaced by references.
    assert get('a.b')['Item'] == expected
    assert get('#n1.b', ExpressionAttributeNames={'#n1': 'a'})['Item'] == expected
    assert get('a.#n2', ExpressionAttributeNames={'#n2': 'b'})['Item'] == expected
    assert get('#n1.#n2', ExpressionAttributeNames={'#n1': 'a', '#n2': 'b'})['Item'] == expected
    # An undefined reference, and a defined but unused one, are errors:
    for names in ({'#wrong': 'b'}, {'#n2': 'b', '#unused': 'x'}):
        with pytest.raises(ClientError, match='ValidationException'):
            get('a.#n2', ExpressionAttributeNames=names)
def test_projection_expression_path_dot(test_table_s):
    # The item has both a top-level attribute literally named "a.b" and a
    # nested attribute b inside the dictionary a.
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a.b': 'hi', 'a': {'b': 'yo', 'c': 'jo'}})
    # Written out in the expression, a.b is a path into the dictionary a:
    nested = test_table_s.get_item(
        Key={'p': p}, ConsistentRead=True,
        ProjectionExpression='a.b')
    assert nested['Item'] == {'a': {'b': 'yo'}}
    # A reference that expands to "a.b" is a single path component, so it
    # selects the top-level attribute with the dot in its name:
    dotted = test_table_s.get_item(
        Key={'p': p}, ConsistentRead=True,
        ProjectionExpression='#name',
        ExpressionAttributeNames={'#name': 'a.b'})
    assert dotted['Item'] == {'a.b': 'hi'}
# DynamoDB does not allow "overlapping" paths to be listed in
# ProjectionExpression. This includes both identical paths, and paths where
# one is a sub-path of the other - e.g. "a.b" and "a.b.c". As we already saw
# above, paths with just a common *prefix* - e.g., "a.b, a.c" - are fine.
def test_projection_expression_path_overlap(test_table_s):
    # Overlap is checked symbolically on the paths themselves, regardless
    # of what the item contains or whether it exists at all. So no item is
    # written here - only a key for the GetItem call is needed :-)
    p = random_string()

    def get(expression):
        return test_table_s.get_item(
            Key={'p': p}, ConsistentRead=True,
            ProjectionExpression=expression)

    overlapping = (
        'a, a',
        'a.b, a.b',
        'a[1], a[1]',
        'a, a.b',
        'a.b, a',
        'a.b, a.b[2]',
        'a.b, a.b.c',
        'a, a.b[2].c',
        'a.b.d, a.b',
        'a.b.d.e, a.b',
        'a.b, a.b.d',
        'a.b, a.b.d.e',
    )
    for expr in overlapping:
        with pytest.raises(ClientError, match='ValidationException.* overlap'):
            print(expr)
            get(expr)
    # An over-zealous "overlap" check that rejects everything would pass
    # the loop above :-) So also make sure that paths which do not overlap
    # are not reported as overlapping.
    disjoint = (
        'a, b',
        'a.b, a.c',
        'a.b.d, a.b.e',
        'a[1], a[2]',
        'a.b, a.c[2]',
    )
    for expr in disjoint:
        print(expr)
        get(expr)
# In addition to not allowing "overlapping" paths, DynamoDB also does not
# allow "conflicting" paths: It does not allow giving both a.b and a[1] in a
# single ProjectionExpression. It gives the error:
# "Invalid ProjectionExpression: Two document paths conflict with each other;
# must remove or rewrite one of these paths; path one: [a, b], path two:
# [a, [1]]".
# The reasoning is that asking for both in one request makes no sense because
# no item will ever be able to fulfill both.
def test_projection_expression_path_conflict(test_table_s):
    # Conflicts are detected symbolically on the paths themselves,
    # regardless of what the item contains or whether it exists at all. So
    # no item is written here - only a key for the GetItem call is needed :-)
    p = random_string()

    def get(expression):
        return test_table_s.get_item(
            Key={'p': p}, ConsistentRead=True,
            ProjectionExpression=expression)

    conflicting = (
        'a.b, a[1]',
        'a[1], a.b',
        'a.b[1], a.b.c',
        'a.b.c, a.b[1]',
    )
    for expr in conflicting:
        with pytest.raises(ClientError, match='ValidationException.* conflict'):
            get(expr)
    # An over-zealous "conflict" check that rejects everything would pass
    # the loop above :-) So also make sure that paths which do not conflict
    # are accepted.
    compatible = (
        'a.b, a.c',
        'a.b, a.c[1]',
    )
    for expr in compatible:
        get(expr)
# Above we tested nested paths in ProjectionExpression, but just for the
# GetItem request. Let's verify they also work in Query and Scan requests:
def test_query_projection_expression_path(test_table_ss):
    # Fill one partition with ten items that each hold a nested document
    # "a", then query the partition projecting just the nested path a.x.
    p = random_string()
    items = []
    for i in range(10):
        items.append({'p': p, 'c': str(i), 'a': {'x': str(i*10), 'y': 'hi'}, 'b': 'hello'})
    with test_table_ss.batch_writer() as batch:
        for item in items:
            batch.put_item(item)
    got_items = full_query(
        test_table_ss,
        KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}},
        ProjectionExpression="a.x")
    expected_items = [{'a': {'x': stored['a']['x']}} for stored in items]
    assert multiset(expected_items) == multiset(got_items)
def test_scan_projection_expression_path(test_table_ss):
    # Same idea as test_query_projection_expression_path, but with a scan
    # instead of a query. A scan also returns unrelated partitions written
    # by other tests (hopefully not too many...), so "p" is projected too
    # and used to keep only the items written here.
    p = random_string()
    items = []
    for i in range(10):
        items.append({'p': p, 'c': str(i), 'a': {'x': str(i*10), 'y': 'hi'}, 'b': 'hello'})
    with test_table_ss.batch_writer() as batch:
        for item in items:
            batch.put_item(item)
    got_items = []
    for scanned in full_scan(test_table_ss, ProjectionExpression="p, a.x"):
        if scanned['p'] == p:
            got_items.append(scanned)
    expected_items = [{'p': p, 'a': {'x': stored['a']['x']}} for stored in items]
    assert multiset(expected_items) == multiset(got_items)
# BatchGetItem also supports ProjectionExpression, let's test that it
# applies to all items, and that it correctly supports document paths as well.
def test_batch_get_item_projection_expression_path(test_table_s):
    # Write three items, each in its own random partition.
    items = []
    for _ in range(3):
        items.append({'p': random_string(), 'a': {'b': random_string(), 'x': 'hi'}, 'c': random_string()})
    with test_table_s.batch_writer() as batch:
        for item in items:
            batch.put_item(item)
    # Fetch all three in one BatchGetItem request that projects the nested
    # path a.b.
    table_name = test_table_s.name
    request = {table_name: {
        'Keys': [{'p': item['p']} for item in items],
        'ProjectionExpression': 'a.b',
        'ConsistentRead': True}}
    response = test_table_s.meta.client.batch_get_item(RequestItems=request)
    got_items = response['Responses'][table_name]
    # The projection must have been applied to every returned item.
    expected_items = [{'a': {'b': item['a']['b']}} for item in items]
    assert multiset(got_items) == multiset(expected_items)
# It is not allowed to use both ProjectionExpression and its older cousin,
# AttributesToGet, together. If trying to do this, DynamoDB produces an error
# like "Can not use both expression and non-expression parameters in the same
# request: Non-expression parameters: {AttributesToGet} Expression
# parameters: {ProjectionExpression}".
def test_projection_expression_and_attributes_to_get(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hello', 'b': 'hi'})
    both_error = 'ValidationException.*both'
    # GetItem refuses the combination of the two parameters...
    with pytest.raises(ClientError, match=both_error):
        response = test_table_s.get_item(
            Key={'p': p}, ConsistentRead=True,
            ProjectionExpression='a', AttributesToGet=['b'])
        response['Item']
    # ... and so does Scan ...
    with pytest.raises(ClientError, match=both_error):
        full_scan(test_table_s, ProjectionExpression='a', AttributesToGet=['a'])
    # ... and Query.
    with pytest.raises(ClientError, match=both_error):
        full_query(
            test_table_s,
            KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}},
            ProjectionExpression='a', AttributesToGet=['a'])
# Above in test_projection_expression_toplevel_syntax, among other things,
# we noted how spurious entries in ExpressionAttributeNames, not needed
# by the ProjectionExpression, cause an error. Sometimes we have two
# expressions in the same request, for example, both a ProjectionExpression
# and a KeyConditionExpression. It's only an error if a name is not
# needed by either of these expressions.
def test_projection_expression_and_key_condition_expression(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hello', 'b': 'hi'})
    # #name1 is used only by the KeyConditionExpression and #name2 only by
    # the ProjectionExpression; the request must still be accepted.
    query_params = dict(
        KeyConditionExpression='#name1 = :val1',
        ProjectionExpression='#name2',
        ExpressionAttributeNames={'#name1': 'p', '#name2': 'a'},
        ExpressionAttributeValues={':val1': p},
    )
    got_items = full_query(test_table_s, **query_params)
    assert got_items == [{'a': 'hello'}]
# Test whether the nesting depth of a path in a projection expression
# is limited. If the implementation is done using recursion, it is good
# practice to limit it and not crash the server. According to the DynamoDB
# documentation, DynamoDB supports nested attributes up to 32 levels deep:
# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html#limits-attributes-nested-depth
# There is no reason why Alternator should not use exactly the same limit
# as is officially documented by DynamoDB.
def test_projection_expression_path_nesting_levels(test_table_s):
    p = random_string()

    def path_of_depth(levels):
        # A document path "a.b.b. ... .b" with the given number of nesting
        # levels, counting the top-level attribute "a" as the first one.
        return 'a' + '.b' * (levels - 1)

    # A path with 32 nesting levels (including the top-level attribute)
    # is accepted:
    test_table_s.get_item(
        Key={'p': p}, ConsistentRead=True,
        ProjectionExpression=path_of_depth(32))
    # A path with 33 nesting levels is not. DynamoDB gives an error:
    # "Invalid ProjectionExpression: The document path has too many nesting
    # levels; nesting levels: 33".
    with pytest.raises(ClientError, match='ValidationException.*nesting levels'):
        test_table_s.get_item(
            Key={'p': p}, ConsistentRead=True,
            ProjectionExpression=path_of_depth(33))
# Above we already checked different cases of reading individual elements
# from a list - the expression a[i]. The following test exercises these
# list indexes more rigorously, including testing what happens when the
# index overflows an integer (reproducing #25947).
def test_projection_expression_list_index(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': [7, 42]})

    def project(expression):
        # The projected Item of the item written above.
        response = test_table_s.get_item(
            Key={'p': p}, ConsistentRead=True,
            ProjectionExpression=expression)
        return response['Item']

    # a[0] and a[1] return the list's elements, as expected (note that a[i]
    # actually comes back as an array holding the single element a[i]).
    assert {'a': [7]} == project('a[0]')
    assert {'a': [42]} == project('a[1]')
    # An index beyond the length of the array, such as a[2] or a[999],
    # gives back an empty Item - not an error, and not a missing Item.
    assert {} == project('a[2]')
    assert {} == project('a[999]')
    # An index too high to be parsed as an integer is not silently ignored
    # like 999 above - it is a parse error. DynamoDB reports: "Invalid
    # ProjectionExpression: List index is not within the allowable range;
    # index: [99999999999999]". After fixing #25947, Alternator reports:
    # "Failed parsing ProjectionExpression 'a[99999999999999]': list index
    # out of integer range".
    with pytest.raises(ClientError, match='ValidationException.*ProjectionExpression.*index'):
        project('a[99999999999999]')
    # A negative index like a[-1] is just a syntax error (the parser
    # expects digits, not "-"), and so is a completely missing index, a[].
    for expression in ('a[-1]', 'a[]'):
        with pytest.raises(ClientError, match='ValidationException.*ProjectionExpression.*[Ss]yntax error'):
            project(expression)