Files
scylladb/test/alternator/test_projection_expression.py
Nadav Har'El d03bd82222 Revert "test: move scylla_inject_error from alternator/ to cql-pytest/"
This reverts commit 8e892426e2 and fixes
the code in a different way:

That commit moved the scylla_inject_error function from
test/alternator/util.py to test/cql-pytest/util.py and renamed
test/alternator/util.py. I found the rename confusing and unnecessary.
Moreover, the moved function isn't even usable today by the test suite
that includes it, cql-pytest, because it lacks the "rest_api" fixture :-)
so test/cql-pytest/util.py wasn't the right place for it anyway.
test/rest_api/rest_util.py could have been a good place for this function,
but there is another complication: Although the Alternator and rest_api
tests both had a "rest_api" fixture, it has a different type, which led
to the code in rest_api which used the moved function to have to jump
through hoops to call it instead of just passing "rest_api".

I think the best solution is to revert the above commit, and duplicate
the short scylla_inject_error() function. The duplication isn't an
exact copy - the test/rest_api/rest_util.py version now accepts the
"rest_api" fixture instead of the URL that the Alternator version used.

In the future we can remove some of this duplication by having some
shared "library" code but we should do it carefully and starting with
agreeing on the basic fixtures like "rest_api" and "cql", without that
it's not useful to share small functions that operate on them.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>

Closes #11275
2022-08-11 06:43:26 +03:00

348 lines
22 KiB
Python

# Copyright 2019-present ScyllaDB
#
# SPDX-License-Identifier: AGPL-3.0-or-later
# Tests for the various operations (GetItem, Query, Scan) with a
# ProjectionExpression parameter.
#
# ProjectionExpression is an expension of the legacy AttributesToGet
# parameter. Both parameters request that only a subset of the attributes
# be fetched for each item, instead of all of them. But while AttributesToGet
# was limited to top-level attributes, ProjectionExpression can request also
# nested attributes.
import pytest
from botocore.exceptions import ClientError
from util import random_string, full_scan, full_query, multiset
# Basic test for ProjectionExpression, requesting only top-level attributes.
# Result should include the selected attributes only - if one wants the key
# attributes as well, one needs to select them explicitly. When no key
# attributes are selected, an item may have *none* of the selected
# attributes, and returned as an empty item.
def test_projection_expression_toplevel(test_table):
p = random_string()
c = random_string()
item = {'p': p, 'c': c, 'a': 'hello', 'b': 'hi'}
test_table.put_item(Item=item)
for wanted in [ ['a'], # only non-key attribute
['c', 'a'], # a key attribute (sort key) and non-key
['p', 'c'], # entire key
['nonexistent'] # Our item doesn't have this
]:
got_item = test_table.get_item(Key={'p': p, 'c': c}, ProjectionExpression=",".join(wanted), ConsistentRead=True)['Item']
expected_item = {k: item[k] for k in wanted if k in item}
assert expected_item == got_item
# Various simple tests for ProjectionExpression's syntax, using only top-evel
# attributes.
def test_projection_expression_toplevel_syntax(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': 'hello', 'b': 'hi'})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a')['Item'] == {'a': 'hello'}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='#name', ExpressionAttributeNames={'#name': 'a'})['Item'] == {'a': 'hello'}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a,b')['Item'] == {'a': 'hello', 'b': 'hi'}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=' a , b ')['Item'] == {'a': 'hello', 'b': 'hi'}
# Missing or unused names in ExpressionAttributeNames are errors:
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='#name', ExpressionAttributeNames={'#wrong': 'a'})['Item'] == {'a': 'hello'}
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='#name', ExpressionAttributeNames={'#name': 'a', '#unused': 'b'})['Item'] == {'a': 'hello'}
# It is not allowed to fetch the same top-level attribute twice (or in
# general, list two overlapping attributes). We get an error like
# "Invalid ProjectionExpression: Two document paths overlap with each
# other; must remove or rewrite one of these paths; path one: [a], path
# two: [a]".
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a,a')['Item']
# A comma with nothing after it is a syntax error:
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a,')['Item']
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=',a')['Item']
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a,,b')['Item']
# An empty ProjectionExpression is not allowed. DynamoDB recognizes its
# syntax, but then writes: "Invalid ProjectionExpression: The expression
# can not be empty".
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='')['Item']
# The following two tests are similar to test_projection_expression_toplevel()
# which tested the GetItem operation - but these test Scan and Query.
# Both test ProjectionExpression with only top-level attributes.
def test_projection_expression_scan(filled_test_table):
table, items = filled_test_table
for wanted in [ ['another'], # only non-key attributes (one item doesn't have it!)
['c', 'another'], # a key attribute (sort key) and non-key
['p', 'c'], # entire key
['nonexistent'] # none of the items have this attribute!
]:
got_items = full_scan(table, ProjectionExpression=",".join(wanted))
expected_items = [{k: x[k] for k in wanted if k in x} for x in items]
assert multiset(expected_items) == multiset(got_items)
def test_projection_expression_query(test_table):
p = random_string()
items = [{'p': p, 'c': str(i), 'a': str(i*10), 'b': str(i*100) } for i in range(10)]
with test_table.batch_writer() as batch:
for item in items:
batch.put_item(item)
for wanted in [ ['a'], # only non-key attributes
['c', 'a'], # a key attribute (sort key) and non-key
['p', 'c'], # entire key
['nonexistent'] # none of the items have this attribute!
]:
got_items = full_query(test_table, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, ProjectionExpression=",".join(wanted))
expected_items = [{k: x[k] for k in wanted if k in x} for x in items]
assert multiset(expected_items) == multiset(got_items)
# The previous tests all fetched only top-level attributes. They could all
# be written using AttributesToGet instead of ProjectionExpression (and,
# in fact, we do have similar tests with AttributesToGet in other files),
# but the previous test checked that the alternative syntax works correctly.
# The following test checks fetching more elaborate attribute paths from
# nested documents.
def test_projection_expression_path(test_table_s):
p = random_string()
test_table_s.put_item(Item={
'p': p,
'a': {'b': [2, 4, {'x': 'hi', 'y': 'yo'}], 'c': 5},
'b': 'hello'
})
# Fetching the entire nested document "a" works, of course:
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a')['Item'] == {'a': {'b': [2, 4, {'x': 'hi', 'y': 'yo'}], 'c': 5}}
# If we fetch a.b, we get only the content of b - but it's still inside
# the a dictionary:
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b')['Item'] == {'a': {'b': [2, 4, {'x': 'hi', 'y': 'yo'}]}}
# Similarly, fetching a.b[0] gives us a one-element array in a dictionary.
# Note that [0] is the first element of an array.
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0]')['Item'] == {'a': {'b': [2]}}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[2]')['Item'] == {'a': {'b': [{'x': 'hi', 'y': 'yo'}]}}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[2].y')['Item'] == {'a': {'b': [{'y': 'yo'}]}}
# Trying to read any sort of non-existent attribute returns an empty item.
# This includes a non-existing top-level attribute, an attempt to read
# beyond the end of an array or a non-existent member of a dictionary, as
# well as paths which begin with a non-existent prefix.
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='x')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[3]')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x.y')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[3].x')['Item'] == {}
# Similarly, indexing a dictionary as an array, or array as dictionary, or
# integer as either, yields an empty item.
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b.x')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a[0]')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0].x')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0][0]')['Item'] == {}
# We can read multiple paths - the result are merged into one object
# structured the same was as in the original item:
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0],a.b[1]')['Item'] == {'a': {'b': [2, 4]}}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0],a.c')['Item'] == {'a': {'b': [2], 'c': 5}}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.c,b')['Item'] == {'a': {'c': 5}, 'b': 'hello'}
# If some of the paths are not available, they are silently ignored (just
# like they returned an empty item when used alone earlier)
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x, a.b[0], x, a.b[3].x')['Item'] == {'a': {'b': [2]}}
# It is not allowed to read the same path multiple times. The error from
# DynamoDB looks like: "Invalid ProjectionExpression: Two document paths
# overlap with each other; must remove or rewrite one of these paths;
# path one: [a, b, [0]], path two: [a, b, [0]]".
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0],a.b[0]')['Item']
# Two paths are considered to "overlap" if the content of one path
# contains the content of the second path. So requesting both "a" and
# "a.b[0]" is not allowed.
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a,a.b[0]')['Item']
# Above we noted that asking for to project a non-existent attribute in an
# existing item yields an empty Item object. However, if the item does not
# exist at all, the Item object will be missing entirely:
p = random_string()
assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='x')
assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x')
assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a[0]')
# Above in test_projection_expression_toplevel_syntax() we tested how
# name references (#name) work in top-level attributes. In the following
# two tests we test how they work in more elaborate paths:
# 1. Multiple path components can make multiple references, e.g., "#a.#b"
# 2. Conversely, a single reference, e.g., "#a", is always a single path
# component. Even if "#a" is "a.b", this refers to the literal attribute
# "a.b" - with a dot in its name - and not to the b element in a.
def test_projection_expression_path_references(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': {'b': 1, 'c': 2}, 'b': 'hi'})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b')['Item'] == {'a': {'b': 1}}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='#n1.b', ExpressionAttributeNames={'#n1': 'a'})['Item'] == {'a': {'b': 1}}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.#n2', ExpressionAttributeNames={'#n2': 'b'})['Item'] == {'a': {'b': 1}}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='#n1.#n2', ExpressionAttributeNames={'#n1': 'a', '#n2': 'b'})['Item'] == {'a': {'b': 1}}
# Missing or unused names in ExpressionAttributeNames are errors:
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.#n2', ExpressionAttributeNames={'#wrong': 'b'})
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.#n2', ExpressionAttributeNames={'#n2': 'b', '#unused': 'x'})
def test_projection_expression_path_dot(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a.b': 'hi', 'a': {'b': 'yo', 'c': 'jo'}})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b')['Item'] == {'a': {'b': 'yo'}}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='#name', ExpressionAttributeNames={'#name': 'a.b'})['Item'] == {'a.b': 'hi'}
# DynamoDB does not allow "overlapping" paths to be listed in
# ProjectionExpression. This includes both identical paths, and paths where
# one is a sub-path of the other - e.g. "a.b" and "a.b.c". As we already saw
# above, paths with just a common *prefix* - e.g., "a.b, a.c" - are fine.
def test_projection_expression_path_overlap(test_table_s):
# The overlap is tested symbolically, on the given paths, without any
# relation to what the item contains, or whether it even exists. So we
# don't even need to create an item for this test. We still need a
# key for the GetItem call :-)
p = random_string()
for expr in ['a, a',
'a.b, a.b',
'a[1], a[1]',
'a, a.b',
'a.b, a',
'a.b, a.b[2]',
'a.b, a.b.c',
'a, a.b[2].c',
'a.b.d, a.b',
'a.b.d.e, a.b',
'a.b, a.b.d',
'a.b, a.b.d.e',
]:
with pytest.raises(ClientError, match='ValidationException.* overlap'):
print(expr)
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=expr)
# The checks above can be easily passed by an over-zealos "overlap" check
# which declares everything an overlap :-) Let's also check some non-
# overlap cases - which shouldn't be declared an overlap.
for expr in ['a, b',
'a.b, a.c',
'a.b.d, a.b.e',
'a[1], a[2]',
'a.b, a.c[2]',
]:
print(expr)
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=expr)
# In addition to not allowing "overlapping" paths, DynamoDB also does not
# allow "conflicting" paths: It does not allow giving both a.b and a[1] in a
# single ProjectionExpression. It gives the error:
# "Invalid ProjectionExpression: Two document paths conflict with each other;
# must remove or rewrite one of these paths; path one: [a, b], path two:
# [a, [1]]".
# The reasoning is that asking for both in one request makes no sense because
# no item will ever be able to fulfill both.
def test_projection_expression_path_conflict(test_table_s):
# The conflict is tested symbolically, on the given paths, without any
# relation to what the item contains, or whether it even exists. So we
# don't even need to create an item for this test. We still need a
# key for the GetItem call :-)
p = random_string()
for expr in ['a.b, a[1]',
'a[1], a.b',
'a.b[1], a.b.c',
'a.b.c, a.b[1]',
]:
with pytest.raises(ClientError, match='ValidationException.* conflict'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=expr)
# The checks above can be easily passed by an over-zealos "conflict" check
# which declares everything a conflict :-) Let's also check some non-
# conflict cases - which shouldn't be declared a conflict.
for expr in ['a.b, a.c',
'a.b, a.c[1]',
]:
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=expr)
# Above we nested paths in ProjectionExpression, but just for the GetItem
# request. Let's verify they also work in Query and Scan requests:
def test_query_projection_expression_path(test_table):
p = random_string()
items = [{'p': p, 'c': str(i), 'a': {'x': str(i*10), 'y': 'hi'}, 'b': 'hello' } for i in range(10)]
with test_table.batch_writer() as batch:
for item in items:
batch.put_item(item)
got_items = full_query(test_table, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, ProjectionExpression="a.x")
expected_items = [{'a': {'x': x['a']['x']}} for x in items]
assert multiset(expected_items) == multiset(got_items)
def test_scan_projection_expression_path(test_table):
# This test is similar to test_query_projection_expression_path above,
# but uses a scan instead of a query. The scan will generate unrelated
# partitions created by other tests (hopefully not too many...) that we
# need to ignore. We also need to ask for "p" too, so we can filter by it.
p = random_string()
items = [{'p': p, 'c': str(i), 'a': {'x': str(i*10), 'y': 'hi'}, 'b': 'hello' } for i in range(10)]
with test_table.batch_writer() as batch:
for item in items:
batch.put_item(item)
got_items = [ x for x in full_scan(test_table, ProjectionExpression="p, a.x") if x['p'] == p]
expected_items = [{'p': p, 'a': {'x': x['a']['x']}} for x in items]
assert multiset(expected_items) == multiset(got_items)
# BatchGetItem also supports ProjectionExpression, let's test that it
# applies to all items, and that it correctly suports document paths as well.
def test_batch_get_item_projection_expression_path(test_table_s):
items = [{'p': random_string(), 'a': {'b': random_string(), 'x': 'hi'}, 'c': random_string()} for i in range(3)]
with test_table_s.batch_writer() as batch:
for item in items:
batch.put_item(item)
got_items = test_table_s.meta.client.batch_get_item(
RequestItems = {test_table_s.name: {
'Keys': [{'p': item['p']} for item in items],
'ProjectionExpression': 'a.b',
'ConsistentRead': True}})['Responses'][test_table_s.name]
expected_items = [{'a': {'b': item['a']['b']}} for item in items]
assert multiset(got_items) == multiset(expected_items)
# It is not allowed to use both ProjectionExpression and its older cousin,
# AttributesToGet, together. If trying to do this, DynamoDB produces an error
# like "Can not use both expression and non-expression parameters in the same
# request: Non-expression parameters: {AttributesToGet} Expression
# parameters: {ProjectionExpression}
def test_projection_expression_and_attributes_to_get(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': 'hello', 'b': 'hi'})
with pytest.raises(ClientError, match='ValidationException.*both'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a', AttributesToGet=['b'])['Item']
with pytest.raises(ClientError, match='ValidationException.*both'):
full_scan(test_table_s, ProjectionExpression='a', AttributesToGet=['a'])
with pytest.raises(ClientError, match='ValidationException.*both'):
full_query(test_table_s, KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}, ProjectionExpression='a', AttributesToGet=['a'])
# above in test_projection_expression_toplevel_syntax among other things
# we noted how spurious entries in ExpressionAttributeNames, not needed
# the the ProjectionExpression, cause an error. Sometimes we have two
# expressions in the same request, for example, both a ProjectionExpression
# and a KeyConditionExpression. It's only an error if a name is not
# needed by both of these expressions
def test_projection_expression_and_key_condition_expression(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': 'hello', 'b': 'hi'})
got_items = full_query(test_table_s,
KeyConditionExpression='#name1 = :val1',
ProjectionExpression='#name2',
ExpressionAttributeNames={'#name1': 'p', '#name2': 'a'},
ExpressionAttributeValues={':val1': p});
assert got_items == [{'a': 'hello'}]
# Test whether the nesting depth of an a path in a projection expression
# is limited. If the implementation is done using recursion, it is goood
# practice to limit it and not crash the server. According to the DynamoDB
# documentation, DynamoDB supports nested attributes up to 32 levels deep:
# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html#limits-attributes-nested-depth
# There is no reason why Alternator should not use exactly the same limit
# as is officially documented by DynamoDB.
def test_projection_expression_path_nesting_levels(test_table_s):
p = random_string()
# 32 nesting levels (including the top-level attribute) work
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a'+('.b'*31))
# 33 nesting levels do not. DynamoDB gives an error: "Invalid
# ProjectionExpression: The document path has too many nesting levels;
# nesting levels: 33".
with pytest.raises(ClientError, match='ValidationException.*nesting levels'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a'+('.b'*32))