Copilot noticed that in many of the Alternator tests, we have some
unnecessary assignments. For example, in a few places, we use the idiom:
with pytest.raises(...):
ret = ...
The "ret=" part is unnecessary, as this test expects the statement to
fail (hence the raises()), and ret is never assigned. The assignment
was only there because we copied this statement from another place in
the test, which does expect the statement to pass and wants to validate
the returned value.
So we should just drop the "ret=" from these tests.
Another common occurrence is that we used the idiom:
response = table.do_something()
Without checking the response and no intention to check it (either we
know it will work, or we just want to check it doesn't throw). So we
can drop the "response=" here too.
All of the unused variables in this patch were discovered by Copilot,
but I reviewed each of them carefully myself and prepared this patch.
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
734 lines
38 KiB
Python
734 lines
38 KiB
Python
# -*- coding: utf-8 -*-
|
|
# Copyright 2019-present ScyllaDB
|
|
#
|
|
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
|
|
# Tests for the Query operation
|
|
# Some of the Query features are tested in separate files:
|
|
# * test_key_conditions.py: the KeyConditions parameter.
|
|
# * test_key_condition_expression.py: the KeyConditionExpression parameter.
|
|
# * test_filter_expression.py: the FilterExpression parameter.
|
|
# * test_query_filter.py: the QueryFilter parameter.
|
|
|
|
import operator
|
|
import random
|
|
from decimal import Decimal
|
|
|
|
import pytest
|
|
from boto3.dynamodb.conditions import Attr
|
|
from botocore.exceptions import ClientError
|
|
|
|
from test.alternator.util import random_string, random_bytes, full_query, multiset
|
|
|
|
# Maps the names of the DynamoDB ordering comparison operators used in this
# file to the Python function implementing the same comparison.
python_compare_op_dict = {
    "LE": operator.le,
    "LT": operator.lt,
    "GE": operator.ge,
    "GT": operator.gt,
}
|
|
|
|
def test_query_nonexistent_table(dynamodb):
    """A Query naming a table that does not exist must be rejected.

    The request is sent through the low-level client of the `dynamodb`
    fixture and is expected to raise a ClientError whose text matches
    "ResourceNotFoundException".
    """
    key_conditions = {
        'p': {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'},
        'c': {'AttributeValueList': ['11'], 'ComparisonOperator': 'BEGINS_WITH'},
    }
    low_level_client = dynamodb.meta.client
    with pytest.raises(ClientError, match="ResourceNotFoundException"):
        low_level_client.query(TableName="i_do_not_exist", KeyConditions=key_conditions)
|
|
|
|
# Items returned by Query should be sorted by the sort key. The following
|
|
# tests verify that this is indeed the case, for the three allowed key types:
|
|
# strings, binary, and numbers. These tests test not just the Query operation,
|
|
# but inherently that the sort-key sorting works.
|
|
def test_query_sort_order_string(test_table):
    """Query must return string sort keys of one partition in sorted order."""
    # Write 128 items into one fresh partition. The sort keys are str(n),
    # whose string order is non-obvious (e.g., "100" comes before "2"),
    # which makes them a nice test.
    partition = random_string()
    written = [{'p': partition, 'c': str(n)} for n in range(128)]
    with test_table.batch_writer() as writer:
        for row in written:
            writer.put_item(row)
    fetched = full_query(
        test_table,
        KeyConditions={'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}})
    assert len(written) == len(fetched)
    # Keep just the sort key ("c") of each item.
    fetched_keys = [row['c'] for row in fetched]
    written_keys = [row['c'] for row in written]
    # The returned keys must already be sorted (in string order) ...
    assert sorted(fetched_keys) == fetched_keys
    # ... and must be the sorted version of the keys that were written.
    assert sorted(written_keys) == fetched_keys
|
|
def test_query_sort_order_bytes(test_table_sb):
    """Query must return binary sort keys of one partition in sorted order."""
    # Insert a lot of random items in one new partition. Each sort key comes
    # from random_bytes(10) - the argument is fixed, not a random length.
    p = random_string()
    items = [{'p': p, 'c': random_bytes(10)} for _ in range(128)]
    with test_table_sb.batch_writer() as batch:
        for item in items:
            batch.put_item(item)
    got_items = full_query(
        test_table_sb,
        KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}})
    assert len(items) == len(got_items)
    # Extract just the sort key ("c") from the written and the returned items.
    sort_keys = [x['c'] for x in items]
    got_sort_keys = [x['c'] for x in got_items]
    # Boto3's "Binary" objects are sorted as if bytes are signed integers.
    # This isn't the order that DynamoDB itself uses (byte 0 should be first,
    # not byte -128). Sorting the byte array ".value" works.
    assert sorted(got_sort_keys, key=lambda x: x.value) == got_sort_keys
    # The returned keys must also be the sorted version of what we wrote.
    assert sorted(sort_keys) == got_sort_keys
|
|
def test_query_sort_order_number(test_table_sn):
    """Query must return numeric sort keys in numeric order, unmodified."""
    # The literals below are listed in correct numeric order, and each is
    # suitable for accurate representation by Alternator's number type.
    literals_in_order = (
        "-2e10",
        "-7.1e2",
        "-4.1",
        "-0.1",
        "-1e-5",
        "0",
        "2e-5",
        "0.15",
        "1",
        "1.00000000000000000000000001",
        "3.14159",
        "3.1415926535897932384626433832795028841",
        "31.4",
        "1.4e10",
    )
    sorted_numbers = [Decimal(text) for text in literals_in_order]
    # Write the numbers, in random order, into one fresh partition.
    partition = random_string()
    shuffled = random.sample(sorted_numbers, len(sorted_numbers))
    rows = [{'p': partition, 'c': number} for number in shuffled]
    with test_table_sn.batch_writer() as writer:
        for row in rows:
            writer.put_item(row)
    # We expect to read back exactly the same numbers (with identical
    # precision), in their original sorted order.
    fetched = full_query(
        test_table_sn,
        KeyConditions={'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}})
    fetched_keys = [row['c'] for row in fetched]
    assert fetched_keys == sorted_numbers
|
|
|
|
# Note: this is a very partial check for the QueryFilter feature. See
|
|
# test_query_filter.py for much more exhaustive tests for this feature.
|
|
def test_query_filtering_attributes_equality(filled_test_table):
    """QueryFilter with one or two EQ conditions on the 'long' partition.

    The fixture is unpacked into (table, items); the expected result is
    computed by applying the same equality conditions to `items` in Python.
    """
    table, all_items = filled_test_table

    def equals(value):
        # One EQ condition, in the KeyConditions / QueryFilter format.
        return {"AttributeValueList": [value], "ComparisonOperator": "EQ"}

    def query_long_partition(conditions):
        return full_query(table, KeyConditions={'p': equals('long')}, QueryFilter=conditions)

    # Only items of the 'long' partition are candidates for either filter.
    candidates = [item for item in all_items if item['p'] == 'long']

    # First, a filter with a single condition.
    got_items = query_long_partition({"attribute": equals("xxxx")})
    print(got_items)
    expected = [item for item in candidates if item['attribute'] == 'xxxx']
    assert multiset(expected) == multiset(got_items)

    # Then a filter with two conditions, both of which must hold.
    got_items = query_long_partition({
        "attribute": equals("xxxx"),
        "another": equals("yy"),
    })
    print(got_items)
    expected = [item for item in candidates
                if item['attribute'] == 'xxxx' and item['another'] == 'yy']
    assert multiset(expected) == multiset(got_items)
|
|
|
|
# Test that FilterExpression works as expected
|
|
def test_query_filter_expression(filled_test_table):
    """FilterExpression with one condition, and with two and-ed conditions.

    The fixture is unpacked into (table, items); the expected result is
    computed by applying the same equality conditions to `items` in Python.
    """
    table, all_items = filled_test_table

    def query_long_partition(expression):
        long_partition = {'p': {'AttributeValueList': ['long'], 'ComparisonOperator': 'EQ'}}
        return full_query(table, KeyConditions=long_partition, FilterExpression=expression)

    # Only items of the 'long' partition are candidates for either filter.
    candidates = [item for item in all_items if item['p'] == 'long']

    # A single equality condition.
    got_items = query_long_partition(Attr("attribute").eq("xxxx"))
    print(got_items)
    expected = [item for item in candidates if item['attribute'] == 'xxxx']
    assert multiset(expected) == multiset(got_items)

    # Two equality conditions combined with "&".
    got_items = query_long_partition(Attr("attribute").eq("xxxx") & Attr("another").eq("yy"))
    print(got_items)
    expected = [item for item in candidates
                if item['attribute'] == 'xxxx' and item['another'] == 'yy']
    assert multiset(expected) == multiset(got_items)
|
|
|
|
|
|
# Test Query with the AttributesToGet parameter. Result should include the
|
|
# selected attributes only - if one wants the key attributes as well, one
|
|
# needs to select them explicitly. When no key attributes are selected,
|
|
# some items may have *none* of the selected attributes. Those items are
|
|
# returned too, as empty items - they are not outright missing.
|
|
def test_query_attributes_to_get(dynamodb, test_table):
    """Query with AttributesToGet returns only the selected attributes.

    Each returned item is expected to hold just those of the wanted
    attributes that the stored item actually has.
    """
    partition = random_string()
    stored = [
        {'p': partition, 'c': str(n), 'a': str(n*10), 'b': str(n*100)}
        for n in range(10)
    ]
    with test_table.batch_writer() as writer:
        for row in stored:
            writer.put_item(row)
    selections = [
        ['a'],            # only non-key attributes
        ['c', 'a'],       # a key attribute (sort key) and non-key
        ['p', 'c'],       # entire key
        ['nonexistent'],  # none of the items have this attribute!
    ]
    for wanted in selections:
        got_items = full_query(
            test_table,
            KeyConditions={'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}},
            AttributesToGet=wanted)
        # Project every stored item onto the wanted attributes it has.
        expected_items = []
        for row in stored:
            expected_items.append({name: row[name] for name in wanted if name in row})
        assert multiset(expected_items) == multiset(got_items)
|
|
|
|
# Verify that it is forbidden to ask for an empty AttributesToGet
|
|
# Reproduces issue #10332.
|
|
def test_query_attributes_to_get_empty(dynamodb, test_table):
    """A Query with an empty AttributesToGet list must fail validation."""
    partition = random_string()
    key_conditions = {'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}}
    with pytest.raises(ClientError, match='ValidationException'):
        full_query(test_table, KeyConditions=key_conditions, AttributesToGet=[])
|
|
|
|
# Test that in a table with both hash key and sort key, which keys we can
|
|
# Query by: We can Query by the hash key, by a combination of both hash and
|
|
# sort keys, but *cannot* query by just the sort key, and obviously not
|
|
# by any non-key column.
|
|
def test_query_which_key(test_table):
    """Check which combinations of attributes KeyConditions may name.

    Hash key alone and hash key plus sort key are accepted; every other
    combination tried here is expected to raise a ValidationException.
    """
    p = random_string()
    c = random_string()
    p2 = random_string()
    c2 = random_string()
    item1 = {'p': p, 'c': c}
    item2 = {'p': p, 'c': c2}
    item3 = {'p': p2, 'c': c}
    for stored_item in (item1, item2, item3):
        test_table.put_item(Item=stored_item)

    def equals(value):
        # One EQ condition in the KeyConditions format.
        return {'AttributeValueList': [value], 'ComparisonOperator': 'EQ'}

    def assert_rejected(key_conditions):
        with pytest.raises(ClientError, match='ValidationException'):
            full_query(test_table, KeyConditions=key_conditions)

    # Query by hash key only:
    got_items = full_query(test_table, KeyConditions={'p': equals(p)})
    assert multiset([item1, item2]) == multiset(got_items)
    # Query by hash key *and* sort key (this is basically a GetItem):
    got_items = full_query(test_table, KeyConditions={'p': equals(p), 'c': equals(c)})
    assert multiset([item1]) == multiset(got_items)
    # Query by sort key alone is not allowed. DynamoDB reports:
    # "Query condition missed key schema element: p".
    assert_rejected({'c': equals(c)})
    # Query by a non-key isn't allowed, for the same reason - that the
    # actual hash key (p) is missing in the query:
    assert_rejected({'z': equals(c)})
    # If we try both p and a non-key we get a complaint that the sort
    # key is missing: "Query condition missed key schema element: c"
    assert_rejected({'p': equals(p), 'z': equals(c)})
    # If we try p, c and another key, we get an error that
    # "Conditions can be of length 1 or 2 only".
    assert_rejected({'p': equals(p), 'c': equals(c), 'z': equals(c)})
|
|
|
|
# Test the "Select" parameter of Query. The default Select mode,
|
|
# ALL_ATTRIBUTES, returns items with all their attributes. Other modes
|
|
# allow returning just specific attributes or just counting the results
|
|
# without returning items at all.
|
|
def test_query_select(test_table_sn):
    """Exercise the Select parameter of Query on a single partition.

    Covers the default, ALL_ATTRIBUTES, ALL_PROJECTED_ATTRIBUTES (rejected
    here), SPECIFIC_ATTRIBUTES, COUNT, unknown and wrongly-cased values, and
    Select values combined with AttributesToGet / ProjectionExpression.
    """
    numbers = [Decimal(i) for i in range(10)]
    # Insert these numbers, in random order, into one partition:
    p = random_string()
    items = [{'p': p, 'c': num, 'x': num} for num in random.sample(numbers, len(numbers))]
    with test_table_sn.batch_writer() as batch:
        for item in items:
            batch.put_item(item)

    def query(**kwargs):
        # Every request in this test uses the same partition-key condition;
        # a fresh dict is built for each call.
        return test_table_sn.query(
            KeyConditions={'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}},
            **kwargs)

    # Verify that we get back the numbers in their sorted order. By default,
    # query returns all attributes:
    got_items = query(ConsistentRead=True)['Items']
    assert [x['c'] for x in got_items] == numbers
    assert [x['x'] for x in got_items] == numbers
    # Select=ALL_ATTRIBUTES does exactly the same as the default - return
    # all attributes:
    got_items = query(ConsistentRead=True, Select='ALL_ATTRIBUTES')['Items']
    assert [x['c'] for x in got_items] == numbers
    assert [x['x'] for x in got_items] == numbers
    # Select=ALL_PROJECTED_ATTRIBUTES is not allowed on a base table (it
    # is just for indexes, when IndexName is specified)
    with pytest.raises(ClientError, match='ValidationException'):
        query(ConsistentRead=True, Select='ALL_PROJECTED_ATTRIBUTES')
    # Select=SPECIFIC_ATTRIBUTES requires that either a AttributesToGet
    # or ProjectionExpression appears, but then really does nothing:
    with pytest.raises(ClientError, match='ValidationException'):
        query(ConsistentRead=True, Select='SPECIFIC_ATTRIBUTES')
    expected_items = [{'x': i} for i in numbers]
    got_items = query(ConsistentRead=True, Select='SPECIFIC_ATTRIBUTES', AttributesToGet=['x'])['Items']
    assert got_items == expected_items
    got_items = query(ConsistentRead=True, Select='SPECIFIC_ATTRIBUTES', ProjectionExpression='x')['Items']
    assert got_items == expected_items
    # Select=COUNT just returns a count - not any items
    got = query(ConsistentRead=True, Select='COUNT')
    assert got['Count'] == len(numbers)
    assert 'Items' not in got
    # Check again that we also get a count - not just with Select=COUNT,
    # but without Select=COUNT we also get the items:
    got = query(ConsistentRead=True)
    assert got['Count'] == len(numbers)
    assert 'Items' in got
    # Select with some unknown string generates a validation exception:
    with pytest.raises(ClientError, match='ValidationException'):
        query(ConsistentRead=True, Select='UNKNOWN')
    # The Select value is case sensitive - "COUNT" works (checked above),
    # but "count" doesn't:
    with pytest.raises(ClientError, match='ValidationException'):
        query(ConsistentRead=True, Select='count')
    # If either AttributesToGet or ProjectionExpression appear in the query,
    # only Select=SPECIFIC_ATTRIBUTES (or nothing) is allowed - other Select
    # settings contradict the AttributesToGet or ProjectionExpression, and
    # therefore forbidden:
    with pytest.raises(ClientError, match='ValidationException.*AttributesToGet'):
        query(Select='ALL_ATTRIBUTES', AttributesToGet=['x'])
    with pytest.raises(ClientError, match='ValidationException.*AttributesToGet'):
        query(Select='COUNT', AttributesToGet=['x'])
    with pytest.raises(ClientError, match='ValidationException.*ProjectionExpression'):
        query(Select='ALL_ATTRIBUTES', ProjectionExpression='x')
    with pytest.raises(ClientError, match='ValidationException.*ProjectionExpression'):
        query(Select='COUNT', ProjectionExpression='x')
|
|
|
|
# Test that the "Limit" parameter can be used to return only some of the
|
|
# items in a single partition. The items returned are the first in the
|
|
# sorted order.
|
|
def test_query_limit(test_table_sn):
    """Limit must cut a single-partition Query to the first items in order."""
    sorted_numbers = [Decimal(n) for n in range(10)]
    # Write the numbers, in random order, into one fresh partition.
    partition = random_string()
    rows = [{'p': partition, 'c': number}
            for number in random.sample(sorted_numbers, len(sorted_numbers))]
    with test_table_sn.batch_writer() as writer:
        for row in rows:
            writer.put_item(row)

    def partition_condition():
        return {'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}}

    # Without a Limit we should get all the numbers, sorted (there are few
    # of them, so it all fits in the default 1MB limitation).
    response = test_table_sn.query(ConsistentRead=True, KeyConditions=partition_condition())
    assert [row['c'] for row in response['Items']] == sorted_numbers
    # With a Limit, the query must return exactly the first Limit sorted
    # numbers (or all of them when Limit exceeds their count).
    for limit in (1, 2, 3, 7, 10, 17, 100, 10000):
        response = test_table_sn.query(
            ConsistentRead=True, KeyConditions=partition_condition(), Limit=limit)
        page = response['Items']
        assert len(page) == min(limit, len(sorted_numbers))
        assert [row['c'] for row in page] == sorted_numbers[0:limit]
    # Limit 0 is not allowed:
    with pytest.raises(ClientError, match='ValidationException.*[lL]imit'):
        test_table_sn.query(ConsistentRead=True, KeyConditions=partition_condition(), Limit=0)
|
|
|
|
# In test_query_limit we tested just that Limit allows to stop the result
|
|
# after the right number of items. Here we test that such a stopped result
|
|
# can be resumed, via the LastEvaluatedKey/ExclusiveStartKey paging mechanism.
|
|
def test_query_limit_paging(test_table_sn):
    """A Query stopped by Limit can be resumed until all items are read."""
    sorted_numbers = [Decimal(n) for n in range(20)]
    # Write the numbers, in random order, into one fresh partition.
    partition = random_string()
    rows = [{'p': partition, 'c': number}
            for number in random.sample(sorted_numbers, len(sorted_numbers))]
    with test_table_sn.batch_writer() as writer:
        for row in rows:
            writer.put_item(row)
    # full_() does a query with the given limit, and resumes it again
    # and again until the last page; whatever the page size, the combined
    # result must be all the numbers in sorted order.
    for page_size in (1, 2, 3, 7, 10, 17, 100, 10000):
        fetched = full_query(
            test_table_sn,
            KeyConditions={'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}},
            Limit=page_size)
        fetched_keys = [row['c'] for row in fetched]
        assert fetched_keys == sorted_numbers
|
|
|
|
# Although ExclusiveStartKey is usually used for paging through a long
|
|
# partition by setting it to the previous page's LastEvaluatedKey, a user
|
|
# may also use ExclusiveStartKey to skip directly to the middle of a
|
|
# partition, without having paged through anything earlier. Moreover,
|
|
# ExclusiveStartKey doesn't even have to be one of the actual keys in the
|
|
# partition. This test verifies that this works.
|
|
# Additionally, because the previous tests only passed the value of
|
|
# LastEvaluatedKey into ExclusiveStartKey, they couldn't tell whether the
|
|
# format of the "cookie" is the correct one - any opaque cookie would have
|
|
# worked. So this test also demonstrates that ExclusiveStartKey with a
|
|
# specific format actually works - because users can use this format directly.
|
|
def test_query_exclusivestartkey(test_table_sn):
    """ExclusiveStartKey may be an arbitrary, user-built key in a partition."""
    # Write the numbers 0, 2, 4, ... 38 into one partition. They are written
    # in random order, but of course as sort keys they will be sorted.
    even_numbers = list(range(0, 40, 2))
    partition = random_string()
    rows = [{'p': partition, 'c': number}
            for number in random.sample(even_numbers, len(even_numbers))]
    with test_table_sn.batch_writer() as writer:
        for row in rows:
            writer.put_item(row)
    # Try different ExclusiveStartKey values. We want to check that the
    # result is *exclusive* of the given key (if ExclusiveStartKey=0, the
    # first result is 2, not 0), that the start key need not be an existing
    # key (-3, 17 and 80), and that it is fine to have fewer than Limit
    # items remaining (34 and 80).
    page_size = 5
    for start in (-3, 0, 8, 17, 34, 80):
        after_start = [number for number in even_numbers if number > start]
        expected_sort_keys = after_start[:page_size]
        # The ExclusiveStartKey option must indicate both partition key and
        # sort key. Note that the Python driver further converts this map
        # into the correct format for the request (including the key types).
        response = test_table_sn.query(
            KeyConditions={'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}},
            ExclusiveStartKey={'p': partition, 'c': start},
            Limit=page_size)
        got_sort_keys = [row['c'] for row in response['Items']]
        assert expected_sort_keys == got_sort_keys
|
|
|
|
# In the previous test (test_query_exclusivestartkey) we checked that
|
|
# in a table with a partition and sort key, ExclusiveStartKey that specifies
|
|
# both works. Here we verify that it is not allowed for ExclusiveStartKey
|
|
# to specify *only* the partition key - if you want the query to start in
|
|
# the beginning of the partition, don't specify an ExclusiveStartKey at all.
|
|
def test_query_exclusivestartkey_missing_sortkey(test_table_sn):
    """ExclusiveStartKey holding only the partition key must be rejected."""
    partition = random_string()
    key_conditions = {'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}}
    # Deliberately incomplete: the sort key 'c' is missing!
    start_key = {'p': partition}
    # DynamoDB reports "The provided starting key is invalid" in this case,
    # while Alternator reports "Key column c not found". The pattern below
    # accepts both.
    with pytest.raises(ClientError, match='ValidationException.*[kK]ey'):
        test_table_sn.query(KeyConditions=key_conditions, ExclusiveStartKey=start_key)
|
|
|
|
# When Query'ing on the partition key 'right', ExclusiveStartKey must be in
|
|
# the same partition 'right' being queried - it must not be some other
|
|
# partition. Reproduces issue #26988.
|
|
@pytest.mark.xfail(reason="issue #26988")
def test_query_exclusivestartkey_wrong_partition(test_table_sn):
    """ExclusiveStartKey in a partition other than the queried one is an error."""
    key_conditions = {'p': {'AttributeValueList': ['right'], 'ComparisonOperator': 'EQ'}}
    # The partition key part of ExclusiveStartKey should be 'right';
    # something else, like 'wrong', should fail.
    start_key = {'p': 'wrong', 'c': 0}
    # DynamoDB's error for this case is "The provided starting key is
    # outside query boundaries based on provided conditions".
    with pytest.raises(ClientError, match='ValidationException.*starting key'):
        test_table_sn.query(KeyConditions=key_conditions, ExclusiveStartKey=start_key)
|
|
|
|
# Check that ExclusiveStartKey cannot contain any spurious column names
|
|
# beyond the actual primary key (here a partition key and sort key)
|
|
# Reproduces issue #26988.
|
|
@pytest.mark.xfail(reason="issue #26988")
def test_query_exclusivestartkey_spurious_column(test_table_sn):
    """ExclusiveStartKey with an attribute beyond the primary key is an error."""
    partition = random_string()
    key_conditions = {'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}}
    # 'x' is not part of the key, so this should cause an error.
    start_key = {'p': partition, 'c': 0, 'x': 3}
    # DynamoDB's error for this case is "The provided starting key is
    # invalid".
    with pytest.raises(ClientError, match='ValidationException.*starting key'):
        test_table_sn.query(KeyConditions=key_conditions, ExclusiveStartKey=start_key)
|
|
|
|
# The previous tests, and actually all tests in this file, all used a table
|
|
# with both a partition key and a sort key. Naturally, "Query" is meant to
|
|
# be used on a table with a sort key. However, it actually works also on
|
|
# a table with just a partition key. Let's check that it works.
|
|
def test_query_no_sort_key(test_table_s):
    """Query on the partition key of a stored item returns exactly that item."""
    partition = random_string()
    stored = {'p': partition, 'animal': 'dog'}
    test_table_s.put_item(Item=stored)
    key_conditions = {'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}}
    response = test_table_s.query(ConsistentRead=True, KeyConditions=key_conditions)
    assert response['Items'] == [stored]
|
|
|
|
# Although a Query is allowed on a table with just a partition key and no
|
|
# sort key (and always returns a single item), ExclusiveStartKey makes no
|
|
# sense for these one-item partitions: Since it is *exclusive*, i.e., the
|
|
# response shouldn't include the given key, the response would be completely
|
|
# empty... So DynamoDB doesn't allow this case at all.
|
|
# Reproduces issue #26988.
|
|
@pytest.mark.xfail(reason="issue #26988")
def test_query_no_sort_key_exclusive_start_key(test_table_s):
    """Query with an ExclusiveStartKey holding only 'p' must be rejected."""
    partition = random_string()
    key_conditions = {'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}}
    start_key = {'p': partition}
    # DynamoDB gives the error message "The provided exclusive start key
    # is invalid: ExclusiveStartKey: {p=AttributeValue: {S:KPQ4X3JE19}}"
    with pytest.raises(ClientError, match='ValidationException.*ExclusiveStartKey'):
        test_table_s.query(KeyConditions=key_conditions, ExclusiveStartKey=start_key)
|
|
|
|
# Test that the ScanIndexForward parameter works, and can be used to
|
|
# return items sorted in reverse order. Combining this with Limit can
|
|
# be used to return the last items instead of the first items of the
|
|
# partition.
|
|
@pytest.mark.parametrize("sort_key_op", [None, 'LT', 'LE', 'GT', 'GE'])
def test_query_reverse(sort_key_op, test_table_sn):
    """ScanIndexForward selects forward or reverse sort order, also with Limit.

    sort_key_op is None for a query on the whole partition, or the name of
    a comparison operator applied to the sort key against a random bound.
    """
    numbers = [Decimal(i) for i in range(20)]
    # Insert these numbers, in random order, into one partition:
    p = random_string()
    items = [{'p': p, 'c': num} for num in random.sample(numbers, len(numbers))]
    with test_table_sn.batch_writer() as batch:
        for item in items:
            batch.put_item(item)
    # Build the key condition, and a Python predicate telling which sort
    # keys the condition is expected to select.
    key_condition = {'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}
    if sort_key_op:
        c_bound = random.randint(1, 19)
        key_condition['c'] = {'AttributeValueList': [c_bound], 'ComparisonOperator': sort_key_op}
        compare = python_compare_op_dict[sort_key_op]

        def selected(x):
            return compare(x, c_bound)
    else:
        def selected(x):
            return True

    # Verify that we get back the numbers in their sorted order or reverse
    # order, depending on the ScanIndexForward parameter being True or False.
    for scan_index_forward in [True, False]:
        ordered = numbers if scan_index_forward else numbers[::-1]
        # The expected keys depend only on the direction, so they are
        # computed once per direction rather than once per Limit.
        expected_sort_keys = [x for x in ordered if selected(x)]
        # First, no Limit so we should get all numbers (we have few of them,
        # so it all fits in the default 1MB limitation)
        got_items = test_table_sn.query(ConsistentRead=True, KeyConditions=key_condition, ScanIndexForward=scan_index_forward)['Items']
        got_sort_keys = [x['c'] for x in got_items]
        assert got_sort_keys == expected_sort_keys

        # With a Limit, we should get just the first Limit of those keys.
        for limit in [1, 2, 3, 7, 10, 17, 100, 10000]:
            got_items = test_table_sn.query(ConsistentRead=True, KeyConditions=key_condition, Limit=limit, ScanIndexForward=scan_index_forward)['Items']
            got_sort_keys = [x['c'] for x in got_items]
            assert got_sort_keys == expected_sort_keys[:limit]
|
|
|
|
|
|
# Test that paging also works properly with reverse order
|
|
# (ScanIndexForward=false), i.e., reverse-order queries can be resumed
|
|
@pytest.mark.parametrize("sort_key_op", [None, 'LT', 'LE', 'GT', 'GE'])
def test_query_reverse_paging(sort_key_op, test_table_sn):
    """Reverse-order queries (ScanIndexForward=False) can be paged with Limit.

    sort_key_op is None for a query on the whole partition, or the name of
    a comparison operator applied to the sort key against a random bound.
    """
    numbers = [Decimal(i) for i in range(20)]
    # Insert these numbers, in random order, into one partition:
    p = random_string()
    items = [{'p': p, 'c': num} for num in random.sample(numbers, len(numbers))]
    with test_table_sn.batch_writer() as batch:
        for item in items:
            batch.put_item(item)
    reversed_numbers = list(reversed(numbers))
    # Build the key condition, and a Python predicate telling which sort
    # keys the condition is expected to select.
    key_condition = {'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}
    if sort_key_op:
        c_bound = random.randint(1, 19)
        key_condition['c'] = {'AttributeValueList': [c_bound], 'ComparisonOperator': sort_key_op}
        compare = python_compare_op_dict[sort_key_op]

        def selected(x):
            return compare(x, c_bound)
    else:
        def selected(x):
            return True

    # The expected result does not depend on the page size, so compute it once.
    expected_sort_keys = [x for x in reversed_numbers if selected(x)]
    # Verify that with ScanIndexForward=False, full_query() returns all
    # these numbers in reversed sorted order - getting pages of Limit items
    # at a time and resuming the query.
    for limit in [1, 2, 3, 7, 10, 17, 100, 10000]:
        got_items = full_query(test_table_sn, KeyConditions=key_condition, ScanIndexForward=False, Limit=limit)
        got_sort_keys = [x['c'] for x in got_items]
        assert got_sort_keys == expected_sort_keys
|
|
|
|
# Test that a reverse query also works for long partitions. This test
|
|
# reproduces #7586, where reverse queries had to read the entire partition
|
|
# so were limited to 100 MB (max_memory_for_unlimited_query_hard_limit).
|
|
# This is a relatively slow test (its setup of a 100 MB partition takes
|
|
# several seconds), so we mark it with "veryslow" - so it's not run unless
|
|
# the "--runveryslow" option is passed to pytest.
|
|
@pytest.mark.veryslow
def test_query_reverse_long(test_table_sn):
    """Reverse queries work, and stay paged, on a partition of over 100MB."""
    # Insert many big strings into one partition sized over 100MB:
    p = random_string()
    # Named big_string rather than "str", so as not to shadow the builtin.
    big_string = 'x' * 10240
    N = 10000
    with test_table_sn.batch_writer() as batch:
        for i in range(N):
            batch.put_item({'p': p, 'c': i, 's': big_string})

    def key_conditions():
        # The same partition-key condition is used by every query below.
        return {'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}}

    # Query one page of a specific length (Limit=50) in reverse order.
    # We should get the requested number of items, starting from the last
    # item (N-1), in reversed order:
    got_items = test_table_sn.query(
        KeyConditions=key_conditions(),
        ScanIndexForward=False,
        Limit=50,
        ConsistentRead=True)['Items']
    assert len(got_items) == 50
    got_sort_keys = [x['c'] for x in got_items]
    assert got_sort_keys == list(reversed(range(N-50, N)))

    # A similar limited and reversed query - with an explicit starting
    # point (2345) instead of the end:
    start = 2345
    got_items = test_table_sn.query(
        KeyConditions=key_conditions(),
        ExclusiveStartKey={'p': p, 'c': start},
        ScanIndexForward=False, Limit=50, ConsistentRead=True)['Items']
    assert len(got_items) == 50
    got_sort_keys = [x['c'] for x in got_items]
    assert got_sort_keys == list(reversed(range(start-50, start)))

    # Even if a Limit is *not* specified, queries have some built-in size
    # limit (around 1MB) - a query should never return the entire 100MB
    # partition in one response. One of the development versions had a bug
    # here - normal (unreversed) queries were limited to 1MB, but reversed
    # queries returned the entire 100MB.
    # First check this with regular (unreversed) order:
    got_items = test_table_sn.query(
        KeyConditions=key_conditions(),
        ConsistentRead=True)['Items']
    n = len(got_items)
    got_sort_keys = [x['c'] for x in got_items]
    assert got_sort_keys == list(range(n))
    assert n < N  # we don't know how big n should be, but definitely not N!
    # And in reverse order:
    got_items = test_table_sn.query(
        KeyConditions=key_conditions(),
        ScanIndexForward=False,
        ConsistentRead=True)['Items']
    n = len(got_items)
    got_sort_keys = [x['c'] for x in got_items]
    assert got_sort_keys == list(reversed(range(N-n, N)))
    assert n < N
|
|
|
|
# The above test_query_reverse_long() tests a very long (over 100MB)
|
|
# partition, so is very slow especially over a slow network. The following
|
|
# is a much smaller subset of the above test that reproduces issue #9487:
|
|
# When doing a reverse query without "Limit" on a partition of 2MB, a result
|
|
# page should nevertheless be limited to 1MB of data and not return the
|
|
# entire 2MB in one page.
|
|
def test_query_reverse_longish(test_table_sn):
    # Insert a 2MB partition: N=200 items, each carrying a 10KB string.
    # (The payload variable is deliberately not called "str", to avoid
    # shadowing the builtin.)
    p = random_string()
    payload = 'x' * 10240
    N = 200
    with test_table_sn.batch_writer() as batch:
        for i in range(N):
            batch.put_item({'p': p, 'c': i, 's': payload})

    # Query one page of a specific length (Limit=50) in reverse order.
    # We should get the requested number of items, starting from the last
    # item (N-1), in reversed order:
    got_items = test_table_sn.query(
        KeyConditions={
            'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}},
        ScanIndexForward=False,
        Limit=50,
        ConsistentRead=True)['Items']
    assert len(got_items) == 50
    got_sort_keys = [x['c'] for x in got_items]
    assert got_sort_keys == list(reversed(range(N-50, N)))

    # A similar limited and reversed query - with an explicit starting
    # point (147) instead of the end. ExclusiveStartKey is exclusive, so
    # the page is expected to hold the 50 keys just below "start":
    start = 147
    got_items = test_table_sn.query(
        KeyConditions={
            'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}},
        ExclusiveStartKey={'p': p, 'c': start},
        ScanIndexForward=False, Limit=50, ConsistentRead=True)['Items']
    assert len(got_items) == 50
    got_sort_keys = [x['c'] for x in got_items]
    assert got_sort_keys == list(reversed(range(start-50, start)))

    # Even if a Limit is *not* specified, queries have some built-in size
    # limit (around 1MB) - a query should never return the entire 2MB
    # partition in one response. One of the development versions had a bug
    # here - normal (unreversed) queries were limited to 1MB, but reversed
    # queries returned the entire partition.
    # First check this with regular (unreversed) order:
    got_items = test_table_sn.query(
        KeyConditions={
            'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}},
        ConsistentRead=True)['Items']
    n = len(got_items)
    got_sort_keys = [x['c'] for x in got_items]
    assert got_sort_keys == list(range(n))
    assert n < N  # we don't know how big n should be, but definitely not N!
    # And in reverse order:
    got_items = test_table_sn.query(
        KeyConditions={
            'p': {'AttributeValueList': [p], 'ComparisonOperator': 'EQ'}},
        ScanIndexForward=False,
        ConsistentRead=True)['Items']
    n = len(got_items)
    got_sort_keys = [x['c'] for x in got_items]
    assert got_sort_keys == list(reversed(range(N-n, N)))
    assert n < N
|
|
|
|
# A query without a KeyConditions or KeyConditionExpression, or with an empty
|
|
# one, is obviously not allowed:
|
|
def test_query_missing_key(test_table):
    # Two ways of not supplying a usable key condition: an empty
    # KeyConditions map, and no key condition parameter at all. Each must
    # be rejected with a ValidationException.
    for query_kwargs in ({'KeyConditions': {}}, {}):
        with pytest.raises(ClientError, match='ValidationException'):
            full_query(test_table, **query_kwargs)
|
|
|
|
# The paging tests above used a numeric sort key. Let's now also test paging
|
|
# with a bytes sort key. We already have above a test that bytes sort keys
|
|
# work and are sorted correctly (test_query_sort_order_bytes), but the
|
|
# following test adds a check that *paging* works correctly for such keys.
|
|
# We used to have a bug in this (issue #7768) - the returned LastEvaluatedKey
|
|
# was incorrectly formatted, breaking the boto3's parsing of the response.
|
|
# Note we only check the case of bytes *sort* keys in this test. For bytes
|
|
# *partition* keys, see test_scan_paging_bytes().
|
|
def test_query_paging_bytes(test_table_sb):
    # Write 10 items with random bytes sort keys into one fresh partition.
    partition = random_string()
    items = [{'p': partition, 'c': random_bytes()} for _ in range(10)]
    with test_table_sb.batch_writer() as writer:
        for item in items:
            writer.put_item(item)
    # Deliberately pass Limit=1 to enforce paging even though we have
    # just 10 items in the partition.
    key_conditions = {
        'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}}
    got_items = full_query(test_table_sb, Limit=1,
                           KeyConditions=key_conditions)
    # The pages, put together, must list every sort key in sorted order.
    got_sort_keys = [got['c'] for got in got_items]
    assert got_sort_keys == sorted(item['c'] for item in items)
|
|
|
|
# A similar test for string clustering keys
|
|
def test_query_paging_string(test_table_ss):
    # Write 10 items with random string sort keys into one fresh partition.
    partition = random_string()
    items = [{'p': partition, 'c': random_string()} for _ in range(10)]
    with test_table_ss.batch_writer() as writer:
        for item in items:
            writer.put_item(item)
    # Limit=1 forces the query to be paged, one item per page.
    key_conditions = {
        'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}}
    got_items = full_query(test_table_ss, Limit=1,
                           KeyConditions=key_conditions)
    # The pages, put together, must list every sort key in sorted order.
    got_sort_keys = [got['c'] for got in got_items]
    assert got_sort_keys == sorted(item['c'] for item in items)
|
|
|
|
# The following test reproduces #17995: A Query returning a large page
|
|
# composed of many small rows, which causes a lot of processing work for
|
|
# outputting the result, and the risk (before #17995 is fixed) to stall.
|
|
# To see the stall, an option like '--blocked-reactor-notify-ms', '5'
|
|
# must be added to Scylla in test/alternator/run, as the default stall
|
|
# threshold is higher than the one that this test produces.
|
|
# Because this test is slow (takes several seconds to build the large
|
|
# partition) and can't fail or even log a stall without different
|
|
# configuration, we skip it by default, using the "veryslow" mark.
|
|
# Remove this mark, and set the --blocked-reactor-notify-ms option, to run
|
|
# this test.
|
|
@pytest.mark.veryslow
def test_query_large_page_small_rows(test_table_sn):
    partition = random_string()
    # Experimentally, Scylla considers the rows we insert below (which each
    # have a 10-byte string partition key with a one byte name, a numeric
    # clustering key with a one byte name, and no other data) as being 32
    # bytes in size, and returns 32772 of these rows in one (nominally) 1MB
    # page of Query. So if the partition has just 30,000 rows, it will be
    # returned entirely in one page.
    N = 30_000
    with test_table_sn.batch_writer() as writer:
        for c in range(N):
            writer.put_item({'p': partition, 'c': c})

    # A single query() call, without Limit, is expected to return the whole
    # partition in its one response page.
    key_conditions = {
        'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}}
    response = test_table_sn.query(KeyConditions=key_conditions,
                                   ConsistentRead=True)
    assert len(response['Items']) == N
|
|
|
|
# This test is a less extreme and faster version of the previous test
|
|
# (test_query_large_page_small_rows): We test a query returning a large but
|
|
# not huge number (700) of tiny rows. If Alternator has a special code path
|
|
# for handling a response with that many rows (namely, to work around problems
|
|
# with RapidJSON's contiguous allocation of array objects - see #23535),
|
|
# then this test exercises this case.
|
|
def test_query_many_small_rows(test_table_sn):
    # Fill one partition with N tiny rows holding only the key attributes.
    partition = random_string()
    N = 700
    with test_table_sn.batch_writer() as writer:
        for c in range(N):
            writer.put_item({'p': partition, 'c': c})
    # Read the partition with a single query() call (no Limit, no paging
    # loop), so all rows are expected in this one response.
    key_conditions = {
        'p': {'AttributeValueList': [partition], 'ComparisonOperator': 'EQ'}}
    response = test_table_sn.query(KeyConditions=key_conditions,
                                   ConsistentRead=True)
    got_items = response['Items']
    # Rows must come back in sort-key order 0, 1, 2, ...
    for expected_c, item in enumerate(got_items):
        assert item == {'p': partition, 'c': expected_c}
    # ... and there must be exactly N of them.
    assert N == len(got_items)
|