Files
scylladb/alternator-test/test_gsi.py
Piotr Sarna 6579a3850a alternator-test: add xfail test for GSI with 2 regular columns
When updating the second regular base column that is also a view
key, the code in Scylla will assume it only needs to update an entry
instead of replacing an old one. This leads to inconsitencies
exposed in the test case.
Message-Id: <5dfeb9f61f986daa6e480e9da4c7aabb5a09a4ec.1567599461.git.sarna@scylladb.com>
2019-09-11 18:01:05 +03:00

802 lines
39 KiB
Python

# Copyright 2019 ScyllaDB
#
# This file is part of Scylla.
#
# Scylla is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Scylla is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Scylla. If not, see <http://www.gnu.org/licenses/>.
# Tests of GSI (Global Secondary Indexes)
#
# Note that many of these tests are slower than usual, because many of them
# need to create new tables and/or new GSIs of different types, operations
# which are extremely slow in DynamoDB, often taking minutes (!).
import pytest
import time
from botocore.exceptions import ClientError, ParamValidationError
from util import create_test_table, random_string, full_scan, full_query, multiset, list_tables
# GSIs only support eventually consistent reads, so tests that involve
# writing to a table and then expect to read something from it cannot be
# guaranteed to succeed without retrying the read. The following utility
# functions make it easy to write such tests.
# Note that in practice, there repeated reads are almost never necessary:
# Amazon claims that "Changes to the table data are propagated to the global
# secondary indexes within a fraction of a second, under normal conditions"
# and indeed, in practice, the tests here almost always succeed without a
# retry.
def assert_index_query(table, index_name, expected_items, **kwargs):
for i in range(3):
if multiset(expected_items) == multiset(full_query(table, IndexName=index_name, **kwargs)):
return
print('assert_index_query retrying')
time.sleep(1)
assert multiset(expected_items) == multiset(full_query(table, IndexName=index_name, **kwargs))
def assert_index_scan(table, index_name, expected_items, **kwargs):
for i in range(3):
if multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, **kwargs)):
return
print('assert_index_scan retrying')
time.sleep(1)
assert multiset(expected_items) == multiset(full_scan(table, IndexName=index_name, **kwargs))
# Although quite silly, it is actually allowed to create an index which is
# identical to the base table.
def test_gsi_identical(dynamodb):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' }],
AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello',
'KeySchema': [{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
'Projection': { 'ProjectionType': 'ALL' }
}
])
items = [{'p': random_string(), 'x': random_string()} for i in range(10)]
with table.batch_writer() as batch:
for item in items:
batch.put_item(item)
# Scanning the entire table directly or via the index yields the same
# results (in different order).
assert multiset(items) == multiset(full_scan(table))
assert_index_scan(table, 'hello', items)
# We can't scan a non-existant index
with pytest.raises(ClientError, match='ValidationException'):
full_scan(table, IndexName='wrong')
table.delete()
# One of the simplest forms of a non-trivial GSI: The base table has a hash
# and sort key, and the index reverses those roles. Other attributes are just
# copied.
@pytest.fixture(scope="session")
def test_table_gsi_1(dynamodb):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' },
{ 'AttributeName': 'c', 'KeyType': 'RANGE' }
],
AttributeDefinitions=[
{ 'AttributeName': 'p', 'AttributeType': 'S' },
{ 'AttributeName': 'c', 'AttributeType': 'S' },
],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello',
'KeySchema': [
{ 'AttributeName': 'c', 'KeyType': 'HASH' },
{ 'AttributeName': 'p', 'KeyType': 'RANGE' },
],
'Projection': { 'ProjectionType': 'ALL' }
}
],
)
yield table
table.delete()
def test_gsi_simple(test_table_gsi_1):
items = [{'p': random_string(), 'c': random_string(), 'x': random_string()} for i in range(10)]
with test_table_gsi_1.batch_writer() as batch:
for item in items:
batch.put_item(item)
c = items[0]['c']
# The index allows a query on just a specific sort key, which isn't
# allowed on the base table.
with pytest.raises(ClientError, match='ValidationException'):
full_query(test_table_gsi_1, KeyConditions={'c': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}})
expected_items = [x for x in items if x['c'] == c]
assert_index_query(test_table_gsi_1, 'hello', expected_items,
KeyConditions={'c': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}})
# Scanning the entire table directly or via the index yields the same
# results (in different order).
assert_index_scan(test_table_gsi_1, 'hello', full_scan(test_table_gsi_1))
def test_gsi_same_key(test_table_gsi_1):
c = random_string();
# All these items have the same sort key 'c' but different hash key 'p'
items = [{'p': random_string(), 'c': c, 'x': random_string()} for i in range(10)]
with test_table_gsi_1.batch_writer() as batch:
for item in items:
batch.put_item(item)
assert_index_query(test_table_gsi_1, 'hello', items,
KeyConditions={'c': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}})
# Check we get an appropriate error when trying to read a non-existing index
# of an existing table. Although the documentation specifies that a
# ResourceNotFoundException should be returned if "The operation tried to
# access a nonexistent table or index", in fact in the specific case that
# the table does exist but an index does not - we get a ValidationException.
def test_gsi_missing_index(test_table_gsi_1):
with pytest.raises(ClientError, match='ValidationException.*wrong_name'):
full_query(test_table_gsi_1, IndexName='wrong_name',
KeyConditions={'x': {'AttributeValueList': [1], 'ComparisonOperator': 'EQ'}})
with pytest.raises(ClientError, match='ValidationException.*wrong_name'):
full_scan(test_table_gsi_1, IndexName='wrong_name')
# Nevertheless, if the table itself does not exist, a query should return
# a ResourceNotFoundException, not ValidationException:
def test_gsi_missing_table(dynamodb):
with pytest.raises(ClientError, match='ResourceNotFoundException'):
dynamodb.meta.client.query(TableName='nonexistent_table', IndexName='any_name', KeyConditions={'x': {'AttributeValueList': [1], 'ComparisonOperator': 'EQ'}})
with pytest.raises(ClientError, match='ResourceNotFoundException'):
dynamodb.meta.client.scan(TableName='nonexistent_table', IndexName='any_name')
# Verify that strongly-consistent reads on GSI are *not* allowed.
@pytest.mark.xfail(reason="GSI strong consistency not checked")
def test_gsi_strong_consistency(test_table_gsi_1):
with pytest.raises(ClientError, match='ValidationException.*Consistent'):
full_query(test_table_gsi_1, KeyConditions={'c': {'AttributeValueList': ['hi'], 'ComparisonOperator': 'EQ'}}, IndexName='hello', ConsistentRead=True)
with pytest.raises(ClientError, match='ValidationException.*Consistent'):
full_scan(test_table_gsi_1, IndexName='hello', ConsistentRead=True)
# Verify that a GSI is correctly listed in describe_table
@pytest.mark.xfail(reason="DescribeTable provides index names only, no size or item count")
def test_gsi_describe(test_table_gsi_1):
desc = test_table_gsi_1.meta.client.describe_table(TableName=test_table_gsi_1.name)
assert 'Table' in desc
assert 'GlobalSecondaryIndexes' in desc['Table']
gsis = desc['Table']['GlobalSecondaryIndexes']
assert len(gsis) == 1
gsi = gsis[0]
assert gsi['IndexName'] == 'hello'
assert 'IndexSizeBytes' in gsi # actual size depends on content
assert 'ItemCount' in gsi
assert gsi['Projection'] == {'ProjectionType': 'ALL'}
assert gsi['IndexStatus'] == 'ACTIVE'
assert gsi['KeySchema'] == [{'KeyType': 'HASH', 'AttributeName': 'c'},
{'KeyType': 'RANGE', 'AttributeName': 'p'}]
# TODO: check also ProvisionedThroughput, IndexArn
# When a GSI's key includes an attribute not in the base table's key, we
# need to remember to add its type to AttributeDefinitions.
def test_gsi_missing_attribute_definition(dynamodb):
with pytest.raises(ClientError, match='ValidationException.*AttributeDefinitions'):
create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' } ],
AttributeDefinitions=[ { 'AttributeName': 'p', 'AttributeType': 'S' } ],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello',
'KeySchema': [ { 'AttributeName': 'c', 'KeyType': 'HASH' } ],
'Projection': { 'ProjectionType': 'ALL' }
}
])
# test_table_gsi_1_hash_only is a variant of test_table_gsi_1: It's another
# case where the index doesn't involve non-key attributes. Again the base
# table has a hash and sort key, but in this case the index has *only* a
# hash key (which is the base's hash key). In the materialized-view-based
# implementation, we need to remember the other part of the base key as a
# clustering key.
@pytest.fixture(scope="session")
def test_table_gsi_1_hash_only(dynamodb):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' },
{ 'AttributeName': 'c', 'KeyType': 'RANGE' }
],
AttributeDefinitions=[
{ 'AttributeName': 'p', 'AttributeType': 'S' },
{ 'AttributeName': 'c', 'AttributeType': 'S' },
],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello',
'KeySchema': [
{ 'AttributeName': 'c', 'KeyType': 'HASH' },
],
'Projection': { 'ProjectionType': 'ALL' }
}
],
)
yield table
table.delete()
def test_gsi_key_not_in_index(test_table_gsi_1_hash_only):
# Test with items with different 'c' values:
items = [{'p': random_string(), 'c': random_string(), 'x': random_string()} for i in range(10)]
with test_table_gsi_1_hash_only.batch_writer() as batch:
for item in items:
batch.put_item(item)
c = items[0]['c']
expected_items = [x for x in items if x['c'] == c]
assert_index_query(test_table_gsi_1_hash_only, 'hello', expected_items,
KeyConditions={'c': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}})
# Test items with the same sort key 'c' but different hash key 'p'
c = random_string();
items = [{'p': random_string(), 'c': c, 'x': random_string()} for i in range(10)]
with test_table_gsi_1_hash_only.batch_writer() as batch:
for item in items:
batch.put_item(item)
assert_index_query(test_table_gsi_1_hash_only, 'hello', items,
KeyConditions={'c': {'AttributeValueList': [c], 'ComparisonOperator': 'EQ'}})
# Scanning the entire table directly or via the index yields the same
# results (in different order).
assert_index_scan(test_table_gsi_1_hash_only, 'hello', full_scan(test_table_gsi_1_hash_only))
# A second scenario of GSI. Base table has just hash key, Index has a
# different hash key - one of the non-key attributes from the base table.
@pytest.fixture(scope="session")
def test_table_gsi_2(dynamodb):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' } ],
AttributeDefinitions=[
{ 'AttributeName': 'p', 'AttributeType': 'S' },
{ 'AttributeName': 'x', 'AttributeType': 'S' },
],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello',
'KeySchema': [
{ 'AttributeName': 'x', 'KeyType': 'HASH' },
],
'Projection': { 'ProjectionType': 'ALL' }
}
])
yield table
table.delete()
def test_gsi_2(test_table_gsi_2):
items1 = [{'p': random_string(), 'x': random_string()} for i in range(10)]
x1 = items1[0]['x']
x2 = random_string()
items2 = [{'p': random_string(), 'x': x2} for i in range(10)]
items = items1 + items2
with test_table_gsi_2.batch_writer() as batch:
for item in items:
batch.put_item(item)
expected_items = [i for i in items if i['x'] == x1]
assert_index_query(test_table_gsi_2, 'hello', expected_items,
KeyConditions={'x': {'AttributeValueList': [x1], 'ComparisonOperator': 'EQ'}})
expected_items = [i for i in items if i['x'] == x2]
assert_index_query(test_table_gsi_2, 'hello', expected_items,
KeyConditions={'x': {'AttributeValueList': [x2], 'ComparisonOperator': 'EQ'}})
# Test that when a table has a GSI, if the indexed attribute is missing, the
# item is added to the base table but not the index.
def test_gsi_missing_attribute(test_table_gsi_2):
p1 = random_string()
x1 = random_string()
test_table_gsi_2.put_item(Item={'p': p1, 'x': x1})
p2 = random_string()
test_table_gsi_2.put_item(Item={'p': p2})
# Both items are now in the base table:
assert test_table_gsi_2.get_item(Key={'p': p1})['Item'] == {'p': p1, 'x': x1}
assert test_table_gsi_2.get_item(Key={'p': p2})['Item'] == {'p': p2}
# But only the first item is in the index: It can be found using a
# Query, and a scan of the index won't find it (but a scan on the base
# will).
assert_index_query(test_table_gsi_2, 'hello', [{'p': p1, 'x': x1}],
KeyConditions={'x': {'AttributeValueList': [x1], 'ComparisonOperator': 'EQ'}})
assert any([i['p'] == p1 for i in full_scan(test_table_gsi_2)])
# Note: with eventually consistent read, we can't really be sure that
# and item will "never" appear in the index. We do this test last,
# so if we had a bug and such item did appear, hopefully we had enough
# time for the bug to become visible. At least sometimes.
assert not any([i['p'] == p2 for i in full_scan(test_table_gsi_2, IndexName='hello')])
# Test when a table has a GSI, if the indexed attribute has the wrong type,
# the update operation is rejected, and is added to neither base table nor
# index. This is different from the case of a *missing* attribute, where
# the item is added to the base table but not index.
# The following three tests test_gsi_wrong_type_attribute_{put,update,batch}
# test updates using PutItem, UpdateItem, and BatchWriteItem respectively.
def test_gsi_wrong_type_attribute_put(test_table_gsi_2):
# PutItem with wrong type for 'x' is rejected, item isn't created even
# in the base table.
p = random_string()
with pytest.raises(ClientError, match='ValidationException.*mismatch'):
test_table_gsi_2.put_item(Item={'p': p, 'x': 3})
assert not 'Item' in test_table_gsi_2.get_item(Key={'p': p}, ConsistentRead=True)
def test_gsi_wrong_type_attribute_update(test_table_gsi_2):
# An UpdateItem with wrong type for 'x' is also rejected, but naturally
# if the item already existed, it remains as it was.
p = random_string()
x = random_string()
test_table_gsi_2.put_item(Item={'p': p, 'x': x})
with pytest.raises(ClientError, match='ValidationException.*mismatch'):
test_table_gsi_2.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': 3, 'Action': 'PUT'}})
assert test_table_gsi_2.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'x': x}
def test_gsi_wrong_type_attribute_batch(test_table_gsi_2):
# In a BatchWriteItem, if any update is forbidden, the entire batch is
# rejected, and none of the updates happen at all.
p1 = random_string()
p2 = random_string()
p3 = random_string()
items = [{'p': p1, 'x': random_string()},
{'p': p2, 'x': 3},
{'p': p3, 'x': random_string()}]
with pytest.raises(ClientError, match='ValidationException.*mismatch'):
with test_table_gsi_2.batch_writer() as batch:
for item in items:
batch.put_item(item)
for p in [p1, p2, p3]:
assert not 'Item' in test_table_gsi_2.get_item(Key={'p': p}, ConsistentRead=True)
# A third scenario of GSI. Index has a hash key and a sort key, both are
# non-key attributes from the base table. This scenario may be very
# difficult to implement in Alternator because Scylla's materialized-views
# implementation only allows one new key column in the view, and here
# we need two (which, also, aren't actual columns, but map items).
@pytest.fixture(scope="session")
def test_table_gsi_3(dynamodb):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' } ],
AttributeDefinitions=[
{ 'AttributeName': 'p', 'AttributeType': 'S' },
{ 'AttributeName': 'a', 'AttributeType': 'S' },
{ 'AttributeName': 'b', 'AttributeType': 'S' }
],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello',
'KeySchema': [
{ 'AttributeName': 'a', 'KeyType': 'HASH' },
{ 'AttributeName': 'b', 'KeyType': 'RANGE' }
],
'Projection': { 'ProjectionType': 'ALL' }
}
])
yield table
table.delete()
def test_gsi_3(test_table_gsi_3):
items = [{'p': random_string(), 'a': random_string(), 'b': random_string()} for i in range(10)]
with test_table_gsi_3.batch_writer() as batch:
for item in items:
batch.put_item(item)
assert_index_query(test_table_gsi_3, 'hello', [items[3]],
KeyConditions={'a': {'AttributeValueList': [items[3]['a']], 'ComparisonOperator': 'EQ'},
'b': {'AttributeValueList': [items[3]['b']], 'ComparisonOperator': 'EQ'}})
@pytest.mark.xfail(reason="GSI in alternator currently have a bug on updating the second regular base column")
def test_gsi_update_second_regular_base_column(test_table_gsi_3):
items = [{'p': random_string(), 'a': random_string(), 'b': random_string(), 'd': random_string()} for i in range(10)]
with test_table_gsi_3.batch_writer() as batch:
for item in items:
batch.put_item(item)
items[3]['b'] = 'updated'
test_table_gsi_3.update_item(Key={'p': items[3]['p']}, AttributeUpdates={'b': {'Value': 'updated', 'Action': 'PUT'}})
assert_index_query(test_table_gsi_3, 'hello', [items[3]],
KeyConditions={'a': {'AttributeValueList': [items[3]['a']], 'ComparisonOperator': 'EQ'},
'b': {'AttributeValueList': [items[3]['b']], 'ComparisonOperator': 'EQ'}})
# A fourth scenario of GSI. Two GSIs on a single base table.
@pytest.fixture(scope="session")
def test_table_gsi_4(dynamodb):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' } ],
AttributeDefinitions=[
{ 'AttributeName': 'p', 'AttributeType': 'S' },
{ 'AttributeName': 'a', 'AttributeType': 'S' },
{ 'AttributeName': 'b', 'AttributeType': 'S' }
],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello_a',
'KeySchema': [
{ 'AttributeName': 'a', 'KeyType': 'HASH' },
],
'Projection': { 'ProjectionType': 'ALL' }
},
{ 'IndexName': 'hello_b',
'KeySchema': [
{ 'AttributeName': 'b', 'KeyType': 'HASH' },
],
'Projection': { 'ProjectionType': 'ALL' }
}
])
yield table
table.delete()
# Test that a base table with two GSIs updates both as expected.
def test_gsi_4(test_table_gsi_4):
items = [{'p': random_string(), 'a': random_string(), 'b': random_string()} for i in range(10)]
with test_table_gsi_4.batch_writer() as batch:
for item in items:
batch.put_item(item)
assert_index_query(test_table_gsi_4, 'hello_a', [items[3]],
KeyConditions={'a': {'AttributeValueList': [items[3]['a']], 'ComparisonOperator': 'EQ'}})
assert_index_query(test_table_gsi_4, 'hello_b', [items[3]],
KeyConditions={'b': {'AttributeValueList': [items[3]['b']], 'ComparisonOperator': 'EQ'}})
# Verify that describe_table lists the two GSIs.
def test_gsi_4_describe(test_table_gsi_4):
desc = test_table_gsi_4.meta.client.describe_table(TableName=test_table_gsi_4.name)
assert 'Table' in desc
assert 'GlobalSecondaryIndexes' in desc['Table']
gsis = desc['Table']['GlobalSecondaryIndexes']
assert len(gsis) == 2
assert multiset([g['IndexName'] for g in gsis]) == multiset(['hello_a', 'hello_b'])
# A scenario for GSI in which the table has both hash and sort key
@pytest.fixture(scope="session")
def test_table_gsi_5(dynamodb):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' }, { 'AttributeName': 'c', 'KeyType': 'RANGE' } ],
AttributeDefinitions=[
{ 'AttributeName': 'p', 'AttributeType': 'S' },
{ 'AttributeName': 'c', 'AttributeType': 'S' },
{ 'AttributeName': 'x', 'AttributeType': 'S' },
],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello',
'KeySchema': [
{ 'AttributeName': 'p', 'KeyType': 'HASH' },
{ 'AttributeName': 'x', 'KeyType': 'RANGE' },
],
'Projection': { 'ProjectionType': 'ALL' }
}
])
yield table
table.delete()
def test_gsi_5(test_table_gsi_5):
items1 = [{'p': random_string(), 'c': random_string(), 'x': random_string()} for i in range(10)]
p1, x1 = items1[0]['p'], items1[0]['x']
p2, x2 = random_string(), random_string()
items2 = [{'p': p2, 'c': random_string(), 'x': x2} for i in range(10)]
items = items1 + items2
with test_table_gsi_5.batch_writer() as batch:
for item in items:
batch.put_item(item)
expected_items = [i for i in items if i['p'] == p1 and i['x'] == x1]
assert_index_query(test_table_gsi_5, 'hello', expected_items,
KeyConditions={'p': {'AttributeValueList': [p1], 'ComparisonOperator': 'EQ'},
'x': {'AttributeValueList': [x1], 'ComparisonOperator': 'EQ'}})
expected_items = [i for i in items if i['p'] == p2 and i['x'] == x2]
assert_index_query(test_table_gsi_5, 'hello', expected_items,
KeyConditions={'p': {'AttributeValueList': [p2], 'ComparisonOperator': 'EQ'},
'x': {'AttributeValueList': [x2], 'ComparisonOperator': 'EQ'}})
# All tests above involved "ProjectionType: ALL". This test checks how
# "ProjectionType:: KEYS_ONLY" works. We note that it projects both
# the index's key, *and* the base table's key. So items which had different
# base-table keys cannot suddenly become the same item in the index.
@pytest.mark.xfail(reason="GSI not supported")
def test_gsi_projection_keys_only(dynamodb):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' } ],
AttributeDefinitions=[
{ 'AttributeName': 'p', 'AttributeType': 'S' },
{ 'AttributeName': 'x', 'AttributeType': 'S' },
],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello',
'KeySchema': [
{ 'AttributeName': 'x', 'KeyType': 'HASH' },
],
'Projection': { 'ProjectionType': 'KEYS_ONLY' }
}
])
items = [{'p': random_string(), 'x': random_string(), 'y': random_string()} for i in range(10)]
with table.batch_writer() as batch:
for item in items:
batch.put_item(item)
wanted = ['p', 'x']
expected_items = [{k: x[k] for k in wanted if k in x} for x in items]
assert_index_scan(table, 'hello', expected_items)
table.delete()
# Test for "ProjectionType:: INCLUDE". The secondary table includes the
# its own and the base's keys (as in KEYS_ONLY) plus the extra keys given
# in NonKeyAttributes.
@pytest.mark.xfail(reason="GSI not supported")
def test_gsi_projection_include(dynamodb):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' } ],
AttributeDefinitions=[
{ 'AttributeName': 'p', 'AttributeType': 'S' },
{ 'AttributeName': 'x', 'AttributeType': 'S' },
],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello',
'KeySchema': [
{ 'AttributeName': 'x', 'KeyType': 'HASH' },
],
'Projection': { 'ProjectionType': 'INCLUDE',
'NonKeyAttributes': ['a', 'b'] }
}
])
# Some items have the projected attributes a,b and some don't:
items = [{'p': random_string(), 'x': random_string(), 'a': random_string(), 'b': random_string(), 'y': random_string()} for i in range(10)]
items = items + [{'p': random_string(), 'x': random_string(), 'y': random_string()} for i in range(10)]
with table.batch_writer() as batch:
for item in items:
batch.put_item(item)
wanted = ['p', 'x', 'a', 'b']
expected_items = [{k: x[k] for k in wanted if k in x} for x in items]
assert_index_scan(table, 'hello', expected_items)
print(len(expected_items))
table.delete()
# DynamoDB's says the "Projection" argument of GlobalSecondaryIndexes is
# mandatory, and indeed Boto3 enforces that it must be passed. The
# documentation then goes on to claim that the "ProjectionType" member of
# "Projection" is optional - and Boto3 allows it to be missing. But in
# fact, it is not allowed to be missing: DynamoDB complains: "Unknown
# ProjectionType: null".
@pytest.mark.xfail(reason="GSI not supported")
def test_gsi_missing_projection_type(dynamodb):
with pytest.raises(ClientError, match='ValidationException.*ProjectionType'):
create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' }],
AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello',
'KeySchema': [{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
'Projection': {}
}
])
# update_table() for creating a GSI is an asynchronous operation.
# The table's TableStatus changes from ACTIVE to UPDATING for a short while
# and then goes back to ACTIVE, but the new GSI's IndexStatus appears as
# CREATING, until eventually (after a *long* time...) it becomes ACTIVE.
# During the CREATING phase, at some point the Backfilling attribute also
# appears, until it eventually disappears. We need to wait until all three
# markers indicate completion.
# Unfortunately, while boto3 has a client.get_waiter('table_exists') to
# wait for a table to exists, there is no such function to wait for an
# index to come up, so we need to code it ourselves.
def wait_for_gsi(table, gsi_name):
start_time = time.time()
# Surprisingly, even for tiny tables this can take a very long time
# on DynamoDB - often many minutes!
for i in range(300):
time.sleep(1)
desc = table.meta.client.describe_table(TableName=table.name)
table_status = desc['Table']['TableStatus']
if table_status != 'ACTIVE':
print('%d Table status still %s' % (i, table_status))
continue
index_desc = [x for x in desc['Table']['GlobalSecondaryIndexes'] if x['IndexName'] == gsi_name]
assert len(index_desc) == 1
index_status = index_desc[0]['IndexStatus']
if index_status != 'ACTIVE':
print('%d Index status still %s' % (i, index_status))
continue
# When the index is ACTIVE, this must be after backfilling completed
assert not 'Backfilling' in index_desc[0]
print('wait_for_gsi took %d seconds' % (time.time() - start_time))
return
raise AssertionError("wait_for_gsi did not complete")
# Similarly to how wait_for_gsi() waits for a GSI to finish adding,
# this function waits for a GSI to be finally deleted.
def wait_for_gsi_gone(table, gsi_name):
start_time = time.time()
for i in range(300):
time.sleep(1)
desc = table.meta.client.describe_table(TableName=table.name)
table_status = desc['Table']['TableStatus']
if table_status != 'ACTIVE':
print('%d Table status still %s' % (i, table_status))
continue
if 'GlobalSecondaryIndexes' in desc['Table']:
index_desc = [x for x in desc['Table']['GlobalSecondaryIndexes'] if x['IndexName'] == gsi_name]
if len(index_desc) != 0:
index_status = index_desc[0]['IndexStatus']
print('%d Index status still %s' % (i, index_status))
continue
print('wait_for_gsi_gone took %d seconds' % (time.time() - start_time))
return
raise AssertionError("wait_for_gsi_gone did not complete")
# All tests above involved creating a new table with a GSI up-front. This
# test will test creating a base table *without* a GSI, putting data in
# it, and then adding a GSI with the UpdateTable operation. This starts
# a backfilling stage - where data is copied to the index - and when this
# stage is done, the index is usable. Items whose indexed column contains
# the wrong type are silently ignored and not added to the index (it would
# not have been possible to add such items if the GSI was already configured
# when they were added).
@pytest.mark.xfail(reason="GSI not supported")
def test_gsi_backfill(dynamodb):
# First create, and fill, a table without GSI. The items in items1
# will have the appropriate string type for 'x' and will later get
# indexed. Items in item2 have no value for 'x', and in item3 'x' is in
# not a string; So the items in items2 and items3 will be missing
# in the index we'll create later.
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' } ],
AttributeDefinitions=[ { 'AttributeName': 'p', 'AttributeType': 'S' } ])
items1 = [{'p': random_string(), 'x': random_string(), 'y': random_string()} for i in range(10)]
items2 = [{'p': random_string(), 'y': random_string()} for i in range(10)]
items3 = [{'p': random_string(), 'x': i} for i in range(10)]
items = items1 + items2 + items3
with table.batch_writer() as batch:
for item in items:
batch.put_item(item)
assert multiset(items) == multiset(full_scan(table))
# Now use UpdateTable to create the GSI
dynamodb.meta.client.update_table(TableName=table.name,
AttributeDefinitions=[{ 'AttributeName': 'x', 'AttributeType': 'S' }],
GlobalSecondaryIndexUpdates=[ { 'Create':
{ 'IndexName': 'hello',
'KeySchema': [{ 'AttributeName': 'x', 'KeyType': 'HASH' }],
'Projection': { 'ProjectionType': 'ALL' }
}}])
# update_table is an asynchronous operation. We need to wait until it
# finishes and the table is backfilled.
wait_for_gsi(table, 'hello')
# As explained above, only items in items1 got copied to the gsi,
# and Scan on them works as expected.
# Note that we don't need to retry the reads here (i.e., use the
# assert_index_scan() or assert_index_query() functions) because after
# we waited for backfilling to complete, we know all the pre-existing
# data is already in the index.
assert multiset(items1) == multiset(full_scan(table, IndexName='hello'))
# We can also use Query on the new GSI, to search on the attribute x:
assert multiset([items1[3]]) == multiset(full_query(table,
IndexName='hello',
KeyConditions={'x': {'AttributeValueList': [items1[3]['x']], 'ComparisonOperator': 'EQ'}}))
# Let's also test that we cannot add another index with the same name
# that already exists
with pytest.raises(ClientError, match='ValidationException.*already exists'):
dynamodb.meta.client.update_table(TableName=table.name,
AttributeDefinitions=[{ 'AttributeName': 'y', 'AttributeType': 'S' }],
GlobalSecondaryIndexUpdates=[ { 'Create':
{ 'IndexName': 'hello',
'KeySchema': [{ 'AttributeName': 'y', 'KeyType': 'HASH' }],
'Projection': { 'ProjectionType': 'ALL' }
}}])
table.delete()
# Test deleting an existing GSI using UpdateTable
@pytest.mark.xfail(reason="GSI not supported")
def test_gsi_delete(dynamodb):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' } ],
AttributeDefinitions=[
{ 'AttributeName': 'p', 'AttributeType': 'S' },
{ 'AttributeName': 'x', 'AttributeType': 'S' },
],
GlobalSecondaryIndexes=[
{ 'IndexName': 'hello',
'KeySchema': [
{ 'AttributeName': 'x', 'KeyType': 'HASH' },
],
'Projection': { 'ProjectionType': 'ALL' }
}
])
items = [{'p': random_string(), 'x': random_string()} for i in range(10)]
with table.batch_writer() as batch:
for item in items:
batch.put_item(item)
# So far, we have the index for "x" and can use it:
assert_index_query(table, 'hello', [items[3]],
KeyConditions={'x': {'AttributeValueList': [items[3]['x']], 'ComparisonOperator': 'EQ'}})
# Now use UpdateTable to delete the GSI for "x"
dynamodb.meta.client.update_table(TableName=table.name,
GlobalSecondaryIndexUpdates=[{ 'Delete':
{ 'IndexName': 'hello' } }])
# update_table is an asynchronous operation. We need to wait until it
# finishes and the GSI is removed.
wait_for_gsi_gone(table, 'hello')
# Now index is gone. We cannot query using it.
with pytest.raises(ClientError, match='ValidationException.*hello'):
full_query(table, IndexName='hello',
KeyConditions={'x': {'AttributeValueList': [items[3]['x']], 'ComparisonOperator': 'EQ'}})
table.delete()
# Utility function for creating a new table a GSI with the given name,
# and, if creation was successful, delete it. Useful for testing which
# GSI names work.
def create_gsi(dynamodb, index_name):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' }],
AttributeDefinitions=[{ 'AttributeName': 'p', 'AttributeType': 'S' }],
GlobalSecondaryIndexes=[
{ 'IndexName': index_name,
'KeySchema': [{ 'AttributeName': 'p', 'KeyType': 'HASH' }],
'Projection': { 'ProjectionType': 'ALL' }
}
])
# Verify that the GSI wasn't just ignored, as Scylla originally did ;-)
assert 'GlobalSecondaryIndexes' in table.meta.client.describe_table(TableName=table.name)['Table']
table.delete()
# Like table names (tested in test_table.py), index names must must also
# be 3-255 characters and match the regex [a-zA-Z0-9._-]+. This test
# is similar to test_create_table_unsupported_names(), but for GSI names.
# Note that Scylla is actually more limited in the length of the index
# names, because both table name and index name, together, have to fit in
# 221 characters. But we don't verify here this specific limitation.
def test_gsi_unsupported_names(dynamodb):
# Unfortunately, the boto library tests for names shorter than the
# minimum length (3 characters) immediately, and failure results in
# ParamValidationError. But the other invalid names are passed to
# DynamoDB, which returns an HTTP response code, which results in a
# CientError exception.
with pytest.raises(ParamValidationError):
create_gsi(dynamodb, 'n')
with pytest.raises(ParamValidationError):
create_gsi(dynamodb, 'nn')
with pytest.raises(ClientError, match='ValidationException.*nnnnn'):
create_gsi(dynamodb, 'n' * 256)
with pytest.raises(ClientError, match='ValidationException.*nyh'):
create_gsi(dynamodb, 'nyh@test')
# On the other hand, names following the above rules should be accepted. Even
# names which the Scylla rules forbid, such as a name starting with .
def test_gsi_non_scylla_name(dynamodb):
create_gsi(dynamodb, '.alternator_test')
# Index names with 255 characters are allowed in Dynamo. In Scylla, the
# limit is different - the sum of both table and index length cannot
# exceed 211 characters. So we test a much shorter limit.
# (compare test_create_and_delete_table_very_long_name()).
def test_gsi_very_long_name(dynamodb):
#create_gsi(dynamodb, 'n' * 255) # works on DynamoDB, but not on Scylla
create_gsi(dynamodb, 'n' * 190)
# Verify that ListTables does not list materialized views used for indexes.
# This is hard to test, because we don't really know which table names
# should be listed beyond those we created, and don't want to assume that
# no other test runs in parallel with us. So the method we chose is to use a
# unique random name for an index, and check that no table contains this
# name. This assumes that materialized-view names are composed using the
# index's name (which is currently what we do).
@pytest.fixture(scope="session")
def test_table_gsi_random_name(dynamodb):
index_name = random_string()
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' },
{ 'AttributeName': 'c', 'KeyType': 'RANGE' }
],
AttributeDefinitions=[
{ 'AttributeName': 'p', 'AttributeType': 'S' },
{ 'AttributeName': 'c', 'AttributeType': 'S' },
],
GlobalSecondaryIndexes=[
{ 'IndexName': index_name,
'KeySchema': [
{ 'AttributeName': 'c', 'KeyType': 'HASH' },
{ 'AttributeName': 'p', 'KeyType': 'RANGE' },
],
'Projection': { 'ProjectionType': 'ALL' }
}
],
)
yield [table, index_name]
table.delete()
def test_gsi_list_tables(dynamodb, test_table_gsi_random_name):
table, index_name = test_table_gsi_random_name
# Check that the random "index_name" isn't a substring of any table name:
tables = list_tables(dynamodb)
for name in tables:
assert not index_name in name
# But of course, the table's name should be in the list:
assert table.name in tables