test/alternator: another test for adding a GSI to an existing table

This patch adds yet another test for Alternator's unimplemented feature
of adding a GSI to an already existing table (issue #5022), but this
test is for a very specific corner case - tables which contain string
attributes with an empty value - the corner case described in
issue #9424:

DynamoDB used to forbid any string attributes from being set to an empty
string, but this changed in May 2020, and since then empty strings are
allowed - but NOT as keys. So although it is legal to set a string
attribute to an empty string, if this table has a GSI whose key is that
specific attribute, the update command is refused. We already had a
test for this - test_gsi_empty_value.

However, the case in this patch is the case where a GSI is added to a
table *after* the table already has data. In this case (as this test
demonstrates), we are supposed to drop the items which have the empty
string key from the GSI.

Even once #5022 (the ability to add GSIs to existing tables) is done,
this test will continue to fail. The unique problem of this test is that
Scylla's materialized views *do* allow empty strings as clustering keys
(right now) and even as partition keys (once #9375 is solved), while
we don't want them to enter the GSI. The view's filter currently contains
(as required) "x IS NOT NULL"; we will probably need to add to it the
filter "x != ''" (when x's type is a string or binary) so that
items with empty-string keys will be dropped.

Refs #5022
Refs #9375
Refs #9424

Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20211003170636.477582-1-nyh@scylladb.com>
This commit is contained in:
Nadav Har'El
2021-10-03 20:06:35 +03:00
committed by Piotr Sarna
parent b136104298
commit 6dee86eade

View File

@@ -24,7 +24,7 @@
import pytest
import time
from botocore.exceptions import ClientError, ParamValidationError
from util import create_test_table, random_string, full_scan, full_query, multiset, list_tables
from util import create_test_table, random_string, full_scan, full_query, multiset, list_tables, new_test_table
# GSIs only support eventually consistent reads, so tests that involve
# writing to a table and then expect to read something from it cannot be
@@ -694,18 +694,18 @@ def wait_for_gsi(table, gsi_name):
start_time = time.time()
# Surprisingly, even for tiny tables this can take a very long time
# on DynamoDB - often many minutes!
for i in range(300):
for i in range(600):
time.sleep(1)
desc = table.meta.client.describe_table(TableName=table.name)
table_status = desc['Table']['TableStatus']
if table_status != 'ACTIVE':
print('%d Table status still %s' % (i, table_status))
print(f'{i} Table {table.name} status still {table_status}')
continue
index_desc = [x for x in desc['Table']['GlobalSecondaryIndexes'] if x['IndexName'] == gsi_name]
assert len(index_desc) == 1
index_status = index_desc[0]['IndexStatus']
if index_status != 'ACTIVE':
print('%d Index status still %s' % (i, index_status))
print(f'{i} Index {gsi_name} status still {index_status}')
continue
# When the index is ACTIVE, this must be after backfilling completed
assert not 'Backfilling' in index_desc[0]
@@ -717,18 +717,18 @@ def wait_for_gsi(table, gsi_name):
# this function waits for a GSI to be finally deleted.
def wait_for_gsi_gone(table, gsi_name):
start_time = time.time()
for i in range(300):
for i in range(600):
time.sleep(1)
desc = table.meta.client.describe_table(TableName=table.name)
table_status = desc['Table']['TableStatus']
if table_status != 'ACTIVE':
print('%d Table status still %s' % (i, table_status))
print(f'{i} Table {table.name} status still {table_status}')
continue
if 'GlobalSecondaryIndexes' in desc['Table']:
index_desc = [x for x in desc['Table']['GlobalSecondaryIndexes'] if x['IndexName'] == gsi_name]
if len(index_desc) != 0:
index_status = index_desc[0]['IndexStatus']
print('%d Index status still %s' % (i, index_status))
print(f'{i} Index {gsi_name} status still {index_status}')
continue
print('wait_for_gsi_gone took %d seconds' % (time.time() - start_time))
return
@@ -742,7 +742,8 @@ def wait_for_gsi_gone(table, gsi_name):
# the wrong type are silently ignored and not added to the index (it would
# not have been possible to add such items if the GSI was already configured
# when they were added).
@pytest.mark.xfail(reason="GSI not supported")
# Reproduces issue #5022.
@pytest.mark.xfail(reason="issue #5022")
def test_gsi_backfill(dynamodb):
# First create, and fill, a table without GSI. The items in items1
# will have the appropriate string type for 'x' and will later get
@@ -795,7 +796,8 @@ def test_gsi_backfill(dynamodb):
table.delete()
# Test deleting an existing GSI using UpdateTable
@pytest.mark.xfail(reason="GSI not supported")
# Reproduces issue #5022.
@pytest.mark.xfail(reason="issue #5022")
def test_gsi_delete(dynamodb):
table = create_test_table(dynamodb,
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' } ],
@@ -922,3 +924,62 @@ def test_gsi_list_tables(dynamodb, test_table_gsi_random_name):
assert not index_name in name
# But of course, the table's name should be in the list:
assert table.name in tables
# As noted above in test_gsi_empty_value(), setting an indexed string column
# to an empty string is rejected, since keys (including GSI keys) are not
# allowed to be empty strings or binary blobs.
# However, empty strings *are* legal for ordinary non-indexed attributes, so
# if the user adds a GSI to an existing table with pre-existing data, it might
# contain empty string values for the indexed keys. Such values should be
# skipped while filling the GSI - even if Scylla is actually capable of
# representing such empty view keys (see issue #9375).
# Reproduces issue #5022 and #9424.
@pytest.mark.xfail(reason="issue #5022, #9424")
def test_gsi_backfill_empty_string(dynamodb):
    # Adding a GSI to a table that already holds data must leave out of the
    # index any pre-existing item whose would-be index key is an empty
    # string. The base table starts out without any GSI; both of its keys
    # (hash key "p" and range key "c") are strings.
    base_key_schema = [
        {'AttributeName': 'p', 'KeyType': 'HASH'},
        {'AttributeName': 'c', 'KeyType': 'RANGE'},
    ]
    base_attribute_definitions = [
        {'AttributeName': 'p', 'AttributeType': 'S'},
        {'AttributeName': 'c', 'AttributeType': 'S'},
    ]
    with new_test_table(dynamodb,
                        KeySchema=base_key_schema,
                        AttributeDefinitions=base_attribute_definitions) as table:
        p1 = random_string()
        p2 = random_string()
        c = random_string()
        # Two pre-existing items sharing the same "c": the first has a
        # non-empty "x", the second has an empty-string "x".
        table.put_item(Item={'p': p1, 'c': c, 'x': 'hello'})
        table.put_item(Item={'p': p2, 'c': c, 'x': ''})
        # The two GSIs to add with UpdateTable: in index1 "x" is the
        # partition key, in index2 "x" is the sort key (with "c" as the
        # partition key). DynamoDB accepts only one index creation per
        # UpdateTable command, so each index gets its own command and we
        # wait for it to become active before continuing with the next.
        gsi_attribute_definitions = [
            {'AttributeName': 'x', 'AttributeType': 'S'},
            {'AttributeName': 'c', 'AttributeType': 'S'},
        ]
        gsi_key_schemas = [
            ('index1', [{'AttributeName': 'x', 'KeyType': 'HASH'}]),
            ('index2', [{'AttributeName': 'c', 'KeyType': 'HASH'},
                        {'AttributeName': 'x', 'KeyType': 'RANGE'}]),
        ]
        for index_name, index_key_schema in gsi_key_schemas:
            create_index = {
                'Create': {
                    'IndexName': index_name,
                    'KeySchema': index_key_schema,
                    'Projection': {'ProjectionType': 'ALL'},
                }
            }
            dynamodb.meta.client.update_table(
                TableName=table.name,
                AttributeDefinitions=gsi_attribute_definitions,
                GlobalSecondaryIndexUpdates=[create_index])
            wait_for_gsi(table, index_name)
        # The item with the empty-string "x" must be absent from both GSIs,
        # leaving only the item with x == 'hello' in each. The reads are not
        # retried: wait_for_gsi() has already returned for each index, so
        # the pre-existing data is expected to be in the index by now.
        expected_items = [{'p': p1, 'c': c, 'x': 'hello'}]
        for index_name, _ in gsi_key_schemas:
            assert expected_items == full_scan(table, ConsistentRead=False, IndexName=index_name)