mirror of
https://github.com/scylladb/scylladb.git
synced 2026-06-08 16:03:20 +00:00
test/alternator: another test for adding a GSI to an existing table
This patch adds yet another test for Alternator's unimplemented feature of adding a GSI to an already existing table (issue #5022), but this test is for a very specific corner case - tables which contain string attributes with an empty value - the corner case described in issue #9424: DynamoDB used to forbid any string attribute from being set to an empty string, but this changed in May 2020, and since then empty strings are allowed - but NOT as keys. So although it is legal to set a string attribute to an empty string, if the table has a GSI whose key is that specific attribute, the update command is refused. We already had a test for this - test_gsi_empty_value. However, the case in this patch is the case where a GSI is added to a table *after* the table already has data. In this case (as this test demonstrates), we are supposed to drop the items which have the empty-string key from the GSI. Even once #5022 (the ability to add GSIs to existing tables) is implemented, this test will continue to fail. The unique problem of this test is that Scylla's materialized views *do* allow empty strings as clustering keys (right now) and even as partition keys (once #9375 is solved), while we don't want them to enter the GSI. We will probably need to extend the view's filter, which right now contains (as required) "x IS NOT NULL", with the additional filter "x != ''" (when x's type is a string or binary), so that items with empty-string keys will be dropped. Refs #5022 Refs #9375 Refs #9424 Signed-off-by: Nadav Har'El <nyh@scylladb.com> Message-Id: <20211003170636.477582-1-nyh@scylladb.com>
This commit is contained in:
committed by
Piotr Sarna
parent
b136104298
commit
6dee86eade
@@ -24,7 +24,7 @@
|
||||
import pytest
|
||||
import time
|
||||
from botocore.exceptions import ClientError, ParamValidationError
|
||||
from util import create_test_table, random_string, full_scan, full_query, multiset, list_tables
|
||||
from util import create_test_table, random_string, full_scan, full_query, multiset, list_tables, new_test_table
|
||||
|
||||
# GSIs only support eventually consistent reads, so tests that involve
|
||||
# writing to a table and then expect to read something from it cannot be
|
||||
@@ -694,18 +694,18 @@ def wait_for_gsi(table, gsi_name):
|
||||
start_time = time.time()
|
||||
# Surprisingly, even for tiny tables this can take a very long time
|
||||
# on DynamoDB - often many minutes!
|
||||
for i in range(300):
|
||||
for i in range(600):
|
||||
time.sleep(1)
|
||||
desc = table.meta.client.describe_table(TableName=table.name)
|
||||
table_status = desc['Table']['TableStatus']
|
||||
if table_status != 'ACTIVE':
|
||||
print('%d Table status still %s' % (i, table_status))
|
||||
print(f'{i} Table {table.name} status still {table_status}')
|
||||
continue
|
||||
index_desc = [x for x in desc['Table']['GlobalSecondaryIndexes'] if x['IndexName'] == gsi_name]
|
||||
assert len(index_desc) == 1
|
||||
index_status = index_desc[0]['IndexStatus']
|
||||
if index_status != 'ACTIVE':
|
||||
print('%d Index status still %s' % (i, index_status))
|
||||
print(f'{i} Index {gsi_name} status still {index_status}')
|
||||
continue
|
||||
# When the index is ACTIVE, this must be after backfilling completed
|
||||
assert not 'Backfilling' in index_desc[0]
|
||||
@@ -717,18 +717,18 @@ def wait_for_gsi(table, gsi_name):
|
||||
# this function waits for a GSI to be finally deleted.
|
||||
def wait_for_gsi_gone(table, gsi_name):
|
||||
start_time = time.time()
|
||||
for i in range(300):
|
||||
for i in range(600):
|
||||
time.sleep(1)
|
||||
desc = table.meta.client.describe_table(TableName=table.name)
|
||||
table_status = desc['Table']['TableStatus']
|
||||
if table_status != 'ACTIVE':
|
||||
print('%d Table status still %s' % (i, table_status))
|
||||
print(f'{i} Table {table.name} status still {table_status}')
|
||||
continue
|
||||
if 'GlobalSecondaryIndexes' in desc['Table']:
|
||||
index_desc = [x for x in desc['Table']['GlobalSecondaryIndexes'] if x['IndexName'] == gsi_name]
|
||||
if len(index_desc) != 0:
|
||||
index_status = index_desc[0]['IndexStatus']
|
||||
print('%d Index status still %s' % (i, index_status))
|
||||
print(f'{i} Index {gsi_name} status still {index_status}')
|
||||
continue
|
||||
print('wait_for_gsi_gone took %d seconds' % (time.time() - start_time))
|
||||
return
|
||||
@@ -742,7 +742,8 @@ def wait_for_gsi_gone(table, gsi_name):
|
||||
# the wrong type are silently ignored and not added to the index (it would
|
||||
# not have been possible to add such items if the GSI was already configured
|
||||
# when they were added).
|
||||
@pytest.mark.xfail(reason="GSI not supported")
|
||||
# Reproduces issue #5022.
|
||||
@pytest.mark.xfail(reason="issue #5022")
|
||||
def test_gsi_backfill(dynamodb):
|
||||
# First create, and fill, a table without GSI. The items in items1
|
||||
# will have the appropriate string type for 'x' and will later get
|
||||
@@ -795,7 +796,8 @@ def test_gsi_backfill(dynamodb):
|
||||
table.delete()
|
||||
|
||||
# Test deleting an existing GSI using UpdateTable
|
||||
@pytest.mark.xfail(reason="GSI not supported")
|
||||
# Reproduces issue #5022.
|
||||
@pytest.mark.xfail(reason="issue #5022")
|
||||
def test_gsi_delete(dynamodb):
|
||||
table = create_test_table(dynamodb,
|
||||
KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' } ],
|
||||
@@ -922,3 +924,62 @@ def test_gsi_list_tables(dynamodb, test_table_gsi_random_name):
|
||||
assert not index_name in name
|
||||
# But of course, the table's name should be in the list:
|
||||
assert table.name in tables
|
||||
|
||||
# As noted above in test_gsi_empty_value(), setting an indexed string column
|
||||
# to an empty string is rejected, since keys (including GSI keys) are not
|
||||
# allowed to be empty strings or binary blobs.
|
||||
# However, empty strings *are* legal for ordinary non-indexed attributes, so
|
||||
# if the user adds a GSI to an existing table with pre-existing data, it might
|
||||
# contain empty string values for the indexed keys. Such values should be
|
||||
# skipped while filling the GSI - even if Scylla is actually capable of
|
||||
# representing such empty view keys (see issue #9375).
|
||||
# Reproduces issue #5022 and #9424.
|
||||
@pytest.mark.xfail(reason="issue #5022, #9424")
def test_gsi_backfill_empty_string(dynamodb):
    """Add GSIs keyed on "x" to a table that already holds an item with x == ''.

    The table is created without any GSI and filled with two items: one
    whose "x" attribute is a non-empty string and one whose "x" is the
    empty string. Two GSIs are then added with UpdateTable - "index1" uses
    "x" as its partition key, "index2" uses "x" as its sort key - and the
    test asserts that a scan of each index returns only the item with the
    non-empty "x".
    """
    base_key_schema = [
        {'AttributeName': 'p', 'KeyType': 'HASH'},
        {'AttributeName': 'c', 'KeyType': 'RANGE'},
    ]
    base_attribute_definitions = [
        {'AttributeName': 'p', 'AttributeType': 'S'},
        {'AttributeName': 'c', 'AttributeType': 'S'},
    ]
    # The GSIs to add, in creation order: in the first "x" is the partition
    # key, in the second "x" is the sort key.
    gsi_key_schemas = [
        ('index1', [{'AttributeName': 'x', 'KeyType': 'HASH'}]),
        ('index2', [{'AttributeName': 'c', 'KeyType': 'HASH'},
                    {'AttributeName': 'x', 'KeyType': 'RANGE'}]),
    ]
    # Start with a table that has no GSI at all, and fill it.
    with new_test_table(dynamodb,
                        KeySchema=base_key_schema,
                        AttributeDefinitions=base_attribute_definitions) as table:
        p1 = random_string()
        p2 = random_string()
        c = random_string()
        # Two items: the first has a non-empty "x", the second an empty one.
        indexed_item = {'p': p1, 'c': c, 'x': 'hello'}
        table.put_item(Item=dict(indexed_item))
        table.put_item(Item={'p': p2, 'c': c, 'x': ''})
        # DynamoDB limits the number of indexes that can be added in one
        # UpdateTable command to just one, so each GSI gets its own command
        # and we wait for it to complete before issuing the next one.
        client = dynamodb.meta.client
        for index_name, key_schema in gsi_key_schemas:
            client.update_table(
                TableName=table.name,
                AttributeDefinitions=[
                    {'AttributeName': 'x', 'AttributeType': 'S'},
                    {'AttributeName': 'c', 'AttributeType': 'S'},
                ],
                GlobalSecondaryIndexUpdates=[
                    {'Create': {'IndexName': index_name,
                                'KeySchema': key_schema,
                                'Projection': {'ProjectionType': 'ALL'}}},
                ])
            wait_for_gsi(table, index_name)
        # The item with the empty-string "x" must be absent from both GSIs,
        # leaving only the item with x != ''. No read retries (such as the
        # assert_index_scan() or assert_index_query() functions) are needed:
        # once we waited for backfilling to complete, all the pre-existing
        # data is already in the index.
        for index_name, _ in gsi_key_schemas:
            assert [indexed_item] == full_scan(table, ConsistentRead=False, IndexName=index_name)
|
||||
|
||||
Reference in New Issue
Block a user