scylladb/test/alternator/test_describe_table.py

# Copyright 2019-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1

# Tests for the DescribeTable operation.
# Some attributes used only by a specific major feature will be tested
# elsewhere:
#  1. Tests for describing tables with global or local secondary indexes
#     (the GlobalSecondaryIndexes and LocalSecondaryIndexes attributes)
#     are in test_gsi.py and test_lsi.py.
#  2. Tests for the stream feature (LatestStreamArn, LatestStreamLabel,
#     StreamSpecification) will be in the tests devoted to the stream
#     feature.
#  3. Tests for describing a restored table (RestoreSummary, TableId)
#     will be together with tests devoted to the backup/restore feature.

import pytest
from botocore.exceptions import ClientError
import re
import time
from test.alternator.util import multiset, new_test_table, random_string, scylla_config_temporary

import requests

# The most basic DescribeTable check: the reply must echo the name of the
# table we asked about, and report its status as ACTIVE.
def test_describe_table_basic(test_table):
    client = test_table.meta.client
    description = client.describe_table(TableName=test_table.name)['Table']
    assert description['TableName'] == test_table.name
    assert description['TableStatus'] == 'ACTIVE'

# DescribeTable must report the table's schema through its KeySchema and
# AttributeDefinitions attributes.
def test_describe_table_schema(test_table):
    # Both expected values are copied from the test_table fixture's definition.
    expected_key_schema = [
        { 'AttributeName': 'p', 'KeyType': 'HASH' },
        { 'AttributeName': 'c', 'KeyType': 'RANGE' },
    ]
    expected_attribute_definitions = [
        { 'AttributeName': 'p', 'AttributeType': 'S' },
        { 'AttributeName': 'c', 'AttributeType': 'S' },
    ]
    client = test_table.meta.client
    description = client.describe_table(TableName=test_table.name)['Table']
    # The key schema is compared as an ordered list...
    assert description['KeySchema'] == expected_key_schema
    # ...but the attribute definitions may come back in any order, so they
    # are compared as multisets.
    assert multiset(description['AttributeDefinitions']) == multiset(expected_attribute_definitions)

# DescribeTable must report the table's billing mode inside the
# BillingModeSummary attribute.
def test_describe_table_billing(test_table):
    client = test_table.meta.client
    description = client.describe_table(TableName=test_table.name)['Table']
    billing = description['BillingModeSummary']
    assert billing['BillingMode'] == 'PAY_PER_REQUEST'
    # BillingModeSummary also carries a date, LastUpdateToPayPerRequestDateTime.
    # We cannot know its absolute value in advance, but the test table was
    # created with this billing mode from the start, so the date on which the
    # billing mode was set has to equal the table's creation date.
    assert 'LastUpdateToPayPerRequestDateTime' in billing
    assert billing['LastUpdateToPayPerRequestDateTime'] == description['CreationDateTime']

# DescribeTable must report a CreationDateTime for every table. The test
# creates two tables a couple of milliseconds apart and checks that their
# creation times are strictly ordered, fall inside the time window of the
# test, and do not change between repeated DescribeTable calls.
# Reproduces issue #5013
def test_describe_table_creation_time(dynamodb):
    def describe(table):
        # One DescribeTable round-trip for the given table.
        return table.meta.client.describe_table(TableName=table.name)['Table']

    before_time = time.time()

    table_schema = {
        'KeySchema': [
            { 'AttributeName': 'p', 'KeyType': 'HASH' },
            { 'AttributeName': 'c', 'KeyType': 'RANGE' },
        ],
        'AttributeDefinitions': [
            { 'AttributeName': 'p', 'AttributeType': 'S' },
            { 'AttributeName': 'c', 'AttributeType': 'S' },
        ],
    }

    with new_test_table(dynamodb, **table_schema) as first_table:
        # CreationDateTime is returned with millisecond precision, so wait a
        # few milliseconds to guarantee the second table gets a strictly
        # larger creation time than the first.
        time.sleep(0.002)
        with new_test_table(dynamodb, **table_schema) as second_table:
            first_desc = describe(first_table)
            second_desc = describe(second_table)
            assert 'CreationDateTime' in first_desc
            assert 'CreationDateTime' in second_desc

            first_created = first_desc['CreationDateTime']
            second_created = second_desc['CreationDateTime']

            after_time = time.time()
            # Allow one second of slack on each side: the test may run
            # against AWS, whose clock may be slightly off from ours.
            earliest = before_time - 1
            latest = after_time + 1
            assert earliest < first_created.timestamp() < second_created.timestamp() < latest

            # Describe both tables again - this time in the opposite order -
            # and verify that the reported creation times did not change.
            second_desc_again = describe(second_table)
            first_desc_again = describe(first_table)
            assert 'CreationDateTime' in first_desc_again
            assert 'CreationDateTime' in second_desc_again

            first_created_again = first_desc_again['CreationDateTime']
            second_created_again = second_desc_again['CreationDateTime']

            assert first_created == first_created_again, (
                first_created, first_created.timestamp(),
                first_created_again, first_created_again.timestamp())
            assert second_created == second_created_again, (
                second_created, second_created.timestamp(),
                second_created_again, second_created_again.timestamp())

# DescribeTable should return an estimate of the number of items in the
# table, in the ItemCount attribute. There is little we can verify about
# its value: the documentation says the count may lag by up to six hours,
# so it need not have any relation to what the test table holds right now.
# All this test checks is that the attribute is present - which means a
# stub implementation that always returns zero would pass it too.
@pytest.mark.xfail(reason="DescribeTable does not return table item count")
def test_describe_table_item_count(test_table):
    client = test_table.meta.client
    description = client.describe_table(TableName=test_table.name)['Table']
    assert 'ItemCount' in description

# The table's estimated size in bytes, TableSizeBytes, may likewise reflect
# the state of the table as long as six hours ago. So we only check that
# the attribute is present and is not negative.
def test_describe_table_size(test_table):
    client = test_table.meta.client
    description = client.describe_table(TableName=test_table.name)['Table']
    assert 'TableSizeBytes' in description
    size_in_bytes = description['TableSizeBytes']
    assert size_in_bytes >= 0

# This is a Scylla-only test: it relies on a Scylla configuration option to
# make the result deterministic, and DynamoDB gives us no guarantees about
# the sizes it returns anyway.
@pytest.mark.parametrize("cache_validity_in_seconds", [0, 3600])
def test_describe_table_size_with_N_timeout(scylla_only, dynamodb, rest_api, cache_validity_in_seconds):
    '''
    Check how the alternator_describe_table_info_cache_validity_in_seconds
    option affects the TableSizeBytes value returned by DescribeTable.

    DescribeTable is called once on a new table, then an item is written and
    the table is flushed, and DescribeTable is called a second time:
      * with a validity of 0, the second call must report a larger size
        than the first one;
      * with a validity of 3600 seconds (one hour - long enough to cover the
        whole test), the second call must report exactly the same size as
        the first one.
    '''
    table_schema = {
        'KeySchema': [
            { 'AttributeName': 'p', 'KeyType': 'HASH' },
            { 'AttributeName': 'c', 'KeyType': 'RANGE' },
        ],
        'AttributeDefinitions': [
            { 'AttributeName': 'p', 'AttributeType': 'S' },
            { 'AttributeName': 'c', 'AttributeType': 'S' },
        ],
    }
    cache_option = 'alternator_describe_table_info_cache_validity_in_seconds'

    with scylla_config_temporary(dynamodb, cache_option, str(cache_validity_in_seconds)), \
            new_test_table(dynamodb, **table_schema) as table:
        client = table.meta.client

        before = client.describe_table(TableName=table.name)['Table']
        assert 'TableSizeBytes' in before
        assert before['TableSizeBytes'] >= 0

        # Write a single item with random key and value.
        table.put_item(Item={'p': random_string(), 'c': random_string(), 'v': random_string()})

        # The size is currently calculated from sstables only, so the
        # memtables have to be flushed before the new item can be counted.
        keyspace = 'alternator_' + table.name
        flush_reply = requests.post(rest_api + f'/storage_service/keyspace_flush/{keyspace}',
                                    params={'cf' : table.name})
        assert flush_reply.ok

        after = client.describe_table(TableName=table.name)['Table']
        assert 'TableSizeBytes' in after
        if cache_validity_in_seconds != 0:
            # The size reported by the first call must be returned again.
            assert after['TableSizeBytes'] == before['TableSizeBytes']
        else:
            # With zero validity the size must have grown after the write.
            assert after['TableSizeBytes'] > before['TableSizeBytes']

# A very partial test of the ProvisionedThroughput attribute returned by
# DescribeTable. The test table is not configured with provisioned
# throughput, so there are no interesting settings to look at. Still,
# DynamoDB documents - and in practice returns - zeros for WriteCapacityUnits
# and ReadCapacityUnits in this case, and some applications assume that
# these numbers are always present (even if 0).
def test_describe_table_provisioned_throughput(test_table):
    client = test_table.meta.client
    description = client.describe_table(TableName=test_table.name)['Table']
    throughput = description['ProvisionedThroughput']
    assert throughput['NumberOfDecreasesToday'] == 0
    assert throughput['WriteCapacityUnits'] == 0
    assert throughput['ReadCapacityUnits'] == 0

# This is a silly test for the RestoreSummary attribute in DescribeTable -
# it should not exist in a table not created by a restore. When testing
# the backup/restore feature, we will have more meaningful tests for the
# value of this attribute in that case.
def test_describe_table_restore_summary(test_table):
    got = test_table.meta.client.describe_table(TableName=test_table.name)['Table']
    # Use the "not in" operator rather than negating an "in" expression.
    assert 'RestoreSummary' not in got

# This is a silly test for the SSEDescription attribute in DescribeTable -
# by default, a table is encrypted with AWS-owned keys, not using client-
# owned keys, and the SSEDescription attribute is not returned at all.
def test_describe_table_encryption(test_table):
    got = test_table.meta.client.describe_table(TableName=test_table.name)['Table']
    # Use the "not in" operator rather than negating an "in" expression.
    assert 'SSEDescription' not in got

# This is a silly test for the StreamSpecification attribute in DescribeTable -
# when there are no streams, this attribute should be missing.
def test_describe_table_stream_specification(test_table):
    got = test_table.meta.client.describe_table(TableName=test_table.name)['Table']
    # Use the "not in" operator rather than negating an "in" expression.
    assert 'StreamSpecification' not in got

# Every table must have an ARN - a unique identifier of the table which
# includes the zone it is on, the account, and of course the table's name.
# The ARN format is described in
# https://docs.aws.amazon.com/general/latest/gr/aws-arns-and-namespaces.html#genref-arns
# Here we only check that the TableArn attribute exists and begins with "arn:".
def test_describe_table_arn(test_table):
    client = test_table.meta.client
    description = client.describe_table(TableName=test_table.name)['Table']
    assert 'TableArn' in description
    assert description['TableArn'].startswith('arn:')

# Every table must have a TableId.
# DynamoDB documentation states that this id must look like a UUID, so we
# check that it is made of five dash-separated groups of 8, 4, 4, 4 and 12
# lowercase hexadecimal digits.
def test_describe_table_id(test_table):
    uuid_pattern = '[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}'
    client = test_table.meta.client
    description = client.describe_table(TableName=test_table.name)['Table']
    assert 'TableId' in description
    assert re.fullmatch(uuid_pattern, description['TableId'])

# DescribeTable error path: trying to describe a non-existent table should
# result in a ResourceNotFoundException.
def test_describe_table_non_existent_table(dynamodb):
    with pytest.raises(ClientError, match='ResourceNotFoundException') as einfo:
        dynamodb.meta.client.describe_table(TableName='non_existent_table')
    # As one of the first error-path tests that we wrote, let's test in more
    # detail that the error reply has the appropriate fields:
    response = einfo.value.response
    print(response)
    err = response['Error']
    assert err['Code'] == 'ResourceNotFoundException'
    # re.match() takes the pattern first and the string to check second, so
    # the expected text is the pattern and the message we received is the
    # subject. The message must begin with the expected text.
    assert re.match('Requested resource not found: Table: non_existent_table not found', err['Message'])