# scylladb/test/cqlpy/test_using_timestamp.py

# Copyright 2021-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0

#############################################################################
# Various tests for USING TIMESTAMP support in Scylla. Note that Cassandra
# also had tests for timestamps, which we ported in
# cassandra_tests/validation/entities/json_timestamp.py. The tests here are
# either additional ones, or focusing on more esoteric issues or small tests
# aiming to reproduce bugs discovered by bigger Cassandra tests.
#############################################################################

from .util import unique_name, unique_key_int
from cassandra.protocol import InvalidRequest
import pytest
import time

@pytest.fixture(scope="module")
def table1(cql, test_keyspace):
    """Module-scoped fixture providing a scratch table for these tests.

    Creates a uniquely-named table in the test keyspace with an int primary
    key ``k`` and two int columns ``v`` and ``w``, yields the table's
    keyspace-qualified name, and drops the table on fixture teardown.
    """
    table_name = ".".join((test_keyspace, unique_name()))
    create_stmt = f"CREATE TABLE {table_name} (k int PRIMARY KEY, v int, w int)"
    cql.execute(create_stmt)
    yield table_name
    # Teardown: runs once the module-scoped fixture is released.
    cql.execute("DROP TABLE " + table_name)

# Sync with the wall clock so that a subsequent expiration won't cross a
# whole-second boundary: 100 milliseconds should be enough to execute 2
# inserts within the same second, even in debug mode. If there is not enough
# time left before the next whole-second mark, sleep until that mark.
def ensure_sync_with_tick(millis = 100):
    """Wait until at least `millis` milliseconds remain in the current second.

    Returns immediately when the wall clock's fractional-second part leaves
    more than `millis` milliseconds before the next whole second. Otherwise
    sleeps until the next whole-second mark and checks again.

    :param millis: time budget, in milliseconds, that must remain before
        the next whole-second boundary.
    :return: the `time.time()` reading that satisfied the condition.
    """
    # Latest fractional-second offset at which the budget still fits.
    latest_start = 1 - millis / 1000
    while True:
        now = time.time()
        into_second = now - int(now)
        if into_second >= latest_start:
            # Too close to the boundary: sleep out the rest of this second,
            # then re-read the clock (the sleep may wake slightly early).
            time.sleep(1 - into_second)
            continue
        return now

# In Cassandra, timestamps can be any *signed* 64-bit integer, not including
# the most negative 64-bit integer (-2^63) which for deletion times is
# reserved for marking *not deleted* cells.
# As proposed in issue #5619, Scylla forbids timestamps higher than the
# current time in microseconds plus three days. However, any negative
# timestamp is still allowed in Scylla. If we ever choose to expand #5619
# and also forbid negative timestamps, we will need to remove this test -
# but for now, while they are allowed, let's test that they are.
def test_negative_timestamp(cql, table1):
    """Check that negative USING TIMESTAMP values are accepted, except -2**63.

    Each accepted timestamp is written to the same key and then read back
    through writetime(v). The value -2**63 must be rejected with an
    InvalidRequest whose message matches 'bound'.
    """
    key = unique_key_int()
    insert_stmt = cql.prepare(f"INSERT INTO {table1} (k, v) VALUES (?, ?) USING TIMESTAMP ?")
    select_stmt = cql.prepare(f"SELECT writetime(v) FROM {table1} where k = ?")
    # The timestamps are written in increasing order so that every write
    # supersedes the previous one and the read sees the latest value.
    accepted_timestamps = (-2**63+1, -100, -1)
    for ts in accepted_timestamps:
        print(ts)
        cql.execute(insert_stmt, [key, 1, ts])
        row = cql.execute(select_stmt, [key]).one()
        assert ts == row[0]
    # Although -2**63 is a legal signed 64-bit integer, it is not allowed as
    # a timestamp: it is reserved to mean "not deleted" in the deletion time
    # of cells.
    reserved_ts = -2**63
    with pytest.raises(InvalidRequest, match='bound'):
        cql.execute(insert_stmt, [key, 1, reserved_ts])

# As explained above, after issue #5619 Scylla can forbid timestamps higher
# than the current time in microseconds plus three days. This test will
# check that it actually does. Starting with #12527 this restriction can
# be turned on or off, so this test checks which mode we're in and that
# this mode does the right thing. On Cassandra, this checking is always
# disabled.
def test_futuristic_timestamp(cql, table1):
    """Check how a far-future USING TIMESTAMP is handled in the current mode.

    When future timestamps are restricted, the insert must fail with an
    InvalidRequest matching 'into the future'. Otherwise the insert must
    succeed and writetime(v) must return the timestamp unchanged.
    """
    def restrict_future_timestamp():
        # The server is treated as Scylla when some table in the "system"
        # keyspace has 'scylla' in its name. Otherwise futuristic timestamps
        # are considered unrestricted.
        table_names = [row.table_name for row in cql.execute("SELECT * FROM system_schema.tables WHERE keyspace_name = 'system'")]
        if any('scylla' in name for name in table_names):
            # On Scylla, the configuration is readable via CQL.
            config_rows = list(cql.execute("SELECT value FROM system.config WHERE name = 'restrict_future_timestamp'"))
            return config_rows[0].value == "true"
        return False

    # USING TIMESTAMP is interpreted as *microseconds* since the UNIX epoch.
    # Passing the number of *nanoseconds* since the epoch therefore gives a
    # timestamp thousands of years into the future, which any check for
    # overly-futuristic timestamps should surely reject.
    futuristic_ts = int(time.time()*1e9)
    key = unique_key_int()
    insert = f'INSERT INTO {table1} (k, v) VALUES ({key}, 1) USING TIMESTAMP {futuristic_ts}'
    if not restrict_future_timestamp():
        # Cassandra, or Scylla with restrict_future_timestamp=false: the
        # write succeeds and the timestamp is read back as-is.
        print('checking with restrict_future_timestamp=false')
        cql.execute(insert)
        assert [(futuristic_ts,)] == cql.execute(f'SELECT writetime(v) FROM {table1} where k = {key}')
        return
    # Scylla with restrict_future_timestamp=true: the write is refused.
    print('checking with restrict_future_timestamp=true')
    with pytest.raises(InvalidRequest, match='into the future'):
        cql.execute(insert)

# Currently, writetime(k) is not allowed for a key column. Neither is ttl(k).
# Scylla issue #14019 and CASSANDRA-9312 consider allowing it - with the
# meaning that it should return the timestamp and ttl of a row marker.
# If this issue is ever implemented in Scylla or Cassandra, the following
# test will need to be replaced by a test for the new feature instead of
# expecting an error message.
def test_key_writetime(cql, table1):
    """Check that writetime(k) and ttl(k) are rejected for the key column k.

    Both selections must raise InvalidRequest. Each match pattern accepts
    two alternative error wordings (presumably one per server
    implementation - verify against the actual messages).
    """
    rejected_queries = (
        (f'SELECT writetime(k) FROM {table1}',
         'PRIMARY KEY part k|WRITETIME is not legal on partition key component k'),
        (f'SELECT ttl(k) FROM {table1}',
         'PRIMARY KEY part k|TTL is not legal on partition key component k'),
    )
    for query, expected_error in rejected_queries:
        with pytest.raises(InvalidRequest, match=expected_error):
            cql.execute(query)

def test_rewrite_different_values_using_same_timestamp(cql, table1):
    """
    Rewriting cells more than once with the same timestamp
    requires tie-breaking to decide which of the cells prevails.
    When the two inserts are non-expiring or when they have the same expiration time,
    cells are selected based on the higher value.
    Otherwise, expiring cells are preferred over non-expiring ones,
    and if both are expiring, the one with the later expiration time wins.

    Every scenario is run with the two values in both orders, so that the
    TTL-based scenarios are checked regardless of which value is larger.
    """
    table = table1
    ts = 1000

    # Defined once, outside the loop: it depends only on cql and table.
    def assert_value(k, expected):
        select = f"SELECT k, v FROM {table} WHERE k = {k}"
        res = list(cql.execute(select))
        assert len(res) == 1
        assert res[0].v == expected

    for v1, v2 in [[1, 2], [2, 1]]:
        # With no TTL, highest value wins
        k = unique_key_int()
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v1}) USING TIMESTAMP {ts}")
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v2}) USING TIMESTAMP {ts}")
        assert_value(k, max(v1, v2))

        # Expiring cells are preferred over non-expiring
        k = unique_key_int()
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v1}) USING TIMESTAMP {ts}")
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v2}) USING TIMESTAMP {ts} and TTL 10")
        assert_value(k, v2)

        # Same as above, with the expiring write issued first
        k = unique_key_int()
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v1}) USING TIMESTAMP {ts} and TTL 10")
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v2}) USING TIMESTAMP {ts}")
        assert_value(k, v1)

        # When both are expiring, the one with the later expiration time wins.
        # Sync with the clock first so that both inserts are issued within
        # the same wall-clock second.
        ensure_sync_with_tick()
        k = unique_key_int()
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v1}) USING TIMESTAMP {ts} and TTL 10")
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v2}) USING TIMESTAMP {ts} and TTL 20")
        assert_value(k, v2)

        # Same as above, with the longer-lived write issued first
        ensure_sync_with_tick()
        k = unique_key_int()
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v1}) USING TIMESTAMP {ts} and TTL 20")
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v2}) USING TIMESTAMP {ts} and TTL 10")
        assert_value(k, v1)

def test_rewrite_different_values_using_same_timestamp_and_expiration(scylla_only, cql, table1):
    """
    Rewriting cells more than once with the same timestamp
    requires tie-breaking to decide which of the cells prevails.
    When the two inserts are expiring and have the same expiration time,
    scylla selects the cells with the lower ttl.

    The scenario is run with the two values in both orders, so the outcome
    is checked regardless of which value is larger.

    The scylla_only fixture is requested only for its effect and is not used
    in the body (presumably it skips the test when not running on Scylla -
    confirm in conftest).
    """
    table = table1
    ts = 1000

    # Defined once, outside the loop: it depends only on cql and table.
    def assert_value(k, expected):
        select = f"SELECT k, v FROM {table} WHERE k = {k}"
        res = list(cql.execute(select))
        assert len(res) == 1
        assert res[0].v == expected

    for v1, v2 in [[1, 2], [2, 1]]:
        # When both have the same expiration, the one with the lower TTL wins (as it has higher derived write time = expiration - ttl)
        ensure_sync_with_tick()
        k = unique_key_int()
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v1}) USING TIMESTAMP {ts} and TTL 3")
        # One second later, a TTL shorter by one second gives the second
        # write the same expiration time as the first.
        time.sleep(1)
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v2}) USING TIMESTAMP {ts} and TTL 2")
        assert_value(k, v2)

def test_rewrite_using_same_timestamp_select_after_expiration(cql, table1):
    """
    Reproducer for https://github.com/scylladb/scylladb/issues/14182

    Rewrite a cell using the same timestamp but a longer ttl
    (TTL 1 for the first insert, TTL 10 for the second).
    Due to #14182, after the first insert expires,
    the first write would have been selected when it has a lexicographically larger
    value, and that results in a null value in the select query result.
    With the fix, we expect to get the cell with the higher expiration time.

    The scenario is run with the two values in both orders, so it covers
    both v1 > v2 (the case that triggered the bug) and v1 < v2.
    """
    table = table1
    ts = 1000

    # Defined once, outside the loop: it depends only on cql and table.
    def assert_value(k, expected):
        select = f"SELECT k, v FROM {table} WHERE k = {k}"
        res = list(cql.execute(select))
        assert len(res) == 1
        assert res[0].v == expected

    for v1, v2 in [[2, 1], [1, 2]]:
        # Sync with the clock first so that both inserts are issued within
        # the same wall-clock second.
        ensure_sync_with_tick()
        k = unique_key_int()
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v1}) USING TIMESTAMP {ts} AND TTL 1")
        cql.execute(f"INSERT INTO {table} (k, v) VALUES ({k}, {v2}) USING TIMESTAMP {ts} AND TTL 10")

        # wait until first insert expires, and expect 2nd value.
        # Null value was returned due to #14182 when v1 > v2
        time.sleep(1)
        assert_value(k, v2)

def test_rewrite_multiple_cells_using_same_timestamp(cql, table1):
    """
    Reproducer for https://github.com/scylladb/scylladb/issues/14182:

    Two inserts write the columns v and w of the same row using the same
    timestamp but different expiration. The two inserts carry swapped
    values (v=1, w=2 versus v=2, w=1).
    Due to #14182, the select query result contained a mixture of the two
    inserts, chosen by the value in each cell rather than by the (different)
    expiration times of the two inserts.
    """
    table = table1
    ts = 1000
    ttl1 = 10
    ttl2 = 20
    first, second = {'v':1, 'w':2}, {'v':2, 'w':1}

    def assert_values(k, expected):
        # Exactly one row must exist for k, carrying the expected v and w.
        rows = list(cql.execute(f"SELECT * FROM {table} WHERE k = {k}"))
        assert len(rows) == 1
        row = rows[0]
        assert row.k == k
        assert row.v == expected['v']
        assert row.w == expected['w']

    # Write the row once with a TTL and then once without a TTL.
    # If reconciliation is done by value, the result will be a mix of the
    # two writes. If it is based first on expiration, the expiring (first)
    # write should prevail as a whole.
    k = unique_key_int()
    cql.execute(f"INSERT INTO {table} (k, v, w) VALUES ({k}, {first['v']}, {first['w']}) USING TIMESTAMP {ts} AND TTL {ttl1}")
    cql.execute(f"INSERT INTO {table} (k, v, w) VALUES ({k}, {second['v']}, {second['w']}) USING TIMESTAMP {ts}")
    assert_values(k, first)

    # Write the row twice with the same write time and different TTLs, so
    # the two writes get different expiration times.
    # If reconciliation is done by value, the result will be a mix of the
    # two writes. If it is based first on the expiration time, the second
    # write (with the longer TTL) should prevail as a whole.
    k = unique_key_int()
    cql.execute(f"INSERT INTO {table} (k, v, w) VALUES ({k}, {first['v']}, {first['w']}) USING TIMESTAMP {ts} AND TTL {ttl1}")
    cql.execute(f"INSERT INTO {table} (k, v, w) VALUES ({k}, {second['v']}, {second['w']}) USING TIMESTAMP {ts} AND TTL {ttl2}")
    assert_values(k, second)