# Copyright 2023-present ScyllaDB # # SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 ############################################################################### # Tests for data casts, e.g `SELECT CAST (... AS int) FROM tbl` # # Note that this is a different feature from the query casts `(int)123` # which are tested separately in test_cast.py. Whereas the query casts # can only reinterpret data losslessly (e.g., cast an integer to a wider # integer), the data casts tested here can actually convert data from one # type to another in sometimes surprising ways. For example, a very large # arbitrary-precision number (varint) can be cast into an integer and result # in a wraparound, or be cast into a float and result an infinity. In # this test file we'll check some of these surprising conversions, and # verify that they are the same in Scylla and Cassandra and don't regress. # # See also cql_cast_test.py in dtest. ############################################################################### import pytest import math from .util import new_test_table, unique_key_int from cassandra.protocol import InvalidRequest from ctypes import c_float @pytest.fixture(scope="module") def table1(cql, test_keyspace): with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, cVarint varint") as table: yield table @pytest.fixture(scope="module") def table2(cql, test_keyspace): with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, c counter") as table: yield table @pytest.fixture(scope="module") def table3(cql, test_keyspace): with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, i int, a ascii, bi bigint, b blob, bool boolean, d date, dec decimal, db double, dur duration, f float, addr inet, si smallint, t text, tim time, ts timestamp, tu timeuuid, ti tinyint, u uuid, vc varchar, vi varint") as table: yield table # Utility function for emulating a wrapping cast of a big positive number # into a smaller signed integer of given number of bits. For example, # casting 511 to 8 bits results in -1. def signed(number, bits): p = 2**bits ret = number % p if ret > p/2: return ret - p else: return ret # Test casting a very large varint number to various other types, resulting # in various non-trivial conversions including truncation or wrap-around of # numbers. def test_cast_from_large_varint(cql, table1): p = unique_key_int() p_negative = unique_key_int() v = 32767456456456456456545678943512357658768763546575675 cql.execute(f'INSERT INTO {table1} (p, cVarint) VALUES ({p}, {v})') cql.execute(f'INSERT INTO {table1} (p, cVarint) VALUES ({p_negative}, {-v})') # We can read back the original number without a cast, or with a cast # to the same type. The "decimal" type can also hold a varint and return # the same number. assert [(v,)] == list(cql.execute(f"SELECT cVarint FROM {table1} WHERE p={p}")) assert [(v,)] == list(cql.execute(f"SELECT CAST(cVarint AS varint) FROM {table1} WHERE p={p}")) assert [(v,)] == list(cql.execute(f"SELECT CAST(cVarint AS decimal) FROM {table1} WHERE p={p}")) # Casting into smaller integer types results in wraparound assert [(signed(v,8),)] == list(cql.execute(f"SELECT CAST(cVarint AS tinyint) FROM {table1} WHERE p={p}")) assert [(signed(v,16),)] == list(cql.execute(f"SELECT CAST(cVarint AS smallint) FROM {table1} WHERE p={p}")) assert [(signed(v,32),)] == list(cql.execute(f"SELECT CAST(cVarint AS int) FROM {table1} WHERE p={p}")) assert [(signed(v,64),)] == list(cql.execute(f"SELECT CAST(cVarint AS bigint) FROM {table1} WHERE p={p}")) # Casting to a 32-bit floating point, which only supports numbers up # to 1e38, results in infinity assert [(math.inf,)] == list(cql.execute(f"SELECT CAST(cVarint AS float) FROM {table1} WHERE p={p}")) assert [(-math.inf,)] == list(cql.execute(f"SELECT CAST(cVarint AS float) FROM {table1} WHERE p={p_negative}")) # Casting to a 64-bit floating point, which supports the range of our # given number (though not its full precision!) is allowed, and some # precision is lost. Confusingly, Python's 64-bit floating point is # called "float", and we can use Python's float() function to convert # the large number into a 64-bit double to compare the expected result. assert [(float(v),)] == list(cql.execute(f"SELECT CAST(cVarint AS double) FROM {table1} WHERE p={p}")) # Casting the number to string types results in printing the string in # decimal, as expected - same as Python's str() function: assert [(str(v),)] == list(cql.execute(f"SELECT CAST(cVarint AS ascii) FROM {table1} WHERE p={p}")) assert [(str(v),)] == list(cql.execute(f"SELECT CAST(cVarint AS text) FROM {table1} WHERE p={p}")) # "varchar" is supposed to be an alias to "text" and worked just as well, # but surprisingly casting to varchar doesn't work on Cassandra, so let's # test it in a separate test below, test_cast_from_large_varint_to_varchar # Casting a number to all other types is NOT allowed: for t in ['blob', 'boolean', 'counter', 'date', 'duration', 'inet', 'timestamp', 'timeuuid', 'uuid']: with pytest.raises(InvalidRequest, match='cast'): cql.execute(f"SELECT CAST(cVarint AS {t}) FROM {table1} WHERE p={p}") # In test_cast_from_large_varint we checked that a varint can be cast # to the "text" type. Since "varchar" is just an alias for "text", casting # to varchar should work too, but in Cassandra it doesn't so this test # is marked a Cassandra bug. def test_cast_from_large_varint_to_varchar(cql, table1, cassandra_bug): p = unique_key_int() v = 32767456456456456456545678943512357658768763546575675 cql.execute(f'INSERT INTO {table1} (p, cVarint) VALUES ({p}, {v})') assert [(str(v),)] == list(cql.execute(f"SELECT CAST(cVarint AS varchar) FROM {table1} WHERE p={p}")) # In test_cast_column_names we are checking that columns are named in Cassandra style def test_cast_column_names(cql, table1): p = unique_key_int() v = 123 for t in ['int', 'double', 'text', 'tinyint', 'smallint']: cql.execute(f'INSERT INTO {table1} (p, cVarint) VALUES ({p}, {v})') assert cql.execute(f"SELECT CAST(cVarint as {t}) FROM {table1} WHERE p={p}").one()._fields[0] == f"cast_cvarint_as_{t}" #In test_ignore_cast_to_the_same_type we are checking that query with casting of column to the same type is optimized def test_ignore_cast_to_the_same_type(cql, table1): p = unique_key_int() v = 456 cql.execute(f'INSERT INTO {table1} (p, cVarint) VALUES ({p}, {v})') assert cql.execute(f"SELECT CAST(p as int) FROM {table1} WHERE p={p}").one()._fields[0] == "p" # Test casting a counter to various other types. Reproduces #14501. def test_cast_from_counter(cql, table2): p = unique_key_int() # Set the counter to 1000 in two increments, to make it less trivial to # read correctly. cql.execute(f'UPDATE {table2} SET c = c + 230 WHERE p = {p}') cql.execute(f'UPDATE {table2} SET c = c + 770 WHERE p = {p}') # We can read back the original number without a cast, or with a silly # cast to the same type "counter". assert [(1000,)] == list(cql.execute(f"SELECT c FROM {table2} WHERE p={p}")) assert [(1000,)] == list(cql.execute(f"SELECT CAST(c AS counter) FROM {table2} WHERE p={p}")) # Casting into smaller integer types results in wraparound assert [(signed(1000,8),)] == list(cql.execute(f"SELECT CAST(c AS tinyint) FROM {table2} WHERE p={p}")) assert [(1000,)] == list(cql.execute(f"SELECT CAST(c AS smallint) FROM {table2} WHERE p={p}")) assert [(1000,)] == list(cql.execute(f"SELECT CAST(c AS int) FROM {table2} WHERE p={p}")) assert [(1000,)] == list(cql.execute(f"SELECT CAST(c AS bigint) FROM {table2} WHERE p={p}")) assert [(1000.0,)] == list(cql.execute(f"SELECT CAST(c AS float) FROM {table2} WHERE p={p}")) assert [(1000.0,)] == list(cql.execute(f"SELECT CAST(c AS double) FROM {table2} WHERE p={p}")) # Casting the counter to string types results in printing the number in # decimal, as expected assert [("1000",)] == list(cql.execute(f"SELECT CAST(c AS ascii) FROM {table2} WHERE p={p}")) assert [("1000",)] == list(cql.execute(f"SELECT CAST(c AS text) FROM {table2} WHERE p={p}")) # "varchar" is supposed to be an alias to "text" and should work, but # surprisingly casting to varchar doesn't work on Cassandra, so we # test it in a separate test below, test_cast_from_counter_to_varchar. # Casting a counter to all other types is NOT allowed: for t in ['blob', 'boolean', 'date', 'duration', 'inet', 'timestamp', 'timeuuid', 'uuid']: with pytest.raises(InvalidRequest, match='cast'): cql.execute(f"SELECT CAST(c AS {t}) FROM {table2} WHERE p={p}") # In test_cast_from_counter we checked that a counter can be cast to the # "text" type. Since "varchar" is just an alias for "text", casting # to varchar should work too, but in Cassandra it doesn't so this test # is marked a Cassandra bug. def test_cast_from_counter_to_varchar(cql, table2, cassandra_bug): p = unique_key_int() cql.execute(f'UPDATE {table2} SET c = c + 1000 WHERE p = {p}') assert [("1000",)] == list(cql.execute(f"SELECT CAST(c AS varchar) FROM {table2} WHERE p={p}")) # Test casts from various types *to* counter type. This is a rather silly # operation - casting to "counter" doesn't make a real counter. It could # have been supported the same as casting to bigint, but Cassandra chose not # to support it and neither do we. # The only case that works is the do-nothing casting of a counter to counter, # and that case is already checked in test_cast_from_counter(). def test_cast_to_counter(cql, table3): p = unique_key_int() for col in ['i', 'a', 'bi', 'b', 'bool', 'd', 'dec', 'db', 'dur', 'f', 'addr', 'si', 't', 'tim', 'ts', 'tu', 'ti', 'u', 'vc', 'vi']: with pytest.raises(InvalidRequest, match='cannot be cast'): cql.execute(f"SELECT CAST({col} AS counter) FROM {table3} WHERE p={p}") # Test casting 32-bit floating-point to text. The output string should have # enough digits to faithfully reproduce the original floating-point value # when read back. We don't check in this test whether maybe it outputs too # many digits that don't make any difference when read back. # Reproduces issue #15127. def test_cast_float_to_text(cql, table3): p = unique_key_int() # The numbers below (based on pi) have excessive precision, they are # truncated to Python's 64-bit precision, and later further truncated # to 32-bit floats when assigned to a Scylla "float" column. numbers = [ 3.141592653589793238462643383279502884197, -3.141592653589793238462643383279502884197, -31415926535897932384.62643383279502884197, 0.00003141592653589793238462643383279502884197] for n in numbers: cql.execute(f'INSERT INTO {table3} (p, f) VALUES ({p}, {n})') # Sanity check: If we read "f" back, we get n truncated to 32-bit # precision. We use c_float to truncate n to 32-bit precision. expected_f = c_float(n) read_f = c_float(list(cql.execute(f"SELECT f FROM {table3} WHERE p={p}"))[0][0]) assert expected_f.value == read_f.value # that's how to compare c_float objects # Finally test CAST(f AS text): test that the resulting textual # representation has enough digits to restore f without loss of # precision. If we ask Scylla to cast f to text, and then convert # this text back to c_float, the result should be equal again to # expected_f. If the cast printed too few digits, it would not be # equal. ftext = list(cql.execute(f"SELECT CAST(f AS text) FROM {table3} WHERE p={p}"))[0][0] read_f = c_float(float(ftext)) assert expected_f.value == read_f.value # Same test as test_cast_float_to_text() above, just for 64-bit floats. # Reproduces issue #15127. def test_cast_double_to_text(cql, table3): p = unique_key_int() numbers = [ 3.141592653589793238462643383279502884197, -3.141592653589793238462643383279502884197, -31415926535897932384.62643383279502884197, 0.00003141592653589793238462643383279502884197] for n in numbers: cql.execute(f'INSERT INTO {table3} (p, db) VALUES ({p}, {n})') # If we read "db" back, we get n truncated to 64-bit precision. assert n == list(cql.execute(f"SELECT db FROM {table3} WHERE p={p}"))[0][0] # If we ask Scylla to cast db to text, and then convert this text back # to 64-bit float, the result should be equal again to n. If the cast # printed too few digits, it would not be equal. ftext = list(cql.execute(f"SELECT CAST(db AS text) FROM {table3} WHERE p={p}"))[0][0] assert n == float(ftext) # TODO: test casts from more types.