# Copyright 2020-present ScyllaDB # # SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 ############################################################################# # Tests for empty values (especially, but not just, empty strings) ############################################################################# import pytest from cassandra.protocol import InvalidRequest from .util import unique_name, unique_key_string, unique_key_int, new_test_table from . import nodetool @pytest.fixture(scope="module") def table1(cql, test_keyspace): table = test_keyspace + "." + unique_name() cql.execute(f"CREATE TABLE {table} (p text, c text, v text, primary key (p, c))") yield table cql.execute("DROP TABLE " + table) # In test_insert_null_key in test_null.py we verified that a null value is not # allowed as a key column - neither as a partition key nor clustering key. # An *empty string*, in contrast, is NOT a null. So ideally should have been # allowed as a key. However, for undocumented reasons (having to do with how # partition keys are serialized in sstables), an empty string is NOT allowed # as a partition key. It is allowed as a clustering key, though. In the # following test we confirm those things. # See issue #9352. def test_insert_empty_string_key(cql, table1): s = unique_key_string() # An empty-string clustering *is* allowed: cql.execute(f"INSERT INTO {table1} (p,c,v) VALUES ('{s}', '', 'cat')") assert list(cql.execute(f"SELECT v FROM {table1} WHERE p='{s}' AND c=''")) == [('cat',)] # But an empty-string partition key is *not* allowed, with a specific # error that a "Key may not be empty": with pytest.raises(InvalidRequest, match='Key may not be empty'): cql.execute(f"INSERT INTO {table1} (p,c,v) VALUES ('', '{s}', 'dog')") # Test an empty-string clustering key again, but this time doing a flush # bypassing the cache - checking that empty-string clustering keys can # be correctly written to sstables and read back. See issue #12561. # This test is scylla_only, because it uses BYPASS CACHE. def test_insert_empty_string_key_with_flush(cql, table1, scylla_only): s = unique_key_string() cql.execute(f"INSERT INTO {table1} (p,c,v) VALUES ('{s}', '', 'cat')") nodetool.flush(cql, table1) assert list(cql.execute(f"SELECT v FROM {table1} WHERE p='{s}' AND c='' BYPASS CACHE")) == [('cat',)] assert list(cql.execute(f"SELECT v FROM {table1} WHERE p='{s}' BYPASS CACHE")) == [('cat',)] # In contrast with normal tables where an empty clustering key is allowed, # in a WITH COMPACT STORAGE table, an empty clustering key is not allowed. # As usual, an empty partition key is not allowed either. @pytest.mark.xfail(reason="issue #12749, misleading error message") def test_insert_empty_string_key_compact(compact_storage, cql, test_keyspace): schema = 'p text, c text, v text, primary key (p, c)' with new_test_table(cql, test_keyspace, schema, 'WITH COMPACT STORAGE') as table: s = unique_key_string() with pytest.raises(InvalidRequest, match='empty'): cql.execute(f"INSERT INTO {table} (p,c,v) VALUES ('{s}', '', 'cat')") with pytest.raises(InvalidRequest, match='Key may not be empty'): cql.execute(f"INSERT INTO {table} (p,c,v) VALUES ('', '{s}', 'dog')") # However, in a COMPACT STORAGE table with a *compound* clustering key (more # than one clustering key column), setting one of them to empty *is* allowed. @pytest.mark.xfail(reason="issue #12749") def test_insert_empty_string_compound_clustering_key_compact(compact_storage, cql, test_keyspace): schema = 'p text, c1 text, c2 text, v text, primary key (p, c1, c2)' with new_test_table(cql, test_keyspace, schema, 'WITH COMPACT STORAGE') as table: s = unique_key_string() # Setting just one of the clustering key components, or both, to # an empty string, is allowed. This is in contrast with a single- # component clustering key which we saw above is cannot be set empty. cql.execute(f"INSERT INTO {table} (p,c1,c2,v) VALUES ('{s}', '', 'dog', 'cat')") cql.execute(f"INSERT INTO {table} (p,c1,c2,v) VALUES ('{s}', 'hello', '', 'mouse')") cql.execute(f"INSERT INTO {table} (p,c1,c2,v) VALUES ('{s}', '', '', 'horse')") assert list(cql.execute(f"SELECT v FROM {table} WHERE p='{s}'")) == [('horse',),('cat',),('mouse',)] # test_update_empty_string_key() is the same as test_insert_empty_string_key() # just uses an UPDATE instead of INSERT. It turns out that exactly the cases # which are allowed by INSERT are also allowed by UPDATE. def test_update_empty_string_key(cql, table1): s = unique_key_string() # An empty-string clustering *is* allowed: cql.execute(f"UPDATE {table1} SET v = 'cat' WHERE p='{s}' AND c=''") assert list(cql.execute(f"SELECT v FROM {table1} WHERE p='{s}' AND c=''")) == [('cat',)] # But an empty-string partition key is *not* allowed, with a specific # error that a "Key may not be empty": with pytest.raises(InvalidRequest, match='Key may not be empty'): cql.execute(f"UPDATE {table1} SET v = 'dog' WHERE p='' AND c='{s}'") # ... and same for DELETE def test_delete_empty_string_key(cql, table1): s = unique_key_string() # An empty-string clustering *is* allowed: cql.execute(f"DELETE FROM {table1} WHERE p='{s}' AND c=''") # But an empty-string partition key is *not* allowed, with a specific # error that a "Key may not be empty": with pytest.raises(InvalidRequest, match='Key may not be empty'): cql.execute(f"DELETE FROM {table1} WHERE p='' AND c='{s}'") # Another test like test_insert_empty_string_key() just using an INSERT JSON # instead of a regular INSERT. Because INSERT JSON takes a different code path # from regular INSERT, we need the emptiness test in yet another place. # Reproduces issue #9853 (the empty-string partition key was allowed, and # actually inserted into the table.) def test_insert_json_empty_string_key(cql, table1): s = unique_key_string() # An empty-string clustering *is* allowed: cql.execute("""INSERT INTO %s JSON '{"p": "%s", "c": "", "v": "cat"}'""" % (table1, s)) assert list(cql.execute(f"SELECT v FROM {table1} WHERE p='{s}' AND c=''")) == [('cat',)] # But an empty-string partition key is *not* allowed, with a specific # error that a "Key may not be empty": with pytest.raises(InvalidRequest, match='Key may not be empty'): cql.execute("""INSERT INTO %s JSON '{"p": "", "c": "%s", "v": "cat"}'""" % (table1, s)) # Although an empty string is not allowed as a partition key (as tested # above by test_empty_string_key()), it turns out that in a *compound* # partition key (with multiple partition-key columns), any or all of them # may be empty strings! This inconsistency is known in Cassandra, but # deemed unworthy to fix - see: # https://issues.apache.org/jira/browse/CASSANDRA-11487 def test_empty_string_key2(cql, test_keyspace): schema = 'p1 text, p2 text, c text, v text, primary key ((p1, p2), c)' with new_test_table(cql, test_keyspace, schema) as table: cql.execute(f"INSERT INTO {table} (p1,p2,c,v) VALUES ('', '', '', 'cat')") cql.execute(f"INSERT INTO {table} (p1,p2,c,v) VALUES ('x', 'y', 'z', 'dog')") assert list(cql.execute(f"SELECT v FROM {table} WHERE p1='' AND p2='' AND c=''")) == [('cat',)] # For historical reasons, CQL allows any type to be empty, not just strings. # An "empty" int value is a value with size 0 - and is distinct from a null # int (size -1) or UNSET_VALUE (size -2) or a normal int value (size 4). # This is not an important behavior to preserve in modern CQL, but we should # probably be aware if we ever break it, so it's good to have a regression # test for it. def test_empty_int(cql, test_keyspace): schema = 'p text, v int, primary key (p)' with new_test_table(cql, test_keyspace, schema) as table: # blobAsInt(0x) is the way to generate an empty int in CQL: cql.execute(f"INSERT INTO {table} (p,v) VALUES ('hi', blobAsInt(0x))") # When the Python driver returns an empty int, it returns it just like # a null int - None. Note that some other drivers may have problems # with an empty integer being returned - e.g., see # https://github.com/scylladb/scylla-rust-driver/issues/278 assert list(cql.execute(f"SELECT v FROM {table} WHERE p='hi'")) == [(None,)] # Above in test_empty_int() we noted a bizarre and archaic (not used by # modern CQL users) feature where any type can be set to "empty". # But the way to write this bizarre empty values in CQL was via an ugly # blobAsInt(0x). It mustn't be something as benign as trying to use an # empty string to set an integer. The following tests verify that trying # to use an empty string to set other types results in an error. Each type # will be a separate test, using one common test table. Each test will be # repeated with regular INSERT and with INSERT JSON. # The bugs uncovered by these tests were reported in issue #10625 and #7944. # Create a test table with an integer partition key, and many regular # columns of all scalar types. The column of type int is called "vint". @pytest.fixture(scope="module") def table_all_scalar(cql, test_keyspace): types = ['ascii', 'bigint', 'blob', 'boolean', 'date', 'decimal', 'double', 'duration', 'float', 'inet', 'int', 'smallint', 'text', 'time', 'timestamp', 'timeuuid', 'tinyint', 'uuid', 'varchar', 'varint'] vars = ', '.join(['v'+x+' '+x for x in types]) with new_test_table(cql, test_keyspace, f'p int primary key, {vars}') as table: yield table # The following types can be assigned any string, and in particular it # is perfectly fine to assign to them an empty string. @pytest.mark.parametrize("t", ["ascii", "text", "varchar"]) def test_empty_string_for_string_types(cql, table_all_scalar, t): p = unique_key_int() cql.execute(f"INSERT INTO {table_all_scalar} (p,v{t}) VALUES ({p}, '')") assert list(cql.execute(f"SELECT v{t} FROM {table_all_scalar} WHERE p={p}")) == [('',)] @pytest.mark.parametrize("t", ["ascii", "text", "varchar"]) def test_empty_string_for_string_types_json(cql, table_all_scalar, t): p = unique_key_int() cql.execute(f"INSERT INTO {table_all_scalar} JSON '{{\"p\": {p}, \"v{t}\": \"\"}}'") assert list(cql.execute(f"SELECT v{t} FROM {table_all_scalar} WHERE p={p}")) == [('',)] # "fussy" string types are types that in CQL can be assigned a string # constant, but the string needs to follow a particular format and in # particular the empty string should not be allowed. # Reproduces #10625. @pytest.mark.xfail(reason="issue #10625") @pytest.mark.parametrize("t", ["date", "time"]) def test_empty_string_for_fussy_string_types(cql, table_all_scalar, t): p = unique_key_int() with pytest.raises(InvalidRequest): cql.execute(f"INSERT INTO {table_all_scalar} (p,v{t}) VALUES ({p}, '')") @pytest.mark.xfail(reason="issue #10625") @pytest.mark.parametrize("t", ["date", "time"]) def test_empty_string_for_fussy_string_types_json(cql, table_all_scalar, t): p = unique_key_int() with pytest.raises(InvalidRequest): cql.execute(f"INSERT INTO {table_all_scalar} JSON '{{\"p\": {p}, \"v{t}\": \"\"}}'") # Same as test_empty_string_for_fussy_string_types, but for some reason # Cassandra allows the empty string for these two types, even though an # empty string doesn't make sense as an IP address or a timestamp. # We consider this a Cassandra bug, hence the tag "cassandra_bug" below. # Reproduces #10625. @pytest.mark.xfail(reason="issue #10625") @pytest.mark.parametrize("t", ["inet", "timestamp"]) def test_empty_string_for_fussy_string_types2(cql, table_all_scalar, t, cassandra_bug): p = unique_key_int() with pytest.raises(InvalidRequest): cql.execute(f"INSERT INTO {table_all_scalar} (p,v{t}) VALUES ({p}, '')") @pytest.mark.xfail(reason="issue #10625") @pytest.mark.parametrize("t", ["inet", "timestamp"]) def test_empty_string_for_fussy_string_types2_json(cql, table_all_scalar, t, cassandra_bug): p = unique_key_int() with pytest.raises(InvalidRequest): cql.execute(f"INSERT INTO {table_all_scalar} JSON '{{\"p\": {p}, \"v{t}\": \"\"}}'") # All other types cannot be assigned a string constant at all (the error # is not specific to the empty string) @pytest.mark.parametrize("t", ["bigint", "blob", "boolean", "decimal", "double", "duration", "float", "int", "smallint", "timeuuid", "tinyint", "uuid", "varint"]) def test_empty_string_for_other_types(cql, table_all_scalar, t): p = unique_key_int() with pytest.raises(InvalidRequest, match='Invalid STRING constant'): cql.execute(f"INSERT INTO {table_all_scalar} (p,v{t}) VALUES ({p}, '')") # Although INSERT JSON can convert strings to numbers, it shouldn't allow # an *empty* string. # Reproduces #7944 and #10625. # See also test_json.py::test_fromjson_{varint,int}_empty_string* # which reproduces the same bug but just for two specific types. @pytest.mark.xfail(reason="issue #7944") @pytest.mark.parametrize("t", ["bigint", "blob", "boolean", "decimal", "double", "duration", "float", "int", "smallint", "timeuuid", "tinyint", "uuid", "varint"]) def test_empty_string_for_other_types_json(cql, table_all_scalar, t, cassandra_bug): p = unique_key_int() with pytest.raises(InvalidRequest, match='Error decoding JSON'): cql.execute(f"INSERT INTO {table_all_scalar} JSON '{{\"p\": {p}, \"v{t}\": \"\"}}'") assert list(cql.execute(f"SELECT v{t} FROM {table_all_scalar} WHERE p={p}")) == [('',)] # Some of the tests that failed above allowed inserting a empty value # in an "unofficial" way by assigning an empty string to it. This is not # a big problem for regular columns, but for key columns it is bad - # namely, because the partition key cannot be empty. # See #10625, #7944 @pytest.mark.xfail(reason="issue #7944, Scylla returns internal server error") def test_empty_string_for_nonstring_partition_key1(cql, test_keyspace): schema = 'p int primary key, v int' with new_test_table(cql, test_keyspace, schema) as table: # Cassandra returns a "Key may not be empty" but even better would # be to report earlier that an empty string cannot be converted to # a number. with pytest.raises(InvalidRequest): cql.execute(f"INSERT INTO {table} JSON '{{\"p\": \"\", \"v\": 3}}'") def test_empty_string_for_nonstring_partition_key2(cql, test_keyspace): schema = 'p inet primary key, v int' with new_test_table(cql, test_keyspace, schema) as table: # Cassandra returns a "Key may not be empty" but even better would # be to report earlier that an empty string cannot be converted to # a number. with pytest.raises(InvalidRequest): cql.execute(f"INSERT INTO {table} (p,v) VALUES ('', 3)")