# Mirror of https://github.com/scylladb/scylladb.git, synced 2026-04-25 11:00:35 +00:00.
# Commit subject: Oversized materialized view and index names are rejected;
# materialized view names with invalid symbols are rejected.
# Fixes: #20755. Closes: scylladb/scylladb#21746.
# Copyright 2020-present ScyllaDB
|
|
#
|
|
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
|
|
# Tests for secondary indexes
|
|
|
|
import random
|
|
import itertools
|
|
import time
|
|
import tempfile
|
|
import pytest
|
|
import os
|
|
from . import rest_api
|
|
from cassandra.protocol import SyntaxException, AlreadyExists, InvalidRequest, ConfigurationException, ReadFailure, WriteFailure
|
|
from cassandra.query import SimpleStatement
|
|
from .cassandra_tests.porting import assert_rows, assert_row_count, assert_rows_ignoring_order, assert_empty
|
|
|
|
from .util import new_test_table, unique_name, unique_key_int, is_scylla
|
|
|
|
# A reproducer for issue #7443: Normally, when the entire table is SELECTed,
# the partitions are returned sorted by the partitions' token. When there
# is filtering, this order is not expected to change. Furthermore, when this
# filtering happens to use a secondary index, again the order is not expected
# to change.
def test_partition_order_with_si(cql, test_keyspace):
    schema = 'pk int, x int, PRIMARY KEY ((pk))'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Insert 20 partitions, all of them with x=1 so that filtering by x=1
        # will yield the same 20 partitions:
        N = 20
        stmt = cql.prepare('INSERT INTO '+table+' (pk, x) VALUES (?, ?)')
        for i in range(N):
            cql.execute(stmt, [i, 1])
        # SELECT all the rows, and verify they are returned in increasing
        # partition token order (note that the token is a *signed* number):
        tokens = [row.system_token_pk for row in cql.execute('SELECT token(pk) FROM '+table)]
        assert len(tokens) == N
        assert sorted(tokens) == tokens
        # Now select all the partitions with filtering of x=1. Since all
        # rows have x=1, this shouldn't change the list of matching rows, and
        # also shouldn't check their order:
        tokens1 = [row.system_token_pk for row in cql.execute('SELECT token(pk) FROM '+table+' WHERE x=1 ALLOW FILTERING')]
        assert tokens1 == tokens
        # Now add an index on x, which allows implementing the "x=1"
        # restriction differently. With the index, "ALLOW FILTERING" is
        # no longer necessary. But the order of the results should
        # still not change. Issue #7443 is about the order changing here.
        cql.execute('CREATE INDEX ON '+table+'(x)')
        # "CREATE INDEX" does not wait until the index is actually available
        # for use. Reads immediately after the CREATE INDEX may fail or return
        # partial results. So let's retry until reads resume working.
        # Initialize tokens2 before the retry loop: previously, if every
        # attempt raised ReadFailure, the final assert raised a confusing
        # NameError instead of cleanly failing the comparison.
        tokens2 = []
        for _ in range(100):
            try:
                tokens2 = [row.system_token_pk for row in cql.execute('SELECT token(pk) FROM '+table+' WHERE x=1')]
                if len(tokens2) == N:
                    break
            except ReadFailure:
                pass
            time.sleep(0.1)
        assert tokens2 == tokens
|
|
|
|
# Verify that when several indexes could serve a query, the one chosen is the
# first to appear in the query's restrictions - giving users a deterministic
# way to control index selection.
# Note that this ordering is not set in stone and may change in the future -
# in which case this test should be amended as well. The order tested here was
# chosen as a good first step in issue #7969, but may eventually be replaced,
# e.g. by a dynamic choice based on estimated query selectivity statistics.
# Whatever the rule is, it must stay consistent across coordinators and time
# (and upgrades...) so that paged queries using an index can continue.
# Ref: #7969
def test_order_of_indexes(scylla_only, cql, test_keyspace):
    schema = 'p int primary key, v1 int, v2 int, v3 int'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX my_v3_idx ON {table}(v3)")
        cql.execute(f"CREATE INDEX my_v1_idx ON {table}(v1)")
        cql.execute(f"CREATE INDEX my_v2_idx ON {table}((p),v2)")
        # Each query below should pick the first usable index found in its
        # restriction list; tracing output is inspected to confirm which
        # index actually served the query.
        def index_used(query, index_name):
            events = cql.execute(query, trace=True).get_query_trace().events
            assert any(index_name in event.description for event in events)
        index_used(f"SELECT * FROM {table} WHERE v3 = 1", "my_v3_idx")
        index_used(f"SELECT * FROM {table} WHERE v3 = 1 and v1 = 2 allow filtering", "my_v3_idx")
        index_used(f"SELECT * FROM {table} WHERE p = 1 and v1 = 1 and v3 = 2 allow filtering", "my_v1_idx")
        index_used(f"SELECT * FROM {table} WHERE p = 1 and v3 = 1 and v1 = 2 allow filtering", "my_v3_idx")
        # A local index that cannot serve the query is skipped:
        index_used(f"SELECT * FROM {table} WHERE v2 = 1 and v1 = 2 allow filtering", "my_v1_idx")
        index_used(f"SELECT * FROM {table} WHERE v2 = 1 and v3 = 2 and v1 = 3 allow filtering", "my_v3_idx")
        index_used(f"SELECT * FROM {table} WHERE v1 = 1 and v2 = 2 and v3 = 3 allow filtering", "my_v1_idx")
        # A usable local index is preferred over global ones:
        index_used(f"SELECT * FROM {table} WHERE p = 1 and v1 = 1 and v3 = 2 and v2 = 2 allow filtering", "my_v2_idx")
        index_used(f"SELECT * FROM {table} WHERE p = 1 and v2 = 1 and v1 = 2 allow filtering", "my_v2_idx")
|
|
|
|
# Indexes created without an explicit name get a generated default name.
# Due to #8620 it was possible to break index creation by first creating a
# regular table whose name collides with that generated index name.
def test_create_unnamed_index_when_its_name_is_taken(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'p int primary key, v int') as table:
        try:
            cql.execute(f"CREATE TABLE {table}_v_idx_index (i_do_not_exist_in_the_base_table int primary key)")
            # Index creation must still succeed even though its default
            # name is already taken by the table created above.
            cql.execute(f"CREATE INDEX ON {table}(v)")
        finally:
            cql.execute(f"DROP TABLE {table}_v_idx_index")
|
|
|
|
# Indexes created with an explicit name are backed by a materialized view
# named <index-name>_index. If a regular table (or another view) already has
# exactly that name, creating the index must fail.
def test_create_named_index_when_its_name_is_taken(scylla_only, cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'p int primary key, v int') as table:
        index_name = unique_name()
        try:
            cql.execute(f"CREATE TABLE {test_keyspace}.{index_name}_index (i_do_not_exist_in_the_base_table int primary key)")
            # Index creation should fail: its backing materialized view
            # cannot be created because the name is held by a regular table.
            with pytest.raises(InvalidRequest, match="already exists"):
                cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        finally:
            cql.execute(f"DROP TABLE {test_keyspace}.{index_name}_index")
|
|
|
|
# Tests for CREATE INDEX IF NOT EXISTS.
# Reproduces issue #8717.
def test_create_index_if_not_exists(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'p int primary key, v int') as table:
        cf = table.split('.')[1]
        cql.execute(f"CREATE INDEX ON {table}(v)")
        # Re-creating the same unnamed index fails without "IF NOT EXISTS",
        # and is a silent no-op with it:
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(f"CREATE INDEX ON {table}(v)")
        cql.execute(f"CREATE INDEX IF NOT EXISTS ON {table}(v)")
        cql.execute(f"DROP INDEX {test_keyspace}.{cf}_v_idx")

        # The same checks for a named index - this is what broke in #8717:
        cql.execute(f"CREATE INDEX xyz ON {table}(v)")
        with pytest.raises(InvalidRequest, match="already exists"):
            cql.execute(f"CREATE INDEX xyz ON {table}(v)")
        cql.execute(f"CREATE INDEX IF NOT EXISTS xyz ON {table}(v)")
        cql.execute(f"DROP INDEX {test_keyspace}.xyz")

        # And once more with a quoted, non-lower-case name:
        cql.execute(f'CREATE INDEX "CamelCase" ON {table}(v)')
        with pytest.raises(InvalidRequest, match="already exists"):
            cql.execute(f'CREATE INDEX "CamelCase" ON {table}(v)')
        cql.execute(f'CREATE INDEX IF NOT EXISTS "CamelCase" ON {table}(v)')
        cql.execute(f'DROP INDEX {test_keyspace}."CamelCase"')

        # Creating an index on an already-indexed column under a different
        # name: "IF NOT EXISTS" appears to succeed but does not actually
        # register the new name - only the old index remains.
        cql.execute(f"CREATE INDEX xyz ON {table}(v)")
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(f"CREATE INDEX abc ON {table}(v)")
        cql.execute(f"CREATE INDEX IF NOT EXISTS abc ON {table}(v)")
        with pytest.raises(InvalidRequest):
            cql.execute(f"DROP INDEX {test_keyspace}.abc")
        cql.execute(f"DROP INDEX {test_keyspace}.xyz")
|
|
|
|
# Another CREATE INDEX IF NOT EXISTS case: an index with the requested *name*
# already exists, but it indexes a different column, i.e.,
#    CREATE INDEX xyz ON tbl(a)
#    CREATE INDEX IF NOT EXIST xyz ON tbl(b)
# Should the second command
# 1. Silently do nothing (because xyz already exists),
# 2. or try to create an index (because an index on tbl(b) doesn't yet exist)
#    and visibly fail when it can't because the name is already taken?
# Cassandra chose option 1 (silently do nothing), Scylla chose option 2.
# We consider Cassandra's behavior *wrong* and unhelpful - the user's intent
# was to ensure an index on tbl(b) exists, and when we can't, an error beats
# silently doing nothing. Hence this test is marked "cassandra_bug" - it
# passes on Scylla and xfails on Cassandra.
# Reproduces issue #9182
def test_create_index_if_not_exists2(cql, test_keyspace, cassandra_bug):
    with new_test_table(cql, test_keyspace, 'p int primary key, v1 int, v2 int') as table:
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v1)")
        # A different index can't reuse the name:
        with pytest.raises(InvalidRequest, match="already exists"):
            cql.execute(f"CREATE INDEX {index_name} ON {table}(v2)")
        # "IF NOT EXISTS" doesn't help: no index on {table}(v2) exists yet,
        # so creation is attempted - and fails on the name clash.
        with pytest.raises(InvalidRequest, match="already exists"):
            cql.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON {table}(v2)")
|
|
|
|
# An oversized index name must be cleanly rejected as an invalid request.
# Reproduces issue #20755
def test_create_index_oversized_name(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'p int primary key, v int') as table:
        oversized = 'x' * 500
        with pytest.raises((InvalidRequest, ConfigurationException)):
            cql.execute(f"CREATE INDEX {oversized} ON {table}(v)")
|
|
|
|
# Check that the paging state works for indexes on tables with descending
# clustering order. Indexes on clustering keys with DESC order are stored
# internally as "reverse" types, and Scylla assertions used to fail because
# the base type differed from the underlying view type - even though they
# deserialize identically. Issue #8666
def test_paging_with_desc_clustering_order(cql, test_keyspace):
    schema = 'p int, c int, primary key (p,c)'
    extra = 'with clustering order by (c desc)'
    with new_test_table(cql, test_keyspace, schema, extra) as table:
        cql.execute(f"CREATE INDEX ON {table}(c)")
        for p in range(3):
            cql.execute(f"INSERT INTO {table}(p,c) VALUES ({p}, 42)")
        # fetch_size=1 forces paging through all three matches:
        stmt = SimpleStatement(f"SELECT * FROM {table} WHERE c = 42", fetch_size=1)
        assert len(list(cql.execute(stmt))) == 3
|
|
|
|
# Like the test above, but the reversed type is not the indexed column itself
# - it is still part of the index's materialized view key - and we use
# SELECT DISTINCT instead of SELECT. Reproduces Scylla Enterprise issue #2449
# and is a regression test for commit c8653d1321ca9ff5963f9e5479372a6000a0f096.
def test_paging_with_desc_clustering_order2(cql, test_keyspace):
    schema = 'p1 int, p2 int, c int, primary key ((p1,p2),c)'
    extra = 'with clustering order by (c desc)'
    with new_test_table(cql, test_keyspace, schema, extra) as table:
        cql.execute(f"CREATE INDEX ON {table}(p1)")
        for k in range(3):
            cql.execute(f"INSERT INTO {table}(p1,p2,c) VALUES (7, {k}, 42)")
        stmt = SimpleStatement(f"SELECT p1,p2 FROM {table} WHERE p1 = 7", fetch_size=1)
        assert len(list(cql.execute(stmt))) == 3
        # Reproduces Scylla Enterprise issue #2449:
        stmt = SimpleStatement(f"SELECT DISTINCT p1,p2 FROM {table} WHERE p1 = 7", fetch_size=1)
        assert len(list(cql.execute(stmt))) == 3
|
|
|
|
# Deleting a base partition must work even when it generates a large batch of
# individual view updates. Refs #8852 - view updates used to be applied with
# per-partition granularity; after the fix that is no longer true, so this is
# a regression test. Scylla-only: it peeks at the underlying index table.
def test_partition_deletion(cql, test_keyspace, scylla_only):
    schema = 'p int, c1 int, c2 int, v int, primary key (p,c1,c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(c1)")
        insert = cql.prepare(f"INSERT INTO {table}(p,c1,c2) VALUES (1, ?, 1)")
        for k in range(1342):
            cql.execute(insert, [k])
        cql.execute(f"DELETE FROM {table} WHERE p = 1")
        # The backing index view must be completely emptied:
        assert len(list(cql.execute(f"SELECT * FROM {table}_c1_idx_index"))) == 0
|
|
|
|
# Deleting a clustering range must work even when it generates a large batch
# of individual view updates. Refs #8852 - view updates used to be applied
# with per-partition granularity; after the fix that is no longer true, so
# this is a regression test. Scylla-only: it peeks at the underlying index
# table.
def test_range_deletion(cql, test_keyspace, scylla_only):
    schema = 'p int, c1 int, c2 int, v int, primary key (p,c1,c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(c1)")
        insert = cql.prepare(f"INSERT INTO {table}(p,c1,c2) VALUES (1, ?, 1)")
        for k in range(1342):
            cql.execute(insert, [k])
        cql.execute(f"DELETE FROM {table} WHERE p = 1 AND c1 > 5 and c1 < 846")
        remaining = sorted(row.c1 for row in cql.execute(f"SELECT * FROM {table}_c1_idx_index"))
        # Only rows outside the deleted (5, 846) range may survive:
        assert remaining == [x for x in range(1342) if x <= 5 or x >= 846]
|
|
|
|
# Reproduces #8627:
# Inserting a value larger than 64KiB into an indexed column must fail,
# because such a value is too large to serve as a key in the backing index.
@pytest.mark.xfail(reason="issue #8627")
def test_too_large_indexed_value(cql, test_keyspace):
    schema = 'p int, c int, v text, primary key (p,c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(v)")
        big = 'x'*66536
        with pytest.raises(InvalidRequest, match='size'):
            cql.execute(f"INSERT INTO {table}(p,c,v) VALUES (0,1,'{big}')")
|
|
|
|
# Like test_too_large_indexed_value, but for indexed collection keys/values.
# Modern Cassandra and Scylla allow collection keys and values up to 2GB, but
# keys written to an index are capped at 64 KB - so when a collection is
# indexed, inserting oversized elements must fail cleanly at write time.
# Reproduces #8627
@pytest.mark.xfail(reason="issue #8627")
def test_too_large_indexed_collection_value(cql, test_keyspace):
    schema = 'p int, c int, m map<text,text>, primary key (p,c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(values(m))")
        cql.execute(f"CREATE INDEX ON {table}(keys(m))")
        big = 'x'*66536
        # Oversized map value:
        with pytest.raises(InvalidRequest, match='size'):
            cql.execute(f"INSERT INTO {table}(p,c,m) VALUES (0,1,{{'hi': '{big}'}})")
        # Oversized map key:
        with pytest.raises(InvalidRequest, match='size'):
            cql.execute(f"INSERT INTO {table}(p,c,m) VALUES (0,1,{{'{big}': 'hi'}})")
|
|
|
|
# Reproduces #8627:
# Same as test_too_large_indexed_value, but the index is added to a table
# with pre-existing data. The background index build cannot report an error
# to the user, but it is expected to skip the problematic row and still
# finish building the rest of the index.
@pytest.mark.xfail(reason="issue #8627")
def test_too_large_indexed_value_build(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'p int primary key, v text') as table:
        # Without an index, a "big" value in v is perfectly fine:
        stmt = cql.prepare(f'INSERT INTO {table} (p,v) VALUES (?, ?)')
        for i in range(30):
            cql.execute(stmt, [i, str(i)])
        big = 'x'*66536
        cql.execute(stmt, [30, big])
        assert [(30,big)] == list(cql.execute(f'SELECT * FROM {table} WHERE p=30'))
        # Create an index with v as the new key; the background build
        # should start promptly.
        cql.execute(f"CREATE INDEX ON {table}(v)")
        # A hung or stopped view builder is indistinguishable from one that
        # simply hasn't finished yet (short of reading logs, which we don't),
        # so a failure of this test is slow - it has to wait for the timeout.
        # Luckily (?), today the cql.execute(read, [big]) check also fails
        # immediately on Scylla, so in practice the test fails quickly.
        read = cql.prepare(f'SELECT * FROM {table} WHERE v = ?')
        deadline = time.time() + 30
        while time.time() < deadline:
            # The oversized "big" cannot be a view key and cannot be
            # searched. Cassandra reports: "Index expression values may
            # not be larger than 64K".
            with pytest.raises(InvalidRequest):
                cql.execute(read, [big])
            # Every other key should eventually become searchable:
            found = sum(1 for i in range(30) if list(cql.execute(read, [str(i)])))
            if found == 30:
                break
            print(found)
            time.sleep(0.1)
        for i in range(30):
            assert list(cql.execute(read, [str(i)]))
|
|
|
|
# Selecting by clustering key alone should require filtering, but must still
# return the right rows. Reproduces issue #8991
def test_filter_cluster_key(cql, test_keyspace):
    schema = 'p int, c1 int, c2 int, primary key (p, c1, c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(c2)")
        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 1, 1)")
        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 0, 1)")

        stmt = SimpleStatement(f"SELECT c1, c2 FROM {table} WHERE c1 = 1 and c2 = 1 ALLOW FILTERING")
        assert_rows(cql.execute(stmt), [1, 1])
|
|
|
|
# Reproduces #8627:
# Reproduced #13548:
# Insert a string longer than 64K characters (reproducing issue #8627 - see
# the other tests above), which makes the view builder never finish. If
# Scylla runs with `--enable-keyspace-column-family-metrics 1` and the table
# is immediately recreated (drop + create), the "old" table object in memory
# is not yet deallocated when the "new" one is created, which used to raise
# a seastar::metrics::double_registration exception.
def test_instant_table_recreation(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'p int primary key, v text') as table:
        stmt = cql.prepare(f'INSERT INTO {table} (p,v) VALUES (?, ?)')
        oversized = 'x'*66536
        cql.execute(stmt, [0, oversized])
        cql.execute(f"CREATE INDEX ON {table}(v)")
    # Leaving the scope above DROPs the table; immediately create another
    # table with the same name:
    with new_test_table(cql, test_keyspace, 'p int primary key, v text') as table:
        pass
|
|
|
|
# Selecting *only* an indexed clustering key does not require filtering - it
# is a full-index scan (output proportional to what is read). Additionally,
# redundant parentheses around the column must still work, and must not be
# treated as a multi-column restriction (reproduces #13250).
@pytest.mark.xfail(reason="issue #13250")
def test_index_scan_multicolumn_syntax(cql, test_keyspace):
    schema = 'p int, c1 int, c2 int, primary key (p, c1, c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(c1)")
        cql.execute(f"CREATE INDEX ON {table}(c2)")
        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 1, 1)")
        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 0, 1)")
        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 1, 0)")
        assert [(0,), (1,)] == list(cql.execute(f'SELECT c2 FROM {table} WHERE c1 = 1'))
        assert [(0,), (1,)] == list(cql.execute(f'SELECT c1 FROM {table} WHERE c2 = 1'))
        # (c1) = (1) is not a real multi-column restriction (it means the
        # same as c1=1), so the index applies and no ALLOW FILTERING is
        # needed. Reproduces #13250:
        assert [(0,), (1,)] == list(cql.execute(f'SELECT c2 FROM {table} WHERE (c1) = (1)'))
        # Similarly (c2) = (1) means c2=1, and should be accepted despite
        # the missing c1 restriction - c2 is indexed. Reproduces #13250:
        assert [(0,), (1,)] == list(cql.execute(f'SELECT c1 FROM {table} WHERE (c2) = (1)'))
|
|
|
|
def test_multi_column_with_regular_index(cql, test_keyspace):
    """Reproduces #9085."""
    with new_test_table(cql, test_keyspace, 'p int, c1 int, c2 int, r int, primary key(p,c1,c2)') as tbl:
        cql.execute(f'CREATE INDEX ON {tbl}(r)')
        # Three rows; only (1,1,1,0) matches (c1,c2)<(2,0) AND r=0:
        cql.execute(f'INSERT INTO {tbl}(p, c1, c2, r) VALUES (1, 1, 1, 0)')
        cql.execute(f'INSERT INTO {tbl}(p, c1, c2, r) VALUES (1, 1, 2, 1)')
        cql.execute(f'INSERT INTO {tbl}(p, c1, c2, r) VALUES (1, 2, 1, 0)')
        # Multi-column clustering restriction combined with an indexed
        # regular-column restriction, with and without the partition key:
        assert_rows(cql.execute(f'SELECT c1 FROM {tbl} WHERE (c1,c2)<(2,0) AND r=0 ALLOW FILTERING'), [1])
        assert_rows(cql.execute(f'SELECT c1 FROM {tbl} WHERE p=1 AND (c1,c2)<(2,0) AND r=0 ALLOW FILTERING'), [1])
|
|
|
|
# Indexing an *empty string* must work like any other string - there is
# nothing wrong or unusual about an empty string.
# Reproduces issue #9364
def test_index_empty_string(cql, test_keyspace):
    schema = 'p int, v text, primary key (p)'
    # First, sanity-check the search for v='' without an index (using
    # ALLOW FILTERING):
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"INSERT INTO {table} (p, v) VALUES (1, 'hello')")
        cql.execute(f"INSERT INTO {table} (p, v) VALUES (2, '')")
        assert_rows(cql.execute(f"SELECT p FROM {table} WHERE v='' ALLOW FILTERING"), [2])
    # Now repeat with an index on v: ALLOW FILTERING is no longer needed and
    # the correct row must be found (in #9364 it wasn't). A fresh table is
    # used instead of adding an index to the existing one, to sidestep the
    # question of knowing when the new index becomes ready.
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(v)")
        cql.execute(f"INSERT INTO {table} (p, v) VALUES (1, 'hello')")
        cql.execute(f"INSERT INTO {table} (p, v) VALUES (2, '')")
        # This assert fails in #9364. On a single-node cqlpy, index updates
        # are synchronous, so no retry of the SELECT is needed.
        assert_rows(cql.execute(f"SELECT p FROM {table} WHERE v=''"), [2])
|
|
|
|
# Attempting to delete by an indexed (non-primary-key-prefix) column must be
# rejected - and must not wipe the whole partition. Refs #9495
def test_try_deleting_based_on_index_column(cql, test_keyspace):
    schema = 'p int, c1 int, c2 int, v int, primary key (p, c1, c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        for k in range(10):
            cql.execute(f"INSERT INTO {table} (p,c1,c2,v) VALUES (0,{k},{k},{k})")
        # Deleting with a restriction on c2 but not c1 is invalid:
        with pytest.raises(InvalidRequest):
            cql.execute(f"DELETE FROM {table} WHERE p = 0 AND c2 = 1500")
        assert_row_count(cql.execute(f"SELECT v FROM {table}"), 10)
        cql.execute(f"CREATE INDEX ON {table}(c2)")
        # An index on c2 must *not* make this deletion legal:
        with pytest.raises(InvalidRequest):
            cql.execute(f"DELETE FROM {table} WHERE p = 0 AND c2 = 1500")
        assert_row_count(cql.execute(f"SELECT v FROM {table}"), 10)
|
|
|
|
# Reproducer for issue #3403: A column name may contain all sorts of
# non-alphanumeric characters, even "/". The default index name is derived
# from the column name, so such characters could cause trouble - at worst,
# access to unintended files via "../../.." sequences in the column name.
def test_index_weird_chars_in_col_name(cql, test_keyspace):
    with tempfile.TemporaryDirectory() as tmpdir:
        # With issue #3403 present, a column named ../../...../tmpdir/x_yz
        # makes Scylla create the new index inside tmpdir! A more sinister
        # attacker could make Scylla create directories anywhere in the
        # filesystem - or crash when directory creation fails, e.g. if
        # magic_path ends in /dev/null/hello and /dev/null isn't a directory.
        magic_path='/..'*20 + tmpdir + '/x_yz'
        schema = f'pk int PRIMARY KEY, "{magic_path}" int'
        with new_test_table(cql, test_keyspace, schema) as table:
            cql.execute(f'CREATE INDEX ON {table}("{magic_path}")')
            # Index creation must not have miraculously created anything
            # inside tmpdir - if it did, we have issue #3403.
            assert os.listdir(tmpdir) == []
            # Verify the generated index name: it should be built from
            # only the alphanumeric/underscore characters of the column.
            ks, cf = table.split('.')
            index_name = list(cql.execute(f"SELECT index_name FROM system_schema.indexes WHERE keyspace_name = '{ks}' AND table_name = '{cf}'"))[0].index_name
            cleaned_up_column_name = ''.join(ch for ch in magic_path if ch.isalnum() or ch == '_')
            assert index_name == cf + '_' + cleaned_up_column_name + '_idx'
|
|
|
|
# Cassandra does not allow IN restrictions on non-primary-key columns;
# Scylla does (see test_filtering.py::test_filter_in_restriction) - but
# currently only with ALLOW FILTERING. In theory an indexed column could
# support it without filtering, just like a partition key, but today neither
# does, hence the xfail. It's also cassandra_bug because Cassandra doesn't
# support it either (its "not yet supported" message suggests it may be
# fixed in the future).
@pytest.mark.xfail
def test_index_in_restriction(cql, test_keyspace, cassandra_bug):
    schema = 'pk int, ck int, x int, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(x)')
        stmt = cql.prepare(f'INSERT INTO {table} (pk, ck, x) VALUES (?, ?, ?)')
        # Rows: ck=0,1,2 with x=0,2,4 in partition pk=1:
        for ck in range(3):
            cql.execute(stmt, [1, ck, ck*2])
        assert [(1,), (2,)] == list(cql.execute(f'SELECT ck FROM {table} WHERE x IN (2, 4)'))
        assert [(1,)] == list(cql.execute(f'SELECT ck FROM {table} WHERE x IN (2, 7)'))
        assert [] == list(cql.execute(f'SELECT ck FROM {table} WHERE x IN (3, 7)'))
|
|
|
|
# LIMIT must interact correctly with filtering - with and without a
# secondary index. LIMIT controls the number of rows returned *after*
# filtering, not the number scanned before it.
# Reproduces #10649 - with use_index=True this failed because LIMIT
# returned fewer than the requested number of rows.
@pytest.mark.parametrize("use_index", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#10649")), False])
def test_filter_and_limit(cql, test_keyspace, use_index, driver_bug_1):
    with new_test_table(cql, test_keyspace, 'pk int primary key, x int, y int') as table:
        if use_index:
            cql.execute(f'CREATE INDEX ON {table}(x)')
        stmt = cql.prepare(f'INSERT INTO {table} (pk, x, y) VALUES (?, ?, ?)')
        # Eight rows, all with x=1; y alternates 0,1,0,1,...
        for pk in range(8):
            cql.execute(stmt, [pk, 1, pk % 2])
        results = list(cql.execute(f'SELECT pk FROM {table} WHERE x = 1 AND y = 0 ALLOW FILTERING'))
        assert sorted(results) == [(0,), (2,), (4,), (6,)]
        # LIMIT 3 must yield exactly 3 results - no fewer, no more:
        results = list(cql.execute(f'SELECT pk FROM {table} WHERE x = 1 AND y = 0 LIMIT 3 ALLOW FILTERING'))
        assert sorted(results) == [(0,), (2,), (4,)]
        # Harder still (more code paths): fetch those 3 results in tiny
        # one-result pages rather than a single page.
        s = cql.prepare(f'SELECT pk FROM {table} WHERE x = 1 AND y = 0 LIMIT 3 ALLOW FILTERING')
        s.fetch_size = 1
        assert sorted(cql.execute(s)) == [(0,), (2,), (4,)]
|
|
|
|
# Same as test_filter_and_limit, except the schema also has a clustering
# key. This matters because Scylla's index-using code takes a different path
# when the index lookup yields whole matching partitions (previous test)
# versus individual matching rows (this test).
# Reproduces #10649 - with use_index=True this failed because LIMIT
# returned fewer than the requested number of rows.
@pytest.mark.parametrize("use_index", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#10649")), False])
def test_filter_and_limit_clustering(cql, test_keyspace, use_index):
    with new_test_table(cql, test_keyspace, 'pk int, ck int, x int, PRIMARY KEY(pk, ck)') as table:
        if use_index:
            cql.execute(f'CREATE INDEX ON {table}(x)')
        stmt = cql.prepare(f'INSERT INTO {table} (pk, ck, x) VALUES (?, ?, ?)')
        # Eight rows, all with x=1; ck alternates 0,1,0,1,...
        for pk in range(8):
            cql.execute(stmt, [pk, pk % 2, 1])
        results = list(cql.execute(f'SELECT pk FROM {table} WHERE x = 1 AND ck = 0 ALLOW FILTERING'))
        assert sorted(results) == [(0,), (2,), (4,), (6,)]
        # LIMIT 3 must yield exactly 3 results - no fewer, no more:
        results = list(cql.execute(f'SELECT pk FROM {table} WHERE x = 1 AND ck = 0 LIMIT 3 ALLOW FILTERING'))
        assert sorted(results) == [(0,), (2,), (4,)]
        # Harder still (more code paths): fetch those 3 results in tiny
        # one-result pages rather than a single page.
        s = cql.prepare(f'SELECT pk FROM {table} WHERE x = 1 AND ck = 0 LIMIT 3 ALLOW FILTERING')
        s.fetch_size = 1
        assert sorted(cql.execute(s)) == [(0,), (2,), (4,)]
|
|
|
|
# Another reproducer for #10649, similar to the previous test
|
|
# (test_filter_and_limit_clustering) but indexes the clustering
|
|
# key column. This test is closer to the use case of the original user
|
|
# who discovered #10649, and involves slightly different code paths in
|
|
# Scylla (the index-driver query needs to read individual rows, not
|
|
# entire partitions, from the base table).
|
|
@pytest.mark.xfail(reason="#10649")
def test_filter_and_limit_2(cql, test_keyspace):
    """Another reproducer for #10649, indexing a clustering-key column so
    the index-driven query must read individual rows (not whole
    partitions) from the base table, then filter and apply LIMIT.
    """
    schema = 'pk int, ck1 int, ck2 int, x int, PRIMARY KEY (pk, ck1, ck2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(ck2)')
        insert = cql.prepare(f'INSERT INTO {table} (pk, ck1, ck2, x) VALUES (?, ?, ?, ?)')
        N = 10
        J = 3
        # One partition (pk=1) with N*J rows; x depends on both clustering
        # columns so that ck2=2 gives x=3 exactly when ck1 is odd.
        for ck1, ck2 in itertools.product(range(N), range(J)):
            cql.execute(insert, [1, ck1, ck2, ck2 + ck1 % 2])
        results = list(cql.execute(f'SELECT ck1 FROM {table} WHERE ck2 = 2 AND x = 3 ALLOW FILTERING'))
        # Expected matches: odd ck1 only (see the insert loop above).
        assert results == [(ck1,) for ck1 in range(N) if ck1 % 2 == 1]
        for i in [3, 1, N]:
            assert results[0:i] == list(cql.execute(f'SELECT ck1 FROM {table} WHERE ck2 = 2 AND x = 3 LIMIT {i} ALLOW FILTERING'))
        # Adding pk=1 should not change anything in the result, because
        # every row has pk=1.
        for i in [3, 1, N]:
            assert results[0:i] == list(cql.execute(f'SELECT ck1 FROM {table} WHERE pk = 1 AND ck2 = 2 AND x = 3 LIMIT {i} ALLOW FILTERING'))
|
|
|
|
# Yet another reproducer for #10649, this time using a local index instead
|
|
# of a global index. As before, test that a LIMIT works correctly in
|
|
# conjunction with filtering. A user tried this variant in issue #12766.
|
|
# This is a scylla_only test because local index is a Scylla-only feature.
|
|
@pytest.mark.parametrize("use_local_index", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#10649")), False])
def test_filter_and_limit_local_index(cql, test_keyspace, use_local_index, driver_bug_1, scylla_only):
    """Yet another reproducer for #10649, using a *local* index (a
    Scylla-only feature, hence scylla_only). LIMIT combined with
    filtering must return exactly LIMIT rows.
    """
    with new_test_table(cql, test_keyspace, 'p int, c int, x int, y int, primary key (p, c)') as table:
        if use_local_index:
            cql.execute(f'CREATE INDEX ON {table}((p), x)')
        insert = cql.prepare(f'INSERT INTO {table} (p, c, x, y) VALUES (?, ?, ?, ?)')
        # One partition p=0 with rows c=0..7, all with x=1; odd c gets y=1.
        for c in range(8):
            cql.execute(insert, [0, c, 1, c % 2])
        odd_rows = [(1,), (3,), (5,), (7,)]
        results = cql.execute(f'SELECT c FROM {table} WHERE p = 0 AND x = 1 AND y = 1 ALLOW FILTERING')
        assert sorted(results) == odd_rows
        # With LIMIT N we must get back exactly N results - not less and
        # also not more.
        assert odd_rows[:1] == sorted(cql.execute(f'SELECT c FROM {table} WHERE p = 0 AND x = 1 AND y = 1 LIMIT 1 ALLOW FILTERING'))
        assert odd_rows[:3] == sorted(cql.execute(f'SELECT c FROM {table} WHERE p = 0 AND x = 1 AND y = 1 LIMIT 3 ALLOW FILTERING'))
        # Harder case (exercising more code paths): fetch the 3 results
        # in tiny one-result pages instead of a single page.
        paged = cql.prepare(f'SELECT c FROM {table} WHERE p = 0 AND x = 1 AND y = 1 LIMIT 3 ALLOW FILTERING')
        paged.fetch_size = 1
        assert sorted(cql.execute(paged)) == odd_rows[:3]
|
|
|
|
# Tests for issue #2962 - different type of indexing on collection columns.
|
|
# Note that we also have a randomized test for this feature as a C++ unit
|
|
# tests, as well as many tests translated from Cassandra's unit tests (grep
|
|
# the issue number #2962 to find them). Unlike the randomized test, the goal
|
|
# here is to try to cover as many corner cases we can think of, explicitly.
|
|
#
|
|
# Note that we can assume that on a single-node cqlpy, materialized view
|
|
# (and therefore index) updates are synchronous, so none of these tests need
|
|
# loops to wait for a change to be indexed.
|
|
|
|
def test_index_list(cql, test_keyspace):
    """Exercise a secondary index on a list<int> column (issue #2962).

    Verifies that "CONTAINS" is rejected without an index, and that with
    an index it tracks every kind of list mutation this test performs:
    appends, element removals, whole-value replacement, duplicate
    elements, per-position updates/deletes, and replacement by an empty
    list. Index updates are synchronous on a single-node cqlpy run, so
    no retry loops are needed.
    """
    schema = 'pk int, ck int, l list<int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Without an index, the "CONTAINS" restriction requires filtering
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 3')
        cql.execute(f'CREATE INDEX ON {table}(l)')
        # The list is still empty, nothing should match
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 3'))
        # Add values to the list, check they can be found
        cql.execute(f'UPDATE {table} set l = l + [7] WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        cql.execute(f'UPDATE {table} set l = l + [8] WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 8'))
        # Remove values from the list, check they can no longer be found
        cql.execute(f'UPDATE {table} set l = l - [7] WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 8'))
        cql.execute(f'UPDATE {table} set l = l - [8] WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 8'))
        # Replace entire value of list, everything in it should be indexed
        cql.execute(f'INSERT INTO {table} (pk, ck, l) VALUES (1, 2, [4, 5])')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 4'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 8'))
        cql.execute(f'UPDATE {table} set l = [2, 5] WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 4'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 8'))
        # A list can have the same value more than once, here we append
        # another 2 to the list which will now contain [2, 5, 2]. Searching
        # for 2, we should find this row, but only once.
        cql.execute(f'UPDATE {table} set l = l + [2] WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
        # If we remove the first element from the list [2, 5, 2], there is
        # still a 2 remaining, so it should still be found:
        cql.execute(f'DELETE l[0] FROM {table} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
        # Removing the other 2 (now l=[5,2]) should leave a search for 2
        # returning nothing:
        cql.execute(f'DELETE l[1] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
        # The list is now [5]. Replace the 5 with 2 and see 2 is found, 5 not:
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 5'))
        cql.execute(f'UPDATE {table} set l[0] = 2 WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 5'))
        # Replacing the list [2] with an empty list works as expected:
        cql.execute(f'UPDATE {table} SET l = [] WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
|
|
|
|
def test_index_set(cql, test_keyspace):
    """Exercise a secondary index on a set<int> column (issue #2962).

    Verifies that "CONTAINS" is rejected without an index, and that with
    an index it tracks set mutations: element additions and removals,
    whole-value replacement, the once-only nature of set elements, and
    clearing the set either by assigning {} or with DELETE.
    """
    schema = 'pk int, ck int, s set<int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Without an index, the "CONTAINS" restriction requires filtering
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 3')
        cql.execute(f'CREATE INDEX ON {table}(s)')
        # The set is still empty, nothing should match
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 3'))
        # Add values to the set, check they can be found
        cql.execute(f'UPDATE {table} set s = s + {{7}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        cql.execute(f'UPDATE {table} set s = s + {{8}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 8'))
        # Remove values from the set, check they can no longer be found
        cql.execute(f'UPDATE {table} set s = s - {{7}} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 8'))
        cql.execute(f'UPDATE {table} set s = s - {{8}} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 8'))
        # Replace entire value of set, everything in it should be indexed
        cql.execute(f'INSERT INTO {table} (pk, ck, s) VALUES (1, 2, {{4, 5}})')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 4'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 8'))
        cql.execute(f'UPDATE {table} set s = {{2, 5}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 2'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 4'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 8'))
        # A set can only have the same value once. The set is now {2,5},
        # trying to add 2 again makes no difference - and removing it just once
        # will remove this value:
        cql.execute(f'UPDATE {table} set s = s + {{2}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 2'))
        cql.execute(f'UPDATE {table} set s = s - {{2}} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 2'))
        # The set is now {5}. Replacing it with an empty set works as expected:
        cql.execute(f'UPDATE {table} SET s = {{}} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 5'))
        # The DELETE operation does the same thing:
        cql.execute(f'UPDATE {table} SET s = {{17,18}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 17'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 18'))
        cql.execute(f'DELETE s FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 17'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 18'))
|
|
|
|
def test_index_map_values(cql, test_keyspace):
    """Exercise a secondary index on the *values* of a map<int,int> column
    (issue #2962). An index on "m" (same as "values(m)") enables the
    CONTAINS restriction - but not CONTAINS KEY. Verifies index
    maintenance across element additions, deletions, whole-map
    replacement, duplicate values under different keys, in-place value
    updates, and key removal with the "-" operator.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Without an index, the "CONTAINS" restriction requires filtering
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3')
        cql.execute(f'CREATE INDEX ON {table}(m)')
        # indexing m (same as values(m)) will allow CONTAINS but not CONTAINS KEY
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3')
        # The map is still empty, nothing should match
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3'))
        # Add elements to the map, check their values can be found
        cql.execute(f'UPDATE {table} set m = m + {{10: 7}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        cql.execute(f'UPDATE {table} set m = m + {{11: 8}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 8'))
        # Remove elements from the map, check they can no longer be found
        cql.execute(f'DELETE m[10] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 8'))
        cql.execute(f'DELETE m[11] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 8'))
        # Replace entire value of map, everything in it should be indexed
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 2, {{17: 4, 18: 5}})')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 4'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 8'))
        cql.execute(f'UPDATE {table} set m = {{3: 2, 4: 5}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 2'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 4'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 8'))
        # A map can have multiple elements with the same *value*. The list is now
        # {3:2, 4:5}, if we add another element of value 2, searching for value 2
        # will return the item only once. But we'll need to delete both elements
        # to no longer find the value 2:
        cql.execute(f'UPDATE {table} set m = m + {{14: 2}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 2'))
        cql.execute(f'DELETE m[3] FROM {table} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 2'))
        cql.execute(f'DELETE m[14] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 2'))
        # The map is now {4:5}. Change the value of 4 and see it take effect:
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 5'))
        cql.execute(f'UPDATE {table} set m[4] = 6 WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 5'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 6'))
        # The "-" operation also works as expected (it removes specific *key*,
        # not *values*, despite what some confused documentation claims):
        cql.execute(f'UPDATE {table} set m = m - {{4}} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 6'))
|
|
|
|
# In the previous test (test_index_map_values) we noted that if one map has
|
|
# several keys with the same value, then the "values(m)" index must store
|
|
# all of them, so that we can still match this value even after removing one
|
|
# of those keys. We tested in the previous test that although the same value
|
|
# appears more than once, when we search for it, we only get the same item
|
|
# once. Under the hood, Scylla does find the same value multiple times, but
|
|
# then eliminates the duplicate matched row and returns it only once.
|
|
# There is a complication, that this de-duplication does not easily span
|
|
# *paging*. So the purpose of this test is to check that paging does not
|
|
# cause the same row to be returned more than once.
|
|
def test_index_map_values_paging(cql, test_keyspace):
    """A values(m) index scan over a map holding the same value under many
    keys matches the same base row many times; Scylla must de-duplicate
    those matches even across *page* boundaries, so a paged SELECT never
    returns the same row twice.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # index m (same as values(m)). Will allow "CONTAINS".
        cql.execute(f'CREATE INDEX ON {table}(m)')
        # Insert a map where 10 out of the 12 elements have the same value 3
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 2, {{0:4, 1:3, 2:3, 3:3, 4:3, 5:3, 6:3, 7:3, 8:3, 9:3, 10:3, 11:7}})')
        select = f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3'
        # Unpaged, the row (1,2) must come back exactly once:
        assert [(1,2)] == list(cql.execute(select))
        # Repeat with tiny page sizes: without cross-page de-duplication
        # the same row would appear multiple times in the result set.
        for fetch in [1, 2, 3, 7]:
            paged = SimpleStatement(select, fetch_size=fetch)
            assert [(1,2)] == list(cql.execute(paged))
|
|
|
|
# In the previous test (test_index_map_values*) all tests involved a single
|
|
# row, which could match a search, or not. In this test we verify that the
|
|
# case of multiple matching rows also works.
|
|
def test_index_map_values_multiple_matching_rows(cql, test_keyspace, driver_bug_1):
    """Like the previous map-values tests but with several matching rows,
    one of which has multiple occurrences of the searched value (so the
    cross-page de-duplication of test_index_map_values_paging is also
    exercised here). Skipping the duplicate in row (2,2) can produce an
    empty page when fetch_size=1, which needs a driver that handles
    empty pages correctly - hence the "driver_bug_1" fixture.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # index m (same as values(m)). Will allow "CONTAINS".
        cql.execute(f'CREATE INDEX ON {table}(m)')
        # Several rows with different maps; some contain the value 3
        # somewhere, others don't, and (2,2) contains it twice.
        for pk, ck, m in [(1, 2, '{1:2, 3:4}'),
                          (1, 3, '{1:3, 3:4}'),
                          (1, 4, '{7:3}'),
                          (2, 2, '{1:3, 2:3, 3:4}'),
                          (2, 4, '{}'),
                          (2, 5, '{7:3}')]:
            cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES ({pk}, {ck}, {m})')
        expected = [(1,3),(1,4),(2,2),(2,5)]
        assert expected == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3'))
        for fetch in [1, 2, 3, 7]:
            paged = SimpleStatement(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3', fetch_size=fetch)
            assert expected == list(cql.execute(paged))
|
|
|
|
# In the previous tests (test_index_map_values*) all tests involved a base
|
|
# table with both partition keys and clustering keys. Because some of the
|
|
# implementation is different depending the schema has clustering keys,
|
|
# let's also write a similar test with just a partition key:
|
|
def test_index_map_values_partition_key_only(cql, test_keyspace, driver_bug_1):
    """Same scenario as test_index_map_values_multiple_matching_rows but
    on a table with only a partition key (no clustering key), since
    Scylla's index-using code takes a different path in that case. Also
    re-checks the paging de-duplication against this schema.
    """
    schema = 'pk int, m map<int,int>, PRIMARY KEY (pk)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # index m (same as values(m)). Will allow "CONTAINS".
        cql.execute(f'CREATE INDEX ON {table}(m)')
        # Several partitions with different maps; some contain the value 3
        # somewhere, others don't, and pk=4 contains it twice.
        for pk, m in [(1, '{1:2, 3:4}'),
                      (2, '{1:3, 3:4}'),
                      (3, '{7:3}'),
                      (4, '{1:3, 2:3, 3:4}'),
                      (5, '{}')]:
            cql.execute(f'INSERT INTO {table} (pk, m) VALUES ({pk}, {m})')
        expected = [(2,), (3,), (4,)]
        assert expected == sorted(cql.execute(f'SELECT pk FROM {table} WHERE m CONTAINS 3'))
        for fetch in [1, 2, 3, 7]:
            paged = SimpleStatement(f'SELECT pk FROM {table} WHERE m CONTAINS 3', fetch_size=fetch)
            assert expected == sorted(cql.execute(paged))
            # We can't assert that every page is full: when Scylla skips a
            # duplicate value it may emit a short page. Not great (all of
            # Cassandra's pages are full size), but legal.
|
|
|
|
def test_index_map_keys(cql, test_keyspace):
    """Exercise a secondary index on the *keys* of a map<int,int> column
    (issue #2962). An index on keys(m) enables CONTAINS KEY - but not
    CONTAINS. Verifies index maintenance across element additions,
    deletions, whole-map replacement, in-place value updates (which must
    not affect key lookups), and key removal with the "-" operator.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Without an index, the "CONTAINS KEY" restriction requires filtering
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3')
        cql.execute(f'CREATE INDEX ON {table}(keys(m))')
        # indexing keys(m) will allow CONTAINS KEY but not CONTAINS
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3')
        # The map is still empty, nothing should match
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3'))
        # Add elements to the map, check their keys can be found
        cql.execute(f'UPDATE {table} set m = m + {{10: 7}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 10'))
        cql.execute(f'UPDATE {table} set m = m + {{11: 8}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 11'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 10'))
        # Remove elements from the map, check they can no longer be found
        cql.execute(f'DELETE m[10] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 10'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 11'))
        cql.execute(f'DELETE m[11] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 10'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 11'))
        # Replace entire value of map, everything in it should be indexed
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 2, {{17: 4, 18: 5}})')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 17'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 18'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 10'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 11'))
        cql.execute(f'UPDATE {table} set m = {{3: 2, 4: 5}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 4'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 17'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 18'))
        # The map is now {3:2, 4:5}. Change the value of 4 and see it has
        # no effect on keys:
        cql.execute(f'UPDATE {table} set m[4] = 6 WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 4'))
        # The "-" operation also works as expected (it removes specific *key*,
        # not *values*, despite what some confused documentation claims):
        cql.execute(f'UPDATE {table} set m = m - {{4}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 4'))
|
|
|
|
def test_index_map_entries(cql, test_keyspace):
    """Exercise a secondary index on the *entries* of a map<int,int> column
    (issue #2962). An index on entries(m) enables the m[key] = value
    restriction - and neither CONTAINS nor CONTAINS KEY. Verifies index
    maintenance across element additions, deletions, whole-map
    replacement, in-place value updates, and key removal with the "-"
    operator.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(entries(m))')
        # indexing entries(m) will allow neither CONTAINS KEY nor CONTAINS
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3')
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3')
        # The map is still empty, nothing should match
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[1] = 2'))
        # Add elements to the map, check their entries can be found
        cql.execute(f'UPDATE {table} set m = m + {{10: 7}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[10] = 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[10] = 8'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[11] = 7'))
        cql.execute(f'UPDATE {table} set m = m + {{11: 8}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[10] = 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[11] = 8'))
        # Remove elements from the map, check they can no longer be found
        cql.execute(f'DELETE m[10] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[10] = 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[11] = 8'))
        cql.execute(f'DELETE m[11] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[10] = 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[11] = 8'))
        # Replace entire value of map, everything in it should be indexed
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 2, {{17: 4, 18: 5}})')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[17] = 4'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[18] = 5'))
        cql.execute(f'UPDATE {table} set m = {{3: 2, 4: 5}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[3] = 2'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[4] = 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[17] = 4'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[18] = 5'))
        # The map is now {3:2, 4:5}. Change the value of 4 and see it has
        # the expected effect
        cql.execute(f'UPDATE {table} set m[4] = 6 WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[4] = 6'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[4] = 5'))
        # The "-" operation also works as expected (it removes specific *key*,
        # not *values*, despite what some confused documentation claims):
        cql.execute(f'UPDATE {table} set m = m - {{4}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[3] = 2'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[4] = 6'))
|
|
|
|
# Check that it is possible to index the same map column in different ways
|
|
# (values, keys and entries) at the same time:
|
|
def test_index_map_multiple(cql, test_keyspace):
    """A single map column may carry values(), keys() and entries()
    indexes at the same time; each serves its own restriction kind.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        for target in ['values(m)', 'keys(m)', 'entries(m)']:
            cql.execute(f'CREATE INDEX ON {table}({target})')
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 2, {{17: 4, 18: 5}})')
        # values(m) serves CONTAINS, keys(m) serves CONTAINS KEY, and
        # entries(m) serves m[key] = value:
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 4'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 18'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[18] = 5'))
|
|
|
|
# Test that indexing keys(x), values(x) or entries(x) is only allowed for
|
|
# specific column types (namely, specific kinds of collections).
|
|
def test_index_collection_wrong_type(cql, test_keyspace):
    """Test that indexing keys(x), values(x) or entries(x) is only allowed
    for specific column types (namely, specific kinds of collections), and
    that the unsupported combinations are rejected with InvalidRequest.
    """
    schema = 'pk int primary key, x int, l list<int>, s set<int>, m map<int,int>, t tuple<int,int>'
    with new_test_table(cql, test_keyspace, schema) as table:
        # scalar type: only the bare column name is allowed:
        cql.execute(f'CREATE INDEX ON {table}(x)')
        with pytest.raises(InvalidRequest, match="keys()"):
            cql.execute(f'CREATE INDEX ON {table}(keys(x))')
        with pytest.raises(InvalidRequest, match="entries()"):
            cql.execute(f'CREATE INDEX ON {table}(entries(x))')
        with pytest.raises(InvalidRequest, match="values()"):
            cql.execute(f'CREATE INDEX ON {table}(values(x))')
        # list: bare column name and values() is allowed. Both of these
        # refer to the same index - so you can't have both at the same time.
        cql.execute(f'CREATE INDEX ON {table}(l)')
        with pytest.raises(InvalidRequest, match="keys()"):
            cql.execute(f'CREATE INDEX ON {table}(keys(l))')
        with pytest.raises(InvalidRequest, match="entries()"):
            cql.execute(f'CREATE INDEX ON {table}(entries(l))')
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(f'CREATE INDEX ON {table}(values(l))')
        cql.execute(f"DROP INDEX {test_keyspace}.{table.split('.')[1]}_l_idx")
        cql.execute(f'CREATE INDEX ON {table}(values(l))')
        # set: bare column name and values() is allowed. Both of these
        # refer to the same index - so you can't have both at the same time.
        cql.execute(f'CREATE INDEX ON {table}(s)')
        with pytest.raises(InvalidRequest, match="keys()"):
            cql.execute(f'CREATE INDEX ON {table}(keys(s))')
        with pytest.raises(InvalidRequest, match="entries()"):
            cql.execute(f'CREATE INDEX ON {table}(entries(s))')
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(f'CREATE INDEX ON {table}(values(s))')
        cql.execute(f"DROP INDEX {test_keyspace}.{table.split('.')[1]}_s_idx")
        cql.execute(f'CREATE INDEX ON {table}(values(s))')
        # map: bare column name, values(), keys() and entries() are all
        # allowed. The first two refer to the same index - so you can't
        # have both at the same time.
        cql.execute(f'CREATE INDEX ON {table}(m)')
        cql.execute(f'CREATE INDEX ON {table}(keys(m))')
        cql.execute(f'CREATE INDEX ON {table}(entries(m))')
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(f'CREATE INDEX ON {table}(values(m))')
        cql.execute(f"DROP INDEX {test_keyspace}.{table.split('.')[1]}_m_idx")
        cql.execute(f'CREATE INDEX ON {table}(values(m))')
        # A tuple is not a collection, and doesn't support indexing its
        # elements separately (this would not have been possible, by the way,
        # because it can have elements of different types).
        cql.execute(f'CREATE INDEX ON {table}(t)')
        with pytest.raises(InvalidRequest, match="keys()"):
            cql.execute(f'CREATE INDEX ON {table}(keys(t))')
        with pytest.raises(InvalidRequest, match="entries()"):
            cql.execute(f'CREATE INDEX ON {table}(entries(t))')
        with pytest.raises(InvalidRequest, match="values()"):
            cql.execute(f'CREATE INDEX ON {table}(values(t))')
        # None of the above types allow a FULL index - it is only
        # allowed for *frozen* collections.
        # NOTE(review): full(t) is checked both first and last below; the
        # second check is redundant but harmless.
        with pytest.raises(InvalidRequest, match="full()"):
            cql.execute(f'CREATE INDEX ON {table}(full(t))')
        with pytest.raises(InvalidRequest, match="full()"):
            cql.execute(f'CREATE INDEX ON {table}(full(l))')
        with pytest.raises(InvalidRequest, match="full()"):
            cql.execute(f'CREATE INDEX ON {table}(full(m))')
        with pytest.raises(InvalidRequest, match="full()"):
            cql.execute(f'CREATE INDEX ON {table}(full(s))')
        with pytest.raises(InvalidRequest, match="full()"):
            cql.execute(f'CREATE INDEX ON {table}(full(t))')
|
|
|
|
# Check the default name of collection indexes. This "default name" is
|
|
# needed to drop an index which was created without explicitly specifying
|
|
# a name for it. We want the default name to be identical to Cassandra,
|
|
# because an application may assume it is so.
|
|
def test_index_collection_default_name(cql, test_keyspace):
    """Check the default name of collection indexes. This "default name" is
    needed to drop an index which was created without explicitly specifying
    a name for it. We want the default name to be identical to Cassandra,
    because an application may assume it is so.
    """
    schema = 'pk int primary key, m map<int,int>'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(m)')
        cql.execute(f"DROP INDEX {table}_m_idx")
        # values(m) and m refer to the same thing so must have the same
        # default name.
        cql.execute(f'CREATE INDEX ON {table}(values(m))')
        cql.execute(f"DROP INDEX {table}_m_idx")
        # keys(m) and entries(m) are different indexes than just m, but
        # their default index name turns out to be exactly the same one:
        cql.execute(f'CREATE INDEX ON {table}(keys(m))')
        cql.execute(f"DROP INDEX {table}_m_idx")
        cql.execute(f'CREATE INDEX ON {table}(entries(m))')
        cql.execute(f"DROP INDEX {table}_m_idx")
        # We can create multiple types of the above indexes at the same
        # time (see also test_index_map_multiple() above), so they will
        # get different default names using the standard default index
        # name mechanism (adding _1, etc.)
        cql.execute(f'CREATE INDEX ON {table}(m)')
        cql.execute(f'CREATE INDEX ON {table}(keys(m))')
        cql.execute(f'CREATE INDEX ON {table}(entries(m))')
        cql.execute(f"DROP INDEX {table}_m_idx")
        cql.execute(f"DROP INDEX {table}_m_idx_1")
        cql.execute(f"DROP INDEX {table}_m_idx_2")
|
|
|
|
# Reproducer for issue #10707 - indexing a column whose name is a quoted
|
|
# string should work fine. Even if the quoted string happens to look like
|
|
# an instruction to index a collection, e.g., "keys(m)".
|
|
def test_index_quoted_names(cql, test_keyspace):
    """Reproducer for issue #10707 - indexing a column whose name is a
    quoted string should work fine, even if the quoted string happens to
    look like an instruction to index a collection, e.g., "keys(m)".
    """
    quoted_names = ['"hEllo"', '"x y"', '"hi""hello""yo"', '"""hi"""', '"keys(m)"', '"values(m)"', '"entries(m)"']
    schema = 'pk int, ck int, m int, ' + ','.join([name + " int" for name in quoted_names]) + ', PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        for name in quoted_names:
            cql.execute(f'CREATE INDEX ON {table}({name})')
        names = ','.join(quoted_names)
        values = ','.join(['3' for name in quoted_names])
        cql.execute(f'INSERT INTO {table} (pk, ck, {names}) VALUES (1, 2, {values})')
        for name in quoted_names:
            assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE {name} = 3'))

    # Moreover, we can have a collection with a quoted name, and can then
    # ask to index something strange-looking like keys("keys(m)").
    schema = 'pk int, ck int, m int, ' + ','.join([name + " map<int,int>" for name in quoted_names]) + ', PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        for name in quoted_names:
            cql.execute(f'CREATE INDEX ON {table}(keys({name}))')
        names = ','.join(quoted_names)
        values = ','.join(['{3:4}' for name in quoted_names])
        cql.execute(f'INSERT INTO {table} (pk, ck, {names}) VALUES (1, 2, {values})')
        for name in quoted_names:
            assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE {name} CONTAINS KEY 3'))
|
|
|
|
@pytest.mark.xfail(reason="#10713 - local collection indexing is not implemented yet")
def test_local_secondary_index_on_collection(cql, test_keyspace):
    """Creating a *local* (per-partition) index on a collection column
    should eventually be supported; today the CREATE INDEX fails, hence
    the xfail mark (issue #10713).
    """
    schema = 'pk int, ck int, l list<int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}((pk), l)')
|
|
|
|
# Test that queries with the index over collection provide the same answer as it
|
|
# would be without index, but with ALLOW FILTERING.
|
|
# The operations on collections here are picked up randomly.
|
|
def test_secondary_collection_index(cql, test_keyspace):
    """Randomized test: perform the same random sequence of collection
    operations on two identical tables - one with keys/values/entries
    indexes (tab1) and one without (tab2) - and verify after each step
    that every possible index-backed SELECT on tab1 returns the same
    rows as the equivalent ALLOW FILTERING query on tab2.

    Bug fix: all random choices now go through the seeded `rand`
    generator. Previously random_key/random_value/random_id/
    random_operation used the module-level `random.choice`, so the seed
    printed at the start did not actually reproduce the run.
    """
    # Seed a private generator and print the seed so that a failing run
    # can be reproduced exactly.
    seed = int(time.time()*1e8)
    print(f"Seed for collection index test: {seed}")
    rand = random.Random(seed)

    schema = f'id int, m map<int, text>, primary key (id)'

    # Small domains on purpose - collisions and overwrites are likely,
    # which is exactly what we want to exercise.
    possible_ids = [100, 101]
    possible_keys = [1, 2, 3]
    possible_values = ['abc', 'def', 'ghi']

    # Each operation prints the statement it runs, so a failure log shows
    # the full history. Extra **kwargs are accepted (and ignored) so that
    # every operation can be called with the same argument dictionary.
    def insert(table, id, map, **kwargs):
        a = (f'insert into {table}(id, m) values (%s, %s)', (id, map))
        print(a)
        cql.execute(*a)
    def update_cell(table, id, key, value, **kwargs):
        a = (f'update {table} set m[%s] = %s where id = %s', (key, value, id))
        print(a)
        cql.execute(*a)
    def update_delete(table, id, keys, **kwargs):
        a = (f'update {table} set m = m - %s where id = %s', (keys, id))
        print(a)
        cql.execute(*a)
    def update_add(table, id, map, **kwargs):
        a = (f'update {table} set m = m + %s where id = %s', (map, id))
        print(a)
        cql.execute(*a)
    def delete(table, id, **kwargs):
        a = (f'delete m from {table} where id = %s', (id,))
        print(a)
        cql.execute(*a)
    def delete_cell(table, id, key, **kwargs):
        a = (f'delete m[%s] from {table} where id = %s', (key, id))
        print(a)
        cql.execute(*a)

    def random_map():
        size = rand.randrange(len(possible_keys))
        keys = rand.sample(possible_keys, k=size)
        values = rand.choices(possible_values, k=size)
        return dict(zip(keys, values))
    def random_keys():
        return set(random_map())
    def random_key():
        # Use the seeded generator (was: random.choice) for reproducibility.
        return rand.choice(possible_keys)
    def random_value():
        return rand.choice(possible_values)
    def random_id():
        return rand.choice(possible_ids)

    def random_operation():
        return rand.choice([insert, update_cell, update_delete, update_add, delete, delete_cell])
    def random_args():
        # Generate every possible argument; each operation picks the ones
        # it needs via its keyword parameters.
        return {
            'map': random_map(),
            'key': random_key(),
            'value': random_value(),
            'keys': random_keys(),
            'id': random_id(),
        }

    with new_test_table(cql, test_keyspace, schema) as tab1, new_test_table(cql, test_keyspace, schema) as tab2:
        def select(cql, table, where, *args):
            query = f'select id from {table} where {where}'
            if table is tab2:
                # tab2 has no index, so filtering must be requested explicitly.
                query += ' allow filtering'
            try:
                return cql.execute(query, *args)
            except:
                # Log the failing query before re-raising, to aid debugging.
                print('args=', args, table, where)
                raise
        def test_all_possible_selects():
            # Also probe values that are never inserted, so that empty
            # results are compared too.
            possible_ids_ = possible_ids + [10000]
            possible_keys_ = possible_keys + [10000]
            possible_values_ = possible_values + ['aaaaa']
            for k, v in itertools.product(possible_keys_, possible_values_):
                r1 = select(cql, tab1, 'm[%s] = %s', (k, v))
                r2 = select(cql, tab2, 'm[%s] = %s', (k, v))
                assert_rows_ignoring_order(r1, *list(r2))
            for k in possible_keys_:
                r1 = select(cql, tab1, 'm contains key %s', (k,))
                r2 = select(cql, tab2, 'm contains key %s', (k,))
                assert_rows_ignoring_order(r1, *list(r2))
            for v in possible_values_:
                r1 = select(cql, tab1, 'm contains %s', (v,))
                r2 = select(cql, tab2, 'm contains %s', (v,))
                assert_rows_ignoring_order(r1, *list(r2))

        # Only tab1 is indexed; tab2 is the ALLOW FILTERING reference.
        cql.execute(f'create index on {tab1}(keys(m))')
        cql.execute(f'create index on {tab1}(values(m))')
        cql.execute(f'create index on {tab1}(entries(m))')

        for _ in range(50):
            op = random_operation()
            args = random_args()
            print(f"op={op}, args={args}")
            # Apply the identical mutation to both tables, then compare
            # all index-backed vs. filtering-backed query results.
            for tab in [tab1, tab2]:
                op(tab, **args)
            test_all_possible_selects()
|
|
|
|
# Test that paging through a select using a secondary index works as
|
|
# expected, returning pages of the requested size.
|
|
# We have several tests here, for different schemas, that exercises
|
|
# different code paths and may expose different bugs.
|
|
|
|
def test_index_paging_pk_ck(cql, test_keyspace):
    """Page through an index-backed SELECT on a table with both a
    partition key and a clustering key. Every page except the last must
    hold exactly the requested number of rows, and the union of all
    pages must be exactly the ten matching rows.
    """
    schema = 'p int, c int, x int, primary key (p,c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(x)")
        insert = cql.prepare(f"INSERT INTO {table}(p,c,x) VALUES (?,?,?)")
        for i in range(10):
            cql.execute(insert, [i, i, 3])
        cql.execute(insert, [17, 17, 2])
        for page_size in [1, 2, 3, 100]:
            stmt = SimpleStatement(f"SELECT p FROM {table} WHERE x = 3", fetch_size=page_size)
            # Fetch page by page, collecting every row as we go.
            # Although partial intermediate pages are theoretically
            # allowed (and happen in other tests), we don't expect
            # Scylla or Cassandra to generate them here.
            collected = []
            page = cql.execute(stmt)
            while True:
                rows = page.current_rows
                collected.extend(rows)
                if len(rows) != page_size:
                    # Only the final page may be partial.
                    assert len(rows) < page_size
                    break
                page = cql.execute(stmt, paging_state=page.paging_state)
            # Altogether, we must have read exactly the matching rows.
            assert sorted(collected) == [(i,) for i in range(10)]
|
|
|
|
def test_index_paging_pk_only(cql, test_keyspace):
    """Same paging check as test_index_paging_pk_ck, but on a table whose
    primary key is just a partition key (no clustering column): all pages
    but the last are full, and together they hold exactly the matches.
    """
    schema = 'p int, x int, primary key (p)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(x)")
        insert = cql.prepare(f"INSERT INTO {table}(p,x) VALUES (?,?)")
        for i in range(10):
            cql.execute(insert, [i, 3])
        cql.execute(insert, [17, 2])
        for page_size in [1, 2, 3, 100]:
            stmt = SimpleStatement(f"SELECT p FROM {table} WHERE x = 3", fetch_size=page_size)
            collected = []
            page = cql.execute(stmt)
            while True:
                rows = page.current_rows
                collected.extend(rows)
                if len(rows) != page_size:
                    # Only the final page may be partial.
                    assert len(rows) < page_size
                    break
                page = cql.execute(stmt, paging_state=page.paging_state)
            assert sorted(collected) == [(i,) for i in range(10)]
|
|
|
|
# When a partition key is indexed (it is redundant to index the entire
|
|
# partition key, so this test has a compound partition key and indexes only
|
|
# one component), the restriction can match the entire partition, but paging
|
|
# still needs to page through it - and not return the entire partition as one
|
|
# page! Reproduces #7432.
|
|
@pytest.mark.xfail(reason="issue #7432")
def test_index_paging_match_partition(cql, test_keyspace):
    """When an indexed restriction matches an entire base-table partition
    (here the indexed column is one component of a compound partition
    key), paging should still cut the partition into pages of the
    requested size instead of returning it whole. Reproduces #7432.
    """
    schema = 'p1 int, p2 int, c int, primary key (p1,p2,c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(p2)")
        insert = cql.prepare(f"INSERT INTO {table}(p1,p2,c) VALUES (?,?,?)")
        for i in range(10):
            # All of these have p2 = 1:
            cql.execute(insert, [1, 1, i])
        cql.execute(insert, [17, 17, 2])
        for page_size in [1, 2, 3, 100]:
            stmt = SimpleStatement(f"SELECT c FROM {table} WHERE p2 = 1", fetch_size=page_size)
            # Check that:
            # 1. Each page of results has the expected page_size, or less in
            #    the last page. Although partial pages are theoretically
            #    allowed (and happen in other tests), in this test we don't
            #    expect Scylla or Cassandra to generate them.
            # 2. Check that all the results read over all pages are the
            #    expected ones (0...9)
            all_rows = []
            results = cql.execute(stmt)
            while len(results.current_rows) == page_size:
                all_rows.extend(results.current_rows)
                results = cql.execute(stmt, paging_state=results.paging_state)
            # After pages of page_size, the last page should be partial
            assert len(results.current_rows) < page_size
            all_rows.extend(results.current_rows)
            # Finally check that altogether, we read the right rows.
            assert sorted(all_rows) == [(i,) for i in range(10)]
|
|
|
|
# Currently, paging of queries that uses secondary indexes on static columns
|
|
# is unable to page through partitions of the base table and must return them
|
|
# in whole. Related to #7432.
|
|
@pytest.mark.xfail(reason="issue #7432")
def test_index_paging_static_column(cql, test_keyspace):
    """Currently, paging of queries that use secondary indexes on static
    columns is unable to page through partitions of the base table and
    must return them in whole. Related to #7432, hence the xfail.
    """
    schema = 'p int, c int, s int static, PRIMARY KEY(p, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(s)')
        insert = cql.prepare(f"INSERT INTO {table}(p,c,s) VALUES (?,?,?)")
        for p in range(5):
            for c in range(5):
                cql.execute(insert, [p, c, 42])
        for page_size in [1, 2, 3, 4, 100]:
            stmt = SimpleStatement(f"SELECT p, c FROM {table} WHERE s = 42", fetch_size=page_size)

            all_rows = []
            results = cql.execute(stmt)
            while len(results.current_rows) == page_size:
                all_rows.extend(results.current_rows)
                results = cql.execute(stmt, paging_state=results.paging_state)
            # After pages of page_size, the last page should be partial
            assert len(results.current_rows) < page_size
            all_rows.extend(results.current_rows)
            # Finally check that altogether, we read the right rows.
            assert sorted(all_rows) == [(p,c) for p in range(5) for c in range(5)]
|
|
|
|
# If, in contrast with test_index_paging_match_partition above which indexed
|
|
# a partition key column, we index a clustering key column, paging does work
|
|
# as expected and stops at the right page size. However, as was noted in
|
|
# issue #7432, if we add "GROUP BY p" to the query, Scylla now mistakenly
|
|
# returns all the results in one page instead of stopping. So the following
|
|
# test passes with use_group_by = False but failed with use_group_by = True.
|
|
@pytest.mark.parametrize("use_group_by", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#7432")), False])
def test_index_paging_group_by(cql, test_keyspace, use_group_by):
    """Paging through an index on a clustering-key column works and stops
    at the requested page size (use_group_by=False). However, as noted in
    issue #7432, adding "GROUP BY p" makes Scylla mistakenly return all
    results in one page - so the use_group_by=True variant is xfail.
    """
    schema = 'p int, c1 int, c2 int, primary key (p,c1,c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(c1)")
        insert = cql.prepare(f"INSERT INTO {table}(p,c1,c2) VALUES (?,?,?)")
        for i in range(10):
            # All of these have c1 = 1:
            cql.execute(insert, [i, 1, i])
        cql.execute(insert, [17, 17, 2])
        for page_size in [1, 2, 3, 100]:
            group_by = 'GROUP BY p' if use_group_by else ''
            stmt = SimpleStatement(f"SELECT p FROM {table} WHERE c1 = 1 {group_by}", fetch_size=page_size)
            # Check that:
            # 1. Each page of results has the expected page_size, or less in
            #    the last page. Although partial pages are theoretically
            #    allowed (and happen in other tests), in this test we don't
            #    expect Scylla or Cassandra to generate them.
            # 2. Check that all the results read over all pages are the
            #    expected ones (0...9)
            all_rows = []
            results = cql.execute(stmt)
            while len(results.current_rows) == page_size:
                all_rows.extend(results.current_rows)
                results = cql.execute(stmt, paging_state=results.paging_state)
            # After pages of page_size, the last page should be partial
            assert len(results.current_rows) < page_size
            all_rows.extend(results.current_rows)
            # Finally check that altogether, we read the right rows.
            assert sorted(all_rows) == [(i,) for i in range(10)]
|
|
|
|
# Tests basic operations on a static column index.
|
|
def test_static_column_index(cql, test_keyspace):
    """Tests basic operations (insert, update, partition delete) on a
    static column index, checking after each step that lookups through
    the index return the expected partitions.
    """
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(s)')

        # Insert
        cql.execute(f'INSERT INTO {table} (pk, s) VALUES (0, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s) VALUES (1, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s) VALUES (2, 1)')

        assert [(0,),(1,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 0'))
        assert [(2,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 1'))

        # Update
        cql.execute(f'UPDATE {table} SET s = 1 WHERE pk = 1')

        assert [(0,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 0'))
        assert [(1,),(2,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 1'))

        # Partition delete
        cql.execute(f'DELETE FROM {table} WHERE pk = 2')

        assert [(0,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 0'))
        assert [(1,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 1'))
|
|
|
|
# Tests that building static indexes from a non-empty state works.
|
|
def test_static_column_index_build(cql, test_keyspace):
    """Tests that building static indexes from a non-empty state works:
    rows written *before* CREATE INDEX eventually become visible through
    the index. The index build is asynchronous, so the check retries with
    a 30-second timeout.
    """
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'INSERT INTO {table} (pk, s) VALUES (0, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s) VALUES (1, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s) VALUES (2, 0)')
        cql.execute(f'CREATE INDEX ON {table}(s)')

        # Indexes are created in the background, so we should wait here.
        # I don't know how to get information about secondary index build
        # status on C*, so we'll just wait until 30 seconds elapse or
        # the index appears to be properly built.
        start_time = time.time()
        # Stays None if no SELECT succeeds within the timeout, which makes
        # the final assert fail with a clear message.
        rows = None
        while time.time() < start_time + 30:
            try:
                rows = sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 0'))
                if len(rows) == 3:
                    break
            except:
                # In Cassandra, the SELECT can also fail for a short while
                # instead of returning nothing - as the index is not yet
                # set up properly. Let's ignore this failure and try again
                # until the index is ready.
                pass
            time.sleep(0.1)

        assert [(0,),(1,),(2,)] == rows
|
|
|
|
# Tests combinations of lookup in an index of static column with other
|
|
# restrictions. Reproduces #12829.
|
|
# NOTE: currently marked with skip instead of xfail because
|
|
# on_internal_error() crashes Scylla.
|
|
# NOTE: currently marked with skip instead of xfail because
# on_internal_error() crashes Scylla.
@pytest.mark.skip(reason="issue #12829")
def test_static_column_index_restrictions(cql, test_keyspace):
    """Tests combinations of a static-column index lookup with additional
    restrictions on the partition and clustering key. Reproduces #12829.
    """
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(s)')
        cql.execute(f'INSERT INTO {table} (pk, c, s) VALUES (0, 3, 1)')
        cql.execute(f'INSERT INTO {table} (pk, c, s) VALUES (0, 4, 1)')
        cql.execute(f'INSERT INTO {table} (pk, c, s) VALUES (1, 4, 2)')

        assert [(0,3),(0,4)] == sorted(cql.execute(f'SELECT pk,c FROM {table} WHERE s = 1'))
        assert [(0,3),(0,4)] == sorted(cql.execute(f'SELECT pk,c FROM {table} WHERE pk = 0 AND s = 1'))
        # Reproduces #12829:
        assert [(0,3)] == sorted(cql.execute(f'SELECT pk,c FROM {table} WHERE pk = 0 AND s = 1 AND c = 3'))
|
|
|
|
# Checks that clustering row deletions do not affect static columns.
|
|
def test_static_column_index_unaffected_by_clustering_row_ops(cql, test_keyspace):
    """Checks that clustering row deletions (single-row, range, and
    whole-partition) do not affect static columns: the static-column index
    keeps returning the partition until the partition itself is deleted.
    """
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(s)')

        cql.execute(f'INSERT INTO {table} (pk, c, s, v) VALUES (0, 0, 42, 0)')
        cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES (0, 1, 10)')
        cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES (0, 2, 20)')
        cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES (0, 3, 30)')
        cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES (0, 4, 40)')

        # We are not using SELECT DISTINCT because it is not implemented yet
        # for queries that restrict a non-pk column. Therefore, `pk` appears
        # multiple times in the result - once per live clustering row.

        assert [(0,)]*5 == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 42'))

        # Row delete
        cql.execute(f'DELETE FROM {table} WHERE pk = 0 AND c = 4')
        assert [(0,)]*4 == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 42'))

        # Range delete
        cql.execute(f'DELETE FROM {table} WHERE pk = 0 AND c >= 1 AND c < 3')
        assert [(0,)]*2 == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 42'))

        # Range delete, but this time get rid of all rows (static row should stay)
        cql.execute(f'DELETE FROM {table} WHERE pk = 0 AND c >= 0 AND c <= 4')
        assert [(0,)]*1 == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 42'))

        # Finally, perform a partition delete and get rid of the row
        cql.execute(f'DELETE FROM {table} WHERE pk = 0')
        assert [] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 42'))
|
|
|
|
# Checks that changing a static column's value is correctly reflected by queries
|
|
# accelerated by a secondary index.
|
|
def test_static_column_index_all_clustering_rows_moved_by_static_column_update(cql, test_keyspace):
    """Checks that changing a static column's value is correctly reflected
    by index-accelerated queries: updating s for a partition must move all
    of that partition's clustering rows from the old indexed value to the
    new one.
    """
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Expected (pk, c, v) rows per partition, used below to verify
        # which partition each indexed value currently maps to.
        rows_for_pk = [
            [(0, 0, 0), (0, 1, 10), (0, 2, 20)],
            [(1, 0, 0), (1, 1, 10), (1, 2, 20)],
        ]

        cql.execute(f'CREATE INDEX ON {table}(s)')

        for pk in range(2):
            cql.execute(f'INSERT INTO {table} (pk, c, s, v) VALUES ({pk}, 0, 0, 0)')
            cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES ({pk}, 1, 10)')
            cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES ({pk}, 2, 20)')

        # Initially both partitions have s = 0.
        assert rows_for_pk[0] + rows_for_pk[1] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 0'))
        assert [] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 1'))

        cql.execute(f"UPDATE {table} SET s = 1 WHERE pk = 1")

        # Partition 1's rows must all have moved to s = 1.
        assert rows_for_pk[0] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 0'))
        assert rows_for_pk[1] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 1'))

        cql.execute(f"UPDATE {table} SET s = 1 WHERE pk = 0")

        # Now both partitions have s = 1; s = 0 matches nothing.
        assert [] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 0'))
        assert rows_for_pk[0] + rows_for_pk[1] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 1'))
|
|
|
|
|
|
# Tests operations on tables which have both static column and regular column indexes.
|
|
# Checks that one does not interfere with the other.
|
|
def test_static_and_regular_index_operations(cql, test_keyspace):
    """Tests operations on tables which have both static column and regular
    column indexes, and checks that one does not interfere with the other.
    Also exercises batches where a static row is added/removed together
    with a regular-row update in the same mutation.
    """
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(s)')
        cql.execute(f'CREATE INDEX ON {table}(v)')

        cql.execute(f'INSERT INTO {table} (pk, s, c, v) VALUES (0, 0, 0, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s, c, v) VALUES (1, 0, 0, 1)')
        cql.execute(f'INSERT INTO {table} (pk, s, c, v) VALUES (2, 1, 0, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s, c, v) VALUES (3, 1, 0, 1)')

        assert [(0,),(1,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 0'))
        assert [(2,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 1'))
        assert [(0,),(2,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 0'))
        assert [(1,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 1'))

        # Updating the static column must not change the v index:
        cql.execute(f'UPDATE {table} SET s = 1 WHERE pk = 1')

        assert [(0,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 0'))
        assert [(1,),(2,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 1'))
        assert [(0,),(2,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 0'))
        assert [(1,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 1'))

        # Updating the regular column must not change the s index:
        cql.execute(f'UPDATE {table} SET v = 0 WHERE pk = 1 AND c = 0')

        assert [(0,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 0'))
        assert [(1,),(2,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 1'))
        assert [(0,),(1,),(2,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 0'))
        assert [(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 1'))

        # There are separate codepaths for processing static column addition/removal
        # in case the static row isn't the last element of the mutation.
        # The operations below allow us to test those cases

        cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES (4, 0, 4)')
        assert [] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 2'))
        assert [(4,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 4'))

        # Static column is set on a partition which didn't have a static row yet
        cql.execute(f'BEGIN BATCH \
            UPDATE {table} SET s = 2 WHERE pk = 4; \
            UPDATE {table} SET v = 5 WHERE pk = 4 AND c = 0; \
            APPLY BATCH')
        assert [(4,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 2'))
        assert [] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 4'))
        assert [(4,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 5'))

        # Static column is removed from a partition.
        # In order to construct the batch, we need the write timestamp, so
        # that the partition delete shadows the old data but not the new
        # v = 6 write (which uses timestamp + 1).
        timestamp = list(cql.execute(f'SELECT writetime(v) FROM {table} WHERE pk = 4 AND c = 0'))[0][0]
        cql.execute(f'BEGIN BATCH \
            DELETE FROM {table} USING TIMESTAMP {timestamp} WHERE pk = 4; \
            UPDATE {table} USING TIMESTAMP {timestamp+1} SET v = 6 WHERE pk = 4 AND c = 0; \
            APPLY BATCH')
        assert [] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 2'))
        assert [] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 5'))
        assert [(4,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 6'))
|
|
|
|
# Make sure that, when there are multiple static column indexes and only one
|
|
# column is modified, only the index relevant to that column is modified.
|
|
def test_multiple_static_column_indexes(cql, test_keyspace):
    """Make sure that, when there are multiple static column indexes and
    only one column is modified, only the index relevant to that column
    changes - the other static column's index is untouched.
    """
    schema = 'pk int, c int, s1 int STATIC, s2 int STATIC, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(s1)')
        cql.execute(f'CREATE INDEX ON {table}(s2)')

        cql.execute(f'INSERT INTO {table} (pk, s1, s2) VALUES (0, 0, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s1, s2) VALUES (1, 0, 1)')
        cql.execute(f'INSERT INTO {table} (pk, s1, s2) VALUES (2, 1, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s1, s2) VALUES (3, 1, 1)')

        assert [(0,),(1,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s1 = 0'))
        assert [(2,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s1 = 1'))
        assert [(0,),(2,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s2 = 0'))
        assert [(1,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s2 = 1'))

        # Only s1 is modified - the s2 index results must stay the same:
        cql.execute(f'UPDATE {table} SET s1 = 1 WHERE pk = 1')

        assert [(0,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s1 = 0'))
        assert [(1,),(2,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s1 = 1'))
        assert [(0,),(2,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s2 = 0'))
        assert [(1,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s2 = 1'))
|
|
|
|
# Test that creating a local index on a static column is disallowed.
|
|
# Local static indexes are not useful because there is only one value
|
|
# of a static column allowed for a given partition.
|
|
def test_disallow_local_indexes_on_static_columns(scylla_only, cql, test_keyspace):
    """Test that creating a local index on a static column is disallowed.
    Local static indexes are not useful because there is only one value
    of a static column allowed for a given partition.
    """
    schema = 'pk int, c int, s int static, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        with pytest.raises(InvalidRequest, match="Local indexes containing static columns are not supported"):
            cql.execute(f'CREATE INDEX ON {table}((pk), s)')
|
|
|
|
# Verify that "SELECT p FROM table WHERE v = 1 AND token(p) > ..." works,
# where p is the base table's partition key and v is an indexed regular
# column, without requiring ALLOW FILTERING. This relies on the secondary
# index keeping matching partition keys in token order, and on the query
# planner knowing that. Reproduces issue #7043; see also the C++ tests
# test_select_with_token_range_*() in test/boost/secondary_index_test.cc.
def test_index_filter_by_token(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'p int, v int, primary key (p)') as table:
        cql.execute(f"CREATE INDEX ON {table}(v)")
        insert = cql.prepare(f"INSERT INTO {table}(p,v) VALUES (?,?)")
        # Give every one of the ten partitions the same value v = 1.
        for key in range(10):
            cql.execute(insert, [key, 1])
        # A full base-table scan returns partitions in token order.
        base_results = list(cql.execute(f"SELECT token(p), p FROM {table}"))
        # The index-backed scan must yield the exact same ordering
        # (see also test_partition_order_with_si() above).
        si_results = list(cql.execute(f"SELECT token(p), p FROM {table} WHERE v = 1"))
        assert base_results == si_results
        # Take the token of the fourth row and add a "token(p) >" bound:
        # with v=1 we should get exactly the tail of the base scan, and
        # with v=0 we should get nothing at all.
        token3 = base_results[3].system_token_p
        assert list(cql.execute(f"SELECT token(p), p FROM {table} WHERE v = 1 AND token(p) > {token3}")) == base_results[4:]
        assert list(cql.execute(f"SELECT token(p), p FROM {table} WHERE v = 0 AND token(p) > {token3}")) == []
|
# Looking up a null value through a secondary index must match nothing:
# in Scylla "= null" never matches anything, not even rows whose column
# is actually null. Cassandra rejects "= null" outright, hence these
# tests are Scylla-only.
def test_global_secondary_index_null_lookup(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, 'p int PRIMARY KEY, v int') as table:
        cql.execute(f'CREATE INDEX ON {table}(v)')
        key = unique_key_int()
        cql.execute(f'INSERT INTO {table}(p,v) VALUES ({key},1)')
        assert list(cql.execute(f'SELECT p FROM {table} WHERE v=null')) == []
|
# Same null-lookup check as above, but through a local (per-partition)
# secondary index.
def test_local_secondary_index_null_lookup(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, 'p int PRIMARY KEY, v int') as table:
        cql.execute(f'CREATE INDEX ON {table}((p), v)')
        key = unique_key_int()
        cql.execute(f'INSERT INTO {table}(p,v) VALUES ({key},1)')
        assert list(cql.execute(f'SELECT * FROM {table} WHERE p={key} AND v=null')) == []
|
# As above, but the base table also has a clustering key, which the
# lookup restricts as well.
def test_local_secondary_index_null_lookup2(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, 'p int, c int, PRIMARY KEY (p, c), v int') as table:
        cql.execute(f'CREATE INDEX ON {table}((p), v)')
        key = unique_key_int()
        cql.execute(f'INSERT INTO {table}(p,c,v) VALUES ({key},0,1)')
        assert list(cql.execute(f'SELECT * FROM {table} WHERE p={key} AND c=0 AND v=null')) == []
|
# Reproducers for issue #7659: a query with several candidate indexes
# must not try to use an index for a non-EQ restriction (an index can
# only serve EQ). A C++ test covers one reproducer; here we exercise two
# kinds of non-EQ restrictions with two different failure modes (one used
# to assert, the other "just" threw an exception and failed the request),
# for both global and local indexes.

def test_7659_global(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'a int, d int, f int, PRIMARY KEY (a, d)') as table:
        for column in ('d', 'f'):
            cql.execute(f'CREATE INDEX ON {table}({column})')
        for a, d, f in [(1, 0, 1), (2, 0, 2), (3, 0, 3), (4, 0, 0), (5, 1, 1)]:
            cql.execute(f'INSERT INTO {table}(a,d,f) VALUES ({a}, {d}, {f})')
        # The IN restriction used to trigger an assertion failure (#7659):
        in_result = sorted(list(cql.execute(f"SELECT a FROM {table} WHERE d=0 AND f in (1,2) ALLOW FILTERING")))
        assert in_result == [(1,),(2,)]
        # The range restriction used to fail the request with an exception:
        range_result = sorted(list(cql.execute(f"SELECT a FROM {table} WHERE d=0 AND f>0 ALLOW FILTERING")))
        assert range_result == [(1,),(2,),(3,)]
|
# The local-index flavor of the previous test is scylla_only because
# local secondary indexes are a Scylla extension absent from Cassandra.
def test_7659_local(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, 'a int, b int, c int, d int, e int, f int, PRIMARY KEY ((a, b), c, d)') as table:
        cql.execute(f'CREATE INDEX ON {table}((a, b), f)')
        cql.execute(f'CREATE INDEX ON {table}(d)')
        cql.execute(f'INSERT INTO {table}(a,b,c,d,e,f) VALUES (0,0,0,0,0,1)')
        # IN restriction - used to hit an assertion failure (#7659):
        assert list(cql.execute(f"SELECT a FROM {table} WHERE a=0 and b=0 AND d=0 AND f in (1,2) ALLOW FILTERING")) == [(0,)]
        # Range restriction - used to raise and fail the request (#7659):
        assert list(cql.execute(f"SELECT a FROM {table} WHERE a=0 and b=0 AND d=0 AND f>0 ALLOW FILTERING")) == [(0,)]
|
# A secondary index can only serve equality restrictions (a=2); for
# inequalities such as a>=2 Scylla must ignore the index and fall back
# to ordinary filtering. Reproduces #5823.
def test_index_non_eq_relation(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'a bigint, b bigint, c bigint, PRIMARY KEY ((a, b))') as table:
        cql.execute(f'CREATE INDEX ON {table}(a)')
        for a, b, c in [(0, 2, 1), (1, 2, 3), (2, 2, 4)]:
            cql.execute(f'INSERT INTO {table} (a,b,c) VALUES ({a},{b},{c})')
        assert sorted(cql.execute(f"SELECT c FROM {table} WHERE a>0 and b=2 ALLOW FILTERING")) == [(3,),(4,)]
        assert sorted(cql.execute(f"SELECT c FROM {table} WHERE a>=2 ALLOW FILTERING")) == [(4,)]
|
# Reproducer for issue #12762: a scan that filters through a secondary
# index must still honor PER PARTITION LIMIT, counted against the
# original base-table partitions (not the index's partitions). With the
# bug, the use_index=True flavor simply ignored PER PARTITION LIMIT
# while the use_index=False flavor worked.
@pytest.mark.parametrize("use_index", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#12762")), False])
def test_index_filtering_scan_and_per_partition_limit(cql, test_keyspace, use_index):
    with new_test_table(cql, test_keyspace, "p int, c int, v int, PRIMARY KEY (p, c)") as table:
        if use_index:
            cql.execute(f"CREATE INDEX ON {table}(v)")
        insert = cql.prepare(f'INSERT INTO {table} (p, c, v) VALUES (?, ?, ?)')
        for row in [[0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]]:
            cql.execute(insert, row)
        # When an index exists, ALLOW FILTERING is unnecessary - Scylla
        # must use the index (otherwise the v=0 query wouldn't be
        # efficient) yet still apply the PER PARTITION LIMIT.
        allow_filtering = '' if use_index else 'ALLOW FILTERING'
        got = set(cql.execute(f'SELECT * FROM {table} WHERE v=0 PER PARTITION LIMIT 1 {allow_filtering}'))
        assert got == {(0, 0, 0), (1, 0, 0)}
|
# Like the previous PER PARTITION LIMIT test, but with an extra filter
# (z=1) that rejects some candidates retrieved via the index. The limit
# must count post-filtering rows: if a partition's first candidate fails
# the filter, scanning must continue within that partition until one
# passing row is found, rather than skipping to the next partition.
@pytest.mark.parametrize("use_index", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#12762")), False])
def test_index_filtering2_scan_and_per_partition_limit(cql, test_keyspace, use_index):
    with new_test_table(cql, test_keyspace, "p int, c int, v int, z int, PRIMARY KEY (p, c)") as table:
        if use_index:
            cql.execute(f"CREATE INDEX ON {table}(v)")
        insert = cql.prepare(f'INSERT INTO {table} (p, c, v, z) VALUES (?, ?, ?, ?)')
        for row in [[0, 0, 0, 0], [0, 1, 0, 1], [0, 2, 0, 1],
                    [1, 0, 0, 0], [1, 1, 0, 1], [1, 2, 0, 1]]:
            cql.execute(insert, row)
        got = set(cql.execute(f'SELECT * FROM {table} WHERE v=0 AND z=1 PER PARTITION LIMIT 1 ALLOW FILTERING'))
        assert got == {(0, 1, 0, 1), (1, 1, 0, 1)}
|
# Test that when adding an index to a table, queries should not begin to
# use it before it's fully built - otherwise we would get wrong query
# results. Reproduces issue #7963.
# The test is marked cassandra_bug, because Cassandra 4 also fails on it -
# the ALLOW FILTERING request right after the CREATE INDEX causes an
# exception in SecondaryIndexManagement and a failed read. This is despite
# CASSANDRA-8505 claiming that this issue was already fixed in 2015.
@pytest.mark.xfail(reason="issue #7963")
def test_unbuilt_index_not_used(cql, test_keyspace, cassandra_bug):
    # The bigger "count" is the slower the test and the higher the chance
    # of reproducing the bug #7963. With dev build on my laptop, count=100
    # is enough for reproducing the failure in 90% of the runs.
    count = 100
    with new_test_table(cql, test_keyspace,
            "p int, c int, v int, PRIMARY KEY (p, c)") as table:
        stmt = cql.prepare(f"INSERT INTO {table} (p, c, v) VALUES (?, ?, ?)")
        # Populate 'count' rows; row i has v=i*100, so the filter v=100
        # below matches exactly one row, the one with p=1.
        for i in range(count):
            cql.execute(stmt, [i, i*10, i*100])
        assert list(cql.execute(f"SELECT p FROM {table} WHERE v=100 ALLOW FILTERING")) == [(1,)]
        # Before we add an index, the same query without ALLOW FILTERING is
        # not allowed:
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f"SELECT p FROM {table} WHERE v=100")
        # Create an index and then, quickly before (usually) the index is
        # completely built, retry the same query as the above successful
        # query (the one with ALLOW FILTERING):
        cql.execute(f"CREATE INDEX ON {table} (v)")
        # Failure here reproduces #7963: (note that depending on timing, it
        # might not fail every time even before the bug is fixed)
        assert list(cql.execute(f"SELECT p FROM {table} WHERE v=100 ALLOW FILTERING")) == [(1,)]
        # If we retry the same query *without* the ALLOW FILTERING phrase,
        # this should either be allowed and produce correct results (if the
        # index has finished building) - or, should complain that ALLOW
        # FILTERING is needed (as was the case above before we added the index)
        try:
            # If query without ALLOW FILTERING works, it must return the
            # correct results. Failure here reproduces #7963.
            assert list(cql.execute(f"SELECT p FROM {table} WHERE v=100")) == [(1,)]
        except InvalidRequest as e:
            # Or, it's also fine that the query doesn't yet work without ALLOW
            # FILTERING, if the index wasn't yet built.
            assert "ALLOW FILTERING" in str(e)
|
# Utility function waiting (up to a timeout) for the given index to be built.
# Uses the "IndexInfo" system table, supported by both Scylla and Cassandra.
def wait_for_index(cql, keyspace, index_name, timeout_sec=60):
    """Poll system."IndexInfo" until the given index is reported as built.

    Returns as soon as a row for (keyspace, index_name) appears in the
    IndexInfo table; calls pytest.fail() if it does not appear within
    timeout_sec seconds.
    """
    # Use a monotonic clock for the deadline so the wait is immune to
    # wall-clock jumps (NTP adjustments, manual clock changes), and build
    # the loop-invariant query string only once.
    deadline = time.monotonic() + timeout_sec
    query = f"SELECT index_name FROM system.\"IndexInfo\" WHERE table_name = '{keyspace}' and index_name = '{index_name}'"
    while time.monotonic() < deadline:
        if list(cql.execute(query)):
            return
        time.sleep(0.1)
    pytest.fail(f"Timeout ({timeout_sec} seconds) waiting for index {keyspace}.{index_name}")
|
# The following test starts a filtering request (with ALLOW FILTERING),
# pages through the results, and between two pages adds a secondary-index
# that could be used - or not - for continuing the request, but certainly
# shouldn't break the request. Reproduces #18992.
@pytest.mark.xfail(reason="issue #18992")
def test_paging_and_create_index(cql, test_keyspace):
    # 20 rows with a page size of 7 guarantee the query spans several pages.
    count = 20
    with new_test_table(cql, test_keyspace,
            "p int, v int, PRIMARY KEY (p)") as table:
        stmt = cql.prepare(f"INSERT INTO {table} (p, v) VALUES (?, ?)")
        # Add 'count' partitions with v=17 in all of them, so the filter
        # "WHERE v=17" will return all rows.
        for i in range(count):
            cql.execute(stmt, [i, 17])
        page_size = 7
        stmt = SimpleStatement(f"SELECT p FROM {table} WHERE v=17 ALLOW FILTERING", fetch_size=page_size)
        # Save the result of a full query (this would be count rows ordered
        # by token(p) order) so we can later compare this to what we get
        # when we insert a CREATE INDEX between pages.
        expected = list(cql.execute(stmt))
        # Run the same paged query again but this time use the page-by-page
        # API, and stick a CREATE INDEX ON v between the first and second page.
        got = []
        r = cql.execute(stmt)
        assert len(r.current_rows) == page_size # sanity check
        got.extend(r.current_rows)
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        # We wait for the index to be built, since we don't want to reproduce
        # #7963 again here (an index getting used before actually built).
        wait_for_index(cql, test_keyspace, index_name)
        # Resume paging with the saved paging_state; the result must be
        # identical to the uninterrupted query saved in 'expected'.
        while r.has_more_pages:
            r = cql.execute(stmt, paging_state=r.paging_state)
            assert len(r.current_rows) <= page_size # sanity check
            got.extend(r.current_rows)
        assert expected == got
|
# This test is the same as the previous one, but in this test the request
# filters on both v1 and v2 is already using an index for column v2, and
# now between the pages we add an index for v1. Reproduces #18992.
@pytest.mark.xfail(reason="issue #18992")
def test_paging_and_create_index2(cql, test_keyspace):
    count = 20
    with new_test_table(cql, test_keyspace,
            "p int, v1 text, v2 int, PRIMARY KEY (p)") as table:
        stmt = cql.prepare(f"INSERT INTO {table} (p, v1, v2) VALUES (?, ?, ?)")
        # Add 'count' partitions with v1="dog", v2=3 in all of them, so the
        # filter "WHERE v1='dog' AND v2=3" will return all rows.
        for i in range(count):
            cql.execute(stmt, [i, "dog", 3])
        page_size = 7
        stmt = SimpleStatement(f"SELECT p FROM {table} WHERE v1='dog' AND v2=3 ALLOW FILTERING", fetch_size=page_size)
        expected = list(cql.execute(stmt))
        # Create the index on v2, and run the query again, should get the same
        # results.
        index_name2 = unique_name()
        cql.execute(f"CREATE INDEX {index_name2} ON {table}(v2)")
        wait_for_index(cql, test_keyspace, index_name2)
        assert expected == list(cql.execute(stmt))
        # Run the same paged query again but this time use the page-by-page
        # API, and do a CREATE INDEX ON v1 between the first and second page.
        # Scylla currently prefers the index for the first column mentioned
        # in the query, so the risk is that it will switch to using v1 instead
        # of v2 for the paging and get confused by the paging state.
        got = []
        r = cql.execute(stmt)
        assert len(r.current_rows) == page_size  # first page must be full
        got.extend(r.current_rows)
        index_name1 = unique_name()
        cql.execute(f"CREATE INDEX {index_name1} ON {table}(v1)")
        wait_for_index(cql, test_keyspace, index_name1)
        while r.has_more_pages:
            r = cql.execute(stmt, paging_state=r.paging_state)
            assert len(r.current_rows) <= page_size
            got.extend(r.current_rows)
        assert expected == got
|
# Similar to the previous tests, but here a secondary index which is used
# for the original request is suddenly deleted between pages. In this test,
# the query has "ALLOW FILTERING" so the query can continue to work -
# inefficiently - after the index is deleted. In the following test, we
# will do the same without ALLOW FILTERING in the query - and we'll see the
# query can't be resumed after the index is dropped.
# Reproduces #18992.
@pytest.mark.xfail(reason="issue #18992")
def test_paging_and_drop_index_allow_filtering(cql, test_keyspace):
    count = 20
    with new_test_table(cql, test_keyspace,
            "p int, v int, PRIMARY KEY (p)") as table:
        stmt = cql.prepare(f"INSERT INTO {table} (p, v) VALUES (?, ?)")
        # Add 'count' partitions with v=17 in all of them, so the filter v=17
        # will return all rows.
        for i in range(count):
            cql.execute(stmt, [i, 17])
        page_size = 7
        stmt = SimpleStatement(f"SELECT p FROM {table} WHERE v=17 ALLOW FILTERING", fetch_size=page_size)
        expected = list(cql.execute(stmt))
        # Create the index on v, and run the query again with the index,
        # should get the same results.
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        wait_for_index(cql, test_keyspace, index_name)
        assert expected == list(cql.execute(stmt))
        # Finally, run the same paged query again but this time use the page-
        # by-page API, and do a DROP INDEX between the first and second page.
        # The query still has ALLOW FILTERING to prevent Scylla from rejecting
        # the query because now (without the index) it needs ALLOW FILTERING.
        got = []
        r = cql.execute(stmt)
        assert len(r.current_rows) == page_size # sanity check
        got.extend(r.current_rows)
        cql.execute(f"DROP INDEX {test_keyspace}.{index_name}")
        # Resume paging after the index is gone; the remaining pages must
        # still complete the original result set.
        while r.has_more_pages:
            r = cql.execute(stmt, paging_state=r.paging_state)
            assert len(r.current_rows) <= page_size # sanity check
            got.extend(r.current_rows)
        assert expected == got
|
# This test is the same as the previous one, except that it uses a query
# *without* ALLOW FILTERING while an index existed, and when it attempts
# to get the next page after a DROP INDEX, we expect the usual error
# message about ALLOW FILTERING being necessary.
def test_paging_and_drop_index_no_allow_filtering(cql, test_keyspace):
    count = 20
    with new_test_table(cql, test_keyspace,
            "p int, v int, PRIMARY KEY (p)") as table:
        stmt = cql.prepare(f"INSERT INTO {table} (p, v) VALUES (?, ?)")
        for i in range(count):
            cql.execute(stmt, [i, 17])
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        wait_for_index(cql, test_keyspace, index_name)

        page_size = 7
        stmt = SimpleStatement(f"SELECT p FROM {table} WHERE v=17", fetch_size=page_size)
        expected = list(cql.execute(stmt))
        # Run the same paged query again but this time use the page-by-page
        # API, and do a DROP INDEX between the first and second page.
        got = []
        r = cql.execute(stmt)
        assert len(r.current_rows) == page_size
        got.extend(r.current_rows)
        cql.execute(f"DROP INDEX {test_keyspace}.{index_name}")
        # Because the query does not have "ALLOW FILTERING", even if we
        # could resume this query it would be inefficient without the
        # index, so the resumed query should fail with an error about
        # ALLOW FILTERING being needed.
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            while r.has_more_pages:
                r = cql.execute(stmt, paging_state=r.paging_state)
                assert len(r.current_rows) <= page_size
                got.extend(r.current_rows)
            # NOTE(review): this assert only runs if paging unexpectedly
            # completed without raising InvalidRequest; in that case the
            # enclosing pytest.raises fails the test on block exit anyway.
            assert expected == got
|
|
|
# Check how an index is represented in the system.* tables.
def test_index_in_system_tables(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v int") as table:
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        wait_for_index(cql, test_keyspace, index_name)
        if is_scylla(cql):
            # Scylla backs an index by a view named "<index>_index", which
            # shows up in system.built_views once built.
            built = {f'{r.keyspace_name}.{r.view_name}' for r in cql.execute('select * from system.built_views')}
            assert f'{test_keyspace}.{index_name}_index' in built
        # The index also appears in the (quoted, case-sensitive) IndexInfo
        # table, keyed by keyspace name and index name.
        known = {f'{r.table_name}::{r.index_name}' for r in cql.execute('select * from system."IndexInfo"')}
        assert f'{test_keyspace}::{index_name}' in known
        row = cql.execute(f'select * from system."IndexInfo" where table_name = \'{test_keyspace}\' AND index_name = \'{index_name}\'').one()
        assert (row.table_name, row.index_name) == (test_keyspace, index_name)
|
# Check that a built index is reported through the REST API.
def test_index_in_API(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v int") as table:
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        wait_for_index(cql, test_keyspace, index_name)
        # The REST endpoint addresses the table as "<keyspace>:<table>".
        endpoint = f"column_family/built_indexes/{table.replace('.',':')}"
        built_indexes = rest_api.get_request(cql, endpoint)
        assert index_name in built_indexes
|
|
|
# Check that LIMIT is enforced across multiple base partitions when the
# query is served by an index on a partition-key column: if the last
# partition contributing to the result holds more rows than the LIMIT
# leaves room for, the result must be truncated.
#
# Reproduces #22158 - with LIMIT 3 the query returns 4 rows instead of 3:
# the coordinator applies the LIMIT to the primary keys it fetches from
# the index view, and re-applies it per base-table partition, but never
# truncates the last partition (which would require counting rows across
# partitions). As a side effect, has_more_pages is also wrongly set even
# though the result is exhausted, because the last primary key of the
# base result no longer matches the last key taken from the index view
# (a mismatch is always read as "more pages available"); see
# generate_view_paging_state_from_base_query_results() for details.
@pytest.mark.xfail(reason="issue #22158")
def test_limit_partition(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'pk1 int, pk2 int, ck int, primary key ((pk1, pk2), ck)') as table:
        cql.execute(f'CREATE INDEX ON {table}(pk2)')
        insert = cql.prepare(f'INSERT INTO {table} (pk1, pk2, ck) VALUES (?, ?, ?)')
        for row in [[1, 1, 1], [1, 1, 2], [2, 1, 1], [2, 1, 2]]:
            cql.execute(insert, row)
        # LIMIT confined to a single partition works.
        rs = cql.execute(f'SELECT pk1, ck FROM {table} WHERE pk2 = 1 LIMIT 1')
        assert sorted(list(rs)) == [(2,1)]
        assert not rs.has_more_pages
        # LIMIT spanning two partitions - reproduces #22158.
        rs = cql.execute(f'SELECT pk1, ck FROM {table} WHERE pk2 = 1 LIMIT 3')
        assert sorted(list(rs)) == [(1,1), (1,2), (2,1)]
        assert not rs.has_more_pages
|
# The same scenario as test_limit_partition above, but exercising
# partition slices rather than whole partitions, by indexing the first
# clustering-key column instead of a partition-key column.
@pytest.mark.xfail(reason="issue #22158")
def test_limit_partition_slice(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'pk int, ck1 int, ck2 int, primary key (pk, ck1, ck2)') as table:
        cql.execute(f'CREATE INDEX ON {table}(ck1)')
        insert = cql.prepare(f'INSERT INTO {table} (pk, ck1, ck2) VALUES (?, ?, ?)')
        for row in [[1, 1, 1], [1, 1, 2], [2, 1, 1], [2, 1, 2]]:
            cql.execute(insert, row)
        # LIMIT confined to a single partition slice works.
        rs = cql.execute(f'SELECT pk, ck2 FROM {table} WHERE ck1 = 1 LIMIT 1')
        assert sorted(list(rs)) == [(1,1)]
        assert not rs.has_more_pages
        # LIMIT spanning several partition slices - reproduces #22158.
        rs = cql.execute(f'SELECT pk, ck2 FROM {table} WHERE ck1 = 1 LIMIT 3')
        assert sorted(list(rs)) == [(1,1), (1,2), (2,1)]
        assert not rs.has_more_pages