mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-22 09:30:45 +00:00
Add a test to check that paged secondary index queries behave correctly
when pages are short. This is currently failing in Scylla, but passes in
Cassandra 5, therefore marked as "xfailing". Refer to the test's
docstring for more details.
The bug is a regression introduced by commit f6f18b1.
`test/cqlpy/run --release ...` shows that the test passes in 5.1 but
fails in 5.2 onwards.
Refs #25839.
Signed-off-by: Nikos Dragazis <nikolaos.dragazis@scylladb.com>
Closes scylladb/scylladb#25843
2236 lines
130 KiB
Python
2236 lines
130 KiB
Python
# Copyright 2020-present ScyllaDB
|
|
#
|
|
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
|
|
# Tests for secondary indexes
|
|
|
|
import random
|
|
import itertools
|
|
import time
|
|
import tempfile
|
|
import pytest
|
|
import os
|
|
from contextlib import ExitStack
|
|
from . import rest_api
|
|
from cassandra.protocol import SyntaxException, AlreadyExists, InvalidRequest, ConfigurationException, ReadFailure, WriteFailure
|
|
from cassandra.query import SimpleStatement
|
|
from .cassandra_tests.porting import assert_rows, assert_row_count, assert_rows_ignoring_order, assert_empty
|
|
|
|
from .util import new_test_table, unique_name, unique_key_int, is_scylla, ScyllaMetrics, config_value_context
|
|
|
|
# A reproducer for issue #7443: Normally, when the entire table is SELECTed,
# the partitions are returned sorted by the partitions' token. When there
# is filtering, this order is not expected to change. Furthermore, when this
# filtering happens to use a secondary index, again the order is not expected
# to change.
def test_partition_order_with_si(cql, test_keyspace):
    """Full-scan, filtered, and index-driven SELECTs must all return the
    same partitions in the same (token) order. Reproduces issue #7443."""
    schema = 'pk int, x int, PRIMARY KEY ((pk))'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Insert 20 partitions, all of them with x=1 so that filtering by x=1
        # will yield the same 20 partitions:
        N = 20
        stmt = cql.prepare('INSERT INTO '+table+' (pk, x) VALUES (?, ?)')
        for i in range(N):
            cql.execute(stmt, [i, 1])
        # SELECT all the rows, and verify they are returned in increasing
        # partition token order (note that the token is a *signed* number):
        tokens = [row.system_token_pk for row in cql.execute('SELECT token(pk) FROM '+table)]
        assert len(tokens) == N
        assert sorted(tokens) == tokens
        # Now select all the partitions with filtering of x=1. Since all
        # rows have x=1, this shouldn't change the list of matching rows, and
        # also shouldn't check their order:
        tokens1 = [row.system_token_pk for row in cql.execute('SELECT token(pk) FROM '+table+' WHERE x=1 ALLOW FILTERING')]
        assert tokens1 == tokens
        # Now add an index on x, which allows implementing the "x=1"
        # restriction differently. With the index, "ALLOW FILTERING" is
        # no longer necessary. But the order of the results should
        # still not change. Issue #7443 is about the order changing here.
        cql.execute('CREATE INDEX ON '+table+'(x)')
        # "CREATE INDEX" does not wait until the index is actually available
        # for use. Reads immediately after the CREATE INDEX may fail or return
        # partial results. So let's retry until reads resume working:
        # Initialize tokens2 up front: if every retry below raises
        # ReadFailure, we want the final assert to fail cleanly instead of
        # dying with a NameError on an unbound variable.
        tokens2 = []
        for i in range(100):
            try:
                tokens2 = [row.system_token_pk for row in cql.execute('SELECT token(pk) FROM '+table+' WHERE x=1')]
                if len(tokens2) == N:
                    break
            except ReadFailure:
                pass
            time.sleep(0.1)
        assert tokens2 == tokens
|
|
|
|
# Test which ensures that indexes for a query are picked by the order in which
# they appear in restrictions. That way, users can deterministically pick
# which indexes are used for which queries.
# Note that the order of picking indexing is not set in stone and may be
# subject to change - in which case this test case should be amended as well.
# The order tested in this case was decided as a good first step in issue
# #7969, but it's possible that it will eventually be implemented another
# way, e.g. dynamically based on estimated query selectivity statistics.
# In any case, the order must be consistent across coordinators and time
# (and upgrades...) to allow paged queries that use an index to continue.
# Ref: #7969
def test_order_of_indexes(scylla_only, cql, test_keyspace):
    schema = 'p int primary key, v1 int, v2 int, v3 int'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX my_v3_idx ON {table}(v3)")
        cql.execute(f"CREATE INDEX my_v1_idx ON {table}(v1)")
        cql.execute(f"CREATE INDEX my_v2_idx ON {table}((p),v2)")
        # All queries below should use the first index they find in the list
        # of restrictions. Tracing information will be consulted to ensure
        # it's true. Currently some of the cases below succeed, because the
        # order is not well defined (and may, for instance, change upon
        # server restart), but some of them fail. Once a proper ordering
        # is implemented, all cases below should succeed.
        def index_used(query, index_name):
            # Assert that the given index name shows up in the query trace.
            # Use a generator (not a list) so any() short-circuits on the
            # first matching trace event.
            events = cql.execute(query, trace=True).get_query_trace().events
            assert any(index_name in event.description for event in events)
        index_used(f"SELECT * FROM {table} WHERE v3 = 1", "my_v3_idx")
        index_used(f"SELECT * FROM {table} WHERE v3 = 1 and v1 = 2 allow filtering", "my_v3_idx")
        index_used(f"SELECT * FROM {table} WHERE p = 1 and v1 = 1 and v3 = 2 allow filtering", "my_v1_idx")
        index_used(f"SELECT * FROM {table} WHERE p = 1 and v3 = 1 and v1 = 2 allow filtering", "my_v3_idx")
        # Local indexes are still skipped if they cannot be used
        index_used(f"SELECT * FROM {table} WHERE v2 = 1 and v1 = 2 allow filtering", "my_v1_idx")
        index_used(f"SELECT * FROM {table} WHERE v2 = 1 and v3 = 2 and v1 = 3 allow filtering", "my_v3_idx")
        index_used(f"SELECT * FROM {table} WHERE v1 = 1 and v2 = 2 and v3 = 3 allow filtering", "my_v1_idx")
        # Local indexes are still preferred over global ones, if they can be used
        index_used(f"SELECT * FROM {table} WHERE p = 1 and v1 = 1 and v3 = 2 and v2 = 2 allow filtering", "my_v2_idx")
        index_used(f"SELECT * FROM {table} WHERE p = 1 and v2 = 1 and v1 = 2 allow filtering", "my_v2_idx")
|
|
|
|
# Indexes can be created without an explicit name, in which case a default name
# is chosen. However, due to #8620 it was possible to break index creation by
# first creating a properly named regular table which collides with the
# generated index name.
def test_create_unnamed_index_when_its_name_is_taken(cql, test_keyspace):
    schema = 'p int primary key, v int'
    with new_test_table(cql, test_keyspace, schema) as table:
        try:
            cql.execute(f"CREATE TABLE {table}_v_idx_index (i_do_not_exist_in_the_base_table int primary key)")
            # Even though the index's default name is already taken by the
            # table created above, creating the index should still succeed:
            cql.execute(f"CREATE INDEX ON {table}(v)")
        finally:
            cql.execute(f"DROP TABLE {table}_v_idx_index")
|
|
|
|
# An index created with an explicit name is backed by a materialized view with
# a derived name - <index-name>_index. If a regular table (or another view)
# already occupies that name, the index creation is expected to fail.
def test_create_named_index_when_its_name_is_taken(scylla_only, cql, test_keyspace):
    schema = 'p int primary key, v int'
    with new_test_table(cql, test_keyspace, schema) as table:
        index_name = unique_name()
        try:
            cql.execute(f"CREATE TABLE {test_keyspace}.{index_name}_index (i_do_not_exist_in_the_base_table int primary key)")
            # The index cannot be created: its underlying materialized view
            # would need a name that a regular table already holds.
            with pytest.raises(InvalidRequest, match="already exists"):
                cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        finally:
            cql.execute(f"DROP TABLE {test_keyspace}.{index_name}_index")
|
|
|
|
# Tests for CREATE INDEX IF NOT EXISTS
# Reproduces issue #8717.
def test_create_index_if_not_exists(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'p int primary key, v int') as table:
        cql.execute(f"CREATE INDEX ON {table}(v)")
        # Re-creating the same index is rejected without "IF NOT EXISTS",
        # and silently accepted with it:
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(f"CREATE INDEX ON {table}(v)")
        cql.execute(f"CREATE INDEX IF NOT EXISTS ON {table}(v)")
        cql.execute(f"DROP INDEX {test_keyspace}.{table.split('.')[1]}_v_idx")

        # Same exercise for named indexes. This is what broke in #8717:
        cql.execute(f"CREATE INDEX xyz ON {table}(v)")
        with pytest.raises(InvalidRequest, match="already exists"):
            cql.execute(f"CREATE INDEX xyz ON {table}(v)")
        cql.execute(f"CREATE INDEX IF NOT EXISTS xyz ON {table}(v)")
        cql.execute(f"DROP INDEX {test_keyspace}.xyz")

        # And once more with a name that isn't all lower-case:
        cql.execute(f'CREATE INDEX "CamelCase" ON {table}(v)')
        with pytest.raises(InvalidRequest, match="already exists"):
            cql.execute(f'CREATE INDEX "CamelCase" ON {table}(v)')
        cql.execute(f'CREATE INDEX IF NOT EXISTS "CamelCase" ON {table}(v)')
        cql.execute(f'DROP INDEX {test_keyspace}."CamelCase"')

        # Try to index a column that is already indexed, but under a
        # different name. The "IF NOT EXISTS" form appears to succeed in
        # this case, yet it does not actually register the new index name -
        # only the old one remains (so DROP INDEX on the new name fails).
        cql.execute(f"CREATE INDEX xyz ON {table}(v)")
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(f"CREATE INDEX abc ON {table}(v)")
        cql.execute(f"CREATE INDEX IF NOT EXISTS abc ON {table}(v)")
        with pytest.raises(InvalidRequest):
            cql.execute(f"DROP INDEX {test_keyspace}.abc")
        cql.execute(f"DROP INDEX {test_keyspace}.xyz")
|
|
|
|
# Another test for CREATE INDEX IF NOT EXISTS: Checks what happens if an index
# with the given *name* already exists, but it's a different index than the
# one requested, i.e.,
#   CREATE INDEX xyz ON tbl(a)
#   CREATE INDEX IF NOT EXIST xyz ON tbl(b)
# Should the second command
# 1. Silently do nothing (because xyz already exists),
# 2. or try to create an index (because an index on tbl(b) doesn't yet exist)
#    and visibly fail when it can't because the name is already taken?
# Cassandra chose the first behavior (silently do nothing), Scylla chose the
# second behavior. We consider Cassandra's behavior to be *wrong* and
# unhelpful - the intention of the user was ensure that an index tbl(b)
# (an index on column b of table tbl) exists, and if we can't, an error
# message is better than silently doing nothing.
# So this test is marked "cassandra_bug" - passes on Scylla and xfails on
# Cassandra.
# Reproduces issue #9182
def test_create_index_if_not_exists2(cql, test_keyspace, cassandra_bug):
    with new_test_table(cql, test_keyspace, 'p int primary key, v1 int, v2 int') as table:
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v1)")
        # A different index obviously can't reuse the same name:
        with pytest.raises(InvalidRequest, match="already exists"):
            cql.execute(f"CREATE INDEX {index_name} ON {table}(v2)")
        # "IF NOT EXISTS" doesn't help either: no index on {table}(v2)
        # exists yet, so creation is attempted - and fails on the name.
        with pytest.raises(InvalidRequest, match="already exists"):
            cql.execute(f"CREATE INDEX IF NOT EXISTS {index_name} ON {table}(v2)")
|
|
|
|
# Verify that oversized index names are cleanly rejected as InvalidRequest
# Reproduces issue #20755
def test_create_index_oversized_name(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'p int primary key, v int') as table:
        oversized = 'x' * 500
        with pytest.raises((InvalidRequest, ConfigurationException)):
            cql.execute(f"CREATE INDEX {oversized} ON {table}(v)")
|
|
|
|
# Test that the paging state works properly for indexes on tables
# with descending clustering order. There was a problem with indexes
# created on clustering keys with DESC clustering order - they are represented
# as "reverse" types internally and Scylla assertions failed that the base type
# is different from the underlying view type, even though, from the perspective
# of deserialization, they're equal. Issue #8666
def test_paging_with_desc_clustering_order(cql, test_keyspace):
    schema = 'p int, c int, primary key (p,c)'
    extra = 'with clustering order by (c desc)'
    with new_test_table(cql, test_keyspace, schema, extra) as table:
        cql.execute(f"CREATE INDEX ON {table}(c)")
        for p in range(3):
            cql.execute(f"INSERT INTO {table}(p,c) VALUES ({p}, 42)")
        # fetch_size=1 forces paging through all three matching rows:
        stmt = SimpleStatement(f"SELECT * FROM {table} WHERE c = 42", fetch_size=1)
        assert len(list(cql.execute(stmt))) == 3
|
|
|
|
# The following test is similar to the above, except that the reversed type
# is not of the indexed column itself - but it's still part of the index's
# materialized view (because it's part of the key), and we use SELECT DISTINCT
# instead of SELECT. This reproduces Scylla Enterprise issue #2449,
# and is a regression test for commit c8653d1321ca9ff5963f9e5479372a6000a0f096.
def test_paging_with_desc_clustering_order2(cql, test_keyspace):
    schema = 'p1 int, p2 int, c int, primary key ((p1,p2),c)'
    extra = 'with clustering order by (c desc)'
    with new_test_table(cql, test_keyspace, schema, extra) as table:
        cql.execute(f"CREATE INDEX ON {table}(p1)")
        for k in range(3):
            cql.execute(f"INSERT INTO {table}(p1,p2,c) VALUES (7, {k}, 42)")
        stmt = SimpleStatement(f"SELECT p1,p2 FROM {table} WHERE p1 = 7", fetch_size=1)
        assert len(list(cql.execute(stmt))) == 3
        # Reproduces Scylla Enterprise issue #2449:
        stmt = SimpleStatement(f"SELECT DISTINCT p1,p2 FROM {table} WHERE p1 = 7", fetch_size=1)
        assert len(list(cql.execute(stmt))) == 3
|
|
|
|
# Test that deleting a base partition works fine, even if it produces a large
# batch of individual view updates. Refs #8852 - view updates used to be
# applied with per-partition granularity, but after fixing the issue it's no
# longer the case, so a regression test is necessary. Scylla-only - relies on
# the underlying representation of the index table.
def test_partition_deletion(cql, test_keyspace, scylla_only):
    schema = 'p int, c1 int, c2 int, v int, primary key (p,c1,c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(c1)")
        prep = cql.prepare(f"INSERT INTO {table}(p,c1,c2) VALUES (1, ?, 1)")
        for c1 in range(1342):
            cql.execute(prep, [c1])
        cql.execute(f"DELETE FROM {table} WHERE p = 1")
        # After the partition deletion the backing view must be empty too:
        remaining = list(cql.execute(f"SELECT * FROM {table}_c1_idx_index"))
        assert len(remaining) == 0
|
|
|
|
# Test that deleting a clustering range works fine, even if it produces a
# large batch of individual view updates. Refs #8852 - view updates used to be
# applied with per-partition granularity, but after fixing the issue it's no
# longer the case, so a regression test is necessary. Scylla-only - relies on
# the underlying representation of the index table.
def test_range_deletion(cql, test_keyspace, scylla_only):
    schema = 'p int, c1 int, c2 int, v int, primary key (p,c1,c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(c1)")
        prep = cql.prepare(f"INSERT INTO {table}(p,c1,c2) VALUES (1, ?, 1)")
        for c1 in range(1342):
            cql.execute(prep, [c1])
        cql.execute(f"DELETE FROM {table} WHERE p = 1 AND c1 > 5 and c1 < 846")
        # Only rows outside the deleted (5, 846) range should survive in
        # the backing view:
        survivors = [row.c1 for row in cql.execute(f"SELECT * FROM {table}_c1_idx_index")]
        assert sorted(survivors) == [x for x in range(1342) if x <= 5 or x >= 846]
|
|
|
|
# Reproduces #8627:
# Test that trying to insert a value for an indexed column that exceeds 64KiB
# fails, because this value is too large to be written as a key in the
# underlying index
@pytest.mark.xfail(reason="issue #8627")
def test_too_large_indexed_value(cql, test_keyspace):
    schema = 'p int, c int, v text, primary key (p,c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(v)")
        # 66536 characters - comfortably above the 64KiB (65536) key limit:
        big = 'x'*66536
        with pytest.raises(InvalidRequest, match='size'):
            cql.execute(f"INSERT INTO {table}(p,c,v) VALUES (0,1,'{big}')")
|
|
|
|
# Similar to the above test (test_too_large_indexed_value) but when indexing
# keys of collection. Modern Cassandra, and Scylla, allow collection keys
# and values to be up to 2GB, but the keys written to an index are limited
# to 64 KB. When a collection is indexed, the insertion of oversized elements
# should fail cleanly at the time of write.
# Reproduces #8627
@pytest.mark.xfail(reason="issue #8627")
def test_too_large_indexed_collection_value(cql, test_keyspace):
    schema = 'p int, c int, m map<text,text>, primary key (p,c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(values(m))")
        cql.execute(f"CREATE INDEX ON {table}(keys(m))")
        big = 'x'*66536
        # Oversized map *value*:
        with pytest.raises(InvalidRequest, match='size'):
            cql.execute(f"INSERT INTO {table}(p,c,m) VALUES (0,1,{{'hi': '{big}'}})")
        # Oversized map *key*:
        with pytest.raises(InvalidRequest, match='size'):
            cql.execute(f"INSERT INTO {table}(p,c,m) VALUES (0,1,{{'{big}': 'hi'}})")
|
|
|
|
# Reproduces #8627:
# Same as test_too_large_indexed_value above, just check adding an index
# to a table with pre-existing data. The background index-building process
# cannot return an error to the user, but we do expect it to skip the
# problematic row and continue to complete the rest of the index build.
@pytest.mark.xfail(reason="issue #8627")
def test_too_large_indexed_value_build(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'p int primary key, v text') as table:
        # No index yet - a "big" value in v is perfectly fine:
        stmt = cql.prepare(f'INSERT INTO {table} (p,v) VALUES (?, ?)')
        for k in range(30):
            cql.execute(stmt, [k, str(k)])
        big = 'x'*66536
        cql.execute(stmt, [30, big])
        assert [(30,big)] == list(cql.execute(f'SELECT * FROM {table} WHERE p=30'))
        # Create an index on v as the new key. The background index-building
        # process should start promptly.
        cql.execute(f"CREATE INDEX ON {table}(v)")
        # If Scylla's view builder hangs or stops, there is no way to
        # tell this state apart from a view build that simply hasn't
        # completed yet (besides looking at the logs, which we don't).
        # This means, unfortunately, that a failure of this test is slow -
        # it needs to wait for a timeout.
        # However, today we are lucky (?) that the cql.execute(read, [big])
        # test also fails immediately on Scylla, so this test fails quickly.
        read = cql.prepare(f'SELECT * FROM {table} WHERE v = ?')
        deadline = time.time() + 30
        while time.time() < deadline:
            # The oversized "big" cannot be a key in the view, and
            # cannot be searched. Cassandra reports: "Index expression
            # values may not be larger than 64K".
            with pytest.raises(InvalidRequest):
                cql.execute(read, [big])
            # All the other keys should eventually be there
            found = 0
            for k in range(30):
                if list(cql.execute(read, [str(k)])):
                    found += 1
            if found == 30:
                break
            print(found)
            time.sleep(0.1)
        for k in range(30):
            assert list(cql.execute(read, [str(k)]))
|
|
|
|
# Selecting values using only clustering key should require filtering, but
# work correctly.
# Reproduces issue #8991
def test_filter_cluster_key(cql, test_keyspace):
    schema = 'p int, c1 int, c2 int, primary key (p, c1, c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(c2)")
        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 1, 1)")
        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 0, 1)")

        query = SimpleStatement(f"SELECT c1, c2 FROM {table} WHERE c1 = 1 and c2 = 1 ALLOW FILTERING")
        assert_rows(cql.execute(query), [1, 1])
|
|
|
|
# Reproduces #8627:
# Reproduced #13548:
# Insert a string with > 64K characters (reproducing issue #8627 - see other
# tests reproducing it), which causes the view builder to never finish.
# If Scylla is run with `--enable-keyspace-column-family-metrics 1` cmd line
# param, and we immediately recreate the table (drop and create),
# an "old" table's object in memory won't get deallocated before the "new"
# table's object is created, which raised a
# seastar::metrics::double_registration exception
def test_instant_table_recreation(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'p int primary key, v text') as table:
        insert_statement = cql.prepare(f'INSERT INTO {table} (p,v) VALUES (?, ?)')
        big_string = 'x'*66536
        cql.execute(insert_statement, [0, big_string])
        cql.execute(f"CREATE INDEX ON {table}(v)")
    # going out of scope DROPs the above table. Create another table with the
    # same name:
    with new_test_table(cql, test_keyspace, 'p int primary key, v text') as table:
        pass
|
|
|
|
# Selecting *only* an indexed clustering key does not require filtering, it's
# a full-index scan (the amount of output is proportional to the read).
# Additionally, with unnecessary parentheses the query also works, and isn't
# handled like a multi-column restriction (reproduces #13250).
@pytest.mark.xfail(reason="issue #13250")
def test_index_scan_multicolumn_syntax(cql, test_keyspace):
    schema = 'p int, c1 int, c2 int, primary key (p, c1, c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(c1)")
        cql.execute(f"CREATE INDEX ON {table}(c2)")
        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 1, 1)")
        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 0, 1)")
        cql.execute(f"INSERT INTO {table} (p, c1, c2) VALUES (0, 1, 0)")
        assert [(0,), (1,)] == list(cql.execute(f'SELECT c2 FROM {table} WHERE c1 = 1'))
        assert [(0,), (1,)] == list(cql.execute(f'SELECT c1 FROM {table} WHERE c2 = 1'))
        # The query (c1) = (1) isn't a real multi-column restriction (it
        # should mean the same as c1=1) so it can use the index - and work
        # without ALLOW FILTERING. Reproduces #13250:
        assert [(0,), (1,)] == list(cql.execute(f'SELECT c2 FROM {table} WHERE (c1) = (1)'))
        # The query (c2) = (1) isn't a real multi-column restriction (it
        # should mean the same as c2=1) so it should be allowed despite
        # missing a restriction on c1. In our case c2=1 is allowed because
        # c2 is indexed. Reproduces #13250:
        assert [(0,), (1,)] == list(cql.execute(f'SELECT c1 FROM {table} WHERE (c2) = (1)'))
|
|
|
|
def test_multi_column_with_regular_index(cql, test_keyspace):
    """Reproduces #9085."""
    with new_test_table(cql, test_keyspace, 'p int, c1 int, c2 int, r int, primary key(p,c1,c2)') as tbl:
        cql.execute(f'CREATE INDEX ON {tbl}(r)')
        cql.execute(f'INSERT INTO {tbl}(p, c1, c2, r) VALUES (1, 1, 1, 0)')
        cql.execute(f'INSERT INTO {tbl}(p, c1, c2, r) VALUES (1, 1, 2, 1)')
        cql.execute(f'INSERT INTO {tbl}(p, c1, c2, r) VALUES (1, 2, 1, 0)')
        # A multi-column (tuple) restriction combined with an indexed
        # regular column, with and without a partition-key restriction:
        assert_rows(cql.execute(f'SELECT c1 FROM {tbl} WHERE (c1,c2)<(2,0) AND r=0 ALLOW FILTERING'), [1])
        assert_rows(cql.execute(f'SELECT c1 FROM {tbl} WHERE p=1 AND (c1,c2)<(2,0) AND r=0 ALLOW FILTERING'), [1])
|
|
|
|
# Test that indexing an *empty string* works as expected. There is nothing
# wrong or unusual about an empty string, and it should be supported just
# like any other string.
# Reproduces issue #9364
def test_index_empty_string(cql, test_keyspace):
    schema = 'p int, v text, primary key (p)'
    # Searching for v='' without an index (with ALLOW FILTERING), works
    # as expected:
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"INSERT INTO {table} (p, v) VALUES (1, 'hello')")
        cql.execute(f"INSERT INTO {table} (p, v) VALUES (2, '')")
        assert_rows(cql.execute(f"SELECT p FROM {table} WHERE v='' ALLOW FILTERING"), [2])
    # Now try the same thing with an index on v. ALLOW FILTERING should
    # no longer be needed, and the correct row should be found (in #9364
    # it wasn't). We create here a new table instead of adding an index to
    # the existing table to avoid the question of how will we know when the
    # new index is ready.
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(v)")
        cql.execute(f"INSERT INTO {table} (p, v) VALUES (1, 'hello')")
        cql.execute(f"INSERT INTO {table} (p, v) VALUES (2, '')")
        # The following assert fails in #9364:
        # Note that on a single-node cqlpy, index updates are
        # synchronous so we don't have to retry the SELECT.
        assert_rows(cql.execute(f"SELECT p FROM {table} WHERE v=''"), [2])
|
|
|
|
# Test that trying to delete an entry based on an indexed column
# does not cause the whole partition to be wiped. Refs #9495
def test_try_deleting_based_on_index_column(cql, test_keyspace):
    schema = 'p int, c1 int, c2 int, v int, primary key (p, c1, c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        for k in range(10):
            cql.execute(f"INSERT INTO {table} (p,c1,c2,v) VALUES (0,{k},{k},{k})")
        # Deleting with only part of the clustering key restricted is
        # invalid, and must not touch any data:
        with pytest.raises(InvalidRequest):
            cql.execute(f"DELETE FROM {table} WHERE p = 0 AND c2 = 1500")
        assert_row_count(cql.execute(f"SELECT v FROM {table}"), 10)
        cql.execute(f"CREATE INDEX ON {table}(c2)")
        # Creating an index should *not* influence the fact that deletion
        # is not allowed
        with pytest.raises(InvalidRequest):
            cql.execute(f"DELETE FROM {table} WHERE p = 0 AND c2 = 1500")
        assert_row_count(cql.execute(f"SELECT v FROM {table}"), 10)
|
|
|
|
# Reproducer for issue #3403: A column name may contain all sorts of
# non-alphanumeric characters, including even "/". When an index is created,
# its default name - composed from the column name - may contain these
# characters and lead to problems - or at worst access to unintended file
# by using things like "../../.." in the column name.
def test_index_weird_chars_in_col_name(cql, test_keyspace):
    with tempfile.TemporaryDirectory() as tmpdir:
        # When issue #3403 exists, column name ../../...../tmpdir/x_yz will
        # cause Scylla to create the new index in tmpdir!
        # Of course a more sinister attacker can cause Scylla to create
        # directories anywhere in the file system - or to crash if the
        # directory creation fails - e.g., if magic_path ends in
        # /dev/null/hello, and /dev/null is not a directory
        magic_path='/..'*20 + tmpdir + '/x_yz'
        schema = f'pk int PRIMARY KEY, "{magic_path}" int'
        with new_test_table(cql, test_keyspace, schema) as table:
            cql.execute(f'CREATE INDEX ON {table}("{magic_path}")')
            # Creating the index should not have miraculously created
            # something in tmpdir! If it has, we have issue #3403.
            assert os.listdir(tmpdir) == []
            # Check that the expected index name was chosen - based on
            # only the alphanumeric/underscore characters of the column name.
            ks, cf = table.split('.')
            rows = list(cql.execute(f"SELECT index_name FROM system_schema.indexes WHERE keyspace_name = '{ks}' AND table_name = '{cf}'"))
            index_name = rows[0].index_name
            cleaned_up_column_name = ''.join(
                ch for ch in magic_path if ch.isalnum() or ch == '_')
            assert index_name == cf + '_' + cleaned_up_column_name + '_idx'
|
|
|
|
# Cassandra does not allow IN restrictions on non-primary-key columns,
# and Scylla does (see test_filtering.py::test_filter_in_restriction).
# However Scylla currently allows this only with ALLOW FILTERING.
# In theory, on an index column we could allow it also without filtering,
# just like we allow it on the partition key. But currently we don't
# so the following test is marked xfail. It's also cassandra_bug because
# Cassandra doesn't support it either (it gives the message "not yet
# supported" suggesting it may be fixed in the future).
@pytest.mark.xfail
def test_index_in_restriction(cql, test_keyspace, cassandra_bug):
    schema = 'pk int, ck int, x int, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(x)')
        stmt = cql.prepare(f'INSERT INTO {table} (pk, ck, x) VALUES (?, ?, ?)')
        # Rows: (1, 0, 0), (1, 1, 2), (1, 2, 4)
        for ck in range(3):
            cql.execute(stmt, [1, ck, ck*2])
        assert [(1,), (2,)] == list(cql.execute(f'SELECT ck FROM {table} WHERE x IN (2, 4)'))
        assert [(1,)] == list(cql.execute(f'SELECT ck FROM {table} WHERE x IN (2, 7)'))
        assert [] == list(cql.execute(f'SELECT ck FROM {table} WHERE x IN (3, 7)'))
|
|
|
|
# Test that a LIMIT works correctly in conjunction filtering - with an
# without a secondary index. The LIMIT is supposed to control the number
# of rows that the query returns - after filtering - not the number of
# rows before filtering.
# Reproduces #10649 - when use_index=True the test failed because LIMIT
# returned fewer than the requested number of rows.
@pytest.mark.parametrize("use_index", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#10649")), False])
def test_filter_and_limit(cql, test_keyspace, use_index, driver_bug_1):
    with new_test_table(cql, test_keyspace, 'pk int primary key, x int, y int') as table:
        if use_index:
            cql.execute(f'CREATE INDEX ON {table}(x)')
        stmt = cql.prepare(f'INSERT INTO {table} (pk, x, y) VALUES (?, ?, ?)')
        # Eight rows, all with x=1; y alternates 0,1,0,1,... so exactly
        # the even pks match y=0:
        for pk in range(8):
            cql.execute(stmt, [pk, 1, pk % 2])
        results = list(cql.execute(f'SELECT pk FROM {table} WHERE x = 1 AND y = 0 ALLOW FILTERING'))
        assert sorted(results) == [(0,), (2,), (4,), (6,)]
        # Make sure that with LIMIT 3 we get back exactly 3 results - not
        # less and also not more.
        results = list(cql.execute(f'SELECT pk FROM {table} WHERE x = 1 AND y = 0 LIMIT 3 ALLOW FILTERING'))
        assert sorted(results) == [(0,), (2,), (4,)]
        # Make the test even harder (exercising more code paths) by asking
        # to fetch the 3 results in tiny one-result pages instead of one page.
        s = cql.prepare(f'SELECT pk FROM {table} WHERE x = 1 AND y = 0 LIMIT 3 ALLOW FILTERING')
        s.fetch_size = 1
        assert sorted(cql.execute(s)) == [(0,), (2,), (4,)]
|
|
|
|
# The following test is similar to the previous one (test_filter_and_limit)
# with one main difference: Whereas in the previous test the table's schema
# had only a partition key, here we also add a clustering key.
# This variation in the test is important because Scylla's index-using code
# has a different code path for the case that the index lookup results in a
# list of matching partitions (the previous test) and when it results in a
# list of matching rows (this test).
# Reproduces #10649 - when use_index=True the test failed because LIMIT
# returned fewer than the requested number of rows.
@pytest.mark.parametrize("use_index", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#10649")), False])
def test_filter_and_limit_clustering(cql, test_keyspace, use_index):
    with new_test_table(cql, test_keyspace, 'pk int, ck int, x int, PRIMARY KEY(pk, ck)') as table:
        if use_index:
            cql.execute(f'CREATE INDEX ON {table}(x)')
        stmt = cql.prepare(f'INSERT INTO {table} (pk, ck, x) VALUES (?, ?, ?)')
        # Eight rows, all with x=1; ck alternates 0,1,0,1,... so exactly
        # the even pks match ck=0:
        for pk in range(8):
            cql.execute(stmt, [pk, pk % 2, 1])
        results = list(cql.execute(f'SELECT pk FROM {table} WHERE x = 1 AND ck = 0 ALLOW FILTERING'))
        assert sorted(results) == [(0,), (2,), (4,), (6,)]
        # Make sure that with LIMIT 3 we get back exactly 3 results - not
        # less and also not more.
        results = list(cql.execute(f'SELECT pk FROM {table} WHERE x = 1 AND ck = 0 LIMIT 3 ALLOW FILTERING'))
        assert sorted(results) == [(0,), (2,), (4,)]
        # Make the test even harder (exercising more code paths) by asking
        # to fetch the 3 results in tiny one-result pages instead of one page.
        s = cql.prepare(f'SELECT pk FROM {table} WHERE x = 1 AND ck = 0 LIMIT 3 ALLOW FILTERING')
        s.fetch_size = 1
        assert sorted(cql.execute(s)) == [(0,), (2,), (4,)]
|
|
|
|
# Another reproducer for #10649, similar to the previous test
|
|
# (test_filter_and_limit_clustering) but indexes the clustering
|
|
# key column. This test is closer to the use case of the original user
|
|
# who discovered #10649, and involves slightly different code paths in
|
|
# Scylla (the index-driver query needs to read individual rows, not
|
|
# entire partitions, from the base table).
|
|
@pytest.mark.xfail(reason="#10649")
def test_filter_and_limit_2(cql, test_keyspace):
    """Another reproducer for #10649, indexing a clustering-key column so
    that the index-driven query must read individual rows - not entire
    partitions - from the base table, and checking LIMIT correctness.
    """
    schema = 'pk int, ck1 int, ck2 int, x int, PRIMARY KEY (pk, ck1, ck2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(ck2)')
        insert = cql.prepare(f'INSERT INTO {table} (pk, ck1, ck2, x) VALUES (?, ?, ?, ?)')
        N = 10
        J = 3
        for i, j in itertools.product(range(N), range(J)):
            cql.execute(insert, [1, i, j, j+i%2])
        results = list(cql.execute(f'SELECT ck1 FROM {table} WHERE ck2 = 2 AND x = 3 ALLOW FILTERING'))
        # All rows above have pk=1. ck2=2 means j=2, and at that point
        # x=3 exactly when i is odd, so the expected matches are:
        expected = [(i,) for i in range(N) if i%2==1]
        assert results == expected
        for limit in [3, 1, N]:
            assert results[0:limit] == list(cql.execute(f'SELECT ck1 FROM {table} WHERE ck2 = 2 AND x = 3 LIMIT {limit} ALLOW FILTERING'))
        # Try exactly the same with adding pk=1, which shouldn't change
        # anything in the result (because all our rows have pk=1).
        for limit in [3, 1, N]:
            assert results[0:limit] == list(cql.execute(f'SELECT ck1 FROM {table} WHERE pk = 1 AND ck2 = 2 AND x = 3 LIMIT {limit} ALLOW FILTERING'))
|
|
|
|
# Yet another reproducer for #10649, this time using a local index instead
|
|
# of a global index. As before, test that a LIMIT works correctly in
|
|
# conjunction with filtering. A user tried this variant in issue #12766.
|
|
# This is a scylla_only test because local index is a Scylla-only feature.
|
|
@pytest.mark.parametrize("use_local_index", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#10649")), False])
def test_filter_and_limit_local_index(cql, test_keyspace, use_local_index, driver_bug_1, scylla_only):
    """Yet another reproducer for #10649, this time with a local index
    (a Scylla-only feature, hence scylla_only): check LIMIT together with
    filtering, including with one-row pages.
    """
    with new_test_table(cql, test_keyspace, 'p int, c int, x int, y int, primary key (p, c)') as table:
        if use_local_index:
            cql.execute(f'CREATE INDEX ON {table}((p), x)')
        insert = cql.prepare(f'INSERT INTO {table} (p, c, x, y) VALUES (?, ?, ?, ?)')
        # One partition p=0 with eight rows, all x=1; y alternates 0/1 so
        # exactly the rows with odd c match the y=1 filter below.
        for c in range(8):
            cql.execute(insert, [0, c, 1, c % 2])
        results = list(cql.execute(f'SELECT c FROM {table} WHERE p = 0 AND x = 1 AND y = 1 ALLOW FILTERING'))
        assert sorted(results) == [(1,), (3,), (5,), (7,)]
        # Make sure that with LIMIT N we get back exactly N results - not
        # less and also not more.
        assert [(1,)] == sorted(list(cql.execute(f'SELECT c FROM {table} WHERE p = 0 AND x = 1 AND y = 1 LIMIT 1 ALLOW FILTERING')))
        assert [(1,), (3,), (5,)] == sorted(list(cql.execute(f'SELECT c FROM {table} WHERE p = 0 AND x = 1 AND y = 1 LIMIT 3 ALLOW FILTERING')))
        # Make the test even harder (exercising more code paths) by asking
        # to fetch the 3 results in tiny one-result pages instead of one page.
        paged = cql.prepare(f'SELECT c FROM {table} WHERE p = 0 AND x = 1 AND y = 1 LIMIT 3 ALLOW FILTERING')
        paged.fetch_size = 1
        assert sorted(cql.execute(paged)) == [(1,), (3,), (5,)]
|
|
|
|
# Tests for issue #2962 - different type of indexing on collection columns.
|
|
# Note that we also have a randomized test for this feature as a C++ unit
|
|
# tests, as well as many tests translated from Cassandra's unit tests (grep
|
|
# the issue number #2962 to find them). Unlike the randomized test, the goal
|
|
# here is to try to cover as many corner cases we can think of, explicitly.
|
|
#
|
|
# Note that we can assume that on a single-node cqlpy, materialized view
|
|
# (and therefore index) updates are synchronous, so none of these tests need
|
|
# loops to wait for a change to be indexed.
|
|
|
|
def test_index_list(cql, test_keyspace):
    """Test a secondary index on a list<int> column (issue #2962).

    Covers CONTAINS lookups after appending to, removing from, wholesale
    replacing and element-wise updating the list, including the case of a
    list holding the same value more than once.
    """
    schema = 'pk int, ck int, l list<int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Without an index, the "CONTAINS" restriction requires filtering
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 3')
        cql.execute(f'CREATE INDEX ON {table}(l)')
        # The list is still empty, nothing should match
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 3'))
        # Add values to the list, check they can be found
        cql.execute(f'UPDATE {table} set l = l + [7] WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        cql.execute(f'UPDATE {table} set l = l + [8] WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 8'))
        # Remove values from the list, check they can no longer be found
        cql.execute(f'UPDATE {table} set l = l - [7] WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 8'))
        cql.execute(f'UPDATE {table} set l = l - [8] WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 8'))
        # Replace entire value of list, everything in it should be indexed
        cql.execute(f'INSERT INTO {table} (pk, ck, l) VALUES (1, 2, [4, 5])')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 4'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 8'))
        cql.execute(f'UPDATE {table} set l = [2, 5] WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 4'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 8'))
        # A list can have the same value more than once, here we append
        # another 2 to the list which will now contain [2, 5, 2]. Searching
        # for 2, we should find this row, but only once.
        cql.execute(f'UPDATE {table} set l = l + [2] WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
        # If we remove the first element from the list [2, 5, 2], there is
        # still a 2 remaining, so it should still be found:
        cql.execute(f'DELETE l[0] FROM {table} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
        # Removing the other 2 (now l=[5,2]) should leave a search for 2
        # returning nothing:
        cql.execute(f'DELETE l[1] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
        # The list is now [5]. Replace the 5 with 2 and see 2 is found, 5 not:
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 5'))
        cql.execute(f'UPDATE {table} set l[0] = 2 WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 5'))
        # Replacing the list [2] with an empty list works as expected:
        cql.execute(f'UPDATE {table} SET l = [] WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE l CONTAINS 2'))
|
|
|
|
def test_index_set(cql, test_keyspace):
    """Test a secondary index on a set<int> column (issue #2962).

    Covers CONTAINS lookups after adding, removing and wholesale replacing
    set elements, plus deleting the whole set with DELETE. Unlike a list,
    a set holds each value at most once.
    """
    schema = 'pk int, ck int, s set<int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Without an index, the "CONTAINS" restriction requires filtering
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 3')
        cql.execute(f'CREATE INDEX ON {table}(s)')
        # The set is still empty, nothing should match
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 3'))
        # Add values to the set, check they can be found
        cql.execute(f'UPDATE {table} set s = s + {{7}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        cql.execute(f'UPDATE {table} set s = s + {{8}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 8'))
        # Remove values from the set, check they can no longer be found
        cql.execute(f'UPDATE {table} set s = s - {{7}} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 8'))
        cql.execute(f'UPDATE {table} set s = s - {{8}} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 8'))
        # Replace entire value of set, everything in it should be indexed
        cql.execute(f'INSERT INTO {table} (pk, ck, s) VALUES (1, 2, {{4, 5}})')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 4'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 8'))
        cql.execute(f'UPDATE {table} set s = {{2, 5}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 2'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 4'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 8'))
        # A set can only have the same value once. The set is now {2,5},
        # trying to add 2 again makes no difference - and removing it just once
        # will remove this value:
        cql.execute(f'UPDATE {table} set s = s + {{2}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 2'))
        cql.execute(f'UPDATE {table} set s = s - {{2}} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 2'))
        # The set is now {5}. Replacing it with an empty set works as expected:
        cql.execute(f'UPDATE {table} SET s = {{}} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 5'))
        # The DELETE operation does the same thing:
        cql.execute(f'UPDATE {table} SET s = {{17,18}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 17'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 18'))
        cql.execute(f'DELETE s FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 17'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE s CONTAINS 18'))
|
|
|
|
def test_index_map_values(cql, test_keyspace):
    """Test a secondary index on the *values* of a map<int,int> column
    (issue #2962).

    A plain index on m is the same as values(m): it enables CONTAINS but
    not CONTAINS KEY. Covers adding, removing, wholesale replacing and
    element-wise updating map entries, including several keys sharing the
    same value.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Without an index, the "CONTAINS" restriction requires filtering
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3')
        cql.execute(f'CREATE INDEX ON {table}(m)')
        # indexing m (same as values(m)) will allow CONTAINS but not CONTAINS KEY
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3')
        # The map is still empty, nothing should match
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3'))
        # Add elements to the map, check their values can be found
        cql.execute(f'UPDATE {table} set m = m + {{10: 7}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        cql.execute(f'UPDATE {table} set m = m + {{11: 8}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 8'))
        # Remove elements from the map, check they can no longer be found
        cql.execute(f'DELETE m[10] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 8'))
        cql.execute(f'DELETE m[11] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 8'))
        # Replace entire value of map, everything in it should be indexed
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 2, {{17: 4, 18: 5}})')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 4'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 8'))
        cql.execute(f'UPDATE {table} set m = {{3: 2, 4: 5}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 2'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 4'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 8'))
        # A map can have multiple elements with the same *value*. The map is now
        # {3:2, 4:5}, if we add another element of value 2, searching for value 2
        # will return the item only once. But we'll need to delete both elements
        # to no longer find the value 2:
        cql.execute(f'UPDATE {table} set m = m + {{14: 2}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 2'))
        cql.execute(f'DELETE m[3] FROM {table} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 2'))
        cql.execute(f'DELETE m[14] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 2'))
        # The map is now {4:5}. Change the value of 4 and see it take effect:
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 5'))
        cql.execute(f'UPDATE {table} set m[4] = 6 WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 5'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 6'))
        # The "-" operation also works as expected (it removes specific *keys*,
        # not *values*, despite what some confused documentation claims):
        cql.execute(f'UPDATE {table} set m = m - {{4}} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 6'))
|
|
|
|
# In the previous test (test_index_map_values) we noted that if one map has
|
|
# several keys with the same value, then the "values(m)" index must store
|
|
# all of them, so that we can still match this value even after removing one
|
|
# of those keys. We tested in the previous test that although the same value
|
|
# appears more than once, when we search for it, we only get the same item
|
|
# once. Under the hood, Scylla does find the same value multiple times, but
|
|
# then eliminates the duplicate matched row and returns it only once.
|
|
# There is a complication, that this de-duplication does not easily span
|
|
# *paging*. So the purpose of this test is to check that paging does not
|
|
# cause the same row to be returned more than once.
|
|
def test_index_map_values_paging(cql, test_keyspace):
    """A values(m) index finds the same row once per matching map value
    internally, and must de-duplicate across page boundaries: verify that
    a row whose map holds the searched value many times is returned only
    once, for various small page sizes.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # index m (same as values(m)). Will allow "CONTAINS".
        cql.execute(f'CREATE INDEX ON {table}(m)')
        # Insert a map where 10 out of the 12 elements have the same value 3
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 2, {{0:4, 1:3, 2:3, 3:3, 4:3, 5:3, 6:3, 7:3, 8:3, 9:3, 10:3, 11:7}})')
        # But looking for "m CONTAINS 3" should return the row (1,2) only once:
        query = f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3'
        assert [(1,2)] == list(cql.execute(query))
        # Try exactly the same with paging with small page sizes. If Scylla
        # doesn't de-duplicate the results between pages, the same row will
        # be returned multiple times.
        for fetch in [1, 2, 3, 7]:
            assert [(1,2)] == list(cql.execute(SimpleStatement(query, fetch_size=fetch)))
|
|
|
|
# In the previous test (test_index_map_values*) all tests involved a single
|
|
# row, which could match a search, or not. In this test we verify that the
|
|
# case of multiple matching rows also works.
|
|
def test_index_map_values_multiple_matching_rows(cql, test_keyspace, driver_bug_1):
    """Unlike the single-row tests above, verify a values(m) CONTAINS query
    that matches several rows, with and without paging.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # index m (same as values(m)). Will allow "CONTAINS".
        cql.execute(f'CREATE INDEX ON {table}(m)')
        # Insert several rows with several different maps, some of them have
        # the value 3 in them somewhere, others don't. One of the maps has
        # multiple occurrences of the value 3, so we also reproduce here the
        # same bug that test_index_map_values_paging() reproduces.
        # Note: Scylla needs to skip a duplicate 3 value in (2,4) which
        # results in an empty page in the result set when page_size=1. We
        # need the driver to correctly support this, hence the "driver_bug_1".
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 2, {{1:2, 3:4}})')
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 3, {{1:3, 3:4}})')
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 4, {{7:3}})')
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (2, 2, {{1:3, 2:3, 3:4}})')
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (2, 4, {{}})')
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (2, 5, {{7:3}})')
        query = f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3'
        expected = [(1,3),(1,4),(2,2),(2,5)]
        assert expected == list(cql.execute(query))
        for fetch in [1, 2, 3, 7]:
            assert expected == list(cql.execute(SimpleStatement(query, fetch_size=fetch)))
|
|
|
|
# In the previous tests (test_index_map_values*) all tests involved a base
|
|
# table with both partition keys and clustering keys. Because some of the
|
|
# implementation is different depending on whether the schema has clustering keys,
|
|
# let's also write a similar test with just a partition key:
|
|
def test_index_map_values_partition_key_only(cql, test_keyspace, driver_bug_1):
    """Same as test_index_map_values_multiple_matching_rows, but on a table
    with only a partition key (no clustering key), which takes a different
    code path in the index implementation.
    """
    schema = 'pk int, m map<int,int>, PRIMARY KEY (pk)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # index m (same as values(m)). Will allow "CONTAINS".
        cql.execute(f'CREATE INDEX ON {table}(m)')
        # Insert several rows with several different maps, some of them have
        # the value 3 in them somewhere, others don't. One of the maps has
        # multiple occurrences of the value 3, so we also reproduce here the
        # same bug that test_index_map_values_paging() reproduces (and here
        # test its intersection with the case of no clustering key).
        cql.execute(f'INSERT INTO {table} (pk, m) VALUES (1, {{1:2, 3:4}})')
        cql.execute(f'INSERT INTO {table} (pk, m) VALUES (2, {{1:3, 3:4}})')
        cql.execute(f'INSERT INTO {table} (pk, m) VALUES (3, {{7:3}})')
        cql.execute(f'INSERT INTO {table} (pk, m) VALUES (4, {{1:3, 2:3, 3:4}})')
        cql.execute(f'INSERT INTO {table} (pk, m) VALUES (5, {{}})')
        query = f'SELECT pk FROM {table} WHERE m CONTAINS 3'
        expected = [(2,), (3,), (4,)]
        assert expected == sorted(cql.execute(query))
        for fetch in [1, 2, 3, 7]:
            assert expected == sorted(cql.execute(SimpleStatement(query, fetch_size=fetch)))
            # I wanted to check here that page_size is actually obeyed,
            # but we can't - when Scylla skips one of the duplicate values
            # it can result in a smaller page, and while not great (Cassandra
            # doesn't do it, all its pages are full size), it's legal.
|
|
|
|
def test_index_map_keys(cql, test_keyspace):
    """Test a secondary index on the *keys* of a map<int,int> column
    (issue #2962).

    A keys(m) index enables CONTAINS KEY but not CONTAINS. Covers adding,
    removing and wholesale replacing map entries, and verifies that
    changing an entry's value does not affect key lookups.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Without an index, the "CONTAINS KEY" restriction requires filtering
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3')
        cql.execute(f'CREATE INDEX ON {table}(keys(m))')
        # indexing keys(m) will allow CONTAINS KEY but not CONTAINS
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3')
        # The map is still empty, nothing should match
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3'))
        # Add elements to the map, check their keys can be found
        cql.execute(f'UPDATE {table} set m = m + {{10: 7}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 10'))
        cql.execute(f'UPDATE {table} set m = m + {{11: 8}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 11'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 10'))
        # Remove elements from the map, check they can no longer be found
        cql.execute(f'DELETE m[10] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 10'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 11'))
        cql.execute(f'DELETE m[11] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 10'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 11'))
        # Replace entire value of map, everything in it should be indexed
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 2, {{17: 4, 18: 5}})')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 17'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 18'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 10'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 11'))
        cql.execute(f'UPDATE {table} set m = {{3: 2, 4: 5}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 4'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 17'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 18'))
        # The map is now {3:2, 4:5}. Change the value of 4 and see it has
        # no effect on keys:
        cql.execute(f'UPDATE {table} set m[4] = 6 WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 4'))
        # The "-" operation also works as expected (it removes specific *keys*,
        # not *values*, despite what some confused documentation claims):
        cql.execute(f'UPDATE {table} set m = m - {{4}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 4'))
|
|
|
|
def test_index_map_entries(cql, test_keyspace):
    """Test a secondary index on the *entries* of a map<int,int> column
    (issue #2962).

    An entries(m) index enables the "m[key] = value" restriction, but
    neither CONTAINS nor CONTAINS KEY. Covers adding, removing, wholesale
    replacing and element-wise updating map entries.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(entries(m))')
        # indexing entries(m) will allow neither CONTAINS KEY nor CONTAINS
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS 3')
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f'SELECT pk,ck FROM {table} WHERE m CONTAINS KEY 3')
        # The map is still empty, nothing should match
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[1] = 2'))
        # Add elements to the map, check their entries can be found
        cql.execute(f'UPDATE {table} set m = m + {{10: 7}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[10] = 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[10] = 8'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[11] = 7'))
        cql.execute(f'UPDATE {table} set m = m + {{11: 8}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[10] = 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[11] = 8'))
        # Remove elements from the map, check they can no longer be found
        cql.execute(f'DELETE m[10] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[10] = 7'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[11] = 8'))
        cql.execute(f'DELETE m[11] FROM {table} WHERE pk=1 AND ck=2')
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[10] = 7'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[11] = 8'))
        # Replace entire value of map, everything in it should be indexed
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 2, {{17: 4, 18: 5}})')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[17] = 4'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[18] = 5'))
        cql.execute(f'UPDATE {table} set m = {{3: 2, 4: 5}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[3] = 2'))
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[4] = 5'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[17] = 4'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[18] = 5'))
        # The map is now {3:2, 4:5}. Change the value of 4 and see it has
        # the expected effect
        cql.execute(f'UPDATE {table} set m[4] = 6 WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[4] = 6'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[4] = 5'))
        # The "-" operation also works as expected (it removes specific *keys*,
        # not *values*, despite what some confused documentation claims):
        cql.execute(f'UPDATE {table} set m = m - {{4}} WHERE pk=1 AND ck=2')
        assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[3] = 2'))
        assert [] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE m[4] = 6'))
|
|
|
|
# Check that it is possible to index the same map column in different ways
|
|
# (values, keys and entries) at the same time:
|
|
def test_index_map_multiple(cql, test_keyspace):
    """Verify that the same map column can be indexed by values(), keys()
    and entries() simultaneously, each index enabling its own kind of
    restriction: CONTAINS, CONTAINS KEY and m[key] = value respectively.
    """
    schema = 'pk int, ck int, m map<int,int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        for kind in ['values', 'keys', 'entries']:
            cql.execute(f'CREATE INDEX ON {table}({kind}(m))')
        cql.execute(f'INSERT INTO {table} (pk, ck, m) VALUES (1, 2, {{17: 4, 18: 5}})')
        # Each restriction below is served by a different one of the three
        # indexes created above:
        for restriction in ['m CONTAINS 4', 'm CONTAINS KEY 18', 'm[18] = 5']:
            assert [(1,2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE {restriction}'))
|
|
|
|
# Test that indexing keys(x), values(x) or entries(x) is only allowed for
# specific column types (namely, specific kinds of collections).
def test_index_collection_wrong_type(cql, test_keyspace):
    schema = 'pk int primary key, x int, l list<int>, s set<int>, m map<int,int>, t tuple<int,int>'
    with new_test_table(cql, test_keyspace, schema) as table:
        # scalar type: only the bare column name is allowed:
        cql.execute(f'CREATE INDEX ON {table}(x)')
        with pytest.raises(InvalidRequest, match="keys()"):
            cql.execute(f'CREATE INDEX ON {table}(keys(x))')
        with pytest.raises(InvalidRequest, match="entries()"):
            cql.execute(f'CREATE INDEX ON {table}(entries(x))')
        with pytest.raises(InvalidRequest, match="values()"):
            cql.execute(f'CREATE INDEX ON {table}(values(x))')
        # list: bare column name and values() is allowed. Both of these
        # refer to the same index - so you can't have both at the same time.
        cql.execute(f'CREATE INDEX ON {table}(l)')
        with pytest.raises(InvalidRequest, match="keys()"):
            cql.execute(f'CREATE INDEX ON {table}(keys(l))')
        with pytest.raises(InvalidRequest, match="entries()"):
            cql.execute(f'CREATE INDEX ON {table}(entries(l))')
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(f'CREATE INDEX ON {table}(values(l))')
        # Dropping the bare-name index frees its slot, after which the
        # equivalent values(l) index can be created:
        cql.execute(f"DROP INDEX {test_keyspace}.{table.split('.')[1]}_l_idx")
        cql.execute(f'CREATE INDEX ON {table}(values(l))')
        # set: bare column name and values() is allowed. Both of these
        # refer to the same index - so you can't have both at the same time.
        cql.execute(f'CREATE INDEX ON {table}(s)')
        with pytest.raises(InvalidRequest, match="keys()"):
            cql.execute(f'CREATE INDEX ON {table}(keys(s))')
        with pytest.raises(InvalidRequest, match="entries()"):
            cql.execute(f'CREATE INDEX ON {table}(entries(s))')
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(f'CREATE INDEX ON {table}(values(s))')
        cql.execute(f"DROP INDEX {test_keyspace}.{table.split('.')[1]}_s_idx")
        cql.execute(f'CREATE INDEX ON {table}(values(s))')
        # map: bare column name, values(), keys() and entries() are all
        # allowed. The first two refer to the same index - so you can't
        # have both at the same time.
        cql.execute(f'CREATE INDEX ON {table}(m)')
        cql.execute(f'CREATE INDEX ON {table}(keys(m))')
        cql.execute(f'CREATE INDEX ON {table}(entries(m))')
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(f'CREATE INDEX ON {table}(values(m))')
        cql.execute(f"DROP INDEX {test_keyspace}.{table.split('.')[1]}_m_idx")
        cql.execute(f'CREATE INDEX ON {table}(values(m))')
        # A tuple is not a collection, and doesn't support indexing its
        # elements separately (this would not have been possible, by the way,
        # because it can have elements of different types).
        cql.execute(f'CREATE INDEX ON {table}(t)')
        with pytest.raises(InvalidRequest, match="keys()"):
            cql.execute(f'CREATE INDEX ON {table}(keys(t))')
        with pytest.raises(InvalidRequest, match="entries()"):
            cql.execute(f'CREATE INDEX ON {table}(entries(t))')
        with pytest.raises(InvalidRequest, match="values()"):
            cql.execute(f'CREATE INDEX ON {table}(values(t))')
        # None of the above types allow a FULL index - it is only
        # allowed for *frozen* collections.
        with pytest.raises(InvalidRequest, match="full()"):
            cql.execute(f'CREATE INDEX ON {table}(full(t))')
        with pytest.raises(InvalidRequest, match="full()"):
            cql.execute(f'CREATE INDEX ON {table}(full(l))')
        with pytest.raises(InvalidRequest, match="full()"):
            cql.execute(f'CREATE INDEX ON {table}(full(m))')
        with pytest.raises(InvalidRequest, match="full()"):
            cql.execute(f'CREATE INDEX ON {table}(full(s))')
        with pytest.raises(InvalidRequest, match="full()"):
            cql.execute(f'CREATE INDEX ON {table}(full(t))')
|
|
|
|
# Check the default name of collection indexes. This "default name" is
# needed to drop an index which was created without explicitly specifying
# a name for it. We want the default name to be identical to Cassandra,
# because an application may assume it is so.
def test_index_collection_default_name(cql, test_keyspace):
    schema = 'pk int primary key, m map<int,int>'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Note: {table} is already keyspace-qualified ("ks.tab"), so
        # "{table}_m_idx" names the index "tab_m_idx" in keyspace ks.
        cql.execute(f'CREATE INDEX ON {table}(m)')
        cql.execute(f"DROP INDEX {table}_m_idx")
        # values(m) and m refer to the same thing so must have the same
        # default name.
        cql.execute(f'CREATE INDEX ON {table}(values(m))')
        cql.execute(f"DROP INDEX {table}_m_idx")
        # keys(m) and entries(m) are different indexes than just m, but
        # their default index name turns out to be exactly the same one:
        cql.execute(f'CREATE INDEX ON {table}(keys(m))')
        cql.execute(f"DROP INDEX {table}_m_idx")
        cql.execute(f'CREATE INDEX ON {table}(entries(m))')
        cql.execute(f"DROP INDEX {table}_m_idx")
        # We can create multiple types of the above indexes at the same
        # time (see also test_index_map_multiple() above), so they will
        # get different default names using the standard default index
        # name mechanism (adding _1, etc.)
        cql.execute(f'CREATE INDEX ON {table}(m)')
        cql.execute(f'CREATE INDEX ON {table}(keys(m))')
        cql.execute(f'CREATE INDEX ON {table}(entries(m))')
        cql.execute(f"DROP INDEX {table}_m_idx")
        cql.execute(f"DROP INDEX {table}_m_idx_1")
        cql.execute(f"DROP INDEX {table}_m_idx_2")
|
|
|
|
# Reproducer for issue #10707 - indexing a column whose name is a quoted
# string should work fine. Even if the quoted string happens to look like
# an instruction to index a collection, e.g., "keys(m)".
def test_index_quoted_names(cql, test_keyspace):
    quoted_names = ['"hEllo"', '"x y"', '"hi""hello""yo"', '"""hi"""', '"keys(m)"', '"values(m)"', '"entries(m)"']
    # Part 1: scalar columns with quoted names, indexed directly.
    schema = 'pk int, ck int, m int, ' + ','.join(qn + " int" for qn in quoted_names) + ', PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        for qn in quoted_names:
            cql.execute(f'CREATE INDEX ON {table}({qn})')
        column_list = ','.join(quoted_names)
        value_list = ','.join('3' for _ in quoted_names)
        cql.execute(f'INSERT INTO {table} (pk, ck, {column_list}) VALUES (1, 2, {value_list})')
        for qn in quoted_names:
            assert [(1, 2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE {qn} = 3'))

    # Part 2: maps with quoted names - we can then ask to index something
    # strange-looking like keys("keys(m)").
    schema = 'pk int, ck int, m int, ' + ','.join(qn + " map<int,int>" for qn in quoted_names) + ', PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        for qn in quoted_names:
            cql.execute(f'CREATE INDEX ON {table}(keys({qn}))')
        column_list = ','.join(quoted_names)
        value_list = ','.join('{3:4}' for _ in quoted_names)
        cql.execute(f'INSERT INTO {table} (pk, ck, {column_list}) VALUES (1, 2, {value_list})')
        for qn in quoted_names:
            assert [(1, 2)] == list(cql.execute(f'SELECT pk,ck FROM {table} WHERE {qn} CONTAINS KEY 3'))
|
|
|
|
# Creating a *local* (partition-restricted) secondary index on a collection
# column should eventually be supported; until issue #10713 is fixed the
# CREATE INDEX below fails, hence the xfail marker.
@pytest.mark.xfail(reason="#10713 - local collection indexing is not implemented yet")
def test_local_secondary_index_on_collection(cql, test_keyspace):
    schema = 'pk int, ck int, l list<int>, PRIMARY KEY (pk, ck)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # The "((pk), l)" target declares a local index restricted to pk:
        cql.execute(f'CREATE INDEX ON {table}((pk), l)')
|
|
|
|
# Test that queries with the index over collection provide the same answer as it
# would be without index, but with ALLOW FILTERING.
# The operations on collections here are picked up randomly.
def test_secondary_collection_index(cql, test_keyspace):

    # All randomness must go through this one seeded generator, so that a
    # failing run can be reproduced exactly by re-using the printed seed.
    # (Previously some of the helpers below used the global `random` module,
    # which made the printed seed insufficient for reproduction.)
    seed = int(time.time()*1e8)
    print(f"Seed for collection index test: {seed}")
    rand = random.Random(seed)

    schema = 'id int, m map<int, text>, primary key (id)'

    possible_ids = [100, 101]
    possible_keys = [1, 2, 3]
    possible_values = ['abc', 'def', 'ghi']

    # Every mutation helper accepts **kwargs so it can be invoked with the
    # full random_args() dict and simply ignore the arguments it doesn't use.
    def insert(table, id, map, **kwargs):
        a = (f'insert into {table}(id, m) values (%s, %s)', (id, map))
        print(a)
        cql.execute(*a)
    def update_cell(table, id, key, value, **kwargs):
        a = (f'update {table} set m[%s] = %s where id = %s', (key, value, id))
        print(a)
        cql.execute(*a)
    def update_delete(table, id, keys, **kwargs):
        a = (f'update {table} set m = m - %s where id = %s', (keys, id))
        print(a)
        cql.execute(*a)
    def update_add(table, id, map, **kwargs):
        a = (f'update {table} set m = m + %s where id = %s', (map, id))
        print(a)
        cql.execute(*a)
    def delete(table, id, **kwargs):
        a = (f'delete m from {table} where id = %s', (id,))
        print(a)
        cql.execute(*a)
    def delete_cell(table, id, key, **kwargs):
        a = (f'delete m[%s] from {table} where id = %s', (key, id))
        print(a)
        cql.execute(*a)

    def random_map():
        # A random subset of the possible keys, each with a random value.
        size = rand.randrange(len(possible_keys))
        keys = rand.sample(possible_keys, k=size)
        values = rand.choices(possible_values, k=size)
        return dict(zip(keys, values))
    def random_keys():
        return set(random_map())
    def random_key():
        # Use the seeded generator (was: random.choice) for reproducibility.
        return rand.choice(possible_keys)
    def random_value():
        return rand.choice(possible_values)
    def random_id():
        return rand.choice(possible_ids)

    def random_operation():
        return rand.choice([insert, update_cell, update_delete, update_add, delete, delete_cell])
    def random_args():
        # A superset of the arguments that any single operation needs;
        # the operations ignore the extras via **kwargs.
        return {
            'map': random_map(),
            'key': random_key(),
            'value': random_value(),
            'keys': random_keys(),
            'id': random_id(),
        }

    with new_test_table(cql, test_keyspace, schema) as tab1, new_test_table(cql, test_keyspace, schema) as tab2:
        # tab1 is queried through its indexes, tab2 through ALLOW FILTERING;
        # the two must always agree.
        def select(cql, table, where, *args):
            query = f'select id from {table} where {where}'
            if table is tab2:
                query += ' allow filtering'
            try:
                return cql.execute(query, *args)
            except:
                print('args=', args, table, where)
                raise
        def test_all_possible_selects():
            # Include one key/value that never appears in the data, so we
            # also exercise queries with an empty result.
            possible_keys_ = possible_keys + [10000]
            possible_values_ = possible_values + ['aaaaa']
            for k, v in itertools.product(possible_keys_, possible_values_):
                r1 = select(cql, tab1, 'm[%s] = %s', (k, v))
                r2 = select(cql, tab2, 'm[%s] = %s', (k, v))
                assert_rows_ignoring_order(r1, *list(r2))
            for k in possible_keys_:
                r1 = select(cql, tab1, 'm contains key %s', (k,))
                r2 = select(cql, tab2, 'm contains key %s', (k,))
                assert_rows_ignoring_order(r1, *list(r2))
            for v in possible_values_:
                r1 = select(cql, tab1, 'm contains %s', (v,))
                r2 = select(cql, tab2, 'm contains %s', (v,))
                assert_rows_ignoring_order(r1, *list(r2))

        cql.execute(f'create index on {tab1}(keys(m))')
        cql.execute(f'create index on {tab1}(values(m))')
        cql.execute(f'create index on {tab1}(entries(m))')

        # Apply 50 random mutations to both tables in lockstep, comparing
        # indexed vs. filtered query results after each one.
        for _ in range(50):
            op = random_operation()
            args = random_args()
            print(f"op={op}, args={args}")
            for tab in [tab1, tab2]:
                op(tab, **args)
            test_all_possible_selects()
|
|
|
|
# Test that paging through a select using a secondary index works as
# expected, returning pages of the requested size.
# We have several tests here, for different schemas, that exercises
# different code paths and may expose different bugs.

def test_index_paging_pk_ck(cql, test_keyspace):
    schema = 'p int, c int, x int, primary key (p,c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(x)")
        insert = cql.prepare(f"INSERT INTO {table}(p,c,x) VALUES (?,?,?)")
        # Ten matching rows (x=3) plus one row that must not match:
        for k in range(10):
            cql.execute(insert, [k, k, 3])
        cql.execute(insert, [17, 17, 2])
        expected = [(k,) for k in range(10)]
        for page_size in [1, 2, 3, 100]:
            stmt = SimpleStatement(f"SELECT p FROM {table} WHERE x = 3", fetch_size=page_size)
            # Walk through all pages, verifying that every page but the last
            # holds exactly page_size rows (partial intermediate pages are
            # theoretically allowed but not expected from Scylla or
            # Cassandra here), and collect everything we read.
            collected = []
            page = cql.execute(stmt)
            while True:
                collected.extend(page.current_rows)
                if len(page.current_rows) != page_size:
                    break
                page = cql.execute(stmt, paging_state=page.paging_state)
            # The loop stops at the first non-full page - it must be the
            # final, strictly-shorter one.
            assert len(page.current_rows) < page_size
            # Altogether we must have read exactly the rows 0..9.
            assert sorted(collected) == expected
|
|
|
|
def test_index_paging_pk_only(cql, test_keyspace):
    # Same paging check as test_index_paging_pk_ck, but with a schema that
    # has no clustering key at all.
    schema = 'p int, x int, primary key (p)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(x)")
        insert = cql.prepare(f"INSERT INTO {table}(p,x) VALUES (?,?)")
        for k in range(10):
            cql.execute(insert, [k, 3])
        cql.execute(insert, [17, 2])
        expected = [(k,) for k in range(10)]
        for page_size in [1, 2, 3, 100]:
            stmt = SimpleStatement(f"SELECT p FROM {table} WHERE x = 3", fetch_size=page_size)
            collected = []
            page = cql.execute(stmt)
            while True:
                collected.extend(page.current_rows)
                if len(page.current_rows) != page_size:
                    break
                page = cql.execute(stmt, paging_state=page.paging_state)
            # The final page must be strictly shorter than page_size.
            assert len(page.current_rows) < page_size
            assert sorted(collected) == expected
|
|
|
|
# When a partition key is indexed (it is redundant to index the entire
# partition key, so this test has a compound partition key and indexes only
# one component), the restriction can match the entire partition, but paging
# still needs to page through it - and not return the entire partition as one
# page! Reproduces #7432.
@pytest.mark.xfail(reason="issue #7432")
def test_index_paging_match_partition(cql, test_keyspace):
    schema = 'p1 int, p2 int, c int, primary key (p1,p2,c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(p2)")
        insert = cql.prepare(f"INSERT INTO {table}(p1,p2,c) VALUES (?,?,?)")
        for i in range(10):
            # All of these have p2 = 1:
            cql.execute(insert, [1, 1, i])
        # One unrelated row, which must not match the p2 = 1 query:
        cql.execute(insert, [17, 17, 2])
        for page_size in [1, 2, 3, 100]:
            stmt = SimpleStatement(f"SELECT c FROM {table} WHERE p2 = 1", fetch_size=page_size)
            # Check that:
            # 1. Each page of results has the expected page_size, or less in
            #    the last page. Although partial pages are theoretically
            #    allowed (and happen in other tests), in this test we don't
            #    expect Scylla or Cassandra to generate them.
            # 2. Check that all the results read over all pages are the
            #    expected ones (0...9)
            all_rows = []
            results = cql.execute(stmt)
            while len(results.current_rows) == page_size:
                all_rows.extend(results.current_rows)
                results = cql.execute(stmt, paging_state=results.paging_state)
            # After pages of page_size, the last page should be partial
            assert len(results.current_rows) < page_size
            all_rows.extend(results.current_rows)
            # Finally check that altogether, we read the right rows.
            assert sorted(all_rows) == [(i,) for i in range(10)]
|
|
|
|
# Currently, paging of queries that uses secondary indexes on static columns
# is unable to page through partitions of the base table and must return them
# in whole. Related to #7432.
@pytest.mark.xfail(reason="issue #7432")
def test_index_paging_static_column(cql, test_keyspace):
    schema = 'p int, c int, s int static, PRIMARY KEY(p, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(s)')
        insert = cql.prepare(f"INSERT INTO {table}(p,c,s) VALUES (?,?,?)")
        # 5 partitions with 5 rows each, all sharing s = 42:
        for p in range(5):
            for c in range(5):
                cql.execute(insert, [p, c, 42])
        for page_size in [1, 2, 3, 4, 100]:
            stmt = SimpleStatement(f"SELECT p, c FROM {table} WHERE s = 42", fetch_size=page_size)

            # Every page except the last must have exactly page_size rows,
            # and together the pages must cover all 25 (p, c) rows.
            all_rows = []
            results = cql.execute(stmt)
            while len(results.current_rows) == page_size:
                all_rows.extend(results.current_rows)
                results = cql.execute(stmt, paging_state=results.paging_state)
            # After pages of page_size, the last page should be partial
            assert len(results.current_rows) < page_size
            all_rows.extend(results.current_rows)
            # Finally check that altogether, we read the right rows.
            assert sorted(all_rows) == [(p,c) for p in range(5) for c in range(5)]
|
|
|
|
# If, in contrast with test_index_paging_match_partition above which indexed
# a partition key column, we index a clustering key column, paging does work
# as expected and stops at the right page size. However, as was noted in
# issue #7432, if we add "GROUP BY p" to the query, Scylla now mistakenly
# returns all the results in one page instead of stopping. So the following
# test passes with use_group_by = False but failed with use_group_by = True.
@pytest.mark.parametrize("use_group_by", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#7432")), False])
def test_index_paging_group_by(cql, test_keyspace, use_group_by):
    schema = 'p int, c1 int, c2 int, primary key (p,c1,c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(c1)")
        insert = cql.prepare(f"INSERT INTO {table}(p,c1,c2) VALUES (?,?,?)")
        for i in range(10):
            # All of these have c1 = 1:
            cql.execute(insert, [i, 1, i])
        # One unrelated row, which must not match the c1 = 1 query:
        cql.execute(insert, [17, 17, 2])
        for page_size in [1, 2, 3, 100]:
            group_by = 'GROUP BY p' if use_group_by else ''
            stmt = SimpleStatement(f"SELECT p FROM {table} WHERE c1 = 1 {group_by}", fetch_size=page_size)
            # Check that:
            # 1. Each page of results has the expected page_size, or less in
            #    the last page. Although partial pages are theoretically
            #    allowed (and happen in other tests), in this test we don't
            #    expect Scylla or Cassandra to generate them.
            # 2. Check that all the results read over all pages are the
            #    expected ones (0...9)
            all_rows = []
            results = cql.execute(stmt)
            while len(results.current_rows) == page_size:
                all_rows.extend(results.current_rows)
                results = cql.execute(stmt, paging_state=results.paging_state)
            # After pages of page_size, the last page should be partial
            assert len(results.current_rows) < page_size
            all_rows.extend(results.current_rows)
            # Finally check that altogether, we read the right rows.
            assert sorted(all_rows) == [(i,) for i in range(10)]
|
|
|
|
# Tests basic operations on a static column index.
def test_static_column_index(cql, test_keyspace):
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(s)')

        def pks_with_s(value):
            # Indexed lookup of all partitions whose static column equals value.
            return sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = {value}'))

        # Insert three partitions, two of them sharing s=0:
        for pk, s in [(0, 0), (1, 0), (2, 1)]:
            cql.execute(f'INSERT INTO {table} (pk, s) VALUES ({pk}, {s})')

        assert [(0,),(1,)] == pks_with_s(0)
        assert [(2,)] == pks_with_s(1)

        # Updating a static column moves the whole partition in the index:
        cql.execute(f'UPDATE {table} SET s = 1 WHERE pk = 1')

        assert [(0,)] == pks_with_s(0)
        assert [(1,),(2,)] == pks_with_s(1)

        # A partition delete removes the partition from the index:
        cql.execute(f'DELETE FROM {table} WHERE pk = 2')

        assert [(0,)] == pks_with_s(0)
        assert [(1,)] == pks_with_s(1)
|
|
|
|
# Tests that building static indexes from a non-empty state works.
def test_static_column_index_build(cql, test_keyspace):
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Populate the table *before* creating the index, so the index
        # view must be built from pre-existing data.
        cql.execute(f'INSERT INTO {table} (pk, s) VALUES (0, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s) VALUES (1, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s) VALUES (2, 0)')
        cql.execute(f'CREATE INDEX ON {table}(s)')

        # Indexes are created in the background, and there is no portable
        # way (in particular on Cassandra) to query the secondary index
        # build status - so just poll for up to 30 seconds until the index
        # appears to be fully built.
        deadline = time.time() + 30
        rows = None
        while time.time() < deadline:
            try:
                rows = sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 0'))
                if len(rows) == 3:
                    break
            except:
                # In Cassandra, the SELECT can also fail for a short while
                # instead of returning nothing - as the index is not yet
                # set up properly. Ignore the failure and retry until the
                # index is ready.
                pass
            time.sleep(0.1)

        assert [(0,),(1,),(2,)] == rows
|
|
|
|
# Tests combinations of lookup in an index of static column with other
# restrictions. Reproduces #12829.
# NOTE: currently marked with skip instead of xfail because
# on_internal_error() crashes Scylla.
@pytest.mark.skip(reason="issue #12829")
def test_static_column_index_restrictions(cql, test_keyspace):
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(s)')
        cql.execute(f'INSERT INTO {table} (pk, c, s) VALUES (0, 3, 1)')
        cql.execute(f'INSERT INTO {table} (pk, c, s) VALUES (0, 4, 1)')
        cql.execute(f'INSERT INTO {table} (pk, c, s) VALUES (1, 4, 2)')

        # Indexed lookup alone, and combined with a partition-key
        # restriction, both work:
        assert [(0,3),(0,4)] == sorted(cql.execute(f'SELECT pk,c FROM {table} WHERE s = 1'))
        assert [(0,3),(0,4)] == sorted(cql.execute(f'SELECT pk,c FROM {table} WHERE pk = 0 AND s = 1'))
        # Adding a clustering-key restriction as well reproduces #12829:
        assert [(0,3)] == sorted(cql.execute(f'SELECT pk,c FROM {table} WHERE pk = 0 AND s = 1 AND c = 3'))
|
|
|
|
# Checks that clustering row deletions do not affect static columns.
def test_static_column_index_unaffected_by_clustering_row_ops(cql, test_keyspace):
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(s)')

        # One partition with a static value and five clustering rows:
        cql.execute(f'INSERT INTO {table} (pk, c, s, v) VALUES (0, 0, 42, 0)')
        cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES (0, 1, 10)')
        cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES (0, 2, 20)')
        cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES (0, 3, 30)')
        cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES (0, 4, 40)')

        # We are not using SELECT DISTINCT because it is not implemented yet
        # for queries that restrict a non-pk column. Therefore, `pk` appears
        # multiple times in the result - once per live clustering row.

        assert [(0,)]*5 == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 42'))

        # Row delete
        cql.execute(f'DELETE FROM {table} WHERE pk = 0 AND c = 4')
        assert [(0,)]*4 == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 42'))

        # Range delete
        cql.execute(f'DELETE FROM {table} WHERE pk = 0 AND c >= 1 AND c < 3')
        assert [(0,)]*2 == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 42'))

        # Range delete, but this time get rid of all rows (static row should stay)
        cql.execute(f'DELETE FROM {table} WHERE pk = 0 AND c >= 0 AND c <= 4')
        assert [(0,)]*1 == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 42'))

        # Finally, perform a partition delete and get rid of the row
        cql.execute(f'DELETE FROM {table} WHERE pk = 0')
        assert [] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 42'))
|
|
|
|
# Checks that changing a static column's value is correctly reflected by queries
# accelerated by a secondary index.
def test_static_column_index_all_clustering_rows_moved_by_static_column_update(cql, test_keyspace):
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # The expected (pk, c, v) rows of each of the two partitions:
        rows_for_pk = [
            [(0, 0, 0), (0, 1, 10), (0, 2, 20)],
            [(1, 0, 0), (1, 1, 10), (1, 2, 20)],
        ]

        cql.execute(f'CREATE INDEX ON {table}(s)')

        # Both partitions start with s = 0 and three clustering rows each:
        for pk in range(2):
            cql.execute(f'INSERT INTO {table} (pk, c, s, v) VALUES ({pk}, 0, 0, 0)')
            cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES ({pk}, 1, 10)')
            cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES ({pk}, 2, 20)')

        assert rows_for_pk[0] + rows_for_pk[1] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 0'))
        assert [] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 1'))

        # Updating the static column of pk=1 must move *all* of that
        # partition's clustering rows from the s=0 result to s=1:
        cql.execute(f"UPDATE {table} SET s = 1 WHERE pk = 1")

        assert rows_for_pk[0] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 0'))
        assert rows_for_pk[1] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 1'))

        # And the same for pk=0, leaving no partition with s=0:
        cql.execute(f"UPDATE {table} SET s = 1 WHERE pk = 0")

        assert [] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 0'))
        assert rows_for_pk[0] + rows_for_pk[1] == sorted(cql.execute(f'SELECT pk, c, v FROM {table} WHERE s = 1'))
|
|
|
|
|
|
# Tests operations on tables which have both static column and regular column indexes.
# Checks that one does not interfere with the other.
def test_static_and_regular_index_operations(cql, test_keyspace):
    schema = 'pk int, c int, s int STATIC, v int, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f'CREATE INDEX ON {table}(s)')
        cql.execute(f'CREATE INDEX ON {table}(v)')

        # Four partitions covering every (s, v) combination in {0,1}:
        cql.execute(f'INSERT INTO {table} (pk, s, c, v) VALUES (0, 0, 0, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s, c, v) VALUES (1, 0, 0, 1)')
        cql.execute(f'INSERT INTO {table} (pk, s, c, v) VALUES (2, 1, 0, 0)')
        cql.execute(f'INSERT INTO {table} (pk, s, c, v) VALUES (3, 1, 0, 1)')

        assert [(0,),(1,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 0'))
        assert [(2,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 1'))
        assert [(0,),(2,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 0'))
        assert [(1,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 1'))

        # Updating s must be reflected by the s index and leave v's intact:
        cql.execute(f'UPDATE {table} SET s = 1 WHERE pk = 1')

        assert [(0,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 0'))
        assert [(1,),(2,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 1'))
        assert [(0,),(2,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 0'))
        assert [(1,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 1'))

        # Conversely, updating v must leave the s index intact:
        cql.execute(f'UPDATE {table} SET v = 0 WHERE pk = 1 AND c = 0')

        assert [(0,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 0'))
        assert [(1,),(2,),(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 1'))
        assert [(0,),(1,),(2,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 0'))
        assert [(3,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 1'))

        # There are separate codepaths for processing static column addition/removal
        # in case the static row isn't the last element of the mutation.
        # The operations below allow us to test those cases

        cql.execute(f'INSERT INTO {table} (pk, c, v) VALUES (4, 0, 4)')
        assert [] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 2'))
        assert [(4,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 4'))

        # Static column is set on a partition which didn't have a static row yet
        cql.execute(f'BEGIN BATCH \
                      UPDATE {table} SET s = 2 WHERE pk = 4; \
                      UPDATE {table} SET v = 5 WHERE pk = 4 AND c = 0; \
                      APPLY BATCH')
        assert [(4,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 2'))
        assert [] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 4'))
        assert [(4,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 5'))

        # Static column is removed from a partition
        # In order to construct the batch, we need the write timestamp
        # of v, so that the partition delete removes the static row while
        # the v = 6 update (at timestamp+1) survives the delete:
        timestamp = list(cql.execute(f'SELECT writetime(v) FROM {table} WHERE pk = 4 AND c = 0'))[0][0]
        cql.execute(f'BEGIN BATCH \
                      DELETE FROM {table} USING TIMESTAMP {timestamp} WHERE pk = 4; \
                      UPDATE {table} USING TIMESTAMP {timestamp+1} SET v = 6 WHERE pk = 4 AND c = 0; \
                      APPLY BATCH')
        assert [] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE s = 2'))
        assert [] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 5'))
        assert [(4,)] == sorted(cql.execute(f'SELECT pk FROM {table} WHERE v = 6'))
|
|
|
|
# Make sure that, when there are multiple static column indexes and only one
# column is modified, only the index relevant to that column is modified.
def test_multiple_static_column_indexes(cql, test_keyspace):
    schema = 'pk int, c int, s1 int STATIC, s2 int STATIC, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        for col in ['s1', 's2']:
            cql.execute(f'CREATE INDEX ON {table}({col})')

        def lookup(col, val):
            # Indexed lookup of the partitions whose static column col == val.
            return sorted(cql.execute(f'SELECT pk FROM {table} WHERE {col} = {val}'))

        # Four partitions covering every (s1, s2) combination in {0,1}:
        for pk, s1, s2 in [(0, 0, 0), (1, 0, 1), (2, 1, 0), (3, 1, 1)]:
            cql.execute(f'INSERT INTO {table} (pk, s1, s2) VALUES ({pk}, {s1}, {s2})')

        assert [(0,),(1,)] == lookup('s1', 0)
        assert [(2,),(3,)] == lookup('s1', 1)
        assert [(0,),(2,)] == lookup('s2', 0)
        assert [(1,),(3,)] == lookup('s2', 1)

        # Modify only s1: the s1 index must change, the s2 index must not.
        cql.execute(f'UPDATE {table} SET s1 = 1 WHERE pk = 1')

        assert [(0,)] == lookup('s1', 0)
        assert [(1,),(2,),(3,)] == lookup('s1', 1)
        assert [(0,),(2,)] == lookup('s2', 0)
        assert [(1,),(3,)] == lookup('s2', 1)
|
|
|
|
# Test that creating a local index on a static column is disallowed.
# Local static indexes are not useful because there is only one value
# of a static column allowed for a given partition.
def test_disallow_local_indexes_on_static_columns(scylla_only, cql, test_keyspace):
    schema = 'pk int, c int, s int static, PRIMARY KEY(pk, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # The "((pk), s)" target declares a local index restricted to pk;
        # it must be rejected with this specific error message:
        with pytest.raises(InvalidRequest, match="Local indexes containing static columns are not supported"):
            cql.execute(f'CREATE INDEX ON {table}((pk), s)')
|
|
|
|
# Check that a "SELECT p FROM table WHERE v = 1 AND token(p) > ..."
# works where p is a base table's partition key and v is an indexed
# regular column. For this to work, it requires that a secondary index
# should hold the list of matching p's in token order, and for the
# query planner to understand this (so ALLOW FILTERING isn't necessary).
# Reproduces issue #7043. See also C++ tests
# test_select_with_token_range_*() in test/boost/secondary_index_test.cc
def test_index_filter_by_token(cql, test_keyspace):
    schema = 'p int, v int, primary key (p)'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE INDEX ON {table}(v)")
        insert = cql.prepare(f"INSERT INTO {table}(p,v) VALUES (?,?)")
        for i in range(10):
            # All of these have v = 1:
            cql.execute(insert, [i, 1])
        # Scan the base table in token order
        base_results = list(cql.execute(f"SELECT token(p), p FROM {table}"))
        # Scanning the secondary index yields the same order
        # (see also test_partition_order_with_si() above)
        si_results = list(cql.execute(f"SELECT token(p), p FROM {table} WHERE v = 1"))
        assert si_results == base_results
        # Find one of the tokens and add a "token(p) >" restriction.
        # Check that the token(p) > .. restriction works (yielding just
        # a subset of base_results) and also the v = ... works together
        # (v=1 yields the data, v=0 yields nothing).
        # NOTE: "system_token_p" is presumably the attribute name the Python
        # driver generates for the unaliased "token(p)" result column.
        token3 = base_results[3].system_token_p
        assert base_results[4:] == list(cql.execute(f"SELECT token(p), p FROM {table} WHERE v = 1 AND token(p) > {token3}"))
        assert [] == list(cql.execute(f"SELECT token(p), p FROM {table} WHERE v = 0 AND token(p) > {token3}"))
|
|
|
|
# Looking up a null value through a secondary index must match nothing:
# in Scylla, "= null" never matches anything - not even an actual null.
# Cassandra rejects the "= null" expression outright, so these tests are
# Scylla-only.
def test_global_secondary_index_null_lookup(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, 'p int PRIMARY KEY, v int') as table:
        cql.execute(f'CREATE INDEX ON {table}(v)')
        p = unique_key_int()
        cql.execute(f'INSERT INTO {table}(p,v) VALUES ({p},1)')
        assert list(cql.execute(f'SELECT p FROM {table} WHERE v=null')) == []
# Same null-lookup check, but for a local (per-partition) index.
def test_local_secondary_index_null_lookup(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, 'p int PRIMARY KEY, v int') as table:
        cql.execute(f'CREATE INDEX ON {table}((p), v)')
        p = unique_key_int()
        cql.execute(f'INSERT INTO {table}(p,v) VALUES ({p},1)')
        assert list(cql.execute(f'SELECT * FROM {table} WHERE p={p} AND v=null')) == []
# As above, but with a clustering key in the base table, so the lookup also
# restricts the clustering column.
def test_local_secondary_index_null_lookup2(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, 'p int, c int, PRIMARY KEY (p, c), v int') as table:
        cql.execute(f'CREATE INDEX ON {table}((p), v)')
        p = unique_key_int()
        cql.execute(f'INSERT INTO {table}(p,c,v) VALUES ({p},0,1)')
        assert list(cql.execute(f'SELECT * FROM {table} WHERE p={p} AND c=0 AND v=null')) == []
# Reproducers for issue #7659: a query involving multiple indexes wrongly
# tried to use an index for a non-EQ restriction, although an index can only
# serve EQ. A C++ test also reproduces this, but the two tests here show the
# bug for two kinds of non-EQ restrictions with different symptoms (one used
# to assert, the other "just" threw an exception), for both global and local
# indexes.

def test_7659_global(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'a int, d int, f int, PRIMARY KEY (a, d)') as table:
        cql.execute(f'CREATE INDEX ON {table}(d)')
        cql.execute(f'CREATE INDEX ON {table}(f)')
        for a, d, f in [(1, 0, 1), (2, 0, 2), (3, 0, 3), (4, 0, 0), (5, 1, 1)]:
            cql.execute(f'INSERT INTO {table}(a,d,f) VALUES ({a}, {d}, {f})')
        # Before the #7659 fix, this IN restriction tripped an assertion:
        assert [(1,),(2,)] == sorted(list(cql.execute(f"SELECT a FROM {table} WHERE d=0 AND f in (1,2) ALLOW FILTERING")))
        # ... and this inequality caused an exception and a failed request:
        assert [(1,),(2,),(3,)] == sorted(list(cql.execute(f"SELECT a FROM {table} WHERE d=0 AND f>0 ALLOW FILTERING")))
# The local-index variant of the previous test is scylla_only, because LSI
# is a Scylla extension that Cassandra doesn't have.
def test_7659_local(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, 'a int, b int, c int, d int, e int, f int, PRIMARY KEY ((a, b), c, d)') as table:
        cql.execute(f'CREATE INDEX ON {table}((a, b), f)')
        cql.execute(f'CREATE INDEX ON {table}(d)')
        cql.execute(f'INSERT INTO {table}(a,b,c,d,e,f) VALUES (0,0,0,0,0,1)')
        # Before the #7659 fix, this IN restriction tripped an assertion:
        assert [(0,)] == list(cql.execute(f"SELECT a FROM {table} WHERE a=0 and b=0 AND d=0 AND f in (1,2) ALLOW FILTERING"))
        # ... and this inequality caused an exception and a failed request:
        assert [(0,)] == list(cql.execute(f"SELECT a FROM {table} WHERE a=0 and b=0 AND d=0 AND f>0 ALLOW FILTERING"))
# An index can serve equality relations (a=2) but not inequalities such as
# a>=2. When those are present, Scylla must ignore the index and fall back
# to ordinary filtering.
# Reproduces #5823
def test_index_non_eq_relation(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'a bigint, b bigint, c bigint, PRIMARY KEY ((a, b))') as table:
        cql.execute(f'CREATE INDEX ON {table}(a)')
        for a, b, c in [(0, 2, 1), (1, 2, 3), (2, 2, 4)]:
            cql.execute(f'INSERT INTO {table} (a,b,c) VALUES ({a},{b},{c})')
        assert [(3,),(4,)] == sorted(cql.execute(f"SELECT c FROM {table} WHERE a>0 and b=2 ALLOW FILTERING"))
        assert [(4,)] == sorted(cql.execute(f"SELECT c FROM {table} WHERE a>=2 ALLOW FILTERING"))
# A reproducer for issue #12762: when a scan with a PER PARTITION LIMIT
# filters through a secondary index, it must still apply that limit - and
# count against the original base-table partitions, not the index
# partitions. With issue #12762 this test passed without an index
# (use_index=False) but failed with one (use_index=True): the PER PARTITION
# LIMIT request was simply ignored.
@pytest.mark.parametrize("use_index", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#12762")), False])
def test_index_filtering_scan_and_per_partition_limit(cql, test_keyspace, use_index):
    with new_test_table(cql, test_keyspace, "p int, c int, v int, PRIMARY KEY (p, c)") as table:
        if use_index:
            cql.execute(f"CREATE INDEX ON {table}(v)")
        insert = cql.prepare(f'INSERT INTO {table} (p, c, v) VALUES (?, ?, ?)')
        for row in ([0, 0, 0], [0, 1, 0], [1, 0, 0], [1, 1, 0]):
            cql.execute(insert, row)
        # ALLOW FILTERING isn't needed when an index exists. Scylla must
        # use the available index (otherwise the v=0 query would be
        # inefficient), but must not forget the PER PARTITION LIMIT.
        allow_filtering = '' if use_index else 'ALLOW FILTERING'
        assert {(0,0,0), (1, 0, 0)} == set(cql.execute(f'SELECT * FROM {table} WHERE v=0 PER PARTITION LIMIT 1 {allow_filtering}'))
# Like the previous PER PARTITION LIMIT test, but with extra filtering that
# eliminates some of the candidates the index retrieves. The limit must be
# counted post-filtering: PER PARTITION LIMIT 1 should *return* one row per
# partition, so if a partition's first candidate fails the filter, the scan
# must keep trying the next candidates in that partition - not skip to the
# next partition - until one post-filtered row is found.
@pytest.mark.parametrize("use_index", [
    pytest.param(True, marks=pytest.mark.xfail(reason="#12762")), False])
def test_index_filtering2_scan_and_per_partition_limit(cql, test_keyspace, use_index):
    with new_test_table(cql, test_keyspace, "p int, c int, v int, z int, PRIMARY KEY (p, c)") as table:
        if use_index:
            cql.execute(f"CREATE INDEX ON {table}(v)")
        insert = cql.prepare(f'INSERT INTO {table} (p, c, v, z) VALUES (?, ?, ?, ?)')
        for row in ([0, 0, 0, 0], [0, 1, 0, 1], [0, 2, 0, 1],
                    [1, 0, 0, 0], [1, 1, 0, 1], [1, 2, 0, 1]):
            cql.execute(insert, row)
        assert {(0, 1, 0, 1), (1, 1, 0, 1)} == set(cql.execute(f'SELECT * FROM {table} WHERE v=0 AND z=1 PER PARTITION LIMIT 1 ALLOW FILTERING'))
# After an index is added to a table, queries must not start using it
# before it's fully built - otherwise they'd return wrong results.
# Reproduces issue #7963.
# Marked cassandra_bug: Cassandra 4 fails it too - the ALLOW FILTERING
# request right after CREATE INDEX raises an exception in
# SecondaryIndexManagement and fails the read, despite CASSANDRA-8505
# claiming this was fixed back in 2015.
@pytest.mark.xfail(reason="issue #7963")
def test_unbuilt_index_not_used(cql, test_keyspace, cassandra_bug):
    # A larger "count" makes the test slower but raises the chance of
    # reproducing #7963. With a dev build on my laptop, count=100 is enough
    # to reproduce the failure in 90% of the runs.
    count = 100
    with new_test_table(cql, test_keyspace,
            "p int, c int, v int, PRIMARY KEY (p, c)") as table:
        insert = cql.prepare(f"INSERT INTO {table} (p, c, v) VALUES (?, ?, ?)")
        for i in range(count):
            cql.execute(insert, [i, i*10, i*100])
        assert list(cql.execute(f"SELECT p FROM {table} WHERE v=100 ALLOW FILTERING")) == [(1,)]
        # With no index yet, the same query minus ALLOW FILTERING must be
        # refused:
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            cql.execute(f"SELECT p FROM {table} WHERE v=100")
        # Create the index and then - quickly, usually before the build
        # finishes - retry the query that succeeded above (the one with
        # ALLOW FILTERING):
        cql.execute(f"CREATE INDEX ON {table} (v)")
        # Failure here reproduces #7963 (timing-dependent: it may pass
        # occasionally even while the bug exists):
        assert list(cql.execute(f"SELECT p FROM {table} WHERE v=100 ALLOW FILTERING")) == [(1,)]
        # Retrying *without* ALLOW FILTERING must either be allowed and
        # produce correct results (if the index finished building), or
        # complain that ALLOW FILTERING is needed (as it did before the
        # index was added).
        try:
            # If the query runs, its result must be correct. Failure here
            # reproduces #7963.
            assert list(cql.execute(f"SELECT p FROM {table} WHERE v=100")) == [(1,)]
        except InvalidRequest as e:
            # Also fine: the index isn't built yet, so the query still
            # requires ALLOW FILTERING.
            assert "ALLOW FILTERING" in str(e)
# Utility function waiting (up to a timeout) for the given index to be built.
# Uses the "IndexInfo" system table, supported by both Scylla and Cassandra.
def wait_for_index(cql, keyspace, index_name, timeout_sec=60):
    """Poll system."IndexInfo" until index_name is listed as built.

    Polls every 0.1 seconds and returns as soon as the index appears;
    fails the current test if it doesn't show up within timeout_sec.
    """
    # Use a monotonic clock for the deadline so the timeout is immune to
    # wall-clock jumps (NTP adjustments, suspend/resume) during the wait.
    deadline = time.monotonic() + timeout_sec
    while time.monotonic() < deadline:
        if list(cql.execute(f"SELECT index_name FROM system.\"IndexInfo\" WHERE table_name = '{keyspace}' and index_name = '{index_name}'")):
            return
        time.sleep(0.1)
    pytest.fail(f"Timeout ({timeout_sec} seconds) waiting for index {keyspace}.{index_name}")
# Start a filtering request (with ALLOW FILTERING), page through its
# results, and between two pages add a secondary index that could be used -
# or not - to continue the request. Either way, it certainly must not break
# the request. Reproduces #18992.
@pytest.mark.xfail(reason="issue #18992")
def test_paging_and_create_index(cql, test_keyspace):
    count = 20
    with new_test_table(cql, test_keyspace,
            "p int, v int, PRIMARY KEY (p)") as table:
        insert = cql.prepare(f"INSERT INTO {table} (p, v) VALUES (?, ?)")
        # Fill 'count' partitions, every one with v=17, so the filter
        # "WHERE v=17" returns all rows.
        for i in range(count):
            cql.execute(insert, [i, 17])
        page_size = 7
        query = SimpleStatement(f"SELECT p FROM {table} WHERE v=17 ALLOW FILTERING", fetch_size=page_size)
        # Remember the full result (count rows, in token(p) order) so we
        # can later compare it with the run that has a CREATE INDEX stuck
        # between pages.
        expected = list(cql.execute(query))
        # Repeat the query with the page-by-page API, issuing CREATE INDEX
        # on v between the first and the second page.
        r = cql.execute(query)
        assert len(r.current_rows) == page_size  # sanity check
        got = list(r.current_rows)
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        # Wait for the index build, so we don't accidentally reproduce
        # #7963 here again (an index getting used before actually built).
        wait_for_index(cql, test_keyspace, index_name)
        while r.has_more_pages:
            r = cql.execute(query, paging_state=r.paging_state)
            assert len(r.current_rows) <= page_size  # sanity check
            got.extend(r.current_rows)
        assert expected == got
# Same as the previous test, except the request filters on both v1 and v2
# and is already using an index on v2; now, between the pages, we add an
# index on v1. Reproduces #18992.
@pytest.mark.xfail(reason="issue #18992")
def test_paging_and_create_index2(cql, test_keyspace):
    count = 20
    with new_test_table(cql, test_keyspace,
            "p int, v1 text, v2 int, PRIMARY KEY (p)") as table:
        insert = cql.prepare(f"INSERT INTO {table} (p, v1, v2) VALUES (?, ?, ?)")
        # Every one of the 'count' partitions gets v1="dog", v2=3, so the
        # filter "WHERE v1='dog' AND v2=3" matches all rows.
        for i in range(count):
            cql.execute(insert, [i, "dog", 3])
        page_size = 7
        query = SimpleStatement(f"SELECT p FROM {table} WHERE v1='dog' AND v2=3 ALLOW FILTERING", fetch_size=page_size)
        expected = list(cql.execute(query))
        # With an index on v2, the query must yield the same results.
        index_name2 = unique_name()
        cql.execute(f"CREATE INDEX {index_name2} ON {table}(v2)")
        wait_for_index(cql, test_keyspace, index_name2)
        assert expected == list(cql.execute(query))
        # Now page through the query manually and CREATE INDEX on v1
        # between the first and second page. Scylla currently prefers the
        # index of the first column mentioned in the query, so the risk is
        # it will switch from v2 to v1 for the paging and get confused by
        # the paging state.
        r = cql.execute(query)
        assert len(r.current_rows) == page_size
        got = list(r.current_rows)
        index_name1 = unique_name()
        cql.execute(f"CREATE INDEX {index_name1} ON {table}(v1)")
        wait_for_index(cql, test_keyspace, index_name1)
        while r.has_more_pages:
            r = cql.execute(query, paging_state=r.paging_state)
            assert len(r.current_rows) <= page_size
            got.extend(r.current_rows)
        assert expected == got
# Like the previous tests, but here a secondary index used for the original
# request is suddenly dropped between pages. The query carries ALLOW
# FILTERING, so it can keep working - inefficiently - once the index is
# gone. The next test repeats this without ALLOW FILTERING, where the query
# cannot be resumed after the drop.
# Reproduces #18992.
@pytest.mark.xfail(reason="issue #18992")
def test_paging_and_drop_index_allow_filtering(cql, test_keyspace):
    count = 20
    with new_test_table(cql, test_keyspace,
            "p int, v int, PRIMARY KEY (p)") as table:
        insert = cql.prepare(f"INSERT INTO {table} (p, v) VALUES (?, ?)")
        # All 'count' partitions get v=17, so the filter v=17 returns
        # every row.
        for i in range(count):
            cql.execute(insert, [i, 17])
        page_size = 7
        query = SimpleStatement(f"SELECT p FROM {table} WHERE v=17 ALLOW FILTERING", fetch_size=page_size)
        expected = list(cql.execute(query))
        # With an index on v, the query must yield the same results.
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        wait_for_index(cql, test_keyspace, index_name)
        assert expected == list(cql.execute(query))
        # Finally, page through the query manually with a DROP INDEX
        # between the first and second page. The query keeps its ALLOW
        # FILTERING so Scylla won't reject it once the index is gone.
        r = cql.execute(query)
        assert len(r.current_rows) == page_size  # sanity check
        got = list(r.current_rows)
        cql.execute(f"DROP INDEX {test_keyspace}.{index_name}")
        while r.has_more_pages:
            r = cql.execute(query, paging_state=r.paging_state)
            assert len(r.current_rows) <= page_size  # sanity check
            got.extend(r.current_rows)
        assert expected == got
# This test is the same as the previous one, except that it uses a query
# *without* ALLOW FILTERING while an index existed, and when it attempts
# to get the next page after a DROP INDEX, we expect the usual error
# message about ALLOW FILTERING being necessary.
def test_paging_and_drop_index_no_allow_filtering(cql, test_keyspace):
    count = 20
    with new_test_table(cql, test_keyspace,
            "p int, v int, PRIMARY KEY (p)") as table:
        # Fill 'count' partitions, all with v=17, so the indexed filter
        # v=17 will return all rows.
        stmt = cql.prepare(f"INSERT INTO {table} (p, v) VALUES (?, ?)")
        for i in range(count):
            cql.execute(stmt, [i, 17])
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        wait_for_index(cql, test_keyspace, index_name)

        page_size = 7
        stmt = SimpleStatement(f"SELECT p FROM {table} WHERE v=17", fetch_size=page_size)
        # With the index present, the no-ALLOW-FILTERING query is legal;
        # remember its full result.
        expected = list(cql.execute(stmt))
        # Run the same paged query again but this time use the page-by-page
        # API, and do a DROP INDEX between the first and second page.
        got = []
        r = cql.execute(stmt)
        assert len(r.current_rows) == page_size
        got.extend(r.current_rows)
        cql.execute(f"DROP INDEX {test_keyspace}.{index_name}")
        # Because the query does not have "ALLOW FILTERING", even if we
        # could resume this query it would be inefficient without the
        # index, so the resumed query should fail with an error about
        # ALLOW FILTERING being needed.
        with pytest.raises(InvalidRequest, match="ALLOW FILTERING"):
            while r.has_more_pages:
                r = cql.execute(stmt, paging_state=r.paging_state)
                assert len(r.current_rows) <= page_size
                got.extend(r.current_rows)
            # Reached only if no page raised; in that case the results must
            # at least be complete (pytest.raises then fails the test
            # anyway, since the expected exception never happened).
            assert expected == got
# Test index representation in system.* tables
def test_index_in_system_tables(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v int") as table:
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        wait_for_index(cql, test_keyspace, index_name)
        if is_scylla(cql):
            # Scylla backs the index by a materialized view named
            # "<index>_index", which must be listed in system.built_views.
            built = [f'{r.keyspace_name}.{r.view_name}' for r in cql.execute('select * from system.built_views')]
            assert f'{test_keyspace}.{index_name}_index' in built
        # system."IndexInfo" (note: its "table_name" column actually holds
        # the keyspace name) must list the built index...
        listed = [f'{r.table_name}::{r.index_name}' for r in cql.execute('select * from system."IndexInfo"')]
        assert f'{test_keyspace}::{index_name}' in listed
        # ... and a point lookup in it must find the index too.
        row = cql.execute(f'select * from system."IndexInfo" where table_name = \'{test_keyspace}\' AND index_name = \'{index_name}\'').one()
        assert (test_keyspace, index_name) == (row.table_name, row.index_name)
# The following tests need an index that we know is already built. We
# create one on an empty table, so building it should be almost immediate,
# but we still call wait_for_index() to be sure (assuming wait_for_index()
# itself works - possibly differently - on the various versions of Scylla
# and Cassandra).
@pytest.fixture(scope="module")
def built_index(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v int") as table:
        idx = unique_name()
        cql.execute(f"CREATE INDEX {idx} ON {table}(v)")
        wait_for_index(cql, test_keyspace, idx)
        yield (table, idx)
# A built index must be listed in system."IndexInfo" (a system table first
# introduced in Cassandra 2.1). This test does not verify that a created
# but not-yet-built index is absent from IndexInfo.
def test_built_index_in_system_indexinfo(cql, built_index):
    table, index_name = built_index
    keyspace_name, table_name = table.split('.')
    # Confusingly, IndexInfo's "table_name" column holds the keyspace
    # name, not the table's name.
    index_info = [(r.table_name, r.index_name) for r in cql.execute('select * from system."IndexInfo"')]
    assert (keyspace_name, index_name) in index_info
# An index must be listed in system_schema.indexes (first introduced in
# Cassandra 3.0); also check its extra "kind" and "options" fields. That
# table lists all *created* indexes, not only fully-built ones, but this
# test doesn't verify that distinction - our index is already built.
def test_index_in_system_schema_indexes(cql, built_index):
    table, index_name = built_index
    keyspace_name, table_name = table.split('.')
    matches = [r for r in cql.execute('select * from system_schema.indexes') if r.index_name == index_name]
    assert len(matches) == 1
    row = matches[0]
    assert row.index_name == index_name
    assert row.keyspace_name == keyspace_name
    assert row.table_name == table_name
    # The exact meaning of "kind" and "options" isn't clearly documented
    # anywhere, but both Scylla and Cassandra report kind "COMPOSITES" (for
    # a non-custom index on a non-compact table), and "options" holds just
    # a "target" which, for a global single-column index, is the column
    # name ("v" here). Make sure this doesn't accidentally regress.
    assert row.kind == 'COMPOSITES'
    assert row.options == {'target': 'v'}
# Test index representation in the REST API
def test_index_in_API(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v int") as table:
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        wait_for_index(cql, test_keyspace, index_name)
        # The REST API addresses a table as "keyspace:table".
        built = rest_api.get_request(cql, f"column_family/built_indexes/{table.replace('.',':')}")
        assert index_name in built
# A LIMIT must be enforced correctly across multiple partitions. This test
# indexes a partition-key column and checks that the last partition of the
# result is truncated when including it whole would exceed the LIMIT.
#
# Reproduces #22158 - with LIMIT 3 the query result contains 4 rows instead
# of 3: the coordinator applies the LIMIT to the number of primary keys it
# fetches from the index view, and re-applies it to each partition fetched
# from the base table, but never truncates the last partition (which would
# require counting rows across partitions). So a multi-partition result may
# exceed the LIMIT, which is supposed to bound the whole result's row count.
#
# As a side effect the extra rows also leave has_more_pages set even though
# the result is exhausted: the last primary key of the result no longer
# matches the last key taken from the index view (where the LIMIT *was*
# applied), and a mismatch is always read as "more pages available". See
# generate_view_paging_state_from_base_query_results() for details.
@pytest.mark.xfail(reason="issue #22158")
def test_limit_partition(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'pk1 int, pk2 int, ck int, primary key ((pk1, pk2), ck)') as table:
        cql.execute(f'CREATE INDEX ON {table}(pk2)')
        insert = cql.prepare(f'INSERT INTO {table} (pk1, pk2, ck) VALUES (?, ?, ?)')
        for row in ([1, 1, 1], [1, 1, 2], [2, 1, 1], [2, 1, 2]):
            cql.execute(insert, row)
        # LIMIT within a single partition - succeeds.
        rs = cql.execute(f'SELECT pk1, ck FROM {table} WHERE pk2 = 1 LIMIT 1')
        assert sorted(rs) == [(2,1)]
        assert rs.has_more_pages == False
        # LIMIT across partitions - reproduces #22158.
        rs = cql.execute(f'SELECT pk1, ck FROM {table} WHERE pk2 = 1 LIMIT 3')
        assert sorted(rs) == [(1,1), (2,1), (2,2)]
        assert rs.has_more_pages == False
# Same as test_limit_partition above, but exercising partition *slices*
# instead of whole partitions, achieved by indexing the first clustering
# key column.
@pytest.mark.xfail(reason="issue #22158")
def test_limit_partition_slice(cql, test_keyspace):
    with new_test_table(cql, test_keyspace, 'pk int, ck1 int, ck2 int, primary key (pk, ck1, ck2)') as table:
        cql.execute(f'CREATE INDEX ON {table}(ck1)')
        insert = cql.prepare(f'INSERT INTO {table} (pk, ck1, ck2) VALUES (?, ?, ?)')
        for row in ([1, 1, 1], [1, 1, 2], [2, 1, 1], [2, 1, 2]):
            cql.execute(insert, row)
        # LIMIT within a single partition slice - succeeds.
        rs = cql.execute(f'SELECT pk, ck2 FROM {table} WHERE ck1 = 1 LIMIT 1')
        assert sorted(rs) == [(1,1)]
        assert rs.has_more_pages == False
        # LIMIT across partition slices - reproduces #22158.
        rs = cql.execute(f'SELECT pk, ck2 FROM {table} WHERE ck1 = 1 LIMIT 3')
        assert sorted(rs) == [(1,1), (1,2), (2,1)]
        assert rs.has_more_pages == False
# A query through a secondary index must handle "short reads".
#
# A read is short when a paged query hits the internal per-page memory
# limit (the `query_page_size_in_bytes` cfg option; 1MiB by default), so
# the page is cut before reaching the user's requested page size (counted
# in rows).
#
# Reproduces #25839 - the third row (last row of the first partition) is
# missing from the result: the first page is cut short (two rows already
# hit the limit), but the code assumes the previous page fully read its
# last partition and therefore skips it entirely when preparing the second
# page. This logic lives in `find_index_partition_ranges()`.
@pytest.mark.xfail(reason="issue #25839")
def test_short_read(cql, test_keyspace):
    # 1MiB in Cassandra; only 1KiB in Scylla, where the limit is
    # configurable, for faster execution.
    page_memory_limit = 1024 if is_scylla(cql) else 1024 * 1024
    row_size = page_memory_limit // 2  # will cause short read after 2 rows
    with ExitStack() as stack:
        table = stack.enter_context(new_test_table(cql, test_keyspace, 'pk int, ck1 int, ck2 int, data text, primary key (pk, ck1, ck2)'))
        if is_scylla(cql):
            stack.enter_context(config_value_context(cql, 'query_page_size_in_bytes', str(page_memory_limit)))
        cql.execute(f'CREATE INDEX ON {table}(ck1)')
        insert = cql.prepare(f'INSERT INTO {table} (pk, ck1, ck2, data) VALUES (?, ?, ?, ?)')
        for pk, ck2, filler in [(1, 1, 'A'), (1, 2, 'B'), (1, 3, 'C'), (2, 1, 'D')]:
            cql.execute(insert, [pk, 1, ck2, filler * row_size])
        rs = cql.execute(f'SELECT pk, ck2, data FROM {table} WHERE ck1 = 1')
        # Keep only (pk, ck2); the bulky data column is just row padding.
        assert sorted((row[0], row[1]) for row in rs) == [(1,1), (1,2), (1,3), (2,1)]
# Check that an index-backed read bumps the per-index query-latency metric
# (scylla_index_query_latencies_count, labeled by index and keyspace) by
# exactly one.
def test_index_metrics(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v int") as table:
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")
        wait_for_index(cql, test_keyspace, index_name)
        # Hoist the metric name and its label set: they were repeated
        # (and needlessly written as placeholder-less f-strings, lint F541).
        metric = 'scylla_index_query_latencies_count'
        labels = {"idx": index_name, "ks": test_keyspace}
        initial_count = ScyllaMetrics.query(cql).get(metric, labels)
        if initial_count is None:
            # The metric may not exist yet before the first indexed query.
            initial_count = 0
        cql.execute(f"SELECT * FROM {table} WHERE v=1")
        current_count = ScyllaMetrics.query(cql).get(metric, labels)
        assert current_count - initial_count == 1
# Test combination of indexing, paging and aggregation.
# Indexed queries used to erroneously return partial per-page results
# for aggregation queries, as described in issue #4540. This test
# (originally written in C++ in commit 3d9a37f28fe) reproduced that bug.
def test_indexing_paging_and_aggregation(cql, test_keyspace):
    # from cql3/statements/select_statement.cc:
    DEFAULT_COUNT_PAGE_SIZE = 10000
    row_count = 2 * DEFAULT_COUNT_PAGE_SIZE + 120
    with new_test_table(cql, test_keyspace, 'id int primary key, v int') as table:
        cql.execute(f'CREATE INDEX ON {table}(v)')
        stmt = cql.prepare(f'INSERT INTO {table} (id, v) VALUES (?, ?)')
        for i in range(row_count):
            cql.execute(stmt, [i + 1, i % 2])
        # In Scylla, in a single-node test materialized views and therefore
        # secondary indexes have synchronous updates, so we can read the
        # indexed data immediately. In Cassandra, secondary indexes are
        # always synchronously updated (they don't use materialized views).
        res = list(cql.execute(f'SELECT sum(id) FROM {table} WHERE v = 1'))
        # Aggregation (like sum(id)) internally pages through the data
        # DEFAULT_COUNT_PAGE_SIZE (=10,000) rows at a time, but even though
        # we have more rows than that in the table, it must only return a
        # single result row when all the data was aggregated - the CQL API
        # doesn't allow it to return partial, per-page, results.
        # The expected value is the sum of the v=1 entries - the even ids
        # up to row_count. Since row_count is even, Gauss's summation
        # formula gives:
        #   2 + 4 + ... + row_count
        #   = 2 * (1 + 2 + ... + row_count/2)
        #   = 2 * (row_count/2 * (row_count/2 + 1)) / 2
        #   = row_count*row_count/4 + row_count/2
        assert res == [(row_count * row_count // 4 + row_count // 2,)]
        # Specifying a page size ("fetch_size") on the request is not
        # expected to change anything, since the output is just one row.
        # The aggregation doesn't use this page size for its internal
        # paging through the data - it still uses DEFAULT_COUNT_PAGE_SIZE -
        # though this test doesn't actually verify that the tiny page
        # size = 2 isn't used internally, so it doesn't add much...
        # (the sum of the v=0 entries - the odd ids - is row_count²/4)
        stmt = SimpleStatement(f'SELECT sum(id) FROM {table} WHERE v = 0', fetch_size=2)
        assert list(cql.execute(stmt)) == [(row_count * row_count // 4,)]
        # Same check as we did for sum(), but for avg(): the average of the
        # even ids 2..row_count is (2 + row_count)/2 = row_count/2 + 1.
        res = list(cql.execute(f'SELECT avg(id) FROM {table} WHERE v = 1'))
        assert res == [(row_count // 2 + 1,)]

    # Test the same thing again, but this time the indexed column is a
    # clustering key column, not the first one. After issue #3405 we
    # have a special code path for indexing composite non-prefix clustering
    # keys, so we want to exercise it too.
    with new_test_table(cql, test_keyspace, 'id int, c1 int, c2 int, primary key(id, c1, c2)') as table:
        cql.execute(f'CREATE INDEX ON {table}(c2)')
        stmt = cql.prepare(f'INSERT INTO {table} (id, c1, c2) VALUES (?, ?, ?)')
        for i in range(row_count):
            cql.execute(stmt, [i + 1, i + 1, i % 2])
        stmt = SimpleStatement(f'SELECT sum(id) FROM {table} WHERE c2 = 0', fetch_size=2)
        assert list(cql.execute(stmt)) == [(row_count * row_count // 4,)]
        # Use integer division (//, not /) for consistency with the
        # identical avg() assertion above: avg(id) of int ids is an int,
        # and the old float expression only matched via int==float
        # coercion.
        stmt = SimpleStatement(f'SELECT avg(id) FROM {table} WHERE c2 = 1', fetch_size=3)
        assert list(cql.execute(stmt)) == [(row_count // 2 + 1,)]
# Verify that a newly created (named) secondary index has the
# `tombstone_gc` property set to some value.
#
# Reproducer of scylladb/scylladb#26542
def test_tombstone_gc_property(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v int") as table:
        index_name = unique_name()
        cql.execute(f"CREATE INDEX {index_name} ON {table}(v)")

        # The index's backing view is named "<index>_index"; its DESCRIBE
        # output must mention a tombstone_gc setting.
        described = cql.execute(f"DESCRIBE MATERIALIZED VIEW {test_keyspace}.{index_name}_index").one()
        assert "tombstone_gc = {" in described.create_statement
# Verify that a newly created *unnamed* secondary index also has the
# `tombstone_gc` property set to some value.
#
# Reproducer of scylladb/scylladb#26542
def test_tombstone_gc_property_unnamed_index(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, "p int PRIMARY KEY, v int") as table:
        cql.execute(f"CREATE INDEX ON {table}(v)")

        # An unnamed index on column v gets the auto-generated name
        # "<table>_v_idx", backed by the view "<table>_v_idx_index".
        described = cql.execute(f"DESCRIBE MATERIALIZED VIEW {table}_v_idx_index").one()
        assert "tombstone_gc = {" in described.create_statement
|