Files
scylladb/test/cql-pytest/test_scan.py
Jan Ciolek 84501851eb cql_pytest: ensure that where clauses like token(p) = 0 AND p = 0 are rejected
Scylla doesn't support combining restrictions
on token with other restrictions on partition key columns.

Some pieces of code depend on the assumption
that such combinations are not allowed.
In case they were allowed in the future
these functions would silently start
returning wrong results, and we would
return invalid rows.

Add a test that will start failing once
this restriction is removed. It will
warn the developer to change the
functions that used to depend
on the assumption.

Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
2022-11-23 13:09:22 +01:00

218 lines
14 KiB
Python

# Copyright 2022-present ScyllaDB
#
# SPDX-License-Identifier: AGPL-3.0-or-later
#############################################################################
# Tests for scanning SELECT requests (which read many rows and/or many
# partitions).
# We have a separate test file test_filtering.py for scans which also involve
# filtering, and test_allow_filtering.py for checking when "ALLOW FILTERING"
# is needed in scan. test_secondary_index.py also contains tests for scanning
# using a secondary index.
#############################################################################
import pytest
from util import new_test_table, new_type, user_type
from cassandra.protocol import InvalidRequest
from cassandra.query import SimpleStatement
# Test that in a table with multiple clustering-key columns, we can have
# multi-column restrictions involving various legal combinations of
# clustering key columns. Multi-column restrictions are expressions involving
# tuples of columns - such as (c2, c3) = (2,3) or (c2, c3) < (2,3).
# This test focuses on cases which do not need ALLOW FILTERING. The next
# test will focus on those that do.
# Reproduces issue #64 and #4244
@pytest.mark.xfail(reason="issues #64 and #4244")
def test_multi_column_restrictions_ck(cql, test_keyspace):
    """Exercise clustering-key restrictions that need no ALLOW FILTERING.

    One partition (p=1) is filled with every (c1, c2, c3) combination of
    the values 0..2, and a series of SELECTs mixing single-column and
    multi-column restrictions is checked against hard-coded results.
    Marked xfail with reason "issues #64 and #4244".
    """
    schema = "p int, c1 int, c2 int, c3 int, PRIMARY KEY (p, c1, c2, c3)"
    with new_test_table(cql, test_keyspace, schema) as table:
        insert = cql.prepare(f"INSERT INTO {table} (p, c1, c2, c3) VALUES (1, ?, ?, ?)")
        all_keys = [[i, j, k] for i in range(3) for j in range(3) for k in range(3)]
        for key in all_keys:
            cql.execute(insert, key)

        # Restrictions with equality on a full prefix of clustering keys do
        # not require ALLOW FILTERING. This prefix may be composed of
        # single-column restrictions or multi-column restrictions.
        equality_cases = [
            (f"SELECT c1,c2,c3 FROM {table} WHERE p=1 AND c1=1",
             [(1,0,0), (1,0,1), (1,0,2), (1,1,0), (1,1,1), (1,1,2), (1,2,0), (1,2,1), (1,2,2)]),
            (f"SELECT c1,c2,c3 FROM {table} WHERE p=1 AND c1=1 AND c2=2",
             [(1,2,0), (1,2,1), (1,2,2)]),
            (f"SELECT c1,c2,c3 FROM {table} WHERE p=1 AND (c1, c2) = (1, 2)",
             [(1,2,0), (1,2,1), (1,2,2)]),
            (f"SELECT c1,c2,c3 FROM {table} WHERE p=1 AND c1=1 AND c2=2 and c3=1",
             [(1,2,1)]),
            # Reproduces #4244:
            (f"SELECT c1,c2,c3 FROM {table} WHERE p=1 AND (c1, c2) = (1, 2) and c3=1",
             [(1,2,1)]),
            # Reproduces #64:
            (f"SELECT c1,c2,c3 FROM {table} WHERE p=1 AND c1=1 AND (c2, c3) = (2, 1)",
             [(1,2,1)]),
        ]
        for query, expected in equality_cases:
            assert list(cql.execute(query)) == expected

        # Multi-column restrictions are only allowed on adjacent clustering
        # columns - they cannot involve non-adjacent clustering columns, or
        # partition columns.
        rejected_cases = [
            (f"SELECT c1,c2,c3 FROM {table} WHERE p=1 AND c2=2 AND (c1, c3) = (1, 1)",
             [(1,2,1)]),
            (f"SELECT c1,c2,c3 FROM {table} WHERE (p, c1) = (1,1) AND c2 = 2 AND c3 = 1",
             [(1,2,1)]),
        ]
        for query, expected in rejected_cases:
            with pytest.raises(InvalidRequest, match='ulti-column'):
                # The comparison is only reached if the request is accepted.
                assert list(cql.execute(query)) == expected

        # Restrictions with inequality on a full prefix of clustering keys
        # ending in the inequality also do not require ALLOW FILTERING.
        # The inequality may be composed of single-column restrictions or
        # multi-column restrictions.
        inequality_cases = [
            (f"SELECT c1,c2,c3 FROM {table} WHERE p=1 AND c1<1",
             [(0,0,0), (0,0,1), (0,0,2), (0,1,0), (0,1,1), (0,1,2), (0,2,0), (0,2,1), (0,2,2)]),
            (f"SELECT c1,c2,c3 FROM {table} WHERE p=1 AND c1=1 AND c2<1",
             [(1,0,0), (1,0,1), (1,0,2)]),
            (f"SELECT c1,c2,c3 FROM {table} WHERE p=1 AND (c1,c2) < (0,2)",
             [(0,0,0), (0,0,1), (0,0,2), (0,1,0), (0,1,1), (0,1,2)]),
            # Reproduces #64:
            (f"SELECT c1,c2,c3 FROM {table} WHERE p=1 AND c1=1 AND (c2,c3) < (1,2)",
             [(1,0,0), (1,0,1), (1,0,2), (1,1,0), (1,1,1)]),
        ]
        for query, expected in inequality_cases:
            assert list(cql.execute(query)) == expected
# Test that if we have multi-column restrictions on the clustering key
# and additional filtering on regular columns, both restrictions are obeyed.
# Reproduces #6200.
def test_multi_column_restrictions_and_filtering(cql, test_keyspace):
    """Combine a multi-column clustering restriction with a filter on r.

    Rows are inserted into partition p=1 with c1, c2 in 0..1 and r equal
    to c2, so the row matching (c1, c2) = (0,1) has r=1.
    """
    schema = "p int, c1 int, c2 int, r int, PRIMARY KEY (p, c1, c2)"
    with new_test_table(cql, test_keyspace, schema) as table:
        insert = cql.prepare(f"INSERT INTO {table} (p, c1, c2, r) VALUES (1, ?, ?, ?)")
        for c1, c2 in [(a, b) for a in range(2) for b in range(2)]:
            # The regular column r gets the same value as c2.
            cql.execute(insert, [c1, c2, c2])

        unfiltered = list(cql.execute(f"SELECT c1,c2,r FROM {table} WHERE p=1 AND (c1, c2) = (0,1)"))
        assert unfiltered == [(0,1,1)]

        # Since in that result r=1, adding "AND r=1" should return the same
        # result, and adding "AND r=0" should return nothing.
        matching = list(cql.execute(f"SELECT c1,c2,r FROM {table} WHERE p=1 AND (c1, c2) = (0,1) AND r=1 ALLOW FILTERING"))
        assert matching == [(0,1,1)]

        # Reproduces #6200:
        non_matching = list(cql.execute(f"SELECT c1,c2,r FROM {table} WHERE p=1 AND (c1, c2) = (0,1) AND r=0 ALLOW FILTERING"))
        assert non_matching == []
# Test that if we have range multi-column restrictions on the clustering key
# and additional filtering on regular columns, both restrictions are obeyed.
# Similar to test_multi_column_restrictions_and_filtering, but uses a range
# restriction on the clustering key columns.
# Reproduces #12014, the code is taken from a reproducer provided by a user.
def test_multi_column_range_restrictions_and_filtering(cql, test_keyspace):
    """Combine a multi-column range restriction with a filter on a regular column.

    Four rows (id 0..3) are written to partition pk=0; ids 0 and 1 have
    processed=true, ids 2 and 3 have processed=false. A query using a
    single-column range on ts and one using a multi-column range on
    (ts, id) must return the same filtered rows.
    """
    schema = "pk int, ts timestamp, id int, processed boolean, PRIMARY KEY (pk, ts, id)"
    with new_test_table(cql, test_keyspace, schema) as table:
        inserts = [
            f"INSERT INTO {table} (pk, ts, id, processed) VALUES (0, currentTimestamp(), 0, true)",
            f"INSERT INTO {table} (pk, ts, id, processed) VALUES (0, currentTimestamp(), 1, true)",
            f"INSERT INTO {table} (pk, ts, id, processed) VALUES (0, currentTimestamp(), 2, false)",
            f"INSERT INTO {table} (pk, ts, id, processed) VALUES (0, currentTimestamp(), 3, false)",
        ]
        for insert in inserts:
            cql.execute(insert)

        # This select doesn't use multi-column restrictions, the result
        # shouldn't change when it does.
        single_column_rows = list(cql.execute(f"SELECT id, processed FROM {table} WHERE pk = 0 AND ts >= 0 AND processed = false ALLOW FILTERING"))
        assert single_column_rows == [(2, False), (3, False)]

        # Reproduces #12014
        multi_column_rows = list(cql.execute(f"SELECT id, processed FROM {table} WHERE pk = 0 AND (ts, id) >= (0, 0) AND processed = false ALLOW FILTERING"))
        assert single_column_rows == multi_column_rows
# Like the previous test, just in cases that require ALLOW FILTERING.
# We add another clustering key column to ensure that filtering *in*
# a long partition is really needed - not just filtering on the partitions
# (these are two different code paths).
@pytest.mark.xfail(reason="issue #64")
def test_multi_column_restrictions_ck_filtering(cql, test_keyspace):
    """Exercise clustering-key restrictions that need ALLOW FILTERING.

    One partition (p=1, c0=1) is filled with every (c1, c2, c3)
    combination of the values 0..2. None of the queries restricts p or
    c0. Marked xfail with reason "issue #64".
    """
    schema = "p int, c0 int, c1 int, c2 int, c3 int, PRIMARY KEY (p, c0, c1, c2, c3)"
    with new_test_table(cql, test_keyspace, schema) as table:
        insert = cql.prepare(f"INSERT INTO {table} (p, c0, c1, c2, c3) VALUES (1, 1, ?, ?, ?)")
        all_keys = [[i, j, k] for i in range(3) for j in range(3) for k in range(3)]
        for key in all_keys:
            cql.execute(insert, key)

        # Check various equality conditions with single and multi-column
        # restrictions. The conditions do not restrict p or c0, so they
        # require ALLOW FILTERING and use Scylla's filtering code path.
        # All of the tests below reproduce #64 because the restriction
        # "skipped" c0.
        cases_before_rejection = [
            (f"SELECT c1,c2,c3 FROM {table} WHERE c1=1 ALLOW FILTERING",
             [(1,0,0), (1,0,1), (1,0,2), (1,1,0), (1,1,1), (1,1,2), (1,2,0), (1,2,1), (1,2,2)]),
            (f"SELECT c1,c2,c3 FROM {table} WHERE c2=1 ALLOW FILTERING",
             [(0,1,0), (0,1,1), (0,1,2), (1,1,0), (1,1,1), (1,1,2), (2,1,0), (2,1,1), (2,1,2)]),
            (f"SELECT c1,c2,c3 FROM {table} WHERE (c1,c2) = (1,2) ALLOW FILTERING",
             [(1,2,0), (1,2,1), (1,2,2)]),
            (f"SELECT c1,c2,c3 FROM {table} WHERE (c2,c3) = (1,2) ALLOW FILTERING",
             [(0,1,2), (1,1,2), (2,1,2)]),
        ]
        for query, expected in cases_before_rejection:
            assert list(cql.execute(query)) == expected

        # A multi-column restriction on the non-adjacent columns c1 and c3
        # is expected to be rejected.
        with pytest.raises(InvalidRequest, match='ulti-column'):
            # The comparison is only reached if the request is accepted.
            assert list(cql.execute(f"SELECT c1,c2,c3 FROM {table} WHERE (c1,c3) = (1,2) ALLOW FILTERING")) == [(1,0,2), (1,1,2), (1,2,2)]

        cases_after_rejection = [
            (f"SELECT c1,c2,c3 FROM {table} WHERE (c1,c2) = (1,2) AND c3=1 ALLOW FILTERING",
             [(1,2,1)]),
            (f"SELECT c1,c2,c3 FROM {table} WHERE c1 = 1 AND (c2,c3) = (2,1) ALLOW FILTERING",
             [(1,2,1)]),
        ]
        for query, expected in cases_after_rejection:
            assert list(cql.execute(query)) == expected
# Test that it is allowed for the same column to participate in both a multi-
# column restriction and a single-column restriction - for example
# (c1,c2) > (0,1) AND c1<10
# Reproduces #4244. Contrasting with the other reproducers for #4244 above,
# in this test the single-column restriction is on the same column as the
# multi-column restriction, not a different column.
@pytest.mark.xfail(reason="issue #4244")
def test_multi_column_and_single_column_restriction_same_ck(cql, test_keyspace):
    """Restrict c1 by both a multi-column and a single-column restriction.

    Partition p=1 is filled with c1 in 0..2 and c2 in 0..3. Marked xfail
    with reason "issue #4244".
    """
    schema = "p int, c1 int, c2 int, PRIMARY KEY (p, c1, c2)"
    with new_test_table(cql, test_keyspace, schema) as table:
        insert = cql.prepare(f"INSERT INTO {table} (p, c1, c2) VALUES (1, ?, ?)")
        for key in [[c1, c2] for c1 in range(3) for c2 in range(4)]:
            cql.execute(insert, key)

        # Baseline: the multi-column restriction on its own.
        multi_only = list(cql.execute(f"SELECT c1,c2 FROM {table} WHERE p=1 AND (c1,c2) > (1,2)"))
        assert multi_only == [(1,3), (2,0), (2,1), (2,2), (2,3)]

        # Reproduces #4244:
        multi_and_single = list(cql.execute(f"SELECT c1,c2 FROM {table} WHERE p=1 AND (c1,c2) > (1,2) AND c1 < 2"))
        assert multi_and_single == [(1,3)]

        # Cassandra does not support the following request, saying that
        # "Column "c2" cannot be restricted by two inequalities not starting
        # with the same column". I think this is a Cassandra bug - such a
        # query could have been supported with ALLOW FILTERING, but for
        # now, let's just not test this case.
        #assert list(cql.execute(f"SELECT c1,c2 FROM {table} WHERE p=1 AND (c1,c2) > (1,2) AND c2 < 2 ALLOW FILTERING")) == [(2,0), (2,1)]
# Test that a token restriction can be combined with a non-token restriction,
# on the same column and on a different column.
# Reproduces issue #4244 (note that this is a different aspect of #4244 than
# the multi-column restriction problems reproduced by other tests above).
@pytest.mark.xfail(reason="issue #4244")
def test_restriction_token_and_nontoken(cql, test_keyspace):
    """Combine a token(p) restriction with restrictions on c and on p.

    Ten partitions (p in 0..9) with two rows each (c in 0..1) are
    written. Marked xfail with reason "issue #4244".
    """
    schema = "p int, c int, PRIMARY KEY (p, c)"
    with new_test_table(cql, test_keyspace, schema) as table:
        insert = cql.prepare(f"INSERT INTO {table} (p, c) VALUES (?, ?)")
        for p in range(10):
            for c in range(2):
                cql.execute(insert, [p, c])

        # We don't know which tokens the partition keys 0..9 got, so we need
        # to retrieve them first:
        token_rows = cql.execute(f"SELECT p,token(p) FROM {table} WHERE c = 1 ALLOW FILTERING")
        p_tokens = [(row[0], row[1]) for row in token_rows]

        # Index 5 is the sixth (partition key, token) pair that was returned.
        somep, sometoken = p_tokens[5]

        # Check just token(p) < sometoken:
        result = list(cql.execute(f"SELECT p,c FROM {table} WHERE token(p) < {sometoken}"))
        expected = []
        for key, tok in p_tokens:
            if tok < sometoken:
                # Every partition holds the two rows c=0 and c=1.
                expected.extend((key, c) for c in range(2))
        assert result == expected

        # Now check combination of restriction on token(p) and on c:
        result = list(cql.execute(f"SELECT p,c FROM {table} WHERE token(p) < {sometoken} AND c=1 ALLOW FILTERING"))
        expected = [(key, 1) for key, tok in p_tokens if tok < sometoken]
        assert result == expected

        # Now check combination of restriction on token(p) and on p itself
        # This reproduces issue #4244.
        result = list(cql.execute(f"SELECT p,c FROM {table} WHERE token(p) <= {sometoken} AND p = {somep}"))
        assert result == [(somep,0), (somep,1)]
# Until #4244 is fixed Scylla should forbid combining restrictions on both
# token and partition key columns. Correctness of some functions depends
# on the assumption that when a token restriction is present there are
# no restrictions on partition key columns. One such function is is_satisfied_by,
# which needs to be modified when #4244 gets fixed.
def test_restriction_token_and_nontoken_forbidden(scylla_only, cql, test_keyspace):
    """Check that mixing token(p) with a restriction on p is rejected.

    Each query combines at least one token(p) restriction with a
    restriction on the partition key column p, and each is expected to
    raise InvalidRequest. The test requests the `scylla_only` fixture
    (presumably restricting it to Scylla - confirm in conftest).
    """
    with new_test_table(cql, test_keyspace, "p int PRIMARY KEY") as table:
        forbidden_queries = [
            f"SELECT p FROM {table} WHERE token(p) = 0 AND p = 0",
            f"SELECT p FROM {table} WHERE p = 0 AND token(p) = 0",
            f"SELECT p FROM {table} WHERE token(p) < 0 AND p = 1",
            f"SELECT p FROM {table} WHERE token(p) > 0 AND token(p) < 10 AND p = 1",
            f"SELECT p FROM {table} WHERE token(p) > 0 AND token(p) < 0 AND p = 1",
            f"SELECT p FROM {table} WHERE token(p) = 0 AND p < 0",
            f"SELECT p FROM {table} WHERE token(p) = 0 AND p <= 0",
            f"SELECT p FROM {table} WHERE token(p) = 0 AND p > 0",
            f"SELECT p FROM {table} WHERE token(p) = 0 AND p >= 0",
            f"SELECT p FROM {table} WHERE token(p) = 0 AND p != 0",
            f"SELECT p FROM {table} WHERE token(p) = 0 AND p IN (0, 1, 2)",
        ]
        # Queries run in the listed order; the first one that is accepted
        # fails the test.
        for query in forbidden_queries:
            with pytest.raises(InvalidRequest):
                cql.execute(query)
# Regression test for #9482
def test_scan_ending_with_static_row(cql, test_keyspace):
    """Scan a table whose partitions only had their static column set.

    The static column s is set for pk in 0..99 and the whole table is
    then read with fetch_size=10. The test passes if the read completes
    without raising.
    """
    schema = "pk int, ck int, s int STATIC, v int, PRIMARY KEY (pk, ck)"
    with new_test_table(cql, test_keyspace, schema) as table:
        update = cql.prepare(f"UPDATE {table} SET s = ? WHERE pk = ?")
        for pk in range(100):
            cql.execute(update, (0, pk))

        statement = SimpleStatement(f"SELECT * FROM {table}", fetch_size=10)
        # This will trigger an error in either processing or building the query
        # results. The success criteria for this test is the query finishing
        # without errors, so the rows are consumed and discarded.
        for _ in cql.execute(statement):
            pass