mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-12 19:02:12 +00:00
cql: fix hang during certain SELECT statements
The function intersection(r1,r2) in statement_restrictions.cc is used
when several WHERE restrictions were applied to the same column.
For example, for "WHERE b<1 AND b<2" the intersection of the two ranges
is calculated to be b<1.
As noted in issue #18690, Scylla is inconsistent in where it allows or
doesn't allow these intersecting restrictions. But where they are
allowed they must be implemented correctly. And it turns out the
function intersection() had a bug that caused it to sometimes enter
an infinite loop - when the intent was only to call itself once with
swapped parameters.
This patch includes a test reproducing this bug, and a fix for the
bug. The test hangs before the fix, and passes after the fix.
While at it, I carefully reviewed the entire code used to implement
the intersection() function to try to make sure that the bug we found
was the only one. I also added a few more comments where I thought they
were needed to understand complicated logic of the code.
The bug, the fix and the test were originally discovered by
Michał Chojnowski.
Fixes #18688
Refs #18690
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
(cherry picked from commit 27ab560abd)
Closes scylladb/scylladb#18717
This commit is contained in:
committed by
Botond Dénes
parent
ae6c8753e6
commit
0d4e22ef55
@@ -1132,13 +1132,14 @@ bool starts_before_start(
|
||||
const auto len1 = r1.start()->value().representation().size();
|
||||
const auto len2 = r2.start()->value().representation().size();
|
||||
if (len1 == len2) { // The values truly are equal.
|
||||
// (a)>=(1) starts before (a)>(1)
|
||||
return r1.start()->is_inclusive() && !r2.start()->is_inclusive();
|
||||
} else if (len1 < len2) { // r1 start is a prefix of r2 start.
|
||||
// (a)>=(1) starts before (a,b)>=(1,1), but (a)>(1) doesn't.
|
||||
return r1.start()->is_inclusive();
|
||||
} else { // r2 start is a prefix of r1 start.
|
||||
// (a,b)>=(1,1) starts before (a)>(1) but after (a)>=(1).
|
||||
return r2.start()->is_inclusive();
|
||||
return !r2.start()->is_inclusive();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1163,6 +1164,7 @@ bool starts_before_or_at_end(
|
||||
const auto len1 = r1.start()->value().representation().size();
|
||||
const auto len2 = r2.end()->value().representation().size();
|
||||
if (len1 == len2) { // The values truly are equal.
|
||||
// (a)>=(1) starts at end of (a)<=(1)
|
||||
return r1.start()->is_inclusive() && r2.end()->is_inclusive();
|
||||
} else if (len1 < len2) { // r1 start is a prefix of r2 end.
|
||||
// a>=(1) starts before (a,b)<=(1,1) ends, but (a)>(1) doesn't.
|
||||
@@ -1194,6 +1196,7 @@ bool ends_before_end(
|
||||
const auto len1 = r1.end()->value().representation().size();
|
||||
const auto len2 = r2.end()->value().representation().size();
|
||||
if (len1 == len2) { // The values truly are equal.
|
||||
// (a)<(1) ends before (a)<=(1) ends
|
||||
return !r1.end()->is_inclusive() && r2.end()->is_inclusive();
|
||||
} else if (len1 < len2) { // r1 end is a prefix of r2 end.
|
||||
// (a)<(1) ends before (a,b)<=(1,1), but (a)<=(1) doesn't.
|
||||
@@ -1209,7 +1212,10 @@ std::optional<query::clustering_range> intersection(
|
||||
const query::clustering_range& r1,
|
||||
const query::clustering_range& r2,
|
||||
const clustering_key_prefix::prefix_equal_tri_compare& cmp) {
|
||||
// Assume r1's start is to the left of r2's start.
|
||||
// If needed, swap r1 and r2 so that r1's start is to the left of r2's
|
||||
// start. Note that to avoid infinite recursion (#18688) the function
|
||||
// starts_before_start() must never return true for both (r1,r2) and
|
||||
// (r2,r1) - in other words, it must be a *strict* partial order.
|
||||
if (starts_before_start(r2, r1, cmp)) {
|
||||
return intersection(r2, r1, cmp);
|
||||
}
|
||||
|
||||
@@ -29,6 +29,11 @@ def table3(cql, test_keyspace):
|
||||
with new_test_table(cql, test_keyspace, "a int, b int, c int, d int, PRIMARY KEY (a, b, c, d)") as table:
|
||||
yield table
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def table4(cql, test_keyspace):
|
||||
with new_test_table(cql, test_keyspace, "a int, b int, c int, PRIMARY KEY (a, b, c)") as table:
|
||||
yield table
|
||||
|
||||
# Cassandra does not allow a WHERE clause restricting the same column with
|
||||
# an equality more than once. It complains that that column "cannot be
|
||||
# restricted by more than one relation if it includes an Equal".
|
||||
@@ -80,6 +85,47 @@ def test_multiple_restrictions_on_ck(cql, table2, scylla_only):
|
||||
assert [(p, 0)] == list(cql.execute(f'SELECT * FROM {table2} WHERE a = {p} AND b < 1 AND b < 4'))
|
||||
assert [(p, 0)] == list(cql.execute(f'SELECT * FROM {table2} WHERE a = {p} AND b < 1 AND b < 2'))
|
||||
|
||||
# In the above test we noted that Cassandra doesn't allow restricting the
|
||||
# same clustering-key column more than once in the same direction, such as
|
||||
# b < 1 AND b < 2, but Scylla *does* allow it. But it turns out that for
|
||||
# multi-column restrictions we have different validation code, and Scylla
|
||||
# forbids (b) < (1) AND (b,c) < (2,2) in exactly the same way that is
|
||||
# forbidden in Cassandra.
|
||||
# This should be reconsidered (this is what issue #18690 asks) - we should
|
||||
# consider allowing this query in Scylla, but until we do, this test checks
|
||||
# the existing situation that it refused on both Scylla and Cassandra:
|
||||
def test_multiple_multi_column_restrictions_on_ck(cql, table4):
|
||||
p = unique_key_int()
|
||||
cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 1)')
|
||||
cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 2)')
|
||||
cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 3)')
|
||||
cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 1, 1)')
|
||||
cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 1, 2)')
|
||||
cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 1, 3)')
|
||||
assert [(0,1),(0,2),(0,3)] == list(cql.execute(f'SELECT b,c FROM {table4} WHERE a = {p} AND (b) < (1)'))
|
||||
assert [(0,1),(0,2),(0,3),(1,1)] == list(cql.execute(f'SELECT b,c FROM {table4} WHERE a = {p} AND (b,c) < (1,2)'))
|
||||
# Ideally the following query should return the intersection of the
|
||||
# previous two results, but it's currently not supported by Scylla
|
||||
# (#18690) or by Cassandra.
|
||||
with pytest.raises(InvalidRequest, match='More than one restriction was found for the end bound on b'):
|
||||
assert [(0,1),(0,2),(0,3)] == list(cql.execute(f'SELECT b,c FROM {table4} WHERE a = {p} AND (b) < (1) AND (b,c) < (1,2)'))
|
||||
|
||||
# Reproducer for issue #18688 where a specially-crafted multi-column
|
||||
# restriction with two start restrictions could hang Scylla.
|
||||
def test_intersection_hang(cql, table4):
|
||||
p = unique_key_int()
|
||||
cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 0)')
|
||||
cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 1)')
|
||||
cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 2)')
|
||||
cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 1, 1)')
|
||||
# The restriction below should either be rejected with InvalidRequest (as
|
||||
# it is in Cassandra, see discussion in #18690) or return the correct
|
||||
# results. Of course it mustn't hang as it did in #18688.
|
||||
try:
|
||||
assert [(0,1),(0,2)] == list(cql.execute(f'SELECT b,c FROM {table4} WHERE a = {p} AND (b) < (1) AND (b) >= (0) AND (b,c) > (0,0)'))
|
||||
except InvalidRequest as err:
|
||||
assert 'More than one restriction was found for the start bound on b' in str(err)
|
||||
|
||||
# Try a multi-column restriction on several clustering columns, with the wrong
|
||||
# number of values on the right-hand-side of the restriction. Scylla should
|
||||
# cleanly report the error - and not silently ignore it or even crash as in
|
||||
|
||||
Reference in New Issue
Block a user