diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 14ce28b9c9..f4cf490d07 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1132,13 +1132,14 @@ bool starts_before_start( const auto len1 = r1.start()->value().representation().size(); const auto len2 = r2.start()->value().representation().size(); if (len1 == len2) { // The values truly are equal. + // (a)>=(1) starts before (a)>(1) return r1.start()->is_inclusive() && !r2.start()->is_inclusive(); } else if (len1 < len2) { // r1 start is a prefix of r2 start. // (a)>=(1) starts before (a,b)>=(1,1), but (a)>(1) doesn't. return r1.start()->is_inclusive(); } else { // r2 start is a prefix of r1 start. // (a,b)>=(1,1) starts before (a)>(1) but after (a)>=(1). - return r2.start()->is_inclusive(); + return !r2.start()->is_inclusive(); } } @@ -1163,6 +1164,7 @@ bool starts_before_or_at_end( const auto len1 = r1.start()->value().representation().size(); const auto len2 = r2.end()->value().representation().size(); if (len1 == len2) { // The values truly are equal. + // (a)>=(1) starts at end of (a)<=(1) return r1.start()->is_inclusive() && r2.end()->is_inclusive(); } else if (len1 < len2) { // r1 start is a prefix of r2 end. // a>=(1) starts before (a,b)<=(1,1) ends, but (a)>(1) doesn't. @@ -1194,6 +1196,7 @@ bool ends_before_end( const auto len1 = r1.end()->value().representation().size(); const auto len2 = r2.end()->value().representation().size(); if (len1 == len2) { // The values truly are equal. + // (a)<(1) ends before (a)<=(1) ends return !r1.end()->is_inclusive() && r2.end()->is_inclusive(); } else if (len1 < len2) { // r1 end is a prefix of r2 end. // (a)<(1) ends before (a,b)<=(1,1), but (a)<=(1) doesn't. @@ -1209,7 +1212,10 @@ std::optional intersection( const query::clustering_range& r1, const query::clustering_range& r2, const clustering_key_prefix::prefix_equal_tri_compare& cmp) { - // Assume r1's start is to the left of r2's start. + // If needed, swap r1 and r2 so that r1's start is to the left of r2's + // start. Note that to avoid infinite recursion (#18688) the function + // starts_before_start() must never return true for both (r1,r2) and + // (r2,r1) - in other words, it must be a *strict* partial order. if (starts_before_start(r2, r1, cmp)) { return intersection(r2, r1, cmp); } diff --git a/test/cql-pytest/test_restrictions.py b/test/cql-pytest/test_restrictions.py index 003ca10b8e..c7e7d089e6 100644 --- a/test/cql-pytest/test_restrictions.py +++ b/test/cql-pytest/test_restrictions.py @@ -29,6 +29,11 @@ def table3(cql, test_keyspace): with new_test_table(cql, test_keyspace, "a int, b int, c int, d int, PRIMARY KEY (a, b, c, d)") as table: yield table +@pytest.fixture(scope="module") +def table4(cql, test_keyspace): + with new_test_table(cql, test_keyspace, "a int, b int, c int, PRIMARY KEY (a, b, c)") as table: + yield table + # Cassandra does not allow a WHERE clause restricting the same column with # an equality more than once. It complains that that column "cannot be # restricted by more than one relation if it includes an Equal". @@ -80,6 +85,47 @@ def test_multiple_restrictions_on_ck(cql, table2, scylla_only): assert [(p, 0)] == list(cql.execute(f'SELECT * FROM {table2} WHERE a = {p} AND b < 1 AND b < 4')) assert [(p, 0)] == list(cql.execute(f'SELECT * FROM {table2} WHERE a = {p} AND b < 1 AND b < 2')) +# In the above test we noted that Cassandra doesn't allow restricting the +# same clustering-key column more than once in the same direction, such as +# b < 1 AND b < 2, but Scylla *does* allow it. But it turns out that for +# multi-column restrictions we have different validation code, and Scylla +# forbids (b) < (1) AND (b,c) < (2,2) in exactly the same way that is +# forbidden in Cassandra. +# This should be reconsidered (this is what issue #18690 asks) - we should +# consider allowing this query in Scylla, but until we do, this test checks +# the existing situation that it refused on both Scylla and Cassandra: +def test_multiple_multi_column_restrictions_on_ck(cql, table4): + p = unique_key_int() + cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 1)') + cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 2)') + cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 3)') + cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 1, 1)') + cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 1, 2)') + cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 1, 3)') + assert [(0,1),(0,2),(0,3)] == list(cql.execute(f'SELECT b,c FROM {table4} WHERE a = {p} AND (b) < (1)')) + assert [(0,1),(0,2),(0,3),(1,1)] == list(cql.execute(f'SELECT b,c FROM {table4} WHERE a = {p} AND (b,c) < (1,2)')) + # Ideally the following query should return the intersection of the + # previous two results, but it's currently not supported by Scylla + # (#18690) or by Cassandra. + with pytest.raises(InvalidRequest, match='More than one restriction was found for the end bound on b'): + assert [(0,1),(0,2),(0,3)] == list(cql.execute(f'SELECT b,c FROM {table4} WHERE a = {p} AND (b) < (1) AND (b,c) < (1,2)')) + +# Reproducer for issue #18688 where a specially-crafted multi-column +# restriction with two start restrictions could hang Scylla. +def test_intersection_hang(cql, table4): + p = unique_key_int() + cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 0)') + cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 1)') + cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 0, 2)') + cql.execute(f'INSERT INTO {table4} (a, b, c) VALUES ({p}, 1, 1)') + # The restriction below should either be rejected with InvalidRequest (as + # it is in Cassandra, see discussion in #18690) or return the correct + # results. Of course it mustn't hang as it did in #18688. + try: + assert [(0,1),(0,2)] == list(cql.execute(f'SELECT b,c FROM {table4} WHERE a = {p} AND (b) < (1) AND (b) >= (0) AND (b,c) > (0,0)')) + except InvalidRequest as err: + assert 'More than one restriction was found for the start bound on b' in str(err) + # Try a multi-column restriction on several clustering columns, with the wrong # number of values on the right-hand-side of the restriction. Scylla should # cleanly report the error - and not silently ignore it or even crash as in