Files
scylladb/test/boost/statement_restrictions_test.cc
Avi Kivity 620df7103f cql3: statement_restrictions: do not pass view schema back and forth
For indexed queries, statement_restrictions calculates _view_schema,
which is passed via get_view_schema() to indexed_select_statement(),
which passes it right back to statement_restrictions via one of three
functions to calculate clustering ranges.

Avoid the back-and-forth and use the stored value. Using a different
value would be broken.

This change allows unifying the signatures of the four functions that
get clustering ranges.
2026-04-19 20:57:03 +03:00

1321 lines
68 KiB
C++

/*
* Copyright (C) 2021-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
*/
#undef SEASTAR_TESTING_MAIN
#include <seastar/testing/test_case.hh>
#include <vector>
#include <bit>
#include <fmt/ranges.h>
#include "cql3/restrictions/statement_restrictions.hh"
#include "cql3/expr/expr-utils.hh"
#include "cql3/util.hh"
#include "index/secondary_index_manager.hh"
#include "test/lib/cql_assertions.hh"
#include "test/lib/cql_test_env.hh"
#include "test/lib/test_utils.hh"
BOOST_AUTO_TEST_SUITE(statement_restrictions_test)
using namespace cql3;
namespace {
/// Returns statement_restrictions::get_clustering_bounds() of where_clause, with reasonable defaults in
/// boilerplate.
query::clustering_row_ranges slice(
const std::vector<expr::expression>& where_clause, cql_test_env& env,
const sstring& table_name = "t", const sstring& keyspace_name = "ks") {
prepare_context ctx;
return restrictions::analyze_statement_restrictions(
env.data_dictionary(),
env.local_db().find_schema(keyspace_name, table_name),
statements::statement_type::SELECT,
expr::conjunction{where_clause},
ctx,
/*contains_only_static_columns=*/false,
/*for_view=*/false,
/*allow_filtering=*/true,
restrictions::check_indexes::yes)
->get_clustering_bounds(query_options({}));
}
/// Overload that parses the WHERE clause from string. Named differently to disambiguate when where_clause is
/// brace-initialized.
query::clustering_row_ranges slice_parse(
std::string_view where_clause, cql_test_env& env,
const sstring& table_name = "t", const sstring& keyspace_name = "ks") {
return slice(boolean_factors(cql3::util::where_clause_to_relations(where_clause, cql3::dialect{})), env, table_name, keyspace_name);
}
auto I(int32_t x) { return int32_type->decompose(x); }
auto T(const char* t) { return utf8_type->decompose(t); }
const auto open_ended = query::clustering_range::make_open_ended_both_sides();
auto singular(std::vector<bytes> values) {
return query::clustering_range::make_singular(clustering_key_prefix(std::move(values)));
}
constexpr bool inclusive = true, exclusive = false;
auto left_open(std::vector<bytes> lb) {
return query::clustering_range::make_starting_with({clustering_key_prefix(std::move(lb)), exclusive});
}
auto left_closed(std::vector<bytes> lb) {
return query::clustering_range::make_starting_with({clustering_key_prefix(std::move(lb)), inclusive});
}
auto right_open(std::vector<bytes> ub) {
return query::clustering_range::make_ending_with({clustering_key_prefix(std::move(ub)), exclusive});
}
auto right_closed(std::vector<bytes> ub) {
return query::clustering_range::make_ending_with({clustering_key_prefix(std::move(ub)), inclusive});
}
auto left_open_right_closed(std::vector<bytes> lb, std::vector<bytes> ub) {
clustering_key_prefix cklb(std::move(lb)), ckub(std::move(ub));
return query::clustering_range({{cklb, exclusive}}, {{ckub, inclusive}});
}
auto left_closed_right_open(std::vector<bytes> lb, std::vector<bytes> ub) {
clustering_key_prefix cklb(std::move(lb)), ckub(std::move(ub));
return query::clustering_range({{cklb, inclusive}}, {{ckub, exclusive}});
}
auto both_open(std::vector<bytes> lb, std::vector<bytes> ub) {
clustering_key_prefix cklb(std::move(lb)), ckub(std::move(ub));
return query::clustering_range({{cklb, exclusive}}, {{ckub, exclusive}});
}
auto both_closed(std::vector<bytes> lb, std::vector<bytes> ub) {
clustering_key_prefix cklb(std::move(lb)), ckub(std::move(ub));
return query::clustering_range({{cklb, inclusive}}, {{ckub, inclusive}});
}
expr::tuple_constructor
column_definitions_as_tuple_constructor(const std::vector<const column_definition*>& defs) {
std::vector<expr::expression> columns;
std::vector<data_type> column_types;
columns.reserve(defs.size());
for (auto& def : defs) {
columns.push_back(expr::column_value{def});
column_types.push_back(def->type);
}
data_type ttype = tuple_type_impl::get_instance(std::move(column_types));
return expr::tuple_constructor{std::move(columns), std::move(ttype)};
}
} // anonymous namespace
SEASTAR_TEST_CASE(slice_empty_restriction) {
return do_with_cql_env_thread([](cql_test_env& e) {
cquery_nofail(e, "create table ks.t(p int, c int, primary key(p,c))");
BOOST_CHECK_EQUAL(slice(/*where_clause=*/{}, e), std::vector{open_ended});
});
}
SEASTAR_TEST_CASE(slice_one_column) {
return do_with_cql_env_thread([](cql_test_env& e) {
cquery_nofail(e, "create table ks.t(p int, c text, primary key(p,c))");
BOOST_CHECK_EQUAL(slice_parse("p=1", e), std::vector{open_ended});
BOOST_CHECK_EQUAL(slice_parse("c='123'", e), std::vector{singular({T("123")})});
BOOST_CHECK_EQUAL(slice_parse("c='a' and c='a'", e), std::vector{singular({T("a")})});
BOOST_CHECK_EQUAL(slice_parse("c='a' and c='b'", e), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("c like '123'", e), std::vector{open_ended});
BOOST_CHECK_EQUAL(slice_parse("c in ('x','y','z')", e),
(std::vector{singular({T("x")}), singular({T("y")}), singular({T("z")})}));
BOOST_CHECK_EQUAL(slice_parse("c in ('x')", e), std::vector{singular({T("x")})});
BOOST_CHECK_EQUAL(slice_parse("c in ()", e), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("c in ('x','y') and c in ('a','b')", e), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("c in ('x','y') and c='z'", e), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("c in ('x','y') and c='x'", e), std::vector{singular({T("x")})});
BOOST_CHECK_EQUAL(slice_parse("c in ('a','b','c','b','a')", e), std::vector({
singular({T("a")}), singular({T("b")}), singular({T("c")})}));
BOOST_CHECK_EQUAL(slice_parse("c>'x'", e), std::vector{left_open({T("x")})});
BOOST_CHECK_EQUAL(slice_parse("c>='x'", e), std::vector{left_closed({T("x")})});
BOOST_CHECK_EQUAL(slice_parse("c<'x'", e), std::vector{right_open({T("x")})});
BOOST_CHECK_EQUAL(slice_parse("c<='x'", e), std::vector{right_closed({T("x")})});
});
}
SEASTAR_TEST_CASE(slice_two_columns) {
return do_with_cql_env_thread([](cql_test_env& e) {
cquery_nofail(e, "create table ks.t(p int, c1 int, c2 text, primary key(p,c1,c2))");
BOOST_CHECK_EQUAL(slice_parse("c1=123 and c2='321'", e), std::vector{singular({I(123), T("321")})});
BOOST_CHECK_EQUAL(slice_parse("c1=123", e), std::vector{singular({I(123)})});
BOOST_CHECK_EQUAL(slice_parse("c1=123 and c2 like '321'", e), std::vector{singular({I(123)})});
BOOST_CHECK_EQUAL(slice_parse("c1=123 and c1=123", e), std::vector{singular({I(123)})});
BOOST_CHECK_EQUAL(slice_parse("c2='abc'", e), std::vector{open_ended});
BOOST_CHECK_EQUAL(slice_parse("c1=0 and c1=1 and c2='a'", e), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("c1=0 and c2='a' and c1=0", e), std::vector{singular({I(0), T("a")})});
BOOST_CHECK_EQUAL(slice_parse("c2='abc' and c1 in (1,2,3)", e),
(std::vector{
singular({I(1), T("abc")}),
singular({I(2), T("abc")}),
singular({I(3), T("abc")})}));
BOOST_CHECK_EQUAL(slice_parse("c1 in (1,2) and c2='x'", e),
(std::vector{
singular({I(1), T("x")}),
singular({I(2), T("x")})}));
BOOST_CHECK_EQUAL(slice_parse("c1 in (1,2) and c2 in ('x','y')", e),
(std::vector{
singular({I(1), T("x")}), singular({I(1), T("y")}),
singular({I(2), T("x")}), singular({I(2), T("y")})}));
BOOST_CHECK_EQUAL(slice_parse("c1 in (1) and c1 in (1) and c2 in ('x', 'y')", e),
(std::vector{singular({I(1), T("x")}), singular({I(1), T("y")})}));
BOOST_CHECK_EQUAL(slice_parse("c1 in (1) and c1 in (2) and c2 in ('x')", e), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("c1 in (1) and c2='x'", e), std::vector{singular({I(1), T("x")})});
BOOST_CHECK_EQUAL(slice_parse("c1 in () and c2='x'", e), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("c2 in ('x','y')", e), std::vector{open_ended});
BOOST_CHECK_EQUAL(slice_parse("c1 in (1,2,3)", e),
(std::vector{singular({I(1)}), singular({I(2)}), singular({I(3)})}));
BOOST_CHECK_EQUAL(slice_parse("c1 in (1)", e), std::vector{singular({I(1)})});
BOOST_CHECK_EQUAL(slice_parse("c1 in ()", e), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("c2 like 'a' and c1 in (1,2)", e),
(std::vector{singular({I(1)}), singular({I(2)})}));
BOOST_CHECK_EQUAL(slice_parse("c1=123 and c2>'321'", e), std::vector{
left_open_right_closed({I(123), T("321")}, {I(123)})});
BOOST_CHECK_EQUAL(slice_parse("c1<123 and c2>'321'", e), std::vector{right_open({I(123)})});
BOOST_CHECK_EQUAL(slice_parse("c1>=123 and c2='321'", e), std::vector{left_closed({I(123)})});
});
}
SEASTAR_TEST_CASE(slice_multi_column) {
return do_with_cql_env_thread([](cql_test_env& e) {
cquery_nofail(e, "create table ks.t(p int, c1 int, c2 int, c3 int, primary key(p,c1,c2,c3))");
BOOST_CHECK_EQUAL(slice_parse("(c1)=(1)", e), std::vector{singular({I(1)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2)=(1,2)", e), std::vector{singular({I(1), I(2)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)=(1,2,3)", e), std::vector{singular({I(1), I(2), I(3)})});
// TODO: Uncomment when supported:
// BOOST_CHECK_EQUAL(slice_parse("(c1)=(1) and (c1)=(2)", e), query::clustering_row_ranges{});
// BOOST_CHECK_EQUAL(slice_parse("(c1,c2)=(1,2) and (c1,c2)>(2)", e), query::clustering_row_ranges{});
// BOOST_CHECK_EQUAL(slice_parse("(c1,c2)=(1,2) and (c1,c2)=(1,2)", e),
// std::vector{singular({I(1), I(2)})});
BOOST_CHECK_EQUAL(slice_parse("(c1)<(1)", e), std::vector{right_open({I(1)})});
// TODO: Uncomment when supported:
// BOOST_CHECK_EQUAL(slice_parse("(c1)<(1) and (c1)<=(3)", e), std::vector{right_open({I(1)})});
BOOST_CHECK_EQUAL(slice_parse("(c1)>(0) and (c1)<=(1)", e), std::vector{
left_open_right_closed({I(0)}, {I(1)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2)>=(1,2)", e), std::vector{left_closed({I(1), I(2)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2)>=(1,2) and (c1)<(9)", e), std::vector{
left_closed_right_open({I(1), I(2)}, {I(9)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2)>=(1,2) and (c1,c2)<=(11,12)", e),
std::vector{both_closed({I(1), I(2)}, {I(11), I(12)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,2,3)", e), std::vector{left_open({I(1), I(2), I(3)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,2,3) and (c1,c2,c3)<(1,2,3)", e), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,2,3) and (c1,c2,c3)<(10,20,30)", e),
std::vector{both_open({I(1), I(2), I(3)}, {I(10), I(20), I(30)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,2,3) and (c1,c2)<(10,20)", e),
std::vector{both_open({I(1), I(2), I(3)}, {I(10), I(20)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>=(1,2,3) and (c1,c2)<(1,2)", e), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2)>(1,2) and (c1,c2,c3)<=(1,2,3)", e), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("(c1) IN ((1))", e), std::vector{singular({I(1)})});
BOOST_CHECK_EQUAL(slice_parse("(c1) IN ((1),(10))", e), (std::vector{
singular({I(1)}), singular({I(10)})}));
BOOST_CHECK_EQUAL(slice_parse("(c1,c2) IN ((1,2),(10,20))", e), (std::vector{
singular({I(1), I(2)}), singular({I(10), I(20)})}));
BOOST_CHECK_EQUAL(slice_parse("(c1,c2) IN ((10,20),(1,2))", e), (std::vector{
singular({I(1), I(2)}), singular({I(10), I(20)})}));
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3) IN ((1,2,3),(10,20,30))", e), (std::vector{
singular({I(1), I(2), I(3)}), singular({I(10), I(20), I(30)})}));
});
}
SEASTAR_TEST_CASE(slice_multi_column_mixed_order) {
return do_with_cql_env_thread([](cql_test_env& e) {
// First two columns ascending:
cquery_nofail(
e,
"create table t1(p int, c1 int, c2 int, c3 int, c4 int, primary key(p,c1,c2,c3,c4)) "
"with clustering order by (c1 asc, c2 asc, c3 desc, c4 desc)");
// Not mixed order:
BOOST_CHECK_EQUAL(slice_parse("(c1,c2)>(1,1) and (c1,c2)<(9,9)", e, "t1"), (std::vector{
both_open({I(1), I(1)}, {I(9), I(9)})}));
// Same upper/lower bound lengths:
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,1,1) and (c1,c2,c3)<(9,9,9)", e, "t1"), (std::vector{
// 1<c1<9
both_open({I(1)}, {I(9)}),
// or (c1=1 and c2>1)
left_open_right_closed({I(1), I(1)}, {I(1)}),
// or (c1=1 and c2=1 and c3>1)
left_closed_right_open({I(1), I(1)}, {I(1), I(1), I(1)}),
// or (c1=9 and c2<9)
left_closed_right_open({I(9)}, {I(9), I(9)}),
// or (c1=9 and c2=9 and c3<9)
left_open_right_closed({I(9), I(9), I(9)}, {I(9), I(9)})}));
// Common initial values in lower/upper bound:
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,1,1) and (c1,c2,c3)<(1,9,9)", e, "t1"), (std::vector{
// c1=1 and c2>1 and c2<9
both_open({I(1), I(1)}, {I(1), I(9)}),
// or c1=1 and c2=1 and c3>1
left_closed_right_open({I(1), I(1)}, {I(1), I(1), I(1)}),
// or c1=1 and c2=9 and c3<9
left_open_right_closed({I(1), I(9), I(9)}, {I(1), I(9)})}));
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>=(1,1,1) and (c1,c2,c3)<(1,9,9)", e, "t1"), (std::vector{
// c1=1 and c2>1 and c2<9
both_open({I(1), I(1)}, {I(1), I(9)}),
// or c1=1 and c2=1 and c3>=1
both_closed({I(1), I(1)}, {I(1), I(1), I(1)}),
// or c1=1 and c2=9 and c3<9
left_open_right_closed({I(1), I(9), I(9)}, {I(1), I(9)})}));
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,1,1) and (c1,c2,c3)<=(1,9,9)", e, "t1"), (std::vector{
// c1=1 and c2>1 and c2<9
both_open({I(1), I(1)}, {I(1), I(9)}),
// or c1=1 and c2=1 and c3>1
left_closed_right_open({I(1), I(1)}, {I(1), I(1), I(1)}),
// or c1=1 and c2=9 and c3<=9
both_closed({I(1), I(9), I(9)}, {I(1), I(9)})}));
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>=(1,1,1) and (c1,c2,c3)<=(1,9,9)", e, "t1"), (std::vector{
// c1=1 and c2>1 and c2<9
both_open({I(1), I(1)}, {I(1), I(9)}),
// or c1=1 and c2=1 and c3>=1
both_closed({I(1), I(1)}, {I(1), I(1), I(1)}),
// or c1=1 and c2=9 and c3<=9
both_closed({I(1), I(9), I(9)}, {I(1), I(9)})}));
// Same result for same inequalities in different order:
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)<=(1,9,9) and (c1,c2,c3)>=(1,1,1)", e, "t1"), (std::vector{
both_open({I(1), I(1)}, {I(1), I(9)}),
both_closed({I(1), I(1)}, {I(1), I(1), I(1)}),
both_closed({I(1), I(9), I(9)}, {I(1), I(9)})}));
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,1,1) and (c1,c2,c3)<(1,1,9)", e, "t1"), std::vector{
// c1=1 and c2=1 and 9>c3>1
both_open({I(1), I(1), I(9)}, {I(1), I(1), I(1)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,1,1) and (c1,c2,c3)<(1,1,1)", e, "t1"),
query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,1,1) and (c1,c2,c3)<=(1,1,1)", e, "t1"),
query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>=(1,1,1) and (c1,c2,c3)<(1,1,1)", e, "t1"),
query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>=(1,1,1) and (c1,c2,c3)<=(1,1,1)", e, "t1"), std::vector{
singular({I(1), I(1), I(1)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,1,1) and (c1,c2)<(1,1)", e, "t1"), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>=(1,1,1) and (c1,c2)<(1,1)", e, "t1"), query::clustering_row_ranges{});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>(1,1,1) and (c1,c2)<=(1,1)", e, "t1"), std::vector{
// c1=1 and c2=1 and c3>1
left_closed_right_open({I(1), I(1)}, {I(1), I(1), I(1)})});
// Equality:
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)=(1,1,1)", e, "t1"), std::vector{singular({I(1), I(1), I(1)})});
// TODO: Uncomment when supported.
// BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)=(1,1,1) and (c1,c2,c3)=(1,1,1)", e, "t1"), std::vector{
// singular({I(1), I(1), I(1)})});
BOOST_CHECK_EQUAL(slice_parse("(c1,c2,c3)>=(1,1,1) and (c1,c2,c3)<=(1,1,1)", e, "t1"), std::vector{
singular({I(1), I(1), I(1)})});
// First two columns descending:
cquery_nofail(
e,
"create table t2(p int, c1 int, c2 int, c3 int, primary key(p,c1,c2,c3)) "
"with clustering order by (c1 desc, c2 desc, c3 asc)");
BOOST_CHECK_EQUAL(slice_parse("(c1,c2)>(1,1) and (c1,c2)<(9,9)", e, "t2"), std::vector{
both_open({I(9), I(9)}, {I(1), I(1)})});
// Alternating desc and asc:
cquery_nofail(
e,
"create table t3 (p int, a int, b int, c int, d int, PRIMARY KEY (p, a, b, c, d)) "
"with clustering order by (a desc, b asc, c desc, d asc);");
BOOST_CHECK_EQUAL(slice_parse("(a,b,c,d)>=(0,1,2,3) and (a,b)<=(0,1)", e, "t3"), (std::vector{
// a=0 and b=1 and c>2
left_closed_right_open({I(0), I(1)}, {I(0), I(1), I(2)}),
// or a=0 and b=1 and c=2 and d>=3
both_closed({I(0), I(1), I(2), I(3)}, {I(0), I(1), I(2)})}));
BOOST_CHECK_EQUAL(slice_parse("(a,b)>=(0,1)", e, "t3"), (std::vector{
// a>0
right_open({I(0)}),
// or a=0 and b>=1
both_closed({I(0), I(1)}, {I(0)})}));
BOOST_CHECK_EQUAL(slice_parse("(a,b)>=SCYLLA_CLUSTERING_BOUND(0,1)", e, "t3"), std::vector{
left_closed({I(0), I(1)})});
});
}
SEASTAR_TEST_CASE(slice_single_column_mixed_order) {
return do_with_cql_env_thread([](cql_test_env& e) {
cquery_nofail(
e,
"create table t (p int, a int, b int, c int, d int, PRIMARY KEY (p, a, b, c, d)) "
"with clustering order by (a desc, b asc, c desc, d asc);");
BOOST_CHECK_EQUAL(slice_parse("a in (1,2,3,2,1)", e), (std::vector{
singular({I(3)}), singular({I(2)}), singular({I(1)})}));
BOOST_CHECK_EQUAL(slice_parse("a in (1,2,3,2,1) and b in (1,2,1)", e), (std::vector{
singular({I(3), I(1)}), singular({I(3), I(2)}),
singular({I(2), I(1)}), singular({I(2), I(2)}),
singular({I(1), I(1)}), singular({I(1), I(2)})}));
});
}
// Regression test: verifies that index selection (find_idx), uses_secondary_indexing,
// and need_filtering produce consistent results across all supported index types:
// - regular_values (standard column EQ)
// - keys (set CONTAINS, map CONTAINS KEY)
// - collection_values (map/list CONTAINS)
// - keys_and_values (map subscript EQ)
// - full (frozen collection EQ; round-trips to regular_values via serialization)
// - local vs global index scoring
SEASTAR_TEST_CASE(index_selection) {
return do_with_cql_env_thread([](cql_test_env& e) {
cquery_nofail(e, "CREATE TABLE ks.idx_test ("
" pk1 int, pk2 int,"
" ck1 int, ck2 int,"
" v1 int, v2 int, v3 int,"
" s1 set<int>,"
" m1 map<int, text>,"
" l1 list<int>,"
" fs frozen<set<int>>,"
" PRIMARY KEY ((pk1, pk2), ck1, ck2)"
")");
// 10 indexes covering all target types.
cquery_nofail(e, "CREATE INDEX idx_v1 ON ks.idx_test(v1)");
cquery_nofail(e, "CREATE INDEX idx_v2 ON ks.idx_test(v2)");
cquery_nofail(e, "CREATE INDEX idx_v3_local ON ks.idx_test((pk1,pk2), v3)");
cquery_nofail(e, "CREATE INDEX idx_ck1 ON ks.idx_test(ck1)");
cquery_nofail(e, "CREATE INDEX idx_s1 ON ks.idx_test(s1)"); // keys (rewritten from VALUES for sets)
cquery_nofail(e, "CREATE INDEX idx_m1_values ON ks.idx_test(VALUES(m1))"); // collection_values
cquery_nofail(e, "CREATE INDEX idx_m1_keys ON ks.idx_test(KEYS(m1))"); // keys
cquery_nofail(e, "CREATE INDEX idx_m1_entries ON ks.idx_test(ENTRIES(m1))"); // keys_and_values
cquery_nofail(e, "CREATE INDEX idx_l1 ON ks.idx_test(l1)"); // collection_values
cquery_nofail(e, "CREATE INDEX idx_fs ON ks.idx_test(FULL(fs))"); // full -> round-trips to regular_values
auto schema = e.local_db().find_schema("ks", "idx_test");
auto& sim = e.data_dictionary().find_column_family(schema).get_index_manager();
struct expected {
std::string_view where_clause;
std::optional<sstring> index_name; // nullopt = no index selected
bool uses_secondary_indexing;
bool need_filtering;
};
// Build statement_restrictions from a WHERE clause string and return the
// index-selection result.
auto check = [&](std::string_view where_clause) -> expected {
prepare_context ctx;
auto factors = where_clause.empty()
? std::vector<expr::expression>{}
: boolean_factors(cql3::util::where_clause_to_relations(where_clause, cql3::dialect{}));
auto sr = restrictions::analyze_statement_restrictions(
e.data_dictionary(),
schema,
statements::statement_type::SELECT,
expr::conjunction{std::move(factors)},
ctx,
/*contains_only_static_columns=*/false,
/*for_view=*/false,
/*allow_filtering=*/true,
restrictions::check_indexes::yes);
auto [idx, restrictions_expr] = sr->find_idx(sim);
return {where_clause,
idx ? std::optional(idx->metadata().name()) : std::nullopt,
sr->uses_secondary_indexing(),
sr->need_filtering()};
};
auto none = std::optional<sstring>{};
auto idx = [](const char* name) { return std::optional<sstring>(name); };
auto verify = [](const expected& got, const expected& want) {
BOOST_CHECK_MESSAGE(got.index_name == want.index_name,
fmt::format("WHERE {}: index_name: got {} want {}",
want.where_clause,
got.index_name.value_or("(none)"),
want.index_name.value_or("(none)")));
BOOST_CHECK_MESSAGE(got.uses_secondary_indexing == want.uses_secondary_indexing,
fmt::format("WHERE {}: uses_secondary_indexing: got {} want {}",
want.where_clause,
got.uses_secondary_indexing,
want.uses_secondary_indexing));
BOOST_CHECK_MESSAGE(got.need_filtering == want.need_filtering,
fmt::format("WHERE {}: need_filtering: got {} want {}",
want.where_clause,
got.need_filtering,
want.need_filtering));
};
// --- A. Regular column EQ (target_type: regular_values) ---
verify(check("v1 = 1"), {"", idx("idx_v1"), true, false});
verify(check("v2 = 1"), {"", idx("idx_v2"), true, false});
// WHERE-clause order tiebreak: first column in WHERE wins for equal scores.
verify(check("v1 = 1 AND v2 = 1"), {"", idx("idx_v1"), true, true});
verify(check("v2 = 1 AND v1 = 1"), {"", idx("idx_v2"), true, true});
// Slices (GT/LT) are not supported by standard secondary indexes.
verify(check("v1 > 1"), {"", none, false, true});
// --- B. Local vs global index scoring ---
// Local index with full PK scores 2, global scores 1.
verify(check("pk1 = 1 AND pk2 = 1 AND v3 = 1"), {"", idx("idx_v3_local"), true, false});
// Local (score 2) beats global (score 1) even when global column appears first.
verify(check("pk1 = 1 AND pk2 = 1 AND v1 = 1 AND v3 = 1"), {"", idx("idx_v3_local"), true, true});
// Local index without full PK gets score 0 and is never picked.
verify(check("v3 = 1"), {"", none, false, true});
// --- C. CK column index (search group ordering) ---
verify(check("ck1 = 1"), {"", idx("idx_ck1"), true, false});
// CK group is iterated before non-PK group, regardless of WHERE order.
verify(check("ck1 = 1 AND v1 = 1"), {"", idx("idx_ck1"), true, true});
verify(check("v1 = 1 AND ck1 = 1"), {"", idx("idx_ck1"), true, true});
// --- D. Set CONTAINS (target_type: keys, rewritten from VALUES for sets) ---
verify(check("s1 CONTAINS 1"), {"", idx("idx_s1"), true, false});
// --- E. Map indexes ---
// CONTAINS on map values (target_type: collection_values).
verify(check("m1 CONTAINS 'one'"), {"", idx("idx_m1_values"), true, false});
// CONTAINS KEY on map keys (target_type: keys).
verify(check("m1 CONTAINS KEY 1"), {"", idx("idx_m1_keys"), true, false});
// Subscript EQ on map entries (target_type: keys_and_values).
verify(check("m1[1] = 'one'"), {"", idx("idx_m1_entries"), true, false});
// --- F. List CONTAINS (target_type: collection_values) ---
verify(check("l1 CONTAINS 1"), {"", idx("idx_l1"), true, false});
// --- G. Frozen collection (FULL index, round-trips to regular_values) ---
verify(check("fs = {1}"), {"", idx("idx_fs"), true, false});
// Same with full PK (local index on v3 is available but idx_fs is global, score 1).
verify(check("pk1 = 1 AND pk2 = 1 AND fs = {1}"), {"", idx("idx_fs"), true, false});
// --- H. Double CONTAINS on same column: CollectionYes && CollectionYes = No ---
verify(check("s1 CONTAINS 1 AND s1 CONTAINS 2"), {"", none, false, true});
// --- I. Collection + regular column tiebreak (WHERE-clause order) ---
verify(check("s1 CONTAINS 1 AND v1 = 1"), {"", idx("idx_s1"), true, true});
verify(check("v1 = 1 AND s1 CONTAINS 1"), {"", idx("idx_v1"), true, true});
// --- J. CK group beats collection in non-PK group ---
verify(check("m1 CONTAINS 'one' AND ck1 = 1"), {"", idx("idx_ck1"), true, true});
// --- K. Edge cases ---
// Full PK only: no secondary index needed.
verify(check("pk1 = 1 AND pk2 = 1"), {"", none, false, false});
// No restrictions at all.
verify(check(""), {"", none, false, false});
// Token restriction with a regular column: index is used.
verify(check("token(pk1, pk2) > 0 AND v1 = 1"), {"", idx("idx_v1"), true, false});
});
}
// Exhaustive combinatorial test: iterates over all 2^N subsets of N restriction
// fragments and, for each subset, verifies a broad set of statement_restrictions
// public APIs. This catches any refactoring that accidentally changes observable
// behaviour for *any* combination of restriction types.
//
// Restriction fragments (15 independent bits, 2^15 = 32768 combinations):
// bit 0: pk1 = 1
// bit 1: pk2 = 2
// bit 2: ck1 = 3 (single-column EQ)
// bit 3: ck2 > 4 (single-column slice)
// bit 4: ck1 IN (3, 6) (single-column IN; includes CK1_EQ value 3)
// bit 5: (ck1, ck2) = (7, 8) (multi-column EQ)
// bit 6: (ck1, ck2) > (9, 10) (multi-column slice)
// bit 7: (ck1, ck2) IN ((11, 12), (13, 14)) (multi-column IN)
// bit 8: v1 = 15 (global index comb_v1, target: regular_values)
// bit 9: v3 = 16 (local index comb_v3_local, target: regular_values)
// bit 10: s1 CONTAINS 17 (global index comb_s1, target: keys — set)
// bit 11: m1 CONTAINS 'alpha' (global index comb_m1_values, target: collection_values)
// bit 12: m2 CONTAINS KEY 18 (global index comb_m2_keys, target: keys — map)
// bit 13: m3[19] = 'beta' (global index comb_m3_entries, target: keys_and_values)
// bit 14: fs = {20, 21} (global index comb_fs, target: full — frozen collection)
SEASTAR_TEST_CASE(combinatorial_restrictions) {
// ASAN's fake-stack shadow buffer for the large lambda below is ~248 KiB;
// bump the thread stack so it doesn't overflow under sanitized builds.
seastar::thread_attributes tattr;
#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer)
tattr.stack_size = 2 * 1024 * 1024;
#endif
return do_with_cql_env_thread([](cql_test_env& e) {
cquery_nofail(e, "CREATE TABLE ks.comb ("
" pk1 int, pk2 int,"
" ck1 int, ck2 int,"
" v1 int, v2 int, v3 int,"
" s1 set<int>,"
" m1 map<int, text>,"
" m2 map<int, text>,"
" m3 map<int, text>,"
" fs frozen<set<int>>,"
" PRIMARY KEY ((pk1, pk2), ck1, ck2)"
")");
cquery_nofail(e, "CREATE INDEX comb_pk1 ON ks.comb(pk1)");
cquery_nofail(e, "CREATE INDEX comb_v1 ON ks.comb(v1)");
cquery_nofail(e, "CREATE INDEX comb_v3_local ON ks.comb((pk1,pk2), v3)");
cquery_nofail(e, "CREATE INDEX comb_s1 ON ks.comb(s1)");
cquery_nofail(e, "CREATE INDEX comb_ck1 ON ks.comb(ck1)");
cquery_nofail(e, "CREATE INDEX comb_m1_values ON ks.comb(VALUES(m1))");
cquery_nofail(e, "CREATE INDEX comb_m2_keys ON ks.comb(KEYS(m2))");
cquery_nofail(e, "CREATE INDEX comb_m3_entries ON ks.comb(ENTRIES(m3))");
cquery_nofail(e, "CREATE INDEX comb_fs ON ks.comb(FULL(fs))");
auto schema = e.local_db().find_schema("ks", "comb");
auto& sim = e.data_dictionary().find_column_family(schema).get_index_manager();
const auto& pk1_def = *schema->get_column_definition("pk1");
const auto& pk2_def = *schema->get_column_definition("pk2");
const auto& ck1_def = *schema->get_column_definition("ck1");
const auto& ck2_def = *schema->get_column_definition("ck2");
const auto& v1_def = *schema->get_column_definition("v1");
const auto& v3_def = *schema->get_column_definition("v3");
const auto& s1_def = *schema->get_column_definition("s1");
const auto& m1_def = *schema->get_column_definition("m1");
const auto& m2_def = *schema->get_column_definition("m2");
const auto& m3_def = *schema->get_column_definition("m3");
const auto& fs_def = *schema->get_column_definition("fs");
// Every restriction fragment is an independent bit in the mask.
// This includes CK restriction variants, giving us exhaustive
// coverage of all 2^15 = 32768 combinations.
enum frag : unsigned {
PK1 = 1u << 0, // pk1 = 1 (global index, regular_values)
PK2 = 1u << 1, // pk2 = 2 (no index)
CK1_EQ = 1u << 2, // ck1 = 3
CK2_SLICE = 1u << 3, // ck2 > 4
CK1_IN = 1u << 4, // ck1 IN (3, 6)
MULTI_EQ = 1u << 5, // (ck1, ck2) = (7, 8)
MULTI_SLICE= 1u << 6, // (ck1, ck2) > (9, 10)
MULTI_IN = 1u << 7, // (ck1, ck2) IN ((11, 12), (13, 14))
V1 = 1u << 8, // v1 = 15 (global index, regular_values)
V3 = 1u << 9, // v3 = 16 (local index, regular_values)
S1 = 1u << 10, // s1 CONTAINS 17 (global index, keys — set)
M_VAL = 1u << 11, // m1 CONTAINS 'alpha' (global index, collection_values)
M_KEY = 1u << 12, // m2 CONTAINS KEY 18 (global index, keys — map)
M_ENT = 1u << 13, // m3[19] = 'beta' (global index, keys_and_values)
FS = 1u << 14, // fs = {20, 21} (global index, full)
};
constexpr unsigned N_FRAG = 15;
constexpr unsigned FRAG_TOTAL = 1u << N_FRAG;
constexpr unsigned SINGLE_CK_MASK = CK1_EQ | CK2_SLICE | CK1_IN;
constexpr unsigned MULTI_CK_MASK = MULTI_EQ | MULTI_SLICE | MULTI_IN;
struct fragment_info {
unsigned bit;
const char* clause;
};
// Each fragment uses unique values so that conjunction intersections
// are predictable. CK1_IN includes the CK1_EQ value (3) to ensure
// the intersection is non-empty when both are present.
const fragment_info fragments[] = {
{PK1, "pk1 = 1"},
{PK2, "pk2 = 2"},
{CK1_EQ, "ck1 = 3"},
{CK2_SLICE, "ck2 > 4"},
{CK1_IN, "ck1 IN (3, 6)"},
{MULTI_EQ, "(ck1, ck2) = (7, 8)"},
{MULTI_SLICE, "(ck1, ck2) > (9, 10)"},
{MULTI_IN, "(ck1, ck2) IN ((11, 12), (13, 14))"},
{V1, "v1 = 15"},
{V3, "v3 = 16"},
{S1, "s1 CONTAINS 17"},
{M_VAL, "m1 CONTAINS 'alpha'"},
{M_KEY, "m2 CONTAINS KEY 18"},
{M_ENT, "m3[19] = 'beta'"},
{FS, "fs = {20, 21}"},
};
unsigned total_tested = 0;
unsigned total_illegal = 0;
for (unsigned mask = 0; mask < FRAG_TOTAL; ++mask) {
// --- Illegality detection ---
// Rule 1: Mixing single-column and multi-column CK restrictions
// is always illegal.
// Rule 2: At most one multi-column CK restriction type allowed.
bool has_single_ck = (mask & SINGLE_CK_MASK) != 0;
bool has_multi_ck = (mask & MULTI_CK_MASK) != 0;
unsigned multi_ck_count = std::popcount(mask & MULTI_CK_MASK);
bool is_illegal = (has_single_ck && has_multi_ck)
|| (multi_ck_count > 1);
// Build WHERE clause from all set bits.
std::string where_clause;
for (auto& f : fragments) {
if (mask & f.bit) {
if (!where_clause.empty()) {
where_clause += " AND ";
}
where_clause += f.clause;
}
}
auto ctx_msg = [&](std::string_view api) {
return fmt::format("mask=0x{:04x} WHERE [{}]: {}",
mask, where_clause, api);
};
prepare_context ctx;
auto where_expr = where_clause.empty()
? expr::expression(expr::conjunction{})
: cql3::util::where_clause_to_relations(where_clause, cql3::dialect{});
shared_ptr<const restrictions::statement_restrictions> sr;
try {
sr = restrictions::analyze_statement_restrictions(
e.data_dictionary(),
schema,
statements::statement_type::SELECT,
where_expr,
ctx,
/*contains_only_static_columns=*/false,
/*for_view=*/false,
/*allow_filtering=*/true,
restrictions::check_indexes::yes);
} catch (const exceptions::invalid_request_exception&) {
}
if (is_illegal) {
BOOST_CHECK_MESSAGE(!sr,
ctx_msg("expected exception for illegal CK combination"));
++total_illegal;
++total_tested;
continue;
}
BOOST_REQUIRE_MESSAGE(sr,
ctx_msg("unexpected exception for legal CK combination"));
// --- Derived CK properties ---
bool has_multi_column = has_multi_ck;
// Which CK columns are restricted?
bool has_ck1 = (mask & (CK1_EQ | CK1_IN | MULTI_EQ | MULTI_SLICE | MULTI_IN)) != 0;
bool has_ck2 = (mask & (CK2_SLICE | MULTI_EQ | MULTI_SLICE | MULTI_IN)) != 0;
bool has_any_ck = has_ck1 || has_ck2;
unsigned ck_count = (has_ck1 ? 1u : 0u) + (has_ck2 ? 1u : 0u);
// clustering_key_restrictions_has_IN: any IN binop present
bool has_ck_in = (mask & (CK1_IN | MULTI_IN)) != 0;
// clustering_key_restrictions_has_only_eq: no non-EQ binop in CK
// restrictions. Vacuously true when no CK restrictions.
bool has_only_eq = !(mask & (CK2_SLICE | CK1_IN | MULTI_SLICE | MULTI_IN));
// clustering_key_restrictions_need_filtering (internal predicate,
// before ORing with has_partition_key_unrestricted_components):
// For multi-column restrictions, always false.
// For single-column: true when there's a CK gap (ck2 restricted
// without ck1 being restricted by EQ or IN).
bool ck_need_filtering_internal = !has_multi_column
&& (mask & CK2_SLICE)
&& !(mask & (CK1_EQ | CK1_IN));
// has_eq_restriction_on_column: recognizes column_value and
// tuple_constructor LHS with oper_t::EQ.
// CK1_EQ → true for ck1. MULTI_EQ → true for both ck1 and ck2.
// IN, slice → false.
bool ck1_has_eq = (mask & CK1_EQ) || (mask & MULTI_EQ);
bool ck2_has_eq = (mask & MULTI_EQ) != 0;
// comb_pk1 index selection:
// Requires: pk1 restricted with EQ and PK incomplete
// (_is_key_range). PK restrictions are iterated first in
// _index_restrictions, so comb_pk1 (global, score 1) beats
// all same-score indexes that come later.
bool selects_comb_pk1 = (mask & PK1) && !(mask & PK2);
// comb_ck1 index selection:
// Requires: !full_pk, single-column EQ on ck1 (not IN — index
// only supports EQ), no CK1_IN (makes conjunction unsupported),
// no multi-column.
// In legal combos, CK1_EQ set → no MULTI_* possible.
// When pk1 is also restricted, comb_pk1 takes priority (PK
// group comes before CK group in _index_restrictions).
bool selects_comb_ck1 = (mask & CK1_EQ) && !(mask & CK1_IN);
// Index clustering range multiplier:
// CK1_IN alone produces 2 IN values → 2 ranges.
// CK1_EQ + CK1_IN: intersection narrows to 1.
// Multi-column: not added to prefix, multiplier 1.
unsigned idx_range_multiplier = ((mask & CK1_IN) && !(mask & CK1_EQ)) ? 2 : 1;
// --- Partition key APIs ---
bool has_pk1 = (mask & PK1) != 0;
bool has_pk2 = (mask & PK2) != 0;
bool full_pk = has_pk1 && has_pk2;
BOOST_CHECK_MESSAGE(
sr->partition_key_restrictions_is_empty() == (!has_pk1 && !has_pk2),
ctx_msg("partition_key_restrictions_is_empty"));
BOOST_CHECK_MESSAGE(
sr->partition_key_restrictions_is_all_eq() == true,
ctx_msg("partition_key_restrictions_is_all_eq"));
BOOST_CHECK_MESSAGE(
sr->has_partition_key_unrestricted_components() == (!has_pk1 || !has_pk2),
ctx_msg("has_partition_key_unrestricted_components"));
unsigned pk_restricted = (has_pk1 ? 1u : 0u) + (has_pk2 ? 1u : 0u);
BOOST_CHECK_MESSAGE(
sr->partition_key_restrictions_size() == pk_restricted,
ctx_msg(fmt::format("partition_key_restrictions_size: got {} want {}",
sr->partition_key_restrictions_size(), pk_restricted)));
BOOST_CHECK_MESSAGE(
sr->has_token_restrictions() == false,
ctx_msg("has_token_restrictions"));
BOOST_CHECK_MESSAGE(
sr->key_is_in_relation() == false,
ctx_msg("key_is_in_relation"));
// is_key_range: true unless full PK is specified with EQ
BOOST_CHECK_MESSAGE(
sr->is_key_range() == !full_pk,
ctx_msg("is_key_range"));
// --- Clustering key APIs ---
BOOST_CHECK_MESSAGE(
sr->has_clustering_columns_restriction() == has_any_ck,
ctx_msg("has_clustering_columns_restriction"));
BOOST_CHECK_MESSAGE(
sr->clustering_columns_restrictions_size() == ck_count,
ctx_msg(fmt::format("clustering_columns_restrictions_size: got {} want {}",
sr->clustering_columns_restrictions_size(), ck_count)));
BOOST_CHECK_MESSAGE(
sr->has_unrestricted_clustering_columns() == (ck_count < 2),
ctx_msg("has_unrestricted_clustering_columns"));
BOOST_CHECK_MESSAGE(
sr->clustering_key_restrictions_has_IN() == has_ck_in,
ctx_msg("clustering_key_restrictions_has_IN"));
BOOST_CHECK_MESSAGE(
sr->clustering_key_restrictions_has_only_eq() == has_only_eq,
ctx_msg("clustering_key_restrictions_has_only_eq"));
// ck_restrictions_need_filtering:
// = has_any_ck && (!full_pk || ck_need_filtering_internal)
// The internal predicate captures column-gap / non-prefix issues;
// has_partition_key_unrestricted_components() is ORed in by the
// outer ck_restrictions_need_filtering().
bool ck_needs_filter = has_any_ck && (!full_pk || ck_need_filtering_internal);
BOOST_CHECK_MESSAGE(
sr->ck_restrictions_need_filtering() == ck_needs_filter,
ctx_msg("ck_restrictions_need_filtering"));
// --- Non-primary-key APIs ---
bool has_v1 = (mask & V1) != 0;
bool has_v3 = (mask & V3) != 0;
bool has_s1 = (mask & S1) != 0;
bool has_m_val = (mask & M_VAL) != 0;
bool has_m_key = (mask & M_KEY) != 0;
bool has_m_ent = (mask & M_ENT) != 0;
bool has_fs = (mask & FS) != 0;
bool has_nonpk = has_v1 || has_v3 || has_s1
|| has_m_val || has_m_key || has_m_ent || has_fs;
BOOST_CHECK_MESSAGE(
sr->has_non_primary_key_restriction() == has_nonpk,
ctx_msg("has_non_primary_key_restriction"));
// --- Per-column restriction checks ---
BOOST_CHECK_MESSAGE(
sr->is_restricted(&pk1_def) == has_pk1,
ctx_msg("is_restricted(pk1)"));
BOOST_CHECK_MESSAGE(
sr->is_restricted(&pk2_def) == has_pk2,
ctx_msg("is_restricted(pk2)"));
BOOST_CHECK_MESSAGE(
sr->is_restricted(&ck1_def) == has_ck1,
ctx_msg("is_restricted(ck1)"));
BOOST_CHECK_MESSAGE(
sr->is_restricted(&ck2_def) == has_ck2,
ctx_msg("is_restricted(ck2)"));
BOOST_CHECK_MESSAGE(
sr->is_restricted(&v1_def) == has_v1,
ctx_msg("is_restricted(v1)"));
BOOST_CHECK_MESSAGE(
sr->is_restricted(&v3_def) == has_v3,
ctx_msg("is_restricted(v3)"));
BOOST_CHECK_MESSAGE(
sr->is_restricted(&s1_def) == has_s1,
ctx_msg("is_restricted(s1)"));
BOOST_CHECK_MESSAGE(
sr->is_restricted(&m1_def) == has_m_val,
ctx_msg("is_restricted(m1)"));
BOOST_CHECK_MESSAGE(
sr->is_restricted(&m2_def) == has_m_key,
ctx_msg("is_restricted(m2)"));
BOOST_CHECK_MESSAGE(
sr->is_restricted(&m3_def) == has_m_ent,
ctx_msg("is_restricted(m3)"));
BOOST_CHECK_MESSAGE(
sr->is_restricted(&fs_def) == has_fs,
ctx_msg("is_restricted(fs)"));
// has_eq_restriction_on_column:
// pk1/pk2 always EQ when present.
// ck1/ck2 depend on the CK restriction type.
// v1/v3/fs are EQ, s1 is CONTAINS, m1 CONTAINS, m2 CONTAINS KEY,
// m3[1]='a' is subscript EQ (not recognized), fs is regular EQ.
BOOST_CHECK_MESSAGE(
sr->has_eq_restriction_on_column(pk1_def) == has_pk1,
ctx_msg("has_eq_restriction_on_column(pk1)"));
BOOST_CHECK_MESSAGE(
sr->has_eq_restriction_on_column(pk2_def) == has_pk2,
ctx_msg("has_eq_restriction_on_column(pk2)"));
BOOST_CHECK_MESSAGE(
sr->has_eq_restriction_on_column(ck1_def) == ck1_has_eq,
ctx_msg("has_eq_restriction_on_column(ck1)"));
BOOST_CHECK_MESSAGE(
sr->has_eq_restriction_on_column(ck2_def) == ck2_has_eq,
ctx_msg("has_eq_restriction_on_column(ck2)"));
BOOST_CHECK_MESSAGE(
sr->has_eq_restriction_on_column(v1_def) == has_v1,
ctx_msg("has_eq_restriction_on_column(v1)"));
BOOST_CHECK_MESSAGE(
sr->has_eq_restriction_on_column(v3_def) == has_v3,
ctx_msg("has_eq_restriction_on_column(v3)"));
// s1 CONTAINS is not EQ:
BOOST_CHECK_MESSAGE(
sr->has_eq_restriction_on_column(s1_def) == false,
ctx_msg("has_eq_restriction_on_column(s1)"));
// m1 CONTAINS is not EQ:
BOOST_CHECK_MESSAGE(
sr->has_eq_restriction_on_column(m1_def) == false,
ctx_msg("has_eq_restriction_on_column(m1)"));
// m2 CONTAINS KEY is not EQ:
BOOST_CHECK_MESSAGE(
sr->has_eq_restriction_on_column(m2_def) == false,
ctx_msg("has_eq_restriction_on_column(m2)"));
// m3[1] = 'a' is a subscript EQ — not recognized by
// has_eq_restriction_on_column (needs column_value/tuple_constructor LHS).
BOOST_CHECK_MESSAGE(
sr->has_eq_restriction_on_column(m3_def) == false,
ctx_msg("has_eq_restriction_on_column(m3)"));
// fs = {1,2} is a regular EQ on frozen collection:
BOOST_CHECK_MESSAGE(
sr->has_eq_restriction_on_column(fs_def) == has_fs,
ctx_msg("has_eq_restriction_on_column(fs)"));
// --- Index selection ---
auto [idx_opt, idx_expr] = sr->find_idx(sim);
// Determine expected index. The scoring algorithm:
// - do_find_idx iterates _index_restrictions (PK group, then
// CK group, then non-PK group in WHERE-clause order).
// - Multi-column restrictions are skipped (line 1358).
// - Score: local index with full PK = 2, global = 1, local
// without full PK = 0.
// - Strict > for tiebreaking → first with highest score wins.
//
// The PK index (comb_pk1) can be selected when:
// (a) pk1 is restricted with EQ, and
// (b) PK is incomplete (_is_key_range), which triggers
// _uses_secondary_indexing via _has_queriable_pk_index.
// PK restrictions are iterated first in _index_restrictions, so
// comb_pk1 (score 1) beats all later global indexes (tie → first
// wins).
//
// The CK index (comb_ck1) can only be selected when:
// (a) ck1 is restricted with single-column EQ (not IN — the
// index only supports EQ, not IN),
// (b) PK is incomplete (_is_key_range), and
// (c) no multi-column CK restriction (_has_multi_column blocks
// the CK-index path at line 1151).
// (d) CK1_EQ + CK1_IN conjunction makes the index unsupported
// (IN child fails is_supported_by).
// (e) pk1 is NOT restricted (otherwise comb_pk1 wins first).
//
// When comb_ck1 qualifies it is iterated after PK but before
// non-PK in _index_restrictions, and its score 1 ties with any
// non-PK global, so it wins.
std::optional<sstring> expected_idx;
if (selects_comb_pk1) {
expected_idx = "comb_pk1";
} else if (selects_comb_ck1 && !full_pk) {
expected_idx = "comb_ck1";
} else if (full_pk && has_v3) {
// Local index scores 2, beats any global (score 1).
expected_idx = "comb_v3_local";
} else if (has_v1) {
expected_idx = "comb_v1";
} else if (has_s1) {
// v3 without full_pk scores 0 and is skipped.
expected_idx = "comb_s1";
} else if (has_m_val) {
expected_idx = "comb_m1_values";
} else if (has_m_key) {
expected_idx = "comb_m2_keys";
} else if (has_m_ent) {
expected_idx = "comb_m3_entries";
} else if (has_fs) {
expected_idx = "comb_fs";
}
// else: no indexable column (v3 alone without full_pk scores 0)
bool uses_idx = expected_idx.has_value();
BOOST_CHECK_MESSAGE(
(idx_opt ? std::optional(idx_opt->metadata().name()) : std::nullopt) == expected_idx,
ctx_msg(fmt::format("find_idx: got {} want {}",
idx_opt ? idx_opt->metadata().name() : "(none)",
expected_idx.value_or("(none)"))));
BOOST_CHECK_MESSAGE(
sr->uses_secondary_indexing() == uses_idx,
ctx_msg(fmt::format("uses_secondary_indexing: got {} want {}",
sr->uses_secondary_indexing(), uses_idx)));
// --- need_filtering ---
// Filtering is needed when:
// - PK is not fully specified (partial PK needs filtering unless
// using index)
// - CK has a gap (ck2 without ck1) needs filtering
// - Non-PK restrictions that aren't consumed by the index need
// filtering
// - When using an index, remaining restrictions beyond the
// indexed one need filtering
// - Multi-column CK restrictions that can't be converted to
// bounds need filtering
// The exact logic is complex; we check invariants.
bool need_filt = sr->need_filtering();
// 1. If no restrictions at all, no filtering needed.
if (mask == 0) {
BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: empty should be false"));
}
// 2. If only the full PK is specified (no CK, no non-PK), no filtering.
if (mask == (PK1 | PK2)) {
BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: full PK only should be false"));
}
// 3. Single indexed column (no CK): no filtering needed.
if (mask == PK1 || mask == V1 || mask == S1) {
BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: single indexed column should be false"));
}
// 4. If using index + has extra non-PK restrictions, filtering is needed.
if (uses_idx) {
int non_pk_indexed_count = (has_v1 ? 1 : 0) + (has_v3 ? 1 : 0) + (has_s1 ? 1 : 0)
+ (has_m_val ? 1 : 0) + (has_m_key ? 1 : 0) + (has_m_ent ? 1 : 0)
+ (has_fs ? 1 : 0);
if (non_pk_indexed_count > 1) {
BOOST_CHECK_MESSAGE(need_filt,
ctx_msg("need_filtering: multiple non-PK restrictions with index should need filtering"));
}
}
// 5. Partial PK with no index needs filtering.
if ((has_pk1 != has_pk2) && !uses_idx) {
BOOST_CHECK_MESSAGE(need_filt,
ctx_msg("need_filtering: partial PK without index should need filtering"));
}
// 6. CK gap (ck2 without ck1, single-column only) needs filtering.
if (ck_need_filtering_internal) {
BOOST_CHECK_MESSAGE(need_filt,
ctx_msg("need_filtering: CK gap should need filtering"));
}
// 7. Non-PK restriction without index needs filtering.
if (has_nonpk && !uses_idx) {
BOOST_CHECK_MESSAGE(need_filt,
ctx_msg("need_filtering: non-PK restriction without index should need filtering"));
}
// --- pk_restrictions_need_filtering ---
bool pk_needs_filter = sr->pk_restrictions_need_filtering();
if (!uses_idx && has_pk1 != has_pk2) {
BOOST_CHECK_MESSAGE(pk_needs_filter,
ctx_msg("pk_restrictions_need_filtering: partial PK should need filtering"));
}
if (!has_pk1 && !has_pk2) {
BOOST_CHECK_MESSAGE(!pk_needs_filter,
ctx_msg("pk_restrictions_need_filtering: no PK should be false"));
}
if (full_pk) {
BOOST_CHECK_MESSAGE(!pk_needs_filter,
ctx_msg("pk_restrictions_need_filtering: full PK should be false"));
}
// --- is_empty ---
BOOST_CHECK_MESSAGE(
sr->is_empty() == (mask == 0),
ctx_msg("is_empty"));
// --- get_not_null_columns: none of our fragments use IS NOT NULL ---
BOOST_CHECK_MESSAGE(
sr->get_not_null_columns().empty(),
ctx_msg("get_not_null_columns should be empty"));
// --- get_partition_key_ranges ---
// Always returns exactly 1 range. Full PK → singular range
// (specific partition). Otherwise → open-ended range.
{
auto pk_ranges = sr->get_partition_key_ranges(query_options({}));
BOOST_CHECK_MESSAGE(pk_ranges.size() == 1,
ctx_msg(fmt::format("get_partition_key_ranges: {} ranges, want 1",
pk_ranges.size())));
if (full_pk) {
BOOST_CHECK_MESSAGE(pk_ranges.size() == 1 && pk_ranges[0].is_singular(),
ctx_msg("get_partition_key_ranges: full PK should yield singular range"));
} else {
BOOST_CHECK_MESSAGE(pk_ranges.size() == 1 && !pk_ranges[0].is_singular(),
ctx_msg("get_partition_key_ranges: incomplete PK should yield non-singular range"));
}
}
// --- get_clustering_bounds ---
// Expected range count:
// Empty CK restrictions → 1 open-ended range.
// Multi-column: MULTI_IN → 2 singular, else → 1.
// Single-column: CK1_IN without CK1_EQ → 2 (IN values expand),
// else → 1 (CK1_EQ narrows intersection to 1 value).
{
auto ck_bounds = sr->get_clustering_bounds(query_options({}));
unsigned expected_ck_bounds;
if (!(mask & (SINGLE_CK_MASK | MULTI_CK_MASK))) {
// No CK restrictions → 1 open-ended range.
expected_ck_bounds = 1;
} else if (mask & MULTI_CK_MASK) {
expected_ck_bounds = (mask & MULTI_IN) ? 2 : 1;
} else {
expected_ck_bounds = ((mask & CK1_IN) && !(mask & CK1_EQ)) ? 2 : 1;
}
BOOST_CHECK_MESSAGE(ck_bounds.size() == expected_ck_bounds,
ctx_msg(fmt::format("get_clustering_bounds: {} ranges, want {}",
ck_bounds.size(), expected_ck_bounds)));
// With no CK restrictions the range should be open-ended.
if (!(mask & (SINGLE_CK_MASK | MULTI_CK_MASK))) {
BOOST_CHECK_MESSAGE(
ck_bounds.size() == 1
&& !ck_bounds[0].start()
&& !ck_bounds[0].end(),
ctx_msg("get_clustering_bounds: no CK should be open-ended"));
}
}
// --- Index table range APIs ---
if (uses_idx) {
bool is_local_idx = (expected_idx == "comb_v3_local");
if (is_local_idx) {
// --- get_local_index_clustering_ranges ---
// Local index CK prefix = (indexed_col, base_ck1, ...).
// The indexed column is always EQ (1 value); CK IN values
// multiply via the base CK appended to the prefix.
auto local_ranges = sr->get_local_index_clustering_ranges(query_options({}));
unsigned expected_local = 1 * idx_range_multiplier;
BOOST_CHECK_MESSAGE(!local_ranges.empty(),
ctx_msg("get_local_index_clustering_ranges should not be empty"));
BOOST_CHECK_MESSAGE(local_ranges.size() == expected_local,
ctx_msg(fmt::format("get_local_index_clustering_ranges: {} ranges, want {}",
local_ranges.size(), expected_local)));
} else {
// --- get_global_index_clustering_ranges ---
// Global index CK prefix = (token, pk1, pk2, ...base CK...).
// With full PK: CK IN values expand the prefix.
// Without full PK: prefix is empty → 1 open-ended range.
auto global_ranges = sr->get_global_index_clustering_ranges(query_options({}));
BOOST_CHECK_MESSAGE(!global_ranges.empty(),
ctx_msg("get_global_index_clustering_ranges should not be empty"));
if (full_pk) {
unsigned expected_global = 1 * idx_range_multiplier;
BOOST_CHECK_MESSAGE(global_ranges.size() == expected_global,
ctx_msg(fmt::format(
"get_global_index_clustering_ranges (full PK): {} ranges, want {}",
global_ranges.size(), expected_global)));
} else {
// Without full PK the prefix has no token/PK entries,
// so we get 1 open-ended range.
BOOST_CHECK_MESSAGE(global_ranges.size() == 1,
ctx_msg(fmt::format(
"get_global_index_clustering_ranges (!full PK): {} ranges, want 1",
global_ranges.size())));
}
// --- get_global_index_token_clustering_ranges ---
// For modern (non-v1) indexes the token column is
// long_type, so this dispatches to the same
// get_single_column_clustering_bounds as
// get_global_index_clustering_ranges.
auto token_ranges = sr->get_global_index_token_clustering_ranges(query_options({}));
BOOST_CHECK_MESSAGE(token_ranges.size() == global_ranges.size(),
ctx_msg(fmt::format(
"get_global_index_token_clustering_ranges: {} ranges, want {} (same as global)",
token_ranges.size(), global_ranges.size())));
}
}
++total_tested;
}
BOOST_TEST_MESSAGE(fmt::format("Tested {} restriction combinations ({} legal, {} illegal, 2^{} = {} total)",
total_tested, total_tested - total_illegal, total_illegal, N_FRAG, FRAG_TOTAL));
}, {}, tattr);
}
// Currently expression doesn't have operator==().
// Implementing it is ugly, because there are shared pointers and the term base class.
// For testing purposes checking stringified expressions is enough.
static bool expression_eq(const expr::expression& e1, const expr::expression& e2) {
return to_string(e1) == to_string(e2);
}
static void assert_expr_vec_eq(
const std::vector<expr::expression>& v1,
const std::vector<expr::expression>& v2,
const std::source_location& loc = std::source_location::current()) {
if (std::equal(v1.begin(), v1.end(), v2.begin(), v2.end(), expression_eq)) {
return;
}
std::string error_msg = fmt::format("Location: {}:{}, Expression vectors not equal! [{}] != [{}]",
loc.file_name(), loc.line(), fmt::join(v1, ", "), fmt::join(v2, ", "));
BOOST_FAIL(error_msg);
}
// Unit tests for extract_column_restrictions function
BOOST_AUTO_TEST_CASE(expression_extract_column_restrictions) {
using namespace expr;
auto make_column = [](const char* name, column_kind kind, int id) -> column_definition {
column_definition definition(name, int32_type, kind, id);
// column_definition has to have column_specifiction because to_string uses it for column name
::shared_ptr<column_identifier> identifier = ::make_shared<column_identifier>(name, true);
column_specification specification("ks", "cf", std::move(identifier), int32_type);
definition.column_specification = make_lw_shared<column_specification>(
std::move(specification));
return definition;
};
column_definition col_pk1 = make_column("pk1", column_kind::partition_key, 0);
column_definition col_pk2 = make_column("pk2", column_kind::partition_key, 1);
column_definition col_ck1 = make_column("ck1", column_kind::clustering_key, 0);
column_definition col_ck2 = make_column("ck2", column_kind::clustering_key, 1);
column_definition col_r1 = make_column("r2", column_kind::regular_column, 0);
column_definition col_r2 = make_column("r2", column_kind::regular_column, 1);
column_definition col_r3 = make_column("r3", column_kind::regular_column, 2);
// Empty input test
assert_expr_vec_eq(cql3::restrictions::extract_single_column_restrictions_for_column(conjunction{}, col_pk1), {});
// BIG_WHERE test
// big_where contains:
// WHERE pk1 = 0 AND pk2 = 0 AND ck1 = 0 AND ck2 = 0 AND r1 = 0 AND r2 = 0
// AND (pk1, pk2) < (0, 0) AND (pk1, ck2, r1) = (0, 0, 0) AND (r1, r2) > 0
// AND ((c1, c2) < (0, 0) AND r1 < 0)
// AND pk2 > 0 AND r2 > 0
// AND token(pk1, pk2) > 0 AND token(pk1, pk2) < 0
// AND TRUE AND FALSE
// AND token(pk1, pk2)
// AND pk1 AND pk2
// AND (pk1, pk2)
std::vector<expression> big_where;
expr::constant zero_value = constant(raw_value::make_value(I(0)), int32_type);
expression pk1_restriction(binary_operator(column_value(&col_pk1), oper_t::EQ, zero_value));
expression pk2_restriction(binary_operator(column_value(&col_pk2), oper_t::EQ, zero_value));
expression pk2_restriction2(binary_operator(column_value(&col_pk2), oper_t::GT, zero_value));
expression ck1_restriction(binary_operator(column_value(&col_ck1), oper_t::EQ, zero_value));
expression ck2_restriction(binary_operator(column_value(&col_ck2), oper_t::EQ, zero_value));
expression r1_restriction(binary_operator(column_value(&col_r1), oper_t::EQ, zero_value));
expression r1_restriction2(binary_operator(column_value(&col_r1), oper_t::LT, zero_value));
expression r1_restriction3(binary_operator(column_value(&col_r1), oper_t::GT, zero_value));
expression r2_restriction(binary_operator(column_value(&col_r2), oper_t::EQ, zero_value));
auto make_multi_column_restriction = [](std::vector<const column_definition*> columns, oper_t oper) -> expression {
tuple_constructor column_tuple(column_definitions_as_tuple_constructor(columns));
std::vector<managed_bytes_opt> zeros_tuple_elems(columns.size(), managed_bytes_opt(I(0)));
data_type tup_type = tuple_type_impl::get_instance(std::vector<data_type>(columns.size(), int32_type));
managed_bytes tup_bytes = tuple_type_impl::build_value_fragmented(std::move(zeros_tuple_elems));
constant zeros_tuple(raw_value::make_value(std::move(tup_bytes)), std::move(tup_type));
return binary_operator(column_tuple, oper, std::move(zeros_tuple));
};
expression pk1_pk2_restriction = make_multi_column_restriction({&col_pk1, &col_pk2}, oper_t::LT);
expression pk1_ck2_r1_restriction = make_multi_column_restriction({&col_pk1, &col_ck2, &col_r1}, oper_t::EQ);
expression r1_r2_restriction = make_multi_column_restriction({&col_r1, &col_r2}, oper_t::GT);
std::vector<expression> conjunction_elems;
expression ck1_ck2_restriction = make_multi_column_restriction({&col_ck1, &col_ck2}, oper_t::LT);
expression conjunction_expr = conjunction{std::vector{ck1_ck2_restriction, r1_restriction2}};
function_call token_expr = function_call {
.func = functions::function_name::native_function("token"),
.args = {column_value(&col_pk1), column_value(&col_pk2)}
};
expression token_lt_restriction = binary_operator(token_expr, oper_t::LT, zero_value);
expression token_gt_restriction = binary_operator(token_expr, oper_t::GT, zero_value);
expression true_restriction = constant::make_bool(true);
expression false_restriction = constant::make_bool(false);
expression pk1_expr = column_value(&col_pk1);
expression pk2_expr = column_value(&col_pk1);
data_type ttype = tuple_type_impl::get_instance({int32_type, int32_type});
expression pk1_pk2_expr = tuple_constructor{{expression{column_value{&col_pk1}},
expression{column_value{&col_pk2}}},
std::move(ttype)};
big_where.push_back(pk1_restriction);
big_where.push_back(pk2_restriction);
big_where.push_back(ck1_restriction);
big_where.push_back(ck2_restriction);
big_where.push_back(r1_restriction);
big_where.push_back(r2_restriction);
big_where.push_back(pk1_pk2_restriction);
big_where.push_back(pk1_ck2_r1_restriction);
big_where.push_back(r1_r2_restriction);
big_where.push_back(conjunction_expr);
big_where.push_back(pk2_restriction2);
big_where.push_back(r1_restriction3);
big_where.push_back(token_lt_restriction);
big_where.push_back(token_gt_restriction);
big_where.push_back(true_restriction);
big_where.push_back(false_restriction);
big_where.push_back(token_expr);
big_where.push_back(pk1_expr);
big_where.push_back(pk2_expr);
big_where.push_back(pk1_pk2_expr);
expression big_where_expr = conjunction{std::move(big_where)};
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_pk1),
{pk1_restriction});
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_pk2),
{pk2_restriction, pk2_restriction2});
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_ck1),
{ck1_restriction});
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_ck2),
{ck2_restriction});
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r1),
{r1_restriction, r1_restriction2, r1_restriction3});
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r2),
{r2_restriction});
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r3),
{});
}
BOOST_AUTO_TEST_SUITE_END()