diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 3c5426c3a9..c5976a23fd 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -28,6 +28,7 @@ #include "dht/i_partitioner.hh" #include "db/schema_tables.hh" #include "types/tuple.hh" +#include "utils/overloaded_functor.hh" namespace { struct maybe_column_definition { @@ -55,48 +56,12 @@ using namespace expr; static logging::logger rlogger("restrictions"); static auto& expr_logger = rlogger; // compatibility with code moved from expression.cc -/// A set of discrete values. -using value_list = std::vector; // Sorted and deduped using value comparator. - -/// General set of values. Empty set and single-element sets are always value_list. interval is -/// never singular and never has start > end. Universal set is a interval with both bounds null. -using value_set = std::variant>; - -/// A set of all column values that would satisfy an expression. The _token_values variant finds -/// matching values for the partition token function call instead of the column. 
-/// -/// An expression restricts possible values of a column or token: -/// - `A>5` restricts A from below -/// - `A>5 AND A>6 AND B<10 AND A=12 AND B>0` restricts A to 12 and B to between 0 and 10 -/// - `A IN (1, 3, 5)` restricts A to 1, 3, or 5 -/// - `A IN (1, 3, 5) AND A>3` restricts A to just 5 -/// - `A=1 AND A<=0` restricts A to an empty list; no value is able to satisfy the expression -/// - `A>=NULL` also restricts A to an empty list; all comparisons to NULL are false -/// - an expression without A "restricts" A to unbounded range -extern value_set possible_column_values(const column_definition*, const expression&, const query_options&); -extern value_set possible_partition_token_values(const expression&, const query_options&, const schema& table_schema); - /// Turns value_set into a range, unless it's a multi-valued list (in which case this throws). extern interval to_range(const value_set&); /// A range of all X such that X op val. interval to_range(oper_t op, const clustering_key_prefix& val); -/// True iff the index can support the entire expression. -extern bool is_supported_by(const expression&, const secondary_index::index&); - -/// True iff any of the indices from the manager can support the entire expression. If allow_local, use all -/// indices; otherwise, use only global indices. -extern bool has_supporting_index( - const expression&, const secondary_index::secondary_index_manager&, allow_local_index allow_local); - -// Looks at each column individually and checks whether some index can support restrictions on this single column. -// Expression has to consist only of single column restrictions. 
-extern bool index_supports_some_column( - const expression&, - const secondary_index::secondary_index_manager&, - allow_local_index allow_local); - inline bool needs_filtering(oper_t op) { return (op == oper_t::CONTAINS) || (op == oper_t::CONTAINS_KEY) || (op == oper_t::LIKE) || (op == oper_t::IS_NOT) || (op == oper_t::NEQ) || (op == oper_t::NOT_IN); @@ -106,10 +71,6 @@ inline auto find_needs_filtering(const expression& e) { return find_binop(e, [] (const binary_operator& bo) { return needs_filtering(bo.op); }); } -inline bool is_multi_column(const binary_operator& op) { - return expr::is(op.lhs); -} - inline bool has_slice_or_needs_filtering(const expression& e) { return find_binop(e, [] (const binary_operator& o) { return is_slice(o.op) || needs_filtering(o.op); }); } @@ -117,28 +78,20 @@ inline bool has_slice_or_needs_filtering(const expression& e) { /// True iff binary_operator involves a collection. extern bool is_on_collection(const binary_operator&); -// Checks whether the given column has an EQ restriction in the expression. -// EQ restriction is `col = ...` or `(col, col2) = ...` -// IN restriction is NOT an EQ restriction, this function will not look for IN restrictions. -// Uses column_defintion::operator== for comparison, columns with the same name but different schema will not be equal. -bool has_eq_restriction_on_column(const column_definition& column, const expression& e); - -// Checks whether this expression contains restrictions on one single column. -// There might be more than one restriction, but exactly one column. -// The expression must be prepared. -bool is_single_column_restriction(const expression&); - -// Gets the only column from a single_column_restriction expression. 
-const column_value& get_the_only_column(const expression&); - -// Extracts map of single column restrictions for each column from expression -single_column_restrictions_map get_single_column_restrictions_map(const expression&); - - bool contains_multi_column_restriction(const expression&); bool has_only_eq_binops(const expression&); +static +value_set +solve(const predicate& ac, const query_options& options) { + if (ac.solve_for) { + return ac.solve_for(options); + } + + on_internal_error(rlogger, "solve: no solve_for function"); +} + namespace { const value_set empty_value_set = value_list{}; @@ -172,6 +125,18 @@ value_set intersection(value_set a, value_set b, const abstract_type* type) { return std::visit(intersection_visitor{type}, std::move(a), std::move(b)); } +static +managed_bytes +value_set_to_singleton(const value_set& vs) { + if (std::holds_alternative(vs)) { + const auto& vl = std::get(vs); + if (vl.size() == 1) { + return vl.front(); + } + } + throw std::logic_error("value_set_to_singleton: value_set is not a singleton"); +} + template value_list to_sorted_vector(Range r, const serialized_compare& comparator) { value_list tmp(r.begin(), r.end()); // Need random-access range to sort (r is not necessarily random-access). @@ -225,69 +190,230 @@ interval to_range(oper_t op, const clustering_key_prefix& return to_range(op, val); } +static +data_type +type(const predicate& p) { + return std::visit( + overloaded_functor{ + [] (const on_row&) { return boolean_type; }, // Not true, but the type won't be used. 
+ [] (const on_column& oc) { return oc.column->type->without_reversed().shared_from_this(); }, + [] (const on_partition_key_token&) { return long_type; }, + [] (const on_clustering_key_prefix&) -> data_type { on_internal_error(rlogger, "type: asked for clustering key prefix type"); }, + }, + p.on); +} + +static +predicate +make_conjunction(predicate a, predicate b) { + if (a.on != b.on) { + on_internal_error(rlogger, "make_conjunction: merging predicate targets"); + } + + if (!a.comparable && !b.comparable) { + on_internal_error(rlogger, "make_conjunction: merging non-comparable columns"); + } + + if (a.order != b.order) { + on_internal_error(rlogger, "make_conjunction: merging predicates with different comparison orders"); + } + + auto& sa = a.solve_for; + auto& sb = b.solve_for; + + auto sa_and_sb = std::invoke([&] -> solve_for_t { + if (sa && sb) { + return [sa = std::move(sa), sb = std::move(sb), type = type(a)] (const query_options& options) { + return intersection(sa(options), sb(options), type.get()); + }; + } else { + return {}; + } + }); + + return predicate{ + .solve_for = std::move(sa_and_sb), + .filter = make_conjunction(std::move(a.filter), std::move(b.filter)), + .on = a.on, + .is_singleton = false, // Even if both columns are singletons, the conjunction of them can return zero values. + .comparable = a.comparable && b.comparable, // Result is only comparable if both inputs follow CQL comparison semantics. + .is_multi_column = a.is_multi_column, // Both predicates are on the same target, so they agree on multi-column-ness. + .is_not_null_single_column = false, // A conjunction is not a pure IS NOT NULL check. + .equality = false, // A conjunction is not a single EQ. + .is_in = false, // A conjunction is not a single IN. + .is_slice = false, // A conjunction is not a single slice. + .is_upper_bound = false, // A conjunction has no single direction. + .is_lower_bound = false, // A conjunction has no single direction. 
+ .order = a.order, // Both predicates are on the same column, so comparison order must agree. + .op = std::nullopt, // A conjunction has no single operator. + .is_subscript = a.is_subscript, // Both predicates are on the same target, so they agree on subscript-ness. + }; +} + +static +const column_definition* +require_on_single_column(const predicate& p) { + if (auto* pcol = std::get_if(&p.on)) { + return pcol->column; + } + on_internal_error(rlogger, "require_on_single_column: predicate is not on a single column"); +} + +static +bool +is_null_constant(const expression& e) { + if (auto* c = as_if(&e)) { + return c->value.is_null(); + } + return false; +} + +/// Given an expression, decompose it into a set of predicates, on individual columns, +/// the table's tokens, or multiple columns. A predicate may know how to solve for +/// the set of all column values that would satisfy the expression, treated as a boolean +/// predicate on the column. If it does, the .solve_for member is set. +/// +/// An expression restricts possible values of a column or token: +/// - `A>5` restricts A from below +/// - `A>5 AND A>6 AND B<10 AND A=12 AND B>0` restricts A to 12 and B to between 0 and 10 +/// - `A IN (1, 3, 5)` restricts A to 1, 3, or 5 +/// - `A IN (1, 3, 5) AND A>3` restricts A to just 5 +/// - `A=1 AND A<=0` restricts A to an empty list; no value is able to satisfy the expression +/// - `A>=NULL` also restricts A to an empty list; all comparisons to NULL are false +/// - an expression without A "restricts" A to unbounded range +// // When cdef == nullptr it finds possible token values instead of column values. // When finding token values the table_schema_opt argument has to point to a valid schema, // but it isn't used when finding values for column. // The schema is needed to find out whether a call to token() function represents // the partition token. 
-static value_set possible_lhs_values(const column_definition* cdef, - const expression& expr, - const query_options& options, - const schema* table_schema_opt) { - const auto type = cdef ? &cdef->type->without_reversed() : long_type.get(); +static +std::vector +to_predicates( + const expression& expr, + const schema* table_schema_opt) { + static auto to_vector = [] (predicate p) -> std::vector { + return {std::move(p)}; + }; + static auto cannot_solve = [] (const expression& e) -> std::vector { + return to_vector(predicate{ + .solve_for = nullptr, + .filter = e, + .on = on_row{}, + }); + }; + static auto cannot_solve_on_column = [] (const expression& e, const column_definition* cdef) -> std::vector { + return to_vector(predicate{ + .solve_for = nullptr, + .filter = e, + .on = on_column{cdef}, + }); + }; return expr::visit(overloaded_functor{ - [] (const constant& constant_val) { + [] (const constant& constant_val) -> std::vector { std::optional bool_val = get_bool_value(constant_val); if (bool_val.has_value()) { - return *bool_val ? unbounded_value_set : empty_value_set; + auto solve = *bool_val + ? 
solve_for_t([] (const query_options&) { return unbounded_value_set; }) + : solve_for_t([] (const query_options&) { return empty_value_set; }); + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = constant_val, + .on = on_row{}, + }); } - on_internal_error(expr_logger, - "possible_lhs_values: a constant that is not a bool value cannot serve as a restriction by itself"); + return to_vector(predicate{ + .solve_for = [] (const query_options&) { return unbounded_value_set; }, + .filter = constant_val, + .on = on_row{}, + }); }, - [&] (const conjunction& conj) { - return std::ranges::fold_left(conj.children, unbounded_value_set, [&](value_set&& acc, const expression& child) { - return intersection( - std::move(acc), possible_lhs_values(cdef, child, options, table_schema_opt), type); - }); + [&] (const conjunction& conj) -> std::vector { + std::vector ret; + for (auto& pa : conj.children) { + auto p = to_predicates(pa, table_schema_opt); + ret.insert(ret.end(), p.begin(), p.end()); + } + return ret; }, - [&] (const binary_operator& oper) -> value_set { + [&] (const binary_operator& oper) -> std::vector { return expr::visit(overloaded_functor{ - [&] (const column_value& col) -> value_set { - if (!cdef || cdef != col.col) { - return unbounded_value_set; + [&] (const column_value& col) -> std::vector { + auto cdef = col.col; + auto type = &cdef->type->without_reversed(); + if (oper.op == oper_t::IS_NOT) { + return to_vector(predicate{ + .solve_for = nullptr, + .filter = oper, + .on = on_column{col.col}, + .is_not_null_single_column = is_null_constant(oper.rhs), + .op = oper.op, + }); } if (is_compare(oper.op)) { + auto solve = [oper] (const query_options& options) { managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!val) { return empty_value_set; // All NULL comparisons fail; no column values match. } return oper.op == oper_t::EQ ? 
value_set(value_list{*val}) : to_range(oper.op, std::move(*val)); + }; + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_column{col.col}, + .is_singleton = (oper.op == oper_t::EQ), + .equality = (oper.op == oper_t::EQ), + .is_slice = expr::is_slice(oper.op), + .is_upper_bound = (oper.op == oper_t::LT || oper.op == oper_t::LTE), + .is_lower_bound = (oper.op == oper_t::GT || oper.op == oper_t::GTE), + .order = oper.order, + .op = oper.op, + }); } else if (oper.op == oper_t::IN) { + auto solve = [oper, type, cdef] (const query_options& options) { return get_IN_values(oper.rhs, options, type->as_less_comparator(), cdef->name_as_text()); + }; + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_column{col.col}, + .is_singleton = false, + .is_in = true, + .order = oper.order, + .op = oper.op, + }); } else if (oper.op == oper_t::CONTAINS || oper.op == oper_t::CONTAINS_KEY) { + auto solve = [oper] (const query_options& options) { managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!val) { return empty_value_set; // All NULL comparisons fail; no column values match. 
} return value_set(value_list{*val}); + }; + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_column{col.col}, + .is_singleton = false, + .order = oper.order, + .op = oper.op, + }); } - throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper)); + return cannot_solve_on_column(oper, col.col); }, - [&] (const subscript& s) -> value_set { + [&] (const subscript& s) -> std::vector { const column_value& col = get_subscripted_column(s); - if (!cdef || cdef != col.col) { - return unbounded_value_set; - } + if (oper.op == oper_t::EQ) { + auto solve = [s, oper] (const query_options& options) { + managed_bytes_opt sval = evaluate(s.sub, options).to_managed_bytes_opt(); + if (!sval) { + return empty_value_set; // NULL can't be a map key + } - managed_bytes_opt sval = evaluate(s.sub, options).to_managed_bytes_opt(); - if (!sval) { - return empty_value_set; // NULL can't be a map key - } - - if (oper.op == oper_t::EQ) { managed_bytes_opt rval = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!rval) { return empty_value_set; // All NULL comparisons fail; no column values match. 
@@ -295,22 +421,67 @@ static value_set possible_lhs_values(const column_definition* cdef, managed_bytes_opt elements[] = {sval, rval}; managed_bytes val = tuple_type_impl::build_value_fragmented(elements); return value_set(value_list{val}); + }; + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_column{col.col}, + .is_singleton = true, + .equality = true, + .order = oper.order, + .op = oper.op, + .is_subscript = true, + }); + } + return cannot_solve_on_column(oper, col.col); + }, + [&] (const tuple_constructor& tuple) -> std::vector { + auto columns = tuple.elements + | std::views::transform([] (const expression& e) { return as(e).col; }) + | std::ranges::to(); + for (unsigned i = 0; i < columns.size(); ++i) { + if (!columns[i]->is_clustering_key() || columns[i]->position() != i) { + on_internal_error(rlogger, "to_predicates: multi-column relation not on a clustering key prefix"); + } } - throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper)); + // The solve_for lambda is only correct for EQ; other operators + // (IN, slices) are handled directly by + // build_get_multi_column_clustering_bounds_fn() which bypasses + // solve_for and evaluates the binary_operator's RHS itself. + solve_for_t solve = nullptr; + if (oper.op == oper_t::EQ) { + solve = [oper] (const query_options& options) { + managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); + if (!val) { + return empty_value_set; // All NULL comparisons fail; no column values match. 
+ } + return value_set(value_list{*val}); + }; + } + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_clustering_key_prefix{std::move(columns)}, + .is_singleton = oper.op == oper_t::EQ, + .is_multi_column = true, + .equality = (oper.op == oper_t::EQ), + .is_in = (oper.op == oper_t::IN), + .is_slice = expr::is_slice(oper.op), + .is_upper_bound = (oper.op == oper_t::LT || oper.op == oper_t::LTE), + .is_lower_bound = (oper.op == oper_t::GT || oper.op == oper_t::GTE), + .order = oper.order, + .op = oper.op, + }); }, - [&] (const tuple_constructor& tuple) -> value_set { - on_internal_error(rlogger, - fmt::format("possible_lhs_values: trying to solve for {} on tuple inequality", - cdef ? "single column" : "token")); - }, - [&] (const function_call& token_fun_call) -> value_set { + [&] (const function_call& token_fun_call) -> std::vector { if (!is_partition_token_for_schema(token_fun_call, *table_schema_opt)) { - on_internal_error(expr_logger, "possible_lhs_values: function calls are not supported as the LHS of a binary expression"); + return cannot_solve(oper); } - if (cdef) { - return unbounded_value_set; + if (!(oper.op == oper_t::EQ || is_slice(oper.op))) { + return cannot_solve(oper); } + auto solve = [oper] (const query_options& options) -> value_set { auto val = evaluate(oper.rhs, options).to_managed_bytes_opt(); if (!val) { return empty_value_set; // All NULL comparisons fail; no token values match. 
@@ -332,94 +503,135 @@ static value_set possible_lhs_values(const column_definition* cdef, } else if (oper.op == oper_t::LTE) { return interval::make_ending_with(interval_bound(std::move(adjusted_val), inclusive)); } - throw std::logic_error(format("get_token_interval invalid operator {}", oper.op)); + throw std::logic_error(format("get_token_interval unexpected operator {}", oper.op)); + }; + return to_vector(predicate{ + .solve_for = std::move(solve), + .filter = oper, + .on = on_partition_key_token{table_schema_opt}, + .is_singleton = (oper.op == oper_t::EQ), + .equality = (oper.op == oper_t::EQ), + .is_slice = expr::is_slice(oper.op), + .is_upper_bound = (oper.op == oper_t::LT || oper.op == oper_t::LTE), + .is_lower_bound = (oper.op == oper_t::GT || oper.op == oper_t::GTE), + .order = oper.order, + .op = oper.op, + }); }, - [&] (const binary_operator&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: nested binary operators are not supported"); + [&] (const binary_operator&) -> std::vector { + return cannot_solve(oper); }, - [&] (const conjunction&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: conjunctions are not supported as the LHS of a binary expression"); + [&] (const conjunction&) -> std::vector { + return cannot_solve(oper); }, - [] (const constant&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: constants are not supported as the LHS of a binary expression"); + [&] (const constant&) -> std::vector { + return cannot_solve(oper); }, - [] (const unresolved_identifier&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: unresolved identifiers are not supported as the LHS of a binary expression"); + [&] (const unresolved_identifier&) -> std::vector { + return cannot_solve(oper); }, - [] (const column_mutation_attribute&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: writetime/ttl are not supported as the LHS of a binary expression"); + [&] (const 
column_mutation_attribute&) -> std::vector { + return cannot_solve(oper); }, - [] (const cast&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: typecasts are not supported as the LHS of a binary expression"); + [&] (const cast&) -> std::vector { + return cannot_solve(oper); }, - [] (const field_selection&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: field selections are not supported as the LHS of a binary expression"); + [&] (const field_selection&) -> std::vector { + return cannot_solve(oper); }, - [] (const bind_variable&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: bind variables are not supported as the LHS of a binary expression"); + [&] (const bind_variable&) -> std::vector { + return cannot_solve(oper); }, - [] (const untyped_constant&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: untyped constants are not supported as the LHS of a binary expression"); + [&] (const untyped_constant&) -> std::vector { + return cannot_solve(oper); }, - [] (const collection_constructor&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: collection constructors are not supported as the LHS of a binary expression"); + [&] (const collection_constructor&) -> std::vector { + return cannot_solve(oper); }, - [] (const usertype_constructor&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: user type constructors are not supported as the LHS of a binary expression"); + [&] (const usertype_constructor&) -> std::vector { + return cannot_solve(oper); }, - [] (const temporary&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: temporaries are not supported as the LHS of a binary expression"); + [&] (const temporary&) -> std::vector { + return cannot_solve(oper); }, }, oper.lhs); }, - [] (const column_value&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a column cannot serve as a restriction by itself"); + [] 
(const column_value& cv) -> std::vector { + return cannot_solve(cv); }, - [] (const subscript&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a subscript cannot serve as a restriction by itself"); + [] (const subscript& s) -> std::vector { + return cannot_solve(s); }, - [] (const unresolved_identifier&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: an unresolved identifier cannot serve as a restriction"); + [] (const unresolved_identifier& ui) -> std::vector { + return cannot_solve(ui); }, - [] (const column_mutation_attribute&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: the writetime/ttl functions cannot serve as a restriction by itself"); + [] (const column_mutation_attribute& cma) -> std::vector { + return cannot_solve(cma); }, - [] (const function_call&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a function call cannot serve as a restriction by itself"); + [] (const function_call& fc) -> std::vector { + return cannot_solve(fc); }, - [] (const cast&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a typecast cannot serve as a restriction by itself"); + [] (const cast& c) -> std::vector { + return cannot_solve(c); }, - [] (const field_selection&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a field selection cannot serve as a restriction by itself"); + [] (const field_selection& fs) -> std::vector { + return cannot_solve(fs); }, - [] (const bind_variable&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a bind variable cannot serve as a restriction by itself"); + [] (const bind_variable& bv) -> std::vector { + return cannot_solve(bv); }, - [] (const untyped_constant&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: an untyped constant cannot serve as a restriction by itself"); + [] (const untyped_constant& uc) -> std::vector { + return cannot_solve(uc); }, - [] (const 
tuple_constructor&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: an tuple constructor cannot serve as a restriction by itself"); + [] (const tuple_constructor& tc) -> std::vector { + return cannot_solve(tc); }, - [] (const collection_constructor&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a collection constructor cannot serve as a restriction by itself"); + [] (const collection_constructor& cc) -> std::vector { + return cannot_solve(cc); }, - [] (const usertype_constructor&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a user type constructor cannot serve as a restriction by itself"); + [] (const usertype_constructor& uc) -> std::vector { + return cannot_solve(uc); }, - [] (const temporary&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a temporary cannot serve as a restriction by itself"); + [] (const temporary& t) -> std::vector { + return cannot_solve(t); }, }, expr); } -value_set possible_column_values(const column_definition* col, const expression& e, const query_options& options) { - return possible_lhs_values(col, e, options, nullptr); -} - -value_set possible_partition_token_values(const expression& e, const query_options& options, const schema& table_schema) { - return possible_lhs_values(nullptr, e, options, &table_schema); +// Convert an expression to a predicate on a column. If cdef is nullptr, the predicate +// is on the partition key token. +static +predicate +to_predicate_on_column( + const expression& expr, + const column_definition* cdef, + const schema* table_schema_opt) { + auto predicates = to_predicates(expr, table_schema_opt); + using on_t = std::variant< + on_row, // cannot determine, so predicate is on entire row + on_column, // solving for a single column: e.g. c1 = 3 + on_partition_key_token, // solving for the token, e.g. token(pk1, pk2) >= :var + on_clustering_key_prefix // solving for a clustering key prefix: e.g. 
(ck1, ck2) >= (3, 4) + >; + auto target = cdef ? on_t(on_column{cdef}) : on_t(on_partition_key_token{table_schema_opt}); + auto collected = std::vector{}; + for (auto& predicate : predicates) { + if (predicate.on == target) { + collected.push_back(std::move(predicate)); + continue; + } + } + if (collected.empty()) { + on_internal_error(rlogger, "to_predicate_on_column: no predicates found"); + } + auto ret = std::ranges::fold_left_first( + collected | std::views::as_rvalue, + make_conjunction + ); + if (!ret) { + on_internal_error(rlogger, "to_predicate_on_column: no predicates found"); + } + return std::move(*ret); } interval to_range(const value_set& s) { @@ -434,6 +646,17 @@ interval to_range(const value_set& s) { }, s); } +/// Replaces every column_definition in an expression with this one. Throws if any LHS is not a single +/// column_value. +static +predicate +replace_column_def(predicate p, const column_definition* col) { + // Note: does not replace any `col` embedded in the p.solve_for + p.filter = expr::replace_column_def(p.filter, col); + p.on = on_column{col}; + return p; +} + namespace { constexpr inline secondary_index::index::supports_expression_v operator&&(secondary_index::index::supports_expression_v v1, secondary_index::index::supports_expression_v v2) { using namespace secondary_index; @@ -441,115 +664,127 @@ constexpr inline secondary_index::index::supports_expression_v operator&&(second return v1 == True && v2 == True ? True : index::supports_expression_v::from_bool(false); } -secondary_index::index::supports_expression_v is_supported_by_helper(const expression& expr, const secondary_index::index& idx) { +} + +// Like is_supported_by_helper, but operates on a single predicate instead of walking +// an expression tree. Returns how an index supports this predicate: UsualYes, CollectionYes, or No. 
+static secondary_index::index::supports_expression_v +is_predicate_supported_by(const predicate& pred, const secondary_index::index& idx) { using ret_t = secondary_index::index::supports_expression_v; + if (!pred.op) { + return ret_t::from_bool(false); + } + return std::visit(overloaded_functor{ + [&] (const on_column& oc) -> ret_t { + if (pred.is_subscript) { + return idx.supports_subscript_expression(*oc.column, *pred.op); + } + return idx.supports_expression(*oc.column, *pred.op); + }, + [&] (const on_clustering_key_prefix& ocp) -> ret_t { + // Single-element tuple_constructor: treat like a single column + if (ocp.columns.size() == 1) { + return idx.supports_expression(*ocp.columns[0], *pred.op); + } + // Multi-element tuple: index cannot avoid filtering + return ret_t::from_bool(false); + }, + [&] (const on_partition_key_token&) -> ret_t { + return ret_t::from_bool(false); + }, + [&] (const on_row&) -> ret_t { + return ret_t::from_bool(false); + }, + }, pred.on); +} + +struct index_search_group { + const single_column_predicate_vectors& pred_vectors; + const expr::expression& restriction_expr; +}; + +// Like index_supports_some_column, but operates on per-column predicate vectors +// instead of walking per-column expression trees. 
+static bool index_supports_some_column( + const single_column_predicate_vectors& per_column_predicates, + const secondary_index::secondary_index_manager& index_manager, + allow_local_index allow_local) { using namespace secondary_index; - return expr::visit(overloaded_functor{ - [&] (const conjunction& conj) -> ret_t { - if (conj.children.empty()) { - return index::supports_expression_v::from_bool(true); - } - auto init = is_supported_by_helper(conj.children[0], idx); - return std::accumulate(std::begin(conj.children) + 1, std::end(conj.children), init, - [&] (ret_t acc, const expression& child) -> ret_t { - return acc && is_supported_by_helper(child, idx); - }); - }, - [&] (const binary_operator& oper) { - return expr::visit(overloaded_functor{ - [&] (const column_value& col) { - return idx.supports_expression(*col.col, oper.op); - }, - [&] (const tuple_constructor& tuple) { - if (tuple.elements.size() == 1) { - if (auto column = expr::as_if(&tuple.elements[0])) { - return idx.supports_expression(*column->col, oper.op); - } - } - // We don't use index table for multi-column restrictions, as it cannot avoid filtering. 
- return index::supports_expression_v::from_bool(false); - }, - [&] (const function_call&) { return index::supports_expression_v::from_bool(false); }, - [&] (const subscript& s) -> ret_t { - const column_value& col = get_subscripted_column(s); - return idx.supports_subscript_expression(*col.col, oper.op); - }, - [&] (const binary_operator&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: nested binary operators are not supported"); - }, - [&] (const conjunction&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: conjunctions are not supported as the LHS of a binary expression"); - }, - [] (const constant&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: constants are not supported as the LHS of a binary expression"); - }, - [] (const unresolved_identifier&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: an unresolved identifier is not supported as the LHS of a binary expression"); - }, - [&] (const column_mutation_attribute&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: writetime/ttl are not supported as the LHS of a binary expression"); - }, - [&] (const cast&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: typecasts are not supported as the LHS of a binary expression"); - }, - [&] (const field_selection&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: field selections are not supported as the LHS of a binary expression"); - }, - [&] (const bind_variable&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: bind variables are not supported as the LHS of a binary expression"); - }, - [&] (const untyped_constant&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: untyped constants are not supported as the LHS of a binary expression"); - }, - [&] (const collection_constructor&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: collection constructors are not supported as the LHS of a binary expression"); - }, - [&] (const 
usertype_constructor&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: user type constructors are not supported as the LHS of a binary expression"); - }, - [&] (const temporary&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: temporaries are not supported as the LHS of a binary expression"); - }, - }, oper.lhs); - }, - [] (const auto& default_case) { return index::supports_expression_v::from_bool(false); } - }, expr); -} -} - -bool is_supported_by(const expression& expr, const secondary_index::index& idx) { - auto s = is_supported_by_helper(expr, idx); - return s != secondary_index::index::supports_expression_v::from_bool(false); -} - - -bool has_supporting_index( - const expression& expr, - const secondary_index::secondary_index_manager& index_manager, - allow_local_index allow_local) { - const auto indexes = index_manager.list_indexes(); - const auto support = std::bind(is_supported_by, std::ref(expr), std::placeholders::_1); - return allow_local ? std::ranges::any_of(indexes, support) - : std::ranges::any_of( - indexes | std::views::filter([] (const secondary_index::index& i) { return !i.metadata().local(); }), - support); -} - -bool index_supports_some_column( - const expression& e, - const secondary_index::secondary_index_manager& index_manager, - allow_local_index allow_local) { - single_column_restrictions_map single_col_restrictions = get_single_column_restrictions_map(e); - - for (auto&& [col, col_restrictions] : single_col_restrictions) { - if (has_supporting_index(col_restrictions, index_manager, allow_local)) { - return true; + for (auto& [col, preds] : per_column_predicates) { + for (const auto& idx : index_manager.list_indexes()) { + if (!allow_local && idx.metadata().local()) { + continue; + } + if (preds.empty()) { + continue; + } + // AND all predicate results for this column-index pair, mirroring the + // conjunction logic in is_supported_by_helper. 
Seed with the first + // predicate's result instead of from_bool(true) (which is UsualYes) + // so that CollectionYes values are preserved through the chain. + auto result = is_predicate_supported_by(preds[0], idx); + for (size_t i = 1; i < preds.size(); ++i) { + result = result && is_predicate_supported_by(preds[i], idx); + } + if (result) { + return true; + } } } - return false; } +// Check if any index supports any multi-column clustering predicate. +// For each predicate in mc_preds, checks if any column in the predicate's +// columns list is supported by the index for that predicate's operator. +static bool multi_column_predicates_have_supporting_index( + const std::vector& mc_preds, + const secondary_index::secondary_index_manager& index_manager, + allow_local_index allow_local) { + for (const auto& idx : index_manager.list_indexes()) { + if (!allow_local && idx.metadata().local()) { + continue; + } + for (const auto& pred : mc_preds) { + if (!pred.op) { + continue; + } + auto* ocp = std::get_if(&pred.on); + if (!ocp) { + continue; + } + for (const auto* col : ocp->columns) { + if (idx.supports_expression(*col, *pred.op)) { + return true; + } + } + } + } + return false; +} + +// Check if all predicates for a column are supported by an index. +// Mirrors the conjunction logic of is_supported_by_helper: initializes with +// the first predicate's result, then ANDs the rest. 
+static bool are_predicates_supported_by(const std::vector& preds, + const secondary_index::index& idx) { + if (preds.empty()) { + return true; + } + auto result = is_predicate_supported_by(preds[0], idx); + for (size_t i = 1; i < preds.size(); ++i) { + result = result && is_predicate_supported_by(preds[i], idx); + } + return bool(result); +} + +static std::pair, expr::expression> do_find_idx( + bool uses_secondary_indexing, + const secondary_index::secondary_index_manager& sim, + std::span search_groups, + allow_local_index allow_local); + + bool is_on_collection(const binary_operator& b) { if (b.op == oper_t::CONTAINS || b.op == oper_t::CONTAINS_KEY) { return true; @@ -560,179 +795,6 @@ bool is_on_collection(const binary_operator& b) { return false; } -bool has_eq_restriction_on_column(const column_definition& column, const expression& e) { - std::function column_in_lhs = [&](const expression& e) -> bool { - return visit(overloaded_functor { - [&](const column_value& cv) { - // Use column_defintion::operator== for comparison, - // columns with the same name but different schema will not be equal. - return *cv.col == column; - }, - [&](const tuple_constructor& tc) { - for (const expression& elem : tc.elements) { - if (column_in_lhs(elem)) { - return true; - } - } - - return false; - }, - [&](const auto&) {return false;} - }, e); - }; - - // Look for binary operator describing eq relation with this column on lhs - const binary_operator* eq_restriction_search_res = find_binop(e, [&](const binary_operator& b) { - if (b.op != oper_t::EQ) { - return false; - } - - if (!column_in_lhs(b.lhs)) { - return false; - } - - // These conditions are not allowed to occur in the current code, - // but they might be allowed in the future. - // They are added now to avoid surprises later. 
- // - // These conditions detect cases like: - // WHERE column1 = column2 - // WHERE column1 = row_number() - if (contains_column(column, b.rhs) || contains_nonpure_function(b.rhs)) { - return false; - } - - return true; - }); - - return eq_restriction_search_res != nullptr; -} - -std::vector extract_single_column_restrictions_for_column(const expression& expr, - const column_definition& column) { - struct visitor { - std::vector restrictions; - const column_definition& column; - const binary_operator* current_binary_operator; - - void operator()(const constant&) {} - - void operator()(const conjunction& conj) { - for (const expression& child : conj.children) { - expr::visit(*this, child); - } - } - - void operator()(const binary_operator& oper) { - if (current_binary_operator != nullptr) { - on_internal_error(expr_logger, - "extract_single_column_restrictions_for_column: nested binary operators are not supported"); - } - - current_binary_operator = &oper; - expr::visit(*this, oper.lhs); - current_binary_operator = nullptr; - } - - void operator()(const column_value& cv) { - if (*cv.col == column && current_binary_operator != nullptr) { - restrictions.emplace_back(*current_binary_operator); - } - } - - void operator()(const subscript& s) { - const column_value& cv = get_subscripted_column(s); - if (*cv.col == column && current_binary_operator != nullptr) { - restrictions.emplace_back(*current_binary_operator); - } - } - - void operator()(const unresolved_identifier&) {} - void operator()(const column_mutation_attribute&) {} - void operator()(const function_call&) {} - void operator()(const cast&) {} - void operator()(const field_selection&) {} - void operator()(const bind_variable&) {} - void operator()(const untyped_constant&) {} - void operator()(const tuple_constructor&) {} - void operator()(const collection_constructor&) {} - void operator()(const usertype_constructor&) {} - void operator()(const temporary&) {} - }; - - visitor v { - .restrictions = 
std::vector(), - .column = column, - .current_binary_operator = nullptr, - }; - - expr::visit(v, expr); - - return std::move(v.restrictions); -} - -static std::optional> get_single_column_restriction_column(const expression& e) { - if (find_in_expression(e, [](const auto&) {return true;})) { - on_internal_error(expr_logger, - seastar::format("get_single_column_restriction_column expects a prepared expression, but it's not: {}", e)); - } - - const column_value* the_only_column = nullptr; - bool expression_is_single_column = false; - - for_each_expression(e, - [&](const column_value& cval) { - if (the_only_column == nullptr) { - // It's the first column_value we've encountered - set it as the only column - the_only_column = &cval; - expression_is_single_column = true; - return; - } - - if (cval.col != the_only_column->col) { - // In case any other column is encountered the restriction - // restricts more than one column. - expression_is_single_column = false; - } - } - ); - - if (expression_is_single_column) { - return std::cref(*the_only_column); - } else { - return std::nullopt; - } -} - -bool is_single_column_restriction(const expression& e) { - return get_single_column_restriction_column(e).has_value(); -} - -const column_value& get_the_only_column(const expression& e) { - std::optional> result = get_single_column_restriction_column(e); - - if (!result.has_value()) { - on_internal_error(expr_logger, - format("get_the_only_column - bad expression: {}", e)); - } - - return *result; -} - -single_column_restrictions_map get_single_column_restrictions_map(const expression& e) { - single_column_restrictions_map result; - - std::vector sorted_defs = get_sorted_column_defs(e); - for (const column_definition* cdef : sorted_defs) { - expression col_restrictions = conjunction { - .children = extract_single_column_restrictions_for_column(e, *cdef) - }; - result.emplace(cdef, std::move(col_restrictions)); - } - - return result; -} - bool is_empty_restriction(const expression& 
e) { bool contains_non_conjunction = recurse_until(e, [&](const expression& e) -> bool { return !is(e); @@ -741,8 +803,12 @@ bool is_empty_restriction(const expression& e) { return !contains_non_conjunction; } -bytes_opt value_for(const column_definition& cdef, const expression& e, const query_options& options) { - value_set possible_vals = possible_column_values(&cdef, e, options); +static +std::function +build_value_for_fn(const column_definition& cdef, const expression& e, const schema& s) { + auto ac = to_predicate_on_column(e, &cdef, &s); + return [ac] (const query_options& options) -> bytes_opt { + value_set possible_vals = solve(ac, options); return std::visit(overloaded_functor { [&](const value_list& val_list) -> bytes_opt { if (val_list.empty()) { @@ -750,15 +816,16 @@ bytes_opt value_for(const column_definition& cdef, const expression& e, const qu } if (val_list.size() != 1) { - on_internal_error(expr_logger, format("expr::value_for - multiple possible values for column: {}", e)); + on_internal_error(expr_logger, format("expr::value_for - multiple possible values for column: {}", ac.filter)); } return to_bytes(val_list.front()); }, [&](const interval&) -> bytes_opt { - on_internal_error(expr_logger, format("expr::value_for - possible values are a range: {}", e)); + on_internal_error(expr_logger, format("expr::value_for - possible values are a range: {}", ac.filter)); } }, possible_vals); + }; } bool contains_multi_column_restriction(const expression& e) { @@ -778,287 +845,13 @@ bool has_only_eq_binops(const expression& e) { return non_eq_binop == nullptr; } -statement_restrictions::statement_restrictions(schema_ptr schema, bool allow_filtering) +statement_restrictions::statement_restrictions(private_tag, schema_ptr schema, bool allow_filtering) : _schema(schema) , _partition_range_is_simple(true) { } -template -concept visitor_with_binary_operator_context = requires (Visitor v) { - { v.current_binary_operator } -> std::convertible_to; -}; - -void 
with_current_binary_operator( - visitor_with_binary_operator_context auto& visitor, - std::invocable auto func) { - if (!visitor.current_binary_operator) { - throw std::logic_error("Evaluation expected within binary operator"); - } - func(*visitor.current_binary_operator); -} - -/// Every token, or if no tokens, an EQ/IN of every single PK column. -static std::vector extract_partition_range( - const expr::expression& where_clause, schema_ptr schema) { - using namespace expr; - struct extract_partition_range_visitor { - schema_ptr table_schema; - std::optional tokens; - std::unordered_map single_column; - const binary_operator* current_binary_operator = nullptr; - - void operator()(const conjunction& c) { - std::ranges::for_each(c.children, [this] (const expression& child) { expr::visit(*this, child); }); - } - - void operator()(const binary_operator& b) { - if (current_binary_operator) { - throw std::logic_error("Nested binary operators are not supported"); - } - current_binary_operator = &b; - expr::visit(*this, b.lhs); - current_binary_operator = nullptr; - } - - void operator()(const function_call& token_fun_call) { - if (!is_partition_token_for_schema(token_fun_call, *table_schema)) { - on_internal_error(rlogger, "extract_partition_range(function_call)"); - } - - with_current_binary_operator(*this, [&] (const binary_operator& b) { - if (tokens) { - tokens = make_conjunction(std::move(*tokens), b); - } else { - tokens = b; - } - }); - } - - void operator()(const column_value& cv) { - auto s = &cv; - with_current_binary_operator(*this, [&] (const binary_operator& b) { - if (s->col->is_partition_key() && (b.op == oper_t::EQ || b.op == oper_t::IN)) { - const auto [it, inserted] = single_column.try_emplace(s->col, b); - if (!inserted) { - it->second = make_conjunction(std::move(it->second), b); - } - } - }); - } - - void operator()(const tuple_constructor& s) { - // Partition key columns are not legal in tuples, so ignore tuples. 
- } - - void operator()(const subscript& sub) { - const column_value& cval = get_subscripted_column(sub.val); - - with_current_binary_operator(*this, [&] (const binary_operator& b) { - if (cval.col->is_partition_key() && (b.op == oper_t::EQ || b.op == oper_t::IN)) { - const auto [it, inserted] = single_column.try_emplace(cval.col, b); - if (!inserted) { - it->second = make_conjunction(std::move(it->second), b); - } - } - }); - } - - void operator()(const constant&) {} - - void operator()(const unresolved_identifier&) { - on_internal_error(rlogger, "extract_partition_range(unresolved_identifier)"); - } - - void operator()(const column_mutation_attribute&) { - on_internal_error(rlogger, "extract_partition_range(column_mutation_attribute)"); - } - - void operator()(const cast&) { - on_internal_error(rlogger, "extract_partition_range(cast)"); - } - - void operator()(const field_selection&) { - on_internal_error(rlogger, "extract_partition_range(field_selection)"); - } - - void operator()(const bind_variable&) { - on_internal_error(rlogger, "extract_partition_range(bind_variable)"); - } - - void operator()(const untyped_constant&) { - on_internal_error(rlogger, "extract_partition_range(untyped_constant)"); - } - - void operator()(const collection_constructor&) { - on_internal_error(rlogger, "extract_partition_range(collection_constructor)"); - } - - void operator()(const usertype_constructor&) { - on_internal_error(rlogger, "extract_partition_range(usertype_constructor)"); - } - - void operator()(const temporary&) { - on_internal_error(rlogger, "extract_partition_range(temporary)"); - } - }; - - extract_partition_range_visitor v { - .table_schema = schema - }; - - expr::visit(v, where_clause); - if (v.tokens) { - return {std::move(*v.tokens)}; - } - if (v.single_column.size() == schema->partition_key_size()) { - return v.single_column | std::views::values | std::ranges::to(); - } - return {}; -} - -/// Extracts where_clause atoms with clustering-column LHS and copies 
them to a vector. These elements define the -/// boundaries of any clustering slice that can possibly meet where_clause. This vector can be calculated before -/// binding expression markers, since LHS and operator are always known. -static std::vector extract_clustering_prefix_restrictions( - const expr::expression& where_clause, schema_ptr schema) { - using namespace expr; - - /// Collects all clustering-column restrictions from an expression. Presumes the expression only uses - /// conjunction to combine subexpressions. - struct visitor { - schema_ptr table_schema; - std::vector multi; ///< All multi-column restrictions. - /// All single-clustering-column restrictions, grouped by column. Each value is either an atom or a - /// conjunction of atoms. - std::unordered_map single; - const binary_operator* current_binary_operator = nullptr; - - void operator()(const conjunction& c) { - std::ranges::for_each(c.children, [this] (const expression& child) { expr::visit(*this, child); }); - } - - void operator()(const binary_operator& b) { - if (current_binary_operator) { - throw std::logic_error("Nested binary operators are not supported"); - } - current_binary_operator = &b; - expr::visit(*this, b.lhs); - current_binary_operator = nullptr; - } - - void operator()(const tuple_constructor& tc) { - for (auto& e : tc.elements) { - if (!expr::is(e)) { - on_internal_error(rlogger, fmt::format("extract_clustering_prefix_restrictions: tuple of non-column_value: {}", tc)); - } - } - with_current_binary_operator(*this, [&] (const binary_operator& b) { - multi.push_back(b); - }); - } - - void operator()(const column_value& cv) { - auto s = &cv; - with_current_binary_operator(*this, [&] (const binary_operator& b) { - if (s->col->is_clustering_key()) { - const auto [it, inserted] = single.try_emplace(s->col, b); - if (!inserted) { - it->second = make_conjunction(std::move(it->second), b); - } - } - }); - } - - void operator()(const subscript& sub) { - const column_value& cval = 
get_subscripted_column(sub.val); - - with_current_binary_operator(*this, [&] (const binary_operator& b) { - if (cval.col->is_clustering_key()) { - const auto [it, inserted] = single.try_emplace(cval.col, b); - if (!inserted) { - it->second = make_conjunction(std::move(it->second), b); - } - } - }); - } - - void operator()(const function_call& fun_call) { - if (is_partition_token_for_schema(fun_call, *table_schema)) { - // A token cannot be a clustering prefix restriction - return; - } - - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(function_call)"); - } - - void operator()(const constant&) {} - - void operator()(const unresolved_identifier&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(unresolved_identifier)"); - } - - void operator()(const column_mutation_attribute&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(column_mutation_attribute)"); - } - - void operator()(const cast&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(cast)"); - } - - void operator()(const field_selection&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(field_selection)"); - } - - void operator()(const bind_variable&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(bind_variable)"); - } - - void operator()(const untyped_constant&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(untyped_constant)"); - } - - void operator()(const collection_constructor&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(collection_constructor)"); - } - - void operator()(const usertype_constructor&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(usertype_constructor)"); - } - - void operator()(const temporary&) { - on_internal_error(rlogger, "extract_clustering_prefix_restrictions(temporary)"); - } - }; - visitor v { - .table_schema = schema - }; - - expr::visit(v, where_clause); - - if 
(!v.multi.empty()) { - return std::move(v.multi); - } - - std::vector prefix; - for (const auto& col : schema->clustering_key_columns()) { - const auto found = v.single.find(&col); - if (found == v.single.end()) { // Any further restrictions are skipping the CK order. - break; - } - if (find_needs_filtering(found->second)) { // This column's restriction doesn't define a clear bound. - // TODO: if this is a conjunction of filtering and non-filtering atoms, we could split them and add the - // latter to the prefix. - break; - } - prefix.push_back(found->second); - if (has_slice(found->second)) { - break; - } - } - return prefix; -} - -statement_restrictions::statement_restrictions(data_dictionary::database db, +statement_restrictions::statement_restrictions(private_tag, + data_dictionary::database db, schema_ptr schema, statements::statement_type type, const expr::expression& where_clause, @@ -1067,9 +860,10 @@ statement_restrictions::statement_restrictions(data_dictionary::database db, bool for_view, bool allow_filtering, check_indexes do_check_indexes) - : statement_restrictions(schema, allow_filtering) + : statement_restrictions(private_tag{}, schema, allow_filtering) { _check_indexes = do_check_indexes; + std::vector prepared_where_clause; for (auto&& relation_expr : boolean_factors(where_clause)) { const expr::binary_operator* relation_binop = expr::as_if(&relation_expr); @@ -1078,36 +872,263 @@ statement_restrictions::statement_restrictions(data_dictionary::database db, } expr::binary_operator prepared_restriction = expr::validate_and_prepare_new_restriction(*relation_binop, db, schema, ctx); - add_restriction(prepared_restriction, schema, allow_filtering, for_view); + prepared_where_clause.push_back(std::move(prepared_restriction)); + } - if (prepared_restriction.op != expr::oper_t::IS_NOT) { - _where = _where.has_value() ? 
make_conjunction(std::move(*_where), prepared_restriction) : prepared_restriction; + std::vector predicates; + for (auto& prepared_restriction : prepared_where_clause) { + auto preds = to_predicates(prepared_restriction, _schema.get()); + predicates.insert(predicates.end(), std::make_move_iterator(preds.begin()), std::make_move_iterator(preds.end())); + } + + bool ck_is_empty = true; + bool has_mc_clustering = false; + bool ck_has_slice = false; + const column_definition* ck_last_column = nullptr; + const predicate* first_mc_pred = nullptr; + bool pk_is_empty = true; + bool has_token = false; + std::optional token_pred; + std::unordered_map pk_range_preds; + std::vector mc_ck_preds; + std::unordered_map sc_ck_preds; + single_column_predicate_vectors sc_pk_pred_vectors; + single_column_predicate_vectors sc_ck_pred_vectors; + single_column_predicate_vectors sc_nonpk_pred_vectors; + for (auto& pred : predicates) { + if (pred.is_not_null_single_column) { + auto* col = require_on_single_column(pred); + _not_null_columns.insert(col); + + if (!for_view) { + throw exceptions::invalid_request_exception(format("restriction '{}' is only supported in materialized view creation", pred.filter)); + } + } else if (pred.is_multi_column) { + // Multi column restrictions are only allowed on clustering columns + if (ck_is_empty) { + _clustering_columns_restrictions = pred.filter; + ck_is_empty = false; + has_mc_clustering = true; + first_mc_pred = &pred; + mc_ck_preds.push_back(pred); + if (pred.is_slice) { + ck_has_slice = true; + } + } else { + + if (!has_mc_clustering) { + throw exceptions::invalid_request_exception("Mixing single column relations and multi column relations on clustering columns is not allowed"); + } + + if (pred.equality) { + throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes an Equal", + expr::get_columns_in_commons(_clustering_columns_restrictions, pred.filter))); + } else if (pred.is_in) { + 
throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes a IN", + expr::get_columns_in_commons(_clustering_columns_restrictions, pred.filter))); + } else if (pred.is_slice) { + if (!ck_has_slice) { + throw exceptions::invalid_request_exception(format("Column \"{}\" cannot be restricted by both an equality and an inequality relation", + expr::get_columns_in_commons(_clustering_columns_restrictions, pred.filter))); + } + + // Don't allow to mix plain and SCYLLA_CLUSTERING_BOUND bounds + if (first_mc_pred->order != pred.order) { + static auto order2str = [](auto o) { return o == expr::comparison_order::cql ? "plain" : "SCYLLA_CLUSTERING_BOUND"; }; + throw exceptions::invalid_request_exception( + format("Invalid combination of restrictions ({} / {})", + order2str(first_mc_pred->order), order2str(pred.order))); + } + + // Here check that there aren't two < <= or two > and >= + if (pred.is_lower_bound && first_mc_pred->is_lower_bound) { + throw exceptions::invalid_request_exception(format( + "More than one restriction was found for the start bound on {}", + expr::get_columns_in_commons(pred.filter, first_mc_pred->filter))); + } + + if (pred.is_upper_bound && first_mc_pred->is_upper_bound) { + throw exceptions::invalid_request_exception(format( + "More than one restriction was found for the end bound on {}", + expr::get_columns_in_commons(pred.filter, first_mc_pred->filter))); + } + + _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, pred.filter); + mc_ck_preds.push_back(pred); + ck_has_slice = true; + } else { + throw exceptions::invalid_request_exception(format("Unsupported multi-column relation: {}", pred.filter)); + } + } + } else if (std::holds_alternative(pred.on)) { + // Token always restricts the partition key + if (!pk_is_empty && !has_token) { + throw exceptions::invalid_request_exception( + seastar::format("Columns \"{}\" cannot be restricted by both a normal 
relation and a token relation", + fmt::join(expr::get_sorted_column_defs(_partition_key_restrictions) | + std::views::transform([](auto* p) { + return maybe_column_definition{p}; + }), + ", "))); + } + + _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, pred.filter); + pk_is_empty = false; + has_token = true; + if (token_pred) { + token_pred = make_conjunction(std::move(*token_pred), pred); + } else { + token_pred = pred; + } + } else if (std::holds_alternative(pred.on)) { + const column_definition* def = std::get(pred.on).column; + if (def->is_partition_key()) { + // View definition allows PK slices, because it's not a performance problem. + if (!pred.equality && !pred.is_in && !allow_filtering && !for_view) { + throw exceptions::invalid_request_exception( + "Only EQ and IN relation are supported on the partition key " + "(unless you use the token() function or ALLOW FILTERING)"); + } + if (has_token) { + throw exceptions::invalid_request_exception( + seastar::format("Columns \"{}\" cannot be restricted by both a normal relation and a token relation", + fmt::join(expr::get_sorted_column_defs(_partition_key_restrictions) | + std::views::transform([](auto* p) { + return maybe_column_definition{p}; + }), + ", "))); + } + + _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, pred.filter); + pk_is_empty = false; + { + auto [it, inserted] = _single_column_partition_key_restrictions.try_emplace(def, expr::conjunction{}); + it->second = expr::make_conjunction(std::move(it->second), pred.filter); + } + sc_pk_pred_vectors[def].push_back(pred); + if (pred.equality || pred.is_in) { + auto [it, inserted] = pk_range_preds.try_emplace(def, pred); + if (!inserted) { + it->second = make_conjunction(std::move(it->second), pred); + } + } + _partition_range_is_simple &= !pred.is_in; + } else if (def->is_clustering_key()) { + if (has_mc_clustering) { + throw exceptions::invalid_request_exception( + "Mixing single column 
relations and multi column relations on clustering columns is not allowed"); + } + + const column_definition* new_column = std::get(pred.on).column; + const column_definition* last_column = ck_last_column; + + if (last_column != nullptr && !allow_filtering) { + if (ck_has_slice && schema->position(*new_column) > schema->position(*last_column)) { + throw exceptions::invalid_request_exception(format("Clustering column \"{}\" cannot be restricted (preceding column \"{}\" is restricted by a non-EQ relation)", + new_column->name_as_text(), last_column->name_as_text())); + } + + if (schema->position(*new_column) < schema->position(*last_column)) { + if (pred.is_slice) { + throw exceptions::invalid_request_exception(format("PRIMARY KEY column \"{}\" cannot be restricted (preceding column \"{}\" is restricted by a non-EQ relation)", + last_column->name_as_text(), new_column->name_as_text())); + } + } + } + + _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, pred.filter); + ck_is_empty = false; + { + auto [it, inserted] = _single_column_clustering_key_restrictions.try_emplace(def, expr::conjunction{}); + it->second = expr::make_conjunction(std::move(it->second), pred.filter); + } + sc_ck_pred_vectors[def].push_back(pred); + { + auto [it, inserted] = sc_ck_preds.try_emplace(def, pred); + if (!inserted) { + it->second = make_conjunction(std::move(it->second), pred); + } + } + if (pred.is_slice) { + ck_has_slice = true; + } + if (ck_last_column == nullptr || schema->position(*new_column) > schema->position(*ck_last_column)) { + ck_last_column = new_column; + } + } else { + _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, pred.filter); + { + auto [it, inserted] = _single_column_nonprimary_key_restrictions.try_emplace(def, expr::conjunction{}); + it->second = expr::make_conjunction(std::move(it->second), pred.filter); + } + sc_nonpk_pred_vectors[def].push_back(pred); + } + } else { + throw 
exceptions::invalid_request_exception(format("Unhandled restriction: {}", pred.filter)); + } + + if (!pred.is_not_null_single_column) { + _where.push_back(pred.filter); + } + // Subscript EQ (e.g. m[1] = 'a') is not considered an EQ on the column + // itself, matching the behavior of the old expression-walking code which + // only recognized column_value and tuple_constructor in the LHS. + if (pred.equality && !pred.is_subscript) { + if (auto* sc = std::get_if(&pred.on)) { + _columns_with_eq.insert(sc->column); + } else if (auto* mc = std::get_if(&pred.on)) { + _columns_with_eq.insert(mc->columns.begin(), mc->columns.end()); + } } } - if (_where.has_value()) { - if (!has_token_restrictions()) { - _single_column_partition_key_restrictions = get_single_column_restrictions_map(_partition_key_restrictions); + if (!_where.empty()) { + if (!mc_ck_preds.empty()) { + _clustering_prefix_restrictions = mc_ck_preds; /* copy, not move: mc_ck_preds is read again by the multi-column index-support check further down */ + } else { + std::vector prefix; + for (const auto& col : _schema->clustering_key_columns()) { + const auto found = sc_ck_preds.find(&col); + if (found == sc_ck_preds.end()) { + break; + } + if (find_needs_filtering(found->second.filter)) { + break; + } + prefix.push_back(found->second); + if (has_slice(found->second.filter)) { + break; + } + } + _clustering_prefix_restrictions = std::move(prefix); } - if (!contains_multi_column_restriction(_clustering_columns_restrictions)) { - _single_column_clustering_key_restrictions = get_single_column_restrictions_map(_clustering_columns_restrictions); + if (token_pred) { + _partition_range_restrictions = token_range_restrictions{ + .token_restrictions = std::move(*token_pred), + }; + } else if (pk_range_preds.size() == _schema->partition_key_size()) { + _partition_range_restrictions = single_column_partition_range_restrictions{ + .per_column_restrictions = std::move(pk_range_preds) | std::views::values | std::ranges::to(), + }; } - _single_column_nonprimary_key_restrictions = 
get_single_column_restrictions_map(_nonprimary_key_restrictions); - _clustering_prefix_restrictions = extract_clustering_prefix_restrictions(*_where, _schema); - _partition_range_restrictions = extract_partition_range(*_where, _schema); } - _has_multi_column = find_binop(_clustering_columns_restrictions, is_multi_column); + _has_multi_column = has_mc_clustering; if (_check_indexes) { auto cf = db.find_column_family(schema); auto& sim = cf.get_index_manager(); const expr::allow_local_index allow_local( !has_partition_key_unrestricted_components() && partition_key_restrictions_is_all_eq()); - _has_multi_column = find_binop(_clustering_columns_restrictions, is_multi_column); - _has_queriable_ck_index = clustering_columns_restrictions_have_supporting_index(sim, allow_local) + if (!_has_multi_column) { + _has_queriable_ck_index = index_supports_some_column(sc_ck_pred_vectors, sim, allow_local) + && !type.is_delete(); + } else { + _has_queriable_ck_index = multi_column_predicates_have_supporting_index(mc_ck_preds, sim, allow_local) + && !type.is_delete(); + } + _has_queriable_pk_index = !has_token + && index_supports_some_column(sc_pk_pred_vectors, sim, allow_local) && !type.is_delete(); - _has_queriable_pk_index = parition_key_restrictions_have_supporting_index(sim, allow_local) - && !type.is_delete(); - _has_queriable_regular_index = index_supports_some_column(_nonprimary_key_restrictions, sim, allow_local) + _has_queriable_regular_index = index_supports_some_column(sc_nonpk_pred_vectors, sim, allow_local) && !type.is_delete(); } else { _has_queriable_ck_index = false; @@ -1120,8 +1141,10 @@ statement_restrictions::statement_restrictions(data_dictionary::database db, // Some but not all of the partition key columns have been specified; // hence we need turn these restrictions into index expressions. 
+ std::vector search_groups; if (_uses_secondary_indexing || pk_restrictions_need_filtering()) { _index_restrictions.push_back(_partition_key_restrictions); + search_groups.push_back({sc_pk_pred_vectors, _partition_key_restrictions}); } // If the only updated/deleted columns are static, then we don't need clustering columns. @@ -1154,6 +1177,7 @@ statement_restrictions::statement_restrictions(data_dictionary::database db, if (_uses_secondary_indexing || clustering_key_restrictions_need_filtering()) { _index_restrictions.push_back(_clustering_columns_restrictions); + search_groups.push_back({sc_ck_pred_vectors, _clustering_columns_restrictions}); } else if (find_binop(_clustering_columns_restrictions, is_on_collection)) { fail(unimplemented::cause::INDEXES); } @@ -1167,6 +1191,7 @@ statement_restrictions::statement_restrictions(data_dictionary::database db, "this query despite the performance unpredictability, use ALLOW FILTERING"); } _index_restrictions.push_back(_nonprimary_key_restrictions); + search_groups.push_back({sc_nonpk_pred_vectors, _nonprimary_key_restrictions}); } if (_uses_secondary_indexing && !(for_view || allow_filtering)) { @@ -1176,10 +1201,14 @@ statement_restrictions::statement_restrictions(data_dictionary::database db, if (_check_indexes) { auto cf = db.find_column_family(_schema); auto& sim = cf.get_index_manager(); - std::tie(_idx_opt, _idx_restrictions) = do_find_idx(sim); + const expr::allow_local_index allow_local_for_idx( + !has_partition_key_unrestricted_components() + && partition_key_restrictions_is_all_eq()); + std::tie(_idx_opt, _idx_restrictions) = do_find_idx( + _uses_secondary_indexing, sim, search_groups, allow_local_for_idx); } - calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(db); + calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(db, sc_pk_pred_vectors, sc_ck_pred_vectors, sc_nonpk_pred_vectors); if (pk_restrictions_need_filtering()) { auto partition_key_filter = 
expr::conjunction{ @@ -1246,12 +1275,20 @@ statement_restrictions::statement_restrictions(data_dictionary::database db, _view_schema = view_schema; if (im.local()) { - prepare_indexed_local(*view_schema); + prepare_indexed_local(*view_schema, sc_pk_pred_vectors, sc_ck_pred_vectors, sc_nonpk_pred_vectors); } else { prepare_indexed_global(*view_schema); } } } + + _get_partition_key_ranges_fn = build_partition_key_ranges_fn(); + + _get_clustering_bounds_fn = build_get_clustering_bounds_fn(); + _get_global_index_clustering_ranges_fn = build_get_global_index_clustering_ranges_fn(); + _get_global_index_token_clustering_ranges_fn = build_get_global_index_token_clustering_ranges_fn(); + _get_local_index_clustering_ranges_fn = build_get_local_index_clustering_ranges_fn(); + _value_for_index_partition_key_fn = build_value_for_index_partition_key_fn(); } bool @@ -1323,26 +1360,30 @@ const std::vector& statement_restrictions::index_restrictions( } bool statement_restrictions::is_empty() const { - return !_where.has_value(); + return _where.empty(); } -// Current score table: -// local and restrictions include full partition key: 2 -// global: 1 -// local and restrictions does not include full partition key: 0 (do not pick) -int statement_restrictions::score(const secondary_index::index& index) const { - if (index.metadata().local()) { - const bool allow_local = !has_partition_key_unrestricted_components() && partition_key_restrictions_is_all_eq(); - return allow_local ? 
2 : 0; - } - return 1; -} -std::pair, expr::expression> statement_restrictions::do_find_idx(const secondary_index::secondary_index_manager& sim) const { - if (!_uses_secondary_indexing) { +static std::pair, expr::expression> do_find_idx( + bool uses_secondary_indexing, + const secondary_index::secondary_index_manager& sim, + std::span search_groups, + allow_local_index allow_local) { + if (!uses_secondary_indexing) { return {std::nullopt, expr::conjunction({})}; } + // Current score table: + // local and restrictions include full partition key: 2 + // global: 1 + // local and restrictions does not include full partition key: 0 (do not pick) + auto index_score = [&] (const secondary_index::index& index) -> int { + if (index.metadata().local()) { + return allow_local ? 2 : 0; + } + return 1; + }; + std::optional chosen_index; int chosen_index_score = 0; expr::expression chosen_index_restrictions = expr::conjunction({}); @@ -1354,22 +1395,25 @@ std::pair, expr::expression> statement_res // index), but it is critical that two coordinators - or the same // coordinator over time - must choose the same index for the same query. // Otherwise, paging can break (see issue #7969). - for (const expr::expression& restriction : index_restrictions()) { - if (has_partition_token(restriction, *_schema) || contains_multi_column_restriction(restriction)) { - continue; - } - expr::for_each_expression(restriction, [&](const expr::column_value& cval) { - auto& cdef = cval.col; - expr::expression col_restrictions = expr::conjunction { - .children = extract_single_column_restrictions_for_column(restriction, *cdef) - }; + for (const auto& group : search_groups) { + // Iterate columns in WHERE-clause order (from the restriction expression) + // rather than schema-position order (from the pred_vectors map). 
When + // scores are tied the first column visited wins (strict >), so the + // iteration order determines which index is chosen for equal-score + // candidates -- matching the old expression-based do_find_idx behaviour. + expr::for_each_expression(group.restriction_expr, [&](const expr::column_value& cval) { + auto it = group.pred_vectors.find(cval.col); + if (it == group.pred_vectors.end()) { + return; + } + const auto& [col, preds] = *it; for (const auto& index : sim.list_indexes()) { - if (cdef->name_as_text() == index.target_column() && - is_supported_by(col_restrictions, index) && - score(index) > chosen_index_score) { + if (col->name_as_text() == index.target_column() && + are_predicates_supported_by(preds, index) && + index_score(index) > chosen_index_score) { chosen_index = index; - chosen_index_score = score(index); - chosen_index_restrictions = restriction; + chosen_index_score = index_score(index); + chosen_index_restrictions = group.restriction_expr; } } }); @@ -1383,37 +1427,39 @@ statement_restrictions::find_idx(const secondary_index::secondary_index_manager& } bool statement_restrictions::has_eq_restriction_on_column(const column_definition& column) const { - if (!_where.has_value()) { - return false; - } - - return restrictions::has_eq_restriction_on_column(column, *_where); + return _columns_with_eq.contains(&column); } std::vector statement_restrictions::get_column_defs_for_filtering(data_dictionary::database db) const { return _column_defs_for_filtering; } -void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(data_dictionary::database db) { +void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index( + data_dictionary::database db, + const single_column_predicate_vectors& sc_pk_pred_vectors, + const single_column_predicate_vectors& sc_ck_pred_vectors, + const single_column_predicate_vectors& sc_nonpk_pred_vectors) { std::vector 
column_defs_for_filtering; if (need_filtering()) { std::optional opt_idx; if (_check_indexes) { opt_idx = _idx_opt; } - auto column_uses_indexing = [&opt_idx] (const column_definition* cdef, const expr::expression* single_col_restr) { - return opt_idx && single_col_restr && is_supported_by(*single_col_restr, *opt_idx); + auto column_uses_indexing = [&opt_idx] (const single_column_predicate_vectors& pred_vectors, + const column_definition* cdef) { + if (!opt_idx) { + return false; + } + auto it = pred_vectors.find(cdef); + if (it == pred_vectors.end()) { + return false; + } + return are_predicates_supported_by(it->second, *opt_idx); }; if (pk_restrictions_need_filtering()) { for (auto&& cdef : expr::get_sorted_column_defs(_partition_key_restrictions)) { - const expr::expression* single_col_restr = nullptr; auto it = _single_column_partition_key_restrictions.find(cdef); - if (it != _single_column_partition_key_restrictions.end()) { - if (is_single_column_restriction(it->second)) { - single_col_restr = &it->second; - } - } - if (!column_uses_indexing(cdef, single_col_restr)) { + if (!column_uses_indexing(sc_pk_pred_vectors, cdef)) { column_defs_for_filtering.emplace_back(cdef); } else { _single_column_partition_key_restrictions.erase(it); @@ -1425,12 +1471,8 @@ void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restr column_id first_filtering_id = pk_has_unrestricted_components ? 
0 : _schema->clustering_key_columns().begin()->id + num_clustering_prefix_columns_that_need_not_be_filtered(); for (auto&& cdef : expr::get_sorted_column_defs(_clustering_columns_restrictions)) { - const expr::expression* single_col_restr = nullptr; auto it = _single_column_clustering_key_restrictions.find(cdef); - if (it != _single_column_clustering_key_restrictions.end()) { - single_col_restr = &it->second; - } - if (cdef->id >= first_filtering_id && !column_uses_indexing(cdef, single_col_restr)) { + if (cdef->id >= first_filtering_id && !column_uses_indexing(sc_ck_pred_vectors, cdef)) { column_defs_for_filtering.emplace_back(cdef); } else { _single_column_clustering_key_restrictions.erase(it); @@ -1439,7 +1481,7 @@ void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restr } for (auto it = _single_column_nonprimary_key_restrictions.begin(); it != _single_column_nonprimary_key_restrictions.end();) { auto&& [cdef, cur_restr] = *it; - if (!column_uses_indexing(cdef, &cur_restr)) { + if (!column_uses_indexing(sc_nonpk_pred_vectors, cdef)) { column_defs_for_filtering.emplace_back(cdef); ++it; } else { @@ -1450,176 +1492,6 @@ void statement_restrictions::calculate_column_defs_for_filtering_and_erase_restr _column_defs_for_filtering = std::move(column_defs_for_filtering); } -void statement_restrictions::add_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view) { - if (restr.op == expr::oper_t::IS_NOT) { - // Handle IS NOT NULL restrictions separately - add_is_not_restriction(restr, schema, for_view); - } else if (is_multi_column(restr)) { - // Multi column restrictions are only allowed on clustering columns - add_multi_column_clustering_key_restriction(restr); - } else if (has_partition_token(restr, *_schema)) { - // Token always restricts the partition key - add_token_partition_key_restriction(restr); - } else if (is_single_column_restriction(restr)) { - const column_definition* def = 
get_the_only_column(restr).col; - if (def->is_partition_key()) { - add_single_column_parition_key_restriction(restr, schema, allow_filtering, for_view); - } else if (def->is_clustering_key()) { - add_single_column_clustering_key_restriction(restr, schema, allow_filtering); - } else { - add_single_column_nonprimary_key_restriction(restr); - } - } else { - throw exceptions::invalid_request_exception(format("Unhandled restriction: {}", restr)); - } -} - -void statement_restrictions::add_is_not_restriction(const expr::binary_operator& restr, schema_ptr schema, bool for_view) { - const expr::column_value* lhs_col_def = expr::as_if(&restr.lhs); - // The "IS NOT NULL" restriction is only supported (and - // mandatory) for materialized view creation: - if (lhs_col_def == nullptr) { - throw exceptions::invalid_request_exception("IS NOT only supports single column"); - } - // currently, the grammar only allows the NULL argument to be - // "IS NOT", so this assertion should not be able to fail - if (!expr::is(restr.rhs) || !expr::as(restr.rhs).is_null()) { - throw exceptions::invalid_request_exception("Only IS NOT NULL is supported"); - } - - _not_null_columns.insert(lhs_col_def->col); - - if (!for_view) { - throw exceptions::invalid_request_exception(format("restriction '{}' is only supported in materialized view creation", restr)); - } -} - -void statement_restrictions::add_single_column_parition_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view) { - // View definition allows PK slices, because it's not a performance problem. 
- if (restr.op != expr::oper_t::EQ && restr.op != expr::oper_t::IN && !allow_filtering && !for_view) { - throw exceptions::invalid_request_exception( - "Only EQ and IN relation are supported on the partition key " - "(unless you use the token() function or ALLOW FILTERING)"); - } - if (has_token_restrictions()) { - throw exceptions::invalid_request_exception( - seastar::format("Columns \"{}\" cannot be restricted by both a normal relation and a token relation", - fmt::join(expr::get_sorted_column_defs(_partition_key_restrictions) | - std::views::transform([](auto* p) { - return maybe_column_definition{p}; - }), - ", "))); - } - - _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, restr); - _partition_range_is_simple &= !find(restr, expr::oper_t::IN); -} - -void statement_restrictions::add_token_partition_key_restriction(const expr::binary_operator& restr) { - if (!partition_key_restrictions_is_empty() && !has_token_restrictions()) { - throw exceptions::invalid_request_exception( - seastar::format("Columns \"{}\" cannot be restricted by both a normal relation and a token relation", - fmt::join(expr::get_sorted_column_defs(_partition_key_restrictions) | - std::views::transform([](auto* p) { - return maybe_column_definition{p}; - }), - ", "))); - } - - _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, restr); -} - -void statement_restrictions::add_single_column_clustering_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering) { - if (find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { - return expr::is(b.lhs); - })) { - throw exceptions::invalid_request_exception( - "Mixing single column relations and multi column relations on clustering columns is not allowed"); - } - - const column_definition* new_column = get_the_only_column(restr).col; - const column_definition* last_column = 
expr::get_last_column_def(_clustering_columns_restrictions); - - if (last_column != nullptr && !allow_filtering) { - if (has_slice(_clustering_columns_restrictions) && schema->position(*new_column) > schema->position(*last_column)) { - throw exceptions::invalid_request_exception(format("Clustering column \"{}\" cannot be restricted (preceding column \"{}\" is restricted by a non-EQ relation)", - new_column->name_as_text(), last_column->name_as_text())); - } - - if (schema->position(*new_column) < schema->position(*last_column)) { - if (has_slice(restr)) { - throw exceptions::invalid_request_exception(format("PRIMARY KEY column \"{}\" cannot be restricted (preceding column \"{}\" is restricted by a non-EQ relation)", - last_column->name_as_text(), new_column->name_as_text())); - } - } - } - - _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, restr); -} - -void statement_restrictions::add_multi_column_clustering_key_restriction(const expr::binary_operator& restr) { - if (is_empty_restriction(_clustering_columns_restrictions)) { - _clustering_columns_restrictions = restr; - return; - } - - if (!find_binop(_clustering_columns_restrictions, [] (const expr::binary_operator& b) { - return expr::is(b.lhs); - })) { - throw exceptions::invalid_request_exception("Mixing single column relations and multi column relations on clustering columns is not allowed"); - } - - if (restr.op == expr::oper_t::EQ) { - throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes an Equal", - expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); - } else if (restr.op == expr::oper_t::IN) { - throw exceptions::invalid_request_exception(format("{} cannot be restricted by more than one relation if it includes a IN", - expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); - } else if (is_slice(restr.op)) { - if 
(!expr::has_slice(_clustering_columns_restrictions)) { - throw exceptions::invalid_request_exception(format("Column \"{}\" cannot be restricted by both an equality and an inequality relation", - expr::get_columns_in_commons(_clustering_columns_restrictions, restr))); - } - - const expr::binary_operator* other_slice = expr::find_in_expression(_clustering_columns_restrictions, [](const expr::binary_operator){return true;}); - if (other_slice == nullptr) { - on_internal_error(rlogger, "add_multi_column_clustering_key_restriction: _clustering_columns_restrictions is empty!"); - } - - // Don't allow to mix plain and SCYLLA_CLUSTERING_BOUND bounds - if (other_slice->order != restr.order) { - static auto order2str = [](auto o) { return o == expr::comparison_order::cql ? "plain" : "SCYLLA_CLUSTERING_BOUND"; }; - throw exceptions::invalid_request_exception( - format("Invalid combination of restrictions ({} / {})", - order2str(other_slice->order), order2str(restr.order))); - } - - // Here check that there aren't two < <= or two > and >= - auto is_greater = [](expr::oper_t op) {return op == expr::oper_t::GT || op == expr::oper_t::GTE; }; - auto is_less = [](expr::oper_t op) {return op == expr::oper_t::LT || op == expr::oper_t::LTE; }; - - if (is_greater(restr.op) && is_greater(other_slice->op)) { - throw exceptions::invalid_request_exception(format( - "More than one restriction was found for the start bound on {}", - expr::get_columns_in_commons(restr, *other_slice))); - } - - if (is_less(restr.op) && is_less(other_slice->op)) { - throw exceptions::invalid_request_exception(format( - "More than one restriction was found for the end bound on {}", - expr::get_columns_in_commons(restr, *other_slice))); - } - - _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, restr); - } else { - throw exceptions::invalid_request_exception(format("Unsupported multi-column relation: ", restr)); - } -} - -void 
statement_restrictions::add_single_column_nonprimary_key_restriction(const expr::binary_operator& restr) { - _nonprimary_key_restrictions = expr::make_conjunction(_nonprimary_key_restrictions, restr); -} - void statement_restrictions::process_partition_key_restrictions(bool for_view, bool allow_filtering, statements::statement_type type) { // If there is a queryable index, no special condition are required on the other restrictions. // But we still need to know 2 things: @@ -1699,91 +1571,6 @@ const column_definition& statement_restrictions::unrestricted_column(column_kind to_sstring(kind), restrictions)); }; -bool statement_restrictions::clustering_columns_restrictions_have_supporting_index( - const secondary_index::secondary_index_manager& index_manager, - expr::allow_local_index allow_local) const { - // Single column restrictions can be handled by the existing code - if (!contains_multi_column_restriction(_clustering_columns_restrictions)) { - return index_supports_some_column(_clustering_columns_restrictions, index_manager, allow_local); - } - - // Multi column restrictions have to be handled separately - for (const auto& index : index_manager.list_indexes()) { - if (!allow_local && index.metadata().local()) { - continue; - } - if (multi_column_clustering_restrictions_are_supported_by(index)) { - return true; - } - } - return false; -} - -bool statement_restrictions::multi_column_clustering_restrictions_are_supported_by( - const secondary_index::index& index) const { - // Slice restrictions have to be checked depending on the clustering slice - if (has_slice(_clustering_columns_restrictions)) { - bounds_slice clustering_slice = get_clustering_slice(); - - const expr::column_value* supported_column = - find_in_expression(_clustering_columns_restrictions, - [&](const expr::column_value& cval) -> bool { - return clustering_slice.is_supported_by(*cval.col, index); - } - ); - return supported_column != nullptr; - } - - // Otherwise it has to be a single binary 
operator with EQ or IN. - // This is checked earlier during add_restriction. - const expr::binary_operator* single_binop = - expr::as_if(&_clustering_columns_restrictions); - if (single_binop == nullptr) { - on_internal_error(rlogger, format( - "multi_column_clustering_restrictions_are_supported_by more than one non-slice restriction: {}", - _clustering_columns_restrictions)); - } - - if (single_binop->op != expr::oper_t::IN && single_binop->op != expr::oper_t::EQ) { - on_internal_error(rlogger, format("Disallowed multi column restriction: {}", *single_binop)); - } - - const expr::column_value* supported_column = - find_in_expression(_clustering_columns_restrictions, - [&](const expr::column_value& cval) -> bool { - return index.supports_expression(*cval.col, single_binop->op); - } - ); - return supported_column != nullptr; -} - -bounds_slice statement_restrictions::get_clustering_slice() const { - std::optional result; - - expr::for_each_expression(_clustering_columns_restrictions, - [&](const expr::binary_operator& binop) { - bounds_slice cur_slice = bounds_slice::from_binary_operator(binop); - if (!result.has_value()) { - result = cur_slice; - } else { - result->merge(cur_slice); - } - } - ); - - return *result; -} - -bool statement_restrictions::parition_key_restrictions_have_supporting_index(const secondary_index::secondary_index_manager& index_manager, - expr::allow_local_index allow_local) const { - // Token restrictions can't be supported by an index - if (has_token_restrictions()) { - return false; - } - - return index_supports_some_column(_partition_key_restrictions, index_manager, allow_local); -} - void statement_restrictions::process_clustering_columns_restrictions(bool for_view, bool allow_filtering) { if (!has_clustering_columns_restriction()) { return; @@ -1817,10 +1604,10 @@ namespace { using namespace expr; /// Computes partition-key ranges from token atoms in ex. 
-dht::partition_range_vector partition_ranges_from_token(const expr::expression& ex, +dht::partition_range_vector partition_ranges_from_token(const predicate& ex, const query_options& options, const schema& table_schema) { - auto values = possible_partition_token_values(ex, options, table_schema); + auto values = solve(ex, options); if (values == value_set(value_list{})) { return {}; } @@ -1866,30 +1653,26 @@ void error_if_exceeds_clustering_key_limit(size_t size, size_t clustering_limit) /// Computes partition-key ranges from expressions, which contains EQ/IN for every partition column. dht::partition_range_vector partition_ranges_from_singles( - const std::vector& expressions, const query_options& options, const schema& schema) { + const std::vector& expressions, const query_options& options, const schema& schema) { const size_t size_limit = options.get_cql_config().restrictions.partition_key_restrictions_max_cartesian_product_size; // Each element is a vector of that column's possible values: std::vector> column_values(schema.partition_key_size()); size_t product_size = 1; for (const auto& e : expressions) { - if (const auto arbitrary_binop = find_binop(e, [] (const binary_operator&) { return true; })) { - if (auto cv = expr::as_if(&arbitrary_binop->lhs)) { - const value_set vals = possible_column_values(cv->col, e, options); + const value_set vals = solve(e, options); if (auto lst = std::get_if(&vals)) { if (lst->empty()) { return {}; } product_size *= lst->size(); error_if_exceeds_partition_key_limit(product_size, size_limit); - column_values[schema.position(*cv->col)] = std::move(*lst); + column_values[schema.position(*require_on_single_column(e))] = std::move(*lst); } else { throw exceptions::invalid_request_exception( "Only EQ and IN relation are supported on the partition key " "(unless you use the token() function or ALLOW FILTERING)"); } - } - } } cartesian_product cp(column_values); dht::partition_range_vector ranges; @@ -1901,15 +1684,14 @@ 
dht::partition_range_vector partition_ranges_from_singles( /// Computes partition-key ranges from EQ restrictions on each partition column. Returns a single singleton range if /// the EQ restrictions are not mutually conflicting. Otherwise, returns an empty vector. dht::partition_range_vector partition_ranges_from_EQs( - const std::vector& eq_expressions, const query_options& options, const schema& schema) { + const std::vector& eq_expressions, const query_options& options, const schema& schema) { std::vector pk_value(schema.partition_key_size()); for (const auto& e : eq_expressions) { - const auto col = expr::get_subscripted_column(find(e, oper_t::EQ)->lhs).col; - const auto vals = std::get(possible_column_values(col, e, options)); + const auto vals = std::get(solve(e, options)); if (vals.empty()) { // Case of C=1 AND C=2. return {}; } - pk_value[schema.position(*col)] = std::move(vals[0]); + pk_value[schema.position(*require_on_single_column(e))] = std::move(vals[0]); } return {range_from_bytes(schema, pk_value)}; } @@ -1917,21 +1699,34 @@ dht::partition_range_vector partition_ranges_from_EQs( } // anonymous namespace dht::partition_range_vector statement_restrictions::get_partition_key_ranges(const query_options& options) const { - if (_partition_range_restrictions.empty()) { + return _get_partition_key_ranges_fn(options); +} + +get_partition_key_ranges_fn_t +statement_restrictions::build_partition_key_ranges_fn() const { + return std::visit(overloaded_functor{ + [&] (const no_partition_range_restrictions&) -> get_partition_key_ranges_fn_t { + return [] (const query_options& options) -> dht::partition_range_vector{ return {dht::partition_range::make_open_ended_both_sides()}; - } - if (has_partition_token(_partition_range_restrictions[0], *_schema)) { - if (_partition_range_restrictions.size() != 1) { - on_internal_error( - rlogger, - format("Unexpected size of token restrictions: {}", _partition_range_restrictions.size())); + }; + }, + [&] (const 
token_range_restrictions& r) -> get_partition_key_ranges_fn_t { + return [&] (const query_options& options) -> dht::partition_range_vector { + return partition_ranges_from_token(r.token_restrictions, options, *_schema); + }; + }, + [&] (const single_column_partition_range_restrictions& r) -> get_partition_key_ranges_fn_t { + if (_partition_range_is_simple) { + return [&] (const query_options& options) { + // Special case to avoid extra allocations required for a Cartesian product. + return partition_ranges_from_EQs(r.per_column_restrictions, options, *_schema); + }; + } else { + return [&] (const query_options& options) { + return partition_ranges_from_singles(r.per_column_restrictions, options, *_schema); + }; } - return partition_ranges_from_token(_partition_range_restrictions[0], options, *_schema); - } else if (_partition_range_is_simple) { - // Special case to avoid extra allocations required for a Cartesian product. - return partition_ranges_from_EQs(_partition_range_restrictions, options, *_schema); - } - return partition_ranges_from_singles(_partition_range_restrictions, options, *_schema); + }}, _partition_range_restrictions); } namespace { @@ -2086,160 +1881,117 @@ struct range_less { } }; -/// An expression visitor that translates multi-column atoms into clustering ranges. 
+ struct multi_column_range_accumulator { - const query_options& options; - const schema_ptr schema; std::vector ranges{query::clustering_range::make_open_ended_both_sides()}; - const clustering_key_prefix::prefix_equal_tri_compare prefix3cmp = get_unreversed_tri_compare(*schema); - - void operator()(const binary_operator& binop) { - auto& lhs = expr::as(binop.lhs); - if (is_compare(binop.op)) { - auto opt_values = expr::get_tuple_elements(expr::evaluate(binop.rhs, options), *type_of(binop.rhs)); - std::vector values(lhs.elements.size()); - for (size_t i = 0; i < lhs.elements.size(); ++i) { - auto& col = expr::as(lhs.elements.at(i)); - values[i] = *statements::request_validations::check_not_null( - opt_values[i], - "Invalid null value in condition for column {}", col.col->name_as_text()); - } - intersect_all(to_range(binop.op, clustering_key_prefix(std::move(values)))); - } else if (binop.op == oper_t::IN) { - const cql3::raw_value tup = expr::evaluate(binop.rhs, options); - utils::chunked_vector> tuple_elems; - if (tup.is_value()) { - tuple_elems = expr::get_list_of_tuples_elements(tup, *type_of(binop.rhs)); - } - for(size_t i = 0; i < tuple_elems.size(); ++i) { - if(tuple_elems[i].size() != lhs.elements.size()) { - throw exceptions::invalid_request_exception(format("Expected {} elements in value tuple, but got {}", - lhs.elements.size(), tuple_elems[i].size())); - } - for(size_t j = 0; j < lhs.elements.size(); ++j) { - auto& col = expr::as(lhs.elements.at(j)); - statements::request_validations::check_not_null( - tuple_elems[i][j], - "Invalid null value in condition for column {}", col.col->name_as_text()); - } - } - process_in_values(std::move(tuple_elems)); - } else { - on_internal_error(rlogger, format("multi_column_range_accumulator: unexpected atom {}", binop)); - } - } - - void operator()(const conjunction& c) { - std::ranges::for_each(c.children, [this] (const expression& child) { expr::visit(*this, child); }); - } - - void operator()(const constant& v) { - 
std::optional bool_val = get_bool_value(v); - if (!bool_val.has_value()) { - on_internal_error(rlogger, "non-bool constant encountered outside binary operator"); - } - - if (*bool_val == false) { - ranges.clear(); - } - } - - void operator()(const column_value&) { - on_internal_error(rlogger, "Column encountered outside binary operator"); - } - - void operator()(const subscript&) { - on_internal_error(rlogger, "Subscript encountered outside binary operator"); - } - - void operator()(const unresolved_identifier&) { - on_internal_error(rlogger, "Unresolved identifier encountered outside binary operator"); - } - - void operator()(const column_mutation_attribute&) { - on_internal_error(rlogger, "writetime/ttl encountered outside binary operator"); - } - - void operator()(const function_call&) { - on_internal_error(rlogger, "function call encountered outside binary operator"); - } - - void operator()(const cast&) { - on_internal_error(rlogger, "typecast encountered outside binary operator"); - } - - void operator()(const field_selection&) { - on_internal_error(rlogger, "field selection encountered outside binary operator"); - } - - void operator()(const bind_variable&) { - on_internal_error(rlogger, "bind variable encountered outside binary operator"); - } - - void operator()(const untyped_constant&) { - on_internal_error(rlogger, "untyped constant encountered outside binary operator"); - } - - void operator()(const tuple_constructor&) { - on_internal_error(rlogger, "tuple constructor encountered outside binary operator"); - } - - void operator()(const collection_constructor&) { - on_internal_error(rlogger, "collection constructor encountered outside binary operator"); - } - - void operator()(const usertype_constructor&) { - on_internal_error(rlogger, "collection constructor encountered outside binary operator"); - } - - void operator()(const temporary&) { - on_internal_error(rlogger, "temporary encountered outside binary operator"); - } - - /// Intersects each range 
with v. If any intersection is empty, clears ranges. - void intersect_all(const query::clustering_range& v) { - for (auto& r : ranges) { - auto intrs = intersection(r, v, prefix3cmp); - if (!intrs) { - ranges.clear(); - break; - } - r = *intrs; - } - } - - template - requires std::convertible_to - void process_in_values(Range in_values) { - if (ranges.empty()) { - return; // Shortcircuit an easy case. - } - std::set new_ranges(range_less{*schema}); - for (const auto& current_tuple : in_values) { - // Each IN value is like a separate EQ restriction ANDed to the existing state. - auto current_range = to_range( - oper_t::EQ, clustering_key_prefix::from_optional_exploded(*schema, current_tuple)); - for (const auto& r : ranges) { - auto intrs = intersection(r, current_range, prefix3cmp); - if (intrs) { - new_ranges.insert(*intrs); - } - } - } - ranges.assign(new_ranges.cbegin(), new_ranges.cend()); - } }; -/// Calculates clustering bounds for the multi-column case. -std::vector get_multi_column_clustering_bounds( - const query_options& options, - schema_ptr schema, - const std::vector& multi_column_restrictions) { - multi_column_range_accumulator acc{options, schema}; - for (const auto& restr : multi_column_restrictions) { - expr::visit(acc, restr); +/// Intersects each range with v. If any intersection is empty, clears ranges. 
+void intersect_all(multi_column_range_accumulator& acc, const clustering_key_prefix::prefix_equal_tri_compare& prefix3cmp, const query::clustering_range& v) { + auto& ranges = acc.ranges; + for (auto& r : ranges) { + auto intrs = intersection(r, v, prefix3cmp); + if (!intrs) { + ranges.clear(); + break; + } + r = *intrs; } - return acc.ranges; +} + +template +requires std::convertible_to +void process_in_values(multi_column_range_accumulator& acc, const clustering_key_prefix::prefix_equal_tri_compare& prefix3cmp, const schema_ptr& schema, Range in_values) { + auto& ranges = acc.ranges; + if (ranges.empty()) { + return; // Shortcircuit an easy case. + } + std::set new_ranges(range_less{*schema}); + for (const auto& current_tuple : in_values) { + // Each IN value is like a separate EQ restriction ANDed to the existing state. + auto current_range = to_range( + oper_t::EQ, clustering_key_prefix::from_optional_exploded(*schema, current_tuple)); + for (const auto& r : ranges) { + auto intrs = intersection(r, current_range, prefix3cmp); + if (intrs) { + new_ranges.insert(*intrs); + } + } + } + ranges.assign(new_ranges.cbegin(), new_ranges.cend()); +} + +std::vector get_equivalent_ranges( + const query::clustering_range& cql_order_range, const schema& schema); + +/// Calculates clustering bounds for the multi-column case. 
+std::function (const query_options&)> +build_get_multi_column_clustering_bounds_fn( + schema_ptr schema, + const std::vector& multi_column_restrictions, + bool all_natural, bool all_reverse) { + const auto prefix3cmp = get_unreversed_tri_compare(*schema); + std::vector> range_builders; + for (const auto& pred : multi_column_restrictions) { + const auto& binop = expr::as(pred.filter); + range_builders.emplace_back([binop, schema, prefix3cmp] (multi_column_range_accumulator& acc, const query_options& options) { + auto& lhs = expr::as(binop.lhs); + if (is_compare(binop.op)) { + auto opt_values = expr::get_tuple_elements(expr::evaluate(binop.rhs, options), *type_of(binop.rhs)); + std::vector values(lhs.elements.size()); + for (size_t i = 0; i < lhs.elements.size(); ++i) { + auto& col = expr::as(lhs.elements.at(i)); + values[i] = *statements::request_validations::check_not_null( + opt_values[i], + "Invalid null value in condition for column {}", col.col->name_as_text()); + } + intersect_all(acc, prefix3cmp, to_range(binop.op, clustering_key_prefix(std::move(values)))); + } else if (binop.op == oper_t::IN) { + const cql3::raw_value tup = expr::evaluate(binop.rhs, options); + utils::chunked_vector> tuple_elems; + if (tup.is_value()) { + tuple_elems = expr::get_list_of_tuples_elements(tup, *type_of(binop.rhs)); + } + for (size_t i = 0; i < tuple_elems.size(); ++i) { + if (tuple_elems[i].size() != lhs.elements.size()) { + throw exceptions::invalid_request_exception(format("Expected {} elements in value tuple, but got {}", + lhs.elements.size(), tuple_elems[i].size())); + } + for (size_t j = 0; j < lhs.elements.size(); ++j) { + auto& col = expr::as(lhs.elements.at(j)); + statements::request_validations::check_not_null( + tuple_elems[i][j], + "Invalid null value in condition for column {}", col.col->name_as_text()); + } + } + process_in_values(acc, prefix3cmp, schema, std::move(tuple_elems)); + } else { + on_internal_error(rlogger, format("multi_column_range_accumulator: 
unexpected atom {}", binop)); + } + }); + } + return [schema, range_builders, all_natural, all_reverse] (const query_options& options) -> std::vector { + multi_column_range_accumulator acc; + for (auto& builder : range_builders) { + builder(acc, options); + } + auto bounds = std::move(acc.ranges); + + if (!all_natural && !all_reverse) { + std::vector bounds_in_clustering_order; + for (const auto& b : bounds) { + const auto eqv = get_equivalent_ranges(b, *schema); + bounds_in_clustering_order.insert(bounds_in_clustering_order.end(), eqv.cbegin(), eqv.cend()); + } + return bounds_in_clustering_order; + } + if (all_reverse) { + for (auto& crange : bounds) { + crange = query::clustering_range(crange.end(), crange.start()); + } + } + return bounds; + }; } /// Reverses the range if the type is reversed. Why don't we have interval::reverse()?? @@ -2251,14 +2003,16 @@ query::clustering_range reverse_if_reqd(query::clustering_range r, const abstrac std::vector get_single_column_clustering_bounds( const query_options& options, const schema& schema, - const std::vector& single_column_restrictions) { + const std::vector& single_column_restrictions) { const size_t size_limit = options.get_cql_config().restrictions.clustering_key_restrictions_max_cartesian_product_size; size_t product_size = 1; std::vector> prior_column_values; // Equality values of columns seen so far. for (size_t i = 0; i < single_column_restrictions.size(); ++i) { - auto values = possible_column_values( - &schema.clustering_column_at(i), // This should be the LHS of restrictions[i]. 
+ if (&schema.clustering_column_at(i) != require_on_single_column(single_column_restrictions[i])) { + break; + } + auto values = solve( single_column_restrictions[i], options); if (auto list = std::get_if(&values)) { @@ -2325,21 +2079,21 @@ std::vector get_single_column_clustering_bounds( static std::vector get_index_v1_token_range_clustering_bounds( const query_options& options, const column_definition& token_column, - const expression& token_restriction) { + const predicate& token_restriction) { - // A workaround in order to make possible_column_values work properly. - // possible_column_values looks at the column type and uses this type's comparator. + // A workaround in order to make to_predicate work properly. + // to_predicate looks at the column type and uses this type's comparator. // This is a problem because when using blob's comparator, -4 is greater than 4. - // This makes possible_column_values think that an expression like token(p) > -4 and token(p) < 4 + // This makes to_predicate think that an expression like token(p) > -4 and token(p) < 4 // is impossible to fulfill. // Create a fake token column with the type set to bigint, translate the restriction to use this column // and use this restriction to calculate possible lhs values. column_definition token_column_bigint = token_column; token_column_bigint.type = long_type; - expression new_token_restrictions = replace_column_def(token_restriction, &token_column_bigint); + predicate new_token_restrictions = replace_column_def(token_restriction, &token_column_bigint); std::variant> values = - possible_column_values(&token_column_bigint, new_token_restrictions, options); + new_token_restrictions.solve_for(options); return std::visit(overloaded_functor { [](const value_list& list) { @@ -2528,11 +2282,14 @@ std::vector get_equivalent_ranges( } /// Extracts raw multi-column bounds from exprs; last one wins. 
-query::clustering_range range_from_raw_bounds( - const std::vector& exprs, const query_options& options, const schema& schema) { - opt_bound lb, ub; - for (const auto& e : exprs) { +get_clustering_bounds_fn_t +build_range_from_raw_bounds_fn( + const std::vector& exprs, const schema& schema) { + std::vector> range_builders; + for (const auto& e : exprs | std::views::transform(&predicate::filter)) { if (auto b = find_clustering_order(e)) { + range_builders.emplace_back([bb = *b, &schema] (const query_options& options) { + auto* b = &bb; cql3::raw_value tup_val = expr::evaluate(b->rhs, options); if (tup_val.is_null()) { on_internal_error(rlogger, format("range_from_raw_bounds: unexpected atom {}", *b)); @@ -2540,6 +2297,15 @@ query::clustering_range range_from_raw_bounds( const auto r = to_range( b->op, clustering_key_prefix::from_optional_exploded(schema, expr::get_tuple_elements(tup_val, *type_of(b->rhs)))); + return r; + }); + } + } + return [range_builders] (const query_options& options) -> std::vector { + opt_bound lb, ub; + for (auto& builder : range_builders) { + auto r = builder(options); + if (r.start()) { lb = r.start(); } @@ -2547,26 +2313,28 @@ query::clustering_range range_from_raw_bounds( ub = r.end(); } } - } - return {lb, ub}; + return {{lb, ub}}; + }; } } // anonymous namespace -std::vector statement_restrictions::get_clustering_bounds(const query_options& options) const { +get_clustering_bounds_fn_t +statement_restrictions::build_get_clustering_bounds_fn() const { if (_clustering_prefix_restrictions.empty()) { + return [&] (const query_options& options) -> std::vector { return {query::clustering_range::make_open_ended_both_sides()}; + }; } - if (find_binop(_clustering_prefix_restrictions[0], is_multi_column)) { + if (_clustering_prefix_restrictions[0].is_multi_column) { bool all_natural = true, all_reverse = true; ///< Whether column types are reversed or natural. 
- for (auto& r : _clustering_prefix_restrictions) { // TODO: move to constructor, do only once. - using namespace expr; - const auto& binop = expr::as(r); - if (is_clustering_order(binop)) { - return {range_from_raw_bounds(_clustering_prefix_restrictions, options, *_schema)}; + for (auto& pred : _clustering_prefix_restrictions) { + if (pred.order == expr::comparison_order::clustering) { + return build_range_from_raw_bounds_fn(_clustering_prefix_restrictions, *_schema); } - for (auto& element : expr::as(binop.lhs).elements) { - auto& cv = expr::as(element); + auto& lhs = expr::as(expr::as(pred.filter).lhs); + for (auto& element : lhs.elements) { + auto& cv = expr::as(element); if (cv.col->type->is_reversed()) { all_natural = false; } else { @@ -2574,26 +2342,19 @@ std::vector statement_restrictions::get_clustering_boun } } } - auto bounds = get_multi_column_clustering_bounds(options, _schema, _clustering_prefix_restrictions); - if (!all_natural && !all_reverse) { - std::vector bounds_in_clustering_order; - for (const auto& b : bounds) { - const auto eqv = get_equivalent_ranges(b, *_schema); - bounds_in_clustering_order.insert(bounds_in_clustering_order.end(), eqv.cbegin(), eqv.cend()); - } - return bounds_in_clustering_order; - } - if (all_reverse) { - for (auto& crange : bounds) { - crange = query::clustering_range(crange.end(), crange.start()); - } - } - return bounds; + return build_get_multi_column_clustering_bounds_fn(_schema, _clustering_prefix_restrictions, + all_natural, all_reverse); } else { + return [&] (const query_options& options) -> std::vector { return get_single_column_clustering_bounds(options, *_schema, _clustering_prefix_restrictions); + }; } } +std::vector statement_restrictions::get_clustering_bounds(const query_options& options) const { + return _get_clustering_bounds_fn(options); +} + namespace { /// True iff get_partition_slice_for_global_index_posting_list() will be able to calculate the token value from the @@ -2698,64 +2459,122 @@ void 
statement_restrictions::prepare_indexed_global(const schema& idx_tbl_schema // This means that p1 and p2 can have many different values (token is a hash, can have collisions). // Clustering prefix ends after token_restriction, all further restrictions have to be filtered. expr::expression token_restriction = replace_partition_token(_partition_key_restrictions, token_column, *_schema); - _idx_tbl_ck_prefix = std::vector{std::move(token_restriction)}; + _idx_tbl_ck_prefix = std::vector{to_predicate_on_column(token_restriction, token_column, _schema.get())}; return; } // If we're here, it means the index cannot be on a partition column: process_partition_key_restrictions() // avoids indexing when _partition_range_is_simple. See _idx_tbl_ck_prefix blurb for its composition. - _idx_tbl_ck_prefix = std::vector(1 + _schema->partition_key_size(), expr::conjunction({})); + _idx_tbl_ck_prefix = std::vector(1 + _schema->partition_key_size(), predicate{ + .solve_for = nullptr, // FIXME: this is all overwritten later. Should be refactored. 
+ .filter = expr::expression(expr::conjunction{}), + .on = on_column{nullptr}, // Illegal but will be overwritten + .is_singleton = false, + }); _idx_tbl_ck_prefix->reserve(_idx_tbl_ck_prefix->size() + idx_tbl_schema.clustering_key_size()); - for (const auto& e : _partition_range_restrictions) { - const auto col = expr::as(find(e, oper_t::EQ)->lhs).col; + auto *single_column_partition_key_restrictions = std::get_if(&_partition_range_restrictions); + if (single_column_partition_key_restrictions) { + for (const auto& e : single_column_partition_key_restrictions->per_column_restrictions) { + const auto col = require_on_single_column(e); const auto pos = _schema->position(*col) + 1; (*_idx_tbl_ck_prefix)[pos] = replace_column_def(e, &idx_tbl_schema.clustering_column_at(pos)); + } } - if (std::ranges::any_of(*_idx_tbl_ck_prefix | std::views::drop(1), is_empty_restriction)) { + if (std::ranges::any_of(*_idx_tbl_ck_prefix | std::views::drop(1) | std::views::transform(&predicate::filter), is_empty_restriction)) { // If the partition key is not fully restricted, the index clustering key is of no use. 
- (*_idx_tbl_ck_prefix) = std::vector(); + (*_idx_tbl_ck_prefix) = std::vector(); return; } add_clustering_restrictions_to_idx_ck_prefix(idx_tbl_schema); auto pk_expressions = (*_idx_tbl_ck_prefix) + | std::views::transform(&predicate::filter) | std::views::drop(1) // skip the token restriction | std::views::take(_schema->partition_key_size()) // take only the partition key restrictions | std::views::transform(expr::as) // we know it's an EQ | std::views::transform(std::mem_fn(&expr::binary_operator::rhs)) // "solve" for the column value | std::ranges::to(); + auto pk_solvers = (*_idx_tbl_ck_prefix) + | std::views::drop(1) // skip the token restriction + | std::views::take(_schema->partition_key_size()) // take only the partition key restrictions + | std::views::transform(&predicate::solve_for) + | std::ranges::to(); + + auto is_singleton = std::ranges::all_of( + (*_idx_tbl_ck_prefix) + | std::views::drop(1) + | std::views::take(_schema->partition_key_size()), + &predicate::is_singleton); + + if (!is_singleton) { + on_internal_error(rlogger, "Inconsistency in singleton calculation in indexed query"); + } + auto token_func = make_shared(_schema); - (*_idx_tbl_ck_prefix)[0] = binary_operator( + auto token_expr = binary_operator( column_value(token_column), oper_t::EQ, expr::function_call{.func = std::move(token_func), .args = std::move(pk_expressions)}); + + auto token_solver = [this, pk_solvers = std::move(pk_solvers)] (const query_options& options) -> value_set { + auto pk_values = pk_solvers + | std::views::transform([&] (auto&& solver) { return solver(options); }) + | std::views::transform(value_set_to_singleton) + | std::ranges::to>(); + auto pk = partition_key::from_exploded(pk_values); + auto tok = dht::get_token(*_schema, pk); + return value_list{managed_bytes(serialized(dht::token::to_int64(tok)))}; + }; + + (*_idx_tbl_ck_prefix)[0] = predicate{ + .solve_for = std::move(token_solver), + .filter = std::move(token_expr), + .on = on_column{token_column}, + 
.is_singleton = is_singleton, + }; } -void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) { +void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema, + const single_column_predicate_vectors& sc_pk_pred_vectors, + const single_column_predicate_vectors& sc_ck_pred_vectors, + const single_column_predicate_vectors& sc_nonpk_pred_vectors) { if (!_partition_range_is_simple) { return; } // Local index clustering key is (indexed column, base clustering key) - _idx_tbl_ck_prefix = std::vector(); + _idx_tbl_ck_prefix = std::vector(); _idx_tbl_ck_prefix->reserve(1 + _clustering_prefix_restrictions.size()); const column_definition& indexed_column = idx_tbl_schema.column_at(column_kind::clustering_key, 0); const column_definition& indexed_column_base_schema = *_schema->get_column_definition(indexed_column.name()); - // Find index column restrictions in the WHERE clause - std::vector idx_col_restrictions = - extract_single_column_restrictions_for_column(*_where, indexed_column_base_schema); - expr::expression idx_col_restriction_expr = expr::expression(expr::conjunction{std::move(idx_col_restrictions)}); + // Find index column restrictions in the pre-built predicate vectors + const single_column_predicate_vectors* pvecs; + switch (indexed_column_base_schema.kind) { + case column_kind::partition_key: pvecs = &sc_pk_pred_vectors; break; + case column_kind::clustering_key: pvecs = &sc_ck_pred_vectors; break; + default: pvecs = &sc_nonpk_pred_vectors; break; + } + auto it = pvecs->find(&indexed_column_base_schema); + if (it == pvecs->end()) { + on_internal_error(rlogger, format("prepare_indexed_local: no predicates found for column {}", indexed_column_base_schema.name_as_text())); + } + const auto& preds = it->second; - // Translate the restriction to use column from the index schema and add it - expr::expression replaced_idx_restriction = replace_column_def(idx_col_restriction_expr, &indexed_column); - 
_idx_tbl_ck_prefix->push_back(replaced_idx_restriction); + // Translate each predicate to use column from the index schema, then merge + auto folded = std::ranges::fold_left_first( + preds | std::views::transform([&indexed_column](const predicate& p) { + return replace_column_def(p, &indexed_column); + }), + make_conjunction + ); + _idx_tbl_ck_prefix->push_back(std::move(*folded)); // Add restrictions for the clustering key add_clustering_restrictions_to_idx_ck_prefix(idx_tbl_schema); @@ -2763,16 +2582,19 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const schema& idx_tbl_schema) { for (const auto& e : _clustering_prefix_restrictions) { - if (find_binop(_clustering_prefix_restrictions[0], is_multi_column)) { + if (_clustering_prefix_restrictions[0].is_multi_column) { // TODO: We could handle single-element tuples, eg. `(c)>=(123)`. break; } - const auto any_binop = find_binop(e, [] (auto&&) { return true; }); + const auto any_binop = find_binop(e.filter, [] (auto&&) { return true; }); if (!any_binop) { break; } const auto col = expr::as(any_binop->lhs).col; - _idx_tbl_ck_prefix->push_back(replace_column_def(e, idx_tbl_schema.get_column_definition(col->name()))); + auto col_in_index = idx_tbl_schema.get_column_definition(col->name()); + auto replaced = replace_column_def(e.filter, col_in_index); + auto a = to_predicate_on_column(replaced, col_in_index, &idx_tbl_schema); + _idx_tbl_ck_prefix->push_back(std::move(a)); } } @@ -2781,82 +2603,101 @@ void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const // read). For example, if we have the filter "c1 < 3 and c2 > 3", c1 does not // need filtering but c2 does so num_prefix_columns_that_need_not_be_filtered // will be 1. 
+// +// _clustering_prefix_restrictions is already built with exactly this logic +// (iterating CK columns in schema order, stopping at gaps, needs-filtering +// predicates, and after a slice), so its size is the answer. Multi-column +// restrictions are treated as needing filtering. unsigned int statement_restrictions::num_clustering_prefix_columns_that_need_not_be_filtered() const { - if (contains_multi_column_restriction(_clustering_columns_restrictions)) { + if (_has_multi_column) { return 0; } + return _clustering_prefix_restrictions.size(); +} - single_column_restrictions_map column_restrictions = - get_single_column_restrictions_map(_clustering_columns_restrictions); - - // Restrictions currently need filtering in three cases: - // 1. any of them is a CONTAINS restriction - // 2. restrictions do not form a contiguous prefix (i.e. there are gaps in it) - // 3. a SLICE restriction isn't on a last place - column_id position = 0; - unsigned int count = 0; - for (const auto& restriction : column_restrictions | std::views::values) { - if (find_needs_filtering(restriction) - || position != get_the_only_column(restriction).col->id) { - return count; - } - if (!has_slice(restriction)) { - position = get_the_only_column(restriction).col->id + 1; - } - count++; +get_clustering_bounds_fn_t +statement_restrictions::build_get_global_index_clustering_ranges_fn() const { + if (!_idx_tbl_ck_prefix) { + return {}; } - return count; + + return [&] (const query_options& options) { + // Multi column restrictions are not added to _idx_tbl_ck_prefix, they are handled later by filtering. 
+ return get_single_column_clustering_bounds(options, *_view_schema, *_idx_tbl_ck_prefix); + }; } std::vector statement_restrictions::get_global_index_clustering_ranges( - const query_options& options, - const schema& idx_tbl_schema) const { - if (!_idx_tbl_ck_prefix) { - on_internal_error( - rlogger, "statement_restrictions::get_global_index_clustering_ranges called with unprepared index"); - } - - // Multi column restrictions are not added to _idx_tbl_ck_prefix, they are handled later by filtering. - return get_single_column_clustering_bounds(options, idx_tbl_schema, *_idx_tbl_ck_prefix); + const query_options& options) const { + return _get_global_index_clustering_ranges_fn(options); } -std::vector statement_restrictions::get_global_index_token_clustering_ranges( - const query_options& options, - const schema& idx_tbl_schema -) const { +get_clustering_bounds_fn_t +statement_restrictions::build_get_global_index_token_clustering_ranges_fn() const { if (!_idx_tbl_ck_prefix.has_value()) { - on_internal_error( - rlogger, "statement_restrictions::get_global_index_token_clustering_ranges called with unprepared index"); + return {}; } - const column_definition& token_column = idx_tbl_schema.clustering_column_at(0); + const column_definition& token_column = _view_schema->clustering_column_at(0); // In old indexes the token column was of type blob. // This causes problems with sorting and must be handled separately. 
if (token_column.type != long_type) { + return [&] (const query_options& options) { return get_index_v1_token_range_clustering_bounds(options, token_column, _idx_tbl_ck_prefix->at(0)); + }; } - return get_single_column_clustering_bounds(options, idx_tbl_schema, *_idx_tbl_ck_prefix); + return [&] (const query_options& options) { + return get_single_column_clustering_bounds(options, *_view_schema, *_idx_tbl_ck_prefix); + }; +} + +std::vector statement_restrictions::get_global_index_token_clustering_ranges( + const query_options& options) const { + return _get_global_index_token_clustering_ranges_fn(options); +} + +get_clustering_bounds_fn_t +statement_restrictions::build_get_local_index_clustering_ranges_fn() const { + if (!_idx_tbl_ck_prefix.has_value()) { + return {}; + } + + return [&] (const query_options& options) { + // Multi column restrictions are not added to _idx_tbl_ck_prefix, they are handled later by filtering. + return get_single_column_clustering_bounds(options, *_view_schema, *_idx_tbl_ck_prefix); + }; } std::vector statement_restrictions::get_local_index_clustering_ranges( - const query_options& options, - const schema& idx_tbl_schema) const { - if (!_idx_tbl_ck_prefix.has_value()) { - on_internal_error( - rlogger, "statement_restrictions::get_local_index_clustering_ranges called with unprepared index"); + const query_options& options) const { + return _get_local_index_clustering_ranges_fn(options); +} + +get_singleton_value_fn_t +statement_restrictions::build_value_for_index_partition_key_fn() const { + if (!_idx_opt) { + return {}; + } + const column_definition* cdef = _schema->get_column_definition(to_bytes(_idx_opt->target_column())); + if (!cdef) { + throw exceptions::invalid_request_exception("Indexed column not found in schema"); } - // Multi column restrictions are not added to _idx_tbl_ck_prefix, they are handled later by filtering. 
- return get_single_column_clustering_bounds(options, idx_tbl_schema, *_idx_tbl_ck_prefix); + return build_value_for_fn(*cdef, _idx_restrictions, *_schema); +} + +bytes_opt +statement_restrictions::value_for_index_partition_key(const query_options& options) const { + return _value_for_index_partition_key_fn(options); } sstring statement_restrictions::to_string() const { - return _where ? expr::to_string(*_where) : ""; + return !_where.empty() ? expr::to_string(expr::conjunction{.children = _where}) : ""; } -static void validate_primary_key_restrictions(const query_options& options, const std::vector& restrictions) { +static void validate_primary_key_restrictions(const query_options& options, std::ranges::range auto&& restrictions) { for (const auto& r: restrictions) { for_each_expression(r, [&](const binary_operator& binop) { if (binop.op != oper_t::EQ && binop.op != oper_t::IN) { @@ -2875,8 +2716,17 @@ static void validate_primary_key_restrictions(const query_options& options, cons } void statement_restrictions::validate_primary_key(const query_options& options) const { - validate_primary_key_restrictions(options, _partition_range_restrictions); - validate_primary_key_restrictions(options, _clustering_prefix_restrictions); + std::visit(overloaded_functor{ + [&] (const no_partition_range_restrictions&) { + }, + [&] (const token_range_restrictions& r) { + validate_primary_key_restrictions(options, std::span(&r.token_restrictions.filter, 1)); + }, + [&] (const single_column_partition_range_restrictions& r) { + validate_primary_key_restrictions(options, r.per_column_restrictions | std::views::transform(&predicate::filter)); + } + }, _partition_range_restrictions); + validate_primary_key_restrictions(options, _clustering_prefix_restrictions | std::views::transform(&predicate::filter)); } @@ -2884,7 +2734,7 @@ const std::unordered_set statement_restrictions::get_n return _not_null_columns; } -statement_restrictions +shared_ptr analyze_statement_restrictions( 
data_dictionary::database db, schema_ptr schema, @@ -2895,7 +2745,14 @@ analyze_statement_restrictions( bool for_view, bool allow_filtering, check_indexes do_check_indexes) { - return statement_restrictions(db, std::move(schema), type, where_clause, ctx, selects_only_static_columns, for_view, allow_filtering, do_check_indexes); + return make_shared(statement_restrictions::private_tag{}, db, std::move(schema), type, where_clause, ctx, selects_only_static_columns, for_view, allow_filtering, do_check_indexes); +} + +shared_ptr +make_trivial_statement_restrictions( + schema_ptr schema, + bool allow_filtering) { + return make_shared(statement_restrictions::private_tag{}, std::move(schema), allow_filtering); } } // namespace restrictions diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 5af95ee78f..f3a5c7dfbc 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -23,15 +23,113 @@ namespace cql3 { namespace restrictions { +/// A set of discrete values. +using value_list = std::vector; // Sorted and deduped using value comparator. + +/// General set of values. Empty set and single-element sets are always value_list. interval is +/// never singular and never has start > end. Universal set is a interval with both bounds null. +using value_set = std::variant>; + +// For some boolean expression (say (X = 3) = TRUE, this represents a function that solves for X. +// (here, it would return 3). The expression is obtained by equating some factors of the WHERE +// clause to TRUE. +using solve_for_t = std::function; + +struct on_row { + bool operator==(const on_row&) const = default; +}; + +struct on_column { + const column_definition* column; + + bool operator==(const on_column&) const = default; +}; + +// Placeholder type indicating we're solving for the partition key token. 
+struct on_partition_key_token { + const ::schema* schema; + + bool operator==(const on_partition_key_token&) const = default; +}; + +struct on_clustering_key_prefix { + std::vector columns; + + bool operator==(const on_clustering_key_prefix&) const = default; +}; + +// A predicate on a column or a combination of columns. The WHERE clause analyzer +// will attempt to convert predicates (that return true or false for a particular row) +// to solvers (that return the set of column values that satisfy the predicate) when possible. +struct predicate { + // A function that returns the set of values that satisfy the filter. Can be unset, + // in which case the filter must be interpreted. + solve_for_t solve_for; + // The original filter for this column. + expr::expression filter; + // What column the predicate can be solved for + std::variant< + on_row, // cannot determine, so predicate is on entire row + on_column, // solving for a single column: e.g. c1 = 3 + on_partition_key_token, // solving for the token, e.g. token(pk1, pk2) >= :var + on_clustering_key_prefix // solving for a clustering key prefix: e.g. (ck1, ck2) >= (3, 4) + > on; + // Whether the returned value_set will resolve to a single value. + bool is_singleton = false; + // Whether the returned value_set follows CQL comparison semantics + bool comparable = true; + bool is_multi_column = false; + bool is_not_null_single_column = false; + bool equality = false; // operator is EQ + bool is_in = false; // operator is IN + bool is_slice = false; // operator is LT/LTE/GT/GTE + bool is_upper_bound = false; // operator is LT/LTE + bool is_lower_bound = false; // operator is GT/GTE + expr::comparison_order order = expr::comparison_order::cql; + std::optional op; // the binary operator, if any + bool is_subscript = false; // whether the LHS is a subscript (map element access) +}; + ///In some cases checking if columns have indexes is undesired of even ///impossible, because e.g. 
the query runs on a pseudo-table, which does not ///have an index-manager, or even a table object. using check_indexes = bool_class; +// A function that returns the partition key ranges for a query. It is the solver of +// WHERE clause fragments such as WHERE token(pk) > 1 or WHERE pk1 IN :list1 AND pk2 IN :list2. +using get_partition_key_ranges_fn_t = std::function; + +// A function that returns the clustering key ranges for a query. It is the solver of +// WHERE clause fragments such as WHERE ck > 1 or WHERE (ck1, ck2) > (1, 2). +using get_clustering_bounds_fn_t = std::function (const query_options& options)>; + +// A function that returns a singleton value, usable for a key (e.g. bytes_opt) +using get_singleton_value_fn_t = std::function; + +struct no_partition_range_restrictions { +}; + +struct token_range_restrictions { + predicate token_restrictions; +}; + +struct single_column_partition_range_restrictions { + std::vector per_column_restrictions; +}; + +using partition_range_restrictions = std::variant< + no_partition_range_restrictions, + token_range_restrictions, + single_column_partition_range_restrictions>; + +// A map of per-column predicate vectors, ordered by schema position. +using single_column_predicate_vectors = std::map, expr::schema_pos_column_definition_comparator>; + /** * The restrictions corresponding to the relations specified on the where-clause of CQL query. */ class statement_restrictions { + struct private_tag {}; // Tag for private constructor private: schema_ptr _schema; @@ -81,7 +179,7 @@ private: bool _has_queriable_regular_index = false, _has_queriable_pk_index = false, _has_queriable_ck_index = false; bool _has_multi_column; ///< True iff _clustering_columns_restrictions has a multi-column restriction. - std::optional _where; ///< The entire WHERE clause. + std::vector _where; ///< The entire WHERE clause (factorized). /// Parts of _where defining the clustering slice. 
/// @@ -96,7 +194,7 @@ private: /// 4.4 elements other than the last have only EQ or IN atoms /// 4.5 the last element has only EQ, IN, or is_slice() atoms /// 5. if multi-column, then each element is a binary_operator - std::vector _clustering_prefix_restrictions; + std::vector _clustering_prefix_restrictions; /// Like _clustering_prefix_restrictions, but for the indexing table (if this is an index-reading statement). /// Recall that the index-table CK is (token, PK, CK) of the base table for a global index and (indexed column, @@ -105,7 +203,7 @@ private: /// Elements are conjunctions of single-column binary operators with the same LHS. /// Element order follows the indexing-table clustering key. /// In case of a global index the first element's (token restriction) RHS is a dummy value, it is filled later. - std::optional> _idx_tbl_ck_prefix; + std::optional> _idx_tbl_ck_prefix; /// Parts of _where defining the partition range. /// @@ -113,16 +211,25 @@ private: /// binary_operators on token. If single-column restrictions define the partition range, each element holds /// restrictions for one partition column. Each partition column has a corresponding element, but the elements /// are in arbitrary order. - std::vector _partition_range_restrictions; + partition_range_restrictions _partition_range_restrictions; bool _partition_range_is_simple; ///< False iff _partition_range_restrictions imply a Cartesian product. check_indexes _check_indexes = check_indexes::yes; + /// Columns that appear on the LHS of an EQ restriction (not IN). + /// For multi-column EQ like (ck1, ck2) = (1, 2), all columns in the tuple are included. 
+ std::unordered_set _columns_with_eq; std::vector _column_defs_for_filtering; schema_ptr _view_schema; std::optional _idx_opt; expr::expression _idx_restrictions = expr::conjunction({}); + get_partition_key_ranges_fn_t _get_partition_key_ranges_fn; + get_clustering_bounds_fn_t _get_clustering_bounds_fn; + get_clustering_bounds_fn_t _get_global_index_clustering_ranges_fn; + get_clustering_bounds_fn_t _get_global_index_token_clustering_ranges_fn; + get_clustering_bounds_fn_t _get_local_index_clustering_ranges_fn; + get_singleton_value_fn_t _value_for_index_partition_key_fn; public: /** * Creates a new empty StatementRestrictions. @@ -130,9 +237,10 @@ public: * @param cfm the column family meta data * @return a new empty StatementRestrictions. */ - statement_restrictions(schema_ptr schema, bool allow_filtering); + statement_restrictions(private_tag, schema_ptr schema, bool allow_filtering); - friend statement_restrictions analyze_statement_restrictions( +public: + friend shared_ptr analyze_statement_restrictions( data_dictionary::database db, schema_ptr schema, statements::statement_type type, @@ -142,9 +250,15 @@ public: bool for_view, bool allow_filtering, check_indexes do_check_indexes); + friend shared_ptr make_trivial_statement_restrictions( + schema_ptr schema, + bool allow_filtering); -private: - statement_restrictions(data_dictionary::database db, + // Important: objects of this class captures `this` extensively and so must remain non-copyable. + statement_restrictions(const statement_restrictions&) = delete; + statement_restrictions& operator=(const statement_restrictions&) = delete; + statement_restrictions(private_tag, + data_dictionary::database db, schema_ptr schema, statements::statement_type type, const expr::expression& where_clause, @@ -211,10 +325,7 @@ public: bool has_token_restrictions() const; - // Checks whether the given column has an EQ restriction. 
- // EQ restriction is `col = ...` or `(col, col2) = ...` - // IN restriction is NOT an EQ restriction, this function will not look for IN restrictions. - // Uses column_defintion::operator== for comparison, columns with the same name but different schema will not be equal. + // Checks whether the given column has an EQ restriction (not IN). bool has_eq_restriction_on_column(const column_definition&) const; /** @@ -224,12 +335,6 @@ public: */ std::vector get_column_defs_for_filtering(data_dictionary::database db) const; - /** - * Gives a score that the index has - index with the highest score will be chosen - * in find_idx() - */ - int score(const secondary_index::index& index) const; - /** * Determines the index to be used with the restriction. * @param db - the data_dictionary::database context (for extracting index manager) @@ -250,18 +355,8 @@ public: size_t partition_key_restrictions_size() const; - bool parition_key_restrictions_have_supporting_index(const secondary_index::secondary_index_manager& index_manager, expr::allow_local_index allow_local) const; - size_t clustering_columns_restrictions_size() const; - bool clustering_columns_restrictions_have_supporting_index( - const secondary_index::secondary_index_manager& index_manager, - expr::allow_local_index allow_local) const; - - bool multi_column_clustering_restrictions_are_supported_by(const secondary_index::index& index) const; - - bounds_slice get_clustering_slice() const; - /** * Checks if the clustering key has some unrestricted components. * @return true if the clustering key has some unrestricted components, false otherwise. 
@@ -279,15 +374,6 @@ public: schema_ptr get_view_schema() const { return _view_schema; } private: - std::pair, expr::expression> do_find_idx(const secondary_index::secondary_index_manager& sim) const; - void add_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view); - void add_is_not_restriction(const expr::binary_operator& restr, schema_ptr schema, bool for_view); - void add_single_column_parition_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering, bool for_view); - void add_token_partition_key_restriction(const expr::binary_operator& restr); - void add_single_column_clustering_key_restriction(const expr::binary_operator& restr, schema_ptr schema, bool allow_filtering); - void add_multi_column_clustering_key_restriction(const expr::binary_operator& restr); - void add_single_column_nonprimary_key_restriction(const expr::binary_operator& restr); - void process_partition_key_restrictions(bool for_view, bool allow_filtering, statements::statement_type type); /** @@ -315,7 +401,17 @@ private: void add_clustering_restrictions_to_idx_ck_prefix(const schema& idx_tbl_schema); unsigned int num_clustering_prefix_columns_that_need_not_be_filtered() const; - void calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(data_dictionary::database db); + void calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index( + data_dictionary::database db, + const single_column_predicate_vectors& sc_pk_pred_vectors, + const single_column_predicate_vectors& sc_ck_pred_vectors, + const single_column_predicate_vectors& sc_nonpk_pred_vectors); + get_partition_key_ranges_fn_t build_partition_key_ranges_fn() const; + get_clustering_bounds_fn_t build_get_clustering_bounds_fn() const; + get_clustering_bounds_fn_t build_get_global_index_clustering_ranges_fn() const; + get_clustering_bounds_fn_t build_get_global_index_token_clustering_ranges_fn() const; + 
get_clustering_bounds_fn_t build_get_local_index_clustering_ranges_fn() const; + get_singleton_value_fn_t build_value_for_index_partition_key_fn() const; public: /** * Returns the specified range of the partition key. @@ -389,7 +485,10 @@ public: private: /// Prepares internal data for evaluating index-table queries. Must be called before /// get_local_index_clustering_ranges(). - void prepare_indexed_local(const schema& idx_tbl_schema); + void prepare_indexed_local(const schema& idx_tbl_schema, + const single_column_predicate_vectors& sc_pk_pred_vectors, + const single_column_predicate_vectors& sc_ck_pred_vectors, + const single_column_predicate_vectors& sc_nonpk_pred_vectors); /// Prepares internal data for evaluating index-table queries. Must be called before /// get_global_index_clustering_ranges() or get_global_index_token_clustering_ranges(). @@ -398,15 +497,18 @@ private: public: /// Calculates clustering ranges for querying a global-index table. std::vector get_global_index_clustering_ranges( - const query_options& options, const schema& idx_tbl_schema) const; + const query_options& options) const; /// Calculates clustering ranges for querying a global-index table for queries with token restrictions present. std::vector get_global_index_token_clustering_ranges( - const query_options& options, const schema& idx_tbl_schema) const; + const query_options& options) const; /// Calculates clustering ranges for querying a local-index table. 
std::vector get_local_index_clustering_ranges( - const query_options& options, const schema& idx_tbl_schema) const; + const query_options& options) const; + + /// Finds the value of partition key of the index table + bytes_opt value_for_index_partition_key(const query_options&) const; sstring to_string() const; @@ -416,7 +518,7 @@ public: bool is_empty() const; }; -statement_restrictions analyze_statement_restrictions( +shared_ptr analyze_statement_restrictions( data_dictionary::database db, schema_ptr schema, statements::statement_type type, @@ -427,23 +529,14 @@ statement_restrictions analyze_statement_restrictions( bool allow_filtering, check_indexes do_check_indexes); - -// Extracts all binary operators which have the given column on their left hand side. -// Extracts only single-column restrictions. -// Does not include multi-column restrictions. -// Does not include token() restrictions. -// Does not include boolean constant restrictions. -// For example "WHERE c = 1 AND (a, c) = (2, 1) AND token(p) < 2 AND FALSE" will return {"c = 1"}. 
-std::vector extract_single_column_restrictions_for_column(const expr::expression&, const column_definition&); +shared_ptr make_trivial_statement_restrictions( + schema_ptr schema, + bool allow_filtering); // Checks whether this expression is empty - doesn't restrict anything bool is_empty_restriction(const expr::expression&); -// Finds the value of the given column in the expression -// In case of multpiple possible values calls on_internal_error -bytes_opt value_for(const column_definition&, const expr::expression&, const query_options&); - } } diff --git a/cql3/statements/modification_statement.cc b/cql3/statements/modification_statement.cc index 1c6cd4a28a..bb687a8220 100644 --- a/cql3/statements/modification_statement.cc +++ b/cql3/statements/modification_statement.cc @@ -626,7 +626,7 @@ modification_statement::prepare(data_dictionary::database db, prepare_context& c // Since this cache is only meaningful for LWT queries, just clear the ids // if it's not a conditional statement so that the AST nodes don't // participate in the caching mechanism later. 
- if (!prepared_stmt->has_conditions() && prepared_stmt->_restrictions.has_value()) { + if (!prepared_stmt->has_conditions() && prepared_stmt->_restrictions) { ctx.clear_pk_function_calls_cache(); } prepared_stmt->_may_use_token_aware_routing = ctx.get_partition_key_bind_indexes(*schema).size() != 0; diff --git a/cql3/statements/modification_statement.hh b/cql3/statements/modification_statement.hh index 9b88930821..5543ac87d5 100644 --- a/cql3/statements/modification_statement.hh +++ b/cql3/statements/modification_statement.hh @@ -94,7 +94,7 @@ private: std::optional _is_raw_counter_shard_write; protected: - std::optional _restrictions; + shared_ptr _restrictions; public: typedef std::optional> json_cache_opt; diff --git a/cql3/statements/prune_materialized_view_statement.hh b/cql3/statements/prune_materialized_view_statement.hh index 6d952b4cd3..69fefb6b4d 100644 --- a/cql3/statements/prune_materialized_view_statement.hh +++ b/cql3/statements/prune_materialized_view_statement.hh @@ -19,7 +19,7 @@ public: uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, diff --git a/cql3/statements/raw/select_statement.hh b/cql3/statements/raw/select_statement.hh index 075503fea1..bcd559d6af 100644 --- a/cql3/statements/raw/select_statement.hh +++ b/cql3/statements/raw/select_statement.hh @@ -109,7 +109,7 @@ public: std::unique_ptr prepare(data_dictionary::database db, cql_stats& stats, const cql_config& cfg, bool for_view); private: std::vector maybe_jsonize_select_clause(std::vector select, data_dictionary::database db, schema_ptr schema); - ::shared_ptr prepare_restrictions( + ::shared_ptr prepare_restrictions( data_dictionary::database db, schema_ptr schema, prepare_context& ctx, diff --git a/cql3/statements/select_statement.cc b/cql3/statements/select_statement.cc index c2d25c83f5..aee118f610 
100644 --- a/cql3/statements/select_statement.cc +++ b/cql3/statements/select_statement.cc @@ -1027,7 +1027,7 @@ view_indexed_table_select_statement::prepare(data_dictionary::database db, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, @@ -1139,7 +1139,7 @@ lw_shared_ptr view_indexed_table_select_stat auto& last_base_pk = last_pos.partition; auto* last_base_ck = last_pos.position.has_key() ? &last_pos.position.key() : nullptr; - bytes_opt indexed_column_value = restrictions::value_for(*cdef, _used_index_restrictions, options); + bytes_opt indexed_column_value = _restrictions->value_for_index_partition_key(options); auto index_pk = [&]() { if (_index.metadata().local()) { @@ -1350,12 +1350,7 @@ dht::partition_range_vector view_indexed_table_select_statement::get_partition_r dht::partition_range_vector view_indexed_table_select_statement::get_partition_ranges_for_global_index_posting_list(const query_options& options) const { dht::partition_range_vector partition_ranges; - const column_definition* cdef = _schema->get_column_definition(to_bytes(_index.target_column())); - if (!cdef) { - throw exceptions::invalid_request_exception("Indexed column not found in schema"); - } - - bytes_opt value = restrictions::value_for(*cdef, _used_index_restrictions, options); + bytes_opt value = _restrictions->value_for_index_partition_key(options); if (value) { auto pk = partition_key::from_single_value(*_view_schema, *value); auto dk = dht::decorate_key(*_view_schema, pk); @@ -1374,11 +1369,11 @@ query::partition_slice view_indexed_table_select_statement::get_partition_slice_ // Only EQ restrictions on base partition key can be used in an index view query if (pk_restrictions_is_single && _restrictions->partition_key_restrictions_is_all_eq()) { partition_slice_builder.with_ranges( - 
_restrictions->get_global_index_clustering_ranges(options, *_view_schema)); + _restrictions->get_global_index_clustering_ranges(options)); } else if (_restrictions->has_token_restrictions()) { // Restrictions like token(p1, p2) < 0 have all partition key components restricted, but require special handling. partition_slice_builder.with_ranges( - _restrictions->get_global_index_token_clustering_ranges(options, *_view_schema)); + _restrictions->get_global_index_token_clustering_ranges(options)); } } @@ -1389,7 +1384,7 @@ query::partition_slice view_indexed_table_select_statement::get_partition_slice_ partition_slice_builder partition_slice_builder{*_view_schema}; partition_slice_builder.with_ranges( - _restrictions->get_local_index_clustering_ranges(options, *_view_schema)); + _restrictions->get_local_index_clustering_ranges(options)); return partition_slice_builder.build(); } @@ -1607,7 +1602,7 @@ public: uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, @@ -1645,7 +1640,7 @@ private: uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, parallelized_select_statement::ordering_comparator_type ordering_comparator, @@ -2076,7 +2071,7 @@ static select_statement::ordering_comparator_type get_similarity_ordering_compar ::shared_ptr vector_indexed_table_select_statement::prepare(data_dictionary::database db, schema_ptr schema, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, 
std::optional limit, std::optional per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr attrs) { @@ -2589,7 +2584,7 @@ std::unique_ptr select_statement::prepare(data_dictionary::d return make_unique(audit_info(), std::move(stmt), ctx, std::move(partition_key_bind_indices), std::move(warnings)); } -::shared_ptr +::shared_ptr select_statement::prepare_restrictions(data_dictionary::database db, schema_ptr schema, prepare_context& ctx, @@ -2599,8 +2594,8 @@ select_statement::prepare_restrictions(data_dictionary::database db, restrictions::check_indexes do_check_indexes) { try { - return ::make_shared(restrictions::analyze_statement_restrictions(db, schema, statement_type::SELECT, _where_clause, ctx, - selection->contains_only_static_columns(), for_view, allow_filtering, do_check_indexes)); + return restrictions::analyze_statement_restrictions(db, schema, statement_type::SELECT, _where_clause, ctx, + selection->contains_only_static_columns(), for_view, allow_filtering, do_check_indexes); } catch (const exceptions::unrecognized_entity_exception& e) { if (contains_alias(e.entity)) { throw exceptions::invalid_request_exception(format("Aliases aren't allowed in the WHERE clause (name: '{}')", e.entity)); diff --git a/cql3/statements/select_statement.hh b/cql3/statements/select_statement.hh index 0f1d333f01..87ce5accd9 100644 --- a/cql3/statements/select_statement.hh +++ b/cql3/statements/select_statement.hh @@ -200,7 +200,7 @@ public: uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, + ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, @@ -372,7 +372,7 @@ public: static ::shared_ptr prepare(data_dictionary::database db, schema_ptr schema, uint32_t bound_terms, lw_shared_ptr parameters, ::shared_ptr selection, - ::shared_ptr restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, + ::shared_ptr 
restrictions, ::shared_ptr> group_by_cell_indices, bool is_reversed, ordering_comparator_type ordering_comparator, prepared_ann_ordering_type prepared_ann_ordering, std::optional limit, std::optional per_partition_limit, cql_stats& stats, const secondary_index::index& index, std::unique_ptr attrs); diff --git a/cql3/statements/update_statement.hh b/cql3/statements/update_statement.hh index 356df3f19f..2dfbf49150 100644 --- a/cql3/statements/update_statement.hh +++ b/cql3/statements/update_statement.hh @@ -66,7 +66,7 @@ public: : update_statement(std::move(audit_info), statement_type::INSERT, bound_terms, s, std::move(attrs), stats) , _value(std::move(v)) , _default_unset(default_unset) { - _restrictions = restrictions::statement_restrictions(s, false); + _restrictions = cql3::restrictions::make_trivial_statement_restrictions(s, false); } private: virtual void execute_operations_for_key(mutation& m, const clustering_key_prefix& prefix, const update_parameters& params, const json_cache_opt& json_cache) const override; diff --git a/service/pager/query_pagers.cc b/service/pager/query_pagers.cc index f68854631c..42b09c1949 100644 --- a/service/pager/query_pagers.cc +++ b/service/pager/query_pagers.cc @@ -493,7 +493,7 @@ std::unique_ptr service::pager::query_pagers::pager // If partition row limit is applied to paging, we still need to fall back // to filtering the results to avoid extraneous rows on page breaks. 
if (!filtering_restrictions && cmd->slice.partition_row_limit() < query::max_rows_if_set) { - filtering_restrictions = ::make_shared(s, true); + filtering_restrictions = cql3::restrictions::make_trivial_statement_restrictions(s, true); } if (filtering_restrictions) { return std::make_unique(proxy, std::move(s), std::move(selection), state, diff --git a/test/boost/statement_restrictions_test.cc b/test/boost/statement_restrictions_test.cc index f778abe105..3d7efe5bf6 100644 --- a/test/boost/statement_restrictions_test.cc +++ b/test/boost/statement_restrictions_test.cc @@ -11,12 +11,14 @@ #include #include +#include #include #include "cql3/restrictions/statement_restrictions.hh" #include "cql3/expr/expr-utils.hh" #include "cql3/util.hh" +#include "index/secondary_index_manager.hh" #include "test/lib/cql_assertions.hh" #include "test/lib/cql_test_env.hh" #include "test/lib/test_utils.hh" @@ -43,7 +45,7 @@ query::clustering_row_ranges slice( /*for_view=*/false, /*allow_filtering=*/true, restrictions::check_indexes::yes) - .get_clustering_bounds(query_options({})); + ->get_clustering_bounds(query_options({})); } /// Overload that parses the WHERE clause from string. 
Named differently to disambiguate when where_clause is @@ -101,19 +103,6 @@ auto both_closed(std::vector lb, std::vector ub) { clustering_key_prefix cklb(std::move(lb)), ckub(std::move(ub)); return query::clustering_range({{cklb, inclusive}}, {{ckub, inclusive}}); } - -expr::tuple_constructor -column_definitions_as_tuple_constructor(const std::vector& defs) { - std::vector columns; - std::vector column_types; - columns.reserve(defs.size()); - for (auto& def : defs) { - columns.push_back(expr::column_value{def}); - column_types.push_back(def->type); - } - data_type ttype = tuple_type_impl::get_instance(std::move(column_types)); - return expr::tuple_constructor{std::move(columns), std::move(ttype)}; -} } // anonymous namespace SEASTAR_TEST_CASE(slice_empty_restriction) { @@ -366,157 +355,914 @@ SEASTAR_TEST_CASE(slice_single_column_mixed_order) { }); } -// Currently expression doesn't have operator==(). -// Implementing it is ugly, because there are shared pointers and the term base class. -// For testing purposes checking stringified expressions is enough. 
-static bool expression_eq(const expr::expression& e1, const expr::expression& e2) { - return to_string(e1) == to_string(e2); +// Regression test: verifies that index selection (find_idx), uses_secondary_indexing, +// and need_filtering produce consistent results across all supported index types: +// - regular_values (standard column EQ) +// - keys (set CONTAINS, map CONTAINS KEY) +// - collection_values (map/list CONTAINS) +// - keys_and_values (map subscript EQ) +// - full (frozen collection EQ; round-trips to regular_values via serialization) +// - local vs global index scoring +SEASTAR_TEST_CASE(index_selection) { + return do_with_cql_env_thread([](cql_test_env& e) { + cquery_nofail(e, "CREATE TABLE ks.idx_test (" + " pk1 int, pk2 int," + " ck1 int, ck2 int," + " v1 int, v2 int, v3 int," + " s1 set," + " m1 map," + " l1 list," + " fs frozen>," + " PRIMARY KEY ((pk1, pk2), ck1, ck2)" + ")"); + // 10 indexes covering all target types. + cquery_nofail(e, "CREATE INDEX idx_v1 ON ks.idx_test(v1)"); + cquery_nofail(e, "CREATE INDEX idx_v2 ON ks.idx_test(v2)"); + cquery_nofail(e, "CREATE INDEX idx_v3_local ON ks.idx_test((pk1,pk2), v3)"); + cquery_nofail(e, "CREATE INDEX idx_ck1 ON ks.idx_test(ck1)"); + cquery_nofail(e, "CREATE INDEX idx_s1 ON ks.idx_test(s1)"); // keys (rewritten from VALUES for sets) + cquery_nofail(e, "CREATE INDEX idx_m1_values ON ks.idx_test(VALUES(m1))"); // collection_values + cquery_nofail(e, "CREATE INDEX idx_m1_keys ON ks.idx_test(KEYS(m1))"); // keys + cquery_nofail(e, "CREATE INDEX idx_m1_entries ON ks.idx_test(ENTRIES(m1))"); // keys_and_values + cquery_nofail(e, "CREATE INDEX idx_l1 ON ks.idx_test(l1)"); // collection_values + cquery_nofail(e, "CREATE INDEX idx_fs ON ks.idx_test(FULL(fs))"); // full -> round-trips to regular_values + + auto schema = e.local_db().find_schema("ks", "idx_test"); + auto& sim = e.data_dictionary().find_column_family(schema).get_index_manager(); + + struct expected { + std::string_view where_clause; + 
std::optional index_name; // nullopt = no index selected + bool uses_secondary_indexing; + bool need_filtering; + }; + + // Build statement_restrictions from a WHERE clause string and return the + // index-selection result. + auto check = [&](std::string_view where_clause) -> expected { + prepare_context ctx; + auto factors = where_clause.empty() + ? std::vector{} + : boolean_factors(cql3::util::where_clause_to_relations(where_clause, cql3::dialect{})); + auto sr = restrictions::analyze_statement_restrictions( + e.data_dictionary(), + schema, + statements::statement_type::SELECT, + expr::conjunction{std::move(factors)}, + ctx, + /*contains_only_static_columns=*/false, + /*for_view=*/false, + /*allow_filtering=*/true, + restrictions::check_indexes::yes); + auto [idx, restrictions_expr] = sr->find_idx(sim); + return {where_clause, + idx ? std::optional(idx->metadata().name()) : std::nullopt, + sr->uses_secondary_indexing(), + sr->need_filtering()}; + }; + + auto none = std::optional{}; + auto idx = [](const char* name) { return std::optional(name); }; + + auto verify = [](const expected& got, const expected& want) { + BOOST_CHECK_MESSAGE(got.index_name == want.index_name, + fmt::format("WHERE {}: index_name: got {} want {}", + want.where_clause, + got.index_name.value_or("(none)"), + want.index_name.value_or("(none)"))); + BOOST_CHECK_MESSAGE(got.uses_secondary_indexing == want.uses_secondary_indexing, + fmt::format("WHERE {}: uses_secondary_indexing: got {} want {}", + want.where_clause, + got.uses_secondary_indexing, + want.uses_secondary_indexing)); + BOOST_CHECK_MESSAGE(got.need_filtering == want.need_filtering, + fmt::format("WHERE {}: need_filtering: got {} want {}", + want.where_clause, + got.need_filtering, + want.need_filtering)); + }; + + // --- A. 
Regular column EQ (target_type: regular_values) --- + verify(check("v1 = 1"), {"", idx("idx_v1"), true, false}); + verify(check("v2 = 1"), {"", idx("idx_v2"), true, false}); + // WHERE-clause order tiebreak: first column in WHERE wins for equal scores. + verify(check("v1 = 1 AND v2 = 1"), {"", idx("idx_v1"), true, true}); + verify(check("v2 = 1 AND v1 = 1"), {"", idx("idx_v2"), true, true}); + // Slices (GT/LT) are not supported by standard secondary indexes. + verify(check("v1 > 1"), {"", none, false, true}); + + // --- B. Local vs global index scoring --- + // Local index with full PK scores 2, global scores 1. + verify(check("pk1 = 1 AND pk2 = 1 AND v3 = 1"), {"", idx("idx_v3_local"), true, false}); + // Local (score 2) beats global (score 1) even when global column appears first. + verify(check("pk1 = 1 AND pk2 = 1 AND v1 = 1 AND v3 = 1"), {"", idx("idx_v3_local"), true, true}); + // Local index without full PK gets score 0 and is never picked. + verify(check("v3 = 1"), {"", none, false, true}); + + // --- C. CK column index (search group ordering) --- + verify(check("ck1 = 1"), {"", idx("idx_ck1"), true, false}); + // CK group is iterated before non-PK group, regardless of WHERE order. + verify(check("ck1 = 1 AND v1 = 1"), {"", idx("idx_ck1"), true, true}); + verify(check("v1 = 1 AND ck1 = 1"), {"", idx("idx_ck1"), true, true}); + + // --- D. Set CONTAINS (target_type: keys, rewritten from VALUES for sets) --- + verify(check("s1 CONTAINS 1"), {"", idx("idx_s1"), true, false}); + + // --- E. Map indexes --- + // CONTAINS on map values (target_type: collection_values). + verify(check("m1 CONTAINS 'one'"), {"", idx("idx_m1_values"), true, false}); + // CONTAINS KEY on map keys (target_type: keys). + verify(check("m1 CONTAINS KEY 1"), {"", idx("idx_m1_keys"), true, false}); + // Subscript EQ on map entries (target_type: keys_and_values). + verify(check("m1[1] = 'one'"), {"", idx("idx_m1_entries"), true, false}); + + // --- F. 
List CONTAINS (target_type: collection_values) --- + verify(check("l1 CONTAINS 1"), {"", idx("idx_l1"), true, false}); + + // --- G. Frozen collection (FULL index, round-trips to regular_values) --- + verify(check("fs = {1}"), {"", idx("idx_fs"), true, false}); + // Same with full PK (local index on v3 is available but idx_fs is global, score 1). + verify(check("pk1 = 1 AND pk2 = 1 AND fs = {1}"), {"", idx("idx_fs"), true, false}); + + // --- H. Double CONTAINS on same column: CollectionYes && CollectionYes = No --- + verify(check("s1 CONTAINS 1 AND s1 CONTAINS 2"), {"", none, false, true}); + + // --- I. Collection + regular column tiebreak (WHERE-clause order) --- + verify(check("s1 CONTAINS 1 AND v1 = 1"), {"", idx("idx_s1"), true, true}); + verify(check("v1 = 1 AND s1 CONTAINS 1"), {"", idx("idx_v1"), true, true}); + + // --- J. CK group beats collection in non-PK group --- + verify(check("m1 CONTAINS 'one' AND ck1 = 1"), {"", idx("idx_ck1"), true, true}); + + // --- K. Edge cases --- + // Full PK only: no secondary index needed. + verify(check("pk1 = 1 AND pk2 = 1"), {"", none, false, false}); + // No restrictions at all. + verify(check(""), {"", none, false, false}); + // Token restriction with a regular column: index is used. + verify(check("token(pk1, pk2) > 0 AND v1 = 1"), {"", idx("idx_v1"), true, false}); + }); } -static void assert_expr_vec_eq( - const std::vector& v1, - const std::vector& v2, - const std::source_location& loc = std::source_location::current()) { +// Exhaustive combinatorial test: iterates over all 2^N subsets of N restriction +// fragments and, for each subset, verifies a broad set of statement_restrictions +// public APIs. This catches any refactoring that accidentally changes observable +// behaviour for *any* combination of restriction types. 
+// +// Restriction fragments (15 independent bits, 2^15 = 32768 combinations): +// bit 0: pk1 = 1 +// bit 1: pk2 = 2 +// bit 2: ck1 = 3 (single-column EQ) +// bit 3: ck2 > 4 (single-column slice) +// bit 4: ck1 IN (3, 6) (single-column IN; includes CK1_EQ value 3) +// bit 5: (ck1, ck2) = (7, 8) (multi-column EQ) +// bit 6: (ck1, ck2) > (9, 10) (multi-column slice) +// bit 7: (ck1, ck2) IN ((11, 12), (13, 14)) (multi-column IN) +// bit 8: v1 = 15 (global index comb_v1, target: regular_values) +// bit 9: v3 = 16 (local index comb_v3_local, target: regular_values) +// bit 10: s1 CONTAINS 17 (global index comb_s1, target: keys — set) +// bit 11: m1 CONTAINS 'alpha' (global index comb_m1_values, target: collection_values) +// bit 12: m2 CONTAINS KEY 18 (global index comb_m2_keys, target: keys — map) +// bit 13: m3[19] = 'beta' (global index comb_m3_entries, target: keys_and_values) +// bit 14: fs = {20, 21} (global index comb_fs, target: full — frozen collection) +SEASTAR_TEST_CASE(combinatorial_restrictions) { + // ASAN's fake-stack shadow buffer for the large lambda below is ~248 KiB; + // bump the thread stack so it doesn't overflow under sanitized builds. 
+ seastar::thread_attributes tattr; +#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer) + tattr.stack_size = 2 * 1024 * 1024; +#endif + return do_with_cql_env_thread([](cql_test_env& e) { + cquery_nofail(e, "CREATE TABLE ks.comb (" + " pk1 int, pk2 int," + " ck1 int, ck2 int," + " v1 int, v2 int, v3 int," + " s1 set," + " m1 map," + " m2 map," + " m3 map," + " fs frozen>," + " PRIMARY KEY ((pk1, pk2), ck1, ck2)" + ")"); + cquery_nofail(e, "CREATE INDEX comb_pk1 ON ks.comb(pk1)"); + cquery_nofail(e, "CREATE INDEX comb_v1 ON ks.comb(v1)"); + cquery_nofail(e, "CREATE INDEX comb_v3_local ON ks.comb((pk1,pk2), v3)"); + cquery_nofail(e, "CREATE INDEX comb_s1 ON ks.comb(s1)"); + cquery_nofail(e, "CREATE INDEX comb_ck1 ON ks.comb(ck1)"); + cquery_nofail(e, "CREATE INDEX comb_m1_values ON ks.comb(VALUES(m1))"); + cquery_nofail(e, "CREATE INDEX comb_m2_keys ON ks.comb(KEYS(m2))"); + cquery_nofail(e, "CREATE INDEX comb_m3_entries ON ks.comb(ENTRIES(m3))"); + cquery_nofail(e, "CREATE INDEX comb_fs ON ks.comb(FULL(fs))"); - if (std::equal(v1.begin(), v1.end(), v2.begin(), v2.end(), expression_eq)) { - return; + auto schema = e.local_db().find_schema("ks", "comb"); + auto& sim = e.data_dictionary().find_column_family(schema).get_index_manager(); + const auto& pk1_def = *schema->get_column_definition("pk1"); + const auto& pk2_def = *schema->get_column_definition("pk2"); + const auto& ck1_def = *schema->get_column_definition("ck1"); + const auto& ck2_def = *schema->get_column_definition("ck2"); + const auto& v1_def = *schema->get_column_definition("v1"); + const auto& v3_def = *schema->get_column_definition("v3"); + const auto& s1_def = *schema->get_column_definition("s1"); + const auto& m1_def = *schema->get_column_definition("m1"); + const auto& m2_def = *schema->get_column_definition("m2"); + const auto& m3_def = *schema->get_column_definition("m3"); + const auto& fs_def = *schema->get_column_definition("fs"); + + // Every restriction fragment is an 
independent bit in the mask. + // This includes CK restriction variants, giving us exhaustive + // coverage of all 2^15 = 32768 combinations. + enum frag : unsigned { + PK1 = 1u << 0, // pk1 = 1 (global index, regular_values) + PK2 = 1u << 1, // pk2 = 2 (no index) + CK1_EQ = 1u << 2, // ck1 = 3 + CK2_SLICE = 1u << 3, // ck2 > 4 + CK1_IN = 1u << 4, // ck1 IN (3, 6) + MULTI_EQ = 1u << 5, // (ck1, ck2) = (7, 8) + MULTI_SLICE= 1u << 6, // (ck1, ck2) > (9, 10) + MULTI_IN = 1u << 7, // (ck1, ck2) IN ((11, 12), (13, 14)) + V1 = 1u << 8, // v1 = 15 (global index, regular_values) + V3 = 1u << 9, // v3 = 16 (local index, regular_values) + S1 = 1u << 10, // s1 CONTAINS 17 (global index, keys — set) + M_VAL = 1u << 11, // m1 CONTAINS 'alpha' (global index, collection_values) + M_KEY = 1u << 12, // m2 CONTAINS KEY 18 (global index, keys — map) + M_ENT = 1u << 13, // m3[19] = 'beta' (global index, keys_and_values) + FS = 1u << 14, // fs = {20, 21} (global index, full) + }; + constexpr unsigned N_FRAG = 15; + constexpr unsigned FRAG_TOTAL = 1u << N_FRAG; + + constexpr unsigned SINGLE_CK_MASK = CK1_EQ | CK2_SLICE | CK1_IN; + constexpr unsigned MULTI_CK_MASK = MULTI_EQ | MULTI_SLICE | MULTI_IN; + + struct fragment_info { + unsigned bit; + const char* clause; + }; + // Each fragment uses unique values so that conjunction intersections + // are predictable. CK1_IN includes the CK1_EQ value (3) to ensure + // the intersection is non-empty when both are present. 
+ const fragment_info fragments[] = { + {PK1, "pk1 = 1"}, + {PK2, "pk2 = 2"}, + {CK1_EQ, "ck1 = 3"}, + {CK2_SLICE, "ck2 > 4"}, + {CK1_IN, "ck1 IN (3, 6)"}, + {MULTI_EQ, "(ck1, ck2) = (7, 8)"}, + {MULTI_SLICE, "(ck1, ck2) > (9, 10)"}, + {MULTI_IN, "(ck1, ck2) IN ((11, 12), (13, 14))"}, + {V1, "v1 = 15"}, + {V3, "v3 = 16"}, + {S1, "s1 CONTAINS 17"}, + {M_VAL, "m1 CONTAINS 'alpha'"}, + {M_KEY, "m2 CONTAINS KEY 18"}, + {M_ENT, "m3[19] = 'beta'"}, + {FS, "fs = {20, 21}"}, + }; + + unsigned total_tested = 0; + unsigned total_illegal = 0; + + for (unsigned mask = 0; mask < FRAG_TOTAL; ++mask) { + // --- Illegality detection --- + // Rule 1: Mixing single-column and multi-column CK restrictions + // is always illegal. + // Rule 2: At most one multi-column CK restriction type allowed. + bool has_single_ck = (mask & SINGLE_CK_MASK) != 0; + bool has_multi_ck = (mask & MULTI_CK_MASK) != 0; + unsigned multi_ck_count = std::popcount(mask & MULTI_CK_MASK); + bool is_illegal = (has_single_ck && has_multi_ck) + || (multi_ck_count > 1); + + // Build WHERE clause from all set bits. + std::string where_clause; + for (auto& f : fragments) { + if (mask & f.bit) { + if (!where_clause.empty()) { + where_clause += " AND "; + } + where_clause += f.clause; + } + } + + auto ctx_msg = [&](std::string_view api) { + return fmt::format("mask=0x{:04x} WHERE [{}]: {}", + mask, where_clause, api); + }; + + prepare_context ctx; + auto where_expr = where_clause.empty() + ? 
expr::expression(expr::conjunction{}) + : cql3::util::where_clause_to_relations(where_clause, cql3::dialect{}); + + shared_ptr sr; + try { + sr = restrictions::analyze_statement_restrictions( + e.data_dictionary(), + schema, + statements::statement_type::SELECT, + where_expr, + ctx, + /*contains_only_static_columns=*/false, + /*for_view=*/false, + /*allow_filtering=*/true, + restrictions::check_indexes::yes); + } catch (const exceptions::invalid_request_exception&) { + } + + if (is_illegal) { + BOOST_CHECK_MESSAGE(!sr, + ctx_msg("expected exception for illegal CK combination")); + ++total_illegal; + ++total_tested; + continue; + } + BOOST_REQUIRE_MESSAGE(sr, + ctx_msg("unexpected exception for legal CK combination")); + + // --- Derived CK properties --- + bool has_multi_column = has_multi_ck; + + // Which CK columns are restricted? + bool has_ck1 = (mask & (CK1_EQ | CK1_IN | MULTI_EQ | MULTI_SLICE | MULTI_IN)) != 0; + bool has_ck2 = (mask & (CK2_SLICE | MULTI_EQ | MULTI_SLICE | MULTI_IN)) != 0; + bool has_any_ck = has_ck1 || has_ck2; + unsigned ck_count = (has_ck1 ? 1u : 0u) + (has_ck2 ? 1u : 0u); + + // clustering_key_restrictions_has_IN: any IN binop present + bool has_ck_in = (mask & (CK1_IN | MULTI_IN)) != 0; + + // clustering_key_restrictions_has_only_eq: no non-EQ binop in CK + // restrictions. Vacuously true when no CK restrictions. + bool has_only_eq = !(mask & (CK2_SLICE | CK1_IN | MULTI_SLICE | MULTI_IN)); + + // clustering_key_restrictions_need_filtering (internal predicate, + // before ORing with has_partition_key_unrestricted_components): + // For multi-column restrictions, always false. + // For single-column: true when there's a CK gap (ck2 restricted + // without ck1 being restricted by EQ or IN). + bool ck_need_filtering_internal = !has_multi_column + && (mask & CK2_SLICE) + && !(mask & (CK1_EQ | CK1_IN)); + + // has_eq_restriction_on_column: recognizes column_value and + // tuple_constructor LHS with oper_t::EQ. + // CK1_EQ → true for ck1. 
MULTI_EQ → true for both ck1 and ck2. + // IN, slice → false. + bool ck1_has_eq = (mask & CK1_EQ) || (mask & MULTI_EQ); + bool ck2_has_eq = (mask & MULTI_EQ) != 0; + + // comb_pk1 index selection: + // Requires: pk1 restricted with EQ and PK incomplete + // (_is_key_range). PK restrictions are iterated first in + // _index_restrictions, so comb_pk1 (global, score 1) beats + // all same-score indexes that come later. + bool selects_comb_pk1 = (mask & PK1) && !(mask & PK2); + + // comb_ck1 index selection: + // Requires: !full_pk, single-column EQ on ck1 (not IN — index + // only supports EQ), no CK1_IN (makes conjunction unsupported), + // no multi-column. + // In legal combos, CK1_EQ set → no MULTI_* possible. + // When pk1 is also restricted, comb_pk1 takes priority (PK + // group comes before CK group in _index_restrictions). + bool selects_comb_ck1 = (mask & CK1_EQ) && !(mask & CK1_IN); + + // Index clustering range multiplier: + // CK1_IN alone produces 2 IN values → 2 ranges. + // CK1_EQ + CK1_IN: intersection narrows to 1. + // Multi-column: not added to prefix, multiplier 1. + unsigned idx_range_multiplier = ((mask & CK1_IN) && !(mask & CK1_EQ)) ? 2 : 1; + + // --- Partition key APIs --- + bool has_pk1 = (mask & PK1) != 0; + bool has_pk2 = (mask & PK2) != 0; + bool full_pk = has_pk1 && has_pk2; + + BOOST_CHECK_MESSAGE( + sr->partition_key_restrictions_is_empty() == (!has_pk1 && !has_pk2), + ctx_msg("partition_key_restrictions_is_empty")); + + BOOST_CHECK_MESSAGE( + sr->partition_key_restrictions_is_all_eq() == true, + ctx_msg("partition_key_restrictions_is_all_eq")); + + BOOST_CHECK_MESSAGE( + sr->has_partition_key_unrestricted_components() == (!has_pk1 || !has_pk2), + ctx_msg("has_partition_key_unrestricted_components")); + + unsigned pk_restricted = (has_pk1 ? 1u : 0u) + (has_pk2 ? 
1u : 0u); + BOOST_CHECK_MESSAGE( + sr->partition_key_restrictions_size() == pk_restricted, + ctx_msg(fmt::format("partition_key_restrictions_size: got {} want {}", + sr->partition_key_restrictions_size(), pk_restricted))); + + BOOST_CHECK_MESSAGE( + sr->has_token_restrictions() == false, + ctx_msg("has_token_restrictions")); + + BOOST_CHECK_MESSAGE( + sr->key_is_in_relation() == false, + ctx_msg("key_is_in_relation")); + + // is_key_range: true unless full PK is specified with EQ + BOOST_CHECK_MESSAGE( + sr->is_key_range() == !full_pk, + ctx_msg("is_key_range")); + + // --- Clustering key APIs --- + BOOST_CHECK_MESSAGE( + sr->has_clustering_columns_restriction() == has_any_ck, + ctx_msg("has_clustering_columns_restriction")); + + BOOST_CHECK_MESSAGE( + sr->clustering_columns_restrictions_size() == ck_count, + ctx_msg(fmt::format("clustering_columns_restrictions_size: got {} want {}", + sr->clustering_columns_restrictions_size(), ck_count))); + + BOOST_CHECK_MESSAGE( + sr->has_unrestricted_clustering_columns() == (ck_count < 2), + ctx_msg("has_unrestricted_clustering_columns")); + + BOOST_CHECK_MESSAGE( + sr->clustering_key_restrictions_has_IN() == has_ck_in, + ctx_msg("clustering_key_restrictions_has_IN")); + + BOOST_CHECK_MESSAGE( + sr->clustering_key_restrictions_has_only_eq() == has_only_eq, + ctx_msg("clustering_key_restrictions_has_only_eq")); + + // ck_restrictions_need_filtering: + // = has_any_ck && (!full_pk || ck_need_filtering_internal) + // The internal predicate captures column-gap / non-prefix issues; + // has_partition_key_unrestricted_components() is ORed in by the + // outer ck_restrictions_need_filtering(). 
+ bool ck_needs_filter = has_any_ck && (!full_pk || ck_need_filtering_internal); + BOOST_CHECK_MESSAGE( + sr->ck_restrictions_need_filtering() == ck_needs_filter, + ctx_msg("ck_restrictions_need_filtering")); + + // --- Non-primary-key APIs --- + bool has_v1 = (mask & V1) != 0; + bool has_v3 = (mask & V3) != 0; + bool has_s1 = (mask & S1) != 0; + bool has_m_val = (mask & M_VAL) != 0; + bool has_m_key = (mask & M_KEY) != 0; + bool has_m_ent = (mask & M_ENT) != 0; + bool has_fs = (mask & FS) != 0; + bool has_nonpk = has_v1 || has_v3 || has_s1 + || has_m_val || has_m_key || has_m_ent || has_fs; + + BOOST_CHECK_MESSAGE( + sr->has_non_primary_key_restriction() == has_nonpk, + ctx_msg("has_non_primary_key_restriction")); + + // --- Per-column restriction checks --- + BOOST_CHECK_MESSAGE( + sr->is_restricted(&pk1_def) == has_pk1, + ctx_msg("is_restricted(pk1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&pk2_def) == has_pk2, + ctx_msg("is_restricted(pk2)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&ck1_def) == has_ck1, + ctx_msg("is_restricted(ck1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&ck2_def) == has_ck2, + ctx_msg("is_restricted(ck2)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&v1_def) == has_v1, + ctx_msg("is_restricted(v1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&v3_def) == has_v3, + ctx_msg("is_restricted(v3)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&s1_def) == has_s1, + ctx_msg("is_restricted(s1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&m1_def) == has_m_val, + ctx_msg("is_restricted(m1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&m2_def) == has_m_key, + ctx_msg("is_restricted(m2)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&m3_def) == has_m_ent, + ctx_msg("is_restricted(m3)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&fs_def) == has_fs, + ctx_msg("is_restricted(fs)")); + + // has_eq_restriction_on_column: + // pk1/pk2 always EQ when present. + // ck1/ck2 depend on the CK restriction type. 
+ // v1/v3/fs are EQ; s1 and m1 are CONTAINS, m2 is CONTAINS KEY, + // m3[19] = 'beta' is a subscript EQ (not recognized). + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(pk1_def) == has_pk1, + ctx_msg("has_eq_restriction_on_column(pk1)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(pk2_def) == has_pk2, + ctx_msg("has_eq_restriction_on_column(pk2)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(ck1_def) == ck1_has_eq, + ctx_msg("has_eq_restriction_on_column(ck1)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(ck2_def) == ck2_has_eq, + ctx_msg("has_eq_restriction_on_column(ck2)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(v1_def) == has_v1, + ctx_msg("has_eq_restriction_on_column(v1)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(v3_def) == has_v3, + ctx_msg("has_eq_restriction_on_column(v3)")); + // s1 CONTAINS is not EQ: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(s1_def) == false, + ctx_msg("has_eq_restriction_on_column(s1)")); + // m1 CONTAINS is not EQ: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(m1_def) == false, + ctx_msg("has_eq_restriction_on_column(m1)")); + // m2 CONTAINS KEY is not EQ: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(m2_def) == false, + ctx_msg("has_eq_restriction_on_column(m2)")); + // m3[19] = 'beta' is a subscript EQ — not recognized by + // has_eq_restriction_on_column (needs column_value/tuple_constructor LHS). + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(m3_def) == false, + ctx_msg("has_eq_restriction_on_column(m3)")); + // fs = {20, 21} is a regular EQ on frozen collection: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(fs_def) == has_fs, + ctx_msg("has_eq_restriction_on_column(fs)")); + + // --- Index selection --- + auto [idx_opt, idx_expr] = sr->find_idx(sim); + + // Determine expected index.
The scoring algorithm: + // - do_find_idx iterates _index_restrictions (PK group, then + // CK group, then non-PK group in WHERE-clause order). + // - Multi-column restrictions are skipped. + // - Score: local index with full PK = 2, global = 1, local + // without full PK = 0. + // - Strict > for tiebreaking → first with highest score wins. + // + // The PK index (comb_pk1) can be selected when: + // (a) pk1 is restricted with EQ, and + // (b) PK is incomplete (_is_key_range), which triggers + // _uses_secondary_indexing via _has_queriable_pk_index. + // PK restrictions are iterated first in _index_restrictions, so + // comb_pk1 (score 1) beats all later global indexes (tie → first + // wins). + // + // The CK index (comb_ck1) can only be selected when: + // (a) ck1 is restricted with single-column EQ (not IN — the + // index only supports EQ, not IN), + // (b) PK is incomplete (_is_key_range), + // (c) no multi-column CK restriction (_has_multi_column blocks + // the CK-index path), + // (d) no CK1_IN alongside CK1_EQ (the conjunction makes the index + // unsupported: the IN child fails is_supported_by), and + // (e) pk1 is NOT restricted (otherwise comb_pk1 wins first). + // + // When comb_ck1 qualifies it is iterated after PK but before + // non-PK in _index_restrictions, and its score 1 ties with any + // non-PK global, so it wins. + + std::optional expected_idx; + + if (selects_comb_pk1) { + expected_idx = "comb_pk1"; + } else if (selects_comb_ck1 && !full_pk) { + expected_idx = "comb_ck1"; + } else if (full_pk && has_v3) { + // Local index scores 2, beats any global (score 1). + expected_idx = "comb_v3_local"; + } else if (has_v1) { + expected_idx = "comb_v1"; + } else if (has_s1) { + // v3 without full_pk scores 0 and is skipped.
+ expected_idx = "comb_s1"; + } else if (has_m_val) { + expected_idx = "comb_m1_values"; + } else if (has_m_key) { + expected_idx = "comb_m2_keys"; + } else if (has_m_ent) { + expected_idx = "comb_m3_entries"; + } else if (has_fs) { + expected_idx = "comb_fs"; + } + // else: no indexable column (v3 alone without full_pk scores 0) + + bool uses_idx = expected_idx.has_value(); + + BOOST_CHECK_MESSAGE( + (idx_opt ? std::optional(idx_opt->metadata().name()) : std::nullopt) == expected_idx, + ctx_msg(fmt::format("find_idx: got {} want {}", + idx_opt ? idx_opt->metadata().name() : "(none)", + expected_idx.value_or("(none)")))); + BOOST_CHECK_MESSAGE( + sr->uses_secondary_indexing() == uses_idx, + ctx_msg(fmt::format("uses_secondary_indexing: got {} want {}", + sr->uses_secondary_indexing(), uses_idx))); + + // --- need_filtering --- + // Filtering is needed when: + // - PK is not fully specified (partial PK needs filtering unless + // using index) + // - CK has a gap (ck2 without ck1) needs filtering + // - Non-PK restrictions that aren't consumed by the index need + // filtering + // - When using an index, remaining restrictions beyond the + // indexed one need filtering + // - Multi-column CK restrictions that can't be converted to + // bounds need filtering + // The exact logic is complex; we check invariants. + bool need_filt = sr->need_filtering(); + + // 1. If no restrictions at all, no filtering needed. + if (mask == 0) { + BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: empty should be false")); + } + // 2. If only the full PK is specified (no CK, no non-PK), no filtering. + if (mask == (PK1 | PK2)) { + BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: full PK only should be false")); + } + // 3. Single indexed column (no CK): no filtering needed. + if (mask == PK1 || mask == V1 || mask == S1) { + BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: single indexed column should be false")); + } + // 4. 
If using index + has extra non-PK restrictions, filtering is needed. + if (uses_idx) { + int non_pk_indexed_count = (has_v1 ? 1 : 0) + (has_v3 ? 1 : 0) + (has_s1 ? 1 : 0) + + (has_m_val ? 1 : 0) + (has_m_key ? 1 : 0) + (has_m_ent ? 1 : 0) + + (has_fs ? 1 : 0); + if (non_pk_indexed_count > 1) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: multiple non-PK restrictions with index should need filtering")); + } + } + // 5. Partial PK with no index needs filtering. + if ((has_pk1 != has_pk2) && !uses_idx) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: partial PK without index should need filtering")); + } + // 6. CK gap (ck2 without ck1, single-column only) needs filtering. + if (ck_need_filtering_internal) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: CK gap should need filtering")); + } + // 7. Non-PK restriction without index needs filtering. + if (has_nonpk && !uses_idx) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: non-PK restriction without index should need filtering")); + } + + // --- pk_restrictions_need_filtering --- + bool pk_needs_filter = sr->pk_restrictions_need_filtering(); + if (!uses_idx && has_pk1 != has_pk2) { + BOOST_CHECK_MESSAGE(pk_needs_filter, + ctx_msg("pk_restrictions_need_filtering: partial PK should need filtering")); + } + if (!has_pk1 && !has_pk2) { + BOOST_CHECK_MESSAGE(!pk_needs_filter, + ctx_msg("pk_restrictions_need_filtering: no PK should be false")); + } + if (full_pk) { + BOOST_CHECK_MESSAGE(!pk_needs_filter, + ctx_msg("pk_restrictions_need_filtering: full PK should be false")); + } + + // --- is_empty --- + BOOST_CHECK_MESSAGE( + sr->is_empty() == (mask == 0), + ctx_msg("is_empty")); + + // --- get_not_null_columns: none of our fragments use IS NOT NULL --- + BOOST_CHECK_MESSAGE( + sr->get_not_null_columns().empty(), + ctx_msg("get_not_null_columns should be empty")); + + // --- get_partition_key_ranges --- + // Always returns exactly 1 range. 
Full PK → singular range + // (specific partition). Otherwise → open-ended range. + { + auto pk_ranges = sr->get_partition_key_ranges(query_options({})); + BOOST_CHECK_MESSAGE(pk_ranges.size() == 1, + ctx_msg(fmt::format("get_partition_key_ranges: {} ranges, want 1", + pk_ranges.size()))); + if (full_pk) { + BOOST_CHECK_MESSAGE(pk_ranges.size() == 1 && pk_ranges[0].is_singular(), + ctx_msg("get_partition_key_ranges: full PK should yield singular range")); + } else { + BOOST_CHECK_MESSAGE(pk_ranges.size() == 1 && !pk_ranges[0].is_singular(), + ctx_msg("get_partition_key_ranges: incomplete PK should yield non-singular range")); + } + } + + // --- get_clustering_bounds --- + // Expected range count: + // Empty CK restrictions → 1 open-ended range. + // Multi-column: MULTI_IN → 2 singular, else → 1. + // Single-column: CK1_IN without CK1_EQ → 2 (IN values expand), + // else → 1 (CK1_EQ narrows intersection to 1 value). + { + auto ck_bounds = sr->get_clustering_bounds(query_options({})); + unsigned expected_ck_bounds; + if (!(mask & (SINGLE_CK_MASK | MULTI_CK_MASK))) { + // No CK restrictions → 1 open-ended range. + expected_ck_bounds = 1; + } else if (mask & MULTI_CK_MASK) { + expected_ck_bounds = (mask & MULTI_IN) ? 2 : 1; + } else { + expected_ck_bounds = ((mask & CK1_IN) && !(mask & CK1_EQ)) ? 2 : 1; + } + BOOST_CHECK_MESSAGE(ck_bounds.size() == expected_ck_bounds, + ctx_msg(fmt::format("get_clustering_bounds: {} ranges, want {}", + ck_bounds.size(), expected_ck_bounds))); + // With no CK restrictions the range should be open-ended. 
+ if (!(mask & (SINGLE_CK_MASK | MULTI_CK_MASK))) { + BOOST_CHECK_MESSAGE( + ck_bounds.size() == 1 + && !ck_bounds[0].start() + && !ck_bounds[0].end(), + ctx_msg("get_clustering_bounds: no CK should be open-ended")); + } + } + + // --- Index table range APIs --- + if (uses_idx) { + bool is_local_idx = (expected_idx == "comb_v3_local"); + + if (is_local_idx) { + // --- get_local_index_clustering_ranges --- + // Local index CK prefix = (indexed_col, base_ck1, ...). + // The indexed column is always EQ (1 value); CK IN values + // multiply via the base CK appended to the prefix. + auto local_ranges = sr->get_local_index_clustering_ranges(query_options({})); + unsigned expected_local = 1 * idx_range_multiplier; + BOOST_CHECK_MESSAGE(!local_ranges.empty(), + ctx_msg("get_local_index_clustering_ranges should not be empty")); + BOOST_CHECK_MESSAGE(local_ranges.size() == expected_local, + ctx_msg(fmt::format("get_local_index_clustering_ranges: {} ranges, want {}", + local_ranges.size(), expected_local))); + } else { + // --- get_global_index_clustering_ranges --- + // Global index CK prefix = (token, pk1, pk2, ...base CK...). + // With full PK: CK IN values expand the prefix. + // Without full PK: prefix is empty → 1 open-ended range. + auto global_ranges = sr->get_global_index_clustering_ranges(query_options({})); + BOOST_CHECK_MESSAGE(!global_ranges.empty(), + ctx_msg("get_global_index_clustering_ranges should not be empty")); + if (full_pk) { + unsigned expected_global = 1 * idx_range_multiplier; + BOOST_CHECK_MESSAGE(global_ranges.size() == expected_global, + ctx_msg(fmt::format( + "get_global_index_clustering_ranges (full PK): {} ranges, want {}", + global_ranges.size(), expected_global))); + } else { + // Without full PK the prefix has no token/PK entries, + // so we get 1 open-ended range. 
+ BOOST_CHECK_MESSAGE(global_ranges.size() == 1, + ctx_msg(fmt::format( + "get_global_index_clustering_ranges (!full PK): {} ranges, want 1", + global_ranges.size()))); + } + + // --- get_global_index_token_clustering_ranges --- + // For modern (non-v1) indexes the token column is + // long_type, so this dispatches to the same + // get_single_column_clustering_bounds as + // get_global_index_clustering_ranges. + auto token_ranges = sr->get_global_index_token_clustering_ranges(query_options({})); + BOOST_CHECK_MESSAGE(token_ranges.size() == global_ranges.size(), + ctx_msg(fmt::format( + "get_global_index_token_clustering_ranges: {} ranges, want {} (same as global)", + token_ranges.size(), global_ranges.size()))); + } + } + + ++total_tested; + } + + BOOST_TEST_MESSAGE(fmt::format("Tested {} restriction combinations ({} legal, {} illegal, 2^{} = {} total)", + total_tested, total_tested - total_illegal, total_illegal, N_FRAG, FRAG_TOTAL)); + }, {}, tattr); +} + +/// Helper to get statement_restrictions from a parsed WHERE clause string. +static shared_ptr make_restrictions( + std::string_view where_clause, cql_test_env& env, + const sstring& table_name = "t", const sstring& keyspace_name = "ks") { + prepare_context ctx; + auto factors = where_clause.empty() + ? std::vector{} + : boolean_factors(cql3::util::where_clause_to_relations(where_clause, cql3::dialect{})); + return restrictions::analyze_statement_restrictions( + env.data_dictionary(), + env.local_db().find_schema(keyspace_name, table_name), + statements::statement_type::SELECT, + expr::conjunction{std::move(factors)}, + ctx, + /*contains_only_static_columns=*/false, + /*for_view=*/false, + /*allow_filtering=*/true, + restrictions::check_indexes::yes); +} + +/// Extract (column_name, operator) pairs from each boolean factor of a conjunction expression. +/// Each factor must be a binary_operator whose LHS is a column_value or subscript. 
+static std::vector> factor_ops(const expr::expression& e) { + std::vector> result; + for (auto& factor : expr::boolean_factors(e)) { + BOOST_REQUIRE_MESSAGE(expr::is(factor), + fmt::format("expected binary_operator, got: {}", factor)); + auto& binop = expr::as(factor); + const auto& cv = expr::get_subscripted_column(binop.lhs); + result.emplace_back(cv.col->name_as_text(), binop.op); } - - std::string error_msg = fmt::format("Location: {}:{}, Expression vectors not equal! [{}] != [{}]", - loc.file_name(), loc.line(), fmt::join(v1, ", "), fmt::join(v2, ", ")); - - BOOST_FAIL(error_msg); + return result; } -// Unit tests for extract_column_restrictions function -BOOST_AUTO_TEST_CASE(expression_extract_column_restrictions) { - using namespace expr; +// Test that restrictions are correctly routed to per-column maps and that each +// per-column entry contains exactly the right boolean factors (verified by column +// name and operator, not just count). This is the higher-level replacement for +// the old extract_single_column_restrictions_for_column test. 
+SEASTAR_TEST_CASE(per_column_restriction_routing) { + return do_with_cql_env_thread([](cql_test_env& e) { + cquery_nofail(e, "create table ks.trc(pk1 int, pk2 int, ck1 int, ck2 int, v1 int, v2 int, v3 int, " + "primary key((pk1, pk2), ck1, ck2))"); - auto make_column = [](const char* name, column_kind kind, int id) -> column_definition { - column_definition definition(name, int32_type, kind, id); + auto schema = e.local_db().find_schema("ks", "trc"); - // column_definition has to have column_specifiction because to_string uses it for column name - ::shared_ptr identifier = ::make_shared(name, true); - column_specification specification("ks", "cf", std::move(identifier), int32_type); - definition.column_specification = make_lw_shared( - std::move(specification)); + using op = expr::oper_t; + using col_op = std::pair; - return definition; - }; + // Multiple single-column restrictions on regular columns are correctly + // accumulated per-column, while unrestricted columns don't appear. + { + auto sr = make_restrictions( + "pk1=1 AND pk2=2 AND ck1=3 AND ck2=4 AND v1=5 AND v1<10 AND v1>0 AND v2=6", + e, "trc"); - column_definition col_pk1 = make_column("pk1", column_kind::partition_key, 0); - column_definition col_pk2 = make_column("pk2", column_kind::partition_key, 1); - column_definition col_ck1 = make_column("ck1", column_kind::clustering_key, 0); - column_definition col_ck2 = make_column("ck2", column_kind::clustering_key, 1); - column_definition col_r1 = make_column("r2", column_kind::regular_column, 0); - column_definition col_r2 = make_column("r2", column_kind::regular_column, 1); - column_definition col_r3 = make_column("r3", column_kind::regular_column, 2); + // --- Non-PK per-column map --- + auto& npk = sr->get_non_pk_restriction(); + BOOST_CHECK_EQUAL(npk.size(), 2u); - // Empty input test - assert_expr_vec_eq(cql3::restrictions::extract_single_column_restrictions_for_column(conjunction{}, col_pk1), {}); + auto* v1_def = schema->get_column_definition("v1"); 
+ auto* v2_def = schema->get_column_definition("v2"); + auto* v3_def = schema->get_column_definition("v3"); - // BIG_WHERE test - // big_where contains: - // WHERE pk1 = 0 AND pk2 = 0 AND ck1 = 0 AND ck2 = 0 AND r1 = 0 AND r2 = 0 - // AND (pk1, pk2) < (0, 0) AND (pk1, ck2, r1) = (0, 0, 0) AND (r1, r2) > 0 - // AND ((c1, c2) < (0, 0) AND r1 < 0) - // AND pk2 > 0 AND r2 > 0 - // AND token(pk1, pk2) > 0 AND token(pk1, pk2) < 0 - // AND TRUE AND FALSE - // AND token(pk1, pk2) - // AND pk1 AND pk2 - // AND (pk1, pk2) - std::vector big_where; - expr::constant zero_value = constant(raw_value::make_value(I(0)), int32_type); + BOOST_REQUIRE(npk.contains(v1_def)); + BOOST_REQUIRE(npk.contains(v2_def)); + BOOST_CHECK(!npk.contains(v3_def)); - expression pk1_restriction(binary_operator(column_value(&col_pk1), oper_t::EQ, zero_value)); - expression pk2_restriction(binary_operator(column_value(&col_pk2), oper_t::EQ, zero_value)); - expression pk2_restriction2(binary_operator(column_value(&col_pk2), oper_t::GT, zero_value)); - expression ck1_restriction(binary_operator(column_value(&col_ck1), oper_t::EQ, zero_value)); - expression ck2_restriction(binary_operator(column_value(&col_ck2), oper_t::EQ, zero_value)); - expression r1_restriction(binary_operator(column_value(&col_r1), oper_t::EQ, zero_value)); - expression r1_restriction2(binary_operator(column_value(&col_r1), oper_t::LT, zero_value)); - expression r1_restriction3(binary_operator(column_value(&col_r1), oper_t::GT, zero_value)); - expression r2_restriction(binary_operator(column_value(&col_r2), oper_t::EQ, zero_value)); + // v1 should have EQ, LT, GT (in WHERE-clause order). + BOOST_CHECK_EQUAL(factor_ops(npk.at(v1_def)), + (std::vector{{"v1", op::EQ}, {"v1", op::LT}, {"v1", op::GT}})); + // v2 should have a single EQ. 
+ BOOST_CHECK_EQUAL(factor_ops(npk.at(v2_def)), + (std::vector{{"v2", op::EQ}})); - auto make_multi_column_restriction = [](std::vector columns, oper_t oper) -> expression { - tuple_constructor column_tuple(column_definitions_as_tuple_constructor(columns)); + // --- PK expression: pk1=1 AND pk2=2 --- + BOOST_CHECK_EQUAL(factor_ops(sr->get_partition_key_restrictions()), + (std::vector{{"pk1", op::EQ}, {"pk2", op::EQ}})); - std::vector zeros_tuple_elems(columns.size(), managed_bytes_opt(I(0))); - data_type tup_type = tuple_type_impl::get_instance(std::vector(columns.size(), int32_type)); - managed_bytes tup_bytes = tuple_type_impl::build_value_fragmented(std::move(zeros_tuple_elems)); - constant zeros_tuple(raw_value::make_value(std::move(tup_bytes)), std::move(tup_type)); + // --- CK expression: ck1=3 AND ck2=4 --- + BOOST_CHECK_EQUAL(factor_ops(sr->get_clustering_columns_restrictions()), + (std::vector{{"ck1", op::EQ}, {"ck2", op::EQ}})); + } - return binary_operator(column_tuple, oper, std::move(zeros_tuple)); - }; + // Multi-column CK restriction doesn't appear in single-column non-PK map. 
+ { + auto sr = make_restrictions( + "pk1=1 AND pk2=2 AND (ck1, ck2) > (0, 0) AND v1=5", + e, "trc"); - expression pk1_pk2_restriction = make_multi_column_restriction({&col_pk1, &col_pk2}, oper_t::LT); - expression pk1_ck2_r1_restriction = make_multi_column_restriction({&col_pk1, &col_ck2, &col_r1}, oper_t::EQ); - expression r1_r2_restriction = make_multi_column_restriction({&col_r1, &col_r2}, oper_t::GT); + auto& npk = sr->get_non_pk_restriction(); + BOOST_CHECK_EQUAL(npk.size(), 1u); - std::vector conjunction_elems; - expression ck1_ck2_restriction = make_multi_column_restriction({&col_ck1, &col_ck2}, oper_t::LT); - expression conjunction_expr = conjunction{std::vector{ck1_ck2_restriction, r1_restriction2}}; + auto* v1_def = schema->get_column_definition("v1"); + BOOST_REQUIRE(npk.contains(v1_def)); + BOOST_CHECK_EQUAL(factor_ops(npk.at(v1_def)), + (std::vector{{"v1", op::EQ}})); - function_call token_expr = function_call { - .func = functions::function_name::native_function("token"), - .args = {column_value(&col_pk1), column_value(&col_pk2)} - }; - expression token_lt_restriction = binary_operator(token_expr, oper_t::LT, zero_value); - expression token_gt_restriction = binary_operator(token_expr, oper_t::GT, zero_value); + // CK expression should have 1 factor: the multi-column (ck1, ck2) > (0, 0). + // The multi-column restriction's LHS is a tuple_constructor, not a single + // column_value, so we verify only the count and operator here. 
+ auto ck_factors = expr::boolean_factors(sr->get_clustering_columns_restrictions()); + BOOST_CHECK_EQUAL(ck_factors.size(), 1u); + BOOST_REQUIRE(expr::is(ck_factors[0])); + BOOST_CHECK(expr::as(ck_factors[0]).op == op::GT); + } - expression true_restriction = constant::make_bool(true); - expression false_restriction = constant::make_bool(false); - expression pk1_expr = column_value(&col_pk1); - expression pk2_expr = column_value(&col_pk1); - data_type ttype = tuple_type_impl::get_instance({int32_type, int32_type}); - expression pk1_pk2_expr = tuple_constructor{{expression{column_value{&col_pk1}}, - expression{column_value{&col_pk2}}}, - std::move(ttype)}; - - big_where.push_back(pk1_restriction); - big_where.push_back(pk2_restriction); - big_where.push_back(ck1_restriction); - big_where.push_back(ck2_restriction); - big_where.push_back(r1_restriction); - big_where.push_back(r2_restriction); - big_where.push_back(pk1_pk2_restriction); - big_where.push_back(pk1_ck2_r1_restriction); - big_where.push_back(r1_r2_restriction); - big_where.push_back(conjunction_expr); - big_where.push_back(pk2_restriction2); - big_where.push_back(r1_restriction3); - big_where.push_back(token_lt_restriction); - big_where.push_back(token_gt_restriction); - big_where.push_back(true_restriction); - big_where.push_back(false_restriction); - big_where.push_back(token_expr); - big_where.push_back(pk1_expr); - big_where.push_back(pk2_expr); - big_where.push_back(pk1_pk2_expr); - - expression big_where_expr = conjunction{std::move(big_where)}; - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_pk1), - {pk1_restriction}); - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_pk2), - {pk2_restriction, pk2_restriction2}); - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_ck1), - {ck1_restriction}); - - 
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_ck2), - {ck2_restriction}); - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r1), - {r1_restriction, r1_restriction2, r1_restriction3}); - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r2), - {r2_restriction}); - - assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r3), - {}); + // Unrestricted table: all maps are empty. + { + auto sr = make_restrictions("", e, "trc"); + BOOST_CHECK(sr->get_non_pk_restriction().empty()); + BOOST_CHECK(restrictions::is_empty_restriction(sr->get_clustering_columns_restrictions())); + } + }); } BOOST_AUTO_TEST_SUITE_END() diff --git a/test/vector_search/filter_test.cc b/test/vector_search/filter_test.cc index 28d1d83d76..902020bae8 100644 --- a/test/vector_search/filter_test.cc +++ b/test/vector_search/filter_test.cc @@ -23,7 +23,7 @@ using namespace cql3; namespace { /// Helper to create statement_restrictions from a WHERE clause string -restrictions::statement_restrictions make_restrictions( +shared_ptr make_restrictions( std::string_view where_clause, cql_test_env& env, const sstring& table_name = "t", const sstring& keyspace_name = "ks") { prepare_context ctx; @@ -63,8 +63,8 @@ SEASTAR_TEST_CASE(to_json_empty_restrictions) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto schema = e.local_db().find_schema("ks", "t"); - restrictions::statement_restrictions restr(schema, false); - auto json = rjson::print(vector_search::prepare_filter(restr, false).to_json(query_options({}))); + shared_ptr restr = restrictions::make_trivial_statement_restrictions(schema, false); + auto json = rjson::print(vector_search::prepare_filter(*restr, false).to_json(query_options({}))); BOOST_CHECK_EQUAL(json, "{}"); }); @@ -75,7 +75,7 @@ 
SEASTAR_TEST_CASE(to_json_with_allow_filtering) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -87,7 +87,7 @@ SEASTAR_TEST_CASE(to_json_single_column_eq) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=42", e); - auto json = get_restrictions_json(restr, false); + auto json = get_restrictions_json(*restr, false); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":42}],"allow_filtering":false})json"; BOOST_CHECK_EQUAL(json, expected); @@ -99,7 +99,7 @@ SEASTAR_TEST_CASE(to_json_single_column_lt) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck<100", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"<","lhs":"ck","rhs":100}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -111,7 +111,7 @@ SEASTAR_TEST_CASE(to_json_single_column_gt) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck>50", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":">","lhs":"ck","rhs":50}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -123,7 +123,7 @@ SEASTAR_TEST_CASE(to_json_single_column_lte) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = 
make_restrictions("pk=1 and ck<=75", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"<=","lhs":"ck","rhs":75}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -135,7 +135,7 @@ SEASTAR_TEST_CASE(to_json_single_column_gte) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck>=25", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":">=","lhs":"ck","rhs":25}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -147,7 +147,7 @@ SEASTAR_TEST_CASE(to_json_single_column_in) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck in (1, 2, 3)", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"IN","lhs":"ck","rhs":[1,2,3]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -159,7 +159,7 @@ SEASTAR_TEST_CASE(to_json_string_value) { cquery_nofail(e, "create table ks.t(pk text, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk='hello'", e); - auto json = get_restrictions_json(restr, false); + auto json = get_restrictions_json(*restr, false); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":"hello"}],"allow_filtering":false})json"; BOOST_CHECK_EQUAL(json, expected); @@ -171,7 +171,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_eq) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)=(10, 20)", e); - auto json = 
get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()==()","lhs":["ck1","ck2"],"rhs":[10,20]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -183,7 +183,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_lt) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)<(10, 20)", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()<()","lhs":["ck1","ck2"],"rhs":[10,20]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -195,7 +195,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_gt) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)>(10, 20)", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()>()","lhs":["ck1","ck2"],"rhs":[10,20]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -207,7 +207,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_lte) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)<=(10, 20)", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()<=()","lhs":["ck1","ck2"],"rhs":[10,20]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -219,7 +219,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_gte) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, 
primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)>=(10, 20)", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()>=()","lhs":["ck1","ck2"],"rhs":[10,20]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -231,7 +231,7 @@ SEASTAR_TEST_CASE(to_json_multi_column_in) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2) in ((1, 2), (3, 4))", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"()IN()","lhs":["ck1","ck2"],"rhs":[[1,2],[3,4]]}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -243,7 +243,7 @@ SEASTAR_TEST_CASE(to_json_multiple_restrictions) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck>=10 and ck<100", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":">=","lhs":"ck","rhs":10},{"type":"<","lhs":"ck","rhs":100}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -255,7 +255,7 @@ SEASTAR_TEST_CASE(to_json_with_boolean_value) { cquery_nofail(e, "create table ks.t(pk int, ck boolean, v vector, primary key(pk, ck))"); auto restr = make_restrictions("ck=true", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"ck","rhs":true}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -267,7 +267,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_partition_key) { 
cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=?", e); - auto filter = vector_search::prepare_filter(restr, false); + auto filter = vector_search::prepare_filter(*restr, false); std::vector bind_values = {raw_value::make_value(int32_type->decompose(42))}; auto options = make_query_options(std::move(bind_values)); @@ -283,7 +283,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_clustering_key) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=? and ck>?", e); - auto filter = vector_search::prepare_filter(restr, true); + auto filter = vector_search::prepare_filter(*restr, true); std::vector bind_values = { raw_value::make_value(int32_type->decompose(1)), @@ -301,7 +301,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_different_values) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=?", e); - auto filter = vector_search::prepare_filter(restr, false); + auto filter = vector_search::prepare_filter(*restr, false); std::vector bind_values1 = {raw_value::make_value(int32_type->decompose(100))}; auto options1 = make_query_options(std::move(bind_values1)); @@ -322,7 +322,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_string_value) { cquery_nofail(e, "create table ks.t(pk text, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=?", e); - auto filter = vector_search::prepare_filter(restr, false); + auto filter = vector_search::prepare_filter(*restr, false); std::vector bind_values = {raw_value::make_value(utf8_type->decompose("hello_world"))}; auto options = make_query_options(std::move(bind_values)); @@ -338,7 +338,7 @@ SEASTAR_TEST_CASE(to_json_mixed_literals_and_bind_markers) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck>?", e); - auto filter = 
vector_search::prepare_filter(restr, true); + auto filter = vector_search::prepare_filter(*restr, true); std::vector bind_values = {raw_value::make_value(int32_type->decompose(25))}; auto options = make_query_options(std::move(bind_values)); @@ -354,7 +354,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_in_list) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and ck in ?", e); - auto filter = vector_search::prepare_filter(restr, true); + auto filter = vector_search::prepare_filter(*restr, true); auto list_type = list_type_impl::get_instance(int32_type, true); auto list_val = make_list_value(list_type, {data_value(10), data_value(20), data_value(30)}); @@ -373,7 +373,7 @@ SEASTAR_TEST_CASE(to_json_bind_marker_multi_column) { cquery_nofail(e, "create table ks.t(pk int, ck1 int, ck2 int, v vector, primary key(pk, ck1, ck2))"); auto restr = make_restrictions("pk=1 and (ck1, ck2)>?", e); - auto filter = vector_search::prepare_filter(restr, true); + auto filter = vector_search::prepare_filter(*restr, true); auto tuple_type = tuple_type_impl::get_instance({int32_type, int32_type}); auto tuple_val = make_tuple_value(tuple_type, {data_value(10), data_value(20)}); @@ -392,7 +392,7 @@ SEASTAR_TEST_CASE(to_json_no_bind_markers_uses_cache) { cquery_nofail(e, "create table ks.t(pk int, ck int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=42", e); - auto filter = vector_search::prepare_filter(restr, false); + auto filter = vector_search::prepare_filter(*restr, false); auto options1 = query_options({}); auto json1 = rjson::print(filter.to_json(options1)); @@ -412,7 +412,7 @@ SEASTAR_TEST_CASE(to_json_nonprimary_key_eq) { cquery_nofail(e, "create table ks.t(pk int, ck int, r int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and r=42", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = 
R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":"==","lhs":"r","rhs":42}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -424,7 +424,7 @@ SEASTAR_TEST_CASE(to_json_nonprimary_key_range) { cquery_nofail(e, "create table ks.t(pk int, ck int, r int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and r>10 and r<100", e); - auto json = get_restrictions_json(restr, true); + auto json = get_restrictions_json(*restr, true); auto expected = R"json({"restrictions":[{"type":"==","lhs":"pk","rhs":1},{"type":">","lhs":"r","rhs":10},{"type":"<","lhs":"r","rhs":100}],"allow_filtering":true})json"; BOOST_CHECK_EQUAL(json, expected); @@ -436,7 +436,7 @@ SEASTAR_TEST_CASE(to_json_nonprimary_key_bind_marker) { cquery_nofail(e, "create table ks.t(pk int, ck int, r int, v vector, primary key(pk, ck))"); auto restr = make_restrictions("pk=1 and r=?", e); - auto filter = vector_search::prepare_filter(restr, true); + auto filter = vector_search::prepare_filter(*restr, true); std::vector bind_values = {raw_value::make_value(int32_type->decompose(99))}; auto options = make_query_options(std::move(bind_values));