From 240d9be5e24448586db43fef33d9c8b587b4f706 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 19:07:21 +0300 Subject: [PATCH 01/14] cql3: statement_restrictions: set op on all binary-operator-derived predicates The to_predicates() function had fallthrough paths for operators like LIKE and NOT_IN that created predicates without setting the op field. This meant predicate-based checks like 'p.op && needs_filtering(*p.op)' would miss these operators. Fix by inlining the predicate construction at the fallthrough points (instead of using cannot_solve_on_column) and setting .op = oper.op. This ensures all predicates derived from binary operators carry their operator type, enabling reliable predicate-based analysis. The cannot_solve_on_column helper is now unused and removed. --- cql3/restrictions/statement_restrictions.cc | 22 ++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index ebb545483f..9c7e6bf59d 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -302,13 +302,6 @@ to_predicates( .on = on_row{}, }); }; - static auto cannot_solve_on_column = [] (const expression& e, const column_definition* cdef) -> std::vector { - return to_vector(predicate{ - .solve_for = nullptr, - .filter = e, - .on = on_column{cdef}, - }); - }; return expr::visit(overloaded_functor{ [] (const constant& constant_val) -> std::vector { std::optional bool_val = get_bool_value(constant_val); @@ -402,7 +395,12 @@ to_predicates( .op = oper.op, }); } - return cannot_solve_on_column(oper, col.col); + return to_vector(predicate{ + .solve_for = nullptr, + .filter = oper, + .on = on_column{col.col}, + .op = oper.op, + }); }, [&] (const subscript& s) -> std::vector { const column_value& col = get_subscripted_column(s); @@ -433,7 +431,13 @@ to_predicates( .is_subscript = true, }); } - return cannot_solve_on_column(oper, col.col); + return to_vector(predicate{ + .solve_for = nullptr, + .filter = oper, + .on = on_column{col.col}, + .op = oper.op, + .is_subscript = true, + }); }, [&] (const tuple_constructor& tuple) -> std::vector { auto columns = tuple.elements From 569c85032e9a77087460d14ab33ada29fc4b4839 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 18:44:19 +0300 Subject: [PATCH 02/14] cql3: statement_restrictions: replace find_binop column extraction with predicate on field In add_clustering_restrictions_to_idx_ck_prefix(), find_binop was used to locate any binary_operator in the predicate's filter just to extract the column from its LHS. Since the predicate already stores this information in its 'on' field (as on_column for single-column predicates), use it directly instead of searching the expression tree. --- cql3/restrictions/statement_restrictions.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 9c7e6bf59d..30e9ae0a17 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2600,11 +2600,11 @@ void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const // TODO: We could handle single-element tuples, eg. `(c)>=(123)`. break; } - const auto any_binop = find_binop(e.filter, [] (auto&&) { return true; }); - if (!any_binop) { + auto* on_col = std::get_if(&e.on); + if (!on_col) { break; } - const auto col = expr::as(any_binop->lhs).col; + const auto col = on_col->column; auto col_in_index = idx_tbl_schema.get_column_definition(col->name()); auto replaced = replace_column_def(e.filter, col_in_index); auto a = to_predicate_on_column(replaced, col_in_index, &idx_tbl_schema); From 556262a165470f9ea142e47c585b3281d65bd604 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 18:48:16 +0300 Subject: [PATCH 03/14] cql3: statement_restrictions: replace find_binop is_on_collection with tracked bool Replace the two find_binop(_clustering_columns_restrictions, is_on_collection) calls with a precomputed _ck_is_on_collection boolean that is tracked incrementally during predicate construction. This avoids walking the expression tree at each call site. The is_on_collection check detects CONTAINS/CONTAINS_KEY operators, which indicate collection restrictions on a clustering key column. --- cql3/restrictions/statement_restrictions.cc | 22 +++++++-------------- cql3/restrictions/statement_restrictions.hh | 1 + 2 files changed, 8 insertions(+), 15 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 30e9ae0a17..330f5f47cb 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -75,9 +75,6 @@ inline bool has_slice_or_needs_filtering(const expression& e) { return find_binop(e, [] (const binary_operator& o) { return is_slice(o.op) || needs_filtering(o.op); }); } -/// True iff binary_operator involves a collection. -extern bool is_on_collection(const binary_operator&); - bool contains_multi_column_restriction(const expression&); bool has_only_eq_binops(const expression&); @@ -795,16 +792,6 @@ static std::pair, expr::expression> do_fin allow_local_index allow_local); -bool is_on_collection(const binary_operator& b) { - if (b.op == oper_t::CONTAINS || b.op == oper_t::CONTAINS_KEY) { - return true; - } - if (auto tuple = expr::as_if(&b.lhs)) { - return std::ranges::any_of(tuple->elements, [] (const expression& v) { return expr::is(v); }); - } - return false; -} - bool is_empty_restriction(const expression& e) { bool contains_non_conjunction = recurse_until(e, [&](const expression& e) -> bool { return !is(e); @@ -894,6 +881,7 @@ statement_restrictions::statement_restrictions(private_tag, bool ck_is_empty = true; bool has_mc_clustering = false; bool ck_has_slice = false; + bool ck_is_on_collection = false; const column_definition* ck_last_column = nullptr; const predicate* first_mc_pred = nullptr; bool pk_is_empty = true; @@ -1062,6 +1050,9 @@ statement_restrictions::statement_restrictions(private_tag, if (pred.is_slice) { ck_has_slice = true; } + if (pred.op == oper_t::CONTAINS || pred.op == oper_t::CONTAINS_KEY) { + ck_is_on_collection = true; + } if (ck_last_column == nullptr || schema->position(*new_column) > schema->position(*ck_last_column)) { ck_last_column = new_column; } @@ -1122,6 +1113,7 @@ statement_restrictions::statement_restrictions(private_tag, } } _has_multi_column = has_mc_clustering; + _ck_is_on_collection = ck_is_on_collection; if (_check_indexes) { auto cf = db.find_column_family(schema); auto& sim = cf.get_index_manager(); @@ -1188,7 +1180,7 @@ statement_restrictions::statement_restrictions(private_tag, if (_uses_secondary_indexing || clustering_key_restrictions_need_filtering()) { _index_restrictions.push_back(_clustering_columns_restrictions); search_groups.push_back({sc_ck_pred_vectors, _clustering_columns_restrictions}); - } else if (find_binop(_clustering_columns_restrictions, is_on_collection)) { + } else if (_ck_is_on_collection) { fail(unimplemented::cause::INDEXES); } @@ -1590,7 +1582,7 @@ void statement_restrictions::process_clustering_columns_restrictions(bool for_vi return; } - if (find_binop(_clustering_columns_restrictions, is_on_collection) + if (_ck_is_on_collection && !_has_queriable_ck_index && !allow_filtering) { throw exceptions::invalid_request_exception( "Cannot restrict clustering columns by a CONTAINS relation without a secondary index or filtering"); diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 6b591de30c..720964b793 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -178,6 +178,7 @@ private: bool _has_queriable_regular_index = false, _has_queriable_pk_index = false, _has_queriable_ck_index = false; bool _has_multi_column; ///< True iff _clustering_columns_restrictions has a multi-column restriction. + bool _ck_is_on_collection = false; ///< True iff _clustering_columns_restrictions has a collection restriction (CONTAINS/CONTAINS_KEY). std::vector _where; ///< The entire WHERE clause (factorized). From ae7eb860a55a7e30af5f819e2aa06d2a3da3a9e0 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 18:49:47 +0300 Subject: [PATCH 04/14] cql3: statement_restrictions: replace find_needs_filtering with predicate op check In the clustering prefix construction loop, replace the find_needs_filtering() call (which walks the merged predicate's expression tree looking for needs-filtering binary operators) with a check on the individual predicate vector. This uses the per-predicate op field directly instead of searching the expression tree. --- cql3/restrictions/statement_restrictions.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 330f5f47cb..f91e598c00 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1092,7 +1092,8 @@ statement_restrictions::statement_restrictions(private_tag, if (found == sc_ck_preds.end()) { break; } - if (find_needs_filtering(found->second.filter)) { + const auto& preds = sc_ck_pred_vectors[&col]; + if (std::ranges::any_of(preds, [](const predicate& p) { return p.op && needs_filtering(*p.op); })) { break; } prefix.push_back(found->second); From 6e27c3a185b41ded39ec79f99934e34925100e8b Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 18:50:49 +0300 Subject: [PATCH 05/14] cql3: statement_restrictions: replace contains_multi_column_restriction with _has_multi_column In clustering_key_restrictions_need_filtering(), replace the contains_multi_column_restriction() call (which uses find_binop to search for a tuple_constructor LHS in the expression tree) with the precomputed _has_multi_column boolean that is already tracked incrementally during predicate construction. --- cql3/restrictions/statement_restrictions.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index f91e598c00..dbaaa1aed8 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1554,7 +1554,7 @@ size_t statement_restrictions::clustering_columns_restrictions_size() const { } bool statement_restrictions::clustering_key_restrictions_need_filtering() const { - if (contains_multi_column_restriction(_clustering_columns_restrictions)) { + if (_has_multi_column) { return false; } From eb98aea4661ee6d1df0071a0a74d02d68fe87bf4 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 18:54:27 +0300 Subject: [PATCH 06/14] cql3: statement_restrictions: replace has_slice_or_needs_filtering with tracked bool Replace the has_slice_or_needs_filtering() call on _partition_key_restrictions (which uses find_binop to walk the expression tree) with a precomputed _pk_has_slice_or_needs_filtering boolean tracked incrementally during predicate construction in the partition key branch. --- cql3/restrictions/statement_restrictions.cc | 7 ++++++- cql3/restrictions/statement_restrictions.hh | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index dbaaa1aed8..b139f10f7b 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -885,6 +885,7 @@ statement_restrictions::statement_restrictions(private_tag, const column_definition* ck_last_column = nullptr; const predicate* first_mc_pred = nullptr; bool pk_is_empty = true; + bool pk_has_slice_or_needs_filtering = false; bool has_token = false; std::optional token_pred; std::unordered_map pk_range_preds; @@ -1011,6 +1012,9 @@ statement_restrictions::statement_restrictions(private_tag, } } _partition_range_is_simple &= !pred.is_in; + if (pred.is_slice || (pred.op && needs_filtering(*pred.op))) { + pk_has_slice_or_needs_filtering = true; + } } else if (def->is_clustering_key()) { if (has_mc_clustering) { throw exceptions::invalid_request_exception( @@ -1115,6 +1119,7 @@ statement_restrictions::statement_restrictions(private_tag, } _has_multi_column = has_mc_clustering; _ck_is_on_collection = ck_is_on_collection; + _pk_has_slice_or_needs_filtering = pk_has_slice_or_needs_filtering; if (_check_indexes) { auto cf = db.find_column_family(schema); auto& sim = cf.get_index_manager(); @@ -1546,7 +1551,7 @@ size_t statement_restrictions::partition_key_restrictions_size() const { bool statement_restrictions::pk_restrictions_need_filtering() const { return !is_empty_restriction(_partition_key_restrictions) && !has_token_restrictions() - && (has_partition_key_unrestricted_components() || has_slice_or_needs_filtering(_partition_key_restrictions)); + && (has_partition_key_unrestricted_components() || _pk_has_slice_or_needs_filtering); } size_t statement_restrictions::clustering_columns_restrictions_size() const { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 720964b793..4fc0f1e883 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -215,6 +215,7 @@ private: partition_range_restrictions _partition_range_restrictions; bool _partition_range_is_simple; ///< False iff _partition_range_restrictions imply a Cartesian product. + bool _pk_has_slice_or_needs_filtering = false; ///< True iff any PK restriction has a slice or needs-filtering operator. check_indexes _check_indexes = check_indexes::yes; From dca2cc512e3b7b1779cf5e4d02794438e836e044 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 18:55:42 +0300 Subject: [PATCH 07/14] cql3: statement_restrictions: remove unused find_needs_filtering and has_slice_or_needs_filtering These helper functions were wrappers around find_binop that are no longer called, since their call sites have been replaced by predicate-based checks. --- cql3/restrictions/statement_restrictions.cc | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index b139f10f7b..05a329095e 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -67,14 +67,6 @@ inline bool needs_filtering(oper_t op) { (op == oper_t::IS_NOT) || (op == oper_t::NEQ) || (op == oper_t::NOT_IN); } -inline auto find_needs_filtering(const expression& e) { - return find_binop(e, [] (const binary_operator& bo) { return needs_filtering(bo.op); }); -} - -inline bool has_slice_or_needs_filtering(const expression& e) { - return find_binop(e, [] (const binary_operator& o) { return is_slice(o.op) || needs_filtering(o.op); }); -} - bool contains_multi_column_restriction(const expression&); bool has_only_eq_binops(const expression&); From 4c282f588ab6a21d561768f40c10ee98d44f3eb5 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 18:58:02 +0300 Subject: [PATCH 08/14] cql3: statement_restrictions: replace contains_multi_column_restriction filter with _has_multi_column In calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(), the code extracted multi-column boolean factors from _clustering_columns_restrictions. Since multi-column and single-column CK restrictions cannot be mixed (the constructor enforces this), when _has_multi_column is true, ALL factors are multi-column. Simplify to just adding _clustering_columns_restrictions directly when _has_multi_column is set. This removes the last caller of contains_multi_column_restriction(), allowing the function (and its find_binop call) to be removed. --- cql3/restrictions/statement_restrictions.cc | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 05a329095e..398b683176 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -67,8 +67,6 @@ inline bool needs_filtering(oper_t op) { (op == oper_t::IS_NOT) || (op == oper_t::NEQ) || (op == oper_t::NOT_IN); } -bool contains_multi_column_restriction(const expression&); - bool has_only_eq_binops(const expression&); static @@ -817,13 +815,6 @@ build_value_for_fn(const column_definition& cdef, const expression& e, const sch }; } -bool contains_multi_column_restriction(const expression& e) { - const binary_operator* find_res = find_binop(e, [](const binary_operator& binop) { - return is(binop.lhs); - }); - return find_res != nullptr; -} - bool has_only_eq_binops(const expression& e) { const expr::binary_operator* non_eq_binop = find_in_expression(e, [](const expr::binary_operator& binop) { @@ -1258,13 +1249,9 @@ statement_restrictions::statement_restrictions(private_tag, _clustering_row_level_filter = expr::make_conjunction(std::move(_clustering_row_level_filter), std::move(regular_columns_filter)); - auto multi_column_restrictions = expr::conjunction{ - .children = expr::boolean_factors(_clustering_columns_restrictions) - | std::ranges::views::filter(contains_multi_column_restriction) - | std::ranges::to() - }; - - _clustering_row_level_filter = expr::make_conjunction(std::move(_clustering_row_level_filter), std::move(multi_column_restrictions)); + if (_has_multi_column) { + _clustering_row_level_filter = expr::make_conjunction(std::move(_clustering_row_level_filter), _clustering_columns_restrictions); + } if (uses_secondary_indexing()) { auto& index_opt = _idx_opt; From e10c124cd38c277ef82c59eaeb05fd902312b9bd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 18:59:03 +0300 Subject: [PATCH 09/14] cql3: statement_restrictions: replace has_slice with predicate is_slice check In the clustering prefix construction loop, replace the has_slice() call (which uses find_binop to search the merged predicate's expression tree for slice operators) with a direct check on the individual predicate vector's is_slice field. --- cql3/restrictions/statement_restrictions.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 398b683176..b1a2e41dfc 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1084,7 +1084,7 @@ statement_restrictions::statement_restrictions(private_tag, break; } prefix.push_back(found->second); - if (has_slice(found->second.filter)) { + if (std::ranges::any_of(preds, [](const predicate& p) { return p.is_slice; })) { break; } } From b3c1ee230b55045b20dffc298ba2e99d7897a465 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 19:00:25 +0300 Subject: [PATCH 10/14] cql3: statement_restrictions: replace has_partition_token with variant check Replace has_token_restrictions()'s call to has_partition_token() (which uses find_binop to search the expression tree for token function calls) with a direct check on the _partition_range_restrictions variant, which already records whether token restrictions exist. --- cql3/restrictions/statement_restrictions.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index b1a2e41dfc..29ac901c61 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1294,7 +1294,7 @@ statement_restrictions::clustering_key_restrictions_has_only_eq() const { bool statement_restrictions::has_token_restrictions() const { - return has_partition_token(_partition_key_restrictions, *_schema); + return std::holds_alternative(_partition_range_restrictions); } bool From a402fe1a65045f6b21d08f8719cf37389d725a18 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 19:01:30 +0300 Subject: [PATCH 11/14] cql3: statement_restrictions: replace find_clustering_order with predicate order field In build_range_from_raw_bounds_fn(), replace find_clustering_order() (which uses find_binop to search for a binary_operator with clustering comparison order) with a direct check of the predicate's order field. Since each multi-column predicate's filter is a single binary_operator, extract it directly with as() instead of searching. --- cql3/restrictions/statement_restrictions.cc | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 29ac901c61..54a276b4c4 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -2277,17 +2277,16 @@ get_clustering_bounds_fn_t build_range_from_raw_bounds_fn( const std::vector& exprs, const schema& schema) { std::vector> range_builders; - for (const auto& e : exprs | std::views::transform(&predicate::filter)) { - if (auto b = find_clustering_order(e)) { - range_builders.emplace_back([bb = *b, &schema] (const query_options& options) { - auto* b = &bb; - cql3::raw_value tup_val = expr::evaluate(b->rhs, options); + for (const auto& pred : exprs) { + if (pred.order == expr::comparison_order::clustering) { + range_builders.emplace_back([b = expr::as(pred.filter), &schema] (const query_options& options) { + cql3::raw_value tup_val = expr::evaluate(b.rhs, options); if (tup_val.is_null()) { - on_internal_error(rlogger, format("range_from_raw_bounds: unexpected atom {}", *b)); + on_internal_error(rlogger, format("range_from_raw_bounds: unexpected atom {}", b)); } const auto r = to_range( - b->op, clustering_key_prefix::from_optional_exploded(schema, expr::get_tuple_elements(tup_val, *type_of(b->rhs)))); + b.op, clustering_key_prefix::from_optional_exploded(schema, expr::get_tuple_elements(tup_val, *type_of(b.rhs)))); return r; }); } From 9e7077160039ba16b6a17cac9ae21903efb56b6f Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 19:31:35 +0300 Subject: [PATCH 12/14] cql3: statement_restrictions: use index-selection predicates for value_for_index_partition_key Instead of rebuilding predicates from the expression tree at build_value_for_index_partition_key_fn() time via to_predicate_on_column, capture the indexed column's predicates directly in do_find_idx() when the index is chosen. Store them as _idx_column_predicates and use them to build the value function without any redundant expression analysis. This eliminates build_value_for_fn and its call to to_predicate_on_column from this code path. --- cql3/restrictions/statement_restrictions.cc | 77 +++++++++++---------- cql3/restrictions/statement_restrictions.hh | 1 + 2 files changed, 40 insertions(+), 38 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 54a276b4c4..0121c7d689 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -775,7 +775,13 @@ static bool are_predicates_supported_by(const std::vector& preds, return bool(result); } -static std::pair, expr::expression> do_find_idx( +struct do_find_idx_result { + std::optional index; + expr::expression restrictions; + std::vector indexed_column_predicates; +}; + +static do_find_idx_result do_find_idx( bool uses_secondary_indexing, const secondary_index::secondary_index_manager& sim, std::span search_groups, @@ -790,31 +796,6 @@ bool is_empty_restriction(const expression& e) { return !contains_non_conjunction; } -static -std::function -build_value_for_fn(const column_definition& cdef, const expression& e, const schema& s) { - auto ac = to_predicate_on_column(e, &cdef, &s); - return [ac] (const query_options& options) -> bytes_opt { - value_set possible_vals = solve(ac, options); - return std::visit(overloaded_functor { - [&](const value_list& val_list) -> bytes_opt { - if (val_list.empty()) { - return std::nullopt; - } - - if (val_list.size() != 1) { - on_internal_error(expr_logger, format("expr::value_for - multiple possible values for column: {}", ac.filter)); - } - - return to_bytes(val_list.front()); - }, - [&](const interval&) -> bytes_opt { - on_internal_error(expr_logger, format("expr::value_for - possible values are a range: {}", ac.filter)); - } - }, possible_vals); - }; -} - bool has_only_eq_binops(const expression& e) { const expr::binary_operator* non_eq_binop = find_in_expression(e, [](const expr::binary_operator& binop) { @@ -1195,12 +1176,13 @@ statement_restrictions::statement_restrictions(private_tag, const expr::allow_local_index allow_local_for_idx( !has_partition_key_unrestricted_components() && partition_key_restrictions_is_all_eq()); - auto [idx_found, idx_restrictions] = do_find_idx( + auto idx_result = do_find_idx( _uses_secondary_indexing, sim, search_groups, allow_local_for_idx); - if (idx_found) { - _idx_opt = std::make_unique(std::move(*idx_found)); + if (idx_result.index) { + _idx_opt = std::make_unique(std::move(*idx_result.index)); } - _idx_restrictions = std::move(idx_restrictions); + _idx_restrictions = std::move(idx_result.restrictions); + _idx_column_predicates = std::move(idx_result.indexed_column_predicates); } calculate_column_defs_for_filtering_and_erase_restrictions_used_for_index(db, sc_pk_pred_vectors, sc_ck_pred_vectors, sc_nonpk_pred_vectors); @@ -1355,13 +1337,13 @@ bool statement_restrictions::is_empty() const { } -static std::pair, expr::expression> do_find_idx( +static do_find_idx_result do_find_idx( bool uses_secondary_indexing, const secondary_index::secondary_index_manager& sim, std::span search_groups, allow_local_index allow_local) { if (!uses_secondary_indexing) { - return {std::nullopt, expr::conjunction({})}; + return {std::nullopt, expr::conjunction({}), {}}; } // Current score table: @@ -1378,6 +1360,7 @@ static std::pair, expr::expression> do_fin std::optional chosen_index; int chosen_index_score = 0; expr::expression chosen_index_restrictions = expr::conjunction({}); + std::vector chosen_index_predicates; // Several indexes may be usable for this query. When their score is tied, // let's pick one by order of the columns mentioned in the restriction @@ -1405,11 +1388,12 @@ static std::pair, expr::expression> do_fin chosen_index = index; chosen_index_score = index_score(index); chosen_index_restrictions = group.restriction_expr; + chosen_index_predicates = preds; } } }); } - return {chosen_index, chosen_index_restrictions}; + return {chosen_index, chosen_index_restrictions, std::move(chosen_index_predicates)}; } std::pair, expr::expression> @@ -2667,15 +2651,32 @@ std::vector statement_restrictions::get_local_index_clu get_singleton_value_fn_t statement_restrictions::build_value_for_index_partition_key_fn() const { - if (!_idx_opt) { + if (_idx_column_predicates.empty()) { return {}; } - const column_definition* cdef = _schema->get_column_definition(to_bytes(_idx_opt->target_column())); - if (!cdef) { - throw exceptions::invalid_request_exception("Indexed column not found in schema"); + auto merged = _idx_column_predicates[0]; + for (size_t i = 1; i < _idx_column_predicates.size(); ++i) { + merged = make_conjunction(std::move(merged), _idx_column_predicates[i]); } + return [merged = std::move(merged)] (const query_options& options) -> bytes_opt { + value_set possible_vals = solve(merged, options); + return std::visit(overloaded_functor { + [&](const value_list& val_list) -> bytes_opt { + if (val_list.empty()) { + return std::nullopt; + } - return build_value_for_fn(*cdef, _idx_restrictions, *_schema); + if (val_list.size() != 1) { + on_internal_error(expr_logger, format("expr::value_for - multiple possible values for column: {}", merged.filter)); + } + + return to_bytes(val_list.front()); + }, + [&](const interval&) -> bytes_opt { + on_internal_error(expr_logger, format("expr::value_for - possible values are a range: {}", merged.filter)); + } + }, possible_vals); + }; } bytes_opt diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 4fc0f1e883..de24a75e9c 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -226,6 +226,7 @@ private: schema_ptr _view_schema; std::unique_ptr _idx_opt; expr::expression _idx_restrictions = expr::conjunction({}); + std::vector _idx_column_predicates; ///< Predicates for the chosen index's target column. get_partition_key_ranges_fn_t _get_partition_key_ranges_fn; get_clustering_bounds_fn_t _get_clustering_bounds_fn; get_clustering_bounds_fn_t _get_global_index_clustering_ranges_fn; From 23d6f458ecc1b88c9dccb4f8138a6cacff7d800d Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Tue, 12 May 2026 20:19:04 +0300 Subject: [PATCH 13/14] cql3: statement_restrictions: replace has_only_eq_binops with tracked booleans Replace has_only_eq_binops() (which uses find_in_expression to search the expression tree for non-EQ binary operators) with precomputed _pk_is_all_eq and _ck_is_all_eq booleans tracked incrementally during predicate construction. Each predicate's equality field is checked as it is processed, covering single-column PK/CK predicates, multi-column CK predicates, and token predicates. This removes the last find_in_expression call in statement_restrictions.cc, and eliminates has_only_eq_binops entirely. --- cql3/restrictions/statement_restrictions.cc | 33 ++++++++++++--------- cql3/restrictions/statement_restrictions.hh | 2 ++ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 0121c7d689..7eadd3536b 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -67,8 +67,6 @@ inline bool needs_filtering(oper_t op) { (op == oper_t::IS_NOT) || (op == oper_t::NEQ) || (op == oper_t::NOT_IN); } -bool has_only_eq_binops(const expression&); - static value_set solve(const predicate& ac, const query_options& options) { @@ -796,16 +794,6 @@ bool is_empty_restriction(const expression& e) { return !contains_non_conjunction; } -bool has_only_eq_binops(const expression& e) { - const expr::binary_operator* non_eq_binop = find_in_expression(e, - [](const expr::binary_operator& binop) { - return binop.op != expr::oper_t::EQ; - } - ); - - return non_eq_binop == nullptr; -} - statement_restrictions::statement_restrictions(private_tag, schema_ptr schema, bool allow_filtering) : _schema(schema) , _partition_range_is_simple(true) @@ -846,9 +834,11 @@ statement_restrictions::statement_restrictions(private_tag, bool has_mc_clustering = false; bool ck_has_slice = false; bool ck_is_on_collection = false; + bool ck_is_all_eq = true; const column_definition* ck_last_column = nullptr; const predicate* first_mc_pred = nullptr; bool pk_is_empty = true; + bool pk_is_all_eq = true; bool pk_has_slice_or_needs_filtering = false; bool has_token = false; std::optional token_pred; @@ -877,6 +867,9 @@ statement_restrictions::statement_restrictions(private_tag, if (pred.is_slice) { ck_has_slice = true; } + if (!pred.equality) { + ck_is_all_eq = false; + } } else { if (!has_mc_clustering) { @@ -919,6 +912,7 @@ statement_restrictions::statement_restrictions(private_tag, _clustering_columns_restrictions = expr::make_conjunction(_clustering_columns_restrictions, pred.filter); mc_ck_preds.push_back(pred); ck_has_slice = true; + ck_is_all_eq = false; } else { throw exceptions::invalid_request_exception(format("Unsupported multi-column relation: ", pred.filter)); } @@ -938,6 +932,9 @@ statement_restrictions::statement_restrictions(private_tag, _partition_key_restrictions = expr::make_conjunction(_partition_key_restrictions, pred.filter); pk_is_empty = false; has_token = true; + if (!pred.equality) { + pk_is_all_eq = false; + } if (token_pred) { token_pred = make_conjunction(std::move(*token_pred), pred); } else { @@ -979,6 +976,9 @@ statement_restrictions::statement_restrictions(private_tag, if (pred.is_slice || (pred.op && needs_filtering(*pred.op))) { pk_has_slice_or_needs_filtering = true; } + if (!pred.equality) { + pk_is_all_eq = false; + } } else if (def->is_clustering_key()) { if (has_mc_clustering) { throw exceptions::invalid_request_exception( @@ -1021,6 +1021,9 @@ statement_restrictions::statement_restrictions(private_tag, if (pred.op == oper_t::CONTAINS || pred.op == oper_t::CONTAINS_KEY) { ck_is_on_collection = true; } + if (!pred.equality) { + ck_is_all_eq = false; + } if (ck_last_column == nullptr || schema->position(*new_column) > schema->position(*ck_last_column)) { ck_last_column = new_column; } @@ -1083,6 +1086,8 @@ statement_restrictions::statement_restrictions(private_tag, } _has_multi_column = has_mc_clustering; _ck_is_on_collection = ck_is_on_collection; + _ck_is_all_eq = ck_is_all_eq; + _pk_is_all_eq = pk_is_all_eq; _pk_has_slice_or_needs_filtering = pk_has_slice_or_needs_filtering; if (_check_indexes) { auto cf = db.find_column_family(schema); @@ -1271,7 +1276,7 @@ statement_restrictions::clustering_key_restrictions_has_IN() const { bool statement_restrictions::clustering_key_restrictions_has_only_eq() const { - return has_only_eq_binops(_clustering_columns_restrictions); + return _ck_is_all_eq; } bool @@ -1504,7 +1509,7 @@ bool statement_restrictions::partition_key_restrictions_is_empty() const { } bool statement_restrictions::partition_key_restrictions_is_all_eq() const { - return has_only_eq_binops(_partition_key_restrictions); + return _pk_is_all_eq; } size_t statement_restrictions::partition_key_restrictions_size() const { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index de24a75e9c..b14bbadc1d 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -179,6 +179,8 @@ private: bool _has_queriable_regular_index = false, _has_queriable_pk_index = false, _has_queriable_ck_index = false; bool _has_multi_column; ///< True iff _clustering_columns_restrictions has a multi-column restriction. bool _ck_is_on_collection = false; ///< True iff _clustering_columns_restrictions has a collection restriction (CONTAINS/CONTAINS_KEY). + bool _ck_is_all_eq = true; ///< True iff all CK restrictions use EQ operator only. + bool _pk_is_all_eq = true; ///< True iff all PK restrictions use EQ operator only. std::vector _where; ///< The entire WHERE clause (factorized). From 503add224dc4f92f49cc01e5a2fecc6d483d9aa1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 13 May 2026 13:30:10 +0300 Subject: [PATCH 14/14] cql3: statement_restrictions: simplify find_idx to return only the index MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The expression returned as the second element of find_idx()'s pair was stored in view_indexed_table_select_statement::_used_index_restrictions but never read — dead code. Simplify find_idx() to return just the optional, and remove the dead member and constructor parameter from view_indexed_table_select_statement. The now unused _idx_restrictions is also removed. --- cql3/restrictions/statement_restrictions.cc | 5 ++--- cql3/restrictions/statement_restrictions.hh | 4 +--- cql3/statements/select_statement.cc | 5 +---- cql3/statements/select_statement.hh | 2 -- test/boost/statement_restrictions_test.cc | 4 ++-- 5 files changed, 6 insertions(+), 14 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index 7eadd3536b..8ed7d7efba 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -1186,7 +1186,6 @@ statement_restrictions::statement_restrictions(private_tag, if (idx_result.index) { _idx_opt = std::make_unique(std::move(*idx_result.index)); } - _idx_restrictions = std::move(idx_result.restrictions); _idx_column_predicates = std::move(idx_result.indexed_column_predicates); } @@ -1401,9 +1400,9 @@ static do_find_idx_result do_find_idx( return {chosen_index, chosen_index_restrictions, std::move(chosen_index_predicates)}; } -std::pair, expr::expression> +std::optional statement_restrictions::find_idx(const secondary_index::secondary_index_manager& sim) const { - return {_idx_opt ? std::optional(*_idx_opt) : std::nullopt, _idx_restrictions}; + return _idx_opt ? std::optional(*_idx_opt) : std::nullopt; } bool statement_restrictions::has_eq_restriction_on_column(const column_definition& column) const { diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index b14bbadc1d..5a6fff0167 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -227,7 +227,6 @@ private: std::vector _column_defs_for_filtering; schema_ptr _view_schema; std::unique_ptr _idx_opt; - expr::expression _idx_restrictions = expr::conjunction({}); std::vector _idx_column_predicates; ///< Predicates for the chosen index's target column. get_partition_key_ranges_fn_t _get_partition_key_ranges_fn; get_clustering_bounds_fn_t _get_clustering_bounds_fn; @@ -344,9 +343,8 @@ public: * Determines the index to be used with the restriction. * @param db - the data_dictionary::database context (for extracting index manager) * @return If an index can be used, an optional containing this index, otherwise an empty optional. - * In case the index is returned, second parameter returns the index restriction it uses. */ - std::pair, expr::expression> find_idx(const secondary_index::secondary_index_manager& sim) const; + std::optional find_idx(const secondary_index::secondary_index_manager& sim) const; /** * Checks if the partition key has some unrestricted components. diff --git a/cql3/statements/select_statement.cc b/cql3/statements/select_statement.cc index 2968ce191c..b94d82434c 100644 --- a/cql3/statements/select_statement.cc +++ b/cql3/statements/select_statement.cc @@ -1037,7 +1037,7 @@ view_indexed_table_select_statement::prepare(data_dictionary::database db, { auto cf = db.find_column_family(schema); auto& sim = cf.get_index_manager(); - auto [index_opt, used_index_restrictions] = restrictions->find_idx(sim); + auto index_opt = restrictions->find_idx(sim); if (!index_opt) { throw std::runtime_error("No index found."); @@ -1063,7 +1063,6 @@ view_indexed_table_select_statement::prepare(data_dictionary::database db, std::move(per_partition_limit), stats, *index_opt, - std::move(used_index_restrictions), view_schema, std::move(attrs)); @@ -1080,12 +1079,10 @@ view_indexed_table_select_statement::view_indexed_table_select_statement(schema_ std::optional per_partition_limit, cql_stats &stats, const secondary_index::index& index, - expr::expression used_index_restrictions, schema_ptr view_schema, std::unique_ptr attrs) : select_statement{schema, bound_terms, parameters, selection, restrictions, group_by_cell_indices, is_reversed, ordering_comparator, limit, per_partition_limit, stats, std::move(attrs)} , _index{index} - , _used_index_restrictions(std::move(used_index_restrictions)) , _view_schema(view_schema) { throwing_assert(_view_schema); diff --git a/cql3/statements/select_statement.hh b/cql3/statements/select_statement.hh index 2b61a9c154..e8b0a31ebe 100644 --- a/cql3/statements/select_statement.hh +++ b/cql3/statements/select_statement.hh @@ -186,7 +186,6 @@ public: class view_indexed_table_select_statement : public select_statement { secondary_index::index _index; - expr::expression _used_index_restrictions; schema_ptr _view_schema; noncopyable_function _get_partition_ranges_for_posting_list; noncopyable_function _get_partition_slice_for_posting_list; @@ -219,7 +218,6 @@ public: std::optional per_partition_limit, cql_stats &stats, const secondary_index::index& index, - expr::expression used_index_restrictions, schema_ptr view_schema, std::unique_ptr attrs); diff --git a/test/boost/statement_restrictions_test.cc b/test/boost/statement_restrictions_test.cc index 3d7efe5bf6..d1bd8bfded 100644 --- a/test/boost/statement_restrictions_test.cc +++ b/test/boost/statement_restrictions_test.cc @@ -414,7 +414,7 @@ SEASTAR_TEST_CASE(index_selection) { /*for_view=*/false, /*allow_filtering=*/true, restrictions::check_indexes::yes); - auto [idx, restrictions_expr] = sr->find_idx(sim); + auto idx = sr->find_idx(sim); return {where_clause, idx ? std::optional(idx->metadata().name()) : std::nullopt, sr->uses_secondary_indexing(), @@ -897,7 +897,7 @@ SEASTAR_TEST_CASE(combinatorial_restrictions) { ctx_msg("has_eq_restriction_on_column(fs)")); // --- Index selection --- - auto [idx_opt, idx_expr] = sr->find_idx(sim); + auto idx_opt = sr->find_idx(sim); // Determine expected index. The scoring algorithm: // - do_find_idx iterates _index_restrictions (PK group, then