diff --git a/cql3/expr/expr-utils.hh b/cql3/expr/expr-utils.hh index feb99357bf..3cd070daf9 100644 --- a/cql3/expr/expr-utils.hh +++ b/cql3/expr/expr-utils.hh @@ -41,48 +41,6 @@ extern bool is_satisfied_by( const expression& restr, const evaluation_inputs& inputs); -/// A set of discrete values. -using value_list = std::vector; // Sorted and deduped using value comparator. - -/// General set of values. Empty set and single-element sets are always value_list. interval is -/// never singular and never has start > end. Universal set is a interval with both bounds null. -using value_set = std::variant>; - -/// A set of all column values that would satisfy an expression. The _token_values variant finds -/// matching values for the partition token function call instead of the column. -/// -/// An expression restricts possible values of a column or token: -/// - `A>5` restricts A from below -/// - `A>5 AND A>6 AND B<10 AND A=12 AND B>0` restricts A to 12 and B to between 0 and 10 -/// - `A IN (1, 3, 5)` restricts A to 1, 3, or 5 -/// - `A IN (1, 3, 5) AND A>3` restricts A to just 5 -/// - `A=1 AND A<=0` restricts A to an empty list; no value is able to satisfy the expression -/// - `A>=NULL` also restricts A to an empty list; all comparisons to NULL are false -/// - an expression without A "restricts" A to unbounded range -extern value_set possible_column_values(const column_definition*, const expression&, const query_options&); -extern value_set possible_partition_token_values(const expression&, const query_options&, const schema& table_schema); - -/// Turns value_set into a range, unless it's a multi-valued list (in which case this throws). -extern interval to_range(const value_set&); - -/// A range of all X such that X op val. -interval to_range(oper_t op, const clustering_key_prefix& val); - -/// True iff the index can support the entire expression. -extern bool is_supported_by(const expression&, const secondary_index::index&); - -/// True iff any of the indices from the manager can support the entire expression. If allow_local, use all -/// indices; otherwise, use only global indices. -extern bool has_supporting_index( - const expression&, const secondary_index::secondary_index_manager&, allow_local_index allow_local); - -// Looks at each column individually and checks whether some index can support restrictions on this single column. -// Expression has to consist only of single column restrictions. -extern bool index_supports_some_column( - const expression&, - const secondary_index::secondary_index_manager&, - allow_local_index allow_local); - extern bool recurse_until(const expression& e, const noncopyable_function& predicate_fun); // Looks into the expression and finds the given expression variant @@ -138,15 +96,6 @@ inline const binary_operator* find(const expression& e, oper_t op) { return find_binop(e, [&] (const binary_operator& o) { return o.op == op; }); } -inline bool needs_filtering(oper_t op) { - return (op == oper_t::CONTAINS) || (op == oper_t::CONTAINS_KEY) || (op == oper_t::LIKE) || - (op == oper_t::IS_NOT) || (op == oper_t::NEQ) ; -} - -inline auto find_needs_filtering(const expression& e) { - return find_binop(e, [] (const binary_operator& bo) { return needs_filtering(bo.op); }); -} - inline bool is_slice(oper_t op) { return (op == oper_t::LT) || (op == oper_t::LTE) || (op == oper_t::GT) || (op == oper_t::GTE); } @@ -169,10 +118,6 @@ inline bool is_compare(oper_t op) { } } -inline bool is_multi_column(const binary_operator& op) { - return expr::is(op.lhs); -} - // Check whether the given expression represents // a call to the token() function. bool is_token_function(const function_call&); @@ -190,10 +135,6 @@ inline bool has_partition_token(const expression& e, const schema& table_schema) return find_binop(e, [&] (const binary_operator& o) { return is_partition_token_for_schema(o.lhs, table_schema); }); } -inline bool has_slice_or_needs_filtering(const expression& e) { - return find_binop(e, [] (const binary_operator& o) { return is_slice(o.op) || needs_filtering(o.op); }); -} - inline bool is_clustering_order(const binary_operator& op) { return op.order == comparison_order::clustering; } @@ -210,9 +151,6 @@ std::vector boolean_factors(expression e); /// Run the given function for each element in the top level conjunction. void for_each_boolean_factor(const expression& e, const noncopyable_function& for_each_func); -/// True iff binary_operator involves a collection. -extern bool is_on_collection(const binary_operator&); - // Checks whether the given column occurs in the expression. // Uses column_defintion::operator== for comparison, columns with the same name but different schema will not be equal. bool contains_column(const column_definition& column, const expression& e); @@ -221,12 +159,6 @@ bool contains_column(const column_definition& column, const expression& e); // The expression must be prepared, so that function names are converted to function pointers. bool contains_nonpure_function(const expression&); -// Checks whether the given column has an EQ restriction in the expression. -// EQ restriction is `col = ...` or `(col, col2) = ...` -// IN restriction is NOT an EQ restriction, this function will not look for IN restrictions. -// Uses column_defintion::operator== for comparison, columns with the same name but different schema will not be equal. -bool has_eq_restriction_on_column(const column_definition& column, const expression& e); - /// Replaces every column_definition in an expression with this one. Throws if any LHS is not a single /// column_value. extern expression replace_column_def(const expression&, const column_definition*); @@ -281,14 +213,6 @@ inline oper_t pick_operator(statements::bound b, bool inclusive) { (inclusive ? oper_t::LTE : oper_t::LT); } -// Extracts all binary operators which have the given column on their left hand side. -// Extracts only single-column restrictions. -// Does not include multi-column restrictions. -// Does not include token() restrictions. -// Does not include boolean constant restrictions. -// For example "WHERE c = 1 AND (a, c) = (2, 1) AND token(p) < 2 AND FALSE" will return {"c = 1"}. -std::vector extract_single_column_restrictions_for_column(const expression&, const column_definition&); - std::optional get_bool_value(const constant&); utils::chunked_vector get_list_elements(const cql3::raw_value&); @@ -316,38 +240,16 @@ void fill_prepare_context(expression&, cql3::prepare_context&); // For example an expression can contain calls to nonpure functions. bool contains_bind_marker(const expression& e); -// Checks whether this expression contains restrictions on one single column. -// There might be more than one restriction, but exactly one column. -// The expression must be prepared. -bool is_single_column_restriction(const expression&); - -// Gets the only column from a single_column_restriction expression. -const column_value& get_the_only_column(const expression&); - // Extracts column_defs from the expression and sorts them using schema_pos_column_definition_comparator. std::vector get_sorted_column_defs(const expression&); // Extracts column_defs and returns the last one according to schema_pos_column_definition_comparator. const column_definition* get_last_column_def(const expression&); -// Extracts map of single column restrictions for each column from expression -single_column_restrictions_map get_single_column_restrictions_map(const expression&); - -// Checks whether this expression is empty - doesn't restrict anything -bool is_empty_restriction(const expression&); - // Finds common columns between both expressions and prints them to a string. // Uses schema_pos_column_definition_comparator for comparison. sstring get_columns_in_commons(const expression& a, const expression& b); -// Finds the value of the given column in the expression -// In case of multpiple possible values calls on_internal_error -bytes_opt value_for(const column_definition&, const expression&, const query_options&); - -bool contains_multi_column_restriction(const expression&); - -bool has_only_eq_binops(const expression&); - /// Finds the data type of writetime(x) or ttl(x) data_type column_mutation_attribute_type(const column_mutation_attribute& e); diff --git a/cql3/expr/expression.cc b/cql3/expr/expression.cc index 4c6dd56f7d..50dd4536d9 100644 --- a/cql3/expr/expression.cc +++ b/cql3/expr/expression.cc @@ -509,37 +509,6 @@ bool is_not_null(const expression& lhs, const expression& rhs, const evaluation_ return !lhs_val.is_null(); } -const value_set empty_value_set = value_list{}; -const value_set unbounded_value_set = interval::make_open_ended_both_sides(); - -struct intersection_visitor { - const abstract_type* type; - value_set operator()(const value_list& a, const value_list& b) const { - value_list common; - common.reserve(std::max(a.size(), b.size())); - boost::set_intersection(a, b, back_inserter(common), type->as_less_comparator()); - return std::move(common); - } - - value_set operator()(const interval& a, const value_list& b) const { - const auto common = b | filtered([&] (const managed_bytes& el) { return a.contains(el, type->as_tri_comparator()); }); - return value_list(common.begin(), common.end()); - } - - value_set operator()(const value_list& a, const interval& b) const { - return (*this)(b, a); - } - - value_set operator()(const interval& a, const interval& b) const { - const auto common_range = a.intersection(b, type->as_tri_comparator()); - return common_range ? *common_range : empty_value_set; - } -}; - -value_set intersection(value_set a, value_set b, const abstract_type* type) { - return std::visit(intersection_visitor{type}, std::move(a), std::move(b)); -} - } // anonymous namespace bool is_satisfied_by(const expression& restr, const evaluation_inputs& inputs) { @@ -547,46 +516,6 @@ bool is_satisfied_by(const expression& restr, const evaluation_inputs& inputs) { return evaluate(restr, inputs).to_managed_bytes_opt() == true_value; } -namespace { - -template -value_list to_sorted_vector(Range r, const serialized_compare& comparator) { - BOOST_CONCEPT_ASSERT((boost::ForwardRangeConcept)); - value_list tmp(r.begin(), r.end()); // Need random-access range to sort (r is not necessarily random-access). - const auto unique = boost::unique(boost::sort(tmp, comparator)); - return value_list(unique.begin(), unique.end()); -} - -const auto non_null = boost::adaptors::filtered([] (const managed_bytes_opt& b) { return b.has_value(); }); - -const auto deref = boost::adaptors::transformed([] (const managed_bytes_opt& b) { return b.value(); }); - -/// Returns possible values from t, which must be RHS of IN. -value_list get_IN_values( - const expression& e, const query_options& options, const serialized_compare& comparator, - sstring_view column_name) { - const cql3::raw_value in_list = evaluate(e, options); - if (in_list.is_null()) { - return value_list(); - } - utils::chunked_vector list_elems = get_list_elements(in_list); - return to_sorted_vector(std::move(list_elems) | non_null | deref, comparator); -} - -/// Returns possible values for k-th column from t, which must be RHS of IN. -value_list get_IN_values(const expression& e, size_t k, const query_options& options, - const serialized_compare& comparator) { - const cql3::raw_value in_list = evaluate(e, options); - const auto split_values = get_list_of_tuples_elements(in_list, *type_of(e)); // Need lvalue from which to make std::view. - const auto result_range = split_values - | boost::adaptors::transformed([k] (const std::vector& v) { return v[k]; }) | non_null | deref; - return to_sorted_vector(std::move(result_range), comparator); -} - -static constexpr bool inclusive = true, exclusive = false; - -} // anonymous namespace - const column_value& get_subscripted_column(const subscript& sub) { if (!is(sub.val)) { on_internal_error(expr_logger, @@ -641,381 +570,6 @@ void for_each_boolean_factor(const expression& e, const noncopyable_function -interval> to_range(oper_t op, T&& val) { - using U = std::remove_cvref_t; - static constexpr bool inclusive = true, exclusive = false; - switch (op) { - case oper_t::EQ: - return interval::make_singular(std::forward(val)); - case oper_t::GT: - return interval::make_starting_with(interval_bound(std::forward(val), exclusive)); - case oper_t::GTE: - return interval::make_starting_with(interval_bound(std::forward(val), inclusive)); - case oper_t::LT: - return interval::make_ending_with(interval_bound(std::forward(val), exclusive)); - case oper_t::LTE: - return interval::make_ending_with(interval_bound(std::forward(val), inclusive)); - default: - throw std::logic_error(format("to_range: unknown comparison operator {}", op)); - } -} - -interval to_range(oper_t op, const clustering_key_prefix& val) { - return to_range(op, val); -} - -// When cdef == nullptr it finds possible token values instead of column values. -// When finding token values the table_schema_opt argument has to point to a valid schema, -// but it isn't used when finding values for column. -// The schema is needed to find out whether a call to token() function represents -// the partition token. -static value_set possible_lhs_values(const column_definition* cdef, - const expression& expr, - const query_options& options, - const schema* table_schema_opt) { - const auto type = cdef ? &cdef->type->without_reversed() : long_type.get(); - return expr::visit(overloaded_functor{ - [] (const constant& constant_val) { - std::optional bool_val = get_bool_value(constant_val); - if (bool_val.has_value()) { - return *bool_val ? unbounded_value_set : empty_value_set; - } - - on_internal_error(expr_logger, - "possible_lhs_values: a constant that is not a bool value cannot serve as a restriction by itself"); - }, - [&] (const conjunction& conj) { - return boost::accumulate(conj.children, unbounded_value_set, - [&] (const value_set& acc, const expression& child) { - return intersection( - std::move(acc), possible_lhs_values(cdef, child, options, table_schema_opt), type); - }); - }, - [&] (const binary_operator& oper) -> value_set { - return expr::visit(overloaded_functor{ - [&] (const column_value& col) -> value_set { - if (!cdef || cdef != col.col) { - return unbounded_value_set; - } - if (is_compare(oper.op)) { - managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); - if (!val) { - return empty_value_set; // All NULL comparisons fail; no column values match. - } - return oper.op == oper_t::EQ ? value_set(value_list{*val}) - : to_range(oper.op, std::move(*val)); - } else if (oper.op == oper_t::IN) { - return get_IN_values(oper.rhs, options, type->as_less_comparator(), cdef->name_as_text()); - } else if (oper.op == oper_t::CONTAINS || oper.op == oper_t::CONTAINS_KEY) { - managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); - if (!val) { - return empty_value_set; // All NULL comparisons fail; no column values match. - } - return value_set(value_list{*val}); - } - throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper)); - }, - [&] (const subscript& s) -> value_set { - const column_value& col = get_subscripted_column(s); - - if (!cdef || cdef != col.col) { - return unbounded_value_set; - } - - managed_bytes_opt sval = evaluate(s.sub, options).to_managed_bytes_opt(); - if (!sval) { - return empty_value_set; // NULL can't be a map key - } - - if (oper.op == oper_t::EQ) { - managed_bytes_opt rval = evaluate(oper.rhs, options).to_managed_bytes_opt(); - if (!rval) { - return empty_value_set; // All NULL comparisons fail; no column values match. - } - managed_bytes_opt elements[] = {sval, rval}; - managed_bytes val = tuple_type_impl::build_value_fragmented(elements); - return value_set(value_list{val}); - } - throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper)); - }, - [&] (const tuple_constructor& tuple) -> value_set { - if (!cdef) { - return unbounded_value_set; - } - const auto found = boost::find_if( - tuple.elements, [&] (const expression& c) { return expr::as(c).col == cdef; }); - if (found == tuple.elements.end()) { - return unbounded_value_set; - } - const auto column_index_on_lhs = std::distance(tuple.elements.begin(), found); - if (is_compare(oper.op)) { - // RHS must be a tuple due to upstream checks. - managed_bytes_opt val = get_tuple_elements(evaluate(oper.rhs, options), *type_of(oper.rhs)).at(column_index_on_lhs); - if (!val) { - return empty_value_set; // All NULL comparisons fail; no column values match. - } - if (oper.op == oper_t::EQ) { - return value_list{std::move(*val)}; - } - if (column_index_on_lhs > 0) { - // A multi-column comparison restricts only the first column, because - // comparison is lexicographical. - return unbounded_value_set; - } - return to_range(oper.op, std::move(*val)); - } else if (oper.op == oper_t::IN) { - return get_IN_values(oper.rhs, column_index_on_lhs, options, type->as_less_comparator()); - } - return unbounded_value_set; - }, - [&] (const function_call& token_fun_call) -> value_set { - if (!is_partition_token_for_schema(token_fun_call, *table_schema_opt)) { - on_internal_error(expr_logger, "possible_lhs_values: function calls are not supported as the LHS of a binary expression"); - } - - if (cdef) { - return unbounded_value_set; - } - const auto val = evaluate(oper.rhs, options).to_managed_bytes_opt(); - if (!val) { - return empty_value_set; // All NULL comparisons fail; no token values match. - } - if (oper.op == oper_t::EQ) { - return value_list{*val}; - } else if (oper.op == oper_t::GT) { - return interval::make_starting_with(interval_bound(std::move(*val), exclusive)); - } else if (oper.op == oper_t::GTE) { - return interval::make_starting_with(interval_bound(std::move(*val), inclusive)); - } - static const managed_bytes MININT = managed_bytes(serialized(std::numeric_limits::min())), - MAXINT = managed_bytes(serialized(std::numeric_limits::max())); - // Undocumented feature: when the user types `token(...) < MININT`, we interpret - // that as MAXINT for some reason. - const auto adjusted_val = (*val == MININT) ? MAXINT : *val; - if (oper.op == oper_t::LT) { - return interval::make_ending_with(interval_bound(std::move(adjusted_val), exclusive)); - } else if (oper.op == oper_t::LTE) { - return interval::make_ending_with(interval_bound(std::move(adjusted_val), inclusive)); - } - throw std::logic_error(format("get_token_interval invalid operator {}", oper.op)); - }, - [&] (const binary_operator&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: nested binary operators are not supported"); - }, - [&] (const conjunction&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: conjunctions are not supported as the LHS of a binary expression"); - }, - [] (const constant&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: constants are not supported as the LHS of a binary expression"); - }, - [] (const unresolved_identifier&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: unresolved identifiers are not supported as the LHS of a binary expression"); - }, - [] (const column_mutation_attribute&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: writetime/ttl are not supported as the LHS of a binary expression"); - }, - [] (const cast&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: typecasts are not supported as the LHS of a binary expression"); - }, - [] (const field_selection&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: field selections are not supported as the LHS of a binary expression"); - }, - [] (const bind_variable&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: bind variables are not supported as the LHS of a binary expression"); - }, - [] (const untyped_constant&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: untyped constants are not supported as the LHS of a binary expression"); - }, - [] (const collection_constructor&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: collection constructors are not supported as the LHS of a binary expression"); - }, - [] (const usertype_constructor&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: user type constructors are not supported as the LHS of a binary expression"); - }, - [] (const temporary&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: temporaries are not supported as the LHS of a binary expression"); - }, - }, oper.lhs); - }, - [] (const column_value&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a column cannot serve as a restriction by itself"); - }, - [] (const subscript&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a subscript cannot serve as a restriction by itself"); - }, - [] (const unresolved_identifier&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: an unresolved identifier cannot serve as a restriction"); - }, - [] (const column_mutation_attribute&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: the writetime/ttl functions cannot serve as a restriction by itself"); - }, - [] (const function_call&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a function call cannot serve as a restriction by itself"); - }, - [] (const cast&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a typecast cannot serve as a restriction by itself"); - }, - [] (const field_selection&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a field selection cannot serve as a restriction by itself"); - }, - [] (const bind_variable&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a bind variable cannot serve as a restriction by itself"); - }, - [] (const untyped_constant&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: an untyped constant cannot serve as a restriction by itself"); - }, - [] (const tuple_constructor&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: an tuple constructor cannot serve as a restriction by itself"); - }, - [] (const collection_constructor&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a collection constructor cannot serve as a restriction by itself"); - }, - [] (const usertype_constructor&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a user type constructor cannot serve as a restriction by itself"); - }, - [] (const temporary&) -> value_set { - on_internal_error(expr_logger, "possible_lhs_values: a temporary cannot serve as a restriction by itself"); - }, - }, expr); -} - -value_set possible_column_values(const column_definition* col, const expression& e, const query_options& options) { - return possible_lhs_values(col, e, options, nullptr); -} - -value_set possible_partition_token_values(const expression& e, const query_options& options, const schema& table_schema) { - return possible_lhs_values(nullptr, e, options, &table_schema); -} - -interval to_range(const value_set& s) { - return std::visit(overloaded_functor{ - [] (const interval& r) { return r; }, - [] (const value_list& lst) { - if (lst.size() != 1) { - throw std::logic_error(format("to_range called on list of size {}", lst.size())); - } - return interval::make_singular(lst[0]); - }, - }, s); -} - -namespace { -constexpr inline secondary_index::index::supports_expression_v operator&&(secondary_index::index::supports_expression_v v1, secondary_index::index::supports_expression_v v2) { - using namespace secondary_index; - auto True = index::supports_expression_v::from_bool(true); - return v1 == True && v2 == True ? True : index::supports_expression_v::from_bool(false); -} - -secondary_index::index::supports_expression_v is_supported_by_helper(const expression& expr, const secondary_index::index& idx) { - using ret_t = secondary_index::index::supports_expression_v; - using namespace secondary_index; - return expr::visit(overloaded_functor{ - [&] (const conjunction& conj) -> ret_t { - if (conj.children.empty()) { - return index::supports_expression_v::from_bool(true); - } - auto init = is_supported_by_helper(conj.children[0], idx); - return std::accumulate(std::begin(conj.children) + 1, std::end(conj.children), init, - [&] (ret_t acc, const expression& child) -> ret_t { - return acc && is_supported_by_helper(child, idx); - }); - }, - [&] (const binary_operator& oper) { - return expr::visit(overloaded_functor{ - [&] (const column_value& col) { - return idx.supports_expression(*col.col, oper.op); - }, - [&] (const tuple_constructor& tuple) { - if (tuple.elements.size() == 1) { - if (auto column = expr::as_if(&tuple.elements[0])) { - return idx.supports_expression(*column->col, oper.op); - } - } - // We don't use index table for multi-column restrictions, as it cannot avoid filtering. - return index::supports_expression_v::from_bool(false); - }, - [&] (const function_call&) { return index::supports_expression_v::from_bool(false); }, - [&] (const subscript& s) -> ret_t { - const column_value& col = get_subscripted_column(s); - return idx.supports_subscript_expression(*col.col, oper.op); - }, - [&] (const binary_operator&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: nested binary operators are not supported"); - }, - [&] (const conjunction&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: conjunctions are not supported as the LHS of a binary expression"); - }, - [] (const constant&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: constants are not supported as the LHS of a binary expression"); - }, - [] (const unresolved_identifier&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: an unresolved identifier is not supported as the LHS of a binary expression"); - }, - [&] (const column_mutation_attribute&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: writetime/ttl are not supported as the LHS of a binary expression"); - }, - [&] (const cast&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: typecasts are not supported as the LHS of a binary expression"); - }, - [&] (const field_selection&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: field selections are not supported as the LHS of a binary expression"); - }, - [&] (const bind_variable&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: bind variables are not supported as the LHS of a binary expression"); - }, - [&] (const untyped_constant&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: untyped constants are not supported as the LHS of a binary expression"); - }, - [&] (const collection_constructor&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: collection constructors are not supported as the LHS of a binary expression"); - }, - [&] (const usertype_constructor&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: user type constructors are not supported as the LHS of a binary expression"); - }, - [&] (const temporary&) -> ret_t { - on_internal_error(expr_logger, "is_supported_by: temporaries are not supported as the LHS of a binary expression"); - }, - }, oper.lhs); - }, - [] (const auto& default_case) { return index::supports_expression_v::from_bool(false); } - }, expr); -} -} - -bool is_supported_by(const expression& expr, const secondary_index::index& idx) { - auto s = is_supported_by_helper(expr, idx); - return s != secondary_index::index::supports_expression_v::from_bool(false); -} - - -bool has_supporting_index( - const expression& expr, - const secondary_index::secondary_index_manager& index_manager, - allow_local_index allow_local) { - const auto indexes = index_manager.list_indexes(); - const auto support = std::bind(is_supported_by, std::ref(expr), std::placeholders::_1); - return allow_local ? boost::algorithm::any_of(indexes, support) - : boost::algorithm::any_of( - indexes | filtered([] (const secondary_index::index& i) { return !i.metadata().local(); }), - support); -} - -bool index_supports_some_column( - const expression& e, - const secondary_index::secondary_index_manager& index_manager, - allow_local_index allow_local) { - single_column_restrictions_map single_col_restrictions = get_single_column_restrictions_map(e); - - for (auto&& [col, col_restrictions] : single_col_restrictions) { - if (has_supporting_index(col_restrictions, index_manager, allow_local)) { - return true; - } - } - - return false; -} - std::ostream& operator<<(std::ostream& os, const column_value& cv) { os << cv.col->name_as_text(); return os; @@ -1244,16 +798,6 @@ sstring to_string(const expression& expr) { return fmt::format("{}", expr); } -bool is_on_collection(const binary_operator& b) { - if (b.op == oper_t::CONTAINS || b.op == oper_t::CONTAINS_KEY) { - return true; - } - if (auto tuple = expr::as_if(&b.lhs)) { - return boost::algorithm::any_of(tuple->elements, [] (const expression& v) { return expr::is(v); }); - } - return false; -} - bool contains_column(const column_definition& column, const expression& e) { const column_value* find_res = find_in_expression(e, [&](const column_value& column_val) -> bool { @@ -1280,54 +824,6 @@ bool contains_nonpure_function(const expression& e) { return find_res != nullptr; } -bool has_eq_restriction_on_column(const column_definition& column, const expression& e) { - std::function column_in_lhs = [&](const expression& e) -> bool { - return visit(overloaded_functor { - [&](const column_value& cv) { - // Use column_defintion::operator== for comparison, - // columns with the same name but different schema will not be equal. - return *cv.col == column; - }, - [&](const tuple_constructor& tc) { - for (const expression& elem : tc.elements) { - if (column_in_lhs(elem)) { - return true; - } - } - - return false; - }, - [&](const auto&) {return false;} - }, e); - }; - - // Look for binary operator describing eq relation with this column on lhs - const binary_operator* eq_restriction_search_res = find_binop(e, [&](const binary_operator& b) { - if (b.op != oper_t::EQ) { - return false; - } - - if (!column_in_lhs(b.lhs)) { - return false; - } - - // These conditions are not allowed to occur in the current code, - // but they might be allowed in the future. - // They are added now to avoid surprises later. - // - // These conditions detect cases like: - // WHERE column1 = column2 - // WHERE column1 = row_number() - if (contains_column(column, b.rhs) || contains_nonpure_function(b.rhs)) { - return false; - } - - return true; - }); - - return eq_restriction_search_res != nullptr; -} - expression replace_column_def(const expression& expr, const column_definition* new_cdef) { return search_and_replace(expr, [&] (const expression& expr) -> std::optional { if (expr::is(expr)) { @@ -1498,69 +994,6 @@ expression search_and_replace(const expression& e, } } -std::vector extract_single_column_restrictions_for_column(const expression& expr, - const column_definition& column) { - struct visitor { - std::vector restrictions; - const column_definition& column; - const binary_operator* current_binary_operator; - - void operator()(const constant&) {} - - void operator()(const conjunction& conj) { - for (const expression& child : conj.children) { - expr::visit(*this, child); - } - } - - void operator()(const binary_operator& oper) { - if (current_binary_operator != nullptr) { - on_internal_error(expr_logger, - "extract_single_column_restrictions_for_column: nested binary operators are not supported"); - } - - current_binary_operator = &oper; - expr::visit(*this, oper.lhs); - current_binary_operator = nullptr; - } - - void operator()(const column_value& cv) { - if (*cv.col == column && current_binary_operator != nullptr) { - restrictions.emplace_back(*current_binary_operator); - } - } - - void operator()(const subscript& s) { - const column_value& cv = get_subscripted_column(s); - if (*cv.col == column && current_binary_operator != nullptr) { - restrictions.emplace_back(*current_binary_operator); - } - } - - void operator()(const unresolved_identifier&) {} - void operator()(const column_mutation_attribute&) {} - void operator()(const function_call&) {} - void operator()(const cast&) {} - void operator()(const field_selection&) {} - void operator()(const bind_variable&) {} - void operator()(const untyped_constant&) {} - void operator()(const tuple_constructor&) {} - void operator()(const collection_constructor&) {} - void operator()(const usertype_constructor&) {} - void operator()(const temporary&) {} - }; - - visitor v { - .restrictions = std::vector(), - .column = column, - .current_binary_operator = nullptr, - }; - - expr::visit(v, expr); - - return std::move(v.restrictions); -} - constant::constant(cql3::raw_value val, data_type typ) : value(std::move(val)), type(std::move(typ)) { @@ -2404,55 +1837,6 @@ type_of(const expression& e) { }, e); } -static std::optional> get_single_column_restriction_column(const expression& e) { - if (find_in_expression(e, [](const auto&) {return true;})) { - on_internal_error(expr_logger, - seastar::format("get_single_column_restriction_column expects a prepared expression, but it's not: {}", e)); - } - - const column_value* the_only_column = nullptr; - bool expression_is_single_column = false; - - for_each_expression(e, - [&](const column_value& cval) { - if (the_only_column == nullptr) { - // It's the first column_value we've encountered - set it as the only column - the_only_column = &cval; - expression_is_single_column = true; - return; - } - - if (cval.col != the_only_column->col) { - // In case any other column is encountered the restriction - // restricts more than one column. - expression_is_single_column = false; - } - } - ); - - if (expression_is_single_column) { - return std::cref(*the_only_column); - } else { - return std::nullopt; - } -} - -bool is_single_column_restriction(const expression& e) { - return get_single_column_restriction_column(e).has_value(); -} - -const column_value& get_the_only_column(const expression& e) { - std::optional> result = get_single_column_restriction_column(e); - - if (!result.has_value()) { - on_internal_error(expr_logger, - format("get_the_only_column - bad expression: {}", e)); - } - - return *result; -} - - bool schema_pos_column_definition_comparator::operator()(const column_definition *def1, const column_definition *def2) const { auto column_pos = [](const column_definition* cdef) -> uint32_t { if (cdef->is_primary_key()) { @@ -2498,28 +1882,6 @@ const column_definition* get_last_column_def(const expression& e) { return sorted_defs.back(); } -single_column_restrictions_map get_single_column_restrictions_map(const expression& e) { - single_column_restrictions_map result; - - std::vector sorted_defs = get_sorted_column_defs(e); - for (const column_definition* cdef : sorted_defs) { - expression col_restrictions = conjunction { - .children = extract_single_column_restrictions_for_column(e, *cdef) - }; - result.emplace(cdef, std::move(col_restrictions)); - } - - return result; -} - -bool is_empty_restriction(const expression& e) { - bool contains_non_conjunction = recurse_until(e, [&](const expression& e) -> bool { - return !is(e); - }); - - return !contains_non_conjunction; -} - sstring get_columns_in_commons(const expression& a, const expression& b) { std::vector ours = get_sorted_column_defs(a); std::vector theirs = get_sorted_column_defs(b); @@ -2539,43 +1901,6 @@ sstring get_columns_in_commons(const expression& a, const expression& b) { return str; } -bytes_opt value_for(const column_definition& cdef, const expression& e, const query_options& options) { - value_set possible_vals = possible_column_values(&cdef, e, options); - return std::visit(overloaded_functor { - [&](const value_list& val_list) -> bytes_opt { - if (val_list.empty()) { - return std::nullopt; - } - - if (val_list.size() != 1) { - on_internal_error(expr_logger, format("expr::value_for - multiple possible values for column: {}", e)); - } - - return to_bytes(val_list.front()); - }, - [&](const interval&) -> bytes_opt { - on_internal_error(expr_logger, format("expr::value_for - possible values are a range: {}", e)); - } - }, possible_vals); -} - -bool contains_multi_column_restriction(const expression& e) { - const binary_operator* find_res = find_binop(e, [](const binary_operator& binop) { - return is(binop.lhs); - }); - return find_res != nullptr; -} - -bool has_only_eq_binops(const expression& e) { - const expr::binary_operator* non_eq_binop = find_in_expression(e, - [](const expr::binary_operator& binop) { - return binop.op != expr::oper_t::EQ; - } - ); - - return non_eq_binop == nullptr; -} - unset_bind_variable_guard::unset_bind_variable_guard(const expr::expression& e) { if (auto bv = expr::as_if(&e)) { _var = *bv; diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index b805d9c6a6..db0bbc2646 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -28,6 +28,7 @@ #include "cql3/selection/selection.hh" #include "cql3/statements/request_validations.hh" #include "dht/i_partitioner.hh" +#include "types/tuple.hh" namespace { struct maybe_column_definition { @@ -50,12 +51,774 @@ struct fmt::formatter : fmt::formatter { namespace cql3 { namespace restrictions { +using namespace expr; + static logging::logger rlogger("restrictions"); +static auto& expr_logger = rlogger; // compatibility with code moved from expression.cc using boost::adaptors::filtered; using boost::adaptors::transformed; using statements::request_validations::invalid_request; + +/// A set of discrete values. +using value_list = std::vector; // Sorted and deduped using value comparator. + +/// General set of values. Empty set and single-element sets are always value_list. interval is +/// never singular and never has start > end. Universal set is a interval with both bounds null. +using value_set = std::variant>; + +/// A set of all column values that would satisfy an expression. The _token_values variant finds +/// matching values for the partition token function call instead of the column. +/// +/// An expression restricts possible values of a column or token: +/// - `A>5` restricts A from below +/// - `A>5 AND A>6 AND B<10 AND A=12 AND B>0` restricts A to 12 and B to between 0 and 10 +/// - `A IN (1, 3, 5)` restricts A to 1, 3, or 5 +/// - `A IN (1, 3, 5) AND A>3` restricts A to just 5 +/// - `A=1 AND A<=0` restricts A to an empty list; no value is able to satisfy the expression +/// - `A>=NULL` also restricts A to an empty list; all comparisons to NULL are false +/// - an expression without A "restricts" A to unbounded range +extern value_set possible_column_values(const column_definition*, const expression&, const query_options&); +extern value_set possible_partition_token_values(const expression&, const query_options&, const schema& table_schema); + +/// Turns value_set into a range, unless it's a multi-valued list (in which case this throws). +extern interval to_range(const value_set&); + +/// A range of all X such that X op val. +interval to_range(oper_t op, const clustering_key_prefix& val); + +/// True iff the index can support the entire expression. +extern bool is_supported_by(const expression&, const secondary_index::index&); + +/// True iff any of the indices from the manager can support the entire expression. If allow_local, use all +/// indices; otherwise, use only global indices. +extern bool has_supporting_index( + const expression&, const secondary_index::secondary_index_manager&, allow_local_index allow_local); + +// Looks at each column individually and checks whether some index can support restrictions on this single column. +// Expression has to consist only of single column restrictions. +extern bool index_supports_some_column( + const expression&, + const secondary_index::secondary_index_manager&, + allow_local_index allow_local); + +inline bool needs_filtering(oper_t op) { + return (op == oper_t::CONTAINS) || (op == oper_t::CONTAINS_KEY) || (op == oper_t::LIKE) || + (op == oper_t::IS_NOT) || (op == oper_t::NEQ) ; +} + +inline auto find_needs_filtering(const expression& e) { + return find_binop(e, [] (const binary_operator& bo) { return needs_filtering(bo.op); }); +} + +inline bool is_multi_column(const binary_operator& op) { + return expr::is(op.lhs); +} + +inline bool has_slice_or_needs_filtering(const expression& e) { + return find_binop(e, [] (const binary_operator& o) { return is_slice(o.op) || needs_filtering(o.op); }); +} + +/// True iff binary_operator involves a collection. +extern bool is_on_collection(const binary_operator&); + +// Checks whether the given column has an EQ restriction in the expression. +// EQ restriction is `col = ...` or `(col, col2) = ...` +// IN restriction is NOT an EQ restriction, this function will not look for IN restrictions. +// Uses column_defintion::operator== for comparison, columns with the same name but different schema will not be equal. +bool has_eq_restriction_on_column(const column_definition& column, const expression& e); + +// Checks whether this expression contains restrictions on one single column. +// There might be more than one restriction, but exactly one column. +// The expression must be prepared. +bool is_single_column_restriction(const expression&); + +// Gets the only column from a single_column_restriction expression. +const column_value& get_the_only_column(const expression&); + +// Extracts map of single column restrictions for each column from expression +single_column_restrictions_map get_single_column_restrictions_map(const expression&); + + +bool contains_multi_column_restriction(const expression&); + +bool has_only_eq_binops(const expression&); + +namespace { + +const value_set empty_value_set = value_list{}; +const value_set unbounded_value_set = interval::make_open_ended_both_sides(); + +struct intersection_visitor { + const abstract_type* type; + value_set operator()(const value_list& a, const value_list& b) const { + value_list common; + common.reserve(std::max(a.size(), b.size())); + boost::set_intersection(a, b, back_inserter(common), type->as_less_comparator()); + return std::move(common); + } + + value_set operator()(const interval& a, const value_list& b) const { + const auto common = b | filtered([&] (const managed_bytes& el) { return a.contains(el, type->as_tri_comparator()); }); + return value_list(common.begin(), common.end()); + } + + value_set operator()(const value_list& a, const interval& b) const { + return (*this)(b, a); + } + + value_set operator()(const interval& a, const interval& b) const { + const auto common_range = a.intersection(b, type->as_tri_comparator()); + return common_range ? *common_range : empty_value_set; + } +}; + +value_set intersection(value_set a, value_set b, const abstract_type* type) { + return std::visit(intersection_visitor{type}, std::move(a), std::move(b)); +} + +template +value_list to_sorted_vector(Range r, const serialized_compare& comparator) { + BOOST_CONCEPT_ASSERT((boost::ForwardRangeConcept)); + value_list tmp(r.begin(), r.end()); // Need random-access range to sort (r is not necessarily random-access). + const auto unique = boost::unique(boost::sort(tmp, comparator)); + return value_list(unique.begin(), unique.end()); +} + +const auto non_null = boost::adaptors::filtered([] (const managed_bytes_opt& b) { return b.has_value(); }); + +const auto deref = boost::adaptors::transformed([] (const managed_bytes_opt& b) { return b.value(); }); + +/// Returns possible values from t, which must be RHS of IN. +value_list get_IN_values( + const expression& e, const query_options& options, const serialized_compare& comparator, + sstring_view column_name) { + const cql3::raw_value in_list = evaluate(e, options); + if (in_list.is_null()) { + return value_list(); + } + utils::chunked_vector list_elems = get_list_elements(in_list); + return to_sorted_vector(std::move(list_elems) | non_null | deref, comparator); +} + +/// Returns possible values for k-th column from t, which must be RHS of IN. +value_list get_IN_values(const expression& e, size_t k, const query_options& options, + const serialized_compare& comparator) { + const cql3::raw_value in_list = evaluate(e, options); + const auto split_values = get_list_of_tuples_elements(in_list, *type_of(e)); // Need lvalue from which to make std::view. + const auto result_range = split_values + | boost::adaptors::transformed([k] (const std::vector& v) { return v[k]; }) | non_null | deref; + return to_sorted_vector(std::move(result_range), comparator); +} + +static constexpr bool inclusive = true, exclusive = false; + +} // anonymous namespace + +template +interval> to_range(oper_t op, T&& val) { + using U = std::remove_cvref_t; + static constexpr bool inclusive = true, exclusive = false; + switch (op) { + case oper_t::EQ: + return interval::make_singular(std::forward(val)); + case oper_t::GT: + return interval::make_starting_with(interval_bound(std::forward(val), exclusive)); + case oper_t::GTE: + return interval::make_starting_with(interval_bound(std::forward(val), inclusive)); + case oper_t::LT: + return interval::make_ending_with(interval_bound(std::forward(val), exclusive)); + case oper_t::LTE: + return interval::make_ending_with(interval_bound(std::forward(val), inclusive)); + default: + throw std::logic_error(format("to_range: unknown comparison operator {}", op)); + } +} + +interval to_range(oper_t op, const clustering_key_prefix& val) { + return to_range(op, val); +} + +// When cdef == nullptr it finds possible token values instead of column values. +// When finding token values the table_schema_opt argument has to point to a valid schema, +// but it isn't used when finding values for column. +// The schema is needed to find out whether a call to token() function represents +// the partition token. +static value_set possible_lhs_values(const column_definition* cdef, + const expression& expr, + const query_options& options, + const schema* table_schema_opt) { + const auto type = cdef ? &cdef->type->without_reversed() : long_type.get(); + return expr::visit(overloaded_functor{ + [] (const constant& constant_val) { + std::optional bool_val = get_bool_value(constant_val); + if (bool_val.has_value()) { + return *bool_val ? unbounded_value_set : empty_value_set; + } + + on_internal_error(expr_logger, + "possible_lhs_values: a constant that is not a bool value cannot serve as a restriction by itself"); + }, + [&] (const conjunction& conj) { + return boost::accumulate(conj.children, unbounded_value_set, + [&] (const value_set& acc, const expression& child) { + return intersection( + std::move(acc), possible_lhs_values(cdef, child, options, table_schema_opt), type); + }); + }, + [&] (const binary_operator& oper) -> value_set { + return expr::visit(overloaded_functor{ + [&] (const column_value& col) -> value_set { + if (!cdef || cdef != col.col) { + return unbounded_value_set; + } + if (is_compare(oper.op)) { + managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); + if (!val) { + return empty_value_set; // All NULL comparisons fail; no column values match. + } + return oper.op == oper_t::EQ ? value_set(value_list{*val}) + : to_range(oper.op, std::move(*val)); + } else if (oper.op == oper_t::IN) { + return get_IN_values(oper.rhs, options, type->as_less_comparator(), cdef->name_as_text()); + } else if (oper.op == oper_t::CONTAINS || oper.op == oper_t::CONTAINS_KEY) { + managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt(); + if (!val) { + return empty_value_set; // All NULL comparisons fail; no column values match. + } + return value_set(value_list{*val}); + } + throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper)); + }, + [&] (const subscript& s) -> value_set { + const column_value& col = get_subscripted_column(s); + + if (!cdef || cdef != col.col) { + return unbounded_value_set; + } + + managed_bytes_opt sval = evaluate(s.sub, options).to_managed_bytes_opt(); + if (!sval) { + return empty_value_set; // NULL can't be a map key + } + + if (oper.op == oper_t::EQ) { + managed_bytes_opt rval = evaluate(oper.rhs, options).to_managed_bytes_opt(); + if (!rval) { + return empty_value_set; // All NULL comparisons fail; no column values match. + } + managed_bytes_opt elements[] = {sval, rval}; + managed_bytes val = tuple_type_impl::build_value_fragmented(elements); + return value_set(value_list{val}); + } + throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper)); + }, + [&] (const tuple_constructor& tuple) -> value_set { + if (!cdef) { + return unbounded_value_set; + } + const auto found = boost::find_if( + tuple.elements, [&] (const expression& c) { return expr::as(c).col == cdef; }); + if (found == tuple.elements.end()) { + return unbounded_value_set; + } + const auto column_index_on_lhs = std::distance(tuple.elements.begin(), found); + if (is_compare(oper.op)) { + // RHS must be a tuple due to upstream checks. + managed_bytes_opt val = get_tuple_elements(evaluate(oper.rhs, options), *type_of(oper.rhs)).at(column_index_on_lhs); + if (!val) { + return empty_value_set; // All NULL comparisons fail; no column values match. + } + if (oper.op == oper_t::EQ) { + return value_list{std::move(*val)}; + } + if (column_index_on_lhs > 0) { + // A multi-column comparison restricts only the first column, because + // comparison is lexicographical. + return unbounded_value_set; + } + return to_range(oper.op, std::move(*val)); + } else if (oper.op == oper_t::IN) { + return get_IN_values(oper.rhs, column_index_on_lhs, options, type->as_less_comparator()); + } + return unbounded_value_set; + }, + [&] (const function_call& token_fun_call) -> value_set { + if (!is_partition_token_for_schema(token_fun_call, *table_schema_opt)) { + on_internal_error(expr_logger, "possible_lhs_values: function calls are not supported as the LHS of a binary expression"); + } + + if (cdef) { + return unbounded_value_set; + } + const auto val = evaluate(oper.rhs, options).to_managed_bytes_opt(); + if (!val) { + return empty_value_set; // All NULL comparisons fail; no token values match. + } + if (oper.op == oper_t::EQ) { + return value_list{*val}; + } else if (oper.op == oper_t::GT) { + return interval::make_starting_with(interval_bound(std::move(*val), exclusive)); + } else if (oper.op == oper_t::GTE) { + return interval::make_starting_with(interval_bound(std::move(*val), inclusive)); + } + static const managed_bytes MININT = managed_bytes(serialized(std::numeric_limits::min())), + MAXINT = managed_bytes(serialized(std::numeric_limits::max())); + // Undocumented feature: when the user types `token(...) < MININT`, we interpret + // that as MAXINT for some reason. + const auto adjusted_val = (*val == MININT) ? MAXINT : *val; + if (oper.op == oper_t::LT) { + return interval::make_ending_with(interval_bound(std::move(adjusted_val), exclusive)); + } else if (oper.op == oper_t::LTE) { + return interval::make_ending_with(interval_bound(std::move(adjusted_val), inclusive)); + } + throw std::logic_error(format("get_token_interval invalid operator {}", oper.op)); + }, + [&] (const binary_operator&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: nested binary operators are not supported"); + }, + [&] (const conjunction&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: conjunctions are not supported as the LHS of a binary expression"); + }, + [] (const constant&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: constants are not supported as the LHS of a binary expression"); + }, + [] (const unresolved_identifier&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: unresolved identifiers are not supported as the LHS of a binary expression"); + }, + [] (const column_mutation_attribute&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: writetime/ttl are not supported as the LHS of a binary expression"); + }, + [] (const cast&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: typecasts are not supported as the LHS of a binary expression"); + }, + [] (const field_selection&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: field selections are not supported as the LHS of a binary expression"); + }, + [] (const bind_variable&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: bind variables are not supported as the LHS of a binary expression"); + }, + [] (const untyped_constant&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: untyped constants are not supported as the LHS of a binary expression"); + }, + [] (const collection_constructor&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: collection constructors are not supported as the LHS of a binary expression"); + }, + [] (const usertype_constructor&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: user type constructors are not supported as the LHS of a binary expression"); + }, + [] (const temporary&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: temporaries are not supported as the LHS of a binary expression"); + }, + }, oper.lhs); + }, + [] (const column_value&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: a column cannot serve as a restriction by itself"); + }, + [] (const subscript&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: a subscript cannot serve as a restriction by itself"); + }, + [] (const unresolved_identifier&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: an unresolved identifier cannot serve as a restriction"); + }, + [] (const column_mutation_attribute&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: the writetime/ttl functions cannot serve as a restriction by itself"); + }, + [] (const function_call&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: a function call cannot serve as a restriction by itself"); + }, + [] (const cast&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: a typecast cannot serve as a restriction by itself"); + }, + [] (const field_selection&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: a field selection cannot serve as a restriction by itself"); + }, + [] (const bind_variable&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: a bind variable cannot serve as a restriction by itself"); + }, + [] (const untyped_constant&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: an untyped constant cannot serve as a restriction by itself"); + }, + [] (const tuple_constructor&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: an tuple constructor cannot serve as a restriction by itself"); + }, + [] (const collection_constructor&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: a collection constructor cannot serve as a restriction by itself"); + }, + [] (const usertype_constructor&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: a user type constructor cannot serve as a restriction by itself"); + }, + [] (const temporary&) -> value_set { + on_internal_error(expr_logger, "possible_lhs_values: a temporary cannot serve as a restriction by itself"); + }, + }, expr); +} + +value_set possible_column_values(const column_definition* col, const expression& e, const query_options& options) { + return possible_lhs_values(col, e, options, nullptr); +} + +value_set possible_partition_token_values(const expression& e, const query_options& options, const schema& table_schema) { + return possible_lhs_values(nullptr, e, options, &table_schema); +} + +interval to_range(const value_set& s) { + return std::visit(overloaded_functor{ + [] (const interval& r) { return r; }, + [] (const value_list& lst) { + if (lst.size() != 1) { + throw std::logic_error(format("to_range called on list of size {}", lst.size())); + } + return interval::make_singular(lst[0]); + }, + }, s); +} + +namespace { +constexpr inline secondary_index::index::supports_expression_v operator&&(secondary_index::index::supports_expression_v v1, secondary_index::index::supports_expression_v v2) { + using namespace secondary_index; + auto True = index::supports_expression_v::from_bool(true); + return v1 == True && v2 == True ? True : index::supports_expression_v::from_bool(false); +} + +secondary_index::index::supports_expression_v is_supported_by_helper(const expression& expr, const secondary_index::index& idx) { + using ret_t = secondary_index::index::supports_expression_v; + using namespace secondary_index; + return expr::visit(overloaded_functor{ + [&] (const conjunction& conj) -> ret_t { + if (conj.children.empty()) { + return index::supports_expression_v::from_bool(true); + } + auto init = is_supported_by_helper(conj.children[0], idx); + return std::accumulate(std::begin(conj.children) + 1, std::end(conj.children), init, + [&] (ret_t acc, const expression& child) -> ret_t { + return acc && is_supported_by_helper(child, idx); + }); + }, + [&] (const binary_operator& oper) { + return expr::visit(overloaded_functor{ + [&] (const column_value& col) { + return idx.supports_expression(*col.col, oper.op); + }, + [&] (const tuple_constructor& tuple) { + if (tuple.elements.size() == 1) { + if (auto column = expr::as_if(&tuple.elements[0])) { + return idx.supports_expression(*column->col, oper.op); + } + } + // We don't use index table for multi-column restrictions, as it cannot avoid filtering. + return index::supports_expression_v::from_bool(false); + }, + [&] (const function_call&) { return index::supports_expression_v::from_bool(false); }, + [&] (const subscript& s) -> ret_t { + const column_value& col = get_subscripted_column(s); + return idx.supports_subscript_expression(*col.col, oper.op); + }, + [&] (const binary_operator&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: nested binary operators are not supported"); + }, + [&] (const conjunction&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: conjunctions are not supported as the LHS of a binary expression"); + }, + [] (const constant&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: constants are not supported as the LHS of a binary expression"); + }, + [] (const unresolved_identifier&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: an unresolved identifier is not supported as the LHS of a binary expression"); + }, + [&] (const column_mutation_attribute&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: writetime/ttl are not supported as the LHS of a binary expression"); + }, + [&] (const cast&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: typecasts are not supported as the LHS of a binary expression"); + }, + [&] (const field_selection&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: field selections are not supported as the LHS of a binary expression"); + }, + [&] (const bind_variable&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: bind variables are not supported as the LHS of a binary expression"); + }, + [&] (const untyped_constant&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: untyped constants are not supported as the LHS of a binary expression"); + }, + [&] (const collection_constructor&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: collection constructors are not supported as the LHS of a binary expression"); + }, + [&] (const usertype_constructor&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: user type constructors are not supported as the LHS of a binary expression"); + }, + [&] (const temporary&) -> ret_t { + on_internal_error(expr_logger, "is_supported_by: temporaries are not supported as the LHS of a binary expression"); + }, + }, oper.lhs); + }, + [] (const auto& default_case) { return index::supports_expression_v::from_bool(false); } + }, expr); +} +} + +bool is_supported_by(const expression& expr, const secondary_index::index& idx) { + auto s = is_supported_by_helper(expr, idx); + return s != secondary_index::index::supports_expression_v::from_bool(false); +} + + +bool has_supporting_index( + const expression& expr, + const secondary_index::secondary_index_manager& index_manager, + allow_local_index allow_local) { + const auto indexes = index_manager.list_indexes(); + const auto support = std::bind(is_supported_by, std::ref(expr), std::placeholders::_1); + return allow_local ? boost::algorithm::any_of(indexes, support) + : boost::algorithm::any_of( + indexes | filtered([] (const secondary_index::index& i) { return !i.metadata().local(); }), + support); +} + +bool index_supports_some_column( + const expression& e, + const secondary_index::secondary_index_manager& index_manager, + allow_local_index allow_local) { + single_column_restrictions_map single_col_restrictions = get_single_column_restrictions_map(e); + + for (auto&& [col, col_restrictions] : single_col_restrictions) { + if (has_supporting_index(col_restrictions, index_manager, allow_local)) { + return true; + } + } + + return false; +} + +bool is_on_collection(const binary_operator& b) { + if (b.op == oper_t::CONTAINS || b.op == oper_t::CONTAINS_KEY) { + return true; + } + if (auto tuple = expr::as_if(&b.lhs)) { + return boost::algorithm::any_of(tuple->elements, [] (const expression& v) { return expr::is(v); }); + } + return false; +} + +bool has_eq_restriction_on_column(const column_definition& column, const expression& e) { + std::function column_in_lhs = [&](const expression& e) -> bool { + return visit(overloaded_functor { + [&](const column_value& cv) { + // Use column_defintion::operator== for comparison, + // columns with the same name but different schema will not be equal. + return *cv.col == column; + }, + [&](const tuple_constructor& tc) { + for (const expression& elem : tc.elements) { + if (column_in_lhs(elem)) { + return true; + } + } + + return false; + }, + [&](const auto&) {return false;} + }, e); + }; + + // Look for binary operator describing eq relation with this column on lhs + const binary_operator* eq_restriction_search_res = find_binop(e, [&](const binary_operator& b) { + if (b.op != oper_t::EQ) { + return false; + } + + if (!column_in_lhs(b.lhs)) { + return false; + } + + // These conditions are not allowed to occur in the current code, + // but they might be allowed in the future. + // They are added now to avoid surprises later. + // + // These conditions detect cases like: + // WHERE column1 = column2 + // WHERE column1 = row_number() + if (contains_column(column, b.rhs) || contains_nonpure_function(b.rhs)) { + return false; + } + + return true; + }); + + return eq_restriction_search_res != nullptr; +} + +std::vector extract_single_column_restrictions_for_column(const expression& expr, + const column_definition& column) { + struct visitor { + std::vector restrictions; + const column_definition& column; + const binary_operator* current_binary_operator; + + void operator()(const constant&) {} + + void operator()(const conjunction& conj) { + for (const expression& child : conj.children) { + expr::visit(*this, child); + } + } + + void operator()(const binary_operator& oper) { + if (current_binary_operator != nullptr) { + on_internal_error(expr_logger, + "extract_single_column_restrictions_for_column: nested binary operators are not supported"); + } + + current_binary_operator = &oper; + expr::visit(*this, oper.lhs); + current_binary_operator = nullptr; + } + + void operator()(const column_value& cv) { + if (*cv.col == column && current_binary_operator != nullptr) { + restrictions.emplace_back(*current_binary_operator); + } + } + + void operator()(const subscript& s) { + const column_value& cv = get_subscripted_column(s); + if (*cv.col == column && current_binary_operator != nullptr) { + restrictions.emplace_back(*current_binary_operator); + } + } + + void operator()(const unresolved_identifier&) {} + void operator()(const column_mutation_attribute&) {} + void operator()(const function_call&) {} + void operator()(const cast&) {} + void operator()(const field_selection&) {} + void operator()(const bind_variable&) {} + void operator()(const untyped_constant&) {} + void operator()(const tuple_constructor&) {} + void operator()(const collection_constructor&) {} + void operator()(const usertype_constructor&) {} + void operator()(const temporary&) {} + }; + + visitor v { + .restrictions = std::vector(), + .column = column, + .current_binary_operator = nullptr, + }; + + expr::visit(v, expr); + + return std::move(v.restrictions); +} + +static std::optional> get_single_column_restriction_column(const expression& e) { + if (find_in_expression(e, [](const auto&) {return true;})) { + on_internal_error(expr_logger, + seastar::format("get_single_column_restriction_column expects a prepared expression, but it's not: {}", e)); + } + + const column_value* the_only_column = nullptr; + bool expression_is_single_column = false; + + for_each_expression(e, + [&](const column_value& cval) { + if (the_only_column == nullptr) { + // It's the first column_value we've encountered - set it as the only column + the_only_column = &cval; + expression_is_single_column = true; + return; + } + + if (cval.col != the_only_column->col) { + // In case any other column is encountered the restriction + // restricts more than one column. + expression_is_single_column = false; + } + } + ); + + if (expression_is_single_column) { + return std::cref(*the_only_column); + } else { + return std::nullopt; + } +} + +bool is_single_column_restriction(const expression& e) { + return get_single_column_restriction_column(e).has_value(); +} + +const column_value& get_the_only_column(const expression& e) { + std::optional> result = get_single_column_restriction_column(e); + + if (!result.has_value()) { + on_internal_error(expr_logger, + format("get_the_only_column - bad expression: {}", e)); + } + + return *result; +} + +single_column_restrictions_map get_single_column_restrictions_map(const expression& e) { + single_column_restrictions_map result; + + std::vector sorted_defs = get_sorted_column_defs(e); + for (const column_definition* cdef : sorted_defs) { + expression col_restrictions = conjunction { + .children = extract_single_column_restrictions_for_column(e, *cdef) + }; + result.emplace(cdef, std::move(col_restrictions)); + } + + return result; +} + +bool is_empty_restriction(const expression& e) { + bool contains_non_conjunction = recurse_until(e, [&](const expression& e) -> bool { + return !is(e); + }); + + return !contains_non_conjunction; +} + +bytes_opt value_for(const column_definition& cdef, const expression& e, const query_options& options) { + value_set possible_vals = possible_column_values(&cdef, e, options); + return std::visit(overloaded_functor { + [&](const value_list& val_list) -> bytes_opt { + if (val_list.empty()) { + return std::nullopt; + } + + if (val_list.size() != 1) { + on_internal_error(expr_logger, format("expr::value_for - multiple possible values for column: {}", e)); + } + + return to_bytes(val_list.front()); + }, + [&](const interval&) -> bytes_opt { + on_internal_error(expr_logger, format("expr::value_for - possible values are a range: {}", e)); + } + }, possible_vals); +} + +bool contains_multi_column_restriction(const expression& e) { + const binary_operator* find_res = find_binop(e, [](const binary_operator& binop) { + return is(binop.lhs); + }); + return find_res != nullptr; +} + +bool has_only_eq_binops(const expression& e) { + const expr::binary_operator* non_eq_binop = find_in_expression(e, + [](const expr::binary_operator& binop) { + return binop.op != expr::oper_t::EQ; + } + ); + + return non_eq_binop == nullptr; +} + statement_restrictions::statement_restrictions(schema_ptr schema, bool allow_filtering) : _schema(schema) , _partition_range_is_simple(true) @@ -364,28 +1127,28 @@ statement_restrictions::statement_restrictions(data_dictionary::database db, } if (_where.has_value()) { if (!has_token_restrictions()) { - _single_column_partition_key_restrictions = expr::get_single_column_restrictions_map(_partition_key_restrictions); + _single_column_partition_key_restrictions = get_single_column_restrictions_map(_partition_key_restrictions); } - if (!expr::contains_multi_column_restriction(_clustering_columns_restrictions)) { - _single_column_clustering_key_restrictions = expr::get_single_column_restrictions_map(_clustering_columns_restrictions); + if (!contains_multi_column_restriction(_clustering_columns_restrictions)) { + _single_column_clustering_key_restrictions = get_single_column_restrictions_map(_clustering_columns_restrictions); } - _single_column_nonprimary_key_restrictions = expr::get_single_column_restrictions_map(_nonprimary_key_restrictions); + _single_column_nonprimary_key_restrictions = get_single_column_restrictions_map(_nonprimary_key_restrictions); _clustering_prefix_restrictions = extract_clustering_prefix_restrictions(*_where, _schema); _partition_range_restrictions = extract_partition_range(*_where, _schema); } - _has_multi_column = find_binop(_clustering_columns_restrictions, expr::is_multi_column); + _has_multi_column = find_binop(_clustering_columns_restrictions, is_multi_column); if (_check_indexes) { auto cf = db.find_column_family(schema); auto& sim = cf.get_index_manager(); const expr::allow_local_index allow_local( !has_partition_key_unrestricted_components() && partition_key_restrictions_is_all_eq()); - _has_multi_column = find_binop(_clustering_columns_restrictions, expr::is_multi_column); + _has_multi_column = find_binop(_clustering_columns_restrictions, is_multi_column); _has_queriable_ck_index = clustering_columns_restrictions_have_supporting_index(sim, allow_local) && !type.is_delete(); _has_queriable_pk_index = parition_key_restrictions_have_supporting_index(sim, allow_local) && !type.is_delete(); - _has_queriable_regular_index = expr::index_supports_some_column(_nonprimary_key_restrictions, sim, allow_local) + _has_queriable_regular_index = index_supports_some_column(_nonprimary_key_restrictions, sim, allow_local) && !type.is_delete(); } else { _has_queriable_ck_index = false; @@ -432,11 +1195,11 @@ statement_restrictions::statement_restrictions(data_dictionary::database db, if (_uses_secondary_indexing || clustering_key_restrictions_need_filtering()) { _index_restrictions.push_back(_clustering_columns_restrictions); - } else if (find_binop(_clustering_columns_restrictions, expr::is_on_collection)) { + } else if (find_binop(_clustering_columns_restrictions, is_on_collection)) { fail(unimplemented::cause::INDEXES); } - if (!expr::is_empty_restriction(_nonprimary_key_restrictions)) { + if (!is_empty_restriction(_nonprimary_key_restrictions)) { if (_has_queriable_regular_index && _partition_range_is_simple) { _uses_secondary_indexing = true; } else if (!allow_filtering && !type.is_delete() && !type.is_update()) { @@ -505,7 +1268,7 @@ statement_restrictions::statement_restrictions(data_dictionary::database db, auto multi_column_restrictions = expr::conjunction{ .children = expr::boolean_factors(_clustering_columns_restrictions) - | std::ranges::views::filter(expr::contains_multi_column_restriction) + | std::ranges::views::filter(contains_multi_column_restriction) | std::ranges::to() }; @@ -537,7 +1300,7 @@ statement_restrictions::clustering_key_restrictions_has_IN() const { bool statement_restrictions::clustering_key_restrictions_has_only_eq() const { - return expr::has_only_eq_binops(_clustering_columns_restrictions); + return has_only_eq_binops(_clustering_columns_restrictions); } bool @@ -561,17 +1324,17 @@ statement_restrictions::get_restrictions(column_kind kind) const { bool statement_restrictions::has_clustering_columns_restriction() const { - return !expr::is_empty_restriction(_clustering_columns_restrictions); + return !is_empty_restriction(_clustering_columns_restrictions); } bool statement_restrictions::has_non_primary_key_restriction() const { - return !expr::is_empty_restriction(_nonprimary_key_restrictions); + return !is_empty_restriction(_nonprimary_key_restrictions); } bool statement_restrictions::ck_restrictions_need_filtering() const { - if (expr::is_empty_restriction(_clustering_columns_restrictions)) { + if (is_empty_restriction(_clustering_columns_restrictions)) { return false; } @@ -633,11 +1396,11 @@ std::pair, expr::expression> statement_res expr::for_each_expression(restriction, [&](const expr::column_value& cval) { auto& cdef = cval.col; expr::expression col_restrictions = expr::conjunction { - .children = expr::extract_single_column_restrictions_for_column(restriction, *cdef) + .children = extract_single_column_restrictions_for_column(restriction, *cdef) }; for (const auto& index : sim.list_indexes()) { if (cdef->name_as_text() == index.target_column() && - expr::is_supported_by(col_restrictions, index) && + is_supported_by(col_restrictions, index) && score(index) > chosen_index_score) { chosen_index = index; chosen_index_score = score(index); @@ -659,7 +1422,7 @@ bool statement_restrictions::has_eq_restriction_on_column(const column_definitio return false; } - return expr::has_eq_restriction_on_column(column, *_where); + return restrictions::has_eq_restriction_on_column(column, *_where); } std::vector statement_restrictions::get_column_defs_for_filtering(data_dictionary::database db) const { @@ -726,13 +1489,13 @@ void statement_restrictions::add_restriction(const expr::binary_operator& restr, if (restr.op == expr::oper_t::IS_NOT) { // Handle IS NOT NULL restrictions separately add_is_not_restriction(restr, schema, for_view); - } else if (expr::is_multi_column(restr)) { + } else if (is_multi_column(restr)) { // Multi column restrictions are only allowed on clustering columns add_multi_column_clustering_key_restriction(restr); } else if (has_partition_token(restr, *_schema)) { // Token always restricts the partition key add_token_partition_key_restriction(restr); - } else if (expr::is_single_column_restriction(restr)) { + } else if (is_single_column_restriction(restr)) { const column_definition* def = get_the_only_column(restr).col; if (def->is_partition_key()) { add_single_column_parition_key_restriction(restr, schema, allow_filtering, for_view); @@ -809,7 +1572,7 @@ void statement_restrictions::add_single_column_clustering_key_restriction(const "Mixing single column relations and multi column relations on clustering columns is not allowed"); } - const column_definition* new_column = expr::get_the_only_column(restr).col; + const column_definition* new_column = get_the_only_column(restr).col; const column_definition* last_column = expr::get_last_column_def(_clustering_columns_restrictions); if (last_column != nullptr && !allow_filtering) { @@ -830,7 +1593,7 @@ void statement_restrictions::add_single_column_clustering_key_restriction(const } void statement_restrictions::add_multi_column_clustering_key_restriction(const expr::binary_operator& restr) { - if (expr::is_empty_restriction(_clustering_columns_restrictions)) { + if (is_empty_restriction(_clustering_columns_restrictions)) { _clustering_columns_restrictions = restr; return; } @@ -901,7 +1664,7 @@ void statement_restrictions::process_partition_key_restrictions(bool for_view, b // components must have a EQ. Only the last partition key component can be in IN relation. if (has_token_restrictions()) { _is_key_range = true; - } else if (expr::is_empty_restriction(_partition_key_restrictions)) { + } else if (is_empty_restriction(_partition_key_restrictions)) { _is_key_range = true; _uses_secondary_indexing = _has_queriable_pk_index; } @@ -925,11 +1688,11 @@ bool statement_restrictions::has_partition_key_unrestricted_components() const { } bool statement_restrictions::partition_key_restrictions_is_empty() const { - return expr::is_empty_restriction(_partition_key_restrictions); + return is_empty_restriction(_partition_key_restrictions); } bool statement_restrictions::partition_key_restrictions_is_all_eq() const { - return expr::has_only_eq_binops(_partition_key_restrictions); + return has_only_eq_binops(_partition_key_restrictions); } size_t statement_restrictions::partition_key_restrictions_size() const { @@ -937,9 +1700,9 @@ size_t statement_restrictions::partition_key_restrictions_size() const { } bool statement_restrictions::pk_restrictions_need_filtering() const { - return !expr::is_empty_restriction(_partition_key_restrictions) + return !is_empty_restriction(_partition_key_restrictions) && !has_token_restrictions() - && (has_partition_key_unrestricted_components() || expr::has_slice_or_needs_filtering(_partition_key_restrictions)); + && (has_partition_key_unrestricted_components() || has_slice_or_needs_filtering(_partition_key_restrictions)); } size_t statement_restrictions::clustering_columns_restrictions_size() const { @@ -947,7 +1710,7 @@ size_t statement_restrictions::clustering_columns_restrictions_size() const { } bool statement_restrictions::clustering_key_restrictions_need_filtering() const { - if (expr::contains_multi_column_restriction(_clustering_columns_restrictions)) { + if (contains_multi_column_restriction(_clustering_columns_restrictions)) { return false; } @@ -975,8 +1738,8 @@ bool statement_restrictions::clustering_columns_restrictions_have_supporting_ind const secondary_index::secondary_index_manager& index_manager, expr::allow_local_index allow_local) const { // Single column restrictions can be handled by the existing code - if (!expr::contains_multi_column_restriction(_clustering_columns_restrictions)) { - return expr::index_supports_some_column(_clustering_columns_restrictions, index_manager, allow_local); + if (!contains_multi_column_restriction(_clustering_columns_restrictions)) { + return index_supports_some_column(_clustering_columns_restrictions, index_manager, allow_local); } // Multi column restrictions have to be handled separately @@ -1053,7 +1816,7 @@ bool statement_restrictions::parition_key_restrictions_have_supporting_index(con return false; } - return expr::index_supports_some_column(_partition_key_restrictions, index_manager, allow_local); + return index_supports_some_column(_partition_key_restrictions, index_manager, allow_local); } void statement_restrictions::process_clustering_columns_restrictions(bool for_view, bool allow_filtering) { @@ -1061,7 +1824,7 @@ void statement_restrictions::process_clustering_columns_restrictions(bool for_vi return; } - if (find_binop(_clustering_columns_restrictions, expr::is_on_collection) + if (find_binop(_clustering_columns_restrictions, is_on_collection) && !_has_queriable_ck_index && !allow_filtering) { throw exceptions::invalid_request_exception( "Cannot restrict clustering columns by a CONTAINS relation without a secondary index or filtering"); @@ -1093,10 +1856,10 @@ dht::partition_range_vector partition_ranges_from_token(const expr::expression& const query_options& options, const schema& table_schema) { auto values = possible_partition_token_values(ex, options, table_schema); - if (values == expr::value_set(expr::value_list{})) { + if (values == value_set(value_list{})) { return {}; } - const auto bounds = expr::to_range(values); + const auto bounds = to_range(values); const auto start_token = bounds.start() ? bounds.start()->value().with_linearized([] (bytes_view bv) { return dht::token::from_bytes(bv); }) : dht::minimum_token(); auto end_token = bounds.end() ? bounds.end()->value().with_linearized([] (bytes_view bv) { return dht::token::from_bytes(bv); }) @@ -1519,8 +2282,6 @@ query::clustering_range reverse_if_reqd(query::clustering_range r, const abstrac return t.is_reversed() ? query::clustering_range(r.end(), r.start()) : std::move(r); } -constexpr bool inclusive = true; - /// Calculates clustering bounds for the single-column case. std::vector get_single_column_clustering_bounds( const query_options& options, @@ -1831,7 +2592,7 @@ std::vector statement_restrictions::get_clustering_boun if (_clustering_prefix_restrictions.empty()) { return {query::clustering_range::make_open_ended_both_sides()}; } - if (find_binop(_clustering_prefix_restrictions[0], expr::is_multi_column)) { + if (find_binop(_clustering_prefix_restrictions[0], is_multi_column)) { bool all_natural = true, all_reverse = true; ///< Whether column types are reversed or natural. for (auto& r : _clustering_prefix_restrictions) { // TODO: move to constructor, do only once. using namespace expr; @@ -1898,8 +2659,8 @@ bool statement_restrictions::need_filtering() const { // Can't calculate the token value, so a naive base-table query must be filtered. Same for any index tables, // except if there's only one restriction supported by an index. return !(npart == 1 && _has_queriable_pk_index && - expr::is_empty_restriction(_clustering_columns_restrictions) && - expr::is_empty_restriction(_nonprimary_key_restrictions)); + is_empty_restriction(_clustering_columns_restrictions) && + is_empty_restriction(_nonprimary_key_restrictions)); } if (pk_restrictions_need_filtering()) { // We most likely cannot calculate token(s). Neither base-table nor index-table queries can avoid filtering. @@ -1914,7 +2675,7 @@ bool statement_restrictions::need_filtering() const { if (nreg == 1) { // Single non-key restriction supported by an index. // Will the index-table query require filtering? That depends on whether its clustering key is restricted to a // continuous range. Recall that this clustering key is (token, pk, ck) of the base table. - if (npart == 0 && expr::is_empty_restriction(_clustering_columns_restrictions)) { + if (npart == 0 && is_empty_restriction(_clustering_columns_restrictions)) { return false; // No clustering key restrictions => whole partitions. } return !token_known(*this) || clustering_key_restrictions_need_filtering() @@ -2024,7 +2785,7 @@ void statement_restrictions::prepare_indexed_local(const schema& idx_tbl_schema) void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const schema& idx_tbl_schema) { for (const auto& e : _clustering_prefix_restrictions) { - if (find_binop(_clustering_prefix_restrictions[0], expr::is_multi_column)) { + if (find_binop(_clustering_prefix_restrictions[0], is_multi_column)) { // TODO: We could handle single-element tuples, eg. `(c)>=(123)`. break; } @@ -2043,12 +2804,12 @@ void statement_restrictions::add_clustering_restrictions_to_idx_ck_prefix(const // need filtering but c2 does so num_prefix_columns_that_need_not_be_filtered // will be 1. unsigned int statement_restrictions::num_clustering_prefix_columns_that_need_not_be_filtered() const { - if (expr::contains_multi_column_restriction(_clustering_columns_restrictions)) { + if (contains_multi_column_restriction(_clustering_columns_restrictions)) { return 0; } - expr::single_column_restrictions_map column_restrictions = - expr::get_single_column_restrictions_map(_clustering_columns_restrictions); + single_column_restrictions_map column_restrictions = + get_single_column_restrictions_map(_clustering_columns_restrictions); // Restrictions currently need filtering in three cases: // 1. any of them is a CONTAINS restriction diff --git a/cql3/restrictions/statement_restrictions.hh b/cql3/restrictions/statement_restrictions.hh index 67a63495ff..8ef13c4c2d 100644 --- a/cql3/restrictions/statement_restrictions.hh +++ b/cql3/restrictions/statement_restrictions.hh @@ -422,6 +422,22 @@ statement_restrictions analyze_statement_restrictions( check_indexes do_check_indexes); +// Extracts all binary operators which have the given column on their left hand side. +// Extracts only single-column restrictions. +// Does not include multi-column restrictions. +// Does not include token() restrictions. +// Does not include boolean constant restrictions. +// For example "WHERE c = 1 AND (a, c) = (2, 1) AND token(p) < 2 AND FALSE" will return {"c = 1"}. +std::vector extract_single_column_restrictions_for_column(const expr::expression&, const column_definition&); + + +// Checks whether this expression is empty - doesn't restrict anything +bool is_empty_restriction(const expr::expression&); + +// Finds the value of the given column in the expression +// In case of multpiple possible values calls on_internal_error +bytes_opt value_for(const column_definition&, const expr::expression&, const query_options&); + } } diff --git a/cql3/statements/select_statement.cc b/cql3/statements/select_statement.cc index 2807d62994..6970dc513f 100644 --- a/cql3/statements/select_statement.cc +++ b/cql3/statements/select_statement.cc @@ -1083,7 +1083,7 @@ lw_shared_ptr indexed_table_select_statement auto& last_base_pk = last_pos.partition; auto* last_base_ck = last_pos.position.has_key() ? &last_pos.position.key() : nullptr; - bytes_opt indexed_column_value = expr::value_for(*cdef, _used_index_restrictions, options); + bytes_opt indexed_column_value = restrictions::value_for(*cdef, _used_index_restrictions, options); auto index_pk = [&]() { if (_index.metadata().local()) { @@ -1288,7 +1288,7 @@ dht::partition_range_vector indexed_table_select_statement::get_partition_ranges throw exceptions::invalid_request_exception("Indexed column not found in schema"); } - bytes_opt value = expr::value_for(*cdef, _used_index_restrictions, options); + bytes_opt value = restrictions::value_for(*cdef, _used_index_restrictions, options); if (value) { auto pk = partition_key::from_single_value(*_view_schema, *value); auto dk = dht::decorate_key(*_view_schema, pk); @@ -2412,8 +2412,8 @@ static bool needs_allow_filtering_anyway( const auto& pk_restrictions = restrictions.get_partition_key_restrictions(); // Even if no filtering happens on the coordinator, we still warn about poor performance when partition // slice is defined but in potentially unlimited number of partitions (see #7608). - if ((expr::is_empty_restriction(pk_restrictions) || restrictions.has_token_restrictions()) // Potentially unlimited partitions. - && !expr::is_empty_restriction(ck_restrictions) // Slice defined. + if ((restrictions::is_empty_restriction(pk_restrictions) || restrictions.has_token_restrictions()) // Potentially unlimited partitions. + && !restrictions::is_empty_restriction(ck_restrictions) // Slice defined. && !restrictions.uses_secondary_indexing()) { // Base-table is used. (Index-table use always limits partitions.) if (strict_allow_filtering == flag_t::WARN) { warnings.emplace_back("This query should use ALLOW FILTERING and will be rejected in future versions."); diff --git a/test/boost/statement_restrictions_test.cc b/test/boost/statement_restrictions_test.cc index 6365299c19..37c5329b4a 100644 --- a/test/boost/statement_restrictions_test.cc +++ b/test/boost/statement_restrictions_test.cc @@ -410,7 +410,7 @@ BOOST_AUTO_TEST_CASE(expression_extract_column_restrictions) { column_definition col_r3 = make_column("r3", column_kind::regular_column, 2); // Empty input test - assert_expr_vec_eq(extract_single_column_restrictions_for_column(conjunction{}, col_pk1), {}); + assert_expr_vec_eq(cql3::restrictions::extract_single_column_restrictions_for_column(conjunction{}, col_pk1), {}); // BIG_WHERE test // big_where contains: @@ -494,24 +494,24 @@ BOOST_AUTO_TEST_CASE(expression_extract_column_restrictions) { expression big_where_expr = conjunction{std::move(big_where)}; - assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_pk1), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_pk1), {pk1_restriction}); - assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_pk2), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_pk2), {pk2_restriction, pk2_restriction2}); - assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_ck1), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_ck1), {ck1_restriction}); - assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_ck2), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_ck2), {ck2_restriction}); - assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_r1), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r1), {r1_restriction, r1_restriction2, r1_restriction3}); - assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_r2), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r2), {r2_restriction}); - assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_r3), + assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r3), {}); }