cql3: statement_restrictions, expr: move restrictions-related expression utilities out of expression.cc

Move all of the blatantly restriction-related expression utilities
to statement_restrictions.cc.

Some are so blatant as to include the word "restriction" in their name.
Others are just so specialized that they cannot be used for anything else.

The motivation is that further refactoring will be simplified if it can
happen within the same module, as there will not be a need to prove
it has no effect elsewhere.

Most of the declarations are made non-public (in .cc file) to limit
proliferation. A few are needed for tests or in select_statement.cc
and so are kept public.

Other than that, the only changes are namespace qualifications and
removal of a now-duplicate definition ("inclusive").

Closes scylladb/scylladb#20732
This commit is contained in:
Avi Kivity
2024-09-18 22:24:09 +03:00
committed by Nadav Har'El
parent 3d781c4fc8
commit 657848dcbb
6 changed files with 832 additions and 828 deletions

View File

@@ -41,48 +41,6 @@ extern bool is_satisfied_by(
const expression& restr, const evaluation_inputs& inputs);
/// A set of discrete values.
using value_list = std::vector<managed_bytes>; // Sorted and deduped using value comparator.
/// General set of values. Empty set and single-element sets are always value_list. interval is
/// never singular and never has start > end. Universal set is an interval with both bounds null.
using value_set = std::variant<value_list, interval<managed_bytes>>;
/// A set of all column values that would satisfy an expression. The _token_values variant finds
/// matching values for the partition token function call instead of the column.
///
/// An expression restricts possible values of a column or token:
/// - `A>5` restricts A from below
/// - `A>5 AND A>6 AND B<10 AND A=12 AND B>0` restricts A to 12 and B to between 0 and 10
/// - `A IN (1, 3, 5)` restricts A to 1, 3, or 5
/// - `A IN (1, 3, 5) AND A>3` restricts A to just 5
/// - `A=1 AND A<=0` restricts A to an empty list; no value is able to satisfy the expression
/// - `A>=NULL` also restricts A to an empty list; all comparisons to NULL are false
/// - an expression without A "restricts" A to unbounded range
extern value_set possible_column_values(const column_definition*, const expression&, const query_options&);
extern value_set possible_partition_token_values(const expression&, const query_options&, const schema& table_schema);
/// Turns value_set into a range, unless it's a multi-valued list (in which case this throws).
extern interval<managed_bytes> to_range(const value_set&);
/// A range of all X such that X op val.
interval<clustering_key_prefix> to_range(oper_t op, const clustering_key_prefix& val);
/// True iff the index can support the entire expression.
extern bool is_supported_by(const expression&, const secondary_index::index&);
/// True iff any of the indices from the manager can support the entire expression. If allow_local, use all
/// indices; otherwise, use only global indices.
extern bool has_supporting_index(
const expression&, const secondary_index::secondary_index_manager&, allow_local_index allow_local);
// Looks at each column individually and checks whether some index can support restrictions on this single column.
// Expression has to consist only of single column restrictions.
extern bool index_supports_some_column(
const expression&,
const secondary_index::secondary_index_manager&,
allow_local_index allow_local);
extern bool recurse_until(const expression& e, const noncopyable_function<bool (const expression&)>& predicate_fun);
// Looks into the expression and finds the given expression variant
@@ -138,15 +96,6 @@ inline const binary_operator* find(const expression& e, oper_t op) {
return find_binop(e, [&] (const binary_operator& o) { return o.op == op; });
}
// True iff `op` is one of CONTAINS, CONTAINS_KEY, LIKE, IS_NOT or NEQ.
inline bool needs_filtering(oper_t op) {
    switch (op) {
    case oper_t::CONTAINS:
    case oper_t::CONTAINS_KEY:
    case oper_t::LIKE:
    case oper_t::IS_NOT:
    case oper_t::NEQ:
        return true;
    default:
        return false;
    }
}
inline auto find_needs_filtering(const expression& e) {
return find_binop(e, [] (const binary_operator& bo) { return needs_filtering(bo.op); });
}
// True iff `op` is one of the ordering comparisons LT, LTE, GT or GTE.
inline bool is_slice(oper_t op) {
    switch (op) {
    case oper_t::LT:
    case oper_t::LTE:
    case oper_t::GT:
    case oper_t::GTE:
        return true;
    default:
        return false;
    }
}
@@ -169,10 +118,6 @@ inline bool is_compare(oper_t op) {
}
}
inline bool is_multi_column(const binary_operator& op) {
return expr::is<tuple_constructor>(op.lhs);
}
// Check whether the given expression represents
// a call to the token() function.
bool is_token_function(const function_call&);
@@ -190,10 +135,6 @@ inline bool has_partition_token(const expression& e, const schema& table_schema)
return find_binop(e, [&] (const binary_operator& o) { return is_partition_token_for_schema(o.lhs, table_schema); });
}
// True iff find_binop() locates a binary operator in `e` whose op
// satisfies is_slice() or needs_filtering().
inline bool has_slice_or_needs_filtering(const expression& e) {
    const auto* match = find_binop(e, [] (const binary_operator& bo) {
        return needs_filtering(bo.op) || is_slice(bo.op);
    });
    return match != nullptr;
}
// True iff the operator's comparison order is comparison_order::clustering.
inline bool is_clustering_order(const binary_operator& op) {
    const bool uses_clustering_order = (comparison_order::clustering == op.order);
    return uses_clustering_order;
}
@@ -210,9 +151,6 @@ std::vector<expression> boolean_factors(expression e);
/// Run the given function for each element in the top level conjunction.
void for_each_boolean_factor(const expression& e, const noncopyable_function<void (const expression&)>& for_each_func);
/// True iff binary_operator involves a collection.
extern bool is_on_collection(const binary_operator&);
// Checks whether the given column occurs in the expression.
// Uses column_definition::operator== for comparison, columns with the same name but different schema will not be equal.
bool contains_column(const column_definition& column, const expression& e);
@@ -221,12 +159,6 @@ bool contains_column(const column_definition& column, const expression& e);
// The expression must be prepared, so that function names are converted to function pointers.
bool contains_nonpure_function(const expression&);
// Checks whether the given column has an EQ restriction in the expression.
// EQ restriction is `col = ...` or `(col, col2) = ...`
// IN restriction is NOT an EQ restriction, this function will not look for IN restrictions.
// Uses column_definition::operator== for comparison, columns with the same name but different schema will not be equal.
bool has_eq_restriction_on_column(const column_definition& column, const expression& e);
/// Replaces every column_definition in an expression with this one. Throws if any LHS is not a single
/// column_value.
extern expression replace_column_def(const expression&, const column_definition*);
@@ -281,14 +213,6 @@ inline oper_t pick_operator(statements::bound b, bool inclusive) {
(inclusive ? oper_t::LTE : oper_t::LT);
}
// Extracts all binary operators which have the given column on their left hand side.
// Extracts only single-column restrictions.
// Does not include multi-column restrictions.
// Does not include token() restrictions.
// Does not include boolean constant restrictions.
// For example "WHERE c = 1 AND (a, c) = (2, 1) AND token(p) < 2 AND FALSE" will return {"c = 1"}.
std::vector<expression> extract_single_column_restrictions_for_column(const expression&, const column_definition&);
std::optional<bool> get_bool_value(const constant&);
utils::chunked_vector<managed_bytes_opt> get_list_elements(const cql3::raw_value&);
@@ -316,38 +240,16 @@ void fill_prepare_context(expression&, cql3::prepare_context&);
// For example an expression can contain calls to nonpure functions.
bool contains_bind_marker(const expression& e);
// Checks whether this expression contains restrictions on one single column.
// There might be more than one restriction, but exactly one column.
// The expression must be prepared.
bool is_single_column_restriction(const expression&);
// Gets the only column from a single_column_restriction expression.
const column_value& get_the_only_column(const expression&);
// Extracts column_defs from the expression and sorts them using schema_pos_column_definition_comparator.
std::vector<const column_definition*> get_sorted_column_defs(const expression&);
// Extracts column_defs and returns the last one according to schema_pos_column_definition_comparator.
const column_definition* get_last_column_def(const expression&);
// Extracts map of single column restrictions for each column from expression
single_column_restrictions_map get_single_column_restrictions_map(const expression&);
// Checks whether this expression is empty - doesn't restrict anything
bool is_empty_restriction(const expression&);
// Finds common columns between both expressions and prints them to a string.
// Uses schema_pos_column_definition_comparator for comparison.
sstring get_columns_in_commons(const expression& a, const expression& b);
// Finds the value of the given column in the expression
// In case of multiple possible values calls on_internal_error
bytes_opt value_for(const column_definition&, const expression&, const query_options&);
bool contains_multi_column_restriction(const expression&);
bool has_only_eq_binops(const expression&);
/// Finds the data type of writetime(x) or ttl(x)
data_type column_mutation_attribute_type(const column_mutation_attribute& e);

View File

@@ -509,37 +509,6 @@ bool is_not_null(const expression& lhs, const expression& rhs, const evaluation_
return !lhs_val.is_null();
}
// The value_set that contains no values: an empty value_list.
const value_set empty_value_set = value_list{};
// The value_set that places no bound on either side: an interval open-ended on both sides.
const value_set unbounded_value_set = interval<managed_bytes>::make_open_ended_both_sides();
/// Binary visitor that intersects two value_set alternatives.
///
/// `type` supplies the comparators used to order and compare the serialized values.
/// value_list operands are handed to set_intersection, so they must already be
/// sorted according to `type`'s less-comparator.
struct intersection_visitor {
    const abstract_type* type;

    /// list ∩ list: elements present in both sorted lists.
    value_set operator()(const value_list& a, const value_list& b) const {
        value_list common;
        // An intersection can never hold more elements than the smaller operand.
        common.reserve(std::min(a.size(), b.size()));
        boost::set_intersection(a, b, back_inserter(common), type->as_less_comparator());
        return std::move(common);
    }

    /// interval ∩ list: the list elements that fall inside the interval.
    value_set operator()(const interval<managed_bytes>& a, const value_list& b) const {
        const auto common = b | filtered([&] (const managed_bytes& el) { return a.contains(el, type->as_tri_comparator()); });
        return value_list(common.begin(), common.end());
    }

    /// list ∩ interval: delegates to the (interval, list) overload.
    value_set operator()(const value_list& a, const interval<managed_bytes>& b) const {
        return (*this)(b, a);
    }

    /// interval ∩ interval: the overlapping interval, or the empty set when
    /// interval::intersection reports no overlap.
    value_set operator()(const interval<managed_bytes>& a, const interval<managed_bytes>& b) const {
        const auto common_range = a.intersection(b, type->as_tri_comparator());
        return common_range ? *common_range : empty_value_set;
    }
};
/// Intersects two value sets whose values are compared using `type`,
/// dispatching on the alternatives held by `a` and `b`.
value_set intersection(value_set a, value_set b, const abstract_type* type) {
    const intersection_visitor intersect{type};
    return std::visit(intersect, std::move(a), std::move(b));
}
} // anonymous namespace
bool is_satisfied_by(const expression& restr, const evaluation_inputs& inputs) {
@@ -547,46 +516,6 @@ bool is_satisfied_by(const expression& restr, const evaluation_inputs& inputs) {
return evaluate(restr, inputs).to_managed_bytes_opt() == true_value;
}
namespace {
/// Copies the elements of `r` into a value_list, sorts them with `comparator`
/// and drops adjacent elements that compare equal with operator==.
template<typename Range>
value_list to_sorted_vector(Range r, const serialized_compare& comparator) {
    BOOST_CONCEPT_ASSERT((boost::ForwardRangeConcept<Range>));
    value_list tmp(r.begin(), r.end()); // Need random-access range to sort (r is not necessarily random-access).
    boost::sort(tmp, comparator);
    // Deduplicate in place rather than copying the surviving elements into a second vector.
    const auto duplicates = boost::unique<boost::return_found_end>(tmp);
    tmp.erase(duplicates.begin(), duplicates.end());
    return tmp;
}
// Range adaptor keeping only the optionals that hold a value.
const auto non_null = boost::adaptors::filtered([] (const managed_bytes_opt& b) { return b.has_value(); });
// Range adaptor unwrapping each optional with value(); in this file it is always piped after non_null.
const auto deref = boost::adaptors::transformed([] (const managed_bytes_opt& b) { return b.value(); });
/// Returns the possible values listed by `e`, which must be the RHS of IN.
/// A null RHS produces an empty list; null list elements are skipped. The result
/// is sorted with `comparator` and deduplicated. `column_name` is not used.
value_list get_IN_values(
        const expression& e, const query_options& options, const serialized_compare& comparator,
        sstring_view column_name) {
    const cql3::raw_value rhs = evaluate(e, options);
    if (!rhs.is_null()) {
        utils::chunked_vector<managed_bytes_opt> elements = get_list_elements(rhs);
        return to_sorted_vector(std::move(elements) | non_null | deref, comparator);
    }
    return value_list();
}
/// Returns the possible values for the k-th column listed by `e`, which must be the
/// RHS of a multi-column IN. Null components are skipped; the result is sorted with
/// `comparator` and deduplicated.
value_list get_IN_values(const expression& e, size_t k, const query_options& options,
        const serialized_compare& comparator) {
    const cql3::raw_value rhs = evaluate(e, options);
    // Held in a named variable: the lazy adaptors below only refer to it.
    const auto tuples = get_list_of_tuples_elements(rhs, *type_of(e));
    const auto kth_component = boost::adaptors::transformed(
            [k] (const std::vector<managed_bytes_opt>& tuple) { return tuple[k]; });
    return to_sorted_vector(tuples | kth_component | non_null | deref, comparator);
}
// Readable names for the boolean passed as the second argument of interval_bound.
static constexpr bool inclusive = true, exclusive = false;
} // anonymous namespace
const column_value& get_subscripted_column(const subscript& sub) {
if (!is<column_value>(sub.val)) {
on_internal_error(expr_logger,
@@ -641,381 +570,6 @@ void for_each_boolean_factor(const expression& e, const noncopyable_function<voi
}
}
/// Builds the interval of all X such that `X op val`.
/// EQ gives a singular interval; GT/GTE give an interval starting at `val`;
/// LT/LTE give an interval ending at `val`. Any other operator throws std::logic_error.
template<typename T>
interval<std::remove_cvref_t<T>> to_range(oper_t op, T&& val) {
    using range_t = interval<std::remove_cvref_t<T>>;
    // Creates the only bound of a one-sided interval, forwarding `val` into it.
    const auto bound = [&val] (bool is_inclusive) {
        return interval_bound(std::forward<T>(val), is_inclusive);
    };
    switch (op) {
    case oper_t::EQ:
        return range_t::make_singular(std::forward<T>(val));
    case oper_t::GT:
        return range_t::make_starting_with(bound(false));
    case oper_t::GTE:
        return range_t::make_starting_with(bound(true));
    case oper_t::LT:
        return range_t::make_ending_with(bound(false));
    case oper_t::LTE:
        return range_t::make_ending_with(bound(true));
    default:
        throw std::logic_error(format("to_range: unknown comparison operator {}", op));
    }
}
/// Non-template entry point: the range of all clustering key prefixes X such that `X op val`.
interval<clustering_key_prefix> to_range(oper_t op, const clustering_key_prefix& val) {
    using prefix_ref = const clustering_key_prefix&;
    return to_range<prefix_ref>(op, val);
}
// Computes the set of values that satisfy `expr` for one left-hand side:
// the column `cdef`, or the partition token when cdef == nullptr.
// When finding token values the table_schema_opt argument has to point to a valid schema,
// but it isn't used when finding values for column.
// The schema is needed to find out whether a call to token() function represents
// the partition token.
// Expression kinds that cannot act as a restriction are reported through on_internal_error.
static value_set possible_lhs_values(const column_definition* cdef,
const expression& expr,
const query_options& options,
const schema* table_schema_opt) {
// Values are compared using the column's type with reversal stripped, or long_type for tokens.
const auto type = cdef ? &cdef->type->without_reversed() : long_type.get();
return expr::visit(overloaded_functor{
// A bare boolean constant: true restricts nothing, false excludes everything.
[] (const constant& constant_val) {
std::optional<bool> bool_val = get_bool_value(constant_val);
if (bool_val.has_value()) {
return *bool_val ? unbounded_value_set : empty_value_set;
}
on_internal_error(expr_logger,
"possible_lhs_values: a constant that is not a bool value cannot serve as a restriction by itself");
},
// A conjunction: intersect the value sets of all children, starting from the unbounded set.
[&] (const conjunction& conj) {
return boost::accumulate(conj.children, unbounded_value_set,
[&] (const value_set& acc, const expression& child) {
// NOTE(review): acc is a const reference, so std::move(acc) still copies.
return intersection(
std::move(acc), possible_lhs_values(cdef, child, options, table_schema_opt), type);
});
},
// A binary operator: the outcome depends on what kind of expression its LHS is.
[&] (const binary_operator& oper) -> value_set {
return expr::visit(overloaded_functor{
[&] (const column_value& col) -> value_set {
// In token mode, or for a different column, this operator does not restrict the LHS we track.
if (!cdef || cdef != col.col) {
return unbounded_value_set;
}
if (is_compare(oper.op)) {
managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt();
if (!val) {
return empty_value_set; // All NULL comparisons fail; no column values match.
}
return oper.op == oper_t::EQ ? value_set(value_list{*val})
: to_range(oper.op, std::move(*val));
} else if (oper.op == oper_t::IN) {
return get_IN_values(oper.rhs, options, type->as_less_comparator(), cdef->name_as_text());
} else if (oper.op == oper_t::CONTAINS || oper.op == oper_t::CONTAINS_KEY) {
// The single value returned here is the evaluated RHS itself.
managed_bytes_opt val = evaluate(oper.rhs, options).to_managed_bytes_opt();
if (!val) {
return empty_value_set; // All NULL comparisons fail; no column values match.
}
return value_set(value_list{*val});
}
throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper));
},
[&] (const subscript& s) -> value_set {
const column_value& col = get_subscripted_column(s);
if (!cdef || cdef != col.col) {
return unbounded_value_set;
}
managed_bytes_opt sval = evaluate(s.sub, options).to_managed_bytes_opt();
if (!sval) {
return empty_value_set; // NULL can't be a map key
}
if (oper.op == oper_t::EQ) {
managed_bytes_opt rval = evaluate(oper.rhs, options).to_managed_bytes_opt();
if (!rval) {
return empty_value_set; // All NULL comparisons fail; no column values match.
}
// The result is the pair (subscript value, RHS value) serialized as a two-element tuple.
managed_bytes_opt elements[] = {sval, rval};
managed_bytes val = tuple_type_impl::build_value_fragmented(elements);
return value_set(value_list{val});
}
throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper));
},
[&] (const tuple_constructor& tuple) -> value_set {
if (!cdef) {
return unbounded_value_set;
}
// expr::as<column_value> is applied to every element, so each tuple element is treated as a column_value.
const auto found = boost::find_if(
tuple.elements, [&] (const expression& c) { return expr::as<column_value>(c).col == cdef; });
if (found == tuple.elements.end()) {
return unbounded_value_set;
}
const auto column_index_on_lhs = std::distance(tuple.elements.begin(), found);
if (is_compare(oper.op)) {
// RHS must be a tuple due to upstream checks.
managed_bytes_opt val = get_tuple_elements(evaluate(oper.rhs, options), *type_of(oper.rhs)).at(column_index_on_lhs);
if (!val) {
return empty_value_set; // All NULL comparisons fail; no column values match.
}
if (oper.op == oper_t::EQ) {
return value_list{std::move(*val)};
}
if (column_index_on_lhs > 0) {
// A multi-column comparison restricts only the first column, because
// comparison is lexicographical.
return unbounded_value_set;
}
return to_range(oper.op, std::move(*val));
} else if (oper.op == oper_t::IN) {
return get_IN_values(oper.rhs, column_index_on_lhs, options, type->as_less_comparator());
}
return unbounded_value_set;
},
[&] (const function_call& token_fun_call) -> value_set {
// NOTE(review): table_schema_opt is dereferenced before cdef is checked, and
// possible_column_values() passes nullptr for it. A function call on the LHS reaching this
// branch in column mode would dereference a null pointer; confirm callers rule this out.
if (!is_partition_token_for_schema(token_fun_call, *table_schema_opt)) {
on_internal_error(expr_logger, "possible_lhs_values: function calls are not supported as the LHS of a binary expression");
}
// A token restriction does not restrict an individual column.
if (cdef) {
return unbounded_value_set;
}
const auto val = evaluate(oper.rhs, options).to_managed_bytes_opt();
if (!val) {
return empty_value_set; // All NULL comparisons fail; no token values match.
}
// NOTE(review): val and adjusted_val are const, so the std::move calls below copy.
if (oper.op == oper_t::EQ) {
return value_list{*val};
} else if (oper.op == oper_t::GT) {
return interval<managed_bytes>::make_starting_with(interval_bound(std::move(*val), exclusive));
} else if (oper.op == oper_t::GTE) {
return interval<managed_bytes>::make_starting_with(interval_bound(std::move(*val), inclusive));
}
static const managed_bytes MININT = managed_bytes(serialized(std::numeric_limits<int64_t>::min())),
MAXINT = managed_bytes(serialized(std::numeric_limits<int64_t>::max()));
// Undocumented feature: when the user types `token(...) < MININT`, we interpret
// that as MAXINT for some reason.
const auto adjusted_val = (*val == MININT) ? MAXINT : *val;
if (oper.op == oper_t::LT) {
return interval<managed_bytes>::make_ending_with(interval_bound(std::move(adjusted_val), exclusive));
} else if (oper.op == oper_t::LTE) {
return interval<managed_bytes>::make_ending_with(interval_bound(std::move(adjusted_val), inclusive));
}
throw std::logic_error(format("get_token_interval invalid operator {}", oper.op));
},
// Every remaining expression kind is rejected as the LHS of a binary operator.
[&] (const binary_operator&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: nested binary operators are not supported");
},
[&] (const conjunction&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: conjunctions are not supported as the LHS of a binary expression");
},
[] (const constant&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: constants are not supported as the LHS of a binary expression");
},
[] (const unresolved_identifier&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: unresolved identifiers are not supported as the LHS of a binary expression");
},
[] (const column_mutation_attribute&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: writetime/ttl are not supported as the LHS of a binary expression");
},
[] (const cast&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: typecasts are not supported as the LHS of a binary expression");
},
[] (const field_selection&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: field selections are not supported as the LHS of a binary expression");
},
[] (const bind_variable&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: bind variables are not supported as the LHS of a binary expression");
},
[] (const untyped_constant&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: untyped constants are not supported as the LHS of a binary expression");
},
[] (const collection_constructor&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: collection constructors are not supported as the LHS of a binary expression");
},
[] (const usertype_constructor&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: user type constructors are not supported as the LHS of a binary expression");
},
[] (const temporary&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: temporaries are not supported as the LHS of a binary expression");
},
}, oper.lhs);
},
// Every remaining top-level expression kind cannot act as a restriction by itself.
[] (const column_value&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: a column cannot serve as a restriction by itself");
},
[] (const subscript&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: a subscript cannot serve as a restriction by itself");
},
[] (const unresolved_identifier&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: an unresolved identifier cannot serve as a restriction");
},
[] (const column_mutation_attribute&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: the writetime/ttl functions cannot serve as a restriction by itself");
},
[] (const function_call&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: a function call cannot serve as a restriction by itself");
},
[] (const cast&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: a typecast cannot serve as a restriction by itself");
},
[] (const field_selection&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: a field selection cannot serve as a restriction by itself");
},
[] (const bind_variable&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: a bind variable cannot serve as a restriction by itself");
},
[] (const untyped_constant&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: an untyped constant cannot serve as a restriction by itself");
},
[] (const tuple_constructor&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: an tuple constructor cannot serve as a restriction by itself");
},
[] (const collection_constructor&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: a collection constructor cannot serve as a restriction by itself");
},
[] (const usertype_constructor&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: a user type constructor cannot serve as a restriction by itself");
},
[] (const temporary&) -> value_set {
on_internal_error(expr_logger, "possible_lhs_values: a temporary cannot serve as a restriction by itself");
},
}, expr);
}
/// Values of column `col` that can satisfy `e`. Forwards to possible_lhs_values
/// in column mode, i.e. without a table schema.
value_set possible_column_values(const column_definition* col, const expression& e, const query_options& options) {
    const schema* const no_schema = nullptr;
    return possible_lhs_values(col, e, options, no_schema);
}
/// Partition token values that can satisfy `e`. Forwards to possible_lhs_values
/// in token mode, i.e. without a column and with `table_schema`.
value_set possible_partition_token_values(const expression& e, const query_options& options, const schema& table_schema) {
    const column_definition* const no_column = nullptr;
    return possible_lhs_values(no_column, e, options, &table_schema);
}
/// Converts a value_set to an interval. An interval is returned unchanged;
/// a one-element list becomes a singular interval. A list of any other size
/// throws std::logic_error.
interval<managed_bytes> to_range(const value_set& s) {
    if (const auto* range = std::get_if<interval<managed_bytes>>(&s)) {
        return *range;
    }
    const value_list& lst = std::get<value_list>(s);
    if (lst.size() != 1) {
        throw std::logic_error(format("to_range called on list of size {}", lst.size()));
    }
    return interval<managed_bytes>::make_singular(lst[0]);
}
namespace {
// Conjunction of two index-support verdicts: from_bool(true) only when both
// operands equal from_bool(true), from_bool(false) otherwise.
constexpr inline secondary_index::index::supports_expression_v operator&&(secondary_index::index::supports_expression_v v1, secondary_index::index::supports_expression_v v2) {
    using verdict = secondary_index::index::supports_expression_v;
    const auto supported = verdict::from_bool(true);
    if (v1 == supported && v2 == supported) {
        return supported;
    }
    return verdict::from_bool(false);
}
// Computes the verdict on whether `idx` supports `expr`.
//  - conjunction: the verdicts of all children combined with operator&&
//    (an empty conjunction yields from_bool(true));
//  - binary operator: decided by the kind of its left-hand side;
//  - anything else: from_bool(false).
secondary_index::index::supports_expression_v is_supported_by_helper(const expression& expr, const secondary_index::index& idx) {
    using verdict = secondary_index::index::supports_expression_v;
    return expr::visit(overloaded_functor{
        [&] (const conjunction& conj) -> verdict {
            const auto& factors = conj.children;
            if (factors.empty()) {
                return verdict::from_bool(true);
            }
            // Every child is visited: operator&& on verdicts does not short-circuit.
            verdict combined = is_supported_by_helper(factors[0], idx);
            for (auto it = std::begin(factors) + 1; it != std::end(factors); ++it) {
                combined = combined && is_supported_by_helper(*it, idx);
            }
            return combined;
        },
        [&] (const binary_operator& oper) {
            return expr::visit(overloaded_functor{
                [&] (const column_value& col) {
                    return idx.supports_expression(*col.col, oper.op);
                },
                [&] (const tuple_constructor& tuple) {
                    // A one-element tuple holding a column is handled like that column alone.
                    if (tuple.elements.size() == 1) {
                        if (auto only_column = expr::as_if<column_value>(&tuple.elements[0])) {
                            return idx.supports_expression(*only_column->col, oper.op);
                        }
                    }
                    // We don't use index table for multi-column restrictions, as it cannot avoid filtering.
                    return verdict::from_bool(false);
                },
                [] (const function_call&) { return verdict::from_bool(false); },
                [&] (const subscript& s) -> verdict {
                    const column_value& subscripted = get_subscripted_column(s);
                    return idx.supports_subscript_expression(*subscripted.col, oper.op);
                },
                // The remaining expression kinds are not accepted as the LHS of a binary operator.
                [] (const binary_operator&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: nested binary operators are not supported");
                },
                [] (const conjunction&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: conjunctions are not supported as the LHS of a binary expression");
                },
                [] (const constant&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: constants are not supported as the LHS of a binary expression");
                },
                [] (const unresolved_identifier&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: an unresolved identifier is not supported as the LHS of a binary expression");
                },
                [] (const column_mutation_attribute&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: writetime/ttl are not supported as the LHS of a binary expression");
                },
                [] (const cast&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: typecasts are not supported as the LHS of a binary expression");
                },
                [] (const field_selection&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: field selections are not supported as the LHS of a binary expression");
                },
                [] (const bind_variable&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: bind variables are not supported as the LHS of a binary expression");
                },
                [] (const untyped_constant&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: untyped constants are not supported as the LHS of a binary expression");
                },
                [] (const collection_constructor&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: collection constructors are not supported as the LHS of a binary expression");
                },
                [] (const usertype_constructor&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: user type constructors are not supported as the LHS of a binary expression");
                },
                [] (const temporary&) -> verdict {
                    on_internal_error(expr_logger, "is_supported_by: temporaries are not supported as the LHS of a binary expression");
                },
            }, oper.lhs);
        },
        [] (const auto&) { return verdict::from_bool(false); }
    }, expr);
}
}
/// True iff the verdict computed by is_supported_by_helper for (`expr`, `idx`)
/// differs from from_bool(false).
bool is_supported_by(const expression& expr, const secondary_index::index& idx) {
    using verdict = secondary_index::index::supports_expression_v;
    const verdict unsupported = verdict::from_bool(false);
    return is_supported_by_helper(expr, idx) != unsupported;
}
/// True iff at least one index listed by `index_manager` supports `expr`.
/// With `allow_local` every index is a candidate; otherwise indexes whose
/// metadata reports local() are left out.
bool has_supporting_index(
        const expression& expr,
        const secondary_index::secondary_index_manager& index_manager,
        allow_local_index allow_local) {
    const auto candidates = index_manager.list_indexes();
    const auto supports_expr = [&expr] (const secondary_index::index& candidate) {
        return is_supported_by(expr, candidate);
    };
    if (allow_local) {
        return boost::algorithm::any_of(candidates, supports_expr);
    }
    const auto is_global = [] (const secondary_index::index& candidate) {
        return !candidate.metadata().local();
    };
    return boost::algorithm::any_of(candidates | filtered(is_global), supports_expr);
}
/// Splits `e` into per-column restrictions and reports whether the restrictions
/// of at least one column have a supporting index.
bool index_supports_some_column(
        const expression& e,
        const secondary_index::secondary_index_manager& index_manager,
        allow_local_index allow_local) {
    const single_column_restrictions_map per_column = get_single_column_restrictions_map(e);
    return boost::algorithm::any_of(per_column, [&] (const auto& entry) {
        return has_supporting_index(entry.second, index_manager, allow_local);
    });
}
std::ostream& operator<<(std::ostream& os, const column_value& cv) {
os << cv.col->name_as_text();
return os;
@@ -1244,16 +798,6 @@ sstring to_string(const expression& expr) {
return fmt::format("{}", expr);
}
/// True iff the operator is CONTAINS / CONTAINS_KEY, or its left-hand side is a
/// tuple with at least one subscript element.
bool is_on_collection(const binary_operator& b) {
    const bool is_contains_op = (b.op == oper_t::CONTAINS) || (b.op == oper_t::CONTAINS_KEY);
    if (is_contains_op) {
        return true;
    }
    const auto* lhs_tuple = expr::as_if<tuple_constructor>(&b.lhs);
    if (lhs_tuple == nullptr) {
        return false;
    }
    return boost::algorithm::any_of(lhs_tuple->elements, [] (const expression& element) {
        return expr::is<subscript>(element);
    });
}
bool contains_column(const column_definition& column, const expression& e) {
const column_value* find_res = find_in_expression<column_value>(e,
[&](const column_value& column_val) -> bool {
@@ -1280,54 +824,6 @@ bool contains_nonpure_function(const expression& e) {
return find_res != nullptr;
}
bool has_eq_restriction_on_column(const column_definition& column, const expression& e) {
std::function<bool(const expression&)> column_in_lhs = [&](const expression& e) -> bool {
return visit(overloaded_functor {
[&](const column_value& cv) {
// Use column_defintion::operator== for comparison,
// columns with the same name but different schema will not be equal.
return *cv.col == column;
},
[&](const tuple_constructor& tc) {
for (const expression& elem : tc.elements) {
if (column_in_lhs(elem)) {
return true;
}
}
return false;
},
[&](const auto&) {return false;}
}, e);
};
// Look for binary operator describing eq relation with this column on lhs
const binary_operator* eq_restriction_search_res = find_binop(e, [&](const binary_operator& b) {
if (b.op != oper_t::EQ) {
return false;
}
if (!column_in_lhs(b.lhs)) {
return false;
}
// These conditions are not allowed to occur in the current code,
// but they might be allowed in the future.
// They are added now to avoid surprises later.
//
// These conditions detect cases like:
// WHERE column1 = column2
// WHERE column1 = row_number()
if (contains_column(column, b.rhs) || contains_nonpure_function(b.rhs)) {
return false;
}
return true;
});
return eq_restriction_search_res != nullptr;
}
expression replace_column_def(const expression& expr, const column_definition* new_cdef) {
return search_and_replace(expr, [&] (const expression& expr) -> std::optional<expression> {
if (expr::is<column_value>(expr)) {
@@ -1498,69 +994,6 @@ expression search_and_replace(const expression& e,
}
}
std::vector<expression> extract_single_column_restrictions_for_column(const expression& expr,
const column_definition& column) {
struct visitor {
std::vector<expression> restrictions;
const column_definition& column;
const binary_operator* current_binary_operator;
void operator()(const constant&) {}
void operator()(const conjunction& conj) {
for (const expression& child : conj.children) {
expr::visit(*this, child);
}
}
void operator()(const binary_operator& oper) {
if (current_binary_operator != nullptr) {
on_internal_error(expr_logger,
"extract_single_column_restrictions_for_column: nested binary operators are not supported");
}
current_binary_operator = &oper;
expr::visit(*this, oper.lhs);
current_binary_operator = nullptr;
}
void operator()(const column_value& cv) {
if (*cv.col == column && current_binary_operator != nullptr) {
restrictions.emplace_back(*current_binary_operator);
}
}
void operator()(const subscript& s) {
const column_value& cv = get_subscripted_column(s);
if (*cv.col == column && current_binary_operator != nullptr) {
restrictions.emplace_back(*current_binary_operator);
}
}
void operator()(const unresolved_identifier&) {}
void operator()(const column_mutation_attribute&) {}
void operator()(const function_call&) {}
void operator()(const cast&) {}
void operator()(const field_selection&) {}
void operator()(const bind_variable&) {}
void operator()(const untyped_constant&) {}
void operator()(const tuple_constructor&) {}
void operator()(const collection_constructor&) {}
void operator()(const usertype_constructor&) {}
void operator()(const temporary&) {}
};
visitor v {
.restrictions = std::vector<expression>(),
.column = column,
.current_binary_operator = nullptr,
};
expr::visit(v, expr);
return std::move(v.restrictions);
}
constant::constant(cql3::raw_value val, data_type typ)
: value(std::move(val)), type(std::move(typ)) {
@@ -2404,55 +1837,6 @@ type_of(const expression& e) {
}, e);
}
// Returns a reference to a column_value inside `e` when every column_value in `e`
// refers to the same column definition (compared by pointer), and std::nullopt when
// `e` mentions no column at all or mentions more than one distinct column.
//
// The expression must be prepared: finding any unresolved_identifier in it is
// reported through on_internal_error.
static std::optional<std::reference_wrapper<const column_value>> get_single_column_restriction_column(const expression& e) {
    const bool has_unresolved =
        find_in_expression<unresolved_identifier>(e, [](const auto&) {return true;}) != nullptr;
    if (has_unresolved) {
        on_internal_error(expr_logger,
            seastar::format("get_single_column_restriction_column expects a prepared expression, but it's not: {}", e));
    }

    // First column_value encountered, and whether any later one named a different column.
    const column_value* first_column = nullptr;
    bool saw_other_column = false;

    for_each_expression<column_value>(e, [&](const column_value& cval) {
        if (first_column == nullptr) {
            first_column = &cval;
        } else if (cval.col != first_column->col) {
            saw_other_column = true;
        }
    });

    if (first_column == nullptr || saw_other_column) {
        return std::nullopt;
    }
    return std::cref(*first_column);
}
// True when all column_values in `e` refer to one and the same column
// (and there is at least one of them).
bool is_single_column_restriction(const expression& e) {
    const auto only_column = get_single_column_restriction_column(e);
    return static_cast<bool>(only_column);
}
// Returns the single column restricted by `e`.
// Calls on_internal_error when `e` is not a single-column expression.
// The returned reference points into `e`.
const column_value& get_the_only_column(const expression& e) {
    const auto only_column = get_single_column_restriction_column(e);

    if (!only_column) {
        on_internal_error(expr_logger,
            format("get_the_only_column - bad expression: {}", e));
    }

    return only_column->get();
}
bool schema_pos_column_definition_comparator::operator()(const column_definition *def1, const column_definition *def2) const {
auto column_pos = [](const column_definition* cdef) -> uint32_t {
if (cdef->is_primary_key()) {
@@ -2498,28 +1882,6 @@ const column_definition* get_last_column_def(const expression& e) {
return sorted_defs.back();
}
// Builds a map from each column definition returned by get_sorted_column_defs(e)
// to a conjunction of the single-column restrictions that `e` places on that column.
single_column_restrictions_map get_single_column_restrictions_map(const expression& e) {
    single_column_restrictions_map by_column;

    for (const column_definition* def : get_sorted_column_defs(e)) {
        by_column.emplace(def, expression(conjunction {
            .children = extract_single_column_restrictions_for_column(e, *def)
        }));
    }

    return by_column;
}
// An expression is an empty restriction when the search for a node that is
// not a conjunction comes back empty-handed.
bool is_empty_restriction(const expression& e) {
    return !recurse_until(e, [](const expression& node) -> bool {
        return !is<conjunction>(node);
    });
}
sstring get_columns_in_commons(const expression& a, const expression& b) {
std::vector<const column_definition*> ours = get_sorted_column_defs(a);
std::vector<const column_definition*> theirs = get_sorted_column_defs(b);
@@ -2539,43 +1901,6 @@ sstring get_columns_in_commons(const expression& a, const expression& b) {
return str;
}
// Finds the value that `e` forces `cdef` to take.
//
// Returns std::nullopt when the set of possible values is empty, and the value
// itself when there is exactly one. Calls on_internal_error when the possible
// values form a range or when there is more than one discrete value.
bytes_opt value_for(const column_definition& cdef, const expression& e, const query_options& options) {
    const value_set candidates = possible_column_values(&cdef, e, options);

    // value_set holds either a discrete list or an interval; only the list is acceptable here.
    const value_list* discrete = std::get_if<value_list>(&candidates);
    if (discrete == nullptr) {
        on_internal_error(expr_logger, format("expr::value_for - possible values are a range: {}", e));
    }

    if (discrete->empty()) {
        return std::nullopt;
    }
    if (discrete->size() != 1) {
        on_internal_error(expr_logger, format("expr::value_for - multiple possible values for column: {}", e));
    }
    return to_bytes(discrete->front());
}
bool contains_multi_column_restriction(const expression& e) {
const binary_operator* find_res = find_binop(e, [](const binary_operator& binop) {
return is<tuple_constructor>(binop.lhs);
});
return find_res != nullptr;
}
bool has_only_eq_binops(const expression& e) {
const expr::binary_operator* non_eq_binop = find_in_expression<expr::binary_operator>(e,
[](const expr::binary_operator& binop) {
return binop.op != expr::oper_t::EQ;
}
);
return non_eq_binop == nullptr;
}
unset_bind_variable_guard::unset_bind_variable_guard(const expr::expression& e) {
if (auto bv = expr::as_if<expr::bind_variable>(&e)) {
_var = *bv;

File diff suppressed because it is too large Load Diff

View File

@@ -422,6 +422,22 @@ statement_restrictions analyze_statement_restrictions(
check_indexes do_check_indexes);
// Extracts all binary operators which have the given column on their left hand side.
// Extracts only single-column restrictions.
// Does not include multi-column restrictions.
// Does not include token() restrictions.
// Does not include boolean constant restrictions.
// For example "WHERE c = 1 AND (a, c) = (2, 1) AND token(p) < 2 AND FALSE" will return {"c = 1"}.
std::vector<expr::expression> extract_single_column_restrictions_for_column(const expr::expression&, const column_definition&);
// Checks whether this expression is empty - doesn't restrict anything
bool is_empty_restriction(const expr::expression&);
// Finds the value of the given column in the expression
// In case of multiple possible values, calls on_internal_error
bytes_opt value_for(const column_definition&, const expr::expression&, const query_options&);
}
}

View File

@@ -1083,7 +1083,7 @@ lw_shared_ptr<const service::pager::paging_state> indexed_table_select_statement
auto& last_base_pk = last_pos.partition;
auto* last_base_ck = last_pos.position.has_key() ? &last_pos.position.key() : nullptr;
bytes_opt indexed_column_value = expr::value_for(*cdef, _used_index_restrictions, options);
bytes_opt indexed_column_value = restrictions::value_for(*cdef, _used_index_restrictions, options);
auto index_pk = [&]() {
if (_index.metadata().local()) {
@@ -1288,7 +1288,7 @@ dht::partition_range_vector indexed_table_select_statement::get_partition_ranges
throw exceptions::invalid_request_exception("Indexed column not found in schema");
}
bytes_opt value = expr::value_for(*cdef, _used_index_restrictions, options);
bytes_opt value = restrictions::value_for(*cdef, _used_index_restrictions, options);
if (value) {
auto pk = partition_key::from_single_value(*_view_schema, *value);
auto dk = dht::decorate_key(*_view_schema, pk);
@@ -2412,8 +2412,8 @@ static bool needs_allow_filtering_anyway(
const auto& pk_restrictions = restrictions.get_partition_key_restrictions();
// Even if no filtering happens on the coordinator, we still warn about poor performance when partition
// slice is defined but in potentially unlimited number of partitions (see #7608).
if ((expr::is_empty_restriction(pk_restrictions) || restrictions.has_token_restrictions()) // Potentially unlimited partitions.
&& !expr::is_empty_restriction(ck_restrictions) // Slice defined.
if ((restrictions::is_empty_restriction(pk_restrictions) || restrictions.has_token_restrictions()) // Potentially unlimited partitions.
&& !restrictions::is_empty_restriction(ck_restrictions) // Slice defined.
&& !restrictions.uses_secondary_indexing()) { // Base-table is used. (Index-table use always limits partitions.)
if (strict_allow_filtering == flag_t::WARN) {
warnings.emplace_back("This query should use ALLOW FILTERING and will be rejected in future versions.");

View File

@@ -410,7 +410,7 @@ BOOST_AUTO_TEST_CASE(expression_extract_column_restrictions) {
column_definition col_r3 = make_column("r3", column_kind::regular_column, 2);
// Empty input test
assert_expr_vec_eq(extract_single_column_restrictions_for_column(conjunction{}, col_pk1), {});
assert_expr_vec_eq(cql3::restrictions::extract_single_column_restrictions_for_column(conjunction{}, col_pk1), {});
// BIG_WHERE test
// big_where contains:
@@ -494,24 +494,24 @@ BOOST_AUTO_TEST_CASE(expression_extract_column_restrictions) {
expression big_where_expr = conjunction{std::move(big_where)};
assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_pk1),
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_pk1),
{pk1_restriction});
assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_pk2),
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_pk2),
{pk2_restriction, pk2_restriction2});
assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_ck1),
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_ck1),
{ck1_restriction});
assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_ck2),
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_ck2),
{ck2_restriction});
assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_r1),
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r1),
{r1_restriction, r1_restriction2, r1_restriction3});
assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_r2),
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r2),
{r2_restriction});
assert_expr_vec_eq(extract_single_column_restrictions_for_column(big_where_expr, col_r3),
assert_expr_vec_eq(restrictions::extract_single_column_restrictions_for_column(big_where_expr, col_r3),
{});
}