/* * Copyright (C) 2020 ScyllaDB */ /* * This file is part of Scylla. * * Scylla is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Scylla is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Scylla. If not, see <http://www.gnu.org/licenses/>. */ #include "expression.hh" #include #include #include #include #include #include "cql3/constants.hh" #include "cql3/lists.hh" #include "cql3/statements/request_validations.hh" #include "cql3/tuples.hh" #include "index/secondary_index_manager.hh" #include "types/list.hh" #include "types/map.hh" #include "types/set.hh" #include "utils/like_matcher.hh" namespace cql3 { namespace expr { using boost::adaptors::filtered; using boost::adaptors::transformed; namespace { /** Returns the serialized value of column cdef for one row. Partition-key and clustering-key columns are read from the matching component of key / ckey; any other column kind is looked up in cells by cdef.id. Yields std::nullopt when cells has no such cell or when the cell is dead at time now. Only atomic columns are supported for the cell lookup (enforced with assert). */ static bytes_opt do_get_value(const schema& schema, const column_definition& cdef, const partition_key& key, const clustering_key_prefix& ckey, const row& cells, gc_clock::time_point now) { switch (cdef.kind) { case column_kind::partition_key: return to_bytes(key.get_component(schema, cdef.component_index())); case column_kind::clustering_key: return to_bytes(ckey.get_component(schema, cdef.component_index())); default: auto cell = cells.find_cell(cdef.id); if (!cell) { return std::nullopt; } assert(cdef.is_atomic()); auto c = cell->as_atomic_cell(cdef); return c.is_dead(now) ? std::nullopt : bytes_opt(to_bytes(c.value())); } } using children_t = std::vector; // conjunction's children. 
children_t explode_conjunction(expression e) { return std::visit(overloaded_functor{ [] (const conjunction& c) { return std::move(c.children); }, [&] (const auto&) { return children_t{std::move(e)}; }, }, e); } using cql3::selection::selection; /// Serialized values for all types of cells, plus selection (to find a column's index) and options (for /// subscript term's value). struct row_data_from_partition_slice { const std::vector& partition_key; const std::vector& clustering_key; const std::vector& other_columns; const selection& sel; }; /// Data used to derive cell values from a mutation. struct row_data_from_mutation { // Underscores avoid name clashes. const partition_key& partition_key_; const clustering_key_prefix& clustering_key_; const row& other_columns; const schema& schema_; gc_clock::time_point now; }; /// Everything needed to compute column values during restriction evaluation. struct column_value_eval_bag { const query_options& options; // For evaluating subscript terms. std::variant row_data; }; /// Returns col's value from queried data. bytes_opt get_value_from_partition_slice( const column_value& col, row_data_from_partition_slice data, const query_options& options) { auto cdef = col.col; if (col.sub) { auto col_type = static_pointer_cast(cdef->type); if (!col_type->is_map()) { throw exceptions::invalid_request_exception(format("subscripting non-map column {}", cdef->name_as_text())); } const auto deserialized = cdef->type->deserialize(*data.other_columns[data.sel.index_of(*cdef)]); const auto& data_map = value_cast(deserialized); const auto key = col.sub->bind_and_get(options); auto&& key_type = col_type->name_comparator(); const auto found = with_linearized(*key, [&] (bytes_view key_bv) { using entry = std::pair; return std::find_if(data_map.cbegin(), data_map.cend(), [&] (const entry& element) { return key_type->compare(element.first.serialize_nonnull(), key_bv) == 0; }); }); return found == data_map.cend() ? 
bytes_opt() : bytes_opt(found->second.serialize_nonnull()); } else { switch (cdef->kind) { case column_kind::partition_key: return data.partition_key[cdef->id]; case column_kind::clustering_key: return data.clustering_key[cdef->id]; case column_kind::static_column: case column_kind::regular_column: return data.other_columns[data.sel.index_of(*cdef)]; default: throw exceptions::unsupported_operation_exception("Unknown column kind"); } } } /// Returns col's value from a mutation. bytes_opt get_value_from_mutation(const column_value& col, row_data_from_mutation data) { return do_get_value( data.schema_, *col.col, data.partition_key_, data.clustering_key_, data.other_columns, data.now); } /// Returns col's value from the fetched data. bytes_opt get_value(const column_value& col, const column_value_eval_bag& bag) { using std::placeholders::_1; return std::visit(overloaded_functor{ std::bind(get_value_from_mutation, col, _1), std::bind(get_value_from_partition_slice, col, _1, bag.options), }, bag.row_data); } /// Type for comparing results of get_value(). const abstract_type* get_value_comparator(const column_definition* cdef) { return &cdef->type->without_reversed(); } /// Type for comparing results of get_value(). const abstract_type* get_value_comparator(const column_value& cv) { return cv.sub ? static_pointer_cast(cv.col->type)->value_comparator().get() : get_value_comparator(cv.col); } /// If t represents a tuple value, returns that value. Otherwise, null. /// /// Useful for checking binary_operator::rhs, which packs multiple values into a single term when lhs is itself /// a tuple. NOT useful for the IN operator, whose rhs is either a list or tuples::in_value. ::shared_ptr get_tuple(term& t, const query_options& opts) { return dynamic_pointer_cast(t.bind(opts)); } /// True iff lhs's value equals rhs. 
bool equal(const bytes_opt& rhs, const column_value& lhs, const column_value_eval_bag& bag) { if (!rhs) { return false; } const auto value = get_value(lhs, bag); if (!value) { return false; } return get_value_comparator(lhs)->equal(*value, *rhs); } /// Convenience overload for term. bool equal(term& rhs, const column_value& lhs, const column_value_eval_bag& bag) { return equal(to_bytes_opt(rhs.bind_and_get(bag.options)), lhs, bag); } /// True iff columns' values equal t. bool equal(term& t, const std::vector& columns, const column_value_eval_bag& bag) { const auto tup = get_tuple(t, bag.options); if (!tup) { throw exceptions::invalid_request_exception("multi-column equality has right-hand side that isn't a tuple"); } const auto& rhs = tup->get_elements(); if (rhs.size() != columns.size()) { throw exceptions::invalid_request_exception( format("tuple equality size mismatch: {} elements on left-hand side, {} on right", columns.size(), rhs.size())); } return boost::equal(rhs, columns, [&] (const bytes_opt& b, const column_value& lhs) { return equal(b, lhs, bag); }); } /// True iff lhs is limited by rhs in the manner prescribed by op. bool limits(bytes_view lhs, oper_t op, bytes_view rhs, const abstract_type& type) { const auto cmp = type.compare(lhs, rhs); switch (op) { case oper_t::LT: return cmp < 0; case oper_t::LTE: return cmp <= 0; case oper_t::GT: return cmp > 0; case oper_t::GTE: return cmp >= 0; case oper_t::EQ: return cmp == 0; case oper_t::NEQ: return cmp != 0; default: throw std::logic_error(format("limits() called on non-compare op {}", op)); } } /// True iff the column value is limited by rhs in the manner prescribed by op. bool limits(const column_value& col, oper_t op, term& rhs, const column_value_eval_bag& bag) { if (!is_slice(op)) { // For EQ or NEQ, use equal(). throw std::logic_error("limits() called on non-slice op"); } auto lhs = get_value(col, bag); if (!lhs) { return false; } const auto b = to_bytes_opt(rhs.bind_and_get(bag.options)); return b ? 
limits(*lhs, op, *b, *get_value_comparator(col)) : false; } /// True iff the column values are limited by t in the manner prescribed by op. bool limits(const std::vector& columns, const oper_t op, term& t, const column_value_eval_bag& bag) { if (!is_slice(op)) { // For EQ or NEQ, use equal(). throw std::logic_error("limits() called on non-slice op"); } const auto tup = get_tuple(t, bag.options); if (!tup) { throw exceptions::invalid_request_exception( "multi-column comparison has right-hand side that isn't a tuple"); } const auto& rhs = tup->get_elements(); if (rhs.size() != columns.size()) { throw exceptions::invalid_request_exception( format("tuple comparison size mismatch: {} elements on left-hand side, {} on right", columns.size(), rhs.size())); } for (size_t i = 0; i < rhs.size(); ++i) { const auto cmp = get_value_comparator(columns[i])->compare( // CQL dictates that columns[i] is a clustering column and non-null. *get_value(columns[i], bag), *rhs[i]); // If the components aren't equal, then we just learned the LHS/RHS order. if (cmp < 0) { if (op == oper_t::LT || op == oper_t::LTE) { return true; } else if (op == oper_t::GT || op == oper_t::GTE) { return false; } else { throw std::logic_error("Unknown slice operator"); } } else if (cmp > 0) { if (op == oper_t::LT || op == oper_t::LTE) { return false; } else if (op == oper_t::GT || op == oper_t::GTE) { return true; } else { throw std::logic_error("Unknown slice operator"); } } // Otherwise, we don't know the LHS/RHS order, so check the next component. } // Getting here means LHS == RHS. return op == oper_t::LTE || op == oper_t::GTE; } /// True iff collection (list, set, or map) contains value. bool contains(const data_value& collection, const raw_value_view& value) { if (!value) { return true; // Compatible with old code, which skips null terms in value comparisons. } auto col_type = static_pointer_cast(collection.type()); auto&& element_type = col_type->is_set() ? 
col_type->name_comparator() : col_type->value_comparator(); return with_linearized(*value, [&] (bytes_view val) { auto exists_in = [&](auto&& range) { auto found = std::find_if(range.begin(), range.end(), [&] (auto&& element) { return element_type->compare(element.serialize_nonnull(), val) == 0; }); return found != range.end(); }; if (col_type->is_list()) { return exists_in(value_cast(collection)); } else if (col_type->is_set()) { return exists_in(value_cast(collection)); } else if (col_type->is_map()) { auto data_map = value_cast(collection); using entry = std::pair; return exists_in(data_map | transformed([] (const entry& e) { return e.second; })); } else { throw std::logic_error("unsupported collection type in a CONTAINS expression"); } }); } /// True iff a column is a collection containing value. bool contains(const column_value& col, const raw_value_view& value, const column_value_eval_bag& bag) { if (col.sub) { throw exceptions::unsupported_operation_exception("CONTAINS lhs is subscripted"); } const auto collection = get_value(col, bag); if (collection) { return contains(col.col->type->deserialize(*collection), value); } else { return false; } } /// True iff a column is a map containing \p key. bool contains_key(const column_value& col, cql3::raw_value_view key, const column_value_eval_bag& bag) { if (col.sub) { throw exceptions::unsupported_operation_exception("CONTAINS KEY lhs is subscripted"); } if (!key) { return true; // Compatible with old code, which skips null terms in key comparisons. 
} auto type = col.col->type; const auto collection = get_value(col, bag); if (!collection) { return false; } const auto data_map = value_cast(type->deserialize(*collection)); auto key_type = static_pointer_cast(type)->name_comparator(); auto found = with_linearized(*key, [&] (bytes_view k_bv) { using entry = std::pair; return std::find_if(data_map.begin(), data_map.end(), [&] (const entry& element) { return key_type->compare(element.first.serialize_nonnull(), k_bv) == 0; }); }); return found != data_map.end(); } /// Fetches the next cell value from iter and returns its (possibly null) value. bytes_opt next_value(query::result_row_view::iterator_type& iter, const column_definition* cdef) { if (cdef->type->is_multi_cell()) { auto cell = iter.next_collection_cell(); if (cell) { return linearized(*cell); } } else { auto cell = iter.next_atomic_cell(); if (cell) { return linearized(cell->value()); } } return std::nullopt; } /// Returns values of non-primary-key columns from selection. The kth element of the result /// corresponds to the kth column in selection. std::vector get_non_pk_values(const selection& selection, const query::result_row_view& static_row, const query::result_row_view* row) { const auto& cols = selection.get_columns(); std::vector vals(cols.size()); auto static_row_iterator = static_row.iterator(); auto row_iterator = row ? std::optional(row->iterator()) : std::nullopt; for (size_t i = 0; i < cols.size(); ++i) { switch (cols[i]->kind) { case column_kind::static_column: vals[i] = next_value(static_row_iterator, cols[i]); break; case column_kind::regular_column: if (row) { vals[i] = next_value(*row_iterator, cols[i]); } break; default: // Skip. break; } } return vals; } /// True iff cv matches the CQL LIKE pattern. 
bool like(const column_value& cv, const bytes_opt& pattern, const column_value_eval_bag& bag) { if (!cv.col->type->is_string()) { throw exceptions::invalid_request_exception( format("LIKE is allowed only on string types, which {} is not", cv.col->name_as_text())); } auto value = get_value(cv, bag); // TODO: reuse matchers. return (pattern && value) ? like_matcher(*pattern)(*value) : false; } /// True iff the column value is in the set defined by rhs. bool is_one_of(const column_value& col, term& rhs, const column_value_eval_bag& bag) { // RHS is prepared differently for different CQL cases. Cast it dynamically to discern which case this is. if (auto dv = dynamic_cast(&rhs)) { // This is `a IN (1,2,3)`. RHS elements are themselves terms. return boost::algorithm::any_of(dv->get_elements(), [&] (const ::shared_ptr& t) { return equal(*t, col, bag); }); } else if (auto mkr = dynamic_cast(&rhs)) { // This is `a IN ?`. RHS elements are values representable as bytes_opt. const auto values = static_pointer_cast(mkr->bind(bag.options)); statements::request_validations::check_not_null( values, "Invalid null value for column %s", col.col->name_as_text()); return boost::algorithm::any_of(values->get_elements(), [&] (const bytes_opt& b) { return equal(b, col, bag); }); } throw std::logic_error("unexpected term type in is_one_of(single column)"); } /// True iff the tuple of column values is in the set defined by rhs. bool is_one_of(const std::vector& cvs, term& rhs, const column_value_eval_bag& bag) { // RHS is prepared differently for different CQL cases. Cast it dynamically to discern which case this is. if (auto dv = dynamic_cast(&rhs)) { // This is `(a,b) IN ((1,1),(2,2),(3,3))`. RHS elements are themselves terms. return boost::algorithm::any_of(dv->get_elements(), [&] (const ::shared_ptr& t) { return equal(*t, cvs, bag); }); } else if (auto mkr = dynamic_cast(&rhs)) { // This is `(a,b) IN ?`. RHS elements are themselves tuples, represented as vector. 
const auto marker_value = static_pointer_cast(mkr->bind(bag.options)); return boost::algorithm::any_of(marker_value->get_split_values(), [&] (const std::vector& el) { return boost::equal(cvs, el, [&] (const column_value& c, const bytes_opt& b) { return equal(b, c, bag); }); }); } throw std::logic_error("unexpected term type in is_one_of(multi-column)"); } /// True iff op means bnd type of bound. bool matches(oper_t op, statements::bound bnd) { switch (op) { case oper_t::GT: case oper_t::GTE: return is_start(bnd); // These set a lower bound. case oper_t::LT: case oper_t::LTE: return is_end(bnd); // These set an upper bound. case oper_t::EQ: return true; // Bounds from both sides. default: return false; } } const value_set empty_value_set = value_list{}; const value_set unbounded_value_set = nonwrapping_range::make_open_ended_both_sides(); struct intersection_visitor { const abstract_type* type; value_set operator()(const value_list& a, const value_list& b) const { value_list common; common.reserve(std::max(a.size(), b.size())); boost::set_intersection(a, b, back_inserter(common), type->as_less_comparator()); return std::move(common); } value_set operator()(const nonwrapping_range& a, const value_list& b) const { const auto common = b | filtered([&] (const bytes& el) { return a.contains(el, type->as_tri_comparator()); }); return value_list(common.begin(), common.end()); } value_set operator()(const value_list& a, const nonwrapping_range& b) const { return (*this)(b, a); } value_set operator()(const nonwrapping_range& a, const nonwrapping_range& b) const { const auto common_range = a.intersection(b, type->as_tri_comparator()); return common_range ? 
*common_range : empty_value_set; } }; value_set intersection(value_set a, value_set b, const abstract_type* type) { return std::visit(intersection_visitor{type}, std::move(a), std::move(b)); } bool is_satisfied_by(const binary_operator& opr, const column_value_eval_bag& bag) { return std::visit(overloaded_functor{ [&] (const column_value& col) { if (opr.op == oper_t::EQ) { return equal(*opr.rhs, col, bag); } else if (opr.op == oper_t::NEQ) { return !equal(*opr.rhs, col, bag); } else if (is_slice(opr.op)) { return limits(col, opr.op, *opr.rhs, bag); } else if (opr.op == oper_t::CONTAINS) { return contains(col, opr.rhs->bind_and_get(bag.options), bag); } else if (opr.op == oper_t::CONTAINS_KEY) { return contains_key(col, opr.rhs->bind_and_get(bag.options), bag); } else if (opr.op == oper_t::LIKE) { return like(col, to_bytes_opt(opr.rhs->bind_and_get(bag.options)), bag); } else if (opr.op == oper_t::IN) { return is_one_of(col, *opr.rhs, bag); } else { throw exceptions::unsupported_operation_exception(format("Unhandled binary_operator: {}", opr)); } }, [&] (const std::vector& cvs) { if (opr.op == oper_t::EQ) { return equal(*opr.rhs, cvs, bag); } else if (is_slice(opr.op)) { return limits(cvs, opr.op, *opr.rhs, bag); } else if (opr.op == oper_t::IN) { return is_one_of(cvs, *opr.rhs, bag); } else { throw exceptions::unsupported_operation_exception( format("Unhandled multi-column binary_operator: {}", opr)); } }, [] (const token& tok) -> bool { // The RHS value was already used to ensure we fetch only rows in the specified // token range. It is impossible for any fetched row not to match now. 
return true; }, }, opr.lhs); } bool is_satisfied_by(const expression& restr, const column_value_eval_bag& bag) { return std::visit(overloaded_functor{ [&] (bool v) { return v; }, [&] (const conjunction& conj) { return boost::algorithm::all_of(conj.children, [&] (const expression& c) { return is_satisfied_by(c, bag); }); }, [&] (const binary_operator& opr) { return is_satisfied_by(opr, bag); }, }, restr); } /// If t is a tuple, binds and gets its k-th element. Otherwise, binds and gets t's whole value. bytes_opt get_kth(size_t k, const query_options& options, const ::shared_ptr& t) { auto bound = t->bind(options); if (auto tup = dynamic_pointer_cast(bound)) { return tup->get_elements()[k]; } else { throw std::logic_error("non-tuple RHS for multi-column IN"); } } template value_list to_sorted_vector(Range r, const serialized_compare& comparator) { BOOST_CONCEPT_ASSERT((boost::ForwardRangeConcept)); value_list tmp(r.begin(), r.end()); // Need random-access range to sort (r is not necessarily random-access). const auto unique = boost::unique(boost::sort(tmp, comparator)); return value_list(unique.begin(), unique.end()); } const auto non_null = boost::adaptors::filtered([] (const bytes_opt& b) { return b.has_value(); }); const auto deref = boost::adaptors::transformed([] (const bytes_opt& b) { return b.value(); }); /// Returns possible values from t, which must be RHS of IN. value_list get_IN_values( const ::shared_ptr& t, const query_options& options, const serialized_compare& comparator, sstring_view column_name) { // RHS is prepared differently for different CQL cases. Cast it dynamically to discern which case this is. if (auto dv = dynamic_pointer_cast(t)) { // Case `a IN (1,2,3)`. 
const auto result_range = dv->get_elements() | boost::adaptors::transformed([&] (const ::shared_ptr& t) { return to_bytes_opt(t->bind_and_get(options)); }) | non_null | deref; return to_sorted_vector(std::move(result_range), comparator); } else if (auto mkr = dynamic_pointer_cast(t)) { // Case `a IN ?`. Collect all list-element values. const auto val = mkr->bind(options); if (val == constants::UNSET_VALUE) { throw exceptions::invalid_request_exception(format("Invalid unset value for column {}", column_name)); } statements::request_validations::check_not_null(val, "Invalid null value for column %s", column_name); return to_sorted_vector(static_pointer_cast(val)->get_elements() | non_null | deref, comparator); } throw std::logic_error(format("get_IN_values(single column) on invalid term {}", *t)); } /// Returns possible values for k-th column from t, which must be RHS of IN. value_list get_IN_values(const ::shared_ptr& t, size_t k, const query_options& options, const serialized_compare& comparator) { // RHS is prepared differently for different CQL cases. Cast it dynamically to discern which case this is. if (auto dv = dynamic_pointer_cast(t)) { // Case `(a,b) in ((1,1),(2,2),(3,3))`. Get kth value from each term element. const auto result_range = dv->get_elements() | boost::adaptors::transformed(std::bind_front(get_kth, k, options)) | non_null | deref; return to_sorted_vector(std::move(result_range), comparator); } else if (auto mkr = dynamic_pointer_cast(t)) { // Case `(a,b) IN ?`. Get kth value from each vector element. const auto val = static_pointer_cast(mkr->bind(options)); const auto split_values = val->get_split_values(); // Need lvalue from which to make std::view. 
const auto result_range = split_values | boost::adaptors::transformed([k] (const std::vector& v) { return v[k]; }) | non_null | deref; return to_sorted_vector(std::move(result_range), comparator); } throw std::logic_error(format("get_IN_values(multi-column) on invalid term {}", *t)); } static constexpr bool inclusive = true, exclusive = false; } // anonymous namespace expression make_conjunction(expression a, expression b) { auto children = explode_conjunction(std::move(a)); boost::copy(explode_conjunction(std::move(b)), back_inserter(children)); return conjunction{std::move(children)}; } bool is_satisfied_by( const expression& restr, const std::vector& partition_key, const std::vector& clustering_key, const query::result_row_view& static_row, const query::result_row_view* row, const selection& selection, const query_options& options) { const auto regulars = get_non_pk_values(selection, static_row, row); return is_satisfied_by( restr, {options, row_data_from_partition_slice{partition_key, clustering_key, regulars, selection}}); } bool is_satisfied_by( const expression& restr, const schema& schema, const partition_key& key, const clustering_key_prefix& ckey, const row& cells, const query_options& options, gc_clock::time_point now) { return is_satisfied_by(restr, {options, row_data_from_mutation{key, ckey, cells, schema, now}}); } std::vector first_multicolumn_bound( const expression& restr, const query_options& options, statements::bound bnd) { auto found = find_atom(restr, [bnd] (const binary_operator& oper) { return matches(oper.op, bnd) && is_multi_column(oper); }); if (found) { return static_pointer_cast(found->rhs->bind(options))->get_elements(); } else { return std::vector{}; } } template nonwrapping_range to_range(oper_t op, const T& val) { static constexpr bool inclusive = true, exclusive = false; switch (op) { case oper_t::EQ: return nonwrapping_range::make_singular(val); case oper_t::GT: return nonwrapping_range::make_starting_with(interval_bound(val, 
exclusive)); case oper_t::GTE: return nonwrapping_range::make_starting_with(interval_bound(val, inclusive)); case oper_t::LT: return nonwrapping_range::make_ending_with(interval_bound(val, exclusive)); case oper_t::LTE: return nonwrapping_range::make_ending_with(interval_bound(val, inclusive)); default: throw std::logic_error(format("to_range: unknown comparison operator {}", op)); } } template nonwrapping_range to_range(oper_t, const clustering_key_prefix&); value_set possible_lhs_values(const column_definition* cdef, const expression& expr, const query_options& options) { const auto type = cdef ? get_value_comparator(cdef) : long_type.get(); return std::visit(overloaded_functor{ [] (bool b) { return b ? unbounded_value_set : empty_value_set; }, [&] (const conjunction& conj) { return boost::accumulate(conj.children, unbounded_value_set, [&] (const value_set& acc, const expression& child) { return intersection( std::move(acc), possible_lhs_values(cdef, child, options), type); }); }, [&] (const binary_operator& oper) -> value_set { return std::visit(overloaded_functor{ [&] (const column_value& col) -> value_set { if (!cdef || cdef != col.col) { return unbounded_value_set; } if (is_compare(oper.op)) { const auto val = to_bytes_opt(oper.rhs->bind_and_get(options)); if (!val) { return empty_value_set; // All NULL comparisons fail; no column values match. } return oper.op == oper_t::EQ ? 
value_set(value_list{*val}) : to_range(oper.op, *val); } else if (oper.op == oper_t::IN) { return get_IN_values(oper.rhs, options, type->as_less_comparator(), cdef->name_as_text()); } throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper)); }, [&] (const std::vector& cvs) -> value_set { if (!cdef) { return unbounded_value_set; } const auto found = boost::find_if( cvs, [&] (const column_value& c) { return c.col == cdef; }); if (found == cvs.end()) { return unbounded_value_set; } const auto column_index_on_lhs = std::distance(cvs.begin(), found); if (is_compare(oper.op)) { // RHS must be a tuple due to upstream checks. bytes_opt val = get_tuple(*oper.rhs, options)->get_elements()[column_index_on_lhs]; if (!val) { return empty_value_set; // All NULL comparisons fail; no column values match. } if (oper.op == oper_t::EQ) { return value_list{*val}; } if (column_index_on_lhs > 0) { // A multi-column comparison restricts only the first column, because // comparison is lexicographical. return unbounded_value_set; } return to_range(oper.op, *val); } else if (oper.op == oper_t::IN) { return get_IN_values(oper.rhs, column_index_on_lhs, options, type->as_less_comparator()); } return unbounded_value_set; }, [&] (token) -> value_set { if (cdef) { return unbounded_value_set; } const auto val = to_bytes_opt(oper.rhs->bind_and_get(options)); if (!val) { return empty_value_set; // All NULL comparisons fail; no token values match. } if (oper.op == oper_t::EQ) { return value_list{*val}; } else if (oper.op == oper_t::GT) { return nonwrapping_range::make_starting_with(interval_bound(*val, exclusive)); } else if (oper.op == oper_t::GTE) { return nonwrapping_range::make_starting_with(interval_bound(*val, inclusive)); } static const bytes MININT = serialized(std::numeric_limits::min()), MAXINT = serialized(std::numeric_limits::max()); // Undocumented feature: when the user types `token(...) < MININT`, we interpret // that as MAXINT for some reason. 
const auto adjusted_val = (*val == MININT) ? serialized(MAXINT) : *val; if (oper.op == oper_t::LT) { return nonwrapping_range::make_ending_with(interval_bound(adjusted_val, exclusive)); } else if (oper.op == oper_t::LTE) { return nonwrapping_range::make_ending_with(interval_bound(adjusted_val, inclusive)); } throw std::logic_error(format("get_token_interval invalid operator {}", oper.op)); }, }, oper.lhs); }, }, expr); } nonwrapping_range to_range(const value_set& s) { return std::visit(overloaded_functor{ [] (const nonwrapping_range& r) { return r; }, [] (const value_list& lst) { if (lst.size() != 1) { throw std::logic_error(format("to_range called on list of size {}", lst.size())); } return nonwrapping_range::make_singular(lst[0]); }, }, s); } bool is_supported_by(const expression& expr, const secondary_index::index& idx) { using std::placeholders::_1; return std::visit(overloaded_functor{ [&] (const conjunction& conj) { return boost::algorithm::all_of(conj.children, std::bind(is_supported_by, _1, idx)); }, [&] (const binary_operator& oper) { return std::visit(overloaded_functor{ [&] (const column_value& col) { return idx.supports_expression(*col.col, oper.op); }, [&] (const std::vector& cvs) { if (cvs.size() == 1) { return idx.supports_expression(*cvs[0].col, oper.op); } // We don't use index table for multi-column restrictions, as it cannot avoid filtering. return false; }, [&] (const token&) { return false; }, }, oper.lhs); }, [] (const auto& default_case) { return false; } }, expr); } bool has_supporting_index( const expression& expr, const secondary_index::secondary_index_manager& index_manager, allow_local_index allow_local) { const auto indexes = index_manager.list_indexes(); const auto support = std::bind(is_supported_by, expr, std::placeholders::_1); return allow_local ? 
boost::algorithm::any_of(indexes, support) : boost::algorithm::any_of( indexes | filtered([] (const secondary_index::index& i) { return !i.metadata().local(); }), support); } std::ostream& operator<<(std::ostream& os, const column_value& cv) { os << cv.col->name_as_text(); if (cv.sub) { os << '[' << *cv.sub << ']'; } return os; } std::ostream& operator<<(std::ostream& os, const expression& expr) { std::visit(overloaded_functor{ [&] (bool b) { os << (b ? "TRUE" : "FALSE"); }, [&] (const conjunction& conj) { fmt::print(os, "({})", fmt::join(conj.children, ") AND (")); }, [&] (const binary_operator& opr) { std::visit(overloaded_functor{ [&] (const token& t) { os << "TOKEN"; }, [&] (const column_value& col) { fmt::print(os, "{}", col); }, [&] (const std::vector& cvs) { fmt::print(os, "({})", fmt::join(cvs, ",")); }, }, opr.lhs); os << ' ' << opr.op << ' ' << *opr.rhs; }, }, expr); return os; } sstring to_string(const expression& expr) { return fmt::format("{}", expr); } bool is_on_collection(const binary_operator& b) { if (b.op == oper_t::CONTAINS || b.op == oper_t::CONTAINS_KEY) { return true; } if (auto cvs = std::get_if>(&b.lhs)) { return boost::algorithm::any_of(*cvs, [] (const column_value& v) { return v.sub; }); } return false; } expression replace_column_def(const expression& expr, const column_definition* new_cdef) { return std::visit(overloaded_functor{ [] (bool b){ return expression(b); }, [&] (const conjunction& conj) { const auto applied = conj.children | transformed( std::bind(replace_column_def, std::placeholders::_1, new_cdef)); return expression(conjunction{std::vector(applied.begin(), applied.end())}); }, [&] (const binary_operator& oper) { return std::visit(overloaded_functor{ [&] (const column_value& col) { return expression(binary_operator{column_value{new_cdef}, oper.op, oper.rhs}); }, [&] (const std::vector& cvs) -> expression { throw std::logic_error(format("replace_column_def invalid LHS: {}", to_string(oper))); }, [&] (const token&) { return 
expr; }, }, oper.lhs); }, }, expr); } std::ostream& operator<<(std::ostream& s, oper_t op) { switch (op) { case oper_t::EQ: return s << "="; case oper_t::NEQ: return s << "!="; case oper_t::LT: return s << "<"; case oper_t::LTE: return s << "<="; case oper_t::GT: return s << ">"; case oper_t::GTE: return s << ">="; case oper_t::IN: return s << "IN"; case oper_t::CONTAINS: return s << "CONTAINS"; case oper_t::CONTAINS_KEY: return s << "CONTAINS KEY"; case oper_t::IS_NOT: return s << "IS NOT"; case oper_t::LIKE: return s << "LIKE"; } __builtin_unreachable(); } } // namespace expr } // namespace cql3 template <> struct fmt::formatter { constexpr auto parse(format_parse_context& ctx) { return ctx.end(); } template auto format(const cql3::expr::expression& expr, FormatContext& ctx) { std::ostringstream os; os << expr; return format_to(ctx.out(), "{}", os.str()); } }; template <> struct fmt::formatter { constexpr auto parse(format_parse_context& ctx) { return ctx.end(); } template auto format(const cql3::expr::column_value& col, FormatContext& ctx) { std::ostringstream os; os << col; return format_to(ctx.out(), "{}", os.str()); } };