It will come in handy when we start using expressions to calculate the clustering slice. Signed-off-by: Dejan Mircevski <dejan@scylladb.com>
933 lines
40 KiB
C++
933 lines
40 KiB
C++
/*
|
|
* Copyright (C) 2020 ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* This file is part of Scylla.
|
|
*
|
|
* Scylla is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Scylla is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "expression.hh"
|
|
|
|
#include <boost/algorithm/cxx11/all_of.hpp>
|
|
#include <boost/algorithm/cxx11/any_of.hpp>
|
|
#include <boost/range/adaptors.hpp>
|
|
#include <fmt/ostream.h>
|
|
#include <unordered_map>
|
|
|
|
#include "cql3/constants.hh"
|
|
#include "cql3/lists.hh"
|
|
#include "cql3/tuples.hh"
|
|
#include "index/secondary_index_manager.hh"
|
|
#include "types/list.hh"
|
|
#include "types/map.hh"
|
|
#include "types/set.hh"
|
|
#include "utils/like_matcher.hh"
|
|
|
|
namespace cql3 {
|
|
namespace expr {
|
|
|
|
using boost::adaptors::filtered;
|
|
using boost::adaptors::transformed;
|
|
|
|
namespace {
|
|
|
|
static
|
|
bytes_opt do_get_value(const schema& schema,
|
|
const column_definition& cdef,
|
|
const partition_key& key,
|
|
const clustering_key_prefix& ckey,
|
|
const row& cells,
|
|
gc_clock::time_point now) {
|
|
switch (cdef.kind) {
|
|
case column_kind::partition_key:
|
|
return to_bytes(key.get_component(schema, cdef.component_index()));
|
|
case column_kind::clustering_key:
|
|
return to_bytes(ckey.get_component(schema, cdef.component_index()));
|
|
default:
|
|
auto cell = cells.find_cell(cdef.id);
|
|
if (!cell) {
|
|
return std::nullopt;
|
|
}
|
|
assert(cdef.is_atomic());
|
|
auto c = cell->as_atomic_cell(cdef);
|
|
return c.is_dead(now) ? std::nullopt : bytes_opt(to_bytes(c.value()));
|
|
}
|
|
}
|
|
|
|
using children_t = std::vector<expression>; // conjunction's children.
|
|
|
|
children_t explode_conjunction(expression e) {
|
|
return std::visit(overloaded_functor{
|
|
[] (const conjunction& c) { return std::move(c.children); },
|
|
[&] (const auto&) { return children_t{std::move(e)}; },
|
|
}, e);
|
|
}
|
|
|
|
using cql3::selection::selection;
|
|
|
|
/// Serialized values for all types of cells, plus selection (to find a column's index) and options (for
|
|
/// subscript term's value).
|
|
struct row_data_from_partition_slice {
|
|
const std::vector<bytes>& partition_key;
|
|
const std::vector<bytes>& clustering_key;
|
|
const std::vector<bytes_opt>& other_columns;
|
|
const selection& sel;
|
|
};
|
|
|
|
/// Data used to derive cell values from a mutation.
|
|
struct row_data_from_mutation {
|
|
// Underscores avoid name clashes.
|
|
const partition_key& partition_key_;
|
|
const clustering_key_prefix& clustering_key_;
|
|
const row& other_columns;
|
|
const schema& schema_;
|
|
gc_clock::time_point now;
|
|
};
|
|
|
|
/// Everything needed to compute column values during restriction evaluation.
|
|
struct column_value_eval_bag {
|
|
const query_options& options; // For evaluating subscript terms.
|
|
std::variant<row_data_from_partition_slice, row_data_from_mutation> row_data;
|
|
};
|
|
|
|
/// Returns col's value from queried data.
|
|
bytes_opt get_value_from_partition_slice(
|
|
const column_value& col, row_data_from_partition_slice data, const query_options& options) {
|
|
auto cdef = col.col;
|
|
if (col.sub) {
|
|
auto col_type = static_pointer_cast<const collection_type_impl>(cdef->type);
|
|
if (!col_type->is_map()) {
|
|
throw exceptions::invalid_request_exception(format("subscripting non-map column {}", cdef->name_as_text()));
|
|
}
|
|
const auto deserialized = cdef->type->deserialize(*data.other_columns[data.sel.index_of(*cdef)]);
|
|
const auto& data_map = value_cast<map_type_impl::native_type>(deserialized);
|
|
const auto key = col.sub->bind_and_get(options);
|
|
auto&& key_type = col_type->name_comparator();
|
|
const auto found = with_linearized(*key, [&] (bytes_view key_bv) {
|
|
using entry = std::pair<data_value, data_value>;
|
|
return std::find_if(data_map.cbegin(), data_map.cend(), [&] (const entry& element) {
|
|
return key_type->compare(element.first.serialize_nonnull(), key_bv) == 0;
|
|
});
|
|
});
|
|
return found == data_map.cend() ? bytes_opt() : bytes_opt(found->second.serialize_nonnull());
|
|
} else {
|
|
switch (cdef->kind) {
|
|
case column_kind::partition_key:
|
|
return data.partition_key[cdef->id];
|
|
case column_kind::clustering_key:
|
|
return data.clustering_key[cdef->id];
|
|
case column_kind::static_column:
|
|
case column_kind::regular_column:
|
|
return data.other_columns[data.sel.index_of(*cdef)];
|
|
default:
|
|
throw exceptions::unsupported_operation_exception("Unknown column kind");
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns col's value from a mutation.
|
|
bytes_opt get_value_from_mutation(const column_value& col, row_data_from_mutation data) {
|
|
return do_get_value(
|
|
data.schema_, *col.col, data.partition_key_, data.clustering_key_, data.other_columns, data.now);
|
|
}
|
|
|
|
/// Returns col's value from the fetched data.
|
|
bytes_opt get_value(const column_value& col, const column_value_eval_bag& bag) {
|
|
using std::placeholders::_1;
|
|
return std::visit(overloaded_functor{
|
|
std::bind(get_value_from_mutation, col, _1),
|
|
std::bind(get_value_from_partition_slice, col, _1, bag.options),
|
|
}, bag.row_data);
|
|
}
|
|
|
|
/// Type for comparing results of get_value().
|
|
const abstract_type* get_value_comparator(const column_definition* cdef) {
|
|
return &cdef->type->without_reversed();
|
|
}
|
|
|
|
/// Type for comparing results of get_value().
|
|
const abstract_type* get_value_comparator(const column_value& cv) {
|
|
return cv.sub ? static_pointer_cast<const collection_type_impl>(cv.col->type)->value_comparator().get()
|
|
: get_value_comparator(cv.col);
|
|
}
|
|
|
|
/// If t represents a tuple value, returns that value. Otherwise, null.
|
|
///
|
|
/// Useful for checking binary_operator::rhs, which packs multiple values into a single term when lhs is itself
|
|
/// a tuple. NOT useful for the IN operator, whose rhs is either a list or tuples::in_value.
|
|
::shared_ptr<tuples::value> get_tuple(term& t, const query_options& opts) {
|
|
return dynamic_pointer_cast<tuples::value>(t.bind(opts));
|
|
}
|
|
|
|
/// True iff lhs's value equals rhs.
|
|
bool equal(const bytes_opt& rhs, const column_value& lhs, const column_value_eval_bag& bag) {
|
|
if (!rhs) {
|
|
return false;
|
|
}
|
|
const auto value = get_value(lhs, bag);
|
|
if (!value) {
|
|
return false;
|
|
}
|
|
return get_value_comparator(lhs)->equal(*value, *rhs);
|
|
}
|
|
|
|
/// Convenience overload for term.
|
|
bool equal(term& rhs, const column_value& lhs, const column_value_eval_bag& bag) {
|
|
return equal(to_bytes_opt(rhs.bind_and_get(bag.options)), lhs, bag);
|
|
}
|
|
|
|
/// True iff columns' values equal t.
|
|
bool equal(term& t, const std::vector<column_value>& columns, const column_value_eval_bag& bag) {
|
|
const auto tup = get_tuple(t, bag.options);
|
|
if (!tup) {
|
|
throw exceptions::invalid_request_exception("multi-column equality has right-hand side that isn't a tuple");
|
|
}
|
|
const auto& rhs = tup->get_elements();
|
|
if (rhs.size() != columns.size()) {
|
|
throw exceptions::invalid_request_exception(
|
|
format("tuple equality size mismatch: {} elements on left-hand side, {} on right",
|
|
columns.size(), rhs.size()));
|
|
}
|
|
return boost::equal(rhs, columns, [&] (const bytes_opt& b, const column_value& lhs) {
|
|
return equal(b, lhs, bag);
|
|
});
|
|
}
|
|
|
|
/// True iff lhs is limited by rhs in the manner prescribed by op.
|
|
bool limits(bytes_view lhs, oper_t op, bytes_view rhs, const abstract_type& type) {
|
|
const auto cmp = type.compare(lhs, rhs);
|
|
switch (op) {
|
|
case oper_t::LT:
|
|
return cmp < 0;
|
|
case oper_t::LTE:
|
|
return cmp <= 0;
|
|
case oper_t::GT:
|
|
return cmp > 0;
|
|
case oper_t::GTE:
|
|
return cmp >= 0;
|
|
case oper_t::EQ:
|
|
return cmp == 0;
|
|
case oper_t::NEQ:
|
|
return cmp != 0;
|
|
default:
|
|
throw std::logic_error(format("limits() called on non-compare op {}", op));
|
|
}
|
|
}
|
|
|
|
/// True iff the column value is limited by rhs in the manner prescribed by op.
|
|
bool limits(const column_value& col, oper_t op, term& rhs, const column_value_eval_bag& bag) {
|
|
if (!is_slice(op)) { // For EQ or NEQ, use equal().
|
|
throw std::logic_error("limits() called on non-slice op");
|
|
}
|
|
auto lhs = get_value(col, bag);
|
|
if (!lhs) {
|
|
return false;
|
|
}
|
|
const auto b = to_bytes_opt(rhs.bind_and_get(bag.options));
|
|
return b ? limits(*lhs, op, *b, *get_value_comparator(col)) : false;
|
|
}
|
|
|
|
/// True iff the column values are limited by t in the manner prescribed by op.
|
|
bool limits(const std::vector<column_value>& columns, const oper_t op, term& t,
|
|
const column_value_eval_bag& bag) {
|
|
if (!is_slice(op)) { // For EQ or NEQ, use equal().
|
|
throw std::logic_error("limits() called on non-slice op");
|
|
}
|
|
const auto tup = get_tuple(t, bag.options);
|
|
if (!tup) {
|
|
throw exceptions::invalid_request_exception(
|
|
"multi-column comparison has right-hand side that isn't a tuple");
|
|
}
|
|
const auto& rhs = tup->get_elements();
|
|
if (rhs.size() != columns.size()) {
|
|
throw exceptions::invalid_request_exception(
|
|
format("tuple comparison size mismatch: {} elements on left-hand side, {} on right",
|
|
columns.size(), rhs.size()));
|
|
}
|
|
for (size_t i = 0; i < rhs.size(); ++i) {
|
|
const auto cmp = get_value_comparator(columns[i])->compare(
|
|
// CQL dictates that columns[i] is a clustering column and non-null.
|
|
*get_value(columns[i], bag),
|
|
*rhs[i]);
|
|
// If the components aren't equal, then we just learned the LHS/RHS order.
|
|
if (cmp < 0) {
|
|
if (op == oper_t::LT || op == oper_t::LTE) {
|
|
return true;
|
|
} else if (op == oper_t::GT || op == oper_t::GTE) {
|
|
return false;
|
|
} else {
|
|
throw std::logic_error("Unknown slice operator");
|
|
}
|
|
} else if (cmp > 0) {
|
|
if (op == oper_t::LT || op == oper_t::LTE) {
|
|
return false;
|
|
} else if (op == oper_t::GT || op == oper_t::GTE) {
|
|
return true;
|
|
} else {
|
|
throw std::logic_error("Unknown slice operator");
|
|
}
|
|
}
|
|
// Otherwise, we don't know the LHS/RHS order, so check the next component.
|
|
}
|
|
// Getting here means LHS == RHS.
|
|
return op == oper_t::LTE || op == oper_t::GTE;
|
|
}
|
|
|
|
/// True iff collection (list, set, or map) contains value.
|
|
bool contains(const data_value& collection, const raw_value_view& value) {
|
|
if (!value) {
|
|
return true; // Compatible with old code, which skips null terms in value comparisons.
|
|
}
|
|
auto col_type = static_pointer_cast<const collection_type_impl>(collection.type());
|
|
auto&& element_type = col_type->is_set() ? col_type->name_comparator() : col_type->value_comparator();
|
|
return with_linearized(*value, [&] (bytes_view val) {
|
|
auto exists_in = [&](auto&& range) {
|
|
auto found = std::find_if(range.begin(), range.end(), [&] (auto&& element) {
|
|
return element_type->compare(element.serialize_nonnull(), val) == 0;
|
|
});
|
|
return found != range.end();
|
|
};
|
|
if (col_type->is_list()) {
|
|
return exists_in(value_cast<list_type_impl::native_type>(collection));
|
|
} else if (col_type->is_set()) {
|
|
return exists_in(value_cast<set_type_impl::native_type>(collection));
|
|
} else if (col_type->is_map()) {
|
|
auto data_map = value_cast<map_type_impl::native_type>(collection);
|
|
using entry = std::pair<data_value, data_value>;
|
|
return exists_in(data_map | transformed([] (const entry& e) { return e.second; }));
|
|
} else {
|
|
throw std::logic_error("unsupported collection type in a CONTAINS expression");
|
|
}
|
|
});
|
|
}
|
|
|
|
/// True iff a column is a collection containing value.
|
|
bool contains(const column_value& col, const raw_value_view& value, const column_value_eval_bag& bag) {
|
|
if (col.sub) {
|
|
throw exceptions::unsupported_operation_exception("CONTAINS lhs is subscripted");
|
|
}
|
|
const auto collection = get_value(col, bag);
|
|
if (collection) {
|
|
return contains(col.col->type->deserialize(*collection), value);
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/// True iff a column is a map containing \p key.
|
|
bool contains_key(const column_value& col, cql3::raw_value_view key, const column_value_eval_bag& bag) {
|
|
if (col.sub) {
|
|
throw exceptions::unsupported_operation_exception("CONTAINS KEY lhs is subscripted");
|
|
}
|
|
if (!key) {
|
|
return true; // Compatible with old code, which skips null terms in key comparisons.
|
|
}
|
|
auto type = col.col->type;
|
|
const auto collection = get_value(col, bag);
|
|
if (!collection) {
|
|
return false;
|
|
}
|
|
const auto data_map = value_cast<map_type_impl::native_type>(type->deserialize(*collection));
|
|
auto key_type = static_pointer_cast<const collection_type_impl>(type)->name_comparator();
|
|
auto found = with_linearized(*key, [&] (bytes_view k_bv) {
|
|
using entry = std::pair<data_value, data_value>;
|
|
return std::find_if(data_map.begin(), data_map.end(), [&] (const entry& element) {
|
|
return key_type->compare(element.first.serialize_nonnull(), k_bv) == 0;
|
|
});
|
|
});
|
|
return found != data_map.end();
|
|
}
|
|
|
|
/// Fetches the next cell value from iter and returns its (possibly null) value.
|
|
bytes_opt next_value(query::result_row_view::iterator_type& iter, const column_definition* cdef) {
|
|
if (cdef->type->is_multi_cell()) {
|
|
auto cell = iter.next_collection_cell();
|
|
if (cell) {
|
|
return linearized(*cell);
|
|
}
|
|
} else {
|
|
auto cell = iter.next_atomic_cell();
|
|
if (cell) {
|
|
return linearized(cell->value());
|
|
}
|
|
}
|
|
return std::nullopt;
|
|
}
|
|
|
|
/// Returns values of non-primary-key columns from selection. The kth element of the result
|
|
/// corresponds to the kth column in selection.
|
|
std::vector<bytes_opt> get_non_pk_values(const selection& selection, const query::result_row_view& static_row,
|
|
const query::result_row_view* row) {
|
|
const auto& cols = selection.get_columns();
|
|
std::vector<bytes_opt> vals(cols.size());
|
|
auto static_row_iterator = static_row.iterator();
|
|
auto row_iterator = row ? std::optional<query::result_row_view::iterator_type>(row->iterator()) : std::nullopt;
|
|
for (size_t i = 0; i < cols.size(); ++i) {
|
|
switch (cols[i]->kind) {
|
|
case column_kind::static_column:
|
|
vals[i] = next_value(static_row_iterator, cols[i]);
|
|
break;
|
|
case column_kind::regular_column:
|
|
if (row) {
|
|
vals[i] = next_value(*row_iterator, cols[i]);
|
|
}
|
|
break;
|
|
default: // Skip.
|
|
break;
|
|
}
|
|
}
|
|
return vals;
|
|
}
|
|
|
|
/// True iff cv matches the CQL LIKE pattern.
|
|
bool like(const column_value& cv, const bytes_opt& pattern, const column_value_eval_bag& bag) {
|
|
if (!cv.col->type->is_string()) {
|
|
throw exceptions::invalid_request_exception(
|
|
format("LIKE is allowed only on string types, which {} is not", cv.col->name_as_text()));
|
|
}
|
|
auto value = get_value(cv, bag);
|
|
// TODO: reuse matchers.
|
|
return (pattern && value) ? like_matcher(*pattern)(*value) : false;
|
|
}
|
|
|
|
/// True iff the column value is in the set defined by rhs.
|
|
bool is_one_of(const column_value& col, term& rhs, const column_value_eval_bag& bag) {
|
|
// RHS is prepared differently for different CQL cases. Cast it dynamically to discern which case this is.
|
|
if (auto dv = dynamic_cast<lists::delayed_value*>(&rhs)) {
|
|
// This is `a IN (1,2,3)`. RHS elements are themselves terms.
|
|
return boost::algorithm::any_of(dv->get_elements(), [&] (const ::shared_ptr<term>& t) {
|
|
return equal(*t, col, bag);
|
|
});
|
|
} else if (auto mkr = dynamic_cast<lists::marker*>(&rhs)) {
|
|
// This is `a IN ?`. RHS elements are values representable as bytes_opt.
|
|
const auto values = static_pointer_cast<lists::value>(mkr->bind(bag.options));
|
|
return boost::algorithm::any_of(values->get_elements(), [&] (const bytes_opt& b) {
|
|
return equal(b, col, bag);
|
|
});
|
|
}
|
|
throw std::logic_error("unexpected term type in is_one_of(single column)");
|
|
}
|
|
|
|
/// True iff the tuple of column values is in the set defined by rhs.
|
|
bool is_one_of(const std::vector<column_value>& cvs, term& rhs, const column_value_eval_bag& bag) {
|
|
// RHS is prepared differently for different CQL cases. Cast it dynamically to discern which case this is.
|
|
if (auto dv = dynamic_cast<lists::delayed_value*>(&rhs)) {
|
|
// This is `(a,b) IN ((1,1),(2,2),(3,3))`. RHS elements are themselves terms.
|
|
return boost::algorithm::any_of(dv->get_elements(), [&] (const ::shared_ptr<term>& t) {
|
|
return equal(*t, cvs, bag);
|
|
});
|
|
} else if (auto mkr = dynamic_cast<tuples::in_marker*>(&rhs)) {
|
|
// This is `(a,b) IN ?`. RHS elements are themselves tuples, represented as vector<bytes_opt>.
|
|
const auto marker_value = static_pointer_cast<tuples::in_value>(mkr->bind(bag.options));
|
|
return boost::algorithm::any_of(marker_value->get_split_values(), [&] (const std::vector<bytes_opt>& el) {
|
|
return boost::equal(cvs, el, [&] (const column_value& c, const bytes_opt& b) {
|
|
return equal(b, c, bag);
|
|
});
|
|
});
|
|
}
|
|
throw std::logic_error("unexpected term type in is_one_of(multi-column)");
|
|
}
|
|
|
|
/// True iff op means bnd type of bound.
|
|
bool matches(oper_t op, statements::bound bnd) {
|
|
switch (op) {
|
|
case oper_t::GT:
|
|
case oper_t::GTE:
|
|
return is_start(bnd); // These set a lower bound.
|
|
case oper_t::LT:
|
|
case oper_t::LTE:
|
|
return is_end(bnd); // These set an upper bound.
|
|
case oper_t::EQ:
|
|
return true; // Bounds from both sides.
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/// A value_set with no members: an empty list of values.
const value_set empty_value_set = value_list{};

/// A value_set that excludes nothing: a range open-ended on both sides.
const value_set unbounded_value_set = nonwrapping_range<bytes>::make_open_ended_both_sides();
|
|
|
|
struct intersection_visitor {
|
|
const abstract_type* type;
|
|
value_set operator()(const value_list& a, const value_list& b) const {
|
|
value_list common;
|
|
common.reserve(std::max(a.size(), b.size()));
|
|
boost::set_intersection(a, b, back_inserter(common), type->as_less_comparator());
|
|
return std::move(common);
|
|
}
|
|
|
|
value_set operator()(const nonwrapping_range<bytes>& a, const value_list& b) const {
|
|
const auto common = b | filtered([&] (const bytes& el) { return a.contains(el, type->as_tri_comparator()); });
|
|
return value_list(common.begin(), common.end());
|
|
}
|
|
|
|
value_set operator()(const value_list& a, const nonwrapping_range<bytes>& b) const {
|
|
return (*this)(b, a);
|
|
}
|
|
|
|
value_set operator()(const nonwrapping_range<bytes>& a, const nonwrapping_range<bytes>& b) const {
|
|
const auto common_range = a.intersection(b, type->as_tri_comparator());
|
|
return common_range ? *common_range : empty_value_set;
|
|
}
|
|
};
|
|
|
|
/// Intersects two value_sets whose elements are ordered according to type.
value_set intersection(value_set a, value_set b, const abstract_type* type) {
    const intersection_visitor intersect{type};
    return std::visit(intersect, std::move(a), std::move(b));
}
|
|
|
|
bool is_satisfied_by(const binary_operator& opr, const column_value_eval_bag& bag) {
|
|
return std::visit(overloaded_functor{
|
|
[&] (const column_value& col) {
|
|
if (opr.op == oper_t::EQ) {
|
|
return equal(*opr.rhs, col, bag);
|
|
} else if (opr.op == oper_t::NEQ) {
|
|
return !equal(*opr.rhs, col, bag);
|
|
} else if (is_slice(opr.op)) {
|
|
return limits(col, opr.op, *opr.rhs, bag);
|
|
} else if (opr.op == oper_t::CONTAINS) {
|
|
return contains(col, opr.rhs->bind_and_get(bag.options), bag);
|
|
} else if (opr.op == oper_t::CONTAINS_KEY) {
|
|
return contains_key(col, opr.rhs->bind_and_get(bag.options), bag);
|
|
} else if (opr.op == oper_t::LIKE) {
|
|
return like(col, to_bytes_opt(opr.rhs->bind_and_get(bag.options)), bag);
|
|
} else if (opr.op == oper_t::IN) {
|
|
return is_one_of(col, *opr.rhs, bag);
|
|
} else {
|
|
throw exceptions::unsupported_operation_exception(format("Unhandled binary_operator: {}", opr));
|
|
}
|
|
},
|
|
[&] (const std::vector<column_value>& cvs) {
|
|
if (opr.op == oper_t::EQ) {
|
|
return equal(*opr.rhs, cvs, bag);
|
|
} else if (is_slice(opr.op)) {
|
|
return limits(cvs, opr.op, *opr.rhs, bag);
|
|
} else if (opr.op == oper_t::IN) {
|
|
return is_one_of(cvs, *opr.rhs, bag);
|
|
} else {
|
|
throw exceptions::unsupported_operation_exception(
|
|
format("Unhandled multi-column binary_operator: {}", opr));
|
|
}
|
|
},
|
|
[] (const token& tok) -> bool {
|
|
// The RHS value was already used to ensure we fetch only rows in the specified
|
|
// token range. It is impossible for any fetched row not to match now.
|
|
return true;
|
|
},
|
|
}, opr.lhs);
|
|
}
|
|
|
|
bool is_satisfied_by(const expression& restr, const column_value_eval_bag& bag) {
|
|
return std::visit(overloaded_functor{
|
|
[&] (bool v) { return v; },
|
|
[&] (const conjunction& conj) {
|
|
return boost::algorithm::all_of(conj.children, [&] (const expression& c) {
|
|
return is_satisfied_by(c, bag);
|
|
});
|
|
},
|
|
[&] (const binary_operator& opr) { return is_satisfied_by(opr, bag); },
|
|
}, restr);
|
|
}
|
|
|
|
/// If t is a tuple, binds and gets its k-th element. Otherwise, binds and gets t's whole value.
|
|
bytes_opt get_kth(size_t k, const query_options& options, const ::shared_ptr<term>& t) {
|
|
auto bound = t->bind(options);
|
|
if (auto tup = dynamic_pointer_cast<tuples::value>(bound)) {
|
|
return tup->get_elements()[k];
|
|
} else {
|
|
throw std::logic_error("non-tuple RHS for multi-column IN");
|
|
}
|
|
}
|
|
|
|
template<typename Range>
|
|
value_list to_sorted_vector(Range r, const serialized_compare& comparator) {
|
|
BOOST_CONCEPT_ASSERT((boost::ForwardRangeConcept<Range>));
|
|
value_list tmp(r.begin(), r.end()); // Need random-access range to sort (r is not necessarily random-access).
|
|
const auto unique = boost::unique(boost::sort(tmp, comparator));
|
|
return value_list(unique.begin(), unique.end());
|
|
}
|
|
|
|
/// Range adaptor that keeps only the bytes_opt elements holding a value.
const auto non_null = boost::adaptors::filtered([] (const bytes_opt& b) { return b.has_value(); });

/// Range adaptor that maps each bytes_opt element to the value it holds (via value()).
const auto deref = boost::adaptors::transformed([] (const bytes_opt& b) { return b.value(); });
|
|
|
|
/// Returns possible values from t, which must be RHS of IN.
|
|
value_list get_IN_values(
|
|
const ::shared_ptr<term>& t, const query_options& options, const serialized_compare& comparator,
|
|
sstring_view column_name) {
|
|
// RHS is prepared differently for different CQL cases. Cast it dynamically to discern which case this is.
|
|
if (auto dv = dynamic_pointer_cast<lists::delayed_value>(t)) {
|
|
// Case `a IN (1,2,3)`.
|
|
const auto result_range = dv->get_elements()
|
|
| boost::adaptors::transformed([&] (const ::shared_ptr<term>& t) { return to_bytes_opt(t->bind_and_get(options)); })
|
|
| non_null | deref;
|
|
return to_sorted_vector(std::move(result_range), comparator);
|
|
} else if (auto mkr = dynamic_pointer_cast<lists::marker>(t)) {
|
|
// Case `a IN ?`. Collect all list-element values.
|
|
const auto val = mkr->bind(options);
|
|
if (val == constants::UNSET_VALUE) {
|
|
throw exceptions::invalid_request_exception(format("Invalid unset value for column {}", column_name));
|
|
}
|
|
return to_sorted_vector(static_pointer_cast<lists::value>(val)->get_elements() | non_null | deref, comparator);
|
|
}
|
|
throw std::logic_error(format("get_IN_values(single column) on invalid term {}", *t));
|
|
}
|
|
|
|
/// Returns possible values for k-th column from t, which must be RHS of IN.
|
|
value_list get_IN_values(const ::shared_ptr<term>& t, size_t k, const query_options& options,
|
|
const serialized_compare& comparator) {
|
|
// RHS is prepared differently for different CQL cases. Cast it dynamically to discern which case this is.
|
|
if (auto dv = dynamic_pointer_cast<lists::delayed_value>(t)) {
|
|
// Case `(a,b) in ((1,1),(2,2),(3,3))`. Get kth value from each term element.
|
|
const auto result_range = dv->get_elements()
|
|
| boost::adaptors::transformed(std::bind_front(get_kth, k, options)) | non_null | deref;
|
|
return to_sorted_vector(std::move(result_range), comparator);
|
|
} else if (auto mkr = dynamic_pointer_cast<tuples::in_marker>(t)) {
|
|
// Case `(a,b) IN ?`. Get kth value from each vector<bytes> element.
|
|
const auto val = static_pointer_cast<tuples::in_value>(mkr->bind(options));
|
|
const auto split_values = val->get_split_values(); // Need lvalue from which to make std::view.
|
|
const auto result_range = split_values
|
|
| boost::adaptors::transformed([k] (const std::vector<bytes_opt>& v) { return v[k]; }) | non_null | deref;
|
|
return to_sorted_vector(std::move(result_range), comparator);
|
|
}
|
|
throw std::logic_error(format("get_IN_values(multi-column) on invalid term {}", *t));
|
|
}
|
|
|
|
// Readable names for the boolean inclusiveness argument of interval_bound.
static constexpr bool inclusive = true;
static constexpr bool exclusive = false;
|
|
|
|
} // anonymous namespace
|
|
|
|
/// Builds the conjunction of a and b.  Operands that are themselves conjunctions are flattened, so the
/// result's children are a's conjuncts followed by b's.
expression make_conjunction(expression a, expression b) {
    auto conjuncts = explode_conjunction(std::move(a));
    auto tail = explode_conjunction(std::move(b));
    for (auto& conjunct : tail) {
        conjuncts.push_back(std::move(conjunct));
    }
    return conjunction{std::move(conjuncts)};
}
|
|
|
|
bool is_satisfied_by(
|
|
const expression& restr,
|
|
const std::vector<bytes>& partition_key, const std::vector<bytes>& clustering_key,
|
|
const query::result_row_view& static_row, const query::result_row_view* row,
|
|
const selection& selection, const query_options& options) {
|
|
const auto regulars = get_non_pk_values(selection, static_row, row);
|
|
return is_satisfied_by(
|
|
restr, {options, row_data_from_partition_slice{partition_key, clustering_key, regulars, selection}});
|
|
}
|
|
|
|
bool is_satisfied_by(
|
|
const expression& restr,
|
|
const schema& schema, const partition_key& key, const clustering_key_prefix& ckey, const row& cells,
|
|
const query_options& options, gc_clock::time_point now) {
|
|
return is_satisfied_by(restr, {options, row_data_from_mutation{key, ckey, cells, schema, now}});
|
|
}
|
|
|
|
std::vector<bytes_opt> first_multicolumn_bound(
|
|
const expression& restr, const query_options& options, statements::bound bnd) {
|
|
auto found = find_atom(restr, [bnd] (const binary_operator& oper) {
|
|
return matches(oper.op, bnd) && is_multi_column(oper);
|
|
});
|
|
if (found) {
|
|
return static_pointer_cast<tuples::value>(found->rhs->bind(options))->get_elements();
|
|
} else {
|
|
return std::vector<bytes_opt>{};
|
|
}
|
|
}
|
|
|
|
template<typename T>
|
|
nonwrapping_range<T> to_range(oper_t op, const T& val) {
|
|
static constexpr bool inclusive = true, exclusive = false;
|
|
switch (op) {
|
|
case oper_t::EQ:
|
|
return nonwrapping_range<T>::make_singular(val);
|
|
case oper_t::GT:
|
|
return nonwrapping_range<T>::make_starting_with(interval_bound(val, exclusive));
|
|
case oper_t::GTE:
|
|
return nonwrapping_range<T>::make_starting_with(interval_bound(val, inclusive));
|
|
case oper_t::LT:
|
|
return nonwrapping_range<T>::make_ending_with(interval_bound(val, exclusive));
|
|
case oper_t::LTE:
|
|
return nonwrapping_range<T>::make_ending_with(interval_bound(val, inclusive));
|
|
default:
|
|
throw std::logic_error(format("to_range: unknown comparison operator {}", op));
|
|
}
|
|
}
|
|
|
|
template nonwrapping_range<clustering_key_prefix> to_range(oper_t, const clustering_key_prefix&);
|
|
|
|
namespace {

/// possible_lhs_values() for an atom whose left-hand side is the single column col.
value_set single_column_lhs_values(const column_definition* cdef, const abstract_type* type,
        const binary_operator& oper, const column_value& col, const query_options& options) {
    if (!cdef || cdef != col.col) {
        // The atom restricts some other column (or we were asked about the token).
        return unbounded_value_set;
    }
    if (is_compare(oper.op)) {
        const auto val = to_bytes_opt(oper.rhs->bind_and_get(options));
        if (!val) {
            return empty_value_set; // All NULL comparisons fail; no column values match.
        }
        if (oper.op == oper_t::EQ) {
            return value_list{*val};
        }
        return to_range(oper.op, *val);
    }
    if (oper.op == oper_t::IN) {
        return get_IN_values(oper.rhs, options, type->as_less_comparator(), cdef->name_as_text());
    }
    throw std::logic_error(format("possible_lhs_values: unhandled operator {}", oper));
}

/// possible_lhs_values() for an atom whose left-hand side is the column tuple cvs.
value_set multi_column_lhs_values(const column_definition* cdef, const abstract_type* type,
        const binary_operator& oper, const std::vector<column_value>& cvs, const query_options& options) {
    if (!cdef) {
        return unbounded_value_set;
    }
    const auto found = boost::find_if(
            cvs, [&] (const column_value& c) { return c.col == cdef; });
    if (found == cvs.end()) {
        return unbounded_value_set;
    }
    const auto column_index_on_lhs = std::distance(cvs.begin(), found);
    if (is_compare(oper.op)) {
        // RHS must be a tuple due to upstream checks.
        bytes_opt val = get_tuple(*oper.rhs, options)->get_elements()[column_index_on_lhs];
        if (!val) {
            return empty_value_set; // All NULL comparisons fail; no column values match.
        }
        if (oper.op == oper_t::EQ) {
            return value_list{*val};
        }
        if (column_index_on_lhs > 0) {
            // A multi-column comparison restricts only the first column, because
            // comparison is lexicographical.
            return unbounded_value_set;
        }
        return to_range(oper.op, *val);
    }
    if (oper.op == oper_t::IN) {
        return get_IN_values(oper.rhs, column_index_on_lhs, options, type->as_less_comparator());
    }
    return unbounded_value_set;
}

/// possible_lhs_values() for an atom whose left-hand side is the token.
value_set token_lhs_values(const column_definition* cdef, const binary_operator& oper,
        const query_options& options) {
    if (cdef) {
        // A token restriction says nothing about an individual column.
        return unbounded_value_set;
    }
    const auto val = to_bytes_opt(oper.rhs->bind_and_get(options));
    if (!val) {
        return empty_value_set; // All NULL comparisons fail; no token values match.
    }
    if (oper.op == oper_t::EQ) {
        return value_list{*val};
    }
    if (oper.op == oper_t::GT) {
        return nonwrapping_range<bytes>::make_starting_with(interval_bound(*val, exclusive));
    }
    if (oper.op == oper_t::GTE) {
        return nonwrapping_range<bytes>::make_starting_with(interval_bound(*val, inclusive));
    }
    static const bytes MININT = serialized(std::numeric_limits<int64_t>::min()),
            MAXINT = serialized(std::numeric_limits<int64_t>::max());
    // Undocumented feature: when the user types `token(...) < MININT`, we interpret
    // that as MAXINT for some reason.
    // NOTE(review): MAXINT already holds serialized bytes; passing it through serialized() again looks
    // redundant -- confirm before simplifying.
    const auto adjusted_val = (*val == MININT) ? serialized(MAXINT) : *val;
    if (oper.op == oper_t::LT) {
        return nonwrapping_range<bytes>::make_ending_with(interval_bound(adjusted_val, exclusive));
    }
    if (oper.op == oper_t::LTE) {
        return nonwrapping_range<bytes>::make_ending_with(interval_bound(adjusted_val, inclusive));
    }
    throw std::logic_error(format("get_token_interval invalid operator {}", oper.op));
}

} // anonymous namespace

/// Computes the set of values that expr allows for cdef, or for the token when cdef is null.
///
/// A boolean constant allows everything (true) or nothing (false).  A conjunction allows the intersection
/// of what its children allow.  A binary operator is handled according to the kind of its left-hand side;
/// atoms that don't involve cdef (or the token, when cdef is null) allow everything.
value_set possible_lhs_values(const column_definition* cdef, const expression& expr, const query_options& options) {
    const auto type = cdef ? get_value_comparator(cdef) : long_type.get();
    return std::visit(overloaded_functor{
            [] (bool b) {
                return b ? unbounded_value_set : empty_value_set;
            },
            [&] (const conjunction& conj) {
                value_set allowed = unbounded_value_set;
                for (const expression& child : conj.children) {
                    allowed = intersection(std::move(allowed), possible_lhs_values(cdef, child, options), type);
                }
                return allowed;
            },
            [&] (const binary_operator& oper) -> value_set {
                return std::visit(overloaded_functor{
                        [&] (const column_value& col) -> value_set {
                            return single_column_lhs_values(cdef, type, oper, col, options);
                        },
                        [&] (const std::vector<column_value>& cvs) -> value_set {
                            return multi_column_lhs_values(cdef, type, oper, cvs, options);
                        },
                        [&] (const token&) -> value_set {
                            return token_lhs_values(cdef, oper, options);
                        },
                    }, oper.lhs);
            },
        }, expr);
}
|
|
|
|
/// Converts s to a range: a range is returned unchanged and a one-element list becomes a singular range.
///
/// Throws std::logic_error if s is a list whose size is not exactly 1.
nonwrapping_range<bytes> to_range(const value_set& s) {
    return std::visit(overloaded_functor{
            [] (const nonwrapping_range<bytes>& range) { return range; },
            [] (const value_list& values) {
                if (values.size() == 1) {
                    return nonwrapping_range<bytes>::make_singular(values[0]);
                }
                throw std::logic_error(format("to_range called on list of size {}", values.size()));
            },
        }, s);
}
|
|
|
|
/// Tells whether the index \p idx supports the restriction(s) in \p expr.
///
/// - A conjunction is supported when every one of its children is supported.
/// - A binary operator whose LHS is a single column, or a column tuple with exactly one element,
///   is supported when idx.supports_expression() accepts that column and the operator.
/// - A binary operator on a multi-column tuple or on token is not supported.
/// - Any other expression alternative is not supported.
bool is_supported_by(const expression& expr, const secondary_index::index& idx) {
    return std::visit(overloaded_functor{
            [&] (const conjunction& conj) {
                // Capture idx by reference: std::bind would store its own copy of the index
                // object for every conjunction visited.
                return boost::algorithm::all_of(conj.children, [&idx] (const expression& child) {
                    return is_supported_by(child, idx);
                });
            },
            [&] (const binary_operator& oper) {
                return std::visit(overloaded_functor{
                        [&] (const column_value& col) {
                            return idx.supports_expression(*col.col, oper.op);
                        },
                        [&] (const std::vector<column_value>& cvs) {
                            if (cvs.size() == 1) {
                                return idx.supports_expression(*cvs[0].col, oper.op);
                            }
                            // We don't use index table for multi-column restrictions, as it cannot avoid filtering.
                            return false;
                        },
                        [&] (const token&) { return false; },
                    }, oper.lhs);
            },
            [] (const auto&) { return false; }
        }, expr);
}
|
|
|
|
bool has_supporting_index(
|
|
const expression& expr,
|
|
const secondary_index::secondary_index_manager& index_manager,
|
|
allow_local_index allow_local) {
|
|
const auto indexes = index_manager.list_indexes();
|
|
const auto support = std::bind(is_supported_by, expr, std::placeholders::_1);
|
|
return allow_local ? boost::algorithm::any_of(indexes, support)
|
|
: boost::algorithm::any_of(
|
|
indexes | filtered([] (const secondary_index::index& i) { return !i.metadata().local(); }),
|
|
support);
|
|
}
|
|
|
|
std::ostream& operator<<(std::ostream& os, const column_value& cv) {
|
|
os << *cv.col;
|
|
if (cv.sub) {
|
|
os << '[' << *cv.sub << ']';
|
|
}
|
|
return os;
|
|
}
|
|
|
|
std::ostream& operator<<(std::ostream& os, const expression& expr) {
|
|
std::visit(overloaded_functor{
|
|
[&] (bool b) { os << (b ? "TRUE" : "FALSE"); },
|
|
[&] (const conjunction& conj) { fmt::print(os, "({})", fmt::join(conj.children, ") AND (")); },
|
|
[&] (const binary_operator& opr) {
|
|
std::visit(overloaded_functor{
|
|
[&] (const token& t) { os << "TOKEN"; },
|
|
[&] (const column_value& col) {
|
|
fmt::print(os, "({})", col);
|
|
},
|
|
[&] (const std::vector<column_value>& cvs) {
|
|
fmt::print(os, "(({}))", fmt::join(cvs, ","));
|
|
},
|
|
}, opr.lhs);
|
|
os << ' ' << opr.op << ' ' << *opr.rhs;
|
|
},
|
|
}, expr);
|
|
return os;
|
|
}
|
|
|
|
/// Returns the text that streaming \p expr through operator<< produces.
sstring to_string(const expression& expr) {
    std::ostringstream rendered;
    rendered << expr;
    return rendered.str();
}
|
|
|
|
bool is_on_collection(const binary_operator& b) {
|
|
if (b.op == oper_t::CONTAINS || b.op == oper_t::CONTAINS_KEY) {
|
|
return true;
|
|
}
|
|
if (auto cvs = std::get_if<std::vector<column_value>>(&b.lhs)) {
|
|
return boost::algorithm::any_of(*cvs, [] (const column_value& v) { return v.sub; });
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/// Returns a copy of \p expr in which single-column binary operators refer to \p new_cdef.
///
/// - bool: returned unchanged.
/// - conjunction: rebuilt from the result of applying this function to each child.
/// - binary operator on a single column: rebuilt with column_value{new_cdef} as the LHS, keeping
///   the operator and the RHS.
/// - binary operator on token: \p expr is returned unchanged.
///
/// \throws std::logic_error if a binary operator has a column tuple as its LHS.
expression replace_column_def(const expression& expr, const column_definition* new_cdef) {
    return std::visit(overloaded_functor{
            [] (bool constant) -> expression {
                return expression(constant);
            },
            [&] (const conjunction& conj) -> expression {
                std::vector<expression> replaced;
                replaced.reserve(conj.children.size());
                for (const expression& child : conj.children) {
                    replaced.push_back(replace_column_def(child, new_cdef));
                }
                return expression(conjunction{std::move(replaced)});
            },
            [&] (const binary_operator& oper) -> expression {
                return std::visit(overloaded_functor{
                        [&] (const column_value&) -> expression {
                            return expression(binary_operator{column_value{new_cdef}, oper.op, oper.rhs});
                        },
                        [&] (const std::vector<column_value>&) -> expression {
                            throw std::logic_error(format("replace_column_def invalid LHS: {}", to_string(oper)));
                        },
                        [&] (const token&) -> expression {
                            return expr;
                        },
                    }, oper.lhs);
            },
        }, expr);
}
|
|
|
|
/// Streams the textual form of the operator \p op to \p s and returns \p s.
std::ostream& operator<<(std::ostream& s, oper_t op) {
    const char* spelling = nullptr;
    switch (op) {
    case oper_t::EQ:
        spelling = "=";
        break;
    case oper_t::NEQ:
        spelling = "!=";
        break;
    case oper_t::LT:
        spelling = "<";
        break;
    case oper_t::LTE:
        spelling = "<=";
        break;
    case oper_t::GT:
        spelling = ">";
        break;
    case oper_t::GTE:
        spelling = ">=";
        break;
    case oper_t::IN:
        spelling = "IN";
        break;
    case oper_t::CONTAINS:
        spelling = "CONTAINS";
        break;
    case oper_t::CONTAINS_KEY:
        spelling = "CONTAINS KEY";
        break;
    case oper_t::IS_NOT:
        spelling = "IS NOT";
        break;
    case oper_t::LIKE:
        spelling = "LIKE";
        break;
    }
    if (!spelling) {
        // op matched none of the cases above.
        __builtin_unreachable();
    }
    return s << spelling;
}
|
|
|
|
} // namespace expr
|
|
} // namespace cql3
|
|
|
|
|
|
template <>
|
|
struct fmt::formatter<cql3::expr::expression> {
|
|
constexpr auto parse(format_parse_context& ctx) {
|
|
return ctx.end();
|
|
}
|
|
|
|
template <typename FormatContext>
|
|
auto format(const cql3::expr::expression& expr, FormatContext& ctx) {
|
|
std::ostringstream os;
|
|
os << expr;
|
|
return format_to(ctx.out(), "{}", os.str());
|
|
}
|
|
};
|
|
|
|
template <>
|
|
struct fmt::formatter<cql3::expr::column_value> {
|
|
constexpr auto parse(format_parse_context& ctx) {
|
|
return ctx.end();
|
|
}
|
|
|
|
template <typename FormatContext>
|
|
auto format(const cql3::expr::column_value& col, FormatContext& ctx) {
|
|
std::ostringstream os;
|
|
os << col;
|
|
return format_to(ctx.out(), "{}", os.str());
|
|
}
|
|
};
|