Instead of lengthy blurbs, switch to single-line, machine-readable standardized (https://spdx.dev) license identifiers. The Linux kernel switched long ago, so there is strong precedent. Three cases are handled: AGPL-only, Apache-only, and dual-licensed. For the latter case, I chose (AGPL-3.0-or-later and Apache-2.0), reasoning that our changes are extensive enough to apply our license. The changes were applied mechanically with a script, except to licenses/README.md. Closes #9937
335 lines
15 KiB
C++
335 lines
15 KiB
C++
/*
 */

/*
 * Copyright (C) 2015-present ScyllaDB
 *
 * Modified by ScyllaDB
 */

/*
 * SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0)
 */
|
|
|
|
#include "cql3/column_condition.hh"
|
|
#include "statements/request_validations.hh"
|
|
#include "unimplemented.hh"
|
|
#include "lists.hh"
|
|
#include "maps.hh"
|
|
#include <boost/range/algorithm_ext/push_back.hpp>
|
|
#include "types/map.hh"
|
|
#include "types/list.hh"
|
|
#include "utils/like_matcher.hh"
|
|
#include "expr/expression.hh"
|
|
|
|
namespace {
|
|
|
|
void validate_operation_on_durations(const abstract_type& type, cql3::expr::oper_t op) {
|
|
using cql3::statements::request_validations::check_false;
|
|
|
|
if (is_slice(op) && type.references_duration()) {
|
|
check_false(type.is_collection(), "Slice conditions are not supported on collections containing durations");
|
|
check_false(type.is_tuple(), "Slice conditions are not supported on tuples containing durations");
|
|
check_false(type.is_user_type(), "Slice conditions are not supported on UDTs containing durations");
|
|
|
|
// We're a duration.
|
|
throw exceptions::invalid_request_exception(format("Slice conditions are not supported on durations"));
|
|
}
|
|
}
|
|
|
|
// Evaluates "cell_value <op> param" for the comparison operators
// EQ, NEQ, GTE, LTE, GT and LT.
//
// op         - comparison operator to apply
// cell_type  - type of the value read from the cell
// param_type - type of the condition parameter
// cell_value - value of the cell
// param      - condition parameter, as bytes
//
// Returns true when the comparison holds. Any other operator is a programming
// error: it trips the assert (and yields false if asserts are compiled out).
bool is_satisfied_by(cql3::expr::oper_t op, const abstract_type& cell_type,
        const abstract_type& param_type, const data_value& cell_value, const bytes& param) {

    std::strong_ordering rc = std::strong_ordering::equal;
    // For multi-cell sets and lists, cell value is represented as a map,
    // thanks to collections_as_maps flag in partition_slice. param, however,
    // is represented as a set or list type.
    // The two different representations need a dedicated comparison.
    if (cell_type.is_map() && cell_type.is_multi_cell() && param_type.is_listlike()) {
        const auto& list_type = static_cast<const listlike_collection_type_impl&>(param_type);
        const auto& map_type = static_cast<const map_type_impl&>(cell_type);
        assert(list_type.is_multi_cell());
        // compare_with_map() takes the parameter first and the cell second, so
        // its result is inverted to obtain "cell compared to param".
        rc = 0 <=> list_type.compare_with_map(map_type, param, map_type.decompose(cell_value));
    } else {
        rc = cell_type.compare(cell_type.decompose(cell_value), param);
    }

    switch (op) {
        using cql3::expr::oper_t;
    case oper_t::EQ:
        return rc == 0;
    case oper_t::NEQ:
        return rc != 0;
    case oper_t::GTE:
        return rc >= 0;
    case oper_t::LTE:
        return rc <= 0;
    case oper_t::GT:
        return rc > 0;
    case oper_t::LT:
        return rc < 0;
    default:
        assert(false);
        return false;
    }
}
|
|
|
|
// Read the list index from key and check that list index is not
|
|
// negative. The negative range check repeats Cassandra behaviour.
|
|
uint32_t read_and_check_list_index(const cql3::raw_value_view& key) {
|
|
// The list element type is always int32_type, see lists::index_spec_of
|
|
int32_t idx = read_simple_exactly<int32_t>(to_bytes(key));
|
|
if (idx < 0) {
|
|
throw exceptions::invalid_request_exception(format("Invalid negative list index {}", idx));
|
|
}
|
|
return static_cast<uint32_t>(idx);
|
|
}
|
|
|
|
} // end of anonymous namespace
|
|
|
|
namespace cql3 {
|
|
|
|
// Hands every expression held by this condition to expr::fill_prepare_context()
// together with `ctx`: first the collection element (if any), then each IN
// value, and finally the right-hand side value (if any).
void column_condition::collect_marker_specificaton(prepare_context& ctx) {
    if (_collection_element.has_value()) {
        expr::fill_prepare_context(*_collection_element, ctx);
    }

    for (auto& in_value : _in_values) {
        expr::fill_prepare_context(in_value, ctx);
    }

    if (_value.has_value()) {
        expr::fill_prepare_context(*_value, ctx);
    }
}
|
|
|
|
bool column_condition::applies_to(const data_value* cell_value, const query_options& options) const {
|
|
|
|
// Cassandra condition support has a few quirks:
|
|
// - only a simple conjunct of predicates is supported "predicate AND predicate AND ..."
|
|
// - a predicate can operate on a column or a collection element, which must always be
|
|
// on the right side: "a = 3" or "collection['key'] IN (1,2,3)"
|
|
// - parameter markers are allowed on the right hand side only
|
|
// - only <, >, >=, <=, !=, LIKE, and IN predicates are supported.
|
|
// - NULLs and missing values are treated differently from the WHERE clause:
|
|
// a term or cell in IF clause is allowed to be NULL or compared with NULL,
|
|
// and NULL value is treated just like any other value in the domain (there is no
|
|
// three-value logic or UNKNOWN like in SQL).
|
|
// - empty sets/lists/maps are treated differently when comparing with NULLs depending on
|
|
// whether the object is frozen or not. An empty *frozen* set/map/list is not equal to NULL.
|
|
// An empty *multi-cell* set/map/list is identical to NULL.
|
|
// The code below implements these rules in a way compatible with Cassandra.
|
|
|
|
// Use a map/list value instead of entire collection if a key is present in the predicate.
|
|
if (_collection_element.has_value() && cell_value != nullptr) {
|
|
// Checked in column_condition::raw::prepare()
|
|
assert(cell_value->type()->is_collection());
|
|
const collection_type_impl& cell_type = static_cast<const collection_type_impl&>(*cell_value->type());
|
|
|
|
expr::constant key_constant = expr::evaluate(*_collection_element, options);
|
|
cql3::raw_value_view key = key_constant.view();
|
|
if (key.is_unset_value()) {
|
|
throw exceptions::invalid_request_exception(
|
|
format("Invalid 'unset' value in {} element access", cell_type.cql3_type_name()));
|
|
}
|
|
if (key.is_null()) {
|
|
throw exceptions::invalid_request_exception(
|
|
format("Invalid null value for {} element access", cell_type.cql3_type_name()));
|
|
}
|
|
if (cell_type.is_map()) {
|
|
// If a collection is multi-cell and not frozen, it is returned as a map even if the
|
|
// underlying data type is "set" or "list". This is controlled by
|
|
// partition_slice::collections_as_maps enum, which is set when preparing a read command
|
|
// object. Representing a list as a map<timeuuid, listval> is necessary to identify the list field
|
|
// being updated, e.g. in case of UPDATE t SET list[3] = null WHERE a = 1 IF list[3]
|
|
// = 'key'
|
|
const map_type_impl& map_type = static_cast<const map_type_impl&>(cell_type);
|
|
// A map is serialized as a vector of data value pairs.
|
|
const std::vector<std::pair<data_value, data_value>>& map = map_type.from_value(*cell_value);
|
|
if (column.type->is_map()) {
|
|
// We're working with a map *type*, not only map *representation*.
|
|
key.with_linearized([&map, &map_type, &cell_value] (bytes_view key) {
|
|
auto end = map.end();
|
|
const auto& map_key_type = *map_type.get_keys_type();
|
|
auto less = [&map_key_type](const std::pair<data_value, data_value>& value, bytes_view key) {
|
|
return map_key_type.less(map_key_type.decompose(value.first), key);
|
|
};
|
|
// Map elements are sorted by key.
|
|
auto it = std::lower_bound(map.begin(), end, key, less);
|
|
if (it != end && map_key_type.equal(map_key_type.decompose(it->first), key)) {
|
|
cell_value = &it->second;
|
|
} else {
|
|
cell_value = nullptr;
|
|
}
|
|
});
|
|
} else if (column.type->is_list()) {
|
|
// We're working with a list type, represented as map.
|
|
uint32_t idx = read_and_check_list_index(key);
|
|
cell_value = idx >= map.size() ? nullptr : &map[idx].second;
|
|
} else {
|
|
// Syntax like "set_column['key'] = constant" is invalid.
|
|
assert(false);
|
|
}
|
|
} else if (cell_type.is_list()) {
|
|
// This is a *frozen* list.
|
|
const list_type_impl& list_type = static_cast<const list_type_impl&>(cell_type);
|
|
const std::vector<data_value>& list = list_type.from_value(*cell_value);
|
|
uint32_t idx = read_and_check_list_index(key);
|
|
cell_value = idx >= list.size() ? nullptr : &list[idx];
|
|
} else {
|
|
assert(false);
|
|
}
|
|
}
|
|
|
|
if (is_compare(_op)) {
|
|
// <, >, >=, <=, !=
|
|
expr::constant param = expr::evaluate(*_value, options);
|
|
|
|
if (param.is_unset_value()) {
|
|
throw exceptions::invalid_request_exception("Invalid 'unset' value in condition");
|
|
}
|
|
if (param.is_null()) {
|
|
if (_op == expr::oper_t::EQ) {
|
|
return cell_value == nullptr;
|
|
} else if (_op == expr::oper_t::NEQ) {
|
|
return cell_value != nullptr;
|
|
} else {
|
|
throw exceptions::invalid_request_exception(format("Invalid comparison with null for operator \"{}\"", _op));
|
|
}
|
|
} else if (cell_value == nullptr) {
|
|
// The condition parameter is not null, so only NEQ can return true
|
|
return _op == expr::oper_t::NEQ;
|
|
}
|
|
// type::validate() is called earlier when creating the value, so it's safe to pass to_bytes() result
|
|
// directly to compare.
|
|
return is_satisfied_by(_op, *cell_value->type(), *column.type, *cell_value, to_bytes(param.view()));
|
|
}
|
|
|
|
if (_op == expr::oper_t::LIKE) {
|
|
if (cell_value == nullptr) {
|
|
return false;
|
|
}
|
|
if (_matcher) {
|
|
return (*_matcher)(bytes_view(cell_value->serialize_nonnull()));
|
|
} else {
|
|
auto param = expr::evaluate(*_value, options); // LIKE pattern
|
|
if (param.is_unset_value()) {
|
|
throw exceptions::invalid_request_exception("Invalid 'unset' value in LIKE pattern");
|
|
}
|
|
if (param.is_null()) {
|
|
throw exceptions::invalid_request_exception("Invalid NULL value in LIKE pattern");
|
|
}
|
|
like_matcher matcher(to_bytes(param.view()));
|
|
return matcher(bytes_view(cell_value->serialize_nonnull()));
|
|
}
|
|
}
|
|
|
|
assert(_op == expr::oper_t::IN);
|
|
|
|
// FIXME Use managed_bytes_opt
|
|
std::vector<bytes_opt> in_values;
|
|
|
|
if (_value.has_value()) {
|
|
expr::constant lval = expr::evaluate(*_value, options);
|
|
if (lval.is_null()) {
|
|
throw exceptions::invalid_request_exception("Invalid null value for IN condition");
|
|
}
|
|
for (const managed_bytes_opt& v : expr::get_elements(lval)) {
|
|
if (v) {
|
|
in_values.push_back(to_bytes(*v));
|
|
} else {
|
|
in_values.push_back(std::nullopt);
|
|
}
|
|
}
|
|
} else {
|
|
for (auto&& v : _in_values) {
|
|
in_values.emplace_back(to_bytes_opt(expr::evaluate(v, options).view()));
|
|
}
|
|
}
|
|
// If cell value is NULL, IN list must contain NULL or an empty set/list. Otherwise it must contain cell value.
|
|
if (cell_value) {
|
|
return std::any_of(in_values.begin(), in_values.end(), [this, cell_value] (const bytes_opt& value) {
|
|
return value.has_value() && is_satisfied_by(expr::oper_t::EQ, *cell_value->type(), *column.type, *cell_value, *value);
|
|
});
|
|
} else {
|
|
return std::any_of(in_values.begin(), in_values.end(), [] (const bytes_opt& value) { return !value.has_value() || value->empty(); });
|
|
}
|
|
}
|
|
|
|
// Turns this raw (parsed) condition into a prepared column_condition for `receiver`.
//
// db, keyspace - forwarded to prepare_expression() for every expression of the condition
// receiver     - definition of the column the condition applies to
//
// Throws exceptions::invalid_request_exception when:
//  - the column is a counter,
//  - element access is used on a non-collection, on a set, or on an unsupported collection kind,
//  - the operator is neither a comparison, LIKE nor IN.
// validate_operation_on_durations() may additionally throw for comparison operators.
lw_shared_ptr<column_condition>
column_condition::raw::prepare(data_dictionary::database db, const sstring& keyspace, const column_definition& receiver) const {
    if (receiver.type->is_counter()) {
        throw exceptions::invalid_request_exception("Conditions on counters are not supported");
    }
    std::optional<expr::expression> collection_element_expression;
    // Specification the right-hand side is prepared against: the column itself,
    // or the collection's value when an element is accessed.
    lw_shared_ptr<column_specification> value_spec = receiver.column_specification;

    if (_collection_element) {
        if (!receiver.type->is_collection()) {
            throw exceptions::invalid_request_exception(format("Invalid element access syntax for non-collection column {}",
                        receiver.name_as_text()));
        }
        // Pass a correct type specification to the collection_element->prepare(), so that it can
        // later be used to validate the parameter type is compatible with receiver type.
        lw_shared_ptr<column_specification> element_spec;
        auto ctype = static_cast<const collection_type_impl*>(receiver.type.get());
        const column_specification& recv_column_spec = *receiver.column_specification;
        if (ctype->get_kind() == abstract_type::kind::list) {
            element_spec = lists::index_spec_of(recv_column_spec);
            value_spec = lists::value_spec_of(recv_column_spec);
        } else if (ctype->get_kind() == abstract_type::kind::map) {
            element_spec = maps::key_spec_of(recv_column_spec);
            value_spec = maps::value_spec_of(recv_column_spec);
        } else if (ctype->get_kind() == abstract_type::kind::set) {
            throw exceptions::invalid_request_exception(format("Invalid element access syntax for set column {}",
                        receiver.name_as_text()));
        } else {
            throw exceptions::invalid_request_exception(
                    format("Unsupported collection type {} in a condition with element access", ctype->cql3_type_name()));
        }
        collection_element_expression = prepare_expression(*_collection_element, db, keyspace, element_spec);
    }

    if (is_compare(_op)) {
        validate_operation_on_durations(*receiver.type, _op);
        return column_condition::condition(receiver, std::move(collection_element_expression),
                prepare_expression(*_value, db, keyspace, value_spec), nullptr, _op);
    }

    if (_op == expr::oper_t::LIKE) {
        auto literal_term = expr::as_if<expr::untyped_constant>(&*_value);
        if (literal_term) {
            // The pattern is a literal: build the matcher now and pass it along.
            const sstring& pattern = literal_term->raw_text;
            return column_condition::condition(receiver, std::move(collection_element_expression),
                    prepare_expression(*_value, db, keyspace, value_spec),
                    std::make_unique<like_matcher>(bytes_view(reinterpret_cast<const int8_t*>(pattern.data()), pattern.size())),
                    _op);
        } else {
            // Pass through rhs value, matcher object built on execution
            // TODO: caller should validate parametrized LIKE pattern
            return column_condition::condition(receiver, std::move(collection_element_expression),
                    prepare_expression(*_value, db, keyspace, value_spec), nullptr, _op);
        }
    }

    if (_op != expr::oper_t::IN) {
        throw exceptions::invalid_request_exception(format("Unsupported operator type {} in a condition ", _op));
    }

    if (_in_marker) {
        // The whole IN list is a single expression.
        assert(_in_values.empty());
        expr::expression multi_item_term = prepare_expression(*_in_marker, db, keyspace, value_spec);
        // collection_element_expression is not used afterwards, so move it
        // like the other return paths do instead of copying it.
        return column_condition::in_condition(receiver, std::move(collection_element_expression),
                std::move(multi_item_term), {});
    }
    // Both _in_values and _in_marker can be missing in case of empty IN list: "a IN ()"
    std::vector<expr::expression> terms;
    terms.reserve(_in_values.size());
    for (auto&& value : _in_values) {
        terms.push_back(prepare_expression(value, db, keyspace, value_spec));
    }
    return column_condition::in_condition(receiver, std::move(collection_element_expression),
            std::nullopt, std::move(terms));
}
|
|
|
|
} // end of namespace cql3
|