Files
scylladb/cql3/column_condition.cc
Pavel Solodovnikov bcc4647552 lwt: fix handling of nulls in parameter markers for LWT queries
This patch affects the LWT queries with IF conditions of the
following form: `IF col in :value`, i.e. if the parameter
marker is used.

When executing a prepared query with a bound value
of `(None,)` (tuple with null, example for Python driver), it is
serialized not as NULL but as "empty" value (serialization
format differs in each case).

Therefore, Scylla deserializes the parameters in the request as
empty `data_value` instances, which are, in turn, translated
to non-empty `bytes_opt` with empty byte-string value later.

Account for this case too in the CAS condition evaluation code.

Example of a problem this patch aims to fix:

Suppose we have a table `tbl` with a boolean field `test` and
INSERT a row with NULL value for the `test` column.

Then the following update query fails to apply due to the
error in IF condition evaluation code (assume `v=(null)`):
`UPDATE tbl SET test=false WHERE key=0 IF test IN :v`
returns false in `[applied]` column, but is expected to succeed.

Tests: unit(debug, dev), dtest(prepared stmt LWT tests at https://github.com/scylladb/scylla-dtest/pull/1286)

Fixes: #5710

Signed-off-by: Pavel Solodovnikov <pa.solodovnikov@scylladb.com>
Message-Id: <20200205102039.35851-1-pa.solodovnikov@scylladb.com>
2020-02-09 16:50:42 +02:00

329 lines
15 KiB
C++

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* Copyright (C) 2015 ScyllaDB
*
* Modified by ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "cql3/column_condition.hh"
#include "statements/request_validations.hh"
#include "unimplemented.hh"
#include "lists.hh"
#include "maps.hh"
#include <boost/range/algorithm_ext/push_back.hpp>
#include "types/map.hh"
#include "types/list.hh"
namespace {
// Rejects slice conditions on types that reference a duration.
//
// Nothing happens unless `op` is a slice operator and `type` references a
// duration. Otherwise an invalid_request_exception is raised; the message
// depends on whether the duration is nested in a collection, a tuple or a
// UDT (check_false() presumably throws when its first argument is true), or
// whether the type is a plain duration.
void validate_operation_on_durations(const abstract_type& type, const cql3::operator_type& op) {
    using cql3::statements::request_validations::check_false;

    if (!op.is_slice() || !type.references_duration()) {
        return;
    }

    check_false(type.is_collection(), "Slice conditions are not supported on collections containing durations");
    check_false(type.is_tuple(), "Slice conditions are not supported on tuples containing durations");
    check_false(type.is_user_type(), "Slice conditions are not supported on UDTs containing durations");

    // None of the container checks fired, so the type itself is a duration.
    throw exceptions::invalid_request_exception("Slice conditions are not supported on durations");
}
// Compares a cell value with a serialized condition parameter and reports
// whether the relation `cell_value <op> param` holds.
//
// op         - one of EQ, NEQ, GTE, LTE, GT, LT; anything else trips an assert.
// cell_type  - type of cell_value as it was read.
// param_type - declared type of the parameter.
// cell_value - the value read from the cell.
// param      - serialized parameter value.
//
// Returns non-zero when the relation holds and zero otherwise.
int is_satisfied_by(const cql3::operator_type &op, const abstract_type& cell_type,
        const abstract_type& param_type, const data_value& cell_value, const bytes& param) {
    // Multi-cell sets and lists are read back as maps (collections_as_maps
    // flag in partition_slice), while the parameter keeps its set/list
    // representation, so the two representations need a dedicated comparison.
    const bool map_against_listlike =
            cell_type.is_map() && cell_type.is_multi_cell() && param_type.is_listlike();

    const int ordering = [&] () -> int {
        if (!map_against_listlike) {
            return cell_type.compare(cell_type.decompose(cell_value), param);
        }
        const auto& as_listlike = static_cast<const listlike_collection_type_impl&>(param_type);
        const auto& as_map = static_cast<const map_type_impl&>(cell_type);
        assert(as_listlike.is_multi_cell());
        // compare_with_map() takes the parameter first and the cell second,
        // so the sign is flipped to get "cell compared to parameter".
        return -as_listlike.compare_with_map(as_map, param, as_map.decompose(cell_value));
    }();

    if (op == cql3::operator_type::EQ) {
        return ordering == 0;
    }
    if (op == cql3::operator_type::NEQ) {
        return ordering != 0;
    }
    if (op == cql3::operator_type::GTE) {
        return ordering >= 0;
    }
    if (op == cql3::operator_type::LTE) {
        return ordering <= 0;
    }
    if (op == cql3::operator_type::GT) {
        return ordering > 0;
    }
    if (op == cql3::operator_type::LT) {
        return ordering < 0;
    }
    // Callers only pass comparison operators.
    assert(false);
    return false;
}
// Decodes a list index from `key` and returns it as an unsigned value.
// A negative index is rejected with invalid_request_exception, which
// repeats Cassandra behaviour.
uint32_t read_and_check_list_index(const cql3::raw_value_view& key) {
    // List indexes are always of int32_type, see lists::index_spec_of
    const int32_t index = read_simple_exactly<int32_t>(to_bytes(key));
    if (index >= 0) {
        return static_cast<uint32_t>(index);
    }
    throw exceptions::invalid_request_exception(format("Invalid negative list index {}", index));
}
} // end of anonymous namespace
namespace cql3 {
// Returns true if any term of this condition -- the collection element, the
// single value / IN marker, or any of the IN list values -- reports that it
// uses the function `function_name` from keyspace `ks_name`.
// Terms that are not set are skipped.
bool
column_condition::uses_function(const sstring& ks_name, const sstring& function_name) const {
    const auto term_uses_function = [&ks_name, &function_name] (const auto& candidate) {
        return bool(candidate) && candidate->uses_function(ks_name, function_name);
    };

    return term_uses_function(_collection_element)
            || term_uses_function(_value)
            || std::any_of(_in_values.begin(), _in_values.end(), term_uses_function);
}
// Lets every term of this condition register its bind markers in
// `bound_names`. Terms are visited in a fixed order: the collection element
// (if any), then each IN list value, then the single value / IN marker
// (if any).
void column_condition::collect_marker_specificaton(variable_specifications& bound_names) {
    if (_collection_element) {
        _collection_element->collect_marker_specification(bound_names);
    }

    // An empty IN list simply contributes nothing.
    for (auto& in_value : _in_values) {
        in_value->collect_marker_specification(bound_names);
    }

    if (_value) {
        _value->collect_marker_specification(bound_names);
    }
}
// Evaluates this condition against the current value of the column.
//
// cell_value - the value currently stored in the column, or nullptr when the
//              column has no value; a missing value is handled as NULL.
// options    - query options used to bind the terms of the condition.
//
// Returns true when the condition holds.
//
// Throws exceptions::invalid_request_exception when:
// - the collection key or the compared value binds to 'unset';
// - the collection key binds to null;
// - a list index is negative (see read_and_check_list_index());
// - null is compared using an operator other than = or !=;
// - the IN marker does not bind to a multi_item_terminal.
bool column_condition::applies_to(const data_value* cell_value, const query_options& options) const {
// Cassandra condition support has a few quirks:
// - only a simple conjunct of predicates is supported "predicate AND predicate AND ..."
// - a predicate can operate on a column or a collection element, which must always be
// on the right side: "a = 3" or "collection['key'] IN (1,2,3)"
// - parameter markers are allowed on the right hand side only
// - only =, <, >, >=, <=, != and IN predicates are supported.
// - NULLs and missing values are treated differently from the WHERE clause:
// a term or cell in IF clause is allowed to be NULL or compared with NULL,
// and NULL value is treated just like any other value in the domain (there is no
// three-value logic or UNKNOWN like in SQL).
// - empty sets/lists/maps are treated differently when comparing with NULLs depending on
// whether the object is frozen or not. An empty *frozen* set/map/list is not equal to NULL.
// An empty *multi-cell* set/map/list is identical to NULL.
// The code below implements these rules in a way compatible with Cassandra.
// Use a map/list value instead of entire collection if a key is present in the predicate.
// From here on cell_value points at the selected element, or is nullptr if there is no such element.
if (_collection_element != nullptr && cell_value != nullptr) {
// Checked in column_condition::raw::prepare()
assert(cell_value->type()->is_collection());
const collection_type_impl& cell_type = static_cast<const collection_type_impl&>(*cell_value->type());
cql3::raw_value_view key = _collection_element->bind_and_get(options);
if (key.is_unset_value()) {
throw exceptions::invalid_request_exception(
format("Invalid 'unset' value in {} element access", cell_type.cql3_type_name()));
}
if (key.is_null()) {
throw exceptions::invalid_request_exception(
format("Invalid null value for {} element access", cell_type.cql3_type_name()));
}
if (cell_type.is_map()) {
// If a collection is multi-cell and not frozen, it is returned as a map even if the
// underlying data type is "set" or "list". This is controlled by
// partition_slice::collections_as_maps enum, which is set when preparing a read command
// object. Representing a list as a map<timeuuid, listval> is necessary to identify the list field
// being updated, e.g. in case of UPDATE t SET list[3] = null WHERE a = 1 IF list[3]
// = 'key'
const map_type_impl& map_type = static_cast<const map_type_impl&>(cell_type);
// A map is serialized as a vector of data value pairs.
const std::vector<std::pair<data_value, data_value>>& map = map_type.from_value(*cell_value);
if (column.type->is_map()) {
// We're working with a map *type*, not only map *representation*.
// Binary-search the entries for the requested key; cell_value is captured by
// reference so that the lambda can redirect it to the entry's value.
with_linearized(*key, [&map, &map_type, &cell_value] (bytes_view key) {
auto end = map.end();
const auto& map_key_type = *map_type.get_keys_type();
auto less = [&map_key_type](const std::pair<data_value, data_value>& value, bytes_view key) {
return map_key_type.less(map_key_type.decompose(value.first), key);
};
// Map elements are sorted by key.
auto it = std::lower_bound(map.begin(), end, key, less);
if (it != end && map_key_type.equal(map_key_type.decompose(it->first), key)) {
cell_value = &it->second;
} else {
// No entry with this key: the element is handled as NULL below.
cell_value = nullptr;
}
});
} else if (column.type->is_list()) {
// We're working with a list type, represented as map.
uint32_t idx = read_and_check_list_index(key);
// An index past the end selects no element, which is handled as NULL below.
cell_value = idx >= map.size() ? nullptr : &map[idx].second;
} else {
// Syntax like "set_column['key'] = constant" is invalid.
assert(false);
}
} else if (cell_type.is_list()) {
// This is a *frozen* list.
const list_type_impl& list_type = static_cast<const list_type_impl&>(cell_type);
const std::vector<data_value>& list = list_type.from_value(*cell_value);
uint32_t idx = read_and_check_list_index(key);
// An index past the end selects no element, which is handled as NULL below.
cell_value = idx >= list.size() ? nullptr : &list[idx];
} else {
assert(false);
}
}
if (_op.is_compare()) {
// =, <, >, >=, <=, !=
cql3::raw_value_view param = _value->bind_and_get(options);
if (param.is_unset_value()) {
throw exceptions::invalid_request_exception("Invalid 'unset' value in condition");
}
if (param.is_null()) {
// NULL can only be tested for (in)equality; ordering against NULL is an error.
if (_op == operator_type::EQ) {
return cell_value == nullptr;
} else if (_op == operator_type::NEQ) {
return cell_value != nullptr;
} else {
throw exceptions::invalid_request_exception(format("Invalid comparison with null for operator \"{}\"", _op));
}
} else if (cell_value == nullptr) {
// The condition parameter is not null, so only NEQ can return true
return _op == operator_type::NEQ;
}
// type::validate() is called by bind_and_get(), so it's safe to pass to_bytes() result
// directly to compare.
return is_satisfied_by(_op, *cell_value->type(), *column.type, *cell_value, to_bytes(param));
}
assert(_op == operator_type::IN);
// Collect the serialized IN candidates; an element without a value stands for NULL.
std::vector<bytes_opt> in_values;
if (_value) {
// "IN <marker>": the whole list comes from a single bound term.
auto&& lval = dynamic_pointer_cast<multi_item_terminal>(_value->bind(options));
if (!lval) {
throw exceptions::invalid_request_exception("Invalid null value for IN condition");
}
in_values = std::move(lval->get_elements());
} else {
// "IN (a, b, ...)": every element is its own term.
for (auto&& v : _in_values) {
in_values.emplace_back(to_bytes_opt(v->bind_and_get(options)));
}
}
// If cell value is NULL, IN list must contain NULL or an empty set/list. Otherwise it must contain cell value.
if (cell_value) {
return std::any_of(in_values.begin(), in_values.end(), [this, cell_value] (const bytes_opt& value) {
return value.has_value() && is_satisfied_by(operator_type::EQ, *cell_value->type(), *column.type, *cell_value, *value);
});
} else {
// A null inside a bound IN tuple can arrive as an empty (zero-length) value
// instead of a missing one, so an empty value also matches a NULL cell.
return std::any_of(in_values.begin(), in_values.end(), [] (const bytes_opt& value) { return !value.has_value() || value->empty(); });
}
}
// Prepares this raw condition for the column `receiver` and returns the
// resulting column_condition.
//
// db, keyspace - forwarded to the prepare() call of every term.
// receiver     - definition of the column the condition applies to.
//
// Throws exceptions::invalid_request_exception when the column is a counter,
// when element access is used on a non-collection column, on a set, or on an
// unsupported collection kind, and when the operator is neither a comparison
// nor IN. validate_operation_on_durations() may throw as well.
::shared_ptr<column_condition>
column_condition::raw::prepare(database& db, const sstring& keyspace, const column_definition& receiver) const {
    if (receiver.type->is_counter()) {
        throw exceptions::invalid_request_exception("Conditions on counters are not supported");
    }

    // Unless an element is accessed, values are typed like the column itself.
    shared_ptr<column_specification> value_spec = receiver.column_specification;
    shared_ptr<term> collection_element_term;

    if (_collection_element) {
        if (!receiver.type->is_collection()) {
            throw exceptions::invalid_request_exception(format("Invalid element access syntax for non-collection column {}",
                    receiver.name_as_text()));
        }

        const auto* ctype = static_cast<const collection_type_impl*>(receiver.type.get());
        const auto kind = ctype->get_kind();

        if (kind == abstract_type::kind::set) {
            throw exceptions::invalid_request_exception(format("Invalid element access syntax for set column {}",
                    receiver.name_as_text()));
        }
        if (kind != abstract_type::kind::list && kind != abstract_type::kind::map) {
            throw exceptions::invalid_request_exception(
                    format("Unsupported collection type {} in a condition with element access", ctype->cql3_type_name()));
        }

        // Hand precise specifications to the terms so that the types of bound
        // parameters can later be validated against the index/key type and
        // the element value type.
        shared_ptr<column_specification> element_spec;
        if (kind == abstract_type::kind::list) {
            element_spec = lists::index_spec_of(receiver.column_specification);
            value_spec = lists::value_spec_of(receiver.column_specification);
        } else {
            element_spec = maps::key_spec_of(*receiver.column_specification);
            value_spec = maps::value_spec_of(*receiver.column_specification);
        }
        collection_element_term = _collection_element->prepare(db, keyspace, element_spec);
    }

    if (_op.is_compare()) {
        validate_operation_on_durations(*receiver.type, _op);
        return column_condition::condition(receiver, collection_element_term,
                _value->prepare(db, keyspace, value_spec), _op);
    }

    if (_op != operator_type::IN) {
        throw exceptions::invalid_request_exception(format("Unsupported operator type {} in a condition ", _op));
    }

    if (!_in_marker) {
        // Literal IN list. It may be empty ("a IN ()"), in which case there
        // is neither a marker nor any value.
        std::vector<::shared_ptr<term>> prepared_values;
        prepared_values.reserve(_in_values.size());
        for (const auto& raw_value : _in_values) {
            prepared_values.push_back(raw_value->prepare(db, keyspace, value_spec));
        }
        return column_condition::in_condition(receiver, collection_element_term, {}, std::move(prepared_values));
    }

    // "IN <marker>": a single term stands for the whole list.
    assert(_in_values.empty());
    shared_ptr<term> marker_term = _in_marker->prepare(db, keyspace, value_spec);
    return column_condition::in_condition(receiver, collection_element_term, marker_term, {});
}
} // end of namespace cql3