Files
scylladb/cql3/sets.cc
Michał Chojnowski 458878a414 cql3: optimize the deserialization of collections
Before this patch, deserializing a collection from a (prepared) CQL request
involved deserializing every element and serializing it again. Originally this
was a hacky method of validation, and it was also needed to reserialize nested
frozen collections from the CQLv2 format to the CQLv3 format.

But since then we started doing validation separately (before calls to
from_serialized) and CQLv2 became irrelevant, making reserialization of
elements (which, among other things, involves a memory alocation for every
element) pure waste.

This patch adds a faster path for collections in the v3 format, which does not
involve linearizing or reserializing the elements (since v3 is the same as
our internal format).

After this patch, the path from prepared CQL statements to
atomic_cell_or_collection is almost completely linearization-free. The last
remaining place is collection_mutation_description, where map keys are
linearized.
2021-04-01 10:44:21 +02:00

347 lines
13 KiB
C++

/*
* Copyright (C) 2015 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include "sets.hh"
#include "constants.hh"
#include "cql3_type.hh"
#include "types/map.hh"
#include "types/set.hh"
namespace cql3 {
lw_shared_ptr<column_specification>
sets::value_spec_of(const column_specification& column) {
return make_lw_shared<column_specification>(column.ks_name, column.cf_name,
::make_shared<column_identifier>(format("value({})", *column.name), true),
dynamic_cast<const set_type_impl&>(column.type->without_reversed()).get_elements_type());
}
shared_ptr<term>
sets::literal::prepare(database& db, const sstring& keyspace, lw_shared_ptr<column_specification> receiver) const {
validate_assignable_to(db, keyspace, *receiver);
if (_elements.empty()) {
// In Cassandra, an empty (unfrozen) map/set/list is equivalent to the column being null. In
// other words a non-frozen collection only exists if it has elements. Return nullptr right
// away to simplify predicate evaluation. See also
// https://issues.apache.org/jira/browse/CASSANDRA-5141
if (receiver->type->is_multi_cell()) {
return cql3::constants::null_literal::NULL_VALUE;
}
// We've parsed empty maps as a set literal to break the ambiguity so
// handle that case now. This branch works for frozen sets/maps only.
if (dynamic_pointer_cast<const map_type_impl>(receiver->type)) {
// use empty_type for comparator, set is empty anyway.
std::map<managed_bytes, managed_bytes, serialized_compare> m(empty_type->as_less_comparator());
return ::make_shared<maps::value>(std::move(m));
}
}
auto value_spec = value_spec_of(*receiver);
std::vector<shared_ptr<term>> values;
values.reserve(_elements.size());
bool all_terminal = true;
for (shared_ptr<term::raw> rt : _elements)
{
auto t = rt->prepare(db, keyspace, value_spec);
if (t->contains_bind_marker()) {
throw exceptions::invalid_request_exception(format("Invalid set literal for {}: bind variables are not supported inside collection literals", *receiver->name));
}
if (dynamic_pointer_cast<non_terminal>(t)) {
all_terminal = false;
}
values.push_back(std::move(t));
}
auto compare = dynamic_cast<const set_type_impl&>(receiver->type->without_reversed())
.get_elements_type()->as_less_comparator();
auto value = ::make_shared<delayed_value>(compare, std::move(values));
if (all_terminal) {
return value->bind(query_options::DEFAULT);
} else {
return value;
}
}
void
sets::literal::validate_assignable_to(database& db, const sstring& keyspace, const column_specification& receiver) const {
if (!receiver.type->without_reversed().is_set()) {
// We've parsed empty maps as a set literal to break the ambiguity so
// handle that case now
if (dynamic_pointer_cast<const map_type_impl>(receiver.type) && _elements.empty()) {
return;
}
throw exceptions::invalid_request_exception(format("Invalid set literal for {} of type {}", receiver.name, receiver.type->as_cql3_type()));
}
auto&& value_spec = value_spec_of(receiver);
for (shared_ptr<term::raw> rt : _elements) {
if (!is_assignable(rt->test_assignment(db, keyspace, *value_spec))) {
throw exceptions::invalid_request_exception(format("Invalid set literal for {}: value {} is not of type {}", *receiver.name, *rt, value_spec->type->as_cql3_type()));
}
}
}
assignment_testable::test_result
sets::literal::test_assignment(database& db, const sstring& keyspace, const column_specification& receiver) const {
if (!receiver.type->without_reversed().is_set()) {
// We've parsed empty maps as a set literal to break the ambiguity so handle that case now
if (dynamic_pointer_cast<const map_type_impl>(receiver.type) && _elements.empty()) {
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
}
return assignment_testable::test_result::NOT_ASSIGNABLE;
}
// If there is no elements, we can't say it's an exact match (an empty set if fundamentally polymorphic).
if (_elements.empty()) {
return assignment_testable::test_result::WEAKLY_ASSIGNABLE;
}
auto&& value_spec = value_spec_of(receiver);
// FIXME: make assignment_testable::test_all() accept ranges
std::vector<shared_ptr<assignment_testable>> to_test(_elements.begin(), _elements.end());
return assignment_testable::test_all(db, keyspace, *value_spec, to_test);
}
sstring
sets::literal::to_string() const {
return "{" + join(", ", _elements) + "}";
}
sets::value
sets::value::from_serialized(const raw_value_view& val, const set_type_impl& type, cql_serialization_format sf) {
try {
std::set<managed_bytes, serialized_compare> elements(type.get_elements_type()->as_less_comparator());
if (sf.collection_format_unchanged()) {
std::vector<managed_bytes> tmp = val.with_value([sf] (const FragmentedView auto& v) {
return partially_deserialize_listlike(v, sf);
});
for (auto&& element : tmp) {
elements.insert(std::move(element));
}
} else [[unlikely]] {
auto s = val.deserialize<set_type_impl::native_type>(type, sf);
for (auto&& element : s) {
elements.insert(elements.end(), managed_bytes(type.get_elements_type()->decompose(element)));
}
}
return value(std::move(elements));
} catch (marshal_exception& e) {
throw exceptions::invalid_request_exception(e.what());
}
}
cql3::raw_value
sets::value::get(const query_options& options) {
return cql3::raw_value::make_value(get_with_protocol_version(options.get_cql_serialization_format()));
}
managed_bytes
sets::value::get_with_protocol_version(cql_serialization_format sf) {
return collection_type_impl::pack_fragmented(_elements.begin(), _elements.end(),
_elements.size(), sf);
}
bool
sets::value::equals(const set_type_impl& st, const value& v) {
if (_elements.size() != v._elements.size()) {
return false;
}
auto&& elements_type = st.get_elements_type();
return std::equal(_elements.begin(), _elements.end(),
v._elements.begin(),
[elements_type] (managed_bytes_view v1, managed_bytes_view v2) {
return elements_type->equal(v1, v2);
});
}
sstring
sets::value::to_string() const {
sstring result = "{";
bool first = true;
for (auto&& e : _elements) {
if (!first) {
result += ", ";
}
first = true;
result += to_hex(e);
}
result += "}";
return result;
}
bool
sets::delayed_value::contains_bind_marker() const {
// False since we don't support them in collection
return false;
}
void
sets::delayed_value::collect_marker_specification(variable_specifications& bound_names) const {
}
shared_ptr<terminal>
sets::delayed_value::bind(const query_options& options) {
std::set<managed_bytes, serialized_compare> buffers(_comparator);
for (auto&& t : _elements) {
auto b = t->bind_and_get(options);
if (b.is_null()) {
throw exceptions::invalid_request_exception("null is not supported inside collections");
}
if (b.is_unset_value()) {
return constants::UNSET_VALUE;
}
// We don't support value > 64K because the serialization format encode the length as an unsigned short.
if (b.size_bytes() > std::numeric_limits<uint16_t>::max()) {
throw exceptions::invalid_request_exception(format("Set value is too long. Set values are limited to {:d} bytes but {:d} bytes value provided",
std::numeric_limits<uint16_t>::max(),
b.size_bytes()));
}
buffers.insert(buffers.end(), *to_managed_bytes_opt(b));
}
return ::make_shared<value>(std::move(buffers));
}
sets::marker::marker(int32_t bind_index, lw_shared_ptr<column_specification> receiver)
: abstract_marker{bind_index, std::move(receiver)} {
if (!_receiver->type->without_reversed().is_set()) {
throw std::runtime_error(format("Receiver {} for set marker has wrong type: {}",
_receiver->cf_name, _receiver->type->name()));
}
}
::shared_ptr<terminal>
sets::marker::bind(const query_options& options) {
const auto& value = options.get_value_at(_bind_index);
if (value.is_null()) {
return nullptr;
} else if (value.is_unset_value()) {
return constants::UNSET_VALUE;
} else {
auto& type = dynamic_cast<const set_type_impl&>(_receiver->type->without_reversed());
try {
value.validate(type, options.get_cql_serialization_format());
} catch (marshal_exception& e) {
throw exceptions::invalid_request_exception(
format("Exception while binding column {:s}: {:s}", _receiver->name->to_cql_string(), e.what()));
}
return make_shared<cql3::sets::value>(value::from_serialized(value, type, options.get_cql_serialization_format()));
}
}
void
sets::setter::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) {
auto value = _t->bind(params._options);
execute(m, row_key, params, column, std::move(value));
}
void
sets::setter::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params, const column_definition& column, ::shared_ptr<terminal> value) {
if (value == constants::UNSET_VALUE) {
return;
}
if (column.type->is_multi_cell()) {
// Delete all cells first, then add new ones
collection_mutation_description mut;
mut.tomb = params.make_tombstone_just_before();
m.set_cell(row_key, column, mut.serialize(*column.type));
}
adder::do_add(m, row_key, params, value, column);
}
void
sets::adder::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) {
const auto& value = _t->bind(params._options);
if (value == constants::UNSET_VALUE) {
return;
}
assert(column.type->is_multi_cell()); // "Attempted to add items to a frozen set";
do_add(m, row_key, params, value, column);
}
void
sets::adder::do_add(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params,
shared_ptr<term> value, const column_definition& column) {
auto set_value = dynamic_pointer_cast<sets::value>(std::move(value));
auto& set_type = dynamic_cast<const set_type_impl&>(column.type->without_reversed());
if (column.type->is_multi_cell()) {
if (!set_value || set_value->_elements.empty()) {
return;
}
// FIXME: collection_mutation_view_description? not compatible with params.make_cell().
collection_mutation_description mut;
for (auto&& e : set_value->_elements) {
mut.cells.emplace_back(to_bytes(e), params.make_cell(*set_type.value_comparator(), bytes_view(), atomic_cell::collection_member::yes));
}
m.set_cell(row_key, column, mut.serialize(set_type));
} else if (set_value != nullptr) {
// for frozen sets, we're overwriting the whole cell
auto v = set_value->get_with_protocol_version(cql_serialization_format::internal());
m.set_cell(row_key, column, params.make_cell(*column.type, raw_value_view::make_value(v)));
} else {
m.set_cell(row_key, column, params.make_dead_cell());
}
}
void
sets::discarder::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params) {
assert(column.type->is_multi_cell()); // "Attempted to remove items from a frozen set";
auto&& value = _t->bind(params._options);
if (!value || value == constants::UNSET_VALUE) {
return;
}
collection_mutation_description mut;
auto svalue = dynamic_pointer_cast<sets::value>(value);
assert(svalue);
mut.cells.reserve(svalue->_elements.size());
for (auto&& e : svalue->_elements) {
mut.cells.push_back({to_bytes(e), params.make_dead_cell()});
}
m.set_cell(row_key, column, mut.serialize(*column.type));
}
void sets::element_discarder::execute(mutation& m, const clustering_key_prefix& row_key, const update_parameters& params)
{
assert(column.type->is_multi_cell() && "Attempted to remove items from a frozen set");
auto elt = _t->bind(params._options);
if (!elt) {
throw exceptions::invalid_request_exception("Invalid null set element");
}
collection_mutation_description mut;
mut.cells.emplace_back(elt->get(params._options).to_bytes(), params.make_dead_cell());
m.set_cell(row_key, column, mut.serialize(*column.type));
}
}