The CQL binary protocol introduced "unset" values in version 4
of the protocol. An unset value can be bound to a variable, which
causes certain CQL fragments to be skipped. For example, the
fragment `SET a = :var` will not change the value of `a` if `:var`
is bound to an unset value.
Unsets, however, are very limited in where they can appear. They
can only appear at the top-level of an expression, and any computation
done with them is invalid. For example, `SET list_column = [3, :var]`
is invalid if `:var` is bound to unset.
This causes the code to be littered with checks for unset, and there
are plenty of tests dedicated to catching unsets. However, a simpler
way is possible - prevent the infiltration of unsets at the point of
entry (when evaluating a bind variable expression), and introduce
guards to check for the few cases where unsets are allowed.
This is what this long patch does. It performs the following:
(general)
1. unset is removed from the possible values of cql3::raw_value and
cql3::raw_value_view.
(external->cql3)
2. query_options is fortified with a vector of booleans,
unset_bind_variable_vector, where each boolean corresponds to a bind
variable index and is true when it is unset.
3. To avoid churn, two compatibility structs are introduced:
cql3::raw_value{,_view}_vector_with_unset, which can be constructed
from a std::vector<raw_value{,_view}>, which is what most callers
have. They can also be constructed with explicit unset vectors, for
the few cases where they are needed.
(cql3->variables)
4. query_options::get_value_at() now throws if the requested bind variable
is unset. This replaces all the throwing checks in expression evaluation
and statement execution, which are removed.
5. A new query_options::is_unset() is added for the users that can tolerate
unset; though it is not used directly.
6. A new cql3::unset_operation_guard class guards against unsets. It accepts
an expression, and can be queried whether an unset is present. Two
conditions are checked: the expression must be a singleton bind
variable, and at runtime it must be bound to an unset value.
7. The modification_statement operations are split into two, via two
new subclasses of cql3::operation. cql3::operation_no_unset_support
ignores unsets completely. cql3::operation_skip_if_unset checks if
an operand is unset (luckily all operations have at most one operand that
tolerates unset) and applies unset_operation_guard to it.
8. The various sites that accept expressions or operations are modified
to check for should_skip_operation(). These are the loops around
operations in update_statement and delete_statement, and the checks
for unset in attributes (LIMIT and PER PARTITION LIMIT).
(tests)
9. Many unset tests are removed. It's now impossible to enter an
unset value into the expression evaluation machinery (there's
just no unset value), so it's impossible to test for it.
10. Other unset tests now have to be invoked via bind variables,
since there's no way to create an unset cql3::expr::constant.
11. Many tests have their exception message match strings relaxed.
Since unsets are now checked very early, we don't know the context
where they happen. It would be possible to reintroduce it (by adding
a format string parameter to cql3::unset_operation_guard), but it
seems not to be worth the effort. Usage of unsets is rare, and it is
explicit (at least with the Python driver, an unset cannot be
introduced by omission).
I tried as an alternative to wrap cql3::raw_value{,_view} (that doesn't
recognize unsets) with cql3::maybe_unset_value (that does), but that
caused huge amounts of churn, so I abandoned that in favor of the
current approach.
Closes #12517
295 lines
9.6 KiB
C++
295 lines
9.6 KiB
C++
/*
|
|
* Copyright (C) 2017-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "types.hh"
|
|
#include "types/collection.hh"
|
|
#include "bytes.hh"
|
|
|
|
#include <optional>
|
|
#include <variant>
|
|
|
|
#include <seastar/util/variant_utils.hh>
|
|
|
|
#include "utils/fragmented_temporary_buffer.hh"
|
|
#include "utils/overloaded_functor.hh"
|
|
|
|
namespace cql3 {
|
|
|
|
/// Tag type marking a null CQL value inside the raw_value / raw_value_view
/// variants. It carries no state, so any two instances compare equal.
struct null_value {
    // Both operands are taken by const reference; the tag has no state to
    // compare, so the result is always true.
    friend bool operator==(const null_value&, const null_value&) { return true; }
};
|
|
|
|
class raw_value;
|
|
/// \brief View to a raw CQL protocol value.
|
|
///
|
|
/// \see raw_value
|
|
class raw_value_view {
|
|
std::variant<fragmented_temporary_buffer::view, managed_bytes_view, null_value> _data;
|
|
// Temporary storage is only useful if a raw_value_view needs to be instantiated
|
|
// with a value which lifetime is bounded only to the view itself.
|
|
// This hack is introduced in order to avoid storing temporary storage
|
|
// in an external container, which may cause memory leaking problems.
|
|
// This pointer is disengaged for regular raw_value_view instances.
|
|
// Data is stored in a shared pointer for two reasons:
|
|
// - pointers are cheap to copy
|
|
// - it makes the view keep its semantics - it's safe to copy a view multiple times
|
|
// and all copies still refer to the same underlying data.
|
|
lw_shared_ptr<managed_bytes> _temporary_storage = nullptr;
|
|
|
|
raw_value_view(null_value data)
|
|
: _data{std::move(data)}
|
|
{}
|
|
raw_value_view(fragmented_temporary_buffer::view data)
|
|
: _data{data}
|
|
{}
|
|
raw_value_view(managed_bytes_view data)
|
|
: _data{data}
|
|
{}
|
|
// This constructor is only used by make_temporary() and it acquires ownership
|
|
// of the given buffer. The view created that way refers to its own temporary storage.
|
|
explicit raw_value_view(managed_bytes&& temporary_storage);
|
|
public:
|
|
static raw_value_view make_null() {
|
|
return raw_value_view{null_value{}};
|
|
}
|
|
static raw_value_view make_value(fragmented_temporary_buffer::view view) {
|
|
return raw_value_view{view};
|
|
}
|
|
static raw_value_view make_value(managed_bytes_view view) {
|
|
return raw_value_view{view};
|
|
}
|
|
static raw_value_view make_value(bytes_view view) {
|
|
return raw_value_view{managed_bytes_view(view)};
|
|
}
|
|
static raw_value_view make_temporary(raw_value&& value);
|
|
bool is_null() const {
|
|
return std::holds_alternative<null_value>(_data);
|
|
}
|
|
// An empty value is not null, but it has 0 bytes of data.
|
|
// An empty int value can be created in CQL using blobasint(0x).
|
|
bool is_empty_value() const {
|
|
if (is_null()) {
|
|
return false;
|
|
}
|
|
return size_bytes() == 0;
|
|
}
|
|
bool is_value() const {
|
|
return _data.index() <= 1;
|
|
}
|
|
explicit operator bool() const {
|
|
return is_value();
|
|
}
|
|
|
|
template <typename Func>
|
|
requires std::invocable<Func, const managed_bytes_view&> && std::invocable<Func, const fragmented_temporary_buffer::view&>
|
|
decltype(auto) with_value(Func f) const {
|
|
switch (_data.index()) {
|
|
case 0: return f(std::get<fragmented_temporary_buffer::view>(_data));
|
|
default: return f(std::get<managed_bytes_view>(_data));
|
|
}
|
|
}
|
|
|
|
template <typename Func>
|
|
requires std::invocable<Func, bytes_view>
|
|
decltype(auto) with_linearized(Func f) const {
|
|
return with_value([&] (const FragmentedView auto& v) {
|
|
return ::with_linearized(v, std::forward<Func>(f));
|
|
});
|
|
}
|
|
|
|
size_t size_bytes() const {
|
|
return with_value([&] (const FragmentedView auto& v) {
|
|
return v.size_bytes();
|
|
});
|
|
}
|
|
|
|
template <typename ValueType>
|
|
ValueType deserialize(const abstract_type& t) const {
|
|
return value_cast<ValueType>(with_value([&] (const FragmentedView auto& v) { return t.deserialize(v); }));
|
|
}
|
|
|
|
template <typename ValueType>
|
|
ValueType deserialize(const collection_type_impl& t) const {
|
|
return value_cast<ValueType>(with_value([&] (const FragmentedView auto& v) { return t.deserialize(v); }));
|
|
}
|
|
|
|
void validate(const abstract_type& t) const {
|
|
return with_value([&] (const FragmentedView auto& v) { return t.validate(v); });
|
|
}
|
|
|
|
template <typename ValueType>
|
|
ValueType validate_and_deserialize(const collection_type_impl& t) const {
|
|
return with_value([&] (const FragmentedView auto& v) {
|
|
t.validate(v);
|
|
return value_cast<ValueType>(t.deserialize(v));
|
|
});
|
|
}
|
|
|
|
template <typename ValueType>
|
|
ValueType validate_and_deserialize(const abstract_type& t) const {
|
|
return with_value([&] (const FragmentedView auto& v) {
|
|
t.validate(v);
|
|
return value_cast<ValueType>(t.deserialize(v));
|
|
});
|
|
}
|
|
|
|
friend managed_bytes_opt to_managed_bytes_opt(const cql3::raw_value_view& view) {
|
|
if (view.is_value()) {
|
|
return view.with_value([] (const FragmentedView auto& v) { return managed_bytes(v); });
|
|
}
|
|
return managed_bytes_opt();
|
|
}
|
|
|
|
friend managed_bytes_opt to_managed_bytes_opt(cql3::raw_value_view&& view) {
|
|
if (view._temporary_storage) {
|
|
return std::move(*view._temporary_storage);
|
|
}
|
|
return to_managed_bytes_opt(view);
|
|
}
|
|
|
|
friend std::ostream& operator<<(std::ostream& os, const raw_value_view& value);
|
|
friend class raw_value;
|
|
};
|
|
|
|
/// \brief Raw CQL protocol value.
|
|
///
|
|
/// The `raw_value` type represents an uninterpreted value from the CQL wire
|
|
/// protocol. A raw value can hold either a null value, or a byte
|
|
/// blob that represents the value.
|
|
class raw_value {
|
|
std::variant<bytes, managed_bytes, null_value> _data;
|
|
|
|
raw_value(null_value&& data)
|
|
: _data{std::move(data)}
|
|
{}
|
|
raw_value(bytes&& data)
|
|
: _data{std::move(data)}
|
|
{}
|
|
raw_value(const bytes& data)
|
|
: _data{data}
|
|
{}
|
|
raw_value(managed_bytes&& data)
|
|
: _data{std::move(data)}
|
|
{}
|
|
raw_value(const managed_bytes& data)
|
|
: _data{data}
|
|
{}
|
|
public:
|
|
static raw_value make_null() {
|
|
return raw_value{null_value{}};
|
|
}
|
|
static raw_value make_value(const raw_value_view& view);
|
|
static raw_value make_value(managed_bytes&& mb) {
|
|
return raw_value{std::move(mb)};
|
|
}
|
|
static raw_value make_value(managed_bytes_opt&& mbo) {
|
|
return mbo ? make_value(std::move(*mbo)) : make_null();
|
|
}
|
|
static raw_value make_value(const managed_bytes& mb) {
|
|
return raw_value{mb};
|
|
}
|
|
static raw_value make_value(const managed_bytes_opt& mbo) {
|
|
if (mbo) {
|
|
return make_value(*mbo);
|
|
}
|
|
return make_null();
|
|
}
|
|
static raw_value make_value(bytes&& bytes) {
|
|
return raw_value{std::move(bytes)};
|
|
}
|
|
static raw_value make_value(const bytes& bytes) {
|
|
return raw_value{bytes};
|
|
}
|
|
static raw_value make_value(const bytes_opt& bytes) {
|
|
if (bytes) {
|
|
return make_value(*bytes);
|
|
}
|
|
return make_null();
|
|
}
|
|
bool is_null() const {
|
|
return std::holds_alternative<null_value>(_data);
|
|
}
|
|
// An empty value is not null, but it has 0 bytes of data.
|
|
// An empty int value can be created in CQL using blobasint(0x).
|
|
bool is_empty_value() const {
|
|
if (is_null()) {
|
|
return false;
|
|
}
|
|
return view().size_bytes() == 0;
|
|
}
|
|
bool is_value() const {
|
|
return _data.index() <= 1;
|
|
}
|
|
explicit operator bool() const {
|
|
return is_value();
|
|
}
|
|
bytes to_bytes() && {
|
|
return std::visit(overloaded_functor{
|
|
[](bytes&& bytes_val) { return std::move(bytes_val); },
|
|
[](managed_bytes&& managed_bytes_val) { return ::to_bytes(managed_bytes_val); },
|
|
[](null_value&&) -> bytes {
|
|
throw std::runtime_error("to_bytes() called on raw value that is null");
|
|
},
|
|
}, std::move(_data));
|
|
}
|
|
bytes_opt to_bytes_opt() && {
|
|
return std::visit(overloaded_functor{
|
|
[](bytes&& bytes_val) { return bytes_opt(bytes_val); },
|
|
[](managed_bytes&& managed_bytes_val) { return bytes_opt(::to_bytes(managed_bytes_val)); },
|
|
[](null_value&&) -> bytes_opt {
|
|
return std::nullopt;
|
|
},
|
|
}, std::move(_data));
|
|
}
|
|
managed_bytes to_managed_bytes() && {
|
|
return std::visit(overloaded_functor{
|
|
[](bytes&& bytes_val) { return managed_bytes(bytes_val); },
|
|
[](managed_bytes&& managed_bytes_val) { return std::move(managed_bytes_val); },
|
|
[](null_value&&) -> managed_bytes {
|
|
throw std::runtime_error("to_managed_bytes() called on raw value that is null");
|
|
},
|
|
}, std::move(_data));
|
|
}
|
|
managed_bytes_opt to_managed_bytes_opt() && {
|
|
return std::visit(overloaded_functor{
|
|
[](bytes&& bytes_val) { return managed_bytes_opt(bytes_val); },
|
|
[](managed_bytes&& managed_bytes_val) { return managed_bytes_opt(std::move(managed_bytes_val)); },
|
|
[](null_value&&) -> managed_bytes_opt {
|
|
return std::nullopt;
|
|
},
|
|
}, std::move(_data));
|
|
}
|
|
raw_value_view view() const;
|
|
friend class raw_value_view;
|
|
|
|
friend bool operator==(const raw_value& v1, const raw_value& v2);
|
|
friend std::ostream& operator<<(std::ostream& os, const raw_value& value);
|
|
};
|
|
|
|
}
|
|
|
|
/// Returns the bytes referenced by \p view as a contiguous `bytes` object.
///
/// The view must hold a value: with_value() does a std::get on the
/// managed_bytes_view alternative for anything that is not a fragmented
/// buffer view, which throws std::bad_variant_access for a null view.
inline bytes to_bytes(const cql3::raw_value_view& view)
{
    const auto linearize = [] (const FragmentedView auto& fragments) {
        return linearized(fragments);
    };
    return view.with_value(linearize);
}
|
|
|
|
/// Returns the bytes referenced by \p view, or a disengaged optional when
/// the view does not hold a value.
inline bytes_opt to_bytes_opt(const cql3::raw_value_view& view) {
    if (!view.is_value()) {
        return bytes_opt();
    }
    return to_bytes(view);
}
|
|
|
|
/// Returns a copy of the bytes held by \p value, or a disengaged optional
/// when it does not hold a value. Goes through a view of \p value, so
/// \p value itself is left untouched.
inline bytes_opt to_bytes_opt(const cql3::raw_value& value) {
    const cql3::raw_value_view value_view = value.view();
    return to_bytes_opt(value_view);
}
|