Let's remove expr::token and replace all of its functionality with expr::function_call. expr::token is a struct whose job is to represent a partition key token. The idea is that when the user types in `token(p1, p2) < 1234`, this will be internally represented as an expression which uses expr::token to represent the `token(p1, p2)` part. The situation with expr::token is a bit complicated. On the one hand it's supposed to represent the partition token, but sometimes it's also assumed that it can represent a generic call to the token() function, for example `token(1, 2, 3)` could be a function_call, but it could also be expr::token. The query planning code assumes that each occurrence of expr::token represents the partition token without checking the arguments. Because of this, allowing `token(1, 2, 3)` to be represented as expr::token is dangerous - the query planning might think that it is `token(p1, p2, p3)` and plan the query based on this, which would be wrong. Currently expr::token is created only in one specific case. When the parser detects that the user typed in a restriction which has a call to `token` on the LHS it generates expr::token. In all other cases it generates an `expr::function_call`. Even when the `function_call` represents a valid partition token, it stays a `function_call`. During preparation there is no check to see if a `function_call` to `token` could be turned into `expr::token`. This is a bit inconsistent - sometimes `token(p1, p2, p3)` is represented as `expr::token` and the query planner handles that, but sometimes it might be represented as `function_call`, which the query planner doesn't handle. There is also a problem because there's a lot of duplication between a `function_call` and `expr::token`. All of the evaluation and preparation is the same for `expr::token` as it is for a `function_call` to the token function. 
Currently it's impossible to evaluate `expr::token` and preparation has some flaws, but implementing it would basically consist of copy-pasting the corresponding code from token `function_call`. One more aspect is multi-table queries. With `expr::token` we turn a call to the `token()` function into a struct that is schema-specific. What happens when a single expression is used to make queries to multiple tables? The schema is different, so something that is represented as `expr::token` for one schema would be represented as `function_call` in the context of a different schema. Translating expressions to different tables would require careful manipulation to convert `expr::token` to `function_call` and vice versa. This could cause trouble for index queries. Overall I think it would be best to remove expr::token. Although having a clear marker for the partition token is sometimes nice for query planning, in my opinion the pros are outweighed by the cons. I'm a big fan of having a single way to represent things; having two separate representations of the same thing without clear boundaries between them causes trouble. Instead of having expr::token and function_call we can just have the function_call and check if it represents a partition token when needed. Signed-off-by: Jan Ciolek <jan.ciolek@scylladb.com>
291 lines
13 KiB
C++
291 lines
13 KiB
C++
/*
|
|
* Copyright (C) 2015-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
|
*/
|
|
|
|
#include "selectable.hh"
|
|
#include "selectable_with_field_selection.hh"
|
|
#include "field_selector.hh"
|
|
#include "writetime_or_ttl.hh"
|
|
#include "selector_factories.hh"
|
|
#include "simple_selector.hh"
|
|
#include "cql3/query_options.hh"
|
|
#include "cql3/functions/functions.hh"
|
|
#include "cql3/functions/castas_fcts.hh"
|
|
#include "cql3/functions/aggregate_fcts.hh"
|
|
#include "cql3/expr/expression.hh"
|
|
#include "abstract_function_selector.hh"
|
|
#include "writetime_or_ttl_selector.hh"
|
|
|
|
namespace cql3 {
|
|
|
|
namespace selection {
|
|
|
|
// Logger named "cql3_selection"; passed to on_internal_error() by the expression visitors in this file.
seastar::logger slogger("cql3_selection");
|
|
|
|
class selectable_column : public selectable {
|
|
column_identifier _ci;
|
|
public:
|
|
explicit selectable_column(column_identifier ci) : _ci(std::move(ci)) {}
|
|
virtual ::shared_ptr<selector::factory> new_selector_factory(data_dictionary::database db, schema_ptr schema,
|
|
std::vector<const column_definition*>& defs) override;
|
|
virtual sstring to_string() const override {
|
|
return _ci.to_string();
|
|
}
|
|
};
|
|
|
|
// Looks the column up in `schema` and returns a simple_selector factory for it,
// registering the column in `defs` via add_and_get_index().
//
// Throws exceptions::invalid_request_exception when the column does not exist
// or is hidden from CQL.
::shared_ptr<selector::factory>
selectable_column::new_selector_factory(data_dictionary::database db, schema_ptr schema, std::vector<const column_definition*>& defs) {
    auto column = get_column_definition(*schema, _ci);

    // Do not allow explicitly selecting hidden columns. We also skip them on
    // "SELECT *" (see selection::wildcard()). A hidden column is reported
    // exactly like a missing one.
    if (!column || column->is_hidden_from_cql()) {
        throw exceptions::invalid_request_exception(format("Undefined name {} in selection clause", _ci.text()));
    }

    return simple_selector::new_factory(
            column->name_as_text(),
            add_and_get_index(*column, defs),
            column->type);
}
|
|
|
|
// Validates the target column and returns a writetime_or_ttl_selector factory for it.
//
// Throws exceptions::invalid_request_exception when the column is unknown or
// hidden from CQL, is part of the primary key, or has a multi-cell type.
shared_ptr<selector::factory>
selectable::writetime_or_ttl::new_selector_factory(data_dictionary::database db, schema_ptr s, std::vector<const column_definition*>& defs) {
    // Spelling of the selection function used in the error messages below.
    const char* const function_label = _is_writetime ? "writeTime" : "ttl";

    auto&& column = s->get_column_definition(_id->name());

    const bool unknown_column = !column || column->is_hidden_from_cql();
    if (unknown_column) {
        throw exceptions::invalid_request_exception(format("Undefined name {} in selection clause", _id));
    }

    if (column->is_primary_key()) {
        throw exceptions::invalid_request_exception(
                format("Cannot use selection function {} on PRIMARY KEY part {}", function_label, column->name()));
    }

    if (column->type->is_multi_cell()) {
        throw exceptions::invalid_request_exception(
                format("Cannot use selection function {} on non-frozen collections", function_label));
    }

    return writetime_or_ttl_selector::new_factory(
            column->name_as_text(),
            add_and_get_index(*column, defs),
            _is_writetime);
}
|
|
|
|
// Renders the selectable as "writetime(<id>)" or "ttl(<id>)".
sstring
selectable::writetime_or_ttl::to_string() const {
    const char* const function_label = _is_writetime ? "writetime" : "ttl";
    return format("{}({})", function_label, _id->to_string());
}
|
|
|
|
// Builds factories for the arguments, resolves the named function against them
// and returns an abstract_function_selector factory wrapping both.
//
// Throws exceptions::invalid_request_exception when the function cannot be
// resolved or when the resolved function has no return type.
shared_ptr<selector::factory>
selectable::with_function::new_selector_factory(data_dictionary::database db, schema_ptr s, std::vector<const column_definition*>& defs) {
    auto&& arg_factories = selector_factories::create_factories_and_collect_column_definitions(_args, db, s, defs);

    // resolve built-in functions before user defined functions
    auto&& resolved = functions::functions::get(
            db, s->ks_name(), _function_name, arg_factories->new_instances(), s->ks_name(), s->cf_name());

    if (!resolved) {
        throw exceptions::invalid_request_exception(format("Unknown function '{}'", _function_name));
    }

    if (!resolved->return_type()) {
        throw exceptions::invalid_request_exception(format("Unknown function {} called in selection clause", _function_name));
    }

    return abstract_function_selector::new_factory(std::move(resolved), std::move(arg_factories));
}
|
|
|
|
// Renders the call as "<function name>(<arg>, <arg>, ...)".
sstring
selectable::with_function::to_string() const {
    const auto& function_label = _function_name.name;
    return format("{}({})", function_label, fmt::join(_args, ", "));
}
|
|
|
|
// Returns an expression that calls the native COUNT_ROWS aggregate with no arguments.
expr::expression
make_count_rows_function_expression() {
    auto count_rows_name = cql3::functions::function_name::native_function(
            cql3::functions::aggregate_fcts::COUNT_ROWS_FUNCTION_NAME);
    return expr::function_call{
        std::move(count_rows_name),
        std::vector<cql3::expr::expression>()};
}
|
|
|
|
// Builds factories for the arguments and wraps them, together with the
// already-known function object, in an abstract_function_selector factory.
shared_ptr<selector::factory>
selectable::with_anonymous_function::new_selector_factory(data_dictionary::database db, schema_ptr s, std::vector<const column_definition*>& defs) {
    auto&& arg_factories = selector_factories::create_factories_and_collect_column_definitions(_args, db, s, defs);
    return abstract_function_selector::new_factory(_function, std::move(arg_factories));
}
|
|
|
|
// Renders the call as "<function name>(<arg>, <arg>, ...)".
sstring
selectable::with_anonymous_function::to_string() const {
    const auto& function_label = _function->name().name;
    return format("{}({})", function_label, fmt::join(_args, ", "));
}
|
|
|
|
// Builds the factory of the selected value, checks that its type is a user
// type containing the requested field, and returns a field_selector factory.
//
// Throws exceptions::invalid_request_exception when the selected value is not
// of a user type or when the user type has no such field.
shared_ptr<selector::factory>
selectable::with_field_selection::new_selector_factory(data_dictionary::database db, schema_ptr s, std::vector<const column_definition*>& defs) {
    auto&& inner_factory = _selected->new_selector_factory(db, s, defs);
    auto selected_type = inner_factory->new_instance()->get_type();

    const bool is_udt = selected_type->underlying_type()->is_user_type();
    if (!is_udt) {
        throw exceptions::invalid_request_exception(
                format("Invalid field selection: {} of type {} is not a user type", _selected->to_string(), selected_type->as_cql3_type()));
    }

    auto udt = static_pointer_cast<const user_type_impl>(selected_type->underlying_type());
    auto field_index = udt->idx_of_field(_field->bytes_);
    if (!field_index) {
        throw exceptions::invalid_request_exception(format("{} of type {} has no field {}",
                                                           _selected->to_string(), udt->as_cql3_type(), _field));
    }

    return field_selector::new_factory(std::move(udt), *field_index, std::move(inner_factory));
}
|
|
|
|
// Renders the selection as "<selected>.<field>".
sstring
selectable::with_field_selection::to_string() const {
    auto selected_text = _selected->to_string();
    auto field_text = _field->to_string();
    return format("{}.{}", selected_text, field_text);
}
|
|
|
|
// Builds a factory for the single cast operand, looks up the cast function for
// the target type and returns an abstract_function_selector factory for it.
shared_ptr<selector::factory>
selectable::with_cast::new_selector_factory(data_dictionary::database db, schema_ptr s, std::vector<const column_definition*>& defs) {
    // The cast has exactly one operand; wrap it so the shared factory builder can be reused.
    std::vector<shared_ptr<selectable>> operands{_arg};
    auto&& operand_factories = selector_factories::create_factories_and_collect_column_definitions(operands, db, s, defs);
    auto&& cast_function = functions::castas_functions::get(_type, operand_factories->new_instances());

    return abstract_function_selector::new_factory(std::move(cast_function), std::move(operand_factories));
}
|
|
|
|
// Renders the cast as "cast(<arg> as <type>)".
sstring
selectable::with_cast::to_string() const {
    auto operand_text = _arg->to_string();
    auto target_text = cql3_type(_type).to_string();
    return format("cast({} as {})", operand_text, target_text);
}
|
|
|
|
// Converts a raw selector expression into a selectable, preparing identifiers
// against schema `s`.
//
// Expression kinds that produce a selectable: column_value,
// unresolved_identifier, column_mutation_attribute, function_call (arguments
// are converted recursively), cast and field_selection. Every other kind is
// reported through on_internal_error(). A cast whose type is not already a
// data_type is also reported through on_internal_error().
shared_ptr<selectable>
prepare_selectable(const schema& s, const expr::expression& raw_selectable) {
    return expr::visit(overloaded_functor{
        [&] (const expr::constant&) -> shared_ptr<selectable> {
            on_internal_error(slogger, "no way to express SELECT constant in the grammar yet");
        },
        [&] (const expr::conjunction&) -> shared_ptr<selectable> {
            on_internal_error(slogger, "no way to express 'SELECT a AND b' in the grammar yet");
        },
        [&] (const expr::binary_operator&) -> shared_ptr<selectable> {
            on_internal_error(slogger, "no way to express 'SELECT a binop b' in the grammar yet");
        },
        [&] (const expr::column_value& column) -> shared_ptr<selectable> {
            // There is no path that reaches here, but expr::column_value and selectable_column are logically the same,
            // so bridge them.
            return ::make_shared<selectable_column>(column_identifier(column.col->name(), column.col->name_as_text()));
        },
        [&] (const expr::subscript&) -> shared_ptr<selectable> {
            on_internal_error(slogger, "no way to express 'SELECT a[b]' in the grammar yet");
        },
        [&] (const expr::unresolved_identifier& ui) -> shared_ptr<selectable> {
            return make_shared<selectable_column>(*ui.ident->prepare(s));
        },
        [&] (const expr::column_mutation_attribute& cma) -> shared_ptr<selectable> {
            // Bind by reference: only the identifier is read, so no copy of the node is needed.
            const auto& unresolved_id = expr::as<expr::unresolved_identifier>(cma.column);
            bool is_writetime = cma.kind == expr::column_mutation_attribute::attribute_kind::writetime;
            return make_shared<selectable::writetime_or_ttl>(unresolved_id.ident->prepare_column_identifier(s), is_writetime);
        },
        [&] (const expr::function_call& fc) -> shared_ptr<selectable> {
            // Convert the arguments first; the resulting vector is moved into
            // whichever selectable matches the kind of function reference.
            std::vector<shared_ptr<selectable>> prepared_args;
            prepared_args.reserve(fc.args.size());
            for (auto&& arg : fc.args) {
                prepared_args.push_back(prepare_selectable(s, arg));
            }
            return std::visit(overloaded_functor{
                [&] (const functions::function_name& named) -> shared_ptr<selectable> {
                    return ::make_shared<selectable::with_function>(named, std::move(prepared_args));
                },
                [&] (const shared_ptr<functions::function>& anon) -> shared_ptr<selectable> {
                    return ::make_shared<selectable::with_anonymous_function>(anon, std::move(prepared_args));
                },
            }, fc.func);
        },
        [&] (const expr::cast& c) -> shared_ptr<selectable> {
            auto t = std::get_if<data_type>(&c.type);
            if (!t) {
                // FIXME: adjust prepare_selectable() signature so we can prepare the type too
                on_internal_error(slogger, "unprepared type in selector type cast");
            }
            return ::make_shared<selectable::with_cast>(prepare_selectable(s, c.arg), *t);
        },
        [&] (const expr::field_selection& fs) -> shared_ptr<selectable> {
            // Convert the structure being accessed, then pair it with the prepared field identifier.
            return make_shared<selectable::with_field_selection>(prepare_selectable(s, fs.structure),
                    fs.field->prepare(s));
        },
        [&] (const expr::bind_variable&) -> shared_ptr<selectable> {
            on_internal_error(slogger, "bind_variable found its way to selector context");
        },
        [&] (const expr::untyped_constant&) -> shared_ptr<selectable> {
            on_internal_error(slogger, "untyped_constant found its way to selector context");
        },
        [&] (const expr::tuple_constructor&) -> shared_ptr<selectable> {
            on_internal_error(slogger, "tuple_constructor found its way to selector context");
        },
        [&] (const expr::collection_constructor&) -> shared_ptr<selectable> {
            on_internal_error(slogger, "collection_constructor found its way to selector context");
        },
        [&] (const expr::usertype_constructor&) -> shared_ptr<selectable> {
            on_internal_error(slogger, "usertype_constructor found its way to selector context");
        },
    }, raw_selectable);
}
|
|
|
|
bool
|
|
selectable_processes_selection(const expr::expression& raw_selectable) {
|
|
return expr::visit(overloaded_functor{
|
|
[&] (const expr::constant&) -> bool {
|
|
on_internal_error(slogger, "no way to express SELECT constant in the grammar yet");
|
|
},
|
|
[&] (const expr::conjunction& conj) -> bool {
|
|
on_internal_error(slogger, "no way to express 'SELECT a AND b' in the grammar yet");
|
|
},
|
|
[&] (const expr::binary_operator& conj) -> bool {
|
|
on_internal_error(slogger, "no way to express 'SELECT a binop b' in the grammar yet");
|
|
},
|
|
[] (const expr::subscript&) -> bool {
|
|
on_internal_error(slogger, "no way to express 'SELECT a[b]' in the grammar yet");
|
|
},
|
|
[&] (const expr::column_value& column) -> bool {
|
|
// There is no path that reaches here, but expr::column_value and column_identifier are logically the same,
|
|
// so bridge them.
|
|
return false;
|
|
},
|
|
[&] (const expr::unresolved_identifier& ui) -> bool {
|
|
return ui.ident->processes_selection();
|
|
},
|
|
[&] (const expr::column_mutation_attribute& cma) -> bool {
|
|
return true;
|
|
},
|
|
[&] (const expr::function_call& fc) -> bool {
|
|
return true;
|
|
},
|
|
[&] (const expr::cast& c) -> bool {
|
|
return true;
|
|
},
|
|
[&] (const expr::field_selection& fs) -> bool {
|
|
return true;
|
|
},
|
|
[&] (const expr::bind_variable&) -> bool {
|
|
on_internal_error(slogger, "bind_variable found its way to selector context");
|
|
},
|
|
[&] (const expr::untyped_constant&) -> bool {
|
|
on_internal_error(slogger, "untyped_constant found its way to selector context");
|
|
},
|
|
[&] (const expr::tuple_constructor&) -> bool {
|
|
on_internal_error(slogger, "tuple_constructor found its way to selector context");
|
|
},
|
|
[&] (const expr::collection_constructor&) -> bool {
|
|
on_internal_error(slogger, "collection_constructor found its way to selector context");
|
|
},
|
|
[&] (const expr::usertype_constructor&) -> bool {
|
|
on_internal_error(slogger, "collection_constructor found its way to selector context");
|
|
},
|
|
}, raw_selectable);
|
|
};
|
|
|
|
std::ostream & operator<<(std::ostream &os, const selectable& s) {
|
|
return os << s.to_string();
|
|
}
|
|
|
|
}
|
|
|
|
}
|