/*
 * Copyright (C) 2019-present ScyllaDB
 *
 * Modified by ScyllaDB
 */

/*
 * SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0)
 */

// Native CQL aggregate functions (sum, avg, count, min, max, "first") and the
// user-defined-aggregate (UDA) wrapper. Each aggregate is described by a
// db::functions::stateless_aggregate_function: an initial state plus scalar
// "step", "finalizer" and "reducer" functions operating on serialized values.
//
// NOTE(review): in this copy of the file the template argument lists appear to
// have been stripped (bare `#include` directives, `std::vector _arg_types`,
// `template` with no parameter list, ...). Confirm against the canonical
// source before building or changing any code here.

#include "bytes.hh"
#include "types/types.hh"
#include "types/tuple.hh"
#include "cql3/functions/scalar_function.hh"
#include "db/functions/aggregate_function.hh"
#include "cql3/util.hh"
#include "utils/big_decimal.hh"
#include "aggregate_fcts.hh"
#include "user_aggregate.hh"
#include "functions.hh"
#include "first_function.hh"
#include "exceptions/exceptions.hh"
#include "utils/multiprecision_int.hh"
// NOTE(review): the four directives below have lost their header names.
#include
#include
#include
#include

using namespace cql3;
using namespace functions;
using namespace aggregate_fcts;

namespace cql3::functions {
// Logger defined elsewhere; used below for on_internal_error().
extern logging::logger log;
}

namespace {

// A scalar_function whose behaviour is supplied by a stored callable.
//
// The name, return type, argument types and the callable are fixed at
// construction time; execute() simply forwards its parameters to the callable.
// The function always reports itself as pure, native, non-aggregate and as not
// requiring a thread.
class internal_scalar_function : public scalar_function {
    function_name _name;
    data_type _return_type;
    std::vector _arg_types;
    noncopyable_function parameters)> _func;
public:
    // `name` is turned into a native function name; the remaining arguments
    // are moved into the corresponding members.
    internal_scalar_function(
            sstring name,
            data_type return_type,
            std::vector arg_types,
            noncopyable_function parameters)> func)
            : _name(function_name::native_function(std::move(name)))
            , _return_type(std::move(return_type))
            , _arg_types(std::move(arg_types))
            , _func(std::move(func)) {
    }

    // Runs the stored callable on the given parameters and returns its result.
    virtual bytes_opt execute(std::span parameters) override {
        return _func(parameters);
    }

    virtual const function_name& name() const override {
        return _name;
    }

    virtual const std::vector& arg_types() const override {
        return _arg_types;
    }

    virtual const data_type& return_type() const override {
        return _return_type;
    }

    virtual bool is_pure() const override {
        return true;
    }

    virtual bool is_native() const override {
        return true;
    }

    virtual bool requires_thread() const override {
        return false;
    }

    virtual bool is_aggregate() const override {
        return false;
    }

    // Prints only the function name.
    virtual void print(std::ostream& os) const override {
        fmt::print(os, "{}", _name);
    }

    // The column name is the bare function name; `column_names` is ignored.
    virtual sstring column_name(const std::vector& column_names) const override {
        return _name.name;
    }
};

// Called if any of the inputs is NULL
using null_handler = bytes_opt (*)(std::span);

// Null handler that returns the first argument unchanged. The callers in this
// file pass the accumulator as the first argument, so a NULL input leaves the
// accumulator as it was.
bytes_opt return_accumulator_on_null(std::span args) {
    return args[0];
}

// Null handler that returns the first argument holding a value, or an empty
// bytes_opt when none of the arguments has one.
bytes_opt return_any_nonnull(std::span args) {
    auto i = std::ranges::find_if(args, std::mem_fn(&bytes_opt::has_value));
    return i != args.end() ? *i : bytes_opt();
}

// Wraps a plain function pointer `func` into a callable that works on
// serialized arguments.
//
// If any argument has no value, the result is whatever `nullhandler` returns
// for the argument list and `func` is not called. Otherwise every argument is
// deserialized, `func` is invoked on the deserialized values and its result is
// serialized back with serialize_nonnull().
template
noncopyable_function)>
wrap_function_autonull(null_handler nullhandler, Ret (*func)(Args...)) {
    return [nullhandler, func] (std::span args) -> bytes_opt {
        if (!std::all_of(args.begin(), args.end(), std::mem_fn(&bytes_opt::has_value))) {
            return nullhandler(args);
        }
        using args_tuple_type = std::tuple;
        // Expand an index sequence so that args[i] is deserialized as the
        // i-th parameter of func.
        auto ret = std::invoke([&] (std::index_sequence) {
            return func(value_cast>(
                    data_type_for>()->deserialize_value(*args[Indexes]))...);
        }, std::index_sequence_for());
        return data_value(std::move(ret)).serialize_nonnull();
    };
}

// Builds a scalar function named `name` from a function pointer. The return
// and argument types come from data_type_for() and the body is `func` wrapped
// by wrap_function_autonull() with the given null handler.
// NOTE(review): the type passed to ::make_shared is missing in this copy;
// presumably it is internal_scalar_function - confirm.
template
shared_ptr
make_internal_scalar_function(sstring name, null_handler nullhandler, Ret (*func)(Args...)) {
    return ::make_shared(
            std::move(name),
            data_type_for(),
            std::vector({data_type_for()...}),
            wrap_function_autonull(nullhandler, func)
    );
}

// Overload accepting a lambda (any class type) instead of a function pointer.
template
requires std::is_class_v
shared_ptr
make_internal_scalar_function(sstring name, null_handler nullhandler, Lambda func) {
    // "+func" decays the lambda into a pointer-to-function, so that its signature
    // can be inferred by the other overload.
    return make_internal_scalar_function(std::move(name), nullhandler, +func);
}

// Converts `acc` to NarrowT with static_cast.
//
// When the two types differ, the converted value is cast back and compared
// with `acc`; a mismatch throws exceptions::overflow_error_exception.
template
NarrowT narrow(WideT acc) {
    NarrowT ret = static_cast(acc);
    // The following check only makes sense when NarrowT and WideT are two
    // different integral types and we want to check that NarrowT isn't too
    // narrow. Let's avoid the check when they are the same type - it is
    // useless, and worse - wrong for the floating-point case (issue #13564).
    if constexpr (!std::is_same::value) {
        if (static_cast(ret) != acc) {
            throw exceptions::overflow_error_exception("Sum overflow. Values should be casted to a wider type.");
        }
    }
    return ret;
}

// We need a wider accumulator for sum and average,
// since summing the inputs can overflow the input type
// NOTE(review): the condition selecting utils::multiprecision_int over T is
// missing in this copy - confirm which types get the wide accumulator.
template
using accumulator_for = std::conditional_t, utils::multiprecision_int, T>;

// Creates the "sum" aggregate.
//
// State: an accumulator of type Acc, initially Acc(0).
//  - sum_step adds the input to the accumulator; when an argument is NULL the
//    accumulator is returned unchanged (return_accumulator_on_null).
//  - sum_finalizer converts the accumulator to the result type via narrow(),
//    which can throw on overflow.
//  - sum_reducer adds two accumulators.
template
static
shared_ptr
make_sum_function() {
    using Acc = accumulator_for;
    return make_shared(
        db::functions::stateless_aggregate_function{
            .name = function_name::native_function("sum"),
            .state_type = data_type_for>(),
            .result_type = data_type_for(),
            .argument_types = {data_type_for()},
            .initial_state = data_type_for>()->decompose(Acc(0)),
            .aggregation_function = make_internal_scalar_function("sum_step", return_accumulator_on_null, [] (Acc acc, Type addend) -> Acc { return acc + addend; }),
            .state_to_result_function = make_internal_scalar_function("sum_finalizer", return_any_nonnull, [] (Acc acc) -> Type { return narrow(acc); }),
            .state_reduction_function = make_internal_scalar_function("sum_reducer", return_any_nonnull, [] (Acc a1, Acc a2) -> Acc { return a1 + a2; }),
        }
    );
}

// Division used by the avg finalizer: divides the accumulated sum `x` by the
// element count `y` and converts the quotient to Type.
template
class impl_div_for_avg {
public:
    static Type div(const accumulator_for& x, const int64_t y) {
        return Type(x/y);
    }
};

// Specialization operating on big_decimal (per the div() signature): uses
// big_decimal::div with HALF_EVEN rounding.
template <>
class impl_div_for_avg {
public:
    static big_decimal div(const big_decimal& x, const int64_t y) {
        return x.div(y, big_decimal::rounding_mode::HALF_EVEN);
    }
};

// Creates the "avg" aggregate.
//
// State: a two-element tuple (sum, count), initially (0, 0).
//  - avg_step adds the input to the sum and increments the count.
//  - avg_finalizer returns sum / count, or a default-constructed Type when the
//    count is zero.
//  - avg_reducer adds the sums and the counts of two states.
template
static
shared_ptr
make_avg_function() {
    using sum_type = accumulator_for;
    auto accumulator_tuple_type = tuple_type_impl::get_instance({data_type_for(), data_type_for()});
    return make_shared(
        db::functions::stateless_aggregate_function{
            .name = function_name::native_function("avg"),
            .state_type = accumulator_tuple_type,
            .result_type = data_type_for(),
            .argument_types = {data_type_for()},
            .initial_state = make_tuple_value(accumulator_tuple_type, std::vector({data_value(sum_type(0)), data_value(int64_t(0))})).serialize(),
            .aggregation_function = ::make_shared(
                "avg_step",
                accumulator_tuple_type,
                std::vector({accumulator_tuple_type, data_type_for()}),
                [accumulator_tuple_type] (std::span args) -> bytes_opt {
                    // A NULL state stays NULL.
                    if (!args[0]) {
                        return std::nullopt;
                    }
                    // A NULL input leaves the state untouched.
                    if (!args[1]) {
                        return args[0];
                    }
                    data_value acc_value = accumulator_tuple_type->deserialize(*args[0]);
                    std::vector acc = value_cast(std::move(acc_value));
                    auto sum = value_cast(acc[0]);
                    auto count = value_cast(acc[1]);
                    auto input = value_cast(data_type_for()->deserialize(*args[1]));
                    sum += input;
                    count += 1;
                    acc[0] = data_value(std::move(sum));
                    acc[1] = data_value(count);
                    return make_tuple_value(accumulator_tuple_type, acc).serialize();
                }),
            .state_to_result_function = ::make_shared(
                "avg_finalizer",
                data_type_for(),
                std::vector({accumulator_tuple_type}),
                [accumulator_tuple_type] (std::span args) -> bytes_opt {
                    // NOTE(review): args[0] is dereferenced without a null
                    // check - assumes the state is never NULL here; confirm.
                    data_value acc_value = accumulator_tuple_type->deserialize(*args[0]);
                    std::vector acc = value_cast(std::move(acc_value));
                    auto sum = value_cast(acc[0]);
                    auto count = value_cast(acc[1]);
                    // Avoid dividing by zero when no input was accumulated.
                    auto result = count ? impl_div_for_avg::div(sum, count) : Type();
                    return data_type_for()->decompose(result);
                }),
            .state_reduction_function = ::make_shared(
                "avg_reducer",
                accumulator_tuple_type,
                std::vector({accumulator_tuple_type, accumulator_tuple_type}),
                [accumulator_tuple_type] (std::span args) -> bytes_opt {
                    // NOTE(review): both states are dereferenced without a
                    // null check - assumes neither is NULL; confirm.
                    data_value acc1_value = accumulator_tuple_type->deserialize(*args[0]);
                    std::vector acc1 = value_cast(std::move(acc1_value));
                    auto sum1 = value_cast(acc1[0]);
                    auto count1 = value_cast(acc1[1]);
                    data_value acc2_value = accumulator_tuple_type->deserialize(*args[1]);
                    std::vector acc2 = value_cast(std::move(acc2_value));
                    auto sum2 = value_cast(acc2[0]);
                    auto count2 = value_cast(acc2[1]);
                    // The merged state is stored back into acc1.
                    acc1[0] = data_value(sum1 + sum2);
                    acc1[1] = data_value(count1 + count2);
                    return make_tuple_value(accumulator_tuple_type, acc1).serialize();
                }),
        });
}

// Type-level mapping: by default a type maps to itself.
template
struct aggregate_type_for {
    using type = T;
};

// The specializations below map a type to the alias shown in each body.
// NOTE(review): the specialized-for types are missing in this copy.
template<>
struct aggregate_type_for {
    using type = ascii_native_type::primary_type;
};

template<>
struct aggregate_type_for {
    using type = simple_date_native_type::primary_type;
};

template<>
struct aggregate_type_for {
    using type = timeuuid_native_type;
};

template<>
struct aggregate_type_for {
    using type = time_native_type::primary_type;
};

} // anonymous namespace

/**
 * Creates a COUNT function for the specified type.
 *
 * The state is a 64-bit counter starting at 0. The step function returns the
 * counter unchanged when the input is NULL and otherwise increments it by one;
 * the finalizer returns the counter and the reducer adds two counters.
 *
 * @param input_type the function input type
 * @return a COUNT function for the specified type.
 */
shared_ptr
aggregate_fcts::make_count_function(data_type input_type) {
    return make_shared(
        db::functions::stateless_aggregate_function{
            .name = function_name::native_function("count"),
            .state_type = long_type,
            .result_type = long_type,
            .argument_types = {input_type},
            .initial_state = data_value(int64_t(0)).serialize(),
            .aggregation_function = ::make_shared(
                "count_step",
                long_type,
                std::vector({long_type, input_type}),
                [] (std::span args) {
                    // NULL inputs are not counted.
                    if (!args[1]) {
                        return args[0];
                    }
                    // NOTE(review): args[0] is dereferenced without a null
                    // check - assumes the counter state is never NULL.
                    auto count = value_cast(long_type->deserialize(*args[0]));
                    count += 1;
                    return data_value(count).serialize();
                }),
            .state_to_result_function = make_internal_scalar_function("count_finalizer", return_any_nonnull, [] (int64_t count) { return count; }),
            .state_reduction_function = make_internal_scalar_function("count_reducer", return_any_nonnull, [] (int64_t c1, int64_t c2) { return c1 + c2; }),
        });
}

// Drops the first arg type from the types declaration (which denotes the accumulator)
// in order to compute the actual type of given user-defined-aggregate (UDA)
//
// Reports an internal error when fewer than two argument types are supplied.
// NOTE(review): this helper is not referenced anywhere in the visible part of
// this file; the user_aggregate constructor below performs the same slicing
// inline, without the size check. Confirm whether it is still needed.
static std::vector state_arg_types_to_uda_arg_types(const std::vector& arg_types) {
    if(arg_types.size() < 2) {
        on_internal_error(cql3::functions::log, "State function for user-defined aggregates needs at least two arguments");
    }
    std::vector types;
    types.insert(types.end(), std::next(arg_types.begin()), arg_types.end());
    return types;
}

// Return type of a UDA: that of the final function when one is given,
// otherwise that of the state function.
// NOTE(review): not referenced anywhere in the visible part of this file; the
// user_aggregate constructor below computes the same expression inline.
static data_type uda_return_type(const ::shared_ptr& ffunc, const ::shared_ptr& sfunc) {
    return ffunc ? ffunc->return_type() : sfunc->return_type();
}

// Builds a user-defined aggregate from its component functions.
//
//  fname      - name of the aggregate
//  initcond   - optional serialized initial state
//  sfunc      - state (step) function; its return type is the state type and
//               its argument types, minus the first one, are the aggregate's
//               argument types
//  reducefunc - function stored as the state reduction function
//  finalfunc  - optional final function; when present its return type is the
//               aggregate's result type, otherwise the state type is used
user_aggregate::user_aggregate(function_name fname, bytes_opt initcond, ::shared_ptr sfunc, ::shared_ptr reducefunc, ::shared_ptr finalfunc)
        : aggregate_function(db::functions::stateless_aggregate_function{
            .name = fname,
            .state_type = sfunc->return_type(),
            .result_type = finalfunc ? finalfunc->return_type() : sfunc->return_type(),
            // Skip sfunc's first argument type (the state).
            .argument_types = std::vector(std::next(sfunc->arg_types().begin()), sfunc->arg_types().end()),
            .initial_state = std::move(initcond),
            .aggregation_function = std::move(sfunc),
            .state_to_result_function = std::move(finalfunc),
            .state_reduction_function = std::move(reducefunc),
        })
{ }

// True when a final function was supplied.
bool user_aggregate::has_finalfunc() const {
    return _agg.state_to_result_function != nullptr;
}

// Writes a CREATE AGGREGATE statement for this aggregate to `os` and returns
// `os`. SFUNC and STYPE are always emitted; REDUCEFUNC, FINALFUNC and INITCOND
// are emitted only when the aggregate is reducible, has a final function, or
// has an initial state, respectively.
std::ostream& user_aggregate::describe(std::ostream& os) const {
    auto ks = cql3::util::maybe_quote(name().keyspace);
    auto na = cql3::util::maybe_quote(name().name);

    // Header: keyspace-qualified name followed by the comma-separated
    // argument type names.
    os << "CREATE AGGREGATE " << ks << "." << na << "(";
    auto a = arg_types();
    for (size_t i = 0; i < a.size(); i++) {
        if (i > 0) {
            os << ", ";
        }
        os << a[i]->cql3_type_name();
    }
    os << ")\n";

    os << "SFUNC " << cql3::util::maybe_quote(_agg.aggregation_function->name().name) << "\n"
       << "STYPE " << _agg.aggregation_function->return_type()->cql3_type_name();
    if (is_reducible()) {
        os << "\n" << "REDUCEFUNC " << cql3::util::maybe_quote(_agg.state_reduction_function->name().name);
    }
    if (has_finalfunc()) {
        os << "\n" << "FINALFUNC " << cql3::util::maybe_quote(_agg.state_to_result_function->name().name);
    }
    if (_agg.initial_state) {
        // The initial state is deserialized with the state type and printed
        // in its parsable string form.
        os << "\n" << "INITCOND " << _agg.aggregation_function->return_type()->deserialize(bytes_view(*_agg.initial_state)).to_parsable_string();
    }
    os << ";";

    return os;
}

// Creates the row-counting aggregate (named COUNT_ROWS_FUNCTION_NAME, with the
// column name overridden to "count"). It takes no arguments: the step function
// receives only the counter and increments it by one.
shared_ptr
aggregate_fcts::make_count_rows_function() {
    return make_shared(
        db::functions::stateless_aggregate_function{
            .name = function_name::native_function(COUNT_ROWS_FUNCTION_NAME),
            .column_name_override = "count",
            .state_type = long_type,
            .result_type = long_type,
            .argument_types = {},
            .initial_state = data_value(int64_t(0)).serialize(),
            .aggregation_function = make_internal_scalar_function("count_step", return_any_nonnull, [] (int64_t accumulator) { return accumulator + 1; }),
            .state_to_result_function = make_internal_scalar_function("count_finalizer", return_any_nonnull, [] (int64_t accumulator) { return accumulator; }),
            .state_reduction_function = make_internal_scalar_function("count_reducer", return_any_nonnull, [] (int64_t acc1, int64_t acc2) { return acc1 + acc2; }),
        }
    );
}

// Creates the "max" aggregate for values of `io_type`.
//
// The state is the largest serialized value seen so far and starts out NULL.
// The same function serves as both step and reducer: if either side is NULL
// the other side is returned, otherwise the larger of the two according to
// io_type's less-comparator. The finalizer returns the state as is.
shared_ptr
aggregate_fcts::make_max_function(data_type io_type) {
    // Work with the type returned by without_reversed().
    io_type = io_type->without_reversed().shared_from_this();
    auto max = ::make_shared("max_step", io_type, std::vector({io_type, io_type}),
            [io_type] (std::span args) -> bytes_opt {
        if (!args[0]) {
            return args[1];
        }
        if (!args[1]) {
            return args[0];
        }
        return std::max(*args[0], *args[1], io_type->as_less_comparator());
    });
    return ::make_shared(
        db::functions::stateless_aggregate_function{
            .name = function_name::native_function("max"),
            .state_type = io_type,
            .result_type = io_type,
            .argument_types = {io_type},
            .initial_state = std::nullopt,
            .aggregation_function = max,
            .state_to_result_function = ::make_shared("max_finalizer", io_type, std::vector({io_type}),
                    [] (std::span args) {
                return args[0];
            }),
            .state_reduction_function = max,
        }
    );
}

// Creates the "min" aggregate for values of `io_type`.
//
// Mirror image of make_max_function(): the state is the smallest serialized
// value seen so far (initially NULL), NULL on either side yields the other
// side, and std::min with io_type's less-comparator picks the result.
shared_ptr
aggregate_fcts::make_min_function(data_type io_type) {
    // Work with the type returned by without_reversed().
    io_type = io_type->without_reversed().shared_from_this();
    auto min = ::make_shared("min_step", io_type, std::vector({io_type, io_type}),
            [io_type] (std::span args) -> bytes_opt {
        if (!args[0]) {
            return args[1];
        }
        if (!args[1]) {
            return args[0];
        }
        return std::min(*args[0], *args[1], io_type->as_less_comparator());
    });
    return ::make_shared(
        db::functions::stateless_aggregate_function{
            .name = function_name::native_function("min"),
            .state_type = io_type,
            .result_type = io_type,
            .argument_types = {io_type},
            .initial_state = std::nullopt,
            .aggregation_function = min,
            .state_to_result_function = ::make_shared("min_finalizer", io_type, std::vector({io_type}),
                    [] (std::span args) {
                return args[0];
            }),
            .state_reduction_function = min,
        }
    );
}

// Name under which the "first" aggregate is registered.
function_name
aggregate_fcts::first_function_name() {
    return function_name::native_function("$$first$$");
}

// Creates the "first" aggregate, which yields the first input it was given
// (even if that input is NULL, since the state tuple records that an input
// was seen).
shared_ptr
aggregate_fcts::make_first_function(data_type io_type) {
    // Work with the type returned by without_reversed().
    io_type = io_type->without_reversed().shared_from_this();
    // The function's state is a one-element tuple containing the value; if the
    // tuple itself is null then an input hasn't been seen yet.
    auto state_type = data_type(tuple_type_impl::get_instance({io_type}));
    return ::make_shared(
        db::functions::stateless_aggregate_function{
            .name = first_function_name(),
            .state_type = state_type,
            .result_type = io_type,
            .argument_types = {io_type},
            .initial_state = std::nullopt,
            .aggregation_function = ::make_shared("first_agg", state_type, std::vector({state_type, io_type}),
                    [] (std::span args) -> bytes_opt {
                if (!args[0]) {
                    // First call: create a tuple with the input
                    return tuple_type_impl::build_value(boost::make_iterator_range_n(&args[1], 1));
                } else {
                    // Second or later call: return result of first call
                    return args[0];
                }
            }),
            .state_to_result_function = ::make_shared("first_finalizer", io_type, std::vector({state_type}),
                    [] (std::span args) -> bytes_opt {
                if (!args[0]) {
                    // No input was ever seen.
                    return std::nullopt;
                } else {
                    // Unwrap the single tuple element.
                    return to_bytes_opt(get_nth_tuple_element(managed_bytes_view(*args[0]), 0));
                }
            }),
            // Merging two states keeps the first one that is not NULL.
            .state_reduction_function = ::make_shared("first_reducer", state_type, std::vector({state_type, state_type}),
                    return_any_nonnull),
        }
    );
}

// Registers the sum and avg aggregates in `funcs`, keyed by function name.
// NOTE(review): the template arguments selecting the element type of each
// instantiation are missing in this copy, so the eight sum and eight avg
// registrations below look identical - restore them from the canonical source.
void cql3::functions::add_agg_functions(declared_t& funcs) {
    auto declare = [&funcs] (shared_ptr f) { funcs.emplace(f->name(), f); };

    declare(make_sum_function());
    declare(make_sum_function());
    declare(make_sum_function());
    declare(make_sum_function());
    declare(make_sum_function());
    declare(make_sum_function());
    declare(make_sum_function());
    declare(make_sum_function());

    declare(make_avg_function());
    declare(make_avg_function());
    declare(make_avg_function());
    declare(make_avg_function());
    declare(make_avg_function());
    declare(make_avg_function());
    declare(make_avg_function());
    declare(make_avg_function());
}