Attribute names are now checked against DynamoDB-compatible length limits. When a limit is exceeded, Alternator emits an exception identical or similar to the DDB one. It is worth noting that DDB emits more than one kind of exception string for some exceptions. The tests' catch clauses handle all the observed kinds of messages from DynamoDB. The validation differentiates between key and non-key attributes and applies the limit accordingly. AWS DDB raises exceptions with somewhat different contents when the get request contains ProjectionExpression, so this case needed separate treatment to emit the corresponding exception string. The length-validating function was declared and defined in expressions.hh/.cc respectively, because that's where the relevant parsing happens. ** Tests The following tests were validated when handling this issue: test_limit_attribute_length_nonkey_good, test_limit_attribute_length_nonkey_bad, test_limit_attribute_length_key_good, test_limit_attribute_length_key_bad, test_limit_attribute_length_gsi_lsi_good, test_limit_attribute_length_gsi_lsi_bad, test_limit_attribute_length_gsi_lsi_projection_bad. Some of the tests were expanded to be more granular. Namely, there is a new test function `test_limit_attribute_length_key_bad_incoherent_names` which groups tests with too long attribute names in the case of incorrect (incoherent) user requests. Similarly, there is a new test function `test_limit_attribute_length_gsi_lsi_bad_incoherent_names`. All the tests now cover each combination of the key/keys being too long. Both new functions contain tests that verify that ScyllaDB throws length-related exceptions (instead of the coherency-related ones), similar to what DynamoDB does. The new test test_limit_gsiu_key_len_bad covers the case of a too long attribute name inside GlobalSecondaryIndexUpdates. 
The new test test_limit_gsiu_key_len_bad_incoherent_names covers the case of incorrect (incoherent) user requests containing too long attribute names and GlobalSecondaryIndexUpdates. test_limit_attribute_length_key_bad was found to have contained an illegal KeySchema structure. Some of the tests had their match clauses corrected. All the tests are stripped of the xfail flag except test_limit_attribute_length_key_bad, which has it changed since it still fails due to Projection in GSI and LSI not being implemented in Alternator. The xfail now points to #5036. Fixes scylladb/scylladb#9169 Closes scylladb/scylladb#23097
780 lines
34 KiB
C++
780 lines
34 KiB
C++
/*
|
|
* Copyright 2019-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
*/
|
|
|
|
#include "expressions.hh"
|
|
#include "serialization.hh"
|
|
#include "utils/base64.hh"
|
|
#include "conditions.hh"
|
|
#include "alternator/expressionsLexer.hpp"
|
|
#include "alternator/expressionsParser.hpp"
|
|
#include "utils/overloaded_functor.hh"
|
|
#include "error.hh"
|
|
|
|
#include "seastarx.hh"
|
|
|
|
#include <seastar/core/format.hh>
|
|
#include <seastar/util/log.hh>
|
|
|
|
#include <functional>
|
|
#include <unordered_map>
|
|
|
|
namespace alternator {
|
|
|
|
template <typename Func, typename Result = std::invoke_result_t<Func, expressionsParser&>>
|
|
static Result do_with_parser(std::string_view input, Func&& f) {
|
|
expressionsLexer::InputStreamType input_stream{
|
|
reinterpret_cast<const ANTLR_UINT8*>(input.data()),
|
|
ANTLR_ENC_UTF8,
|
|
static_cast<ANTLR_UINT32>(input.size()),
|
|
nullptr };
|
|
expressionsLexer lexer(&input_stream);
|
|
expressionsParser::TokenStreamType tstream(ANTLR_SIZE_HINT, lexer.get_tokSource());
|
|
expressionsParser parser(&tstream);
|
|
|
|
auto result = f(parser);
|
|
return result;
|
|
}
|
|
|
|
template <typename Func, typename Result = std::invoke_result_t<Func, expressionsParser&>>
|
|
static Result parse(const char* input_name, std::string_view input, Func&& f) {
|
|
if (input.length() > 4096) {
|
|
throw expressions_syntax_error(format("{} expression size {} exceeds allowed maximum 4096.",
|
|
input_name, input.length()));
|
|
}
|
|
try {
|
|
return do_with_parser(input, f);
|
|
} catch (expressions_syntax_error& e) {
|
|
// If already an expressions_syntax_error, don't print the type's
|
|
// name (it's just ugly), just the message.
|
|
// TODO: displayRecognitionError could set a position inside the
|
|
// expressions_syntax_error in throws, and we could use it here to
|
|
// mark the broken position in 'input'.
|
|
throw expressions_syntax_error(fmt::format("Failed parsing {} '{}': {}",
|
|
input_name, input, e.what()));
|
|
} catch (...) {
|
|
throw expressions_syntax_error(fmt::format("Failed parsing {} '{}': {}",
|
|
input_name, input, std::current_exception()));
|
|
}
|
|
}
|
|
|
|
// Parses the text of an UpdateExpression using the parser's
// update_expression rule.
parsed::update_expression
parse_update_expression(std::string_view query) {
    auto rule = std::mem_fn(&expressionsParser::update_expression);
    return parse("UpdateExpression", query, rule);
}
|
|
|
|
std::vector<parsed::path>
|
|
parse_projection_expression(std::string_view query) {
|
|
return parse ("ProjectionExpression", query, std::mem_fn(&expressionsParser::projection_expression));
|
|
}
|
|
|
|
// Parses a condition expression using the parser's condition_expression
// rule. `caller` is the label under which parse errors are reported.
parsed::condition_expression
parse_condition_expression(std::string_view query, const char* caller) {
    auto rule = std::mem_fn(&expressionsParser::condition_expression);
    return parse(caller, query, rule);
}
|
|
|
|
namespace parsed {
|
|
|
|
// Appends one action to this update expression, first recording which kind
// of action (set / remove / add / del) has now been seen.
void update_expression::add(update_expression::action a) {
    auto mark_kind_seen = overloaded_functor {
        [this] (action::del&)    { seen_del = true; },
        [this] (action::add&)    { seen_add = true; },
        [this] (action::remove&) { seen_remove = true; },
        [this] (action::set&)    { seen_set = true; }
    };
    std::visit(mark_kind_seen, a._action);
    _actions.push_back(std::move(a));
}
|
|
|
|
// Merges the actions of `other` into this update expression.
// Throws expressions_syntax_error if both expressions already contain the
// same kind of clause, since each kind may appear only once.
void update_expression::append(update_expression other) {
    const bool clause_repeated =
            (seen_set && other.seen_set) ||
            (seen_remove && other.seen_remove) ||
            (seen_add && other.seen_add) ||
            (seen_del && other.seen_del);
    if (clause_repeated) {
        throw expressions_syntax_error("Each of SET, REMOVE, ADD, DELETE may only appear once in UpdateExpression");
    }
    // Take over other's actions one by one, preserving their order.
    for (auto& incoming : other._actions) {
        _actions.push_back(std::move(incoming));
    }
    seen_set |= other.seen_set;
    seen_remove |= other.seen_remove;
    seen_add |= other.seen_add;
    seen_del |= other.seen_del;
}
|
|
|
|
// Adds sub-expression `a` to this expression's condition list, joined with
// operator `op`. Throws std::logic_error if this expression holds a
// primitive condition rather than a list, or if the list already uses a
// different operator; both indicate a parser bug.
void condition_expression::append(condition_expression&& a, char op) {
    std::visit(overloaded_functor {
        [&] (primitive_condition&) {
            // Shouldn't happen unless we have a bug in the parser
            throw std::logic_error("condition_expression::append called on primitive_condition");
        },
        [&] (condition_list& list) {
            // When 'a' holds a single condition we could insert that
            // condition directly (negated if a._negated) instead of nesting
            // 'a'. These expressions are not evaluated many times, so the
            // optimization is not worth the extra code complexity.
            const bool mixes_operators = !list.conditions.empty() && list.op != op;
            if (mixes_operators) {
                // Shouldn't happen unless we have a bug in the parser
                throw std::logic_error("condition_expression::append called with mixed operators");
            }
            list.conditions.push_back(std::move(a));
            list.op = op;
        }
    }, _expression);
}
|
|
|
|
void path::check_depth_limit() {
|
|
if (1 + _operators.size() > depth_limit) {
|
|
throw expressions_syntax_error(format("Document path exceeded {} nesting levels", depth_limit));
|
|
}
|
|
}
|
|
|
|
} // namespace parsed
|
|
|
|
// The following resolve_*() functions resolve references in parsed
|
|
// expressions of different types. Resolving a parsed expression means
|
|
// replacing:
|
|
// 1. In parsed::path objects, replace references like "#name" with the
|
|
// attribute name from ExpressionAttributeNames,
|
|
// 2. In parsed::constant objects, replace references like ":value" with
|
|
// the value from ExpressionAttributeValues.
|
|
// These function also track which name and value references were used, to
|
|
// allow complaining if some remain unused.
|
|
// Note that the resolve_*() functions modify the expressions in-place,
|
|
// so if we ever intend to cache parsed expression, we need to pass a copy
|
|
// into this function.
|
|
//
|
|
// Doing the "resolving" stage before the evaluation stage has two benefits.
|
|
// First, it allows us to be compatible with DynamoDB in catching unused
|
|
// names and values (see issue #6572). Second, in the FilterExpression case,
|
|
// we need to resolve the expression just once but then use it many times
|
|
// (once for each item to be filtered).
|
|
|
|
static std::optional<std::string> resolve_path_component(const std::string& column_name,
|
|
const rjson::value* expression_attribute_names,
|
|
std::unordered_set<std::string>& used_attribute_names) {
|
|
if (column_name.size() > 0 && column_name.front() == '#') {
|
|
if (!expression_attribute_names) {
|
|
throw api_error::validation(
|
|
fmt::format("ExpressionAttributeNames missing, entry '{}' required by expression", column_name));
|
|
}
|
|
const rjson::value* value = rjson::find(*expression_attribute_names, column_name);
|
|
if (!value || !value->IsString()) {
|
|
throw api_error::validation(
|
|
fmt::format("ExpressionAttributeNames missing entry '{}' required by expression", column_name));
|
|
}
|
|
used_attribute_names.emplace(column_name);
|
|
auto result = std::string(rjson::to_string_view(*value));
|
|
validate_attr_name_length("", result.size(), false, "ExpressionAttributeNames contains invalid value: ");
|
|
return result;
|
|
}
|
|
return std::nullopt;
|
|
}
|
|
|
|
// Resolves, in place, every "#name" reference in path `p`: first its root,
// then each string operator. Index operators have nothing to resolve.
static void resolve_path(parsed::path& p,
        const rjson::value* expression_attribute_names,
        std::unordered_set<std::string>& used_attribute_names) {
    if (auto root = resolve_path_component(p.root(), expression_attribute_names, used_attribute_names)) {
        p.set_root(std::move(*root));
    }
    for (auto& op : p.operators()) {
        std::visit(overloaded_functor {
            [&] (unsigned) {
                // nothing to resolve
            },
            [&] (std::string& member) {
                if (auto resolved = resolve_path_component(member, expression_attribute_names, used_attribute_names)) {
                    member = std::move(*resolved);
                }
            }
        }, op);
    }
}
|
|
|
|
// Resolves, in place, a constant that is still a ":value" reference by
// replacing it with the matching entry of `expression_attribute_values`,
// and records the reference in `used_attribute_values`. A constant that
// already holds a literal is left untouched.
// Throws api_error::validation when the map is absent, or the entry is
// missing or null; validate_value() is also applied to the entry.
static void resolve_constant(parsed::constant& c,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_values) {
    std::visit(overloaded_functor {
        [&] (const parsed::constant::literal&) {
            // Nothing to do, already resolved
        },
        [&] (const std::string& valref) {
            if (expression_attribute_values == nullptr) {
                throw api_error::validation(
                        fmt::format("ExpressionAttributeValues missing, entry '{}' required by expression", valref));
            }
            const rjson::value* entry = rjson::find(*expression_attribute_values, valref);
            if (entry == nullptr) {
                throw api_error::validation(
                        fmt::format("ExpressionAttributeValues missing entry '{}' required by expression", valref));
            }
            if (entry->IsNull()) {
                throw api_error::validation(
                        fmt::format("ExpressionAttributeValues null value for entry '{}' required by expression", valref));
            }
            validate_value(*entry, "ExpressionAttributeValues");
            used_attribute_values.emplace(valref);
            c.set(*entry);
        }
    }, c._value);
}
|
|
|
|
// Resolves, in place, every reference inside a parsed value: a path is
// resolved against ExpressionAttributeNames, a constant against
// ExpressionAttributeValues, and a function call by recursing into each of
// its parameters.
void resolve_value(parsed::value& rhs,
        const rjson::value* expression_attribute_names,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_names,
        std::unordered_set<std::string>& used_attribute_values) {
    std::visit(overloaded_functor {
        [&] (parsed::path& path) {
            resolve_path(path, expression_attribute_names, used_attribute_names);
        },
        [&] (parsed::constant& constant) {
            resolve_constant(constant, expression_attribute_values, used_attribute_values);
        },
        [&] (parsed::value::function_call& call) {
            for (parsed::value& argument : call._parameters) {
                resolve_value(argument, expression_attribute_names, expression_attribute_values,
                        used_attribute_names, used_attribute_values);
            }
        }
    }, rhs._value);
}
|
|
|
|
// Resolves, in place, the right-hand side of a SET action. The first
// operand is always resolved; the second only when the operator is
// something other than 'v'.
void resolve_set_rhs(parsed::set_rhs& rhs,
        const rjson::value* expression_attribute_names,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_names,
        std::unordered_set<std::string>& used_attribute_values) {
    resolve_value(rhs._v1, expression_attribute_names, expression_attribute_values,
            used_attribute_names, used_attribute_values);
    const bool has_second_operand = rhs._op != 'v';
    if (has_second_operand) {
        resolve_value(rhs._v2, expression_attribute_names, expression_attribute_values,
                used_attribute_names, used_attribute_values);
    }
}
|
|
|
|
// Resolves, in place, all references in an update expression: for every
// action, its target path and then whatever operand that kind of action
// carries (a set_rhs for set, a constant for add and del, nothing for
// remove).
void resolve_update_expression(parsed::update_expression& ue,
        const rjson::value* expression_attribute_names,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_names,
        std::unordered_set<std::string>& used_attribute_values) {
    for (parsed::update_expression::action& action : ue.actions()) {
        resolve_path(action._path, expression_attribute_names, used_attribute_names);
        std::visit(overloaded_functor {
            [&] (parsed::update_expression::action::remove&) {
                // nothing to do
            },
            [&] (parsed::update_expression::action::set& set_action) {
                resolve_set_rhs(set_action._rhs, expression_attribute_names, expression_attribute_values,
                        used_attribute_names, used_attribute_values);
            },
            [&] (parsed::update_expression::action::add& add_action) {
                resolve_constant(add_action._valref, expression_attribute_values, used_attribute_values);
            },
            [&] (parsed::update_expression::action::del& del_action) {
                resolve_constant(del_action._valref, expression_attribute_values, used_attribute_values);
            }
        }, action._action);
    }
}
|
|
|
|
// Resolves, in place, every value taking part in a primitive condition.
static void resolve_primitive_condition(parsed::primitive_condition& pc,
        const rjson::value* expression_attribute_names,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_names,
        std::unordered_set<std::string>& used_attribute_values) {
    for (parsed::value& operand : pc._values) {
        resolve_value(operand, expression_attribute_names, expression_attribute_values,
                used_attribute_names, used_attribute_values);
    }
}
|
|
|
|
// Resolves, in place, all references in a condition expression, recursing
// through nested condition lists down to the primitive conditions.
void resolve_condition_expression(parsed::condition_expression& ce,
        const rjson::value* expression_attribute_names,
        const rjson::value* expression_attribute_values,
        std::unordered_set<std::string>& used_attribute_names,
        std::unordered_set<std::string>& used_attribute_values) {
    std::visit(overloaded_functor {
        [&] (parsed::condition_expression::condition_list& list) {
            for (parsed::condition_expression& nested : list.conditions) {
                resolve_condition_expression(nested,
                        expression_attribute_names, expression_attribute_values,
                        used_attribute_names, used_attribute_values);
            }
        },
        [&] (parsed::primitive_condition& primitive) {
            resolve_primitive_condition(primitive,
                    expression_attribute_names, expression_attribute_values,
                    used_attribute_names, used_attribute_values);
        }
    }, ce._expression);
}
|
|
|
|
// Resolves, in place, the "#name" references of every path listed in a
// projection expression.
void resolve_projection_expression(std::vector<parsed::path>& pe,
        const rjson::value* expression_attribute_names,
        std::unordered_set<std::string>& used_attribute_names) {
    for (parsed::path& projected : pe) {
        resolve_path(projected, expression_attribute_names, used_attribute_names);
    }
}
|
|
|
|
// condition_expression_on() checks whether a condition_expression places any
|
|
// condition on the given attribute. It can be useful, for example, for
|
|
// checking whether the condition tries to restrict a key column.
|
|
|
|
// Returns true when value `v` mentions `attribute` as the root of a path,
// either directly or inside any (possibly nested) function-call parameter.
// Constants never mention an attribute.
static bool value_on(const parsed::value& v, std::string_view attribute) {
    return std::visit(overloaded_functor {
        [&] (const parsed::path& path) -> bool {
            return path.root() == attribute;
        },
        [&] (const parsed::constant&) -> bool {
            return false;
        },
        [&] (const parsed::value::function_call& call) -> bool {
            for (const parsed::value& argument : call._parameters) {
                if (value_on(argument, attribute)) {
                    return true;
                }
            }
            return false;
        }
    }, v._value);
}
|
|
|
|
// Returns true when any value of the primitive condition mentions
// `attribute`.
static bool primitive_condition_on(const parsed::primitive_condition& pc, std::string_view attribute) {
    bool mentioned = false;
    for (const parsed::value& operand : pc._values) {
        if (value_on(operand, attribute)) {
            mentioned = true;
            break;
        }
    }
    return mentioned;
}
|
|
|
|
// Returns true when the condition expression places any condition on
// `attribute`, searching nested condition lists recursively.
bool condition_expression_on(const parsed::condition_expression& ce, std::string_view attribute) {
    return std::visit(overloaded_functor {
        [&] (const parsed::condition_expression::condition_list& list) -> bool {
            for (const parsed::condition_expression& nested : list.conditions) {
                if (condition_expression_on(nested, attribute)) {
                    return true;
                }
            }
            return false;
        },
        [&] (const parsed::primitive_condition& primitive) -> bool {
            return primitive_condition_on(primitive, attribute);
        }
    }, ce._expression);
}
|
|
|
|
// for_condition_expression_on() runs a given function over all the attributes
|
|
// mentioned in the expression. If the same attribute is mentioned more than
|
|
// once, the function will be called more than once for the same attribute.
|
|
|
|
// Calls `func` with the root attribute of every path found in `v`,
// descending into function-call parameters. Constants are skipped.
static void for_value_on(const parsed::value& v, const noncopyable_function<void(std::string_view)>& func) {
    std::visit(overloaded_functor {
        [&] (const parsed::path& path) {
            func(path.root());
        },
        [&] (const parsed::value::function_call& call) {
            for (const parsed::value& argument : call._parameters) {
                for_value_on(argument, func);
            }
        },
        [&] (const parsed::constant&) {
            // a constant names no attribute
        }
    }, v._value);
}
|
|
|
|
// Calls `func` once for every attribute mention in the condition
// expression, recursing through nested condition lists. An attribute
// mentioned several times is reported several times.
void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function<void(std::string_view)>& func) {
    std::visit(overloaded_functor {
        [&] (const parsed::condition_expression::condition_list& list) {
            for (const parsed::condition_expression& nested : list.conditions) {
                for_condition_expression_on(nested, func);
            }
        },
        [&] (const parsed::primitive_condition& primitive) {
            for (const parsed::value& operand : primitive._values) {
                for_value_on(operand, func);
            }
        }
    }, ce._expression);
}
|
|
|
|
// The following calculate_value() functions calculate, or evaluate, a parsed
|
|
// expression. The parsed expression is assumed to have been "resolved", with
|
|
// the matching resolve_* function.
|
|
|
|
// calculate_size() is ConditionExpression's size() function, i.e., it takes
|
|
// a JSON-encoded value and returns its "size" as defined differently for the
|
|
// different types - also as a JSON-encoded number.
|
|
// If the value's type (e.g. number) has no size defined, there are two cases:
|
|
// 1. If from_data (the value came directly from an attribute of the data),
|
|
// It returns a JSON-encoded "null" value. Comparisons against this
|
|
// non-numeric value will later fail, so eventually the application will
|
|
// get a ConditionalCheckFailedException.
|
|
// 2. Otherwise (the value came from a constant in the query or some other
|
|
// calculation), throw a ValidationException.
|
|
static rjson::value calculate_size(const rjson::value& v, bool from_data) {
|
|
// NOTE: If v is improperly formatted for our JSON value encoding, it
|
|
// must come from the request itself, not from the database, so it makes
|
|
// sense to throw a ValidationException if we see such a problem.
|
|
if (!v.IsObject() || v.MemberCount() != 1) {
|
|
throw api_error::validation(format("invalid object: {}", v));
|
|
}
|
|
auto it = v.MemberBegin();
|
|
int ret;
|
|
if (it->name == "S") {
|
|
if (!it->value.IsString()) {
|
|
throw api_error::validation(format("invalid string: {}", v));
|
|
}
|
|
ret = it->value.GetStringLength();
|
|
} else if (it->name == "NS" || it->name == "SS" || it->name == "BS" || it->name == "L") {
|
|
if (!it->value.IsArray()) {
|
|
throw api_error::validation(format("invalid set: {}", v));
|
|
}
|
|
ret = it->value.Size();
|
|
} else if (it->name == "M") {
|
|
if (!it->value.IsObject()) {
|
|
throw api_error::validation(format("invalid map: {}", v));
|
|
}
|
|
ret = it->value.MemberCount();
|
|
} else if (it->name == "B") {
|
|
if (!it->value.IsString()) {
|
|
throw api_error::validation(format("invalid byte string: {}", v));
|
|
}
|
|
ret = base64_decoded_len(rjson::to_string_view(it->value));
|
|
} else if (from_data) {
|
|
rjson::value json_ret = rjson::empty_object();
|
|
rjson::add(json_ret, "null", rjson::value(true));
|
|
return json_ret;
|
|
} else {
|
|
throw api_error::validation(format("Unsupported operand type {} for function size()", it->name));
|
|
}
|
|
rjson::value json_ret = rjson::empty_object();
|
|
rjson::add(json_ret, "N", rjson::from_string(std::to_string(ret)));
|
|
return json_ret;
|
|
}
|
|
|
|
// Returns the literal held by an already-resolved constant.
// Throws std::logic_error if the constant is still an unresolved value
// reference, which means the resolve step was skipped.
static const rjson::value& calculate_value(const parsed::constant& c) {
    return std::visit(overloaded_functor {
        [&] (const std::string&) -> const rjson::value& {
            // Shouldn't happen, we should have called resolve_value() earlier
            // and replaced the value reference by the literal constant.
            throw std::logic_error("calculate_value() called before resolve_value()");
        },
        [&] (const parsed::constant::literal& literal) -> const rjson::value& {
            return *literal;
        }
    }, c._value);
}
|
|
|
|
// Wraps a C++ bool in a JSON object with the single member "BOOL".
static rjson::value to_bool_json(bool b) {
    rjson::value wrapped = rjson::empty_object();
    rjson::add(wrapped, "BOOL", rjson::value(b));
    return wrapped;
}
|
|
|
|
// Returns true when `type` is one of the type names accepted here:
// N, S, B, NS, SS, BS, L, M, NULL or BOOL. The match is exact and
// case-sensitive.
static bool known_type(std::string_view type) {
    static thread_local const std::unordered_set<std::string_view> recognized_types = {
        "N", "S", "B", "NS", "SS", "BS", "L", "M", "NULL", "BOOL"
    };
    return recognized_types.count(type) != 0;
}
|
|
|
|
using function_handler_type = rjson::value(calculate_value_caller, const rjson::value*, const parsed::value::function_call&);
|
|
static const
|
|
std::unordered_map<std::string_view, function_handler_type*> function_handlers {
|
|
{"list_append", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
|
|
if (caller != calculate_value_caller::UpdateExpression) {
|
|
throw api_error::validation(
|
|
format("{}: list_append() not allowed here", caller));
|
|
}
|
|
if (f._parameters.size() != 2) {
|
|
throw api_error::validation(
|
|
format("{}: list_append() accepts 2 parameters, got {}", caller, f._parameters.size()));
|
|
}
|
|
rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
|
|
rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
|
|
rjson::value ret = list_concatenate(v1, v2);
|
|
if (ret.IsNull()) {
|
|
throw api_error::validation("UpdateExpression: list_append() given a non-list");
|
|
}
|
|
return ret;
|
|
}
|
|
},
|
|
{"if_not_exists", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
|
|
if (caller != calculate_value_caller::UpdateExpression) {
|
|
throw api_error::validation(
|
|
format("{}: if_not_exists() not allowed here", caller));
|
|
}
|
|
if (f._parameters.size() != 2) {
|
|
throw api_error::validation(
|
|
format("{}: if_not_exists() accepts 2 parameters, got {}", caller, f._parameters.size()));
|
|
}
|
|
if (!std::holds_alternative<parsed::path>(f._parameters[0]._value)) {
|
|
throw api_error::validation(
|
|
format("{}: if_not_exists() must include path as its first argument", caller));
|
|
}
|
|
rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
|
|
rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
|
|
return v1.IsNull() ? std::move(v2) : std::move(v1);
|
|
}
|
|
},
|
|
{"size", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
|
|
if (caller != calculate_value_caller::ConditionExpression) {
|
|
throw api_error::validation(
|
|
format("{}: size() not allowed here", caller));
|
|
}
|
|
if (f._parameters.size() != 1) {
|
|
throw api_error::validation(
|
|
format("{}: size() accepts 1 parameter, got {}", caller, f._parameters.size()));
|
|
}
|
|
rjson::value v = calculate_value(f._parameters[0], caller, previous_item);
|
|
return calculate_size(v, f._parameters[0].is_path());
|
|
}
|
|
},
|
|
{"attribute_exists", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
|
|
if (caller != calculate_value_caller::ConditionExpressionAlone) {
|
|
throw api_error::validation(
|
|
format("{}: attribute_exists() not allowed here", caller));
|
|
}
|
|
if (f._parameters.size() != 1) {
|
|
throw api_error::validation(
|
|
format("{}: attribute_exists() accepts 1 parameter, got {}", caller, f._parameters.size()));
|
|
}
|
|
if (!std::holds_alternative<parsed::path>(f._parameters[0]._value)) {
|
|
throw api_error::validation(
|
|
format("{}: attribute_exists()'s parameter must be a path", caller));
|
|
}
|
|
rjson::value v = calculate_value(f._parameters[0], caller, previous_item);
|
|
return to_bool_json(!v.IsNull());
|
|
}
|
|
},
|
|
{"attribute_not_exists", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
|
|
if (caller != calculate_value_caller::ConditionExpressionAlone) {
|
|
throw api_error::validation(
|
|
format("{}: attribute_not_exists() not allowed here", caller));
|
|
}
|
|
if (f._parameters.size() != 1) {
|
|
throw api_error::validation(
|
|
format("{}: attribute_not_exists() accepts 1 parameter, got {}", caller, f._parameters.size()));
|
|
}
|
|
if (!std::holds_alternative<parsed::path>(f._parameters[0]._value)) {
|
|
throw api_error::validation(
|
|
format("{}: attribute_not_exists()'s parameter must be a path", caller));
|
|
}
|
|
rjson::value v = calculate_value(f._parameters[0], caller, previous_item);
|
|
return to_bool_json(v.IsNull());
|
|
}
|
|
},
|
|
{"attribute_type", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
|
|
if (caller != calculate_value_caller::ConditionExpressionAlone) {
|
|
throw api_error::validation(
|
|
format("{}: attribute_type() not allowed here", caller));
|
|
}
|
|
if (f._parameters.size() != 2) {
|
|
throw api_error::validation(
|
|
format("{}: attribute_type() accepts 2 parameters, got {}", caller, f._parameters.size()));
|
|
}
|
|
// There is no real reason for the following check (not
|
|
// allowing the type to come from a document attribute), but
|
|
// DynamoDB does this check, so we do too...
|
|
if (!f._parameters[1].is_constant()) {
|
|
throw api_error::validation(
|
|
format("{}: attribute_types()'s first parameter must be an expression attribute", caller));
|
|
}
|
|
rjson::value v0 = calculate_value(f._parameters[0], caller, previous_item);
|
|
rjson::value v1 = calculate_value(f._parameters[1], caller, previous_item);
|
|
if (v1.IsObject() && v1.MemberCount() == 1 && v1.MemberBegin()->name == "S") {
|
|
// If the type parameter is not one of the legal types
|
|
// we should generate an error, not a failed condition:
|
|
if (!known_type(rjson::to_string_view(v1.MemberBegin()->value))) {
|
|
throw api_error::validation(
|
|
format("{}: attribute_types()'s second parameter, {}, is not a known type",
|
|
caller, v1.MemberBegin()->value));
|
|
}
|
|
if (v0.IsObject() && v0.MemberCount() == 1) {
|
|
return to_bool_json(v1.MemberBegin()->value == v0.MemberBegin()->name);
|
|
} else {
|
|
return to_bool_json(false);
|
|
}
|
|
} else {
|
|
throw api_error::validation(
|
|
format("{}: attribute_type() second parameter must refer to a string, got {}", caller, v1));
|
|
}
|
|
}
|
|
},
|
|
{"begins_with", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
|
|
if (caller != calculate_value_caller::ConditionExpressionAlone) {
|
|
throw api_error::validation(
|
|
format("{}: begins_with() not allowed here", caller));
|
|
}
|
|
if (f._parameters.size() != 2) {
|
|
throw api_error::validation(
|
|
format("{}: begins_with() accepts 2 parameters, got {}", caller, f._parameters.size()));
|
|
}
|
|
rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
|
|
rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
|
|
return to_bool_json(check_BEGINS_WITH(v1.IsNull() ? nullptr : &v1, v2,
|
|
f._parameters[0].is_constant(), f._parameters[1].is_constant()));
|
|
}
|
|
},
|
|
{"contains", [] (calculate_value_caller caller, const rjson::value* previous_item, const parsed::value::function_call& f) {
|
|
if (caller != calculate_value_caller::ConditionExpressionAlone) {
|
|
throw api_error::validation(
|
|
format("{}: contains() not allowed here", caller));
|
|
}
|
|
if (f._parameters.size() != 2) {
|
|
throw api_error::validation(
|
|
format("{}: contains() accepts 2 parameters, got {}", caller, f._parameters.size()));
|
|
}
|
|
rjson::value v1 = calculate_value(f._parameters[0], caller, previous_item);
|
|
rjson::value v2 = calculate_value(f._parameters[1], caller, previous_item);
|
|
return to_bool_json(check_CONTAINS(v1.IsNull() ? nullptr : &v1, v2,
|
|
f._parameters[0].is_constant(), f._parameters[1].is_constant()));
|
|
}
|
|
},
|
|
};
|
|
|
|
// Given a parsed::path and an item read from the table, extract the value
|
|
// of a certain attribute path, such as "a" or "a.b.c[3]". Returns a null
|
|
// value if the item or the requested attribute does not exist.
|
|
// Note that the item is assumed to be encoded in JSON using DynamoDB
|
|
// conventions - each level of a nested document is a map with one key -
|
|
// a type (e.g., "M" for map) - and its value is the representation of
|
|
// that value.
|
|
static rjson::value extract_path(const rjson::value* item,
|
|
const parsed::path& p, calculate_value_caller caller) {
|
|
if (!item) {
|
|
return rjson::null_value();
|
|
}
|
|
const rjson::value* v = rjson::find(*item, p.root());
|
|
if (!v) {
|
|
return rjson::null_value();
|
|
}
|
|
for (const auto& op : p.operators()) {
|
|
if (!v->IsObject() || v->MemberCount() != 1) {
|
|
// This shouldn't happen. We shouldn't have stored malformed
|
|
// objects. But today Alternator does not validate the structure
|
|
// of nested documents before storing them, so this can happen on
|
|
// read.
|
|
throw api_error::validation(format("{}: malformed item read: {}", caller, *item));
|
|
}
|
|
const char* type = v->MemberBegin()->name.GetString();
|
|
v = &(v->MemberBegin()->value);
|
|
std::visit(overloaded_functor {
|
|
[&] (const std::string& member) {
|
|
if (type[0] == 'M' && v->IsObject()) {
|
|
v = rjson::find(*v, member);
|
|
} else {
|
|
v = nullptr;
|
|
}
|
|
},
|
|
[&] (unsigned index) {
|
|
if (type[0] == 'L' && v->IsArray() && index < v->Size()) {
|
|
v = &(v->GetArray()[index]);
|
|
} else {
|
|
v = nullptr;
|
|
}
|
|
}
|
|
}, op);
|
|
if (!v) {
|
|
return rjson::null_value();
|
|
}
|
|
}
|
|
return rjson::copy(*v);
|
|
}
|
|
|
|
// Given a parsed::value, which can refer either to a constant value from
|
|
// ExpressionAttributeValues, to the value of some attribute, or to a function
|
|
// of other values, this function calculates the resulting value.
|
|
// "caller" determines which expression - ConditionExpression or
|
|
// UpdateExpression - is asking for this value. We need to know this because
|
|
// DynamoDB allows a different choice of functions for different expressions.
|
|
rjson::value calculate_value(const parsed::value& v,
|
|
calculate_value_caller caller,
|
|
const rjson::value* previous_item) {
|
|
return std::visit(overloaded_functor {
|
|
[&] (const parsed::constant& c) -> rjson::value {
|
|
return rjson::copy(calculate_value(c));
|
|
},
|
|
[&] (const parsed::value::function_call& f) -> rjson::value {
|
|
auto function_it = function_handlers.find(std::string_view(f._function_name));
|
|
if (function_it == function_handlers.end()) {
|
|
throw api_error::validation(
|
|
fmt::format("{}: unknown function '{}' called.", caller, f._function_name));
|
|
}
|
|
return function_it->second(caller, previous_item, f);
|
|
},
|
|
[&] (const parsed::path& p) -> rjson::value {
|
|
return extract_path(previous_item, p, caller);
|
|
}
|
|
}, v._value);
|
|
}
|
|
|
|
// Same as calculate_value() above, except takes a set_rhs, which may be
|
|
// either a single value, or v1+v2 or v1-v2.
|
|
rjson::value calculate_value(const parsed::set_rhs& rhs,
|
|
const rjson::value* previous_item) {
|
|
switch (rhs._op) {
|
|
case 'v':
|
|
return calculate_value(rhs._v1, calculate_value_caller::UpdateExpression, previous_item);
|
|
case '+': {
|
|
rjson::value v1 = calculate_value(rhs._v1, calculate_value_caller::UpdateExpression, previous_item);
|
|
rjson::value v2 = calculate_value(rhs._v2, calculate_value_caller::UpdateExpression, previous_item);
|
|
return number_add(v1, v2);
|
|
}
|
|
case '-': {
|
|
rjson::value v1 = calculate_value(rhs._v1, calculate_value_caller::UpdateExpression, previous_item);
|
|
rjson::value v2 = calculate_value(rhs._v2, calculate_value_caller::UpdateExpression, previous_item);
|
|
return number_subtract(v1, v2);
|
|
}
|
|
}
|
|
// Can't happen
|
|
return rjson::null_value();
|
|
}
|
|
|
|
void validate_attr_name_length(std::string_view supplementary_context, size_t attr_name_length, bool is_key, std::string_view error_msg_prefix) {
|
|
constexpr const size_t DYNAMODB_KEY_ATTR_NAME_SIZE_MAX = 255;
|
|
constexpr const size_t DYNAMODB_NONKEY_ATTR_NAME_SIZE_MAX = 65535;
|
|
|
|
const size_t max_length = is_key ? DYNAMODB_KEY_ATTR_NAME_SIZE_MAX : DYNAMODB_NONKEY_ATTR_NAME_SIZE_MAX;
|
|
if (attr_name_length > max_length) {
|
|
std::string error_msg;
|
|
if (!error_msg_prefix.empty()) {
|
|
error_msg += error_msg_prefix;
|
|
}
|
|
if (!supplementary_context.empty()) {
|
|
error_msg += "in ";
|
|
error_msg += supplementary_context;
|
|
error_msg += " - ";
|
|
}
|
|
error_msg += fmt::format("Attribute name is too large, must be less than {} bytes", std::to_string(max_length + 1));
|
|
throw api_error::validation(error_msg);
|
|
}
|
|
}
|
|
|
|
} // namespace alternator
|
|
|
|
// Formats a parsed path as its root followed by each operator in order:
// ".member" for a member access and "[index]" for an index.
auto fmt::formatter<alternator::parsed::path>::format(const alternator::parsed::path& p, fmt::format_context& ctx) const
        -> decltype(ctx.out()) {
    auto out = fmt::format_to(ctx.out(), "{}", p.root());
    for (const auto& op : p.operators()) {
        std::visit(overloaded_functor {
            [&] (unsigned index) {
                out = fmt::format_to(out, "[{}]", index);
            },
            [&] (const std::string& member) {
                out = fmt::format_to(out, ".{}", member);
            }
        }, op);
    }
    return out;
}
|