Mirror of https://github.com/scylladb/scylladb.git, synced 2026-05-12 19:02:12 +00:00
Merge 'Alternator: support nested attribute paths in all expressions' from Nadav Har'El.
This series fixes #5024, which is about adding support for nested attribute
paths (e.g., a.b.c[2]) to Alternator. The series adds complete support for this
feature in ProjectionExpression, ConditionExpression, FilterExpression and
UpdateExpression, as well as in its combination with ReturnValues. Many relevant
tests, including some new tests added in this series, now pass.
The first patch in the series fixes #8043, a bug in some error cases in
conditions, which was discovered while working on this series and is
conceptually separate from the rest of the series.
Closes #8066
* github.com:scylladb/scylla:
alternator: correct implementation of UpdateItem with nested attributes and ReturnValues
alternator: fix bug in ReturnValues=UPDATED_NEW
alternator: implemented nested attribute paths in UpdateExpression
alternator: limit the depth of nested paths
alternator: prepare for UpdateItem nested attribute paths
alternator: overhaul ProjectionExpression hierarchy implementation
alternator: make parsed::path object printable
alternator-test: a few more ProjectionExpression conflict test cases
alternator-test: improve tests for nested attributes in UpdateExpression
alternator: support attribute paths in ConditionExpression, FilterExpression
alternator-test: improve tests for nested attributes in ConditionExpression
alternator: support attribute paths in ProjectionExpression
alternator: overhaul attrs_to_get handling
alternator-test: additional tests for attribute paths in ProjectionExpression
alternator-test: harden attribute-path tests for ProjectionExpression
alternator: fix ValidationException in FilterExpression - and more
(cherry picked from commit cbbb7f08a0)
committed by Nadav Har'El
parent 7b19cc17d6
commit a810e57684
@@ -202,7 +202,7 @@ static schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& r
     if (!schema) {
         // if we get here then the name was missing, since syntax or missing actual CF
         // checks throw. Slow path, but just call get_table_name to generate exception.
-        get_table_name(request);
+        get_table_name(request);
     }
     return schema;
 }
@@ -1882,18 +1882,182 @@ static std::string get_item_type_string(const rjson::value& v) {
     return it->name.GetString();
 }
 
+// attrs_to_get saves for each top-level attribute an attrs_to_get_node,
+// a hierarchy of subparts that need to be kept. The following function
+// takes a given JSON value and drops its parts which weren't asked to be
+// kept. It modifies the given JSON value, or returns false to signify that
+// the entire object should be dropped.
+// Note that the JSON value is assumed to be encoded using the DynamoDB
+// conventions - i.e., it is really a map whose key has a type string,
+// and the value is the real object.
+template<typename T>
+static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>& h) {
+    if (!val.IsObject() || val.MemberCount() != 1) {
+        // This shouldn't happen. We shouldn't have stored malformed objects.
+        // But today Alternator does not validate the structure of nested
+        // documents before storing them, so this can happen on read.
+        throw api_error::internal(format("Malformed value object read: {}", val));
+    }
+    const char* type = val.MemberBegin()->name.GetString();
+    rjson::value& v = val.MemberBegin()->value;
+    if (h.has_members()) {
+        const auto& members = h.get_members();
+        if (type[0] != 'M' || !v.IsObject()) {
+            // If v is not an object (dictionary, map), none of the members
+            // can match.
+            return false;
+        }
+        rjson::value newv = rjson::empty_object();
+        for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
+            std::string attr = it->name.GetString();
+            auto x = members.find(attr);
+            if (x != members.end()) {
+                if (x->second) {
+                    // Only a part of this attribute is to be filtered, do it.
+                    if (hierarchy_filter(it->value, *x->second)) {
+                        rjson::set_with_string_name(newv, attr, std::move(it->value));
+                    }
+                } else {
+                    // The entire attribute is to be kept
+                    rjson::set_with_string_name(newv, attr, std::move(it->value));
+                }
+            }
+        }
+        if (newv.MemberCount() == 0) {
+            return false;
+        }
+        v = newv;
+    } else if (h.has_indexes()) {
+        const auto& indexes = h.get_indexes();
+        if (type[0] != 'L' || !v.IsArray()) {
+            return false;
+        }
+        rjson::value newv = rjson::empty_array();
+        const auto& a = v.GetArray();
+        for (unsigned i = 0; i < v.Size(); i++) {
+            auto x = indexes.find(i);
+            if (x != indexes.end()) {
+                if (x->second) {
+                    if (hierarchy_filter(a[i], *x->second)) {
+                        rjson::push_back(newv, std::move(a[i]));
+                    }
+                } else {
+                    // The entire attribute is to be kept
+                    rjson::push_back(newv, std::move(a[i]));
+                }
+            }
+        }
+        if (newv.Size() == 0) {
+            return false;
+        }
+        v = newv;
+    }
+    return true;
+}
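An editor's illustrative Python sketch (not Alternator code) of the same filtering idea, assuming DynamoDB's one-key {type: value} encoding; a hierarchy node here is None for "keep the whole subtree", a dict keyed by member names, or a dict keyed by list indexes:

```python
# Illustrative sketch: filter a DynamoDB-encoded value, keeping only the
# parts requested by a path-hierarchy node. Names and shapes are assumed.
def hierarchy_filter(val, node):
    """Return the filtered value, or None if nothing under it was requested."""
    if node is None:          # whole subtree requested
        return val
    [(typ, v)] = val.items()  # DynamoDB encoding: exactly one {type: value}
    if all(isinstance(k, str) for k in node):      # member operators
        if typ != "M":
            return None
        kept = {k: hierarchy_filter(v[k], sub)
                for k, sub in node.items() if k in v}
        kept = {k: x for k, x in kept.items() if x is not None}
        return {"M": kept} if kept else None
    else:                                          # index operators
        if typ != "L":
            return None
        kept = [hierarchy_filter(v[i], sub)
                for i, sub in sorted(node.items()) if i < len(v)]
        kept = [x for x in kept if x is not None]
        return {"L": kept} if kept else None

item_a = {"M": {"b": {"N": "7"}, "c": {"L": [{"S": "x"}, {"S": "y"}, {"S": "z"}]}}}
# Requesting a.b and a.c[2] keeps only those parts:
print(hierarchy_filter(item_a, {"b": None, "c": {2: None}}))
# {'M': {'b': {'N': '7'}, 'c': {'L': [{'S': 'z'}]}}}
```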
+
+// Add a path to an attribute_path_map. Throws a validation error if the path
+// "overlaps" with one already in the filter (one is a sub-path of the other)
+// or "conflicts" with it (both a member and an index is requested).
+template<typename T>
+void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const parsed::path& p, T value = {}) {
+    using node = attribute_path_map_node<T>;
+    // The first step is to look for the top-level attribute (p.root()):
+    auto it = map.find(p.root());
+    if (it == map.end()) {
+        if (p.has_operators()) {
+            it = map.emplace(p.root(), node {std::nullopt}).first;
+        } else {
+            (void) map.emplace(p.root(), node {std::move(value)}).first;
+            // Value inserted for top-level node. We're done.
+            return;
+        }
+    } else if (!p.has_operators()) {
+        // If p is top-level and we already have it or a part of it
+        // in map, it's a forbidden overlapping path.
+        throw api_error::validation(format(
+            "Invalid {}: two document paths overlap at {}", source, p.root()));
+    } else if (it->second.has_value()) {
+        // If we're here, it != map.end() && p.has_operators && it->second.has_value().
+        // This means the top-level attribute already has a value, and we're
+        // trying to add a non-top-level value. It's an overlap.
+        throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p.root()));
+    }
+    node* h = &it->second;
+    // The second step is to walk h from the top-level node to the inner node
+    // where we're supposed to insert the value:
+    for (const auto& op : p.operators()) {
+        std::visit(overloaded_functor {
+            [&] (const std::string& member) {
+                if (h->is_empty()) {
+                    *h = node {typename node::members_t()};
+                } else if (h->has_indexes()) {
+                    throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
+                } else if (h->has_value()) {
+                    throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
+                }
+                typename node::members_t& members = h->get_members();
+                auto it = members.find(member);
+                if (it == members.end()) {
+                    it = members.insert({member, make_shared<node>()}).first;
+                }
+                h = it->second.get();
+            },
+            [&] (unsigned index) {
+                if (h->is_empty()) {
+                    *h = node {typename node::indexes_t()};
+                } else if (h->has_members()) {
+                    throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
+                } else if (h->has_value()) {
+                    throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
+                }
+                typename node::indexes_t& indexes = h->get_indexes();
+                auto it = indexes.find(index);
+                if (it == indexes.end()) {
+                    it = indexes.insert({index, make_shared<node>()}).first;
+                }
+                h = it->second.get();
+            }
+        }, op);
+    }
+    // Finally, insert the value in the node h.
+    if (h->is_empty()) {
+        *h = node {std::move(value)};
+    } else {
+        throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
+    }
+}
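A minimal Python sketch of the overlap/conflict rules (editor's example; add_path, VALUE and the plain-dict trie are assumptions, not the C++ API):

```python
VALUE = object()  # sentinel key marking "data stored at this node"

def add_path(trie, path):
    """path is a list like ["a", "b", 2, "c"] (strs = members, ints = indexes)."""
    node = trie
    for op in path:
        if VALUE in node:                 # an ancestor already holds data
            raise ValueError(f"two document paths overlap at {op!r}")
        if node and type(op) is not type(next(iter(node))):
            raise ValueError(f"two document paths conflict at {op!r}")
        node = node.setdefault(op, {})
    if node:                              # a descendant already holds data
        raise ValueError("two document paths overlap")
    node[VALUE] = True

trie = {}
add_path(trie, ["a", "b"])
add_path(trie, ["a", "c", 2])        # fine: disjoint from a.b
try:
    add_path(trie, ["a", 1])         # conflict: 'a' as both map and list
except ValueError as e:
    print(e)
try:
    add_path(trie, ["a", "b", "d"])  # overlap: a.b already has data
except ValueError as e:
    print(e)
```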
+
+// A very simplified version of the above function for the special case of
+// adding only a top-level attribute. It's not only simpler, we also use a
+// different error message, referring to a "duplicate attribute" instead of
+// "overlapping paths". DynamoDB also has this distinction (errors in
+// AttributesToGet refer to duplicates, not overlaps, but errors in
+// ProjectionExpression refer to overlap - even if it's an exact duplicate).
+template<typename T>
+void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const std::string& attr, T value = {}) {
+    using node = attribute_path_map_node<T>;
+    auto it = map.find(attr);
+    if (it == map.end()) {
+        map.emplace(attr, node {std::move(value)});
+    } else {
+        throw api_error::validation(format(
+            "Invalid {}: Duplicate attribute: {}", source, attr));
+    }
+}
+
 // calculate_attrs_to_get() takes either AttributesToGet or
 // ProjectionExpression parameters (having both is *not* allowed),
 // and returns the list of cells we need to read, or an empty set when
 // *all* attributes are to be returned.
-// In our current implementation, only top-level attributes are stored
-// as cells, and nested documents are stored serialized as JSON.
-// So this function currently returns only the the top-level attributes
-// but we also need to add, after the query, filtering to keep only
-// the parts of the JSON attributes that were chosen in the paths'
-// operators. Because we don't have such filtering yet (FIXME), we fail here
-// if the requested paths are anything but top-level attributes.
-std::unordered_set<std::string> calculate_attrs_to_get(const rjson::value& req, std::unordered_set<std::string>& used_attribute_names) {
+// However, in our current implementation, only top-level attributes are
+// stored as separate cells - a nested document is stored serialized together
+// (as JSON) in the same cell. So this function returns a map - each key is a
+// top-level attribute we will need to read, and the value for each
+// top-level attribute is the partial hierarchy (struct hierarchy_filter)
+// that we will need to extract from that serialized JSON.
+// For example, if ProjectionExpression lists a.b and a.c[2], we
+// return one top-level attribute name, "a", with the value "{b, c[2]}".
+static attrs_to_get calculate_attrs_to_get(const rjson::value& req, std::unordered_set<std::string>& used_attribute_names) {
     const bool has_attributes_to_get = req.HasMember("AttributesToGet");
     const bool has_projection_expression = req.HasMember("ProjectionExpression");
     if (has_attributes_to_get && has_projection_expression) {
@@ -1902,9 +2066,9 @@ std::unordered_set<std::string> calculate_attrs_to_get(const rjson::value& req,
     }
     if (has_attributes_to_get) {
         const rjson::value& attributes_to_get = req["AttributesToGet"];
-        std::unordered_set<std::string> ret;
+        attrs_to_get ret;
         for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
-            ret.insert(it->GetString());
+            attribute_path_map_add("AttributesToGet", ret, it->GetString());
         }
         return ret;
     } else if (has_projection_expression) {
@@ -1917,24 +2081,13 @@ std::unordered_set<std::string> calculate_attrs_to_get(const rjson::value& req,
             throw api_error::validation(e.what());
         }
         resolve_projection_expression(paths_to_get, expression_attribute_names, used_attribute_names);
-        std::unordered_set<std::string> seen_column_names;
-        auto ret = boost::copy_range<std::unordered_set<std::string>>(paths_to_get |
-                boost::adaptors::transformed([&] (const parsed::path& p) {
-            if (p.has_operators()) {
-                // FIXME: this check will need to change when we support non-toplevel attributes
-                throw api_error::validation("Non-toplevel attributes in ProjectionExpression not yet implemented");
-            }
-            if (!seen_column_names.insert(p.root()).second) {
-                // FIXME: this check will need to change when we support non-toplevel attributes
-                throw api_error::validation(
-                        format("Invalid ProjectionExpression: two document paths overlap with each other: {} and {}.",
-                                p.root(), p.root()));
-            }
-            return p.root();
-        }));
+        attrs_to_get ret;
+        for (const parsed::path& p : paths_to_get) {
+            attribute_path_map_add("ProjectionExpression", ret, p);
+        }
         return ret;
     }
-    // An empty set asks to read everything
+    // An empty map asks to read everything
     return {};
 }
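A hypothetical helper (editor's example; parse_path and its regex are illustrative only) showing the path shape this function groups by top-level attribute:

```python
import re

# Illustrative sketch: split a DynamoDB document path such as "a.c[2]" into
# the root-plus-operators list shape used by the add_path() trie above.
def parse_path(s):
    return [int(tok[1:-1]) if tok.startswith('[') else tok
            for tok in re.findall(r'[^.\[\]]+|\[\d+\]', s)]

print(parse_path("a.b"))     # ['a', 'b']
print(parse_path("a.c[2]"))  # ['a', 'c', 2]
# Feeding both into add_path() yields one top-level key "a" whose value
# describes the partial hierarchy {b, c[2]}, as the comment above explains.
```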
@@ -1955,7 +2108,7 @@ std::unordered_set<std::string> calculate_attrs_to_get(const rjson::value& req,
  */
 void executor::describe_single_item(const cql3::selection::selection& selection,
         const std::vector<bytes_opt>& result_row,
-        const std::unordered_set<std::string>& attrs_to_get,
+        const attrs_to_get& attrs_to_get,
         rjson::value& item,
         bool include_all_embedded_attributes)
 {
@@ -1976,7 +2129,16 @@ void executor::describe_single_item(const cql3::selection::selection& selection,
         std::string attr_name = value_cast<sstring>(entry.first);
         if (include_all_embedded_attributes || attrs_to_get.empty() || attrs_to_get.contains(attr_name)) {
             bytes value = value_cast<bytes>(entry.second);
-            rjson::set_with_string_name(item, attr_name, deserialize_item(value));
+            rjson::value v = deserialize_item(value);
+            auto it = attrs_to_get.find(attr_name);
+            if (it != attrs_to_get.end()) {
+                // attrs_to_get may have asked for only part of this attribute:
+                if (hierarchy_filter(v, it->second)) {
+                    rjson::set_with_string_name(item, attr_name, std::move(v));
+                }
+            } else {
+                rjson::set_with_string_name(item, attr_name, std::move(v));
+            }
         }
     }
 }
@@ -1988,7 +2150,7 @@ std::optional<rjson::value> executor::describe_single_item(schema_ptr schema,
         const query::partition_slice& slice,
         const cql3::selection::selection& selection,
         const query::result& query_result,
-        const std::unordered_set<std::string>& attrs_to_get) {
+        const attrs_to_get& attrs_to_get) {
     rjson::value item = rjson::empty_object();
 
     cql3::selection::result_set_builder builder(selection, gc_clock::now(), cql_serialization_format::latest());
@@ -2024,8 +2186,16 @@ static bool check_needs_read_before_write(const parsed::value& v) {
     }, v._value);
 }
 
-static bool check_needs_read_before_write(const parsed::update_expression& update_expression) {
-    return boost::algorithm::any_of(update_expression.actions(), [](const parsed::update_expression::action& action) {
+static bool check_needs_read_before_write(const attribute_path_map<parsed::update_expression::action>& update_expression) {
+    return boost::algorithm::any_of(update_expression, [](const auto& p) {
+        if (!p.second.has_value()) {
+            // If the action is not on the top-level attribute, we need to
+            // read the old item: we change only a part of the top-level
+            // attribute, and write the full top-level attribute back.
+            return true;
+        }
+        // Otherwise, the action p.second.get_value() is just on a top-level
+        // attribute. Check if it needs read-before-write:
         return std::visit(overloaded_functor {
             [&] (const parsed::update_expression::action::set& a) -> bool {
                 return check_needs_read_before_write(a._rhs._v1) || (a._rhs._op != 'v' && check_needs_read_before_write(a._rhs._v2));
@@ -2039,7 +2209,7 @@ static bool check_needs_read_before_write(const parsed::update_expression& updat
             [&] (const parsed::update_expression::action::del& a) -> bool {
                 return true;
             }
-        }, action._action);
+        }, p.second.get_value()._action);
     });
 }
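A sketch of the resulting rule, reusing add_path/VALUE from the earlier editor's example (the real code additionally lets a top-level action itself demand a read, e.g. when its right-hand side reads the old value):

```python
# Illustrative sketch: with actions grouped into the path trie, any action
# sitting below the top level forces read-before-write, because the whole
# top-level attribute must be read, patched, and rewritten.
def update_needs_rbw(trie):
    return any(VALUE not in node for node in trie.values())

t = {}; add_path(t, ["a"])           # SET a = :v, a whole-attribute action
print(update_needs_rbw(t))           # False
t = {}; add_path(t, ["a", "b", 3])   # SET a.b[3] = :v, a nested action
print(update_needs_rbw(t))           # True
```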
@@ -2048,7 +2218,11 @@ public:
     // Some information parsed during the constructor to check for input
     // errors, and cached to be used again during apply().
     rjson::value* _attribute_updates;
-    parsed::update_expression _update_expression;
+    // Instead of keeping a parsed::update_expression with an unsorted
+    // list of actions, we keep them in an attribute_path_map which groups
+    // them by top-level attribute, and detects forbidden overlaps/conflicts.
+    attribute_path_map<parsed::update_expression::action> _update_expression;
 
     parsed::condition_expression _condition_expression;
 
     update_item_operation(service::storage_proxy& proxy, rjson::value&& request);
@@ -2079,16 +2253,22 @@ update_item_operation::update_item_operation(service::storage_proxy& proxy, rjso
         throw api_error::validation("UpdateExpression must be a string");
     }
     try {
-        _update_expression = parse_update_expression(update_expression->GetString());
-        resolve_update_expression(_update_expression,
+        parsed::update_expression expr = parse_update_expression(update_expression->GetString());
+        resolve_update_expression(expr,
                 expression_attribute_names, expression_attribute_values,
                 used_attribute_names, used_attribute_values);
+        if (expr.empty()) {
+            throw api_error::validation("Empty expression in UpdateExpression is not allowed");
+        }
+        for (auto& action : expr.actions()) {
+            // Unfortunately we need to copy the action's path, because
+            // we std::move the action object.
+            auto p = action._path;
+            attribute_path_map_add("UpdateExpression", _update_expression, p, std::move(action));
+        }
     } catch(expressions_syntax_error& e) {
         throw api_error::validation(e.what());
     }
-    if (_update_expression.empty()) {
-        throw api_error::validation("Empty expression in UpdateExpression is not allowed");
-    }
 }
 _attribute_updates = rjson::find(_request, "AttributeUpdates");
 if (_attribute_updates) {
@@ -2130,6 +2310,187 @@ update_item_operation::needs_read_before_write() const {
         (_returnvalues != returnvalues::NONE && _returnvalues != returnvalues::UPDATED_NEW);
 }
 
+// action_result() returns the result of applying an UpdateItem action -
+// this result is either a JSON object or an unset optional which indicates
+// the action was a deletion. The caller (update_item_operation::apply()
+// below) will either write this JSON as the content of a column, or
+// use it as a piece in a bigger top-level attribute.
+static std::optional<rjson::value> action_result(
+        const parsed::update_expression::action& action,
+        const rjson::value* previous_item) {
+    return std::visit(overloaded_functor {
+        [&] (const parsed::update_expression::action::set& a) -> std::optional<rjson::value> {
+            return calculate_value(a._rhs, previous_item);
+        },
+        [&] (const parsed::update_expression::action::remove& a) -> std::optional<rjson::value> {
+            return std::nullopt;
+        },
+        [&] (const parsed::update_expression::action::add& a) -> std::optional<rjson::value> {
+            parsed::value base;
+            parsed::value addition;
+            base.set_path(action._path);
+            addition.set_constant(a._valref);
+            rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item);
+            rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item);
+            rjson::value result;
+            // An ADD can be used to create a new attribute (when
+            // v1.IsNull()) or to add to a pre-existing attribute:
+            if (v1.IsNull()) {
+                std::string v2_type = get_item_type_string(v2);
+                if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
+                    result = v2;
+                } else {
+                    throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
+                }
+            } else {
+                std::string v1_type = get_item_type_string(v1);
+                if (v1_type == "N") {
+                    if (get_item_type_string(v2) != "N") {
+                        throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                    }
+                    result = number_add(v1, v2);
+                } else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
+                    if (get_item_type_string(v2) != v1_type) {
+                        throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
+                    }
+                    result = set_sum(v1, v2);
+                } else {
+                    throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
+                }
+            }
+            return result;
+        },
+        [&] (const parsed::update_expression::action::del& a) -> std::optional<rjson::value> {
+            parsed::value base;
+            parsed::value subset;
+            base.set_path(action._path);
+            subset.set_constant(a._valref);
+            rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item);
+            rjson::value v2 = calculate_value(subset, calculate_value_caller::UpdateExpression, previous_item);
+            if (!v1.IsNull()) {
+                return set_diff(v1, v2);
+            }
+            // When we return nullopt here, we ask to *delete* this attribute,
+            // which is unnecessary because we know the attribute does not
+            // exist anyway. This is a waste, but a small one. Note that also
+            // for the "remove" action above we don't bother to check if the
+            // previous_item has anything to remove.
+            return std::nullopt;
+        }
+    }, action._action);
+}
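DynamoDB's ADD semantics, as action_result() implements them, in a small editor's Python sketch (add_action and its value shapes are assumptions):

```python
# Illustrative sketch: ADD sums numbers, unions string/number/binary sets,
# and may create a fresh attribute only from an N/SS/NS/BS operand.
from decimal import Decimal

def add_action(v1, v2):
    if v1 is None:                      # attribute does not exist yet
        [(t2, _)] = v2.items()
        if t2 in ("N", "SS", "NS", "BS"):
            return v2
        raise ValueError(f"incorrect data type for ADD: {v2}")
    [(t1, x1)] = v1.items()
    [(t2, x2)] = v2.items()
    if t1 != t2 or t1 not in ("N", "SS", "NS", "BS"):
        raise ValueError(f"incorrect operand type, expected {t1}: {v2}")
    if t1 == "N":
        return {"N": str(Decimal(x1) + Decimal(x2))}
    return {t1: sorted(set(x1) | set(x2))}   # set types hold lists of strings

print(add_action({"N": "5"}, {"N": "2.5"}))           # {'N': '7.5'}
print(add_action({"SS": ["a"]}, {"SS": ["b", "a"]}))  # {'SS': ['a', 'b']}
print(add_action(None, {"NS": ["1", "2"]}))           # {'NS': ['1', '2']}
```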
+
+// Print an attribute_path_map_node<action> as the list of paths it contains:
+static std::ostream& operator<<(std::ostream& out, const attribute_path_map_node<parsed::update_expression::action>& h) {
+    if (h.has_value()) {
+        out << " " << h.get_value()._path;
+    } else if (h.has_members()) {
+        for (auto& member : h.get_members()) {
+            out << *member.second;
+        }
+    } else if (h.has_indexes()) {
+        for (auto& index : h.get_indexes()) {
+            out << *index.second;
+        }
+    }
+    return out;
+}
+
+// Apply the hierarchy of actions in an attribute_path_map_node<action> to a
+// JSON object which uses DynamoDB's serialization conventions. The complete,
+// unmodified, previous_item is also necessary for the right-hand sides of the
+// actions. Modifies obj in-place or returns false if it is to be removed.
+static bool hierarchy_actions(
+        rjson::value& obj,
+        const attribute_path_map_node<parsed::update_expression::action>& h,
+        const rjson::value* previous_item)
+{
+    if (!obj.IsObject() || obj.MemberCount() != 1) {
+        // This shouldn't happen. We shouldn't have stored malformed objects.
+        // But today Alternator does not validate the structure of nested
+        // documents before storing them, so this can happen on read.
+        throw api_error::validation(format("Malformed value object read: {}", obj));
+    }
+    const char* type = obj.MemberBegin()->name.GetString();
+    rjson::value& v = obj.MemberBegin()->value;
+    if (h.has_value()) {
+        // Action replacing everything in this position in the hierarchy
+        std::optional<rjson::value> newv = action_result(h.get_value(), previous_item);
+        if (newv) {
+            obj = std::move(*newv);
+        } else {
+            return false;
+        }
+    } else if (h.has_members()) {
+        if (type[0] != 'M' || !v.IsObject()) {
+            // A .something on a non-map doesn't work.
+            throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
+        }
+        for (const auto& member : h.get_members()) {
+            std::string attr = member.first;
+            const attribute_path_map_node<parsed::update_expression::action>& subh = *member.second;
+            rjson::value *subobj = rjson::find(v, attr);
+            if (subobj) {
+                if (!hierarchy_actions(*subobj, subh, previous_item)) {
+                    rjson::remove_member(v, attr);
+                }
+            } else {
+                // When a.b does not exist, setting a.b itself (i.e.
+                // subh.has_value()) is fine, but setting a.b.c is not.
+                if (subh.has_value()) {
+                    std::optional<rjson::value> newv = action_result(subh.get_value(), previous_item);
+                    if (newv) {
+                        rjson::set_with_string_name(v, attr, std::move(*newv));
+                    } else {
+                        throw api_error::validation(format("Can't remove document path {} - not present in item",
+                            subh.get_value()._path));
+                    }
+                } else {
+                    throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
+                }
+            }
+        }
+    } else if (h.has_indexes()) {
+        if (type[0] != 'L' || !v.IsArray()) {
+            // A [i] on a non-list doesn't work.
+            throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
+        }
+        unsigned nremoved = 0;
+        for (const auto& index : h.get_indexes()) {
+            unsigned i = index.first - nremoved;
+            const attribute_path_map_node<parsed::update_expression::action>& subh = *index.second;
+            if (i < v.Size()) {
+                if (!hierarchy_actions(v[i], subh, previous_item)) {
+                    v.Erase(v.Begin() + i);
+                    // If we have the actions "REMOVE a[1] SET a[3] = :val",
+                    // the index 3 refers to the original indexes, before any
+                    // items were removed. So we offset the next indexes
+                    // (which are guaranteed to be higher than i - indexes is
+                    // a sorted map) by an increased "nremoved".
+                    nremoved++;
+                }
+            } else {
+                // If a[7] does not exist, setting a[7] itself (i.e.
+                // subh.has_value()) is fine - and appends an item, though
+                // not necessarily with index 7. But setting a[7].b will
+                // not work.
+                if (subh.has_value()) {
+                    std::optional<rjson::value> newv = action_result(subh.get_value(), previous_item);
+                    if (newv) {
+                        rjson::push_back(v, std::move(*newv));
+                    } else {
+                        // Removing a[7] when the list has fewer elements is
+                        // silently ignored. It's not considered an error.
+                    }
+                } else {
+                    throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
+                }
+            }
+        }
+    }
+    return true;
+}
+
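The index-offset bookkeeping is easiest to see on plain Python lists (editor's sketch; apply_list_actions is hypothetical):

```python
# Illustrative sketch: indexes in one expression refer to the *original*
# list positions, so after erasing an element, later (sorted) indexes are
# shifted down by the number of removals so far.
def apply_list_actions(lst, actions):
    """actions: {original_index: new_value_or_None} where None means REMOVE."""
    nremoved = 0
    for orig_i, newv in sorted(actions.items()):
        i = orig_i - nremoved
        if i < len(lst):
            if newv is None:
                del lst[i]
                nremoved += 1
            else:
                lst[i] = newv
        elif newv is not None:
            lst.append(newv)    # setting past the end appends
        # removing past the end is silently ignored
    return lst

# "REMOVE a[1] SET a[3] = 'X'" on ['p', 'q', 'r', 's']:
print(apply_list_actions(['p', 'q', 'r', 's'], {1: None, 3: 'X'}))
# ['p', 'r', 'X'], because index 3 still means the original 's'
```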
 std::optional<mutation>
 update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const {
     if (!verify_expected(_request, previous_item.get()) ||
@@ -2144,17 +2505,37 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
     auto& row = m.partition().clustered_row(*_schema, _ck);
     attribute_collector attrs_collector;
     bool any_updates = false;
-    auto do_update = [&] (bytes&& column_name, const rjson::value& json_value) {
+    auto do_update = [&] (bytes&& column_name, const rjson::value& json_value,
+            const attribute_path_map_node<parsed::update_expression::action>* h = nullptr) {
         any_updates = true;
-        if (_returnvalues == returnvalues::ALL_NEW ||
-            _returnvalues == returnvalues::UPDATED_NEW) {
+        if (_returnvalues == returnvalues::ALL_NEW) {
             rjson::set_with_string_name(_return_attributes,
-                    to_sstring_view(column_name), rjson::copy(json_value));
+                to_sstring_view(column_name), rjson::copy(json_value));
+        } else if (_returnvalues == returnvalues::UPDATED_NEW) {
+            rjson::value&& v = rjson::copy(json_value);
+            if (h) {
+                // If the operation was only on specific attribute paths,
+                // leave only them in _return_attributes.
+                if (hierarchy_filter(v, *h)) {
+                    rjson::set_with_string_name(_return_attributes,
+                        to_sstring_view(column_name), std::move(v));
+                }
+            } else {
+                rjson::set_with_string_name(_return_attributes,
+                    to_sstring_view(column_name), std::move(v));
+            }
         } else if (_returnvalues == returnvalues::UPDATED_OLD && previous_item) {
             std::string_view cn = to_sstring_view(column_name);
             const rjson::value* col = rjson::find(*previous_item, cn);
             if (col) {
-                rjson::set_with_string_name(_return_attributes, cn, rjson::copy(*col));
+                rjson::value&& v = rjson::copy(*col);
+                if (h) {
+                    if (hierarchy_filter(v, *h)) {
+                        rjson::set_with_string_name(_return_attributes, cn, std::move(v));
+                    }
+                } else {
+                    rjson::set_with_string_name(_return_attributes, cn, std::move(v));
+                }
             }
         }
         const column_definition* cdef = _schema->get_column_definition(column_name);
@@ -2196,7 +2577,7 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
     // can just move previous_item later, when we don't need it any more.
     if (_returnvalues == returnvalues::ALL_NEW) {
         if (previous_item) {
-            _return_attributes = std::move(*previous_item);
+            _return_attributes = rjson::copy(*previous_item);
         } else {
             // If there is no previous item, usually a new item is created
             // and contains the given key. This may be cancelled at the end
@@ -2209,88 +2590,44 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
     }
 
     if (!_update_expression.empty()) {
-        std::unordered_set<std::string> seen_column_names;
-        for (auto& action : _update_expression.actions()) {
-            if (action._path.has_operators()) {
-                // FIXME: implement this case
-                throw api_error::validation("UpdateItem support for nested updates not yet implemented");
-            }
-            std::string column_name = action._path.root();
+        for (auto& actions : _update_expression) {
+            // The actions of _update_expression are grouped by top-level
+            // attributes. Here, all actions in actions.second share the same
+            // top-level attribute actions.first.
+            std::string column_name = actions.first;
             const column_definition* cdef = _schema->get_column_definition(to_bytes(column_name));
             if (cdef && cdef->is_primary_key()) {
-                throw api_error::validation(
-                    format("UpdateItem cannot update key column {}", column_name));
+                throw api_error::validation(format("UpdateItem cannot update key column {}", column_name));
             }
-            // DynamoDB forbids multiple updates in the same expression to
-            // modify overlapping document paths. Updates of one expression
-            // have the same timestamp, so it's unclear which would "win".
-            // FIXME: currently, without full support for document paths,
-            // we only check if the paths' roots are the same.
-            if (!seen_column_names.insert(column_name).second) {
-                throw api_error::validation(
-                    format("Invalid UpdateExpression: two document paths overlap with each other: {} and {}.",
-                        column_name, column_name));
-            }
-            std::visit(overloaded_functor {
-                [&] (const parsed::update_expression::action::set& a) {
-                    auto value = calculate_value(a._rhs, previous_item.get());
-                    do_update(to_bytes(column_name), value);
-                },
-                [&] (const parsed::update_expression::action::remove& a) {
+            if (actions.second.has_value()) {
+                // An action on a top-level attribute column_name. The single
+                // action is actions.second.get_value(). We can simply invoke
+                // the action and replace the attribute with its result:
+                std::optional<rjson::value> result = action_result(actions.second.get_value(), previous_item.get());
+                if (result) {
+                    do_update(to_bytes(column_name), *result);
+                } else {
                     do_delete(to_bytes(column_name));
-                },
-                [&] (const parsed::update_expression::action::add& a) {
-                    parsed::value base;
-                    parsed::value addition;
-                    base.set_path(action._path);
-                    addition.set_constant(a._valref);
-                    rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
-                    rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item.get());
-                    rjson::value result;
-                    // An ADD can be used to create a new attribute (when
-                    // v1.IsNull()) or to add to a pre-existing attribute:
-                    if (v1.IsNull()) {
-                        std::string v2_type = get_item_type_string(v2);
-                        if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
-                            result = v2;
-                        } else {
-                            throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
-                        }
-                    } else {
-                        std::string v1_type = get_item_type_string(v1);
-                        if (v1_type == "N") {
-                            if (get_item_type_string(v2) != "N") {
-                                throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
-                            }
-                            result = number_add(v1, v2);
-                        } else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
-                            if (get_item_type_string(v2) != v1_type) {
-                                throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
-                            }
-                            result = set_sum(v1, v2);
-                        } else {
-                            throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
-                        }
-                    }
-                    do_update(to_bytes(column_name), result);
-                },
-                [&] (const parsed::update_expression::action::del& a) {
-                    parsed::value base;
-                    parsed::value subset;
-                    base.set_path(action._path);
-                    subset.set_constant(a._valref);
-                    rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
-                    rjson::value v2 = calculate_value(subset, calculate_value_caller::UpdateExpression, previous_item.get());
-                    if (!v1.IsNull()) {
-                        std::optional<rjson::value> result = set_diff(v1, v2);
-                        if (result) {
-                            do_update(to_bytes(column_name), *result);
-                        } else {
-                            do_delete(to_bytes(column_name));
-                        }
-                    }
-                    }
                 }
-            }, action._action);
+            } else {
+                // We have actions on a path or more than one path in the same
+                // top-level attribute column_name - but not on the top-level
+                // attribute as a whole. We already read the full top-level
+                // attribute (see check_needs_read_before_write()), and now we
+                // need to modify pieces of it and write back the entire
+                // top-level attribute.
+                if (!previous_item) {
+                    throw api_error::validation(format("UpdateItem cannot update nested document path on non-existent item"));
+                }
+                const rjson::value *toplevel = rjson::find(*previous_item, column_name);
+                if (!toplevel) {
+                    throw api_error::validation(format("UpdateItem cannot update document path: missing attribute {}",
+                        column_name));
+                }
+                rjson::value result = rjson::copy(*toplevel);
+                hierarchy_actions(result, actions.second, previous_item.get());
+                do_update(to_bytes(column_name), std::move(result), &actions.second);
+            }
         }
     }
     if (_returnvalues == returnvalues::ALL_OLD && previous_item) {
@@ -2408,7 +2745,7 @@ static rjson::value describe_item(schema_ptr schema,
     const query::partition_slice& slice,
     const cql3::selection::selection& selection,
     const query::result& query_result,
-    const std::unordered_set<std::string>& attrs_to_get) {
+    const attrs_to_get& attrs_to_get) {
     std::optional<rjson::value> opt_item = executor::describe_single_item(std::move(schema), slice, selection, std::move(query_result), attrs_to_get);
     if (!opt_item) {
         // If there is no matching item, we're supposed to return an empty
@@ -2480,7 +2817,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
     struct table_requests {
         schema_ptr schema;
         db::consistency_level cl;
-        std::unordered_set<std::string> attrs_to_get;
+        attrs_to_get attrs_to_get;
         struct single_request {
             partition_key pk;
             clustering_key ck;
@@ -2694,7 +3031,7 @@ void filter::for_filters_on(const noncopyable_function<void(std::string_view)>&
 class describe_items_visitor {
     typedef std::vector<const column_definition*> columns_t;
     const columns_t& _columns;
-    const std::unordered_set<std::string>& _attrs_to_get;
+    const attrs_to_get& _attrs_to_get;
     std::unordered_set<std::string> _extra_filter_attrs;
     const filter& _filter;
     typename columns_t::const_iterator _column_it;
@@ -2703,7 +3040,7 @@ class describe_items_visitor {
     size_t _scanned_count;
 
 public:
-    describe_items_visitor(const columns_t& columns, const std::unordered_set<std::string>& attrs_to_get, filter& filter)
+    describe_items_visitor(const columns_t& columns, const attrs_to_get& attrs_to_get, filter& filter)
         : _columns(columns)
         , _attrs_to_get(attrs_to_get)
         , _filter(filter)
@@ -2752,6 +3089,12 @@ public:
         std::string attr_name = value_cast<sstring>(entry.first);
         if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name) || _extra_filter_attrs.contains(attr_name)) {
             bytes value = value_cast<bytes>(entry.second);
+            // Even if _attrs_to_get asked to keep only a part of a
+            // top-level attribute, we keep the entire attribute
+            // at this stage, because the item filter might still
+            // need the other parts (it was easier for us to keep
+            // extra_filter_attrs at top-level granularity). We'll
+            // filter the unneeded parts after item filtering.
             rjson::set_with_string_name(_item, attr_name, deserialize_item(value));
         }
     }
@@ -2762,11 +3105,24 @@ public:
 
     void end_row() {
         if (_filter.check(_item)) {
+            // As noted above, we kept entire top-level attributes listed in
+            // _attrs_to_get. We may need to only keep parts of them.
+            for (const auto& attr: _attrs_to_get) {
+                // If !attr.has_value() it means we were asked not to keep
+                // attr entirely, but just parts of it.
+                if (!attr.second.has_value()) {
+                    rjson::value* toplevel = rjson::find(_item, attr.first);
+                    if (toplevel && !hierarchy_filter(*toplevel, attr.second)) {
+                        rjson::remove_member(_item, attr.first);
+                    }
+                }
+            }
             // Remove the extra attributes _extra_filter_attrs which we had
             // to add just for the filter, and not requested to be returned:
             for (const auto& attr : _extra_filter_attrs) {
                 rjson::remove_member(_item, attr);
             }
 
             rjson::push_back(_items, std::move(_item));
         }
         _item = rjson::empty_object();
@@ -2782,7 +3138,7 @@ public:
     }
 };
 
-static rjson::value describe_items(schema_ptr schema, const query::partition_slice& slice, const cql3::selection::selection& selection, std::unique_ptr<cql3::result_set> result_set, std::unordered_set<std::string>&& attrs_to_get, filter&& filter) {
+static rjson::value describe_items(schema_ptr schema, const query::partition_slice& slice, const cql3::selection::selection& selection, std::unique_ptr<cql3::result_set> result_set, attrs_to_get&& attrs_to_get, filter&& filter) {
     describe_items_visitor visitor(selection.get_columns(), attrs_to_get, filter);
     result_set->visit(visitor);
     auto scanned_count = visitor.get_scanned_count();
@@ -2823,7 +3179,7 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
     const rjson::value* exclusive_start_key,
     dht::partition_range_vector&& partition_ranges,
     std::vector<query::clustering_range>&& ck_bounds,
-    std::unordered_set<std::string>&& attrs_to_get,
+    attrs_to_get&& attrs_to_get,
     uint32_t limit,
     db::consistency_level cl,
     filter&& filter,
@@ -70,6 +70,76 @@ public:
     std::string to_json() const override;
 };
 
+namespace parsed {
+class path;
+};
+
+// An attribute_path_map object is used to hold data for various attribute
+// paths (parsed::path) in a hierarchy of attribute paths. Each attribute path
+// has a root attribute, which is then modified by member and index operators -
+// for example in "a.b[2].c" we have "a" as the root, then ".b" member, then
+// "[2]" index, and finally ".c" member.
+// Data can be added to an attribute_path_map using the add() function, but
+// it requires that attributes with data not be *overlapping* or *conflicting*:
+//
+// 1. Two attribute paths which are identical or an ancestor of one another
+//    are considered *overlapping* and not allowed. If a.b.c has data,
+//    we can't add more data in a.b.c or any of its descendants like a.b.c.d.
+//
+// 2. Two attribute paths which need the same parent to have both a member and
+//    an index are considered *conflicting* and not allowed. E.g., if a.b has
+//    data, you can't add a[1]. The meaning of adding both would be that the
+//    attribute a is both a map and an array, which isn't sensible.
+//
+// These two requirements are common to the two places where Alternator uses
+// this abstraction to describe how a hierarchical item is to be transformed:
+//
+// 1. In ProjectionExpression: for filtering from a full top-level attribute
+//    only the parts for which the user asked in ProjectionExpression.
+//
+// 2. In UpdateExpression: for taking the previous value of a top-level
+//    attribute, and modifying it based on the instructions the user
+//    wrote in UpdateExpression.
+
+template<typename T>
+class attribute_path_map_node {
+public:
+    using data_t = T;
+    // We need the extra shared_ptr<> here because libstdc++ unordered_map
+    // doesn't work with incomplete types :-( We couldn't use lw_shared_ptr<>
+    // because it doesn't work for incomplete types either. We couldn't use
+    // std::unique_ptr<> because it makes the entire object uncopyable. We
+    // don't often need to copy such a map, but we do have some code that
+    // copies an attrs_to_get object, and is hard to find and remove.
+    // The shared_ptr should never be null.
+    using members_t = std::unordered_map<std::string, seastar::shared_ptr<attribute_path_map_node<T>>>;
+    // The indexes list is sorted because DynamoDB requires handling writes
+    // beyond the end of a list in index order.
+    using indexes_t = std::map<unsigned, seastar::shared_ptr<attribute_path_map_node<T>>>;
+    // The prohibition on "overlap" and "conflict" explained above means
+    // that only one of data, members or indexes is non-empty.
+    std::optional<std::variant<data_t, members_t, indexes_t>> _content;
+
+    bool is_empty() const { return !_content; }
+    bool has_value() const { return _content && std::holds_alternative<data_t>(*_content); }
+    bool has_members() const { return _content && std::holds_alternative<members_t>(*_content); }
+    bool has_indexes() const { return _content && std::holds_alternative<indexes_t>(*_content); }
+    // get_members() assumes that has_members() is true
+    members_t& get_members() { return std::get<members_t>(*_content); }
+    const members_t& get_members() const { return std::get<members_t>(*_content); }
+    indexes_t& get_indexes() { return std::get<indexes_t>(*_content); }
+    const indexes_t& get_indexes() const { return std::get<indexes_t>(*_content); }
+    T& get_value() { return std::get<T>(*_content); }
+    const T& get_value() const { return std::get<T>(*_content); }
+};
+
+template<typename T>
+using attribute_path_map = std::unordered_map<std::string, attribute_path_map_node<T>>;
+
+using attrs_to_get_node = attribute_path_map_node<std::monostate>;
+using attrs_to_get = attribute_path_map<std::monostate>;
+
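An editor's Python sketch of the node invariant (Node and its methods are illustrative, not the C++ API):

```python
# Illustrative sketch: each node is empty or holds exactly one of a value,
# member children, or index children -- never a mix.
class Node:
    __slots__ = ("kind", "payload")   # kind in {None, "value", "members", "indexes"}
    def __init__(self):
        self.kind, self.payload = None, None
    def set_value(self, v):
        assert self.kind is None, "overlap: node already populated"
        self.kind, self.payload = "value", v
    def child(self, op):
        kind = "members" if isinstance(op, str) else "indexes"
        assert self.kind in (None, kind), "conflict or overlap"
        if self.kind is None:
            self.kind, self.payload = kind, {}
        return self.payload.setdefault(op, Node())

root = Node()
root.child("b").set_value("SET a.b = :v")   # path a.b
root.child("c").child(2).set_value(":w")    # path a.c[2]
# root.child(0) would raise: 'a' can't be both a map and a list
```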
 class executor : public peering_sharded_service<executor> {
     service::storage_proxy& _proxy;
     service::migration_manager& _mm;
@@ -140,16 +210,14 @@ public:
         const query::partition_slice&,
         const cql3::selection::selection&,
         const query::result&,
-        const std::unordered_set<std::string>&);
+        const attrs_to_get&);
 
     static void describe_single_item(const cql3::selection::selection&,
         const std::vector<bytes_opt>&,
-        const std::unordered_set<std::string>&,
+        const attrs_to_get&,
         rjson::value&,
         bool = false);
 
     void add_stream_options(const rjson::value& stream_spec, schema_builder&) const;
     void supplement_table_info(rjson::value& descr, const schema& schema) const;
     void supplement_table_stream_info(rjson::value& descr, const schema& schema) const;
@@ -130,6 +130,27 @@ void condition_expression::append(condition_expression&& a, char op) {
     }, _expression);
 }
 
+void path::check_depth_limit() {
+    if (1 + _operators.size() > depth_limit) {
+        throw expressions_syntax_error(format("Document path exceeded {} nesting levels", depth_limit));
+    }
+}
+
+std::ostream& operator<<(std::ostream& os, const path& p) {
+    os << p.root();
+    for (const auto& op : p.operators()) {
+        std::visit(overloaded_functor {
+            [&] (const std::string& member) {
+                os << '.' << member;
+            },
+            [&] (unsigned index) {
+                os << '[' << index << ']';
+            }
+        }, op);
+    }
+    return os;
+}
+
 } // namespace parsed
 
 // The following resolve_*() functions resolve references in parsed
@@ -151,10 +172,9 @@ void condition_expression::append(condition_expression&& a, char op) {
 // we need to resolve the expression just once but then use it many times
 // (once for each item to be filtered).
 
-static void resolve_path(parsed::path& p,
+static std::optional<std::string> resolve_path_component(const std::string& column_name,
         const rjson::value* expression_attribute_names,
         std::unordered_set<std::string>& used_attribute_names) {
-    const std::string& column_name = p.root();
     if (column_name.size() > 0 && column_name.front() == '#') {
         if (!expression_attribute_names) {
             throw api_error::validation(
@@ -166,7 +186,30 @@ static void resolve_path(parsed::path& p,
                 format("ExpressionAttributeNames missing entry '{}' required by expression", column_name));
         }
         used_attribute_names.emplace(column_name);
-        p.set_root(std::string(rjson::to_string_view(*value)));
+        return std::string(rjson::to_string_view(*value));
     }
+    return std::nullopt;
+}
+
+static void resolve_path(parsed::path& p,
+        const rjson::value* expression_attribute_names,
+        std::unordered_set<std::string>& used_attribute_names) {
+    std::optional<std::string> r = resolve_path_component(p.root(), expression_attribute_names, used_attribute_names);
+    if (r) {
+        p.set_root(std::move(*r));
+    }
+    for (auto& op : p.operators()) {
+        std::visit(overloaded_functor {
+            [&] (std::string& s) {
+                r = resolve_path_component(s, expression_attribute_names, used_attribute_names);
+                if (r) {
+                    s = std::move(*r);
+                }
+            },
+            [&] (unsigned index) {
+                // nothing to resolve
+            }
+        }, op);
+    }
+}
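With nested paths, every string component of a path may be a #placeholder from ExpressionAttributeNames, not just the root. An editor's Python sketch of the same resolution logic (names are illustrative):

```python
# Illustrative sketch: resolve "#x.#y[0]" against ExpressionAttributeNames.
def resolve_component(name, attribute_names, used):
    if name.startswith("#"):
        if name not in (attribute_names or {}):
            raise ValueError(f"ExpressionAttributeNames missing entry '{name}'")
        used.add(name)
        return attribute_names[name]
    return name

def resolve_path(path, attribute_names, used):
    # integer indexes need no resolution, string members might
    return [op if isinstance(op, int) else
            resolve_component(op, attribute_names, used)
            for op in path]

used = set()
print(resolve_path(["#x", "#y", 0], {"#x": "a", "#y": "b"}, used))
# ['a', 'b', 0]; used == {'#x', '#y'}
```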
@@ -623,6 +666,55 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
     },
 };
 
+// Given a parsed::path and an item read from the table, extract the value
+// of a certain attribute path, such as "a" or "a.b.c[3]". Returns a null
+// value if the item or the requested attribute does not exist.
+// Note that the item is assumed to be encoded in JSON using DynamoDB
+// conventions - each level of a nested document is a map with one key -
+// a type (e.g., "M" for map) - and its value is the representation of
+// that value.
+static rjson::value extract_path(const rjson::value* item,
+        const parsed::path& p, calculate_value_caller caller) {
+    if (!item) {
+        return rjson::null_value();
+    }
+    const rjson::value* v = rjson::find(*item, p.root());
+    if (!v) {
+        return rjson::null_value();
+    }
+    for (const auto& op : p.operators()) {
+        if (!v->IsObject() || v->MemberCount() != 1) {
+            // This shouldn't happen. We shouldn't have stored malformed
+            // objects. But today Alternator does not validate the structure
+            // of nested documents before storing them, so this can happen on
+            // read.
+            throw api_error::validation(format("{}: malformed item read: {}", caller, *item));
+        }
+        const char* type = v->MemberBegin()->name.GetString();
+        v = &(v->MemberBegin()->value);
+        std::visit(overloaded_functor {
+            [&] (const std::string& member) {
+                if (type[0] == 'M' && v->IsObject()) {
+                    v = rjson::find(*v, member);
+                } else {
+                    v = nullptr;
+                }
+            },
+            [&] (unsigned index) {
+                if (type[0] == 'L' && v->IsArray() && index < v->Size()) {
+                    v = &(v->GetArray()[index]);
+                } else {
+                    v = nullptr;
+                }
+            }
+        }, op);
+        if (!v) {
+            return rjson::null_value();
+        }
+    }
+    return rjson::copy(*v);
+}
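An editor's Python sketch of the walk (extract_path here is illustrative, not the C++ function):

```python
# Illustrative sketch: walk a DynamoDB-encoded item down a parsed path,
# returning None when any step does not match (missing root, wrong type,
# or index out of range).
def extract_path(item, path):
    if item is None or path[0] not in item:
        return None
    v = item[path[0]]
    for op in path[1:]:
        [(typ, inner)] = v.items()   # one {type: value} pair per level
        if isinstance(op, str) and typ == "M" and op in inner:
            v = inner[op]
        elif isinstance(op, int) and typ == "L" and op < len(inner):
            v = inner[op]
        else:
            return None
    return v

item = {"a": {"M": {"b": {"L": [{"S": "x"}, {"N": "42"}]}}}}
print(extract_path(item, ["a", "b", 1]))   # {'N': '42'}
print(extract_path(item, ["a", "b", 7]))   # None
```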
 // Given a parsed::value, which can refer either to a constant value from
 // ExpressionAttributeValues, to the value of some attribute, or to a function
 // of other values, this function calculates the resulting value.
@@ -640,21 +732,12 @@ rjson::value calculate_value(const parsed::value& v,
             auto function_it = function_handlers.find(std::string_view(f._function_name));
             if (function_it == function_handlers.end()) {
                 throw api_error::validation(
-                    format("UpdateExpression: unknown function '{}' called.", f._function_name));
+                    format("{}: unknown function '{}' called.", caller, f._function_name));
             }
             return function_it->second(caller, previous_item, f);
         },
         [&] (const parsed::path& p) -> rjson::value {
-            if (!previous_item) {
-                return rjson::null_value();
-            }
-            std::string update_path = p.root();
-            if (p.has_operators()) {
-                // FIXME: support this
-                throw api_error::validation("Reading attribute paths not yet implemented");
-            }
-            const rjson::value* previous_value = rjson::find(*previous_item, update_path);
-            return previous_value ? rjson::copy(*previous_value) : rjson::null_value();
+            return extract_path(previous_item, p, caller);
         }
     }, v._value);
 }
@@ -49,15 +49,23 @@ class path {
     // dot (e.g., ".xyz").
     std::string _root;
     std::vector<std::variant<std::string, unsigned>> _operators;
+    // It is useful to limit the depth of a user-specified path, because it
+    // allows us to use recursive algorithms without worrying about recursion
+    // depth. DynamoDB officially limits the length of paths to 32 components
+    // (including the root) so let's use the same limit.
+    static constexpr unsigned depth_limit = 32;
+    void check_depth_limit();
 public:
     void set_root(std::string root) {
         _root = std::move(root);
     }
     void add_index(unsigned i) {
         _operators.emplace_back(i);
+        check_depth_limit();
     }
     void add_dot(std::string(name)) {
         _operators.emplace_back(std::move(name));
+        check_depth_limit();
     }
     const std::string& root() const {
         return _root;
@@ -65,6 +73,13 @@ public:
     bool has_operators() const {
         return !_operators.empty();
     }
+    const std::vector<std::variant<std::string, unsigned>>& operators() const {
+        return _operators;
+    }
+    std::vector<std::variant<std::string, unsigned>>& operators() {
+        return _operators;
+    }
+    friend std::ostream& operator<<(std::ostream&, const path&);
 };
 
 // When an expression is first parsed, all constants are references, like
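A sketch of the limit check (editor's example mirroring check_depth_limit):

```python
# Illustrative sketch: enforcing DynamoDB's 32-level document-path limit at
# parse time keeps the later recursive algorithms safe from deep recursion.
DEPTH_LIMIT = 32

def check_depth(path):
    if len(path) > DEPTH_LIMIT:   # the root counts as one component
        raise ValueError(f"Document path exceeded {DEPTH_LIMIT} nesting levels")

check_depth(["a"] + ["b"] * 31)       # exactly 32 components: accepted
try:
    check_depth(["a"] + ["b"] * 32)   # 33 components: rejected
except ValueError as e:
    print(e)
```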
@@ -845,16 +845,18 @@ future<executor::request_return_type> executor::get_records(client_state& client
     static const bytes op_column_name = cdc::log_meta_column_name_bytes("operation");
     static const bytes eor_column_name = cdc::log_meta_column_name_bytes("end_of_batch");
 
-    auto key_names = boost::copy_range<std::unordered_set<std::string>>(
+    auto key_names = boost::copy_range<attrs_to_get>(
         boost::range::join(std::move(base->partition_key_columns()), std::move(base->clustering_key_columns()))
-        | boost::adaptors::transformed([&] (const column_definition& cdef) { return cdef.name_as_text(); })
+        | boost::adaptors::transformed([&] (const column_definition& cdef) {
+            return std::make_pair<std::string, attrs_to_get_node>(cdef.name_as_text(), {}); })
     );
     // Include all base table columns as values (in case pre or post is enabled).
     // This will include attributes not stored in the frozen map column
-    auto attr_names = boost::copy_range<std::unordered_set<std::string>>(base->regular_columns()
+    auto attr_names = boost::copy_range<attrs_to_get>(base->regular_columns()
         // this will include the :attrs column, which we will also force evaluating.
         // But not having this set empty forces out any cdc columns from actual result
-        | boost::adaptors::transformed([] (const column_definition& cdef) { return cdef.name_as_text(); })
+        | boost::adaptors::transformed([] (const column_definition& cdef) {
+            return std::make_pair<std::string, attrs_to_get_node>(cdef.name_as_text(), {}); })
     );
 
     std::vector<const column_definition*> columns;
@@ -87,25 +87,13 @@ progresses and compatibility continues to improve.
* UpdateTable: Not supported.
* ListTables: Supported.
### Item Operations
* GetItem: Support almost complete except that projection expressions can
  only ask for top-level attributes.
* PutItem: Support almost complete except that condition expressions can
  only refer to top-level attributes.
* UpdateItem: Nested documents are supported but updates to nested attributes
  are not (e.g., `SET a.b[3].c=val`), and neither are nested attributes in
  condition expressions.
* DeleteItem: Mostly works, but again does not support nested attributes
  in condition expressions.
* GetItem, PutItem, UpdateItem, DeleteItem fully supported.
### Batch Operations
* BatchGetItem: Almost complete except that projection expressions can only
  ask for top-level attributes.
* BatchWriteItem: Supported. Doesn't limit the number of items (DynamoDB
  limits to 25) or size of items (400 KB) or total request size (16 MB).
* BatchGetItem, BatchWriteItem fully supported.
  Doesn't limit the number of items (DynamoDB limits to 25) or size of items
  (400 KB) or total request size (16 MB).
### Scans
Scan and Query are mostly supported, with the following limitations:
* As above, projection expressions only support top-level attributes.
* The ScanFilter/QueryFilter parameter for filtering results is fully
  supported, but the newer FilterExpression syntax is not yet supported.
* The "Select" option, which allows counting items instead of returning them,
  is not yet supported.
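For readers unfamiliar with the older syntax mentioned above, here is a minimal hedged sketch of a ScanFilter call (assuming `table` is a boto3 Table resource pointed at Alternator); this legacy filtering parameter is the one currently supported:

# A minimal sketch of the older, supported ScanFilter syntax; "table"
# is assumed to be a boto3 Table resource.
response = table.scan(
    ScanFilter={'x': {'AttributeValueList': [3],
                      'ComparisonOperator': 'EQ'}})
items = response['Items']  # only items whose attribute x equals 3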
### Secondary Indexes
@@ -297,11 +285,10 @@ policies" section.
DynamoDB allows attributes to be **nested** - a top-level attribute may
be a list or a map, and each of its elements may further be lists or
maps, etc. Alternator currently stores the entire content of a top-level
attribute as one JSON object. This is good enough for most needs, except
one DynamoDB feature which we cannot support safely: we cannot modify
a non-top-level attribute (e.g., a.b[3].c) directly without RMW. We plan
to fix this in a future version by rethinking the data model we use for
attributes, or rethinking our implementation of RMW (as explained above).
attribute as one JSON object. This means that UpdateItem requests which
want to modify a non-top-level attribute directly (e.g., a.b[3].c) need RMW:
Alternator implements such requests by reading the entire top-level
attribute a, modifying only a.b[3].c, and then writing back a.

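To make the RMW behavior concrete, here is a hedged boto3 sketch (the table, key and values are hypothetical); Alternator executes this by reading all of `a`, modifying `a.b[3].c` in memory, and writing `a` back:

# Hypothetical boto3 example of a nested update that Alternator
# implements via read-modify-write of the top-level attribute "a".
table.update_item(
    Key={'p': 'some-key'},
    UpdateExpression='SET a.b[3].c = :val',
    ExpressionAttributeValues={':val': 7})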
```eval_rst
.. toctree::
@@ -309,4 +296,4 @@ attributes, or rethinking our implementation of RMW (as explained above).

   getting-started
   compatibility
```
```

@@ -61,12 +61,6 @@ behave the same in Alternator. However, there are a few features which we have
not implemented yet. Unimplemented features return an error when used, so
they should be easy to detect. Here is a list of these unimplemented features:

* Missing support for **attribute paths** like `a.b[3].c`.
  Nested attributes _are_ supported, but Alternator does not yet allow reading
  or writing directly a piece of a nested attribute using an attribute path -
  only top-level attributes can be read or written directly.
  https://github.com/scylladb/scylla/issues/5024

* Currently in Alternator, a GSI (Global Secondary Index) can only be added
  to a table at table creation time, unlike DynamoDB, which also allows adding
  a GSI (but not an LSI) to an existing table using an UpdateTable operation.

@@ -230,3 +230,19 @@ def filled_test_table(dynamodb):
def scylla_only(dynamodb):
    if dynamodb.meta.client._endpoint.host.endswith('.amazonaws.com'):
        pytest.skip('Scylla-only feature not supported by AWS')

# The "test_table_s_forbid_rmw" fixture is the same as test_table_s, except
# with the "forbid_rmw" write isolation mode. This is useful for verifying
# that writes that we think should not need a read-before-write in fact do
# not need it.
# Because forbid_rmw is a Scylla-only feature, this test is skipped when not
# running against Scylla.
@pytest.fixture(scope="session")
def test_table_s_forbid_rmw(dynamodb, scylla_only):
    table = create_test_table(dynamodb,
        KeySchema=[ { 'AttributeName': 'p', 'KeyType': 'HASH' }, ],
        AttributeDefinitions=[ { 'AttributeName': 'p', 'AttributeType': 'S' } ])
    arn = table.meta.client.describe_table(TableName=table.name)['Table']['TableArn']
    table.meta.client.tag_resource(ResourceArn=arn, Tags=[{'Key': 'system:write_isolation', 'Value': 'forbid_rmw'}])
    yield table
    table.delete()

@@ -1262,7 +1262,6 @@ def test_update_condition_other_funcs(test_table_s):
# ConditionExpressions also allow reading nested attributes, and we should
# support that too. This test just checks a few random operators - we don't
# test all the different operators here.
@pytest.mark.xfail(reason="nested attributes not yet implemented in ConditionExpression")
def test_update_condition_nested_attributes(test_table_s):
    p = random_string()
    test_table_s.update_item(Key={'p': p},
@@ -1277,6 +1276,21 @@ def test_update_condition_nested_attributes(test_table_s):
        ConditionExpression='b.x < b.y[1]',
        ExpressionAttributeValues={':val': 2})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 2
    # Also check the case of a failing condition
    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
        test_table_s.update_item(Key={'p': p},
            UpdateExpression='SET c = :val',
            ConditionExpression='b.x < b.y[0]',
            ExpressionAttributeValues={':val': 3})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 2
    # A condition involving an attribute which doesn't exist results in
    # failed condition - not an error.
    with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
        test_table_s.update_item(Key={'p': p},
            UpdateExpression='SET c = :val',
            ConditionExpression='b.z < b.y[100]',
            ExpressionAttributeValues={':val': 4})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 2

# All the previous tests referred to attributes using their name directly.
# But the DynamoDB API also allows referring to attributes using a #reference.
@@ -1293,16 +1307,15 @@ def test_update_condition_attribute_reference(test_table_s):
        ExpressionAttributeValues={':val': 1})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 1

@pytest.mark.xfail(reason="nested attributes not yet implemented in ConditionExpression")
def test_update_condition_nested_attribute_reference(test_table_s):
    p = random_string()
    test_table_s.update_item(Key={'p': p},
        AttributeUpdates={'and': {'Value': {'or': 1}, 'Action': 'PUT'}})
        AttributeUpdates={'and': {'Value': {'or': 2}, 'Action': 'PUT'}})
    test_table_s.update_item(Key={'p': p},
        UpdateExpression='SET c = :val',
        ConditionExpression='attribute_exists (#name1.#name2)',
        ConditionExpression='#name1.#name2 = :two',
        ExpressionAttributeNames={'#name1': 'and', '#name2': 'or'},
        ExpressionAttributeValues={':val': 1})
        ExpressionAttributeValues={':val': 1, ':two': 2})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 1

# All the previous tests involved a single condition. The following tests

@@ -743,7 +743,6 @@ def test_filter_expression_and_attributes_to_get(test_table):
# support that too. This test just checks one operator - we don't
# test all the different operators again here, we will assume the same
# code is used internally so if one operator worked, all will work.
@pytest.mark.xfail(reason="nested attributes not yet implemented in FilterExpression")
def test_filter_expression_nested_attribute(test_table):
    p = random_string()
    test_table.put_item(Item={'p': p, 'c': 'hi', 'x': {'a': 'dog', 'b': 3}})
@@ -754,7 +753,6 @@ def test_filter_expression_nested_attribute(test_table):
        ExpressionAttributeValues={':p': p, ':a': 'mouse'})
    assert(got_items == [{'p': p, 'c': 'yo', 'x': {'a': 'mouse', 'b': 4}}])

@pytest.mark.xfail(reason="nested attributes not yet implemented in FilterExpression")
# This test is a version of test_filter_expression_and_projection_expression
# involving nested attributes. In that test, we had a filter and projection
# involving different attributes. Nested attributes open new corner cases:

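For orientation, the kind of request these FilterExpression tests exercise looks roughly like the following hedged sketch (not the tests' exact code; the tests use their own scan helpers):

# A hedged sketch of a Scan whose FilterExpression compares the nested
# attribute x.a; "table" is assumed to be a boto3 Table resource.
response = table.scan(
    FilterExpression='x.a = :a',
    ExpressionAttributeValues={':a': 'mouse'})
got_items = response['Items']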
@@ -334,6 +334,33 @@ def test_gsi_wrong_type_attribute_update(test_table_gsi_2):
    test_table_gsi_2.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': 3, 'Action': 'PUT'}})
    assert test_table_gsi_2.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'x': x}

# Since a GSI key x cannot be a map or an array, in particular updates to
# nested attributes like x.y or x[1] are not legal. The error that DynamoDB
# reports is "Key attributes must be scalars; list random access '[]' and map
# lookup '.' are not allowed: IndexKey: x".
def test_gsi_wrong_type_attribute_update_nested(test_table_gsi_2):
    p = random_string()
    x = random_string()
    test_table_gsi_2.put_item(Item={'p': p, 'x': x})
    # We can't write a map into a GSI key column, which in this case can only
    # be a string and in any case can never be a map. DynamoDB and Alternator
    # report different errors here: DynamoDB reports a type mismatch (exactly
    # like in test test_gsi_wrong_type_attribute_update), but Alternator
    # reports the obscure message "Malformed value object for key column x".
    # Alternator's error message should probably be improved here, but let's
    # not test it in this test.
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_gsi_2.update_item(Key={'p': p}, UpdateExpression='SET x = :val1',
            ExpressionAttributeValues={':val1': {'a': 3, 'b': 4}})
    # Here we try to set x.y for the GSI key column x. Again DynamoDB and
    # Alternator produce different error messages - but both make sense.
    # DynamoDB says "Key attributes must be scalars; list random access '[]'
    # and map lookup '.' are not allowed: IndexKey: x", while Alternator
    # complains that "document paths not valid for this item: x.y".
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_gsi_2.update_item(Key={'p': p}, UpdateExpression='SET x.y = :val1',
            ExpressionAttributeValues={':val1': 3})

def test_gsi_wrong_type_attribute_batch(test_table_gsi_2):
    # In a BatchWriteItem, if any update is forbidden, the entire batch is
    # rejected, and none of the updates happen at all.

@@ -367,6 +367,13 @@ def test_getitem_attributes_to_get(dynamodb, test_table):
    expected_item = {k: item[k] for k in wanted if k in item}
    assert expected_item == got_item

# Verify that it is forbidden to ask for the same attribute multiple times
def test_getitem_attributes_to_get_duplicate(dynamodb, test_table):
    p = random_string()
    c = random_string()
    with pytest.raises(ClientError, match='ValidationException.*Duplicate'):
        test_table.get_item(Key={'p': p, 'c': c}, AttributesToGet=['a', 'a'], ConsistentRead=True)

# Basic test for DeleteItem, with hash key only
def test_delete_item_hash(test_table_s):
    p = random_string()

@@ -116,7 +116,6 @@ def test_projection_expression_query(test_table):
# but the previous test checked that the alternative syntax works correctly.
# The following test checks fetching more elaborate attribute paths from
# nested documents.
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_projection_expression_path(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={
@@ -143,11 +142,21 @@ def test_projection_expression_path(test_table_s):
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x')['Item'] == {}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x.y')['Item'] == {}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[3].x')['Item'] == {}
    # Similarly, indexing a dictionary as an array, or array as dictionary, or
    # integer as either, yields an empty item.
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b.x')['Item'] == {}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a[0]')['Item'] == {}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0].x')['Item'] == {}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0][0]')['Item'] == {}
    # We can read multiple paths - the results are merged into one object
    # structured the same way as in the original item:
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0],a.b[1]')['Item'] == {'a': {'b': [2, 4]}}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0],a.c')['Item'] == {'a': {'b': [2], 'c': 5}}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.c,b')['Item'] == {'a': {'c': 5}, 'b': 'hello'}
    # If some of the paths are not available, they are silently ignored (just
    # like they returned an empty item when used alone earlier)
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x, a.b[0], x, a.b[3].x')['Item'] == {'a': {'b': [2]}}

    # It is not allowed to read the same path multiple times. The error from
    # DynamoDB looks like: "Invalid ProjectionExpression: Two document paths
    # overlap with each other; must remove or rewrite one of these paths;
@@ -160,6 +169,14 @@ def test_projection_expression_path(test_table_s):
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a,a.b[0]')['Item']

    # Above we noted that asking to project a non-existent attribute in an
    # existing item yields an empty Item object. However, if the item does not
    # exist at all, the Item object will be missing entirely:
    p = random_string()
    assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='x')
    assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x')
    assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a[0]')

# Above in test_projection_expression_toplevel_syntax() we tested how
# name references (#name) work in top-level attributes. In the following
# two tests we test how they work in more elaborate paths:
@@ -167,7 +184,6 @@ def test_projection_expression_path(test_table_s):
# 2. Conversely, a single reference, e.g., "#a", is always a single path
# component. Even if "#a" is "a.b", this refers to the literal attribute
# "a.b" - with a dot in its name - and not to the b element in a.
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_projection_expression_path_references(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'b': 1, 'c': 2}, 'b': 'hi'})
@@ -181,10 +197,9 @@ def test_projection_expression_path_references(test_table_s):
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.#n2', ExpressionAttributeNames={'#n2': 'b', '#unused': 'x'})

@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_projection_expression_path_dot(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a.b': 'hi', 'a': {'b': 'yo'}})
    test_table_s.put_item(Item={'p': p, 'a.b': 'hi', 'a': {'b': 'yo', 'c': 'jo'}})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b')['Item'] == {'a': {'b': 'yo'}}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='#name', ExpressionAttributeNames={'#name': 'a.b'})['Item'] == {'a.b': 'hi'}

@@ -192,7 +207,6 @@ def test_projection_expression_path_dot(test_table_s):
# ProjectionExpression. This includes both identical paths, and paths where
# one is a sub-path of the other - e.g. "a.b" and "a.b.c". As we already saw
# above, paths with just a common *prefix* - e.g., "a.b, a.c" - are fine.
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_projection_expression_path_overlap(test_table_s):
    # The overlap is tested symbolically, on the given paths, without any
    # relation to what the item contains, or whether it even exists. So we
@@ -207,13 +221,57 @@ def test_projection_expression_path_overlap(test_table_s):
                 'a.b, a.b[2]',
                 'a.b, a.b.c',
                 'a, a.b[2].c',
                 'a.b.d, a.b',
                 'a.b.d.e, a.b',
                 'a.b, a.b.d',
                 'a.b, a.b.d.e',
                ]:
        with pytest.raises(ClientError, match='ValidationException.* overlap'):
            print(expr)
            test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=expr)
    # The checks above can be easily passed by an over-zealous "overlap" check
    # which declares everything an overlap :-) Let's also check some non-
    # overlap cases - which shouldn't be declared an overlap.
    for expr in ['a, b',
                 'a.b, a.c',
                 'a.b.d, a.b.e',
                 'a[1], a[2]',
                 'a.b, a.c[2]',
                ]:
        print(expr)
        test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=expr)

# In addition to not allowing "overlapping" paths, DynamoDB also does not
# allow "conflicting" paths: It does not allow giving both a.b and a[1] in a
# single ProjectionExpression. It gives the error:
# "Invalid ProjectionExpression: Two document paths conflict with each other;
# must remove or rewrite one of these paths; path one: [a, b], path two:
# [a, [1]]".
# The reasoning is that asking for both in one request makes no sense because
# no item will ever be able to fulfill both.
def test_projection_expression_path_conflict(test_table_s):
    # The conflict is tested symbolically, on the given paths, without any
    # relation to what the item contains, or whether it even exists. So we
    # don't even need to create an item for this test. We still need a
    # key for the GetItem call :-)
    p = random_string()
    for expr in ['a.b, a[1]',
                 'a[1], a.b',
                 'a.b[1], a.b.c',
                 'a.b.c, a.b[1]',
                ]:
        with pytest.raises(ClientError, match='ValidationException.* conflict'):
            test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=expr)
    # The checks above can be easily passed by an over-zealous "conflict" check
    # which declares everything a conflict :-) Let's also check some non-
    # conflict cases - which shouldn't be declared a conflict.
    for expr in ['a.b, a.c',
                 'a.b, a.c[1]',
                ]:
        test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=expr)

# Above we used nested paths in ProjectionExpression, but just for the GetItem
# request. Let's verify they also work in Query and Scan requests:
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_query_projection_expression_path(test_table):
    p = random_string()
    items = [{'p': p, 'c': str(i), 'a': {'x': str(i*10), 'y': 'hi'}, 'b': 'hello' } for i in range(10)]
@@ -224,7 +282,6 @@ def test_query_projection_expression_path(test_table):
    expected_items = [{'a': {'x': x['a']['x']}} for x in items]
    assert multiset(expected_items) == multiset(got_items)

@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_scan_projection_expression_path(test_table):
    # This test is similar to test_query_projection_expression_path above,
    # but uses a scan instead of a query. The scan will generate unrelated
@@ -241,9 +298,8 @@ def test_scan_projection_expression_path(test_table):

# BatchGetItem also supports ProjectionExpression, let's test that it
# applies to all items, and that it correctly supports document paths as well.
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_batch_get_item_projection_expression_path(test_table_s):
    items = [{'p': random_string(), 'a': {'b': random_string()}, 'c': random_string()} for i in range(3)]
    items = [{'p': random_string(), 'a': {'b': random_string(), 'x': 'hi'}, 'c': random_string()} for i in range(3)]
    with test_table_s.batch_writer() as batch:
        for item in items:
            batch.put_item(item)
@@ -285,3 +341,20 @@ def test_projection_expression_and_key_condition_expression(test_table_s):
        ExpressionAttributeNames={'#name1': 'p', '#name2': 'a'},
        ExpressionAttributeValues={':val1': p});
    assert got_items == [{'a': 'hello'}]

# Test whether the nesting depth of a path in a projection expression
# is limited. If the implementation is done using recursion, it is good
# practice to limit it and not crash the server. According to the DynamoDB
# documentation, DynamoDB supports nested attributes up to 32 levels deep:
# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html#limits-attributes-nested-depth
# There is no reason why Alternator should not use exactly the same limit
# as is officially documented by DynamoDB.
def test_projection_expression_path_nesting_levels(test_table_s):
    p = random_string()
    # 32 nesting levels (including the top-level attribute) work
    test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a'+('.b'*31))
    # 33 nesting levels do not. DynamoDB gives an error: "Invalid
    # ProjectionExpression: The document path has too many nesting levels;
    # nesting levels: 33".
    with pytest.raises(ClientError, match='ValidationException.*nesting levels'):
        test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a'+('.b'*32))

@@ -346,7 +346,6 @@ def test_update_item_returnvalues_updated_new(test_table_s):

# Test the ReturnValues from an UpdateItem directly modifying a *nested*
# attribute, in the relevant ReturnValue modes:
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_item_returnvalues_nested(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'b': 'dog', 'c': [1, 2, 3]}, 'd': 'cat'})
@@ -398,3 +397,37 @@ def test_update_item_returnvalues_nested(test_table_s):
        UpdateExpression='SET a.c[1] = :val, a.b = :val2',
        ExpressionAttributeValues={':val': 3, ':val2': 'dog'})
    assert ret['Attributes'] == {'p': p, 'a': {'b': 'dog', 'c': [1, 3, 3]}, 'd': 'cat' }
    # Test with REMOVE, and one of them doing nothing (so shouldn't be in UPDATED_OLD)
    ret = test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_OLD',
        UpdateExpression='REMOVE a.c[1], a.c[3]')
    assert ret['Attributes'] == {'a': {'c': [3]}} # a.c[3] did not exist
    # When adding a new sub-attribute, UPDATED_OLD does not return anything,
    # but UPDATED_NEW does:
    ret = test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_OLD',
        UpdateExpression='SET a.x1 = :val',
        ExpressionAttributeValues={':val': 8})
    assert not 'Attributes' in ret
    ret = test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
        UpdateExpression='SET a.x2 = :val',
        ExpressionAttributeValues={':val': 8})
    assert ret['Attributes'] == {'a': {'x2': 8}}
    # Nevertheless, there is one strange exception - although setting an array
    # element *beyond* its end (e.g., a.c[100]) does add a new array item, it
    # is *not* returned by UPDATED_NEW. I am not sure DynamoDB did this
    # deliberately (is it a bug or a feature?), but it simplifies our
    # implementation as well: after setting a.c[100], a.c[100] is not actually
    # set (instead, maybe a.c[4] was set) so UPDATED_NEW returning a.c[100]
    # returns nothing.
    ret = test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
        UpdateExpression='SET a.c[100] = :val',
        ExpressionAttributeValues={':val': 70})
    assert not 'Attributes' in ret
    # When removing an item, it shouldn't appear in UPDATED_NEW. Again there
    # is a strange exception - which I'm not sure we should consider a
    # DynamoDB bug or feature - but it simplifies our own implementation as
    # well: if we remove a.c[1], the new item *will* have a new a.c[1] (the
    # previous a.c[2] is shifted back), so this value is returned. This is
    # very odd, but this is what DynamoDB does, so we should too...
    ret = test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
        UpdateExpression='REMOVE a.c[1]')
    assert ret['Attributes'] == {'a': {'c': [70]}}

@@ -265,16 +265,64 @@ def test_update_expression_multi_overlap(test_table_s):

# The problem isn't just with identical paths - we can't modify two paths that
# "overlap" in the sense that one is the ancestor of the other.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_multi_overlap_nested(test_table_s):
    p = random_string()
    with pytest.raises(ClientError, match='ValidationException.*overlap'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a = :val1, a.b = :val2',
            ExpressionAttributeValues={':val1': {'b': 7}, ':val2': 'there'})
    test_table_s.put_item(Item={'p': p, 'a': {'b': {'c': 2}}})
    with pytest.raises(ClientError, match='ValidationException.*overlap'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a.b = :val1, a.b.c = :val2',
            ExpressionAttributeValues={':val1': 'hi', ':val2': 'there'})
    # Note that the overlap checks happen before checking the actual content
    # of the item, so it doesn't matter that the item we want to modify
    # doesn't even exist or have the structure referenced by the paths below.
    for expr in ['SET a = :val1, a.b = :val2',
                 'SET a.b = :val1, a = :val2',
                 'SET a.b = :val1, a.b = :val2',
                 'SET a.b = :val1, a.b.c = :val2',
                 'SET a.b.c = :val1, a.b = :val2',
                 'SET a.b.c = :val1, a.b.c = :val2',
                 'SET a.b = :val1, a.b.c.d.e = :val2',
                 'SET a.b.c.d.e = :val1, a.b = :val2',
                 'SET a = :val1, a[1] = :val2',
                 'SET a[1] = :val1, a = :val2',
                 'SET a[1] = :val1, a[1] = :val2',
                 'SET a[1][1] = :val1, a[1] = :val2',
                 'SET a[1] = :val1, a[1][1] = :val2',
                 'SET a[1][1] = :val1, a[1][1] = :val2',
                 'SET a[1][1][1][1] = :val1, a[1][1] = :val2',
                 'SET a[1][1] = :val1, a[1][1][1][1] = :val2',
                 'SET a[1][1][1][1] = :val1, a[1][1][1][1] = :val2',
                ]:
        print(expr)
        with pytest.raises(ClientError, match='ValidationException.*overlap'):
            test_table_s.update_item(Key={'p': p}, UpdateExpression=expr,
                ExpressionAttributeValues={':val1': 2, ':val2': 'there'})
    # Obviously this test can trivially pass if the overlap check wrongly labels
    # everything as an overlap. So the test test_update_expression_multi_nested
    # below is important - it confirms that we can do multiple modifications
    # to the same item when they do not overlap.

# Besides the concept of "overlapping" paths tested above, DynamoDB also has
# the concept of "conflicting" paths - e.g., attempting to set both a.b and
# a[1] together doesn't make sense.
def test_update_expression_multi_conflict_nested(test_table_s):
    p = random_string()
    for expr in ['SET a.b = :val1, a[1] = :val2',
                 'SET a.b.c = :val1, a.b[2] = :val2',
                ]:
        print(expr)
        with pytest.raises(ClientError, match='ValidationException.*conflict'):
            test_table_s.update_item(Key={'p': p}, UpdateExpression=expr,
                ExpressionAttributeValues={':val1': 2, ':val2': 'there'})

# We can do several non-overlapping modifications to the same top-level
# attribute and to different top-level attributes in the same update
# expression.
def test_update_expression_multi_nested(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'x': 3, 'y': 4, 'c': {'y': 3}}, 'b': {'x': 1, 'y': 2}})
    test_table_s.update_item(Key={'p': p},
        UpdateExpression='SET a.b = :val1, a.c.d = :val2, b.x = :val3 REMOVE a.x, b.y',
        ExpressionAttributeValues={':val1': 10, ':val2': 'dog', ':val3': 17})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {
        'p': p,
        'a': {'y': 4, 'b': 10, 'c': {'y': 3, 'd': 'dog'}},
        'b': {'x': 17}}

# In the previous test we saw that *modifying* the same item twice in the same
# update is forbidden; but it is allowed to *read* an item in the same update
@@ -776,10 +824,19 @@ def test_update_expression_dot_in_name(test_table_s):
        ExpressionAttributeNames={'#a': 'a.b'})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a.b': 3}


# Below we have several tests of what happens when a nested attribute is
# on the left-hand side of an assignment, but an even simpler case of
# nested attributes is having one on the right-hand side of an assignment:
def test_update_expression_nested_attribute_rhs(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'b': 3, 'c': {'x': 7, 'y': 8}}, 'd': 5})
    test_table_s.update_item(Key={'p': p}, UpdateExpression='SET z = a.c.x')
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['z'] == 7

# A basic test for direct update of a nested attribute: One of the top-level
# attributes is itself a document, and we update only one of that document's
# nested attributes.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_nested_attribute_dot(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'b': 3, 'c': 4}, 'd': 5})
@@ -795,7 +852,6 @@ def test_update_expression_nested_attribute_dot(test_table_s):

# Similar test, for a list: one of the top-level attributes is a list, we
# can update one of its items.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_nested_attribute_index(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': ['one', 'two', 'three']})
@@ -817,7 +873,6 @@ def test_update_expression_nested_attribute_index_reference(test_table_s):
# Test that just like happens in top-level attributes, also in nested
# attributes, setting them replaces the old value - potentially an entire
# nested document - by the whole value (which may have a different type)
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_nested_different_type(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'b': 3, 'c': {'one': 1, 'two': 2}}})
@@ -827,7 +882,6 @@ def test_update_expression_nested_different_type(test_table_s):

# Yet another test of a nested attribute update. This one uses deeper
# levels of nesting (dots and indexes), and adds #name references to the mix.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_nested_deep(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'b': 3, 'c': ['hi', {'x': {'y': [3, 5, 7]}}]}})
@@ -841,20 +895,55 @@ def test_update_expression_nested_deep(test_table_s):

# A REMOVE operation can be used to remove nested attributes, and also
# individual list items.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_nested_remove(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'b': 3, 'c': ['hi', {'x': {'y': [3, 5, 7]}, 'q': 2}]}})
    test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a.c[1].x.y[1], a.c[1].q')
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == {'b': 3, 'c': ['hi', {'x': {'y': [3, 7]}}]}

# Removing a list item beyond the end of the list (e.g., REMOVE a[17] when
# the list only has three items) is silently ignored.
def test_update_expression_nested_remove_list_item_after_end(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': [4, 5, 6]})
    test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a[17]')

# If we remove a[1] and then change a[3], the index "3" refers to the position
# *before* the first removal.
def test_update_expression_nested_remove_list_item_original_number(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': [2, 3, 4, 5, 6, 7]})
    test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a[1] SET a[3] = :val',
        ExpressionAttributeValues={':val': 17})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == [2, 4, 17, 6, 7]
    # The order of the operations doesn't matter
    test_table_s.put_item(Item={'p': p, 'a': [2, 3, 4, 5, 6, 7]})
    test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a[3] = :val REMOVE a[1]',
        ExpressionAttributeValues={':val': 17})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == [2, 4, 17, 6, 7]

# DynamoDB allows an empty map. So removing the only member from a map leaves
# behind an empty map.
def test_update_expression_nested_remove_singleton_map(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'b': 1}})
    test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a.b')
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == {}

# DynamoDB allows an empty list. So removing the only member from a list leaves
# behind an empty list.
def test_update_expression_nested_remove_singleton_list(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': [1]})
    test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a[0]')
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == []

# The DynamoDB documentation specifies: "When you use SET to update a list
# element, the contents of that element are replaced with the new data that
# you specify. If the element does not already exist, SET will append the
# new element at the end of the list."
# So if we take a three-element list a, and set a[7], the new element
# will be put at the end of the list, not position 7 specifically.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_nested_attribute_update_array_out_of_bounds(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': ['one', 'two', 'three']})
@@ -864,9 +953,9 @@ def test_nested_attribute_update_array_out_of_bounds(test_table_s):
    # The DynamoDB documentation also says: "If you add multiple elements
    # in a single SET operation, the elements are sorted in order by element
    # number."
    test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a[84] = :val1, a[37] = :val2',
        ExpressionAttributeValues={':val1': 'a1', ':val2': 'a2'})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a': ['one', 'two', 'three', 'hello', 'a2', 'a1']}
    test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a[84] = :val1, a[37] = :val2, a[17] = :val3, a[50] = :val4',
        ExpressionAttributeValues={':val1': 'a1', ':val2': 'a2', ':val3': 'a3', ':val4': 'a4'})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a': ['one', 'two', 'three', 'hello', 'a3', 'a2', 'a4', 'a1']}

# Test what happens if we try to write to a.b, which would only make sense if
# a were a nested document, but a doesn't exist, or exists and is NOT a nested
@@ -875,9 +964,6 @@ def test_nested_attribute_update_array_out_of_bounds(test_table_s):
# ClientError: An error occurred (ValidationException) when calling the
# UpdateItem operation: The document path provided in the update expression
# is invalid for update
# Because Scylla doesn't read before write, it cannot detect this as an error,
# so we'll probably want to allow for that possibility as well.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_nested_attribute_update_bad_path_dot(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hello', 'b': ['hi']})
@@ -890,17 +976,56 @@ def test_nested_attribute_update_bad_path_dot(test_table_s):
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='SET c.c = :val1',
            ExpressionAttributeValues={':val1': 7})
    # Same errors for "remove" operation.
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a.c')
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE b.c')
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE c.c')
    # Same error when the item doesn't exist at all:
    p = random_string()
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='SET c.c = :val1',
            ExpressionAttributeValues={':val1': 7})
    # HOWEVER, it *is* allowed to remove a random path from a non-existent
    # item. I don't know why. See the next test -
    # test_nested_attribute_remove_from_missing_item

# Though in the above test (test_nested_attribute_update_bad_path_dot) we
# showed that DynamoDB does not allow REMOVE x.y if attribute x doesn't
# exist - and generates a ValidationException - it turns out that if the
# entire item doesn't exist, then a REMOVE x.y is silently ignored.
# I don't understand why they did this.
@pytest.mark.xfail(reason="for unknown reason, DynamoDB allows REMOVE x.y when item doesn't exist")
def test_nested_attribute_remove_from_missing_item(test_table_s):
    p = random_string()
    test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE x.y')
    test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE x[0]')

# Similarly for other types of bad paths - using [0] on something which
# isn't an array,
@pytest.mark.xfail(reason="nested updates not yet implemented")
# doesn't exist or isn't an array.
def test_nested_attribute_update_bad_path_array(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': 'hello'})
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a[0] = :val1',
            ExpressionAttributeValues={':val1': 7})
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='SET b[0] = :val1',
            ExpressionAttributeValues={':val1': 7})
    # Same errors for "remove" operation.
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a[0]')
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE b[0]')
    # Same error when the item doesn't exist at all:
    p = random_string()
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='SET b[0] = :val1',
            ExpressionAttributeValues={':val1': 7})
    # HOWEVER, it *is* allowed to remove a random path from a non-existent
    # item. I don't know why... See test_nested_attribute_remove_from_missing_item

# DynamoDB does not allow empty sets.
# Trying to ask UpdateItem to put one of these in an attribute should be
@@ -921,4 +1046,27 @@ def test_update_expression_empty_attribute(test_table_s):
        UpdateExpression='SET d = :v1, e = :v2, f = :v3, g = :v4',
        ExpressionAttributeValues={':v1': [], ':v2': {}, ':v3': '', ':v4': b''})
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'd': [], 'e': {}, 'f': '', 'g': b''}
#

# Verify which kind of update operations require a read-before-write (a.k.a.
# read-modify-write, or RMW). We test this by using a table configured with
# the "forbid_rmw" isolation mode and checking which writes succeed or fail.
# This is a Scylla-only test (the test_table_s_forbid_rmw fixture implies scylla_only).
def test_update_expression_when_rmw(test_table_s_forbid_rmw):
    table = test_table_s_forbid_rmw
    p = random_string()
    # A write with a RHS (right-hand side) being a constant from the query
    # doesn't need RMW:
    table.update_item(Key={'p': p},
        UpdateExpression='SET a = :val',
        ExpressionAttributeValues={':val': 3})
    # But if the LHS (left-hand side) of the assignment is a document path,
    # it *does* need RMW:
    with pytest.raises(ClientError, match='ValidationException.*write isolation policy'):
        table.update_item(Key={'p': p},
            UpdateExpression='SET a.b = :val',
            ExpressionAttributeValues={':val': 3})
    assert table.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == 3
    # A write with a path in the RHS of a SET also needs RMW
    with pytest.raises(ClientError, match='ValidationException.*write isolation policy'):
        table.update_item(Key={'p': p},
            UpdateExpression='SET b = a')