Merge 'Alternator: support nested attribute paths...

in all expressions' from Nadav Har'El.

This series fixes #5024 - which is about adding support for nested attribute
paths (e.g., a.b.c[2]) to Alternator.  The series adds complete support for this
feature in ProjectionExpression, ConditionExpression, FilterExpression and
UpdateExpression - and also its combination with ReturnValues. Many relevant
tests - and also some new tests added in this series - now pass.

The first patch in the series fixes #8043, a bug in some error cases in
conditions, which was discovered while working on this series, and is
conceptually separate from the rest of the series.

Closes #8066

* github.com:scylladb/scylla:
  alternator: correct implementation of UpdateItem with nested attributes and ReturnValues
  alternator: fix bug in ReturnValues=UPDATED_NEW
  alternator: implemented nested attribute paths in UpdateExpression
  alternator: limit the depth of nested paths
  alternator: prepare for UpdateItem nested attribute paths
  alternator: overhaul ProjectionExpression hierarchy implementation
  alternator: make parsed::path object printable
  alternator-test: a few more ProjectionExpression conflict test cases
  alternator-test: improve tests for nested attributes in UpdateExpression
  alternator: support attribute paths in ConditionExpression, FilterExpression
  alternator-test: improve tests for nested attributes in ConditionExpression
  alternator: support attribute paths in ProjectionExpression
  alternator: overhaul attrs_to_get handling
  alternator-test: additional tests for attribute paths in ProjectionExpression
  alternator-test: harden attribute-path tests for ProjectionExpression
  alternator: fix ValidationException in FilterExpression - and more

(cherry picked from commit cbbb7f08a0)
This commit is contained in:
Piotr Sarna
2021-02-15 13:14:10 +01:00
committed by Nadav Har'El
parent 7b19cc17d6
commit a810e57684
15 changed files with 1038 additions and 218 deletions

View File

@@ -202,7 +202,7 @@ static schema_ptr get_table(service::storage_proxy& proxy, const rjson::value& r
if (!schema) {
// if we get here then the name was missing, since syntax or missing actual CF
// checks throw. Slow path, but just call get_table_name to generate exception.
get_table_name(request);
get_table_name(request);
}
return schema;
}
@@ -1882,18 +1882,182 @@ static std::string get_item_type_string(const rjson::value& v) {
return it->name.GetString();
}
// attrs_to_get saves for each top-level attribute an attrs_to_get_node,
// a hierarchy of subparts that need to be kept. The following function
// takes a given JSON value and drops its parts which weren't asked to be
// kept. It modifies the given JSON value, or returns false to signify that
// the entire object should be dropped.
// Note that the JSON value is assumed to be encoded using the DynamoDB
// conventions - i.e., it is really a map whose key has a type string,
// and the value is the real object.
template<typename T>
static bool hierarchy_filter(rjson::value& val, const attribute_path_map_node<T>& h) {
if (!val.IsObject() || val.MemberCount() != 1) {
// This shouldn't happen. We shouldn't have stored malformed objects.
// But today Alternator does not validate the structure of nested
// documents before storing them, so this can happen on read.
throw api_error::internal(format("Malformed value object read: {}", val));
}
// DynamoDB serialization: a single-member map {type_string: real_value}.
const char* type = val.MemberBegin()->name.GetString();
rjson::value& v = val.MemberBegin()->value;
if (h.has_members()) {
// The filter asks to keep only specific members (dotted sub-paths),
// so v must be a map ("M" type) for any member to match.
const auto& members = h.get_members();
if (type[0] != 'M' || !v.IsObject()) {
// If v is not an object (dictionary, map), none of the members
// can match.
return false;
}
// Build a new map holding only the requested members, recursing when
// just a part of a member was requested.
rjson::value newv = rjson::empty_object();
for (auto it = v.MemberBegin(); it != v.MemberEnd(); ++it) {
std::string attr = it->name.GetString();
auto x = members.find(attr);
if (x != members.end()) {
if (x->second) {
// Only a part of this attribute is to be filtered, do it.
if (hierarchy_filter(it->value, *x->second)) {
rjson::set_with_string_name(newv, attr, std::move(it->value));
}
} else {
// The entire attribute is to be kept
rjson::set_with_string_name(newv, attr, std::move(it->value));
}
}
}
if (newv.MemberCount() == 0) {
// Nothing survived the filter - drop the whole object.
return false;
}
v = newv;
} else if (h.has_indexes()) {
// The filter asks to keep only specific array positions ([i] paths),
// so v must be a list ("L" type) for any index to match.
const auto& indexes = h.get_indexes();
if (type[0] != 'L' || !v.IsArray()) {
return false;
}
rjson::value newv = rjson::empty_array();
const auto& a = v.GetArray();
for (unsigned i = 0; i < v.Size(); i++) {
auto x = indexes.find(i);
if (x != indexes.end()) {
if (x->second) {
if (hierarchy_filter(a[i], *x->second)) {
rjson::push_back(newv, std::move(a[i]));
}
} else {
// The entire attribute is to be kept
rjson::push_back(newv, std::move(a[i]));
}
}
}
if (newv.Size() == 0) {
return false;
}
v = newv;
}
// If h has neither members nor indexes, the whole value was requested
// and val is left untouched.
return true;
}
// Add a path to an attribute_path_map. Throws a validation error if the path
// "overlaps" with one already in the filter (one is a sub-path of the other)
// or "conflicts" with it (both a member and index is requested).
// "source" names the request attribute being processed (e.g.,
// "ProjectionExpression") and is used only to build the error messages.
template<typename T>
void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const parsed::path& p, T value = {}) {
using node = attribute_path_map_node<T>;
// The first step is to look for the top-level attribute (p.root()):
auto it = map.find(p.root());
if (it == map.end()) {
if (p.has_operators()) {
// Create an empty node for the top-level attribute; the value will
// be placed in an inner node during the walk below.
it = map.emplace(p.root(), node {std::nullopt}).first;
} else {
(void) map.emplace(p.root(), node {std::move(value)}).first;
// Value inserted for top-level node. We're done.
return;
}
} else if(!p.has_operators()) {
// If p is top-level and we already have it or a part of it
// in map, it's a forbidden overlapping path.
throw api_error::validation(format(
"Invalid {}: two document paths overlap at {}", source, p.root()));
} else if (it->second.has_value()) {
// If we're here, it != map.end() && p.has_operators && it->second.has_value().
// This means the top-level attribute already has a value, and we're
// trying to add a non-top-level value. It's an overlap.
throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p.root()));
}
node* h = &it->second;
// The second step is to walk h from the top-level node to the inner node
// where we're supposed to insert the value:
for (const auto& op : p.operators()) {
std::visit(overloaded_functor {
[&] (const std::string& member) {
// A ".member" operator: the node here must be (or become) a
// members node. An indexes node here is a conflict, a value
// node is an overlap.
if (h->is_empty()) {
*h = node {typename node::members_t()};
} else if (h->has_indexes()) {
throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
} else if (h->has_value()) {
throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
}
typename node::members_t& members = h->get_members();
auto it = members.find(member);
if (it == members.end()) {
it = members.insert({member, make_shared<node>()}).first;
}
h = it->second.get();
},
[&] (unsigned index) {
// A "[index]" operator: the node here must be (or become) an
// indexes node. A members node here is a conflict, a value
// node is an overlap.
if (h->is_empty()) {
*h = node {typename node::indexes_t()};
} else if (h->has_members()) {
throw api_error::validation(format("Invalid {}: two document paths conflict at {}", source, p));
} else if (h->has_value()) {
throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
}
typename node::indexes_t& indexes = h->get_indexes();
auto it = indexes.find(index);
if (it == indexes.end()) {
it = indexes.insert({index, make_shared<node>()}).first;
}
h = it->second.get();
}
}, op);
}
// Finally, insert the value in the node h.
if (h->is_empty()) {
*h = node {std::move(value)};
} else {
// A node already exists at p - either a value (duplicate path) or
// deeper members/indexes (a sub-path was already added).
throw api_error::validation(format("Invalid {}: two document paths overlap at {}", source, p));
}
}
// A very simplified version of the above function for the special case of
// adding only top-level attribute. It's not only simpler, we also use a
// different error message, referring to a "duplicate attribute" instead of
// "overlapping paths". DynamoDB also has this distinction (errors in
// AttributesToGet refer to duplicates, not overlaps, but errors in
// ProjectionExpression refer to overlap - even if it's an exact duplicate).
template<typename T>
void attribute_path_map_add(const char* source, attribute_path_map<T>& map, const std::string& attr, T value = {}) {
using node = attribute_path_map_node<T>;
auto it = map.find(attr);
if (it == map.end()) {
map.emplace(attr, node {std::move(value)});
} else {
// attr was already requested - DynamoDB calls this a "duplicate".
throw api_error::validation(format(
"Invalid {}: Duplicate attribute: {}", source, attr));
}
}
// calculate_attrs_to_get() takes either AttributesToGet or
// ProjectionExpression parameters (having both is *not* allowed),
// and returns the list of cells we need to read, or an empty set when
// *all* attributes are to be returned.
// In our current implementation, only top-level attributes are stored
// as cells, and nested documents are stored serialized as JSON.
// So this function currently returns only the top-level attributes
// but we also need to add, after the query, filtering to keep only
// the parts of the JSON attributes that were chosen in the paths'
// operators. Because we don't have such filtering yet (FIXME), we fail here
// if the requested paths are anything but top-level attributes.
std::unordered_set<std::string> calculate_attrs_to_get(const rjson::value& req, std::unordered_set<std::string>& used_attribute_names) {
// However, in our current implementation, only top-level attributes are
// stored as separate cells - a nested document is stored serialized together
// (as JSON) in the same cell. So this function return a map - each key is the
// top-level attribute we will need to read, and the value for each
// top-level attribute is the partial hierarchy (struct hierarchy_filter)
// that we will need to extract from that serialized JSON.
// For example, if ProjectionExpression lists a.b and a.c[2], we
// return one top-level attribute name, "a", with the value "{b, c[2]}".
static attrs_to_get calculate_attrs_to_get(const rjson::value& req, std::unordered_set<std::string>& used_attribute_names) {
const bool has_attributes_to_get = req.HasMember("AttributesToGet");
const bool has_projection_expression = req.HasMember("ProjectionExpression");
if (has_attributes_to_get && has_projection_expression) {
@@ -1902,9 +2066,9 @@ std::unordered_set<std::string> calculate_attrs_to_get(const rjson::value& req,
}
if (has_attributes_to_get) {
const rjson::value& attributes_to_get = req["AttributesToGet"];
std::unordered_set<std::string> ret;
attrs_to_get ret;
for (auto it = attributes_to_get.Begin(); it != attributes_to_get.End(); ++it) {
ret.insert(it->GetString());
attribute_path_map_add("AttributesToGet", ret, it->GetString());
}
return ret;
} else if (has_projection_expression) {
@@ -1917,24 +2081,13 @@ std::unordered_set<std::string> calculate_attrs_to_get(const rjson::value& req,
throw api_error::validation(e.what());
}
resolve_projection_expression(paths_to_get, expression_attribute_names, used_attribute_names);
std::unordered_set<std::string> seen_column_names;
auto ret = boost::copy_range<std::unordered_set<std::string>>(paths_to_get |
boost::adaptors::transformed([&] (const parsed::path& p) {
if (p.has_operators()) {
// FIXME: this check will need to change when we support non-toplevel attributes
throw api_error::validation("Non-toplevel attributes in ProjectionExpression not yet implemented");
}
if (!seen_column_names.insert(p.root()).second) {
// FIXME: this check will need to change when we support non-toplevel attributes
throw api_error::validation(
format("Invalid ProjectionExpression: two document paths overlap with each other: {} and {}.",
p.root(), p.root()));
}
return p.root();
}));
attrs_to_get ret;
for (const parsed::path& p : paths_to_get) {
attribute_path_map_add("ProjectionExpression", ret, p);
}
return ret;
}
// An empty set asks to read everything
// An empty map asks to read everything
return {};
}
@@ -1955,7 +2108,7 @@ std::unordered_set<std::string> calculate_attrs_to_get(const rjson::value& req,
*/
void executor::describe_single_item(const cql3::selection::selection& selection,
const std::vector<bytes_opt>& result_row,
const std::unordered_set<std::string>& attrs_to_get,
const attrs_to_get& attrs_to_get,
rjson::value& item,
bool include_all_embedded_attributes)
{
@@ -1976,7 +2129,16 @@ void executor::describe_single_item(const cql3::selection::selection& selection,
std::string attr_name = value_cast<sstring>(entry.first);
if (include_all_embedded_attributes || attrs_to_get.empty() || attrs_to_get.contains(attr_name)) {
bytes value = value_cast<bytes>(entry.second);
rjson::set_with_string_name(item, attr_name, deserialize_item(value));
rjson::value v = deserialize_item(value);
auto it = attrs_to_get.find(attr_name);
if (it != attrs_to_get.end()) {
// attrs_to_get may have asked for only part of this attribute:
if (hierarchy_filter(v, it->second)) {
rjson::set_with_string_name(item, attr_name, std::move(v));
}
} else {
rjson::set_with_string_name(item, attr_name, std::move(v));
}
}
}
}
@@ -1988,7 +2150,7 @@ std::optional<rjson::value> executor::describe_single_item(schema_ptr schema,
const query::partition_slice& slice,
const cql3::selection::selection& selection,
const query::result& query_result,
const std::unordered_set<std::string>& attrs_to_get) {
const attrs_to_get& attrs_to_get) {
rjson::value item = rjson::empty_object();
cql3::selection::result_set_builder builder(selection, gc_clock::now(), cql_serialization_format::latest());
@@ -2024,8 +2186,16 @@ static bool check_needs_read_before_write(const parsed::value& v) {
}, v._value);
}
static bool check_needs_read_before_write(const parsed::update_expression& update_expression) {
return boost::algorithm::any_of(update_expression.actions(), [](const parsed::update_expression::action& action) {
static bool check_needs_read_before_write(const attribute_path_map<parsed::update_expression::action>& update_expression) {
return boost::algorithm::any_of(update_expression, [](const auto& p) {
if (!p.second.has_value()) {
// If the action is not on the top-level attribute, we need to
// read the old item: we change only a part of the top-level
// attribute, and write the full top-level attribute back.
return true;
}
// Otherwise, the action p.second.get_value() is just on top-level
// attribute. Check if it needs read-before-write:
return std::visit(overloaded_functor {
[&] (const parsed::update_expression::action::set& a) -> bool {
return check_needs_read_before_write(a._rhs._v1) || (a._rhs._op != 'v' && check_needs_read_before_write(a._rhs._v2));
@@ -2039,7 +2209,7 @@ static bool check_needs_read_before_write(const parsed::update_expression& updat
[&] (const parsed::update_expression::action::del& a) -> bool {
return true;
}
}, action._action);
}, p.second.get_value()._action);
});
}
@@ -2048,7 +2218,11 @@ public:
// Some information parsed during the constructor to check for input
// errors, and cached to be used again during apply().
rjson::value* _attribute_updates;
parsed::update_expression _update_expression;
// Instead of keeping a parsed::update_expression with an unsorted list
// list of actions, we keep them in an attribute_path_map which groups
// them by top-level attribute, and detects forbidden overlaps/conflicts.
attribute_path_map<parsed::update_expression::action> _update_expression;
parsed::condition_expression _condition_expression;
update_item_operation(service::storage_proxy& proxy, rjson::value&& request);
@@ -2079,16 +2253,22 @@ update_item_operation::update_item_operation(service::storage_proxy& proxy, rjso
throw api_error::validation("UpdateExpression must be a string");
}
try {
_update_expression = parse_update_expression(update_expression->GetString());
resolve_update_expression(_update_expression,
parsed::update_expression expr = parse_update_expression(update_expression->GetString());
resolve_update_expression(expr,
expression_attribute_names, expression_attribute_values,
used_attribute_names, used_attribute_values);
if (expr.empty()) {
throw api_error::validation("Empty expression in UpdateExpression is not allowed");
}
for (auto& action : expr.actions()) {
// Unfortunately we need to copy the action's path, because
// we std::move the action object.
auto p = action._path;
attribute_path_map_add("UpdateExpression", _update_expression, p, std::move(action));
}
} catch(expressions_syntax_error& e) {
throw api_error::validation(e.what());
}
if (_update_expression.empty()) {
throw api_error::validation("Empty expression in UpdateExpression is not allowed");
}
}
_attribute_updates = rjson::find(_request, "AttributeUpdates");
if (_attribute_updates) {
@@ -2130,6 +2310,187 @@ update_item_operation::needs_read_before_write() const {
(_returnvalues != returnvalues::NONE && _returnvalues != returnvalues::UPDATED_NEW);
}
// action_result() returns the result of applying an UpdateItem action -
// this result is either a JSON object or an unset optional which indicates
// the action was a deletion. The caller (update_item_operation::apply()
// below) will either write this JSON as the content of a column, or
// use it as a piece in a bigger top-level attribute.
// "previous_item" is the pre-update item (may be needed by the right-hand
// side of the action, e.g., SET a = a + :v), or nullptr if none was read.
static std::optional<rjson::value> action_result(
const parsed::update_expression::action& action,
const rjson::value* previous_item) {
return std::visit(overloaded_functor {
[&] (const parsed::update_expression::action::set& a) -> std::optional<rjson::value> {
// SET: the result is simply the evaluated right-hand side.
return calculate_value(a._rhs, previous_item);
},
[&] (const parsed::update_expression::action::remove& a) -> std::optional<rjson::value> {
// REMOVE: an unset optional asks the caller to delete this path.
return std::nullopt;
},
[&] (const parsed::update_expression::action::add& a) -> std::optional<rjson::value> {
parsed::value base;
parsed::value addition;
base.set_path(action._path);
addition.set_constant(a._valref);
// v1 is the attribute's current value (Null if absent), v2 the operand.
rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item);
rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item);
rjson::value result;
// An ADD can be used to create a new attribute (when
// v1.IsNull()) or to add to a pre-existing attribute:
if (v1.IsNull()) {
std::string v2_type = get_item_type_string(v2);
if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
result = v2;
} else {
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
}
} else {
std::string v1_type = get_item_type_string(v1);
if (v1_type == "N") {
// Numeric ADD: both operands must be numbers.
if (get_item_type_string(v2) != "N") {
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
}
result = number_add(v1, v2);
} else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
// Set ADD: both operands must be sets of the same type.
if (get_item_type_string(v2) != v1_type) {
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
}
result = set_sum(v1, v2);
} else {
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
}
}
return result;
},
[&] (const parsed::update_expression::action::del& a) -> std::optional<rjson::value> {
parsed::value base;
parsed::value subset;
base.set_path(action._path);
subset.set_constant(a._valref);
rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item);
rjson::value v2 = calculate_value(subset, calculate_value_caller::UpdateExpression, previous_item);
if (!v1.IsNull()) {
// DELETE removes v2's elements from the set v1; set_diff may
// itself return nullopt when the set becomes empty.
return set_diff(v1, v2);
}
// When we return nullopt here, we ask to *delete* this attribute,
// which is unnecessary because we know the attribute does not
// exist anyway. This is a waste, but a small one. Note that also
// for the "remove" action above we don't bother to check if the
// previous_item has anything to remove.
return std::nullopt;
}
}, action._action);
}
// Print an attribute_path_map_node<action> as the list of paths it contains:
// Each stored action is printed as its original path (space-separated);
// this printout is used when building error messages about invalid paths.
static std::ostream& operator<<(std::ostream& out, const attribute_path_map_node<parsed::update_expression::action>& h) {
if (h.has_value()) {
// A leaf: print the path of the action stored here.
out << " " << h.get_value()._path;
} else if (h.has_members()) {
// Recurse into each ".member" child.
for (auto& member : h.get_members()) {
out << *member.second;
}
} else if (h.has_indexes()) {
// Recurse into each "[index]" child.
for (auto& index : h.get_indexes()) {
out << *index.second;
}
}
return out;
}
// Apply the hierarchy of actions in an attribute_path_map_node<action> to a
// JSON object which uses DynamoDB's serialization conventions. The complete,
// unmodified, previous_item is also necessary for the right-hand sides of the
// actions. Modifies obj in-place or returns false if it is to be removed.
static bool hierarchy_actions(
rjson::value& obj,
const attribute_path_map_node<parsed::update_expression::action>& h,
const rjson::value* previous_item)
{
if (!obj.IsObject() || obj.MemberCount() != 1) {
// This shouldn't happen. We shouldn't have stored malformed objects.
// But today Alternator does not validate the structure of nested
// documents before storing them, so this can happen on read.
throw api_error::validation(format("Malformed value object read: {}", obj));
}
// DynamoDB serialization: a single-member map {type_string: real_value}.
const char* type = obj.MemberBegin()->name.GetString();
rjson::value& v = obj.MemberBegin()->value;
if (h.has_value()) {
// Action replacing everything in this position in the hierarchy
std::optional<rjson::value> newv = action_result(h.get_value(), previous_item);
if (newv) {
obj = std::move(*newv);
} else {
// The action was a deletion - ask the caller to remove obj.
return false;
}
} else if (h.has_members()) {
if (type[0] != 'M' || !v.IsObject()) {
// A .something on a non-map doesn't work.
throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
}
for (const auto& member : h.get_members()) {
std::string attr = member.first;
const attribute_path_map_node<parsed::update_expression::action>& subh = *member.second;
rjson::value *subobj = rjson::find(v, attr);
if (subobj) {
// The member exists - recurse, and remove it if the nested
// actions asked for its deletion.
if (!hierarchy_actions(*subobj, subh, previous_item)) {
rjson::remove_member(v, attr);
}
} else {
// When a.b does not exist, setting a.b itself (i.e.
// subh.has_value()) is fine, but setting a.b.c is not.
if (subh.has_value()) {
std::optional<rjson::value> newv = action_result(subh.get_value(), previous_item);
if (newv) {
rjson::set_with_string_name(v, attr, std::move(*newv));
} else {
throw api_error::validation(format("Can't remove document path {} - not present in item",
subh.get_value()._path));
}
} else {
throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
}
}
}
} else if (h.has_indexes()) {
if (type[0] != 'L' || !v.IsArray()) {
// A [i] on a non-list doesn't work.
throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
}
unsigned nremoved = 0;
for (const auto& index : h.get_indexes()) {
// Adjust the requested index by the number of lower-indexed
// elements already erased in this loop (see comment below).
unsigned i = index.first - nremoved;
const attribute_path_map_node<parsed::update_expression::action>& subh = *index.second;
if (i < v.Size()) {
if (!hierarchy_actions(v[i], subh, previous_item)) {
v.Erase(v.Begin() + i);
// If we have the actions "REMOVE a[1] SET a[3] = :val",
// the index 3 refers to the original indexes, before any
// items were removed. So we offset the next indexes
// (which are guaranteed to be higher than i - indexes is
// a sorted map) by an increased "nremoved".
nremoved++;
}
} else {
// If a[7] does not exist, setting a[7] itself (i.e.
// subh.has_value()) is fine - and appends an item, though
// not necessarily with index 7. But setting a[7].b will
// not work.
if (subh.has_value()) {
std::optional<rjson::value> newv = action_result(subh.get_value(), previous_item);
if (newv) {
rjson::push_back(v, std::move(*newv));
} else {
// Removing a[7] when the list has fewer elements is
// silently ignored. It's not considered an error.
}
} else {
throw api_error::validation(format("UpdateExpression: document paths not valid for this item:{}", h));
}
}
}
}
return true;
}
std::optional<mutation>
update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::timestamp_type ts) const {
if (!verify_expected(_request, previous_item.get()) ||
@@ -2144,17 +2505,37 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
auto& row = m.partition().clustered_row(*_schema, _ck);
attribute_collector attrs_collector;
bool any_updates = false;
auto do_update = [&] (bytes&& column_name, const rjson::value& json_value) {
auto do_update = [&] (bytes&& column_name, const rjson::value& json_value,
const attribute_path_map_node<parsed::update_expression::action>* h = nullptr) {
any_updates = true;
if (_returnvalues == returnvalues::ALL_NEW ||
_returnvalues == returnvalues::UPDATED_NEW) {
if (_returnvalues == returnvalues::ALL_NEW) {
rjson::set_with_string_name(_return_attributes,
to_sstring_view(column_name), rjson::copy(json_value));
to_sstring_view(column_name), rjson::copy(json_value));
} else if (_returnvalues == returnvalues::UPDATED_NEW) {
rjson::value&& v = rjson::copy(json_value);
if (h) {
// If the operation was only on specific attribute paths,
// leave only them in _return_attributes.
if (hierarchy_filter(v, *h)) {
rjson::set_with_string_name(_return_attributes,
to_sstring_view(column_name), std::move(v));
}
} else {
rjson::set_with_string_name(_return_attributes,
to_sstring_view(column_name), std::move(v));
}
} else if (_returnvalues == returnvalues::UPDATED_OLD && previous_item) {
std::string_view cn = to_sstring_view(column_name);
const rjson::value* col = rjson::find(*previous_item, cn);
if (col) {
rjson::set_with_string_name(_return_attributes, cn, rjson::copy(*col));
rjson::value&& v = rjson::copy(*col);
if (h) {
if (hierarchy_filter(v, *h)) {
rjson::set_with_string_name(_return_attributes, cn, std::move(v));
}
} else {
rjson::set_with_string_name(_return_attributes, cn, std::move(v));
}
}
}
const column_definition* cdef = _schema->get_column_definition(column_name);
@@ -2196,7 +2577,7 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
// can just move previous_item later, when we don't need it any more.
if (_returnvalues == returnvalues::ALL_NEW) {
if (previous_item) {
_return_attributes = std::move(*previous_item);
_return_attributes = rjson::copy(*previous_item);
} else {
// If there is no previous item, usually a new item is created
// and contains they given key. This may be cancelled at the end
@@ -2209,88 +2590,44 @@ update_item_operation::apply(std::unique_ptr<rjson::value> previous_item, api::t
}
if (!_update_expression.empty()) {
std::unordered_set<std::string> seen_column_names;
for (auto& action : _update_expression.actions()) {
if (action._path.has_operators()) {
// FIXME: implement this case
throw api_error::validation("UpdateItem support for nested updates not yet implemented");
}
std::string column_name = action._path.root();
for (auto& actions : _update_expression) {
// The actions of _update_expression are grouped by top-level
// attributes. Here, all actions in actions.second share the same
// top-level attribute actions.first.
std::string column_name = actions.first;
const column_definition* cdef = _schema->get_column_definition(to_bytes(column_name));
if (cdef && cdef->is_primary_key()) {
throw api_error::validation(
format("UpdateItem cannot update key column {}", column_name));
throw api_error::validation(format("UpdateItem cannot update key column {}", column_name));
}
// DynamoDB forbids multiple updates in the same expression to
// modify overlapping document paths. Updates of one expression
// have the same timestamp, so it's unclear which would "win".
// FIXME: currently, without full support for document paths,
// we only check if the paths' roots are the same.
if (!seen_column_names.insert(column_name).second) {
throw api_error::validation(
format("Invalid UpdateExpression: two document paths overlap with each other: {} and {}.",
column_name, column_name));
}
std::visit(overloaded_functor {
[&] (const parsed::update_expression::action::set& a) {
auto value = calculate_value(a._rhs, previous_item.get());
do_update(to_bytes(column_name), value);
},
[&] (const parsed::update_expression::action::remove& a) {
if (actions.second.has_value()) {
// An action on a top-level attribute column_name. The single
// action is actions.second.get_value(). We can simply invoke
// the action and replace the attribute with its result:
std::optional<rjson::value> result = action_result(actions.second.get_value(), previous_item.get());
if (result) {
do_update(to_bytes(column_name), *result);
} else {
do_delete(to_bytes(column_name));
},
[&] (const parsed::update_expression::action::add& a) {
parsed::value base;
parsed::value addition;
base.set_path(action._path);
addition.set_constant(a._valref);
rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
rjson::value v2 = calculate_value(addition, calculate_value_caller::UpdateExpression, previous_item.get());
rjson::value result;
// An ADD can be used to create a new attribute (when
// v1.IsNull()) or to add to a pre-existing attribute:
if (v1.IsNull()) {
std::string v2_type = get_item_type_string(v2);
if (v2_type == "N" || v2_type == "SS" || v2_type == "NS" || v2_type == "BS") {
result = v2;
} else {
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v2));
}
} else {
std::string v1_type = get_item_type_string(v1);
if (v1_type == "N") {
if (get_item_type_string(v2) != "N") {
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
}
result = number_add(v1, v2);
} else if (v1_type == "SS" || v1_type == "NS" || v1_type == "BS") {
if (get_item_type_string(v2) != v1_type) {
throw api_error::validation(format("Incorrect operand type for operator or function. Expected {}: {}", v1_type, rjson::print(v2)));
}
result = set_sum(v1, v2);
} else {
throw api_error::validation(format("An operand in the update expression has an incorrect data type: {}", v1));
}
}
do_update(to_bytes(column_name), result);
},
[&] (const parsed::update_expression::action::del& a) {
parsed::value base;
parsed::value subset;
base.set_path(action._path);
subset.set_constant(a._valref);
rjson::value v1 = calculate_value(base, calculate_value_caller::UpdateExpression, previous_item.get());
rjson::value v2 = calculate_value(subset, calculate_value_caller::UpdateExpression, previous_item.get());
if (!v1.IsNull()) {
std::optional<rjson::value> result = set_diff(v1, v2);
if (result) {
do_update(to_bytes(column_name), *result);
} else {
do_delete(to_bytes(column_name));
}
}
}
}, action._action);
} else {
// We have actions on a path or more than one path in the same
// top-level attribute column_name - but not on the top-level
// attribute as a whole. We already read the full top-level
// attribute (see check_needs_read_before_write()), and now we
// need to modify pieces of it and write back the entire
// top-level attribute.
if (!previous_item) {
throw api_error::validation(format("UpdateItem cannot update nested document path on non-existent item"));
}
const rjson::value *toplevel = rjson::find(*previous_item, column_name);
if (!toplevel) {
throw api_error::validation(format("UpdateItem cannot update document path: missing attribute {}",
column_name));
}
rjson::value result = rjson::copy(*toplevel);
hierarchy_actions(result, actions.second, previous_item.get());
do_update(to_bytes(column_name), std::move(result), &actions.second);
}
}
}
if (_returnvalues == returnvalues::ALL_OLD && previous_item) {
@@ -2408,7 +2745,7 @@ static rjson::value describe_item(schema_ptr schema,
const query::partition_slice& slice,
const cql3::selection::selection& selection,
const query::result& query_result,
const std::unordered_set<std::string>& attrs_to_get) {
const attrs_to_get& attrs_to_get) {
std::optional<rjson::value> opt_item = executor::describe_single_item(std::move(schema), slice, selection, std::move(query_result), attrs_to_get);
if (!opt_item) {
// If there is no matching item, we're supposed to return an empty
@@ -2480,7 +2817,7 @@ future<executor::request_return_type> executor::batch_get_item(client_state& cli
struct table_requests {
schema_ptr schema;
db::consistency_level cl;
std::unordered_set<std::string> attrs_to_get;
attrs_to_get attrs_to_get;
struct single_request {
partition_key pk;
clustering_key ck;
@@ -2694,7 +3031,7 @@ void filter::for_filters_on(const noncopyable_function<void(std::string_view)>&
class describe_items_visitor {
typedef std::vector<const column_definition*> columns_t;
const columns_t& _columns;
const std::unordered_set<std::string>& _attrs_to_get;
const attrs_to_get& _attrs_to_get;
std::unordered_set<std::string> _extra_filter_attrs;
const filter& _filter;
typename columns_t::const_iterator _column_it;
@@ -2703,7 +3040,7 @@ class describe_items_visitor {
size_t _scanned_count;
public:
describe_items_visitor(const columns_t& columns, const std::unordered_set<std::string>& attrs_to_get, filter& filter)
describe_items_visitor(const columns_t& columns, const attrs_to_get& attrs_to_get, filter& filter)
: _columns(columns)
, _attrs_to_get(attrs_to_get)
, _filter(filter)
@@ -2752,6 +3089,12 @@ public:
std::string attr_name = value_cast<sstring>(entry.first);
if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name) || _extra_filter_attrs.contains(attr_name)) {
bytes value = value_cast<bytes>(entry.second);
// Even if _attrs_to_get asked to keep only a part of a
// top-level attribute, we keep the entire attribute
// at this stage, because the item filter might still
// need the other parts (it was easier for us to keep
// extra_filter_attrs at top-level granularity). We'll
// filter the unneeded parts after item filtering.
rjson::set_with_string_name(_item, attr_name, deserialize_item(value));
}
}
@@ -2762,11 +3105,24 @@ public:
void end_row() {
if (_filter.check(_item)) {
// As noted above, we kept entire top-level attributes listed in
// _attrs_to_get. We may need to only keep parts of them.
for (const auto& attr: _attrs_to_get) {
// If !attr.has_value() it means we were asked not to keep
// attr entirely, but just parts of it.
if (!attr.second.has_value()) {
rjson::value* toplevel= rjson::find(_item, attr.first);
if (toplevel && !hierarchy_filter(*toplevel, attr.second)) {
rjson::remove_member(_item, attr.first);
}
}
}
// Remove the extra attributes _extra_filter_attrs which we had
// to add just for the filter, and not requested to be returned:
for (const auto& attr : _extra_filter_attrs) {
rjson::remove_member(_item, attr);
}
rjson::push_back(_items, std::move(_item));
}
_item = rjson::empty_object();
@@ -2782,7 +3138,7 @@ public:
}
};
static rjson::value describe_items(schema_ptr schema, const query::partition_slice& slice, const cql3::selection::selection& selection, std::unique_ptr<cql3::result_set> result_set, std::unordered_set<std::string>&& attrs_to_get, filter&& filter) {
static rjson::value describe_items(schema_ptr schema, const query::partition_slice& slice, const cql3::selection::selection& selection, std::unique_ptr<cql3::result_set> result_set, attrs_to_get&& attrs_to_get, filter&& filter) {
describe_items_visitor visitor(selection.get_columns(), attrs_to_get, filter);
result_set->visit(visitor);
auto scanned_count = visitor.get_scanned_count();
@@ -2823,7 +3179,7 @@ static future<executor::request_return_type> do_query(service::storage_proxy& pr
const rjson::value* exclusive_start_key,
dht::partition_range_vector&& partition_ranges,
std::vector<query::clustering_range>&& ck_bounds,
std::unordered_set<std::string>&& attrs_to_get,
attrs_to_get&& attrs_to_get,
uint32_t limit,
db::consistency_level cl,
filter&& filter,

View File

@@ -70,6 +70,76 @@ public:
std::string to_json() const override;
};
namespace parsed {
class path;
};
// An attribute_path_map object is used to hold data for various attributes
// paths (parsed::path) in a hierarchy of attribute paths. Each attribute path
// has a root attribute, and then modified by member and index operators -
// for example in "a.b[2].c" we have "a" as the root, then ".b" member, then
// "[2]" index, and finally ".c" member.
// Data can be added to an attribute_path_map using the add() function, but
// requires that attributes with data not be *overlapping* or *conflicting*:
//
// 1. Two attribute paths which are identical or an ancestor of one another
// are considered *overlapping* and not allowed. If a.b.c has data,
// we can't add more data in a.b.c or any of its descendants like a.b.c.d.
//
// 2. Two attribute paths which need the same parent to have both a member and
// an index are considered *conflicting* and not allowed. E.g., if a.b has
// data, you can't add a[1]. The meaning of adding both would be that the
// attribute a is both a map and an array, which isn't sensible.
//
// These two requirements are common to the two places where Alternator uses
// this abstraction to describe how a hierarchical item is to be transformed:
//
// 1. In ProjectionExpression: for filtering from a full top-level attribute
// only the parts for which user asked in ProjectionExpression.
//
// 2. In UpdateExpression: for taking the previous value of a top-level
// attribute, and modifying it based on the instructions in the user
// wrote in UpdateExpression.
template<typename T>
class attribute_path_map_node {
public:
    // The type of payload stored at a leaf of the path hierarchy.
    using data_t = T;
    // We need the extra shared_ptr<> here because libstdc++ unordered_map
    // doesn't work with incomplete types :-( We couldn't use lw_shared_ptr<>
    // because it doesn't work for incomplete types either. We couldn't use
    // std::unique_ptr<> because it makes the entire object uncopyable. We
    // don't often need to copy such a map, but we do have some code that
    // copies an attrs_to_get object, and is hard to find and remove.
    // The shared_ptr should never be null.
    using members_t = std::unordered_map<std::string, seastar::shared_ptr<attribute_path_map_node<T>>>;
    // The indexes list is sorted because DynamoDB requires handling writes
    // beyond the end of a list in index order.
    using indexes_t = std::map<unsigned, seastar::shared_ptr<attribute_path_map_node<T>>>;
    // The prohibition on "overlap" and "conflict" explained above means
    // that only one of data, members or indexes is non-empty. An empty
    // _content (nullopt) means this node holds nothing yet.
    std::optional<std::variant<data_t, members_t, indexes_t>> _content;
    bool is_empty() const { return !_content; }
    bool has_value() const { return _content && std::holds_alternative<data_t>(*_content); }
    bool has_members() const { return _content && std::holds_alternative<members_t>(*_content); }
    bool has_indexes() const { return _content && std::holds_alternative<indexes_t>(*_content); }
    // get_members() assumes that has_members() is true
    members_t& get_members() { return std::get<members_t>(*_content); }
    const members_t& get_members() const { return std::get<members_t>(*_content); }
    // get_indexes() assumes that has_indexes() is true
    indexes_t& get_indexes() { return std::get<indexes_t>(*_content); }
    const indexes_t& get_indexes() const { return std::get<indexes_t>(*_content); }
    // get_value() assumes that has_value() is true
    T& get_value() { return std::get<T>(*_content); }
    const T& get_value() const { return std::get<T>(*_content); }
};
template<typename T>
using attribute_path_map = std::unordered_map<std::string, attribute_path_map_node<T>>;
using attrs_to_get_node = attribute_path_map_node<std::monostate>;
using attrs_to_get = attribute_path_map<std::monostate>;
class executor : public peering_sharded_service<executor> {
service::storage_proxy& _proxy;
service::migration_manager& _mm;
@@ -140,16 +210,14 @@ public:
const query::partition_slice&,
const cql3::selection::selection&,
const query::result&,
const std::unordered_set<std::string>&);
const attrs_to_get&);
static void describe_single_item(const cql3::selection::selection&,
const std::vector<bytes_opt>&,
const std::unordered_set<std::string>&,
const attrs_to_get&,
rjson::value&,
bool = false);
void add_stream_options(const rjson::value& stream_spec, schema_builder&) const;
void supplement_table_info(rjson::value& descr, const schema& schema) const;
void supplement_table_stream_info(rjson::value& descr, const schema& schema) const;

View File

@@ -130,6 +130,27 @@ void condition_expression::append(condition_expression&& a, char op) {
}, _expression);
}
// Verify that this path does not exceed the allowed nesting depth.
// The total depth is the root plus one level per operator, so the path
// is too deep as soon as the operator count reaches depth_limit.
void path::check_depth_limit() {
    if (_operators.size() >= depth_limit) {
        throw expressions_syntax_error(format("Document path exceeded {} nesting levels", depth_limit));
    }
}
// Print a path back in its user-visible syntax - the root attribute name
// followed by ".member" for each map dereference and "[index]" for each
// list dereference, e.g., "a.b[2].c".
std::ostream& operator<<(std::ostream& os, const path& p) {
    os << p.root();
    for (const auto& op : p.operators()) {
        if (const std::string* member = std::get_if<std::string>(&op)) {
            os << '.' << *member;
        } else {
            os << '[' << std::get<unsigned>(op) << ']';
        }
    }
    return os;
}
} // namespace parsed
// The following resolve_*() functions resolve references in parsed
@@ -151,10 +172,9 @@ void condition_expression::append(condition_expression&& a, char op) {
// we need to resolve the expression just once but then use it many times
// (once for each item to be filtered).
static void resolve_path(parsed::path& p,
static std::optional<std::string> resolve_path_component(const std::string& column_name,
const rjson::value* expression_attribute_names,
std::unordered_set<std::string>& used_attribute_names) {
const std::string& column_name = p.root();
if (column_name.size() > 0 && column_name.front() == '#') {
if (!expression_attribute_names) {
throw api_error::validation(
@@ -166,7 +186,30 @@ static void resolve_path(parsed::path& p,
format("ExpressionAttributeNames missing entry '{}' required by expression", column_name));
}
used_attribute_names.emplace(column_name);
p.set_root(std::string(rjson::to_string_view(*value)));
return std::string(rjson::to_string_view(*value));
}
return std::nullopt;
}
// Resolve every "#name" reference in a path - the root and each member
// operator - against ExpressionAttributeNames, recording which names were
// used in used_attribute_names. Numeric index operators contain no names
// and are left untouched.
static void resolve_path(parsed::path& p,
        const rjson::value* expression_attribute_names,
        std::unordered_set<std::string>& used_attribute_names) {
    if (auto resolved = resolve_path_component(p.root(), expression_attribute_names, used_attribute_names)) {
        p.set_root(std::move(*resolved));
    }
    for (auto& op : p.operators()) {
        std::string* member = std::get_if<std::string>(&op);
        if (!member) {
            // An index operator has nothing to resolve.
            continue;
        }
        if (auto resolved = resolve_path_component(*member, expression_attribute_names, used_attribute_names)) {
            *member = std::move(*resolved);
        }
    }
}
@@ -623,6 +666,55 @@ std::unordered_map<std::string_view, function_handler_type*> function_handlers {
},
};
// Given a parsed::path and an item read from the table, extract the value
// of a certain attribute path, such as "a" or "a.b.c[3]". Returns a null
// value if the item or the requested attribute does not exist.
// Note that the item is assumed to be encoded in JSON using DynamoDB
// conventions - each level of a nested document is a map with one key -
// a type (e.g., "M" for map) - and its value is the representation of
// that value.
static rjson::value extract_path(const rjson::value* item,
        const parsed::path& p, calculate_value_caller caller) {
    if (!item) {
        return rjson::null_value();
    }
    const rjson::value* v = rjson::find(*item, p.root());
    if (!v) {
        return rjson::null_value();
    }
    for (const auto& op : p.operators()) {
        if (!v->IsObject() || v->MemberCount() != 1) {
            // This shouldn't happen. We shouldn't have stored malformed
            // objects. But today Alternator does not validate the structure
            // of nested documents before storing them, so this can happen on
            // read.
            // Note: the original code passed only one argument for the two
            // "{}" placeholders; the first placeholder is the caller (as in
            // the other error messages in this file).
            throw api_error::validation(format("{}: malformed item read: {}", caller, *item));
        }
        // Each nesting level is a single-member object whose key is the
        // DynamoDB type tag ("M" for map, "L" for list, etc.) and whose
        // value is the content of that level.
        const char* type = v->MemberBegin()->name.GetString();
        v = &(v->MemberBegin()->value);
        std::visit(overloaded_functor {
            [&] (const std::string& member) {
                // A ".member" operator is only valid on a map ("M").
                if (type[0] == 'M' && v->IsObject()) {
                    v = rjson::find(*v, member);
                } else {
                    v = nullptr;
                }
            },
            [&] (unsigned index) {
                // An "[index]" operator is only valid on a list ("L") and
                // within its bounds.
                if (type[0] == 'L' && v->IsArray() && index < v->Size()) {
                    v = &(v->GetArray()[index]);
                } else {
                    v = nullptr;
                }
            }
        }, op);
        if (!v) {
            return rjson::null_value();
        }
    }
    return rjson::copy(*v);
}
// Given a parsed::value, which can refer either to a constant value from
// ExpressionAttributeValues, to the value of some attribute, or to a function
// of other values, this function calculates the resulting value.
@@ -640,21 +732,12 @@ rjson::value calculate_value(const parsed::value& v,
auto function_it = function_handlers.find(std::string_view(f._function_name));
if (function_it == function_handlers.end()) {
throw api_error::validation(
format("UpdateExpression: unknown function '{}' called.", f._function_name));
format("{}: unknown function '{}' called.", caller, f._function_name));
}
return function_it->second(caller, previous_item, f);
},
[&] (const parsed::path& p) -> rjson::value {
if (!previous_item) {
return rjson::null_value();
}
std::string update_path = p.root();
if (p.has_operators()) {
// FIXME: support this
throw api_error::validation("Reading attribute paths not yet implemented");
}
const rjson::value* previous_value = rjson::find(*previous_item, update_path);
return previous_value ? rjson::copy(*previous_value) : rjson::null_value();
return extract_path(previous_item, p, caller);
}
}, v._value);
}

View File

@@ -49,15 +49,23 @@ class path {
// dot (e.g., ".xyz").
std::string _root;
std::vector<std::variant<std::string, unsigned>> _operators;
// It is useful to limit the depth of a user-specified path, because it
// allows us to use recursive algorithms without worrying about recursion
// depth. DynamoDB officially limits the length of paths to 32 components
// (including the root) so let's use the same limit.
static constexpr unsigned depth_limit = 32;
void check_depth_limit();
public:
void set_root(std::string root) {
_root = std::move(root);
}
void add_index(unsigned i) {
_operators.emplace_back(i);
check_depth_limit();
}
void add_dot(std::string(name)) {
_operators.emplace_back(std::move(name));
check_depth_limit();
}
const std::string& root() const {
return _root;
@@ -65,6 +73,13 @@ public:
bool has_operators() const {
return !_operators.empty();
}
const std::vector<std::variant<std::string, unsigned>>& operators() const {
return _operators;
}
std::vector<std::variant<std::string, unsigned>>& operators() {
return _operators;
}
friend std::ostream& operator<<(std::ostream&, const path&);
};
// When an expression is first parsed, all constants are references, like

View File

@@ -845,16 +845,18 @@ future<executor::request_return_type> executor::get_records(client_state& client
static const bytes op_column_name = cdc::log_meta_column_name_bytes("operation");
static const bytes eor_column_name = cdc::log_meta_column_name_bytes("end_of_batch");
auto key_names = boost::copy_range<std::unordered_set<std::string>>(
auto key_names = boost::copy_range<attrs_to_get>(
boost::range::join(std::move(base->partition_key_columns()), std::move(base->clustering_key_columns()))
| boost::adaptors::transformed([&] (const column_definition& cdef) { return cdef.name_as_text(); })
| boost::adaptors::transformed([&] (const column_definition& cdef) {
return std::make_pair<std::string, attrs_to_get_node>(cdef.name_as_text(), {}); })
);
// Include all base table columns as values (in case pre or post is enabled).
// This will include attributes not stored in the frozen map column
auto attr_names = boost::copy_range<std::unordered_set<std::string>>(base->regular_columns()
auto attr_names = boost::copy_range<attrs_to_get>(base->regular_columns()
// this will include the :attrs column, which we will also force evaluating.
// But not having this set empty forces out any cdc columns from actual result
| boost::adaptors::transformed([] (const column_definition& cdef) { return cdef.name_as_text(); })
| boost::adaptors::transformed([] (const column_definition& cdef) {
return std::make_pair<std::string, attrs_to_get_node>(cdef.name_as_text(), {}); })
);
std::vector<const column_definition*> columns;

View File

@@ -87,25 +87,13 @@ progresses and compatibility continues to improve.
* UpdateTable: Not supported.
* ListTables: Supported.
### Item Operations
* GetItem: Support almost complete except that projection expressions can
only ask for top-level attributes.
* PutItem: Support almost complete except that condition expressions can
only refer to to-level attributes.
* UpdateItem: Nested documents are supported but updates to nested attributes
are not (e.g., `SET a.b[3].c=val`), and neither are nested attributes in
condition expressions.
* DeleteItem: Mostly works, but again does not support nested attributes
in condition expressions.
* GetItem, PutItem, UpdateItem, DeleteItem fully supported.
### Batch Operations
* BatchGetItem: Almost complete except that projection expressions can only
ask for top-level attributes.
* BatchWriteItem: Supported. Doesn't limit the number of items (DynamoDB
limits to 25) or size of items (400 KB) or total request size (16 MB).
* BatchGetItem, BatchWriteItem fully supported.
Doesn't limit the number of items (DynamoDB limits to 25) or size of items
(400 KB) or total request size (16 MB).
### Scans
Scan and Query are mostly supported, with the following limitations:
* As above, projection expressions only support top-level attributes.
* The ScanFilter/QueryFilter parameter for filtering results is fully
supported, but the newer FilterExpression syntax is not yet supported.
* The "Select" options which allows to count items instead of returning them
is not yet supported.
### Secondary Indexes
@@ -297,11 +285,10 @@ policies" section.
DynamoDB allows attributes to be **nested** - a top-level attribute may
be a list or a map, and each of its elements may further be lists or
maps, etc. Alternator currently stores the entire content of a top-level
attribute as one JSON object. This is good enough for most needs, except
one DynamoDB feature which we cannot support safely: we cannot modify
a non-top-level attribute (e.g., a.b[3].c) directly without RMW. We plan
to fix this in a future version by rethinking the data model we use for
attributes, or rethinking our implementation of RMW (as explained above).
attribute as one JSON object. This means that UpdateItem requests which
want to modify a non-top-level attribute directly (e.g., a.b[3].c) need RMW:
Alternator implements such requests by reading the entire top-level
attribute a, modifying only a.b[3].c, and then writing back a.
```eval_rst
.. toctree::
@@ -309,4 +296,4 @@ attributes, or rethinking our implementation of RMW (as explained above).
getting-started
compatibility
```
```

View File

@@ -61,12 +61,6 @@ behave the same in Alternator. However, there are a few features which we have
not implemented yet. Unimplemented features return an error when used, so
they should be easy to detect. Here is a list of these unimplemented features:
* Missing support for **atribute paths** like `a.b[3].c`.
Nested attributes _are_ supported, but Alternator does not yet allow reading
or writing directly a piece of a nested attributes using an attribute path -
only top-level attributes can be read or written directly.
https://github.com/scylladb/scylla/issues/5024
* Currently in Alternator, a GSI (Global Secondary Index) can only be added
to a table at table creation time. Unlike DynamoDB which also allows adding
a GSI (but not an LSI) to an existing table using an UpdateTable operation.

View File

@@ -230,3 +230,19 @@ def filled_test_table(dynamodb):
def scylla_only(dynamodb):
if dynamodb.meta.client._endpoint.host.endswith('.amazonaws.com'):
pytest.skip('Scylla-only feature not supported by AWS')
# The "test_table_s_forbid_rmw" fixture is the same as test_table_s, except
# with the "forbid_rmw" write isolation mode. This is useful for verifying
# that writes that we think should not need a read-before-write in fact do
# not need it.
# Because forbid_rmw is a Scylla-only feature, this test is skipped when not
# running against Scylla.
@pytest.fixture(scope="session")
def test_table_s_forbid_rmw(dynamodb, scylla_only):
    # Same schema as test_table_s (string hash key 'p'), but tagged with
    # the Scylla-only "forbid_rmw" write isolation mode so that any write
    # requiring a read-before-write fails loudly.
    tbl = create_test_table(dynamodb,
        KeySchema=[{'AttributeName': 'p', 'KeyType': 'HASH'}],
        AttributeDefinitions=[{'AttributeName': 'p', 'AttributeType': 'S'}])
    table_arn = tbl.meta.client.describe_table(TableName=tbl.name)['Table']['TableArn']
    tbl.meta.client.tag_resource(ResourceArn=table_arn,
        Tags=[{'Key': 'system:write_isolation', 'Value': 'forbid_rmw'}])
    yield tbl
    tbl.delete()

View File

@@ -1262,7 +1262,6 @@ def test_update_condition_other_funcs(test_table_s):
# ConditionExpressions also allows reading nested attributes, and we should
# support that too. This test just checks a few random operators - we don't
# test all the different operators here.
@pytest.mark.xfail(reason="nested attributes not yet implemented in ConditionExpression")
def test_update_condition_nested_attributes(test_table_s):
p = random_string()
test_table_s.update_item(Key={'p': p},
@@ -1277,6 +1276,21 @@ def test_update_condition_nested_attributes(test_table_s):
ConditionExpression='b.x < b.y[1]',
ExpressionAttributeValues={':val': 2})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 2
# Also check the case of a failing condition
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET c = :val',
ConditionExpression='b.x < b.y[0]',
ExpressionAttributeValues={':val': 3})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 2
# A condition involving an attribute which doesn't exist results in
# failed condition - not an error.
with pytest.raises(ClientError, match='ConditionalCheckFailedException'):
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET c = :val',
ConditionExpression='b.z < b.y[100]',
ExpressionAttributeValues={':val': 4})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 2
# All the previous tests refered to attributes using their name directly.
# But the DynamoDB API also allows to refer to attributes using a #reference.
@@ -1293,16 +1307,15 @@ def test_update_condition_attribute_reference(test_table_s):
ExpressionAttributeValues={':val': 1})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 1
@pytest.mark.xfail(reason="nested attributes not yet implemented in ConditionExpression")
def test_update_condition_nested_attribute_reference(test_table_s):
p = random_string()
test_table_s.update_item(Key={'p': p},
AttributeUpdates={'and': {'Value': {'or': 1}, 'Action': 'PUT'}})
AttributeUpdates={'and': {'Value': {'or': 2}, 'Action': 'PUT'}})
test_table_s.update_item(Key={'p': p},
UpdateExpression='SET c = :val',
ConditionExpression='attribute_exists (#name1.#name2)',
ConditionExpression='#name1.#name2 = :two',
ExpressionAttributeNames={'#name1': 'and', '#name2': 'or'},
ExpressionAttributeValues={':val': 1})
ExpressionAttributeValues={':val': 1, ':two': 2})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['c'] == 1
# All the previous tests involved a single condition. The following tests

View File

@@ -743,7 +743,6 @@ def test_filter_expression_and_attributes_to_get(test_table):
# support that too. This test just checks one operators - we don't
# test all the different operators again here, we will assume the same
# code is used internally so if one operator worked, all will work.
@pytest.mark.xfail(reason="nested attributes not yet implemented in FilterExpression")
def test_filter_expression_nested_attribute(test_table):
p = random_string()
test_table.put_item(Item={'p': p, 'c': 'hi', 'x': {'a': 'dog', 'b': 3}})
@@ -754,7 +753,6 @@ def test_filter_expression_nested_attribute(test_table):
ExpressionAttributeValues={':p': p, ':a': 'mouse'})
assert(got_items == [{'p': p, 'c': 'yo', 'x': {'a': 'mouse', 'b': 4}}])
@pytest.mark.xfail(reason="nested attributes not yet implemented in FilterExpression")
# This test is a version of test_filter_expression_and_projection_expression
# involving nested attributes. In that test, we had a filter and projection
# involving different attributes. Nested attributes open new corner cases:

View File

@@ -334,6 +334,33 @@ def test_gsi_wrong_type_attribute_update(test_table_gsi_2):
test_table_gsi_2.update_item(Key={'p': p}, AttributeUpdates={'x': {'Value': 3, 'Action': 'PUT'}})
assert test_table_gsi_2.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'x': x}
# Since a GSI key x cannot be a map or an array, in particular updates to
# nested attributes like x.y or x[1] are not legal. The error that DynamoDB
# reports is "Key attributes must be scalars; list random access '[]' and map
# lookup '.' are not allowed: IndexKey: x".
def test_gsi_wrong_type_attribute_update_nested(test_table_gsi_2):
    key = random_string()
    val = random_string()
    test_table_gsi_2.put_item(Item={'p': key, 'x': val})
    # Writing a map into the GSI key column x must fail - x can only be a
    # string, and a key column can never be a map. DynamoDB reports a type
    # mismatch (as in test_gsi_wrong_type_attribute_update) while Alternator
    # reports "Malformed value object for key column x" - an error message
    # worth improving, so we only check for ValidationException here.
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_gsi_2.update_item(Key={'p': key}, UpdateExpression='SET x = :val1',
            ExpressionAttributeValues={':val1': {'a': 3, 'b': 4}})
    # Setting the nested path x.y on the GSI key column x must also fail.
    # Again the two implementations word it differently - DynamoDB:
    # "Key attributes must be scalars; list random access '[]' and map
    # lookup '.' are not allowed: IndexKey: x"; Alternator: "document paths
    # not valid for this item: x.y" - but both are sensible.
    with pytest.raises(ClientError, match='ValidationException'):
        test_table_gsi_2.update_item(Key={'p': key}, UpdateExpression='SET x.y = :val1',
            ExpressionAttributeValues={':val1': 3})
def test_gsi_wrong_type_attribute_batch(test_table_gsi_2):
# In a BatchWriteItem, if any update is forbidden, the entire batch is
# rejected, and none of the updates happen at all.

View File

@@ -367,6 +367,13 @@ def test_getitem_attributes_to_get(dynamodb, test_table):
expected_item = {k: item[k] for k in wanted if k in item}
assert expected_item == got_item
# Verify that it is forbidden to ask for the same attribute multiple times
def test_getitem_attributes_to_get_duplicate(dynamodb, test_table):
    # Listing the same attribute twice in AttributesToGet must be rejected
    # with a ValidationException mentioning the duplication.
    partition = random_string()
    sort = random_string()
    with pytest.raises(ClientError, match='ValidationException.*Duplicate'):
        test_table.get_item(Key={'p': partition, 'c': sort},
            AttributesToGet=['a', 'a'], ConsistentRead=True)
# Basic test for DeleteItem, with hash key only
def test_delete_item_hash(test_table_s):
p = random_string()

View File

@@ -116,7 +116,6 @@ def test_projection_expression_query(test_table):
# but the previous test checked that the alternative syntax works correctly.
# The following test checks fetching more elaborate attribute paths from
# nested documents.
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_projection_expression_path(test_table_s):
p = random_string()
test_table_s.put_item(Item={
@@ -143,11 +142,21 @@ def test_projection_expression_path(test_table_s):
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x.y')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[3].x')['Item'] == {}
# Similarly, indexing a dictionary as an array, or array as dictionary, or
# integer as either, yields an empty item.
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b.x')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a[0]')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0].x')['Item'] == {}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0][0]')['Item'] == {}
# We can read multiple paths - the result are merged into one object
# structured the same was as in the original item:
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0],a.b[1]')['Item'] == {'a': {'b': [2, 4]}}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b[0],a.c')['Item'] == {'a': {'b': [2], 'c': 5}}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.c,b')['Item'] == {'a': {'c': 5}, 'b': 'hello'}
# If some of the paths are not available, they are silently ignored (just
# like they returned an empty item when used alone earlier)
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x, a.b[0], x, a.b[3].x')['Item'] == {'a': {'b': [2]}}
# It is not allowed to read the same path multiple times. The error from
# DynamoDB looks like: "Invalid ProjectionExpression: Two document paths
# overlap with each other; must remove or rewrite one of these paths;
@@ -160,6 +169,14 @@ def test_projection_expression_path(test_table_s):
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a,a.b[0]')['Item']
# Above we noted that asking for to project a non-existent attribute in an
# existing item yields an empty Item object. However, if the item does not
# exist at all, the Item object will be missing entirely:
p = random_string()
assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='x')
assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.x')
assert not 'Item' in test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a[0]')
# Above in test_projection_expression_toplevel_syntax() we tested how
# name references (#name) work in top-level attributes. In the following
# two tests we test how they work in more elaborate paths:
@@ -167,7 +184,6 @@ def test_projection_expression_path(test_table_s):
# 2. Conversely, a single reference, e.g., "#a", is always a single path
# component. Even if "#a" is "a.b", this refers to the literal attribute
# "a.b" - with a dot in its name - and not to the b element in a.
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_projection_expression_path_references(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': {'b': 1, 'c': 2}, 'b': 'hi'})
@@ -181,10 +197,9 @@ def test_projection_expression_path_references(test_table_s):
with pytest.raises(ClientError, match='ValidationException'):
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.#n2', ExpressionAttributeNames={'#n2': 'b', '#unused': 'x'})
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_projection_expression_path_dot(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a.b': 'hi', 'a': {'b': 'yo'}})
test_table_s.put_item(Item={'p': p, 'a.b': 'hi', 'a': {'b': 'yo', 'c': 'jo'}})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='a.b')['Item'] == {'a': {'b': 'yo'}}
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression='#name', ExpressionAttributeNames={'#name': 'a.b'})['Item'] == {'a.b': 'hi'}
@@ -192,7 +207,6 @@ def test_projection_expression_path_dot(test_table_s):
# ProjectionExpression. This includes both identical paths, and paths where
# one is a sub-path of the other - e.g. "a.b" and "a.b.c". As we already saw
# above, paths with just a common *prefix* - e.g., "a.b, a.c" - are fine.
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_projection_expression_path_overlap(test_table_s):
# The overlap is tested symbolically, on the given paths, without any
# relation to what the item contains, or whether it even exists. So we
@@ -207,13 +221,57 @@ def test_projection_expression_path_overlap(test_table_s):
'a.b, a.b[2]',
'a.b, a.b.c',
'a, a.b[2].c',
'a.b.d, a.b',
'a.b.d.e, a.b',
'a.b, a.b.d',
'a.b, a.b.d.e',
]:
with pytest.raises(ClientError, match='ValidationException.* overlap'):
print(expr)
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=expr)
    # The checks above can be easily passed by an over-zealous "overlap" check
    # which declares everything an overlap :-) Let's also check some non-
    # overlap cases - which shouldn't be declared an overlap.
for expr in ['a, b',
'a.b, a.c',
'a.b.d, a.b.e',
'a[1], a[2]',
'a.b, a.c[2]',
]:
print(expr)
test_table_s.get_item(Key={'p': p}, ConsistentRead=True, ProjectionExpression=expr)
# In addition to not allowing "overlapping" paths, DynamoDB also does not
# allow "conflicting" paths: It does not allow giving both a.b and a[1] in a
# single ProjectionExpression. It gives the error:
# "Invalid ProjectionExpression: Two document paths conflict with each other;
# must remove or rewrite one of these paths; path one: [a, b], path two:
# [a, [1]]".
# The reasoning is that asking for both in one request makes no sense because
# no item will ever be able to fulfill both.
def test_projection_expression_path_conflict(test_table_s):
    # The conflict check is purely symbolic - it looks only at the given
    # paths, never at the item's content (or at whether the item exists),
    # so no item needs to be written first. A key is still required for
    # the GetItem call, though :-)
    p = random_string()
    conflicting = ['a.b, a[1]',
                   'a[1], a.b',
                   'a.b[1], a.b.c',
                   'a.b.c, a.b[1]']
    for projection in conflicting:
        with pytest.raises(ClientError, match='ValidationException.* conflict'):
            test_table_s.get_item(Key={'p': p}, ConsistentRead=True,
                                  ProjectionExpression=projection)
    # An over-zealous "conflict" check that flags every combination as a
    # conflict would pass the loop above, so also verify that clearly
    # non-conflicting combinations are accepted.
    for projection in ['a.b, a.c', 'a.b, a.c[1]']:
        test_table_s.get_item(Key={'p': p}, ConsistentRead=True,
                              ProjectionExpression=projection)
# Above we nested paths in ProjectionExpression, but just for the GetItem
# request. Let's verify they also work in Query and Scan requests:
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_query_projection_expression_path(test_table):
p = random_string()
items = [{'p': p, 'c': str(i), 'a': {'x': str(i*10), 'y': 'hi'}, 'b': 'hello' } for i in range(10)]
@@ -224,7 +282,6 @@ def test_query_projection_expression_path(test_table):
expected_items = [{'a': {'x': x['a']['x']}} for x in items]
assert multiset(expected_items) == multiset(got_items)
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_scan_projection_expression_path(test_table):
# This test is similar to test_query_projection_expression_path above,
# but uses a scan instead of a query. The scan will generate unrelated
@@ -241,9 +298,8 @@ def test_scan_projection_expression_path(test_table):
# BatchGetItem also supports ProjectionExpression, let's test that it
# applies to all items, and that it correctly supports document paths as well.
@pytest.mark.xfail(reason="ProjectionExpression does not yet support attribute paths")
def test_batch_get_item_projection_expression_path(test_table_s):
items = [{'p': random_string(), 'a': {'b': random_string()}, 'c': random_string()} for i in range(3)]
items = [{'p': random_string(), 'a': {'b': random_string(), 'x': 'hi'}, 'c': random_string()} for i in range(3)]
with test_table_s.batch_writer() as batch:
for item in items:
batch.put_item(item)
@@ -285,3 +341,20 @@ def test_projection_expression_and_key_condition_expression(test_table_s):
ExpressionAttributeNames={'#name1': 'p', '#name2': 'a'},
ExpressionAttributeValues={':val1': p});
assert got_items == [{'a': 'hello'}]
# Test whether the nesting depth of a path in a projection expression
# is limited. If the implementation is done using recursion, it is good
# practice to limit it and not crash the server. According to the DynamoDB
# documentation, DynamoDB supports nested attributes up to 32 levels deep:
# https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html#limits-attributes-nested-depth
# There is no reason why Alternator should not use exactly the same limit
# as is officially documented by DynamoDB.
def test_projection_expression_path_nesting_levels(test_table_s):
    p = random_string()
    # A path with 32 nesting levels (counting the top-level attribute) is
    # accepted.
    test_table_s.get_item(Key={'p': p}, ConsistentRead=True,
                          ProjectionExpression='a' + ('.b' * 31))
    # A 33-level path is rejected. DynamoDB's error reads: "Invalid
    # ProjectionExpression: The document path has too many nesting levels;
    # nesting levels: 33".
    with pytest.raises(ClientError, match='ValidationException.*nesting levels'):
        test_table_s.get_item(Key={'p': p}, ConsistentRead=True,
                              ProjectionExpression='a' + ('.b' * 32))

View File

@@ -346,7 +346,6 @@ def test_update_item_returnvalues_updated_new(test_table_s):
# Test the ReturnValues from an UpdateItem directly modifying a *nested*
# attribute, in the relevant ReturnValue modes:
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_item_returnvalues_nested(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': {'b': 'dog', 'c': [1, 2, 3]}, 'd': 'cat'})
@@ -398,3 +397,37 @@ def test_update_item_returnvalues_nested(test_table_s):
UpdateExpression='SET a.c[1] = :val, a.b = :val2',
ExpressionAttributeValues={':val': 3, ':val2': 'dog'})
assert ret['Attributes'] == {'p': p, 'a': {'b': 'dog', 'c': [1, 3, 3]}, 'd': 'cat' }
# Test with REMOVE, and one of them doing nothing (so shouldn't be in UPDATED_OLD)
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_OLD',
UpdateExpression='REMOVE a.c[1], a.c[3]')
assert ret['Attributes'] == {'a': {'c': [3]}} # a.c[3] did not exist
# When adding a new sub-attribute, UPDATED_OLD does not return anything,
# but UPDATED_NEW does:
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_OLD',
UpdateExpression='SET a.x1 = :val',
ExpressionAttributeValues={':val': 8})
assert not 'Attributes' in ret
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
UpdateExpression='SET a.x2 = :val',
ExpressionAttributeValues={':val': 8})
assert ret['Attributes'] == {'a': {'x2': 8}}
# Nevertheless, there is one strange exception - although setting an array
# element *beyond* its end (e.g., a.c[100]) does add a new array item, it
# is *not* returned by UPDATED_NEW. I am not sure DynamoDB did this
# deliberately (is it a bug or a feature?), but it simplifies our
# implementation as well: after setting a.c[100], a.c[100] is not actually
# set (instead, maybe a.c[4] was set) so UPDATED_NEW returning a.c[100]
# returns nothing.
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
UpdateExpression='SET a.c[100] = :val',
ExpressionAttributeValues={':val': 70})
assert not 'Attributes' in ret
    # When removing an item, it shouldn't appear in UPDATED_NEW. Again there
    # is a strange exception - which I'm not sure whether to consider a
    # DynamoDB bug or a feature - but it simplifies our own implementation as
# well: if we remove a.c[1], the new item *will* have a new a.c[1] (the
# previous a.c[2] is shifted back), so this value is returned. This is
# very odd, but this is what DynamoDB does, so we should too...
ret=test_table_s.update_item(Key={'p': p}, ReturnValues='UPDATED_NEW',
UpdateExpression='REMOVE a.c[1]')
assert ret['Attributes'] == {'a': {'c': [70]}}

View File

@@ -265,16 +265,64 @@ def test_update_expression_multi_overlap(test_table_s):
# The problem isn't just with identical paths - we can't modify two paths that
# "overlap" in the sense that one is the ancestor of the other.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_multi_overlap_nested(test_table_s):
p = random_string()
with pytest.raises(ClientError, match='ValidationException.*overlap'):
test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a = :val1, a.b = :val2',
ExpressionAttributeValues={':val1': {'b': 7}, ':val2': 'there'})
test_table_s.put_item(Item={'p': p, 'a': {'b': {'c': 2}}})
with pytest.raises(ClientError, match='ValidationException.*overlap'):
test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a.b = :val1, a.b.c = :val2',
ExpressionAttributeValues={':val1': 'hi', ':val2': 'there'})
# Note that the overlap checks happen before checking the actual content
# of the item, so it doesn't matter that the item we want to modify
# doesn't even exist or have the structure referenced by the paths below.
for expr in ['SET a = :val1, a.b = :val2',
'SET a.b = :val1, a = :val2',
'SET a.b = :val1, a.b = :val2',
'SET a.b = :val1, a.b.c = :val2',
'SET a.b.c = :val1, a.b = :val2',
'SET a.b.c = :val1, a.b.c = :val2',
'SET a.b = :val1, a.b.c.d.e = :val2',
'SET a.b.c.d.e = :val1, a.b = :val2',
'SET a = :val1, a[1] = :val2',
'SET a[1] = :val1, a = :val2',
'SET a[1] = :val1, a[1] = :val2',
'SET a[1][1] = :val1, a[1] = :val2',
'SET a[1] = :val1, a[1][1] = :val2',
'SET a[1][1] = :val1, a[1][1] = :val2',
'SET a[1][1][1][1] = :val1, a[1][1] = :val2',
'SET a[1][1] = :val1, a[1][1][1][1] = :val2',
'SET a[1][1][1][1] = :val1, a[1][1][1][1] = :val2',
]:
print(expr)
with pytest.raises(ClientError, match='ValidationException.*overlap'):
test_table_s.update_item(Key={'p': p}, UpdateExpression=expr,
ExpressionAttributeValues={':val1': 2, ':val2': 'there'})
    # Obviously this test can trivially pass if the overlap check wrongly
    # labels everything as an overlap. So the test
    # test_update_expression_multi_nested below is important - it confirms
    # that we can do multiple modifications to the same item when they do
    # not overlap.
# Besides the concept of "overlapping" paths tested above, DynamoDB also has
# the concept of "conflicting" paths - e.g., attempting to set both a.b and
# a[1] together doesn't make sense.
def test_update_expression_multi_conflict_nested(test_table_s):
    # Like the overlap check, the conflict check is symbolic - the item's
    # content is never consulted, so no item needs to exist.
    p = random_string()
    conflicting_updates = [
        'SET a.b = :val1, a[1] = :val2',
        'SET a.b.c = :val1, a.b[2] = :val2',
    ]
    for expr in conflicting_updates:
        print(expr)
        with pytest.raises(ClientError, match='ValidationException.*conflict'):
            test_table_s.update_item(Key={'p': p}, UpdateExpression=expr,
                ExpressionAttributeValues={':val1': 2, ':val2': 'there'})
# We can do several non-overlapping modifications to the same top-level
# attribute and to different top-level attributes in the same update
# expression.
def test_update_expression_multi_nested(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p,
                                'a': {'x': 3, 'y': 4, 'c': {'y': 3}},
                                'b': {'x': 1, 'y': 2}})
    # One update touches several distinct, non-overlapping paths - both
    # within the same top-level attribute and across different ones.
    test_table_s.update_item(Key={'p': p},
        UpdateExpression='SET a.b = :val1, a.c.d = :val2, b.x = :val3 REMOVE a.x, b.y',
        ExpressionAttributeValues={':val1': 10, ':val2': 'dog', ':val3': 17})
    expected = {'p': p,
                'a': {'y': 4, 'b': 10, 'c': {'y': 3, 'd': 'dog'}},
                'b': {'x': 17}}
    assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == expected
# In the previous test we saw that *modifying* the same item twice in the same
# update is forbidden; But it is allowed to *read* an item in the same update
@@ -776,10 +824,19 @@ def test_update_expression_dot_in_name(test_table_s):
ExpressionAttributeNames={'#a': 'a.b'})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a.b': 3}
# Below we have several tests of what happens when a nested attribute is
# on the left-hand side of an assignment, but an even simpler case of
# nested attributes is having one on the right-hand side of an assignment:
def test_update_expression_nested_attribute_rhs(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'b': 3, 'c': {'x': 7, 'y': 8}}, 'd': 5})
    # Read the nested value a.c.x and assign it to a new top-level attribute.
    test_table_s.update_item(Key={'p': p}, UpdateExpression='SET z = a.c.x')
    item = test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']
    assert item['z'] == 7
# A basic test for direct update of a nested attribute: One of the top-level
# attributes is itself a document, and we update only one of that document's
# nested attributes.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_nested_attribute_dot(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': {'b': 3, 'c': 4}, 'd': 5})
@@ -795,7 +852,6 @@ def test_update_expression_nested_attribute_dot(test_table_s):
# Similar test, for a list: one of the top-level attributes is a list, we
# can update one of its items.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_nested_attribute_index(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': ['one', 'two', 'three']})
@@ -817,7 +873,6 @@ def test_update_expression_nested_attribute_index_reference(test_table_s):
# Test that just like happens in top-level attributes, also in nested
# attributes, setting them replaces the old value - potentially an entire
# nested document, by the whole value (which may have a different type)
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_nested_different_type(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': {'b': 3, 'c': {'one': 1, 'two': 2}}})
@@ -827,7 +882,6 @@ def test_update_expression_nested_different_type(test_table_s):
# Yet another test of a nested attribute update. This one uses deeper
# level of nesting (dots and indexes), adds #name references to the mix.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_nested_deep(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': {'b': 3, 'c': ['hi', {'x': {'y': [3, 5, 7]}}]}})
@@ -841,20 +895,55 @@ def test_update_expression_nested_deep(test_table_s):
# A REMOVE operation can be used to remove nested attributes, and also
# individual list items.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_update_expression_nested_remove(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': {'b': 3, 'c': ['hi', {'x': {'y': [3, 5, 7]}, 'q': 2}]}})
test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a.c[1].x.y[1], a.c[1].q')
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == {'b': 3, 'c': ['hi', {'x': {'y': [3, 7]}}]}
# Removing a list item beyond the end of the list (e.g., REMOVE a[17] when
# the list only has three items) is silently ignored.
def test_update_expression_nested_remove_list_item_after_end(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': [4, 5, 6]})
    # 'a' only has three elements, so index 17 is past the end of the list;
    # the REMOVE is silently ignored rather than reported as an error.
    test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a[17]')
# If we remove a[1] and then change a[3], the index "3" refers to the position
# *before* the first removal.
def test_update_expression_nested_remove_list_item_original_number(test_table_s):
    p = random_string()
    # Try both orderings of the REMOVE and SET clauses - the result must be
    # the same, because the index "3" refers to the position in the list
    # *before* the removal of a[1] takes effect.
    for expr in ['REMOVE a[1] SET a[3] = :val',
                 'SET a[3] = :val REMOVE a[1]']:
        test_table_s.put_item(Item={'p': p, 'a': [2, 3, 4, 5, 6, 7]})
        test_table_s.update_item(Key={'p': p}, UpdateExpression=expr,
            ExpressionAttributeValues={':val': 17})
        assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == [2, 4, 17, 6, 7]
# DynamoDB allows an empty map. So removing the only member from a map leaves
# behind an empty map.
def test_update_expression_nested_remove_singleton_map(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': {'b': 1}})
    test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a.b')
    # DynamoDB permits empty maps, so 'a' survives as {} rather than being
    # dropped along with its only member.
    remaining = test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a']
    assert remaining == {}
# DynamoDB allows an empty list. So removing the only member from a list leaves
# behind an empty list.
def test_update_expression_nested_remove_singleton_list(test_table_s):
    p = random_string()
    test_table_s.put_item(Item={'p': p, 'a': [1]})
    test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a[0]')
    # DynamoDB permits empty lists, so 'a' survives as [] rather than being
    # dropped along with its only element.
    remaining = test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a']
    assert remaining == []
# The DynamoDB documentation specifies: "When you use SET to update a list
# element, the contents of that element are replaced with the new data that
# you specify. If the element does not already exist, SET will append the
# new element at the end of the list."
# So if we take a three-element list a[7], and set a[7], the new element
# will be put at the end of the list, not position 7 specifically.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_nested_attribute_update_array_out_of_bounds(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': ['one', 'two', 'three']})
@@ -864,9 +953,9 @@ def test_nested_attribute_update_array_out_of_bounds(test_table_s):
# The DynamoDB documentation also says: "If you add multiple elements
# in a single SET operation, the elements are sorted in order by element
# number.
test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a[84] = :val1, a[37] = :val2',
ExpressionAttributeValues={':val1': 'a1', ':val2': 'a2'})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a': ['one', 'two', 'three', 'hello', 'a2', 'a1']}
test_table_s.update_item(Key={'p': p}, UpdateExpression='SET a[84] = :val1, a[37] = :val2, a[17] = :val3, a[50] = :val4',
ExpressionAttributeValues={':val1': 'a1', ':val2': 'a2', ':val3': 'a3', ':val4': 'a4'})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'a': ['one', 'two', 'three', 'hello', 'a3', 'a2', 'a4', 'a1']}
# Test what happens if we try to write to a.b, which would only make sense if
# a were a nested document, but a doesn't exist, or exists and is NOT a nested
@@ -875,9 +964,6 @@ def test_nested_attribute_update_array_out_of_bounds(test_table_s):
# ClientError: An error occurred (ValidationException) when calling the
# UpdateItem operation: The document path provided in the update expression
# is invalid for update
# Because Scylla doesn't read before write, it cannot detect this as an error,
# so we'll probably want to allow for that possibility as well.
@pytest.mark.xfail(reason="nested updates not yet implemented")
def test_nested_attribute_update_bad_path_dot(test_table_s):
p = random_string()
test_table_s.put_item(Item={'p': p, 'a': 'hello', 'b': ['hi']})
@@ -890,17 +976,56 @@ def test_nested_attribute_update_bad_path_dot(test_table_s):
with pytest.raises(ClientError, match='ValidationException.*path'):
test_table_s.update_item(Key={'p': p}, UpdateExpression='SET c.c = :val1',
ExpressionAttributeValues={':val1': 7})
# Same errors for "remove" operation.
with pytest.raises(ClientError, match='ValidationException.*path'):
test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE a.c')
with pytest.raises(ClientError, match='ValidationException.*path'):
test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE b.c')
with pytest.raises(ClientError, match='ValidationException.*path'):
test_table_s.update_item(Key={'p': p}, UpdateExpression='REMOVE c.c')
# Same error when the item doesn't exist at all:
p = random_string()
with pytest.raises(ClientError, match='ValidationException.*path'):
test_table_s.update_item(Key={'p': p}, UpdateExpression='SET c.c = :val1',
ExpressionAttributeValues={':val1': 7})
    # Note, however, that it *is* allowed to remove a random path from a
    # non-existent item. I don't know why. See the next test -
    # test_nested_attribute_remove_from_missing_item
# Though in the above test (test_nested_attribute_update_bad_path_dot) we
# showed that DynamoDB does not allow REMOVE x.y if attribute x doesn't
# exist - and generates a ValidationException, it turns out that if the
# entire item doesn't exist, then a REMOVE x.y is silently ignored.
# I don't understand why they did this.
@pytest.mark.xfail(reason="for unknown reason, DynamoDB allows REMOVE x.y when item doesn't exist")
def test_nested_attribute_remove_from_missing_item(test_table_s):
    # No item is ever written for this key, yet DynamoDB silently accepts
    # a REMOVE of a nested path - both the map-style and list-style form.
    p = random_string()
    for expr in ['REMOVE x.y', 'REMOVE x[0]']:
        test_table_s.update_item(Key={'p': p}, UpdateExpression=expr)
# Similarly for other types of bad paths - using [0] on something which
# isn't an array,
@pytest.mark.xfail(reason="nested updates not yet implemented")
# doesn't exist or isn't an array.
def test_nested_attribute_update_bad_path_array(test_table_s):
    p = random_string()
    # 'a' is a string (not a list) and 'b' does not exist at all, so using
    # an index on either of them is an invalid document path.
    test_table_s.put_item(Item={'p': p, 'a': 'hello'})
    for expr in ['SET a[0] = :val1', 'SET b[0] = :val1']:
        with pytest.raises(ClientError, match='ValidationException.*path'):
            test_table_s.update_item(Key={'p': p}, UpdateExpression=expr,
                ExpressionAttributeValues={':val1': 7})
    # The "remove" operation fails with the same errors.
    for expr in ['REMOVE a[0]', 'REMOVE b[0]']:
        with pytest.raises(ClientError, match='ValidationException.*path'):
            test_table_s.update_item(Key={'p': p}, UpdateExpression=expr)
    # A completely missing item yields the same error for SET:
    p = random_string()
    with pytest.raises(ClientError, match='ValidationException.*path'):
        test_table_s.update_item(Key={'p': p}, UpdateExpression='SET b[0] = :val1',
            ExpressionAttributeValues={':val1': 7})
    # Note, however, that REMOVE of a random path from a non-existent item
    # *is* allowed, for unknown reasons - see
    # test_nested_attribute_remove_from_missing_item.
# DynamoDB Does not allow empty sets.
# Trying to ask UpdateItem to put one of these in an attribute should be
@@ -921,4 +1046,27 @@ def test_update_expression_empty_attribute(test_table_s):
UpdateExpression='SET d = :v1, e = :v2, f = :v3, g = :v4',
ExpressionAttributeValues={':v1': [], ':v2': {}, ':v3': '', ':v4': b''})
assert test_table_s.get_item(Key={'p': p}, ConsistentRead=True)['Item'] == {'p': p, 'd': [], 'e': {}, 'f': '', 'g': b''}
#
# Verify which kind of update operations require a read-before-write (a.k.a
# read-modify-write, or RMW). We test this by using a table configured with
# "forbid_rmw" isolation mode and checking which writes succeed or pass.
# This is a Scylla-only test (the test_table_s_forbid_rmw implies scylla_only).
def test_update_expression_when_rmw(test_table_s_forbid_rmw):
table = test_table_s_forbid_rmw
p = random_string()
# A write with a RHS (right-hand side) being a constant from the query
# doesn't need RMW:
table.update_item(Key={'p': p},
UpdateExpression='SET a = :val',
ExpressionAttributeValues={':val': 3})
# But if the LHS (left-hand side) of the assignment is a document path,
# it *does* need RMW:
with pytest.raises(ClientError, match='ValidationException.*write isolation policy'):
table.update_item(Key={'p': p},
UpdateExpression='SET a.b = :val',
ExpressionAttributeValues={':val': 3})
assert table.get_item(Key={'p': p}, ConsistentRead=True)['Item']['a'] == 3
# A write with a path in the RHS of a SET also needs RMW
with pytest.raises(ClientError, match='ValidationException.*write isolation policy'):
table.update_item(Key={'p': p},
UpdateExpression='SET b = a')