diff --git a/alternator/executor.cc b/alternator/executor.cc index 463c362906..c44eb1cc59 100644 --- a/alternator/executor.cc +++ b/alternator/executor.cc @@ -1881,7 +1881,8 @@ static std::string get_item_type_string(const rjson::value& v) { // calculate_attrs_to_get() takes either AttributesToGet or // ProjectionExpression parameters (having both is *not* allowed), -// and returns the list of cells we need to read. +// and returns the list of cells we need to read, or an empty set when +// *all* attributes are to be returned. // In our current implementation, only top-level attributes are stored // as cells, and nested documents are stored serialized as JSON. // So this function currently returns only the the top-level attributes @@ -2571,6 +2572,10 @@ public: std::unordered_set& used_attribute_values); bool check(const rjson::value& item) const; bool filters_on(std::string_view attribute) const; + // for_filters_on() runs the given function on the attributes that the + // filter works on. It may run for the same attribute more than once if + // used more than once in the filter. + void for_filters_on(const noncopyable_function& func) const; operator bool() const { return bool(_imp); } }; @@ -2651,10 +2656,26 @@ bool filter::filters_on(std::string_view attribute) const { }, *_imp); } +void filter::for_filters_on(const noncopyable_function& func) const { + if (_imp) { + std::visit(overloaded_functor { + [&] (const conditions_filter& f) -> void { + for (auto it = f.conditions.MemberBegin(); it != f.conditions.MemberEnd(); ++it) { + func(rjson::to_string_view(it->name)); + } + }, + [&] (const expression_filter& f) -> void { + return for_condition_expression_on(f.expression, func); + } + }, *_imp); + } +} + class describe_items_visitor { typedef std::vector columns_t; const columns_t& _columns; const std::unordered_set& _attrs_to_get; + std::unordered_set _extra_filter_attrs; const filter& _filter; typename columns_t::const_iterator _column_it; rjson::value _item; @@ -2670,7 +2691,20 @@ public: , _item(rjson::empty_object()) , _items(rjson::empty_array()) , _scanned_count(0) - { } + { + // _filter.check() may need additional attributes not listed in + // _attrs_to_get (i.e., not requested as part of the output). + // We list those in _extra_filter_attrs. We will include them in + // the JSON but take them out before finally returning the JSON. + if (!_attrs_to_get.empty()) { + _filter.for_filters_on([&] (std::string_view attr) { + std::string a(attr); // no heterogenous maps searches :-( + if (!_attrs_to_get.contains(a)) { + _extra_filter_attrs.emplace(std::move(a)); + } + }); + } + } void start_row() { _column_it = _columns.begin(); @@ -2684,7 +2718,7 @@ public: result_bytes_view->with_linearized([this] (bytes_view bv) { std::string column_name = (*_column_it)->name_as_text(); if (column_name != executor::ATTRS_COLUMN_NAME) { - if (_attrs_to_get.empty() || _attrs_to_get.contains(column_name)) { + if (_attrs_to_get.empty() || _attrs_to_get.contains(column_name) || _extra_filter_attrs.contains(column_name)) { if (!_item.HasMember(column_name.c_str())) { rjson::set_with_string_name(_item, column_name, rjson::empty_object()); } @@ -2696,7 +2730,7 @@ public: auto keys_and_values = value_cast(deserialized); for (auto entry : keys_and_values) { std::string attr_name = value_cast(entry.first); - if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name)) { + if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name) || _extra_filter_attrs.contains(attr_name)) { bytes value = value_cast(entry.second); rjson::set_with_string_name(_item, attr_name, deserialize_item(value)); } @@ -2708,6 +2742,11 @@ public: void end_row() { if (_filter.check(_item)) { + // Remove the extra attributes _extra_filter_attrs which we had + // to add just for the filter, and not requested to be returned: + for (const auto& attr : _extra_filter_attrs) { + rjson::remove_member(_item, attr); + } rjson::push_back(_items, std::move(_item)); } _item = rjson::empty_object(); diff --git a/alternator/expressions.cc b/alternator/expressions.cc index cf12f754c6..d7aedd52d0 100644 --- a/alternator/expressions.cc +++ b/alternator/expressions.cc @@ -348,6 +348,39 @@ bool condition_expression_on(const parsed::condition_expression& ce, std::string }, ce._expression); } +// for_condition_expression_on() runs a given function over all the attributes +// mentioned in the expression. If the same attribute is mentioned more than +// once, the function will be called more than once for the same attribute. + +static void for_value_on(const parsed::value& v, const noncopyable_function& func) { + std::visit(overloaded_functor { + [&] (const parsed::constant& c) { }, + [&] (const parsed::value::function_call& f) { + for (const parsed::value& value : f._parameters) { + for_value_on(value, func); + } + }, + [&] (const parsed::path& p) { + func(p.root()); + } + }, v._value); +} + +void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function& func) { + std::visit(overloaded_functor { + [&] (const parsed::primitive_condition& cond) { + for (const parsed::value& value : cond._values) { + for_value_on(value, func); + } + }, + [&] (const parsed::condition_expression::condition_list& list) { + for (const parsed::condition_expression& cond : list.conditions) { + for_condition_expression_on(cond, func); + } + } + }, ce._expression); +} + // The following calculate_value() functions calculate, or evaluate, a parsed // expression. The parsed expression is assumed to have been "resolved", with // the matching resolve_* function. diff --git a/alternator/expressions.hh b/alternator/expressions.hh index bc4c46e723..9b43a1b62a 100644 --- a/alternator/expressions.hh +++ b/alternator/expressions.hh @@ -27,6 +27,8 @@ #include #include +#include + #include "expressions_types.hh" #include "utils/rjson.hh" @@ -59,6 +61,11 @@ void validate_value(const rjson::value& v, const char* caller); bool condition_expression_on(const parsed::condition_expression& ce, std::string_view attribute); +// for_condition_expression_on() runs the given function on the attributes +// that the expression uses. It may run for the same attribute more than once +// if the same attribute is used more than once in the expression. +void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function& func); + // calculate_value() behaves slightly different (especially, different // functions supported) when used in different types of expressions, as // enumerated in this enum: diff --git a/test/alternator/test_filter_expression.py b/test/alternator/test_filter_expression.py index 147db39656..ed78542dd1 100644 --- a/test/alternator/test_filter_expression.py +++ b/test/alternator/test_filter_expression.py @@ -658,7 +658,6 @@ def test_filter_expression_and_sort_key_condition(test_table_sn_with_data): # In particular, test that FilterExpression may inspect attributes which will # not be returned by the query, because of the ProjectionExpression. # This test reproduces issue #6951. -@pytest.mark.xfail(reason="issue #6951: cannot filter on non-returned attributes") def test_filter_expression_and_projection_expression(test_table): p = random_string() test_table.put_item(Item={'p': p, 'c': 'hi', 'x': 'dog', 'y': 'cat'}) diff --git a/test/alternator/test_query_filter.py b/test/alternator/test_query_filter.py index 707a8f3be0..22ce4b6391 100644 --- a/test/alternator/test_query_filter.py +++ b/test/alternator/test_query_filter.py @@ -539,7 +539,6 @@ def test_query_filter_paging(test_table_sn_with_data): # In particular, test that QueryFilter may inspect attributes which will # not be returned by the query, because the AttributesToGet. # This test reproduces issue #6951. -@pytest.mark.xfail(reason="issue #6951: cannot filter on non-returned attributes") def test_query_filter_and_attributes_to_get(test_table): p = random_string() test_table.put_item(Item={'p': p, 'c': 'hi', 'x': 'dog', 'y': 'cat'})