From dd7e3d3eab77dce979368acea88fa5fc51a2a7ce Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 4 Oct 2020 23:16:13 +0300 Subject: [PATCH] alternator: fix query with both projection and filtering We had a bug when a Query/Scan had both projection (ProjectionExpression or AttributesToGet) and filtering (FilterExpression or Query/ScanFilter). The problem was that projection left only the requested attributes, and the filter might have needed - and not got - additional attributes. The solution in this patch is to add the generated JSON item also the extra attributes needed by filtering (if any), run the filter on that, and only at the end remove the extra filtering attributes from the item to be returned. The two tests test_query_filter.py::test_query_filter_and_attributes_to_get test_filter_expression.py::test_filter_expression_and_projection_expression Which failed before this patch now pass so we drop their "xfail" tag. Fixes #6951. Signed-off-by: Nadav Har'El (cherry picked from commit 282742a469c342657b8e02f42cd4ba01807d0e54) --- alternator/executor.cc | 47 +++++++++++++++++++++-- alternator/expressions.cc | 33 ++++++++++++++++ alternator/expressions.hh | 7 ++++ test/alternator/test_filter_expression.py | 1 - test/alternator/test_query_filter.py | 1 - 5 files changed, 83 insertions(+), 6 deletions(-) diff --git a/alternator/executor.cc b/alternator/executor.cc index 463c362906..c44eb1cc59 100644 --- a/alternator/executor.cc +++ b/alternator/executor.cc @@ -1881,7 +1881,8 @@ static std::string get_item_type_string(const rjson::value& v) { // calculate_attrs_to_get() takes either AttributesToGet or // ProjectionExpression parameters (having both is *not* allowed), -// and returns the list of cells we need to read. +// and returns the list of cells we need to read, or an empty set when +// *all* attributes are to be returned. // In our current implementation, only top-level attributes are stored // as cells, and nested documents are stored serialized as JSON. // So this function currently returns only the the top-level attributes @@ -2571,6 +2572,10 @@ public: std::unordered_set& used_attribute_values); bool check(const rjson::value& item) const; bool filters_on(std::string_view attribute) const; + // for_filters_on() runs the given function on the attributes that the + // filter works on. It may run for the same attribute more than once if + // used more than once in the filter. + void for_filters_on(const noncopyable_function& func) const; operator bool() const { return bool(_imp); } }; @@ -2651,10 +2656,26 @@ bool filter::filters_on(std::string_view attribute) const { }, *_imp); } +void filter::for_filters_on(const noncopyable_function& func) const { + if (_imp) { + std::visit(overloaded_functor { + [&] (const conditions_filter& f) -> void { + for (auto it = f.conditions.MemberBegin(); it != f.conditions.MemberEnd(); ++it) { + func(rjson::to_string_view(it->name)); + } + }, + [&] (const expression_filter& f) -> void { + return for_condition_expression_on(f.expression, func); + } + }, *_imp); + } +} + class describe_items_visitor { typedef std::vector columns_t; const columns_t& _columns; const std::unordered_set& _attrs_to_get; + std::unordered_set _extra_filter_attrs; const filter& _filter; typename columns_t::const_iterator _column_it; rjson::value _item; @@ -2670,7 +2691,20 @@ public: , _item(rjson::empty_object()) , _items(rjson::empty_array()) , _scanned_count(0) - { } + { + // _filter.check() may need additional attributes not listed in + // _attrs_to_get (i.e., not requested as part of the output). + // We list those in _extra_filter_attrs. We will include them in + // the JSON but take them out before finally returning the JSON. + if (!_attrs_to_get.empty()) { + _filter.for_filters_on([&] (std::string_view attr) { + std::string a(attr); // no heterogenous maps searches :-( + if (!_attrs_to_get.contains(a)) { + _extra_filter_attrs.emplace(std::move(a)); + } + }); + } + } void start_row() { _column_it = _columns.begin(); @@ -2684,7 +2718,7 @@ public: result_bytes_view->with_linearized([this] (bytes_view bv) { std::string column_name = (*_column_it)->name_as_text(); if (column_name != executor::ATTRS_COLUMN_NAME) { - if (_attrs_to_get.empty() || _attrs_to_get.contains(column_name)) { + if (_attrs_to_get.empty() || _attrs_to_get.contains(column_name) || _extra_filter_attrs.contains(column_name)) { if (!_item.HasMember(column_name.c_str())) { rjson::set_with_string_name(_item, column_name, rjson::empty_object()); } @@ -2696,7 +2730,7 @@ public: auto keys_and_values = value_cast(deserialized); for (auto entry : keys_and_values) { std::string attr_name = value_cast(entry.first); - if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name)) { + if (_attrs_to_get.empty() || _attrs_to_get.contains(attr_name) || _extra_filter_attrs.contains(attr_name)) { bytes value = value_cast(entry.second); rjson::set_with_string_name(_item, attr_name, deserialize_item(value)); } @@ -2708,6 +2742,11 @@ public: void end_row() { if (_filter.check(_item)) { + // Remove the extra attributes _extra_filter_attrs which we had + // to add just for the filter, and not requested to be returned: + for (const auto& attr : _extra_filter_attrs) { + rjson::remove_member(_item, attr); + } rjson::push_back(_items, std::move(_item)); } _item = rjson::empty_object(); diff --git a/alternator/expressions.cc b/alternator/expressions.cc index cf12f754c6..d7aedd52d0 100644 --- a/alternator/expressions.cc +++ b/alternator/expressions.cc @@ -348,6 +348,39 @@ bool condition_expression_on(const parsed::condition_expression& ce, std::string }, ce._expression); } +// for_condition_expression_on() runs a given function over all the attributes +// mentioned in the expression. If the same attribute is mentioned more than +// once, the function will be called more than once for the same attribute. + +static void for_value_on(const parsed::value& v, const noncopyable_function& func) { + std::visit(overloaded_functor { + [&] (const parsed::constant& c) { }, + [&] (const parsed::value::function_call& f) { + for (const parsed::value& value : f._parameters) { + for_value_on(value, func); + } + }, + [&] (const parsed::path& p) { + func(p.root()); + } + }, v._value); +} + +void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function& func) { + std::visit(overloaded_functor { + [&] (const parsed::primitive_condition& cond) { + for (const parsed::value& value : cond._values) { + for_value_on(value, func); + } + }, + [&] (const parsed::condition_expression::condition_list& list) { + for (const parsed::condition_expression& cond : list.conditions) { + for_condition_expression_on(cond, func); + } + } + }, ce._expression); +} + // The following calculate_value() functions calculate, or evaluate, a parsed // expression. The parsed expression is assumed to have been "resolved", with // the matching resolve_* function. diff --git a/alternator/expressions.hh b/alternator/expressions.hh index bc4c46e723..9b43a1b62a 100644 --- a/alternator/expressions.hh +++ b/alternator/expressions.hh @@ -27,6 +27,8 @@ #include #include +#include + #include "expressions_types.hh" #include "utils/rjson.hh" @@ -59,6 +61,11 @@ void validate_value(const rjson::value& v, const char* caller); bool condition_expression_on(const parsed::condition_expression& ce, std::string_view attribute); +// for_condition_expression_on() runs the given function on the attributes +// that the expression uses. It may run for the same attribute more than once +// if the same attribute is used more than once in the expression. +void for_condition_expression_on(const parsed::condition_expression& ce, const noncopyable_function& func); + // calculate_value() behaves slightly different (especially, different // functions supported) when used in different types of expressions, as // enumerated in this enum: diff --git a/test/alternator/test_filter_expression.py b/test/alternator/test_filter_expression.py index 147db39656..ed78542dd1 100644 --- a/test/alternator/test_filter_expression.py +++ b/test/alternator/test_filter_expression.py @@ -658,7 +658,6 @@ def test_filter_expression_and_sort_key_condition(test_table_sn_with_data): # In particular, test that FilterExpression may inspect attributes which will # not be returned by the query, because of the ProjectionExpression. # This test reproduces issue #6951. -@pytest.mark.xfail(reason="issue #6951: cannot filter on non-returned attributes") def test_filter_expression_and_projection_expression(test_table): p = random_string() test_table.put_item(Item={'p': p, 'c': 'hi', 'x': 'dog', 'y': 'cat'}) diff --git a/test/alternator/test_query_filter.py b/test/alternator/test_query_filter.py index 707a8f3be0..22ce4b6391 100644 --- a/test/alternator/test_query_filter.py +++ b/test/alternator/test_query_filter.py @@ -539,7 +539,6 @@ def test_query_filter_paging(test_table_sn_with_data): # In particular, test that QueryFilter may inspect attributes which will # not be returned by the query, because the AttributesToGet. # This test reproduces issue #6951. -@pytest.mark.xfail(reason="issue #6951: cannot filter on non-returned attributes") def test_query_filter_and_attributes_to_get(test_table): p = random_string() test_table.put_item(Item={'p': p, 'c': 'hi', 'x': 'dog', 'y': 'cat'})