From f604269f0ac7de79e4db31c2cba84cc6d67c02ce Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 29 Jun 2023 16:12:44 +0300 Subject: [PATCH] cql3, secondary index: consistently choose index to use in a query When a table has secondary indexes on *multiple* columns, and several such columns are used for filtering in a query, Scylla chooses one of these indexes as the main driver of the query, and the second column's restriction is implemented as filtering. Before this patch, the index to use was chosen fairly randomly, based on the order of the indexes in the schema. This order may be different in different coordinators, and may even change across restarts on the same coordinators. This is not only inconsistent, it can cause outright wrong results when using *paging* and switching (or restarting) coordinates in the middle of a paged scan... One coordinator saves one index's key in the paging state, and then the other coordinator gets this paging state and wrongly believes it is supposed to be a key of a *different* index. The fix in this patch is to pick the index suitable for the first indexed column mentioned in the query. This has two benefits over the situation before the patch: 1. The decision of which index to use no longer changes between coordinators or across restarts - it just depends on the schema and the specific query. 2. Different indexes can have different "specificity" so using one or the other can change the query's performance. After this patch, the user is in control over which index is used by changing the order of terms in the query. A curious user can use tracing to check which index was used to implement a particular query. An xfailing test we had for this issue no longer fails, so the "xfail" marker is removed. Fixes #7969 Signed-off-by: Nadav Har'El Closes scylladb/scylladb#14450 --- cql3/restrictions/statement_restrictions.cc | 40 +++++++++++++-------- test/cql-pytest/test_secondary_index.py | 3 +- 2 files changed, 27 insertions(+), 16 deletions(-) diff --git a/cql3/restrictions/statement_restrictions.cc b/cql3/restrictions/statement_restrictions.cc index cd400bc0f9..7baffc5b0f 100644 --- a/cql3/restrictions/statement_restrictions.cc +++ b/cql3/restrictions/statement_restrictions.cc @@ -537,22 +537,32 @@ std::pair, expr::expression> statement_res int chosen_index_score = 0; expr::expression chosen_index_restrictions = expr::conjunction({}); - for (const auto& index : sim.list_indexes()) { - auto cdef = _schema->get_column_definition(to_bytes(index.target_column())); - for (const expr::expression& restriction : index_restrictions()) { - if (has_partition_token(restriction, *_schema) || contains_multi_column_restriction(restriction)) { - continue; - } - - expr::single_column_restrictions_map rmap = expr::get_single_column_restrictions_map(restriction); - const auto found = rmap.find(cdef); - if (found != rmap.end() && is_supported_by(found->second, index) - && score(index) > chosen_index_score) { - chosen_index = index; - chosen_index_score = score(index); - chosen_index_restrictions = restriction; - } + // Several indexes may be usable for this query. When their score is tied, + // let's pick one by order of the columns mentioned in the restriction + // expression. This specific order isn't important (and maybe in the + // future we could plan a better order based on the specificity of each + // index), but it is critical that two coordinators - or the same + // coordinator over time - must choose the same index for the same query. + // Otherwise, paging can break (see issue #7969). + for (const expr::expression& restriction : index_restrictions()) { + if (has_partition_token(restriction, *_schema) || contains_multi_column_restriction(restriction)) { + continue; } + expr::for_each_expression(restriction, [&](const expr::column_value& cval) { + auto& cdef = cval.col; + expr::expression col_restrictions = expr::conjunction { + .children = expr::extract_single_column_restrictions_for_column(restriction, *cdef) + }; + for (const auto& index : sim.list_indexes()) { + if (cdef->name_as_text() == index.target_column() && + expr::is_supported_by(col_restrictions, index) && + score(index) > chosen_index_score) { + chosen_index = index; + chosen_index_score = score(index); + chosen_index_restrictions = restriction; + } + } + }); } return {chosen_index, chosen_index_restrictions}; } diff --git a/test/cql-pytest/test_secondary_index.py b/test/cql-pytest/test_secondary_index.py index 4bb62cc57d..8220dbae94 100644 --- a/test/cql-pytest/test_secondary_index.py +++ b/test/cql-pytest/test_secondary_index.py @@ -66,8 +66,9 @@ def test_partition_order_with_si(cql, test_keyspace): # The order tested in this case was decided as a good first step in issue # #7969, but it's possible that it will eventually be implemented another # way, e.g. dynamically based on estimated query selectivity statistics. +# In any case, the order must be consistent across coordinators and time +# (and upgrades...) to allow paged queries that use an index to continue. # Ref: #7969 -@pytest.mark.xfail(reason="The order of picking indexes is currently arbitrary. Issue #7969") def test_order_of_indexes(scylla_only, cql, test_keyspace): schema = 'p int primary key, v1 int, v2 int, v3 int' with new_test_table(cql, test_keyspace, schema) as table: