From 374be94faad03123aa946ee50e56bfcab6b7e61a Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Wed, 18 Mar 2026 12:30:12 +0200 Subject: [PATCH] test: statement_restrictions: add index_selection regression test In preparation for refactoring statement_restrictions, add a simple and an exhaustive regression test, encoding the index selection algorithm into the test. We cannot change the index selection algorithm because then mixed-node clusters will alter the sorting key mid-query (if paging takes place). Because the exhaustive space has such a large stack frame, and because Address Santizer bloats the stack frame, increase it for debug builds. --- test/boost/statement_restrictions_test.cc | 799 ++++++++++++++++++++++ 1 file changed, 799 insertions(+) diff --git a/test/boost/statement_restrictions_test.cc b/test/boost/statement_restrictions_test.cc index f778abe105..04ded2d886 100644 --- a/test/boost/statement_restrictions_test.cc +++ b/test/boost/statement_restrictions_test.cc @@ -11,12 +11,14 @@ #include #include +#include #include #include "cql3/restrictions/statement_restrictions.hh" #include "cql3/expr/expr-utils.hh" #include "cql3/util.hh" +#include "index/secondary_index_manager.hh" #include "test/lib/cql_assertions.hh" #include "test/lib/cql_test_env.hh" #include "test/lib/test_utils.hh" @@ -366,6 +368,803 @@ SEASTAR_TEST_CASE(slice_single_column_mixed_order) { }); } +// Regression test: verifies that index selection (find_idx), uses_secondary_indexing, +// and need_filtering produce consistent results across all supported index types: +// - regular_values (standard column EQ) +// - keys (set CONTAINS, map CONTAINS KEY) +// - collection_values (map/list CONTAINS) +// - keys_and_values (map subscript EQ) +// - full (frozen collection EQ; round-trips to regular_values via serialization) +// - local vs global index scoring +SEASTAR_TEST_CASE(index_selection) { + return do_with_cql_env_thread([](cql_test_env& e) { + cquery_nofail(e, "CREATE TABLE ks.idx_test (" + " pk1 int, pk2 int," + " ck1 int, ck2 int," + " v1 int, v2 int, v3 int," + " s1 set," + " m1 map," + " l1 list," + " fs frozen>," + " PRIMARY KEY ((pk1, pk2), ck1, ck2)" + ")"); + // 10 indexes covering all target types. + cquery_nofail(e, "CREATE INDEX idx_v1 ON ks.idx_test(v1)"); + cquery_nofail(e, "CREATE INDEX idx_v2 ON ks.idx_test(v2)"); + cquery_nofail(e, "CREATE INDEX idx_v3_local ON ks.idx_test((pk1,pk2), v3)"); + cquery_nofail(e, "CREATE INDEX idx_ck1 ON ks.idx_test(ck1)"); + cquery_nofail(e, "CREATE INDEX idx_s1 ON ks.idx_test(s1)"); // keys (rewritten from VALUES for sets) + cquery_nofail(e, "CREATE INDEX idx_m1_values ON ks.idx_test(VALUES(m1))"); // collection_values + cquery_nofail(e, "CREATE INDEX idx_m1_keys ON ks.idx_test(KEYS(m1))"); // keys + cquery_nofail(e, "CREATE INDEX idx_m1_entries ON ks.idx_test(ENTRIES(m1))"); // keys_and_values + cquery_nofail(e, "CREATE INDEX idx_l1 ON ks.idx_test(l1)"); // collection_values + cquery_nofail(e, "CREATE INDEX idx_fs ON ks.idx_test(FULL(fs))"); // full -> round-trips to regular_values + + auto schema = e.local_db().find_schema("ks", "idx_test"); + auto& sim = e.data_dictionary().find_column_family(schema).get_index_manager(); + + struct expected { + std::string_view where_clause; + std::optional index_name; // nullopt = no index selected + bool uses_secondary_indexing; + bool need_filtering; + }; + + // Build statement_restrictions from a WHERE clause string and return the + // index-selection result. + auto check = [&](std::string_view where_clause) -> expected { + prepare_context ctx; + auto factors = where_clause.empty() + ? std::vector{} + : boolean_factors(cql3::util::where_clause_to_relations(where_clause, cql3::dialect{})); + auto sr = restrictions::analyze_statement_restrictions( + e.data_dictionary(), + schema, + statements::statement_type::SELECT, + expr::conjunction{std::move(factors)}, + ctx, + /*contains_only_static_columns=*/false, + /*for_view=*/false, + /*allow_filtering=*/true, + restrictions::check_indexes::yes); + auto [idx, restrictions_expr] = sr.find_idx(sim); + return {where_clause, + idx ? std::optional(idx->metadata().name()) : std::nullopt, + sr.uses_secondary_indexing(), + sr.need_filtering()}; + }; + + auto none = std::optional{}; + auto idx = [](const char* name) { return std::optional(name); }; + + auto verify = [](const expected& got, const expected& want) { + BOOST_CHECK_MESSAGE(got.index_name == want.index_name, + fmt::format("WHERE {}: index_name: got {} want {}", + want.where_clause, + got.index_name.value_or("(none)"), + want.index_name.value_or("(none)"))); + BOOST_CHECK_MESSAGE(got.uses_secondary_indexing == want.uses_secondary_indexing, + fmt::format("WHERE {}: uses_secondary_indexing: got {} want {}", + want.where_clause, + got.uses_secondary_indexing, + want.uses_secondary_indexing)); + BOOST_CHECK_MESSAGE(got.need_filtering == want.need_filtering, + fmt::format("WHERE {}: need_filtering: got {} want {}", + want.where_clause, + got.need_filtering, + want.need_filtering)); + }; + + // --- A. Regular column EQ (target_type: regular_values) --- + verify(check("v1 = 1"), {"", idx("idx_v1"), true, false}); + verify(check("v2 = 1"), {"", idx("idx_v2"), true, false}); + // WHERE-clause order tiebreak: first column in WHERE wins for equal scores. + verify(check("v1 = 1 AND v2 = 1"), {"", idx("idx_v1"), true, true}); + verify(check("v2 = 1 AND v1 = 1"), {"", idx("idx_v2"), true, true}); + // Slices (GT/LT) are not supported by standard secondary indexes. + verify(check("v1 > 1"), {"", none, false, true}); + + // --- B. Local vs global index scoring --- + // Local index with full PK scores 2, global scores 1. + verify(check("pk1 = 1 AND pk2 = 1 AND v3 = 1"), {"", idx("idx_v3_local"), true, false}); + // Local (score 2) beats global (score 1) even when global column appears first. + verify(check("pk1 = 1 AND pk2 = 1 AND v1 = 1 AND v3 = 1"), {"", idx("idx_v3_local"), true, true}); + // Local index without full PK gets score 0 and is never picked. + verify(check("v3 = 1"), {"", none, false, true}); + + // --- C. CK column index (search group ordering) --- + verify(check("ck1 = 1"), {"", idx("idx_ck1"), true, false}); + // CK group is iterated before non-PK group, regardless of WHERE order. + verify(check("ck1 = 1 AND v1 = 1"), {"", idx("idx_ck1"), true, true}); + verify(check("v1 = 1 AND ck1 = 1"), {"", idx("idx_ck1"), true, true}); + + // --- D. Set CONTAINS (target_type: keys, rewritten from VALUES for sets) --- + verify(check("s1 CONTAINS 1"), {"", idx("idx_s1"), true, false}); + + // --- E. Map indexes --- + // CONTAINS on map values (target_type: collection_values). + verify(check("m1 CONTAINS 'one'"), {"", idx("idx_m1_values"), true, false}); + // CONTAINS KEY on map keys (target_type: keys). + verify(check("m1 CONTAINS KEY 1"), {"", idx("idx_m1_keys"), true, false}); + // Subscript EQ on map entries (target_type: keys_and_values). + verify(check("m1[1] = 'one'"), {"", idx("idx_m1_entries"), true, false}); + + // --- F. List CONTAINS (target_type: collection_values) --- + verify(check("l1 CONTAINS 1"), {"", idx("idx_l1"), true, false}); + + // --- G. Frozen collection (FULL index, round-trips to regular_values) --- + verify(check("fs = {1}"), {"", idx("idx_fs"), true, false}); + // Same with full PK (local index on v3 is available but idx_fs is global, score 1). + verify(check("pk1 = 1 AND pk2 = 1 AND fs = {1}"), {"", idx("idx_fs"), true, false}); + + // --- H. Double CONTAINS on same column: CollectionYes && CollectionYes = No --- + verify(check("s1 CONTAINS 1 AND s1 CONTAINS 2"), {"", none, false, true}); + + // --- I. Collection + regular column tiebreak (WHERE-clause order) --- + verify(check("s1 CONTAINS 1 AND v1 = 1"), {"", idx("idx_s1"), true, true}); + verify(check("v1 = 1 AND s1 CONTAINS 1"), {"", idx("idx_v1"), true, true}); + + // --- J. CK group beats collection in non-PK group --- + verify(check("m1 CONTAINS 'one' AND ck1 = 1"), {"", idx("idx_ck1"), true, true}); + + // --- K. Edge cases --- + // Full PK only: no secondary index needed. + verify(check("pk1 = 1 AND pk2 = 1"), {"", none, false, false}); + // No restrictions at all. + verify(check(""), {"", none, false, false}); + // Token restriction with a regular column: index is used. + verify(check("token(pk1, pk2) > 0 AND v1 = 1"), {"", idx("idx_v1"), true, false}); + }); +} + +// Exhaustive combinatorial test: iterates over all 2^N subsets of N restriction +// fragments and, for each subset, verifies a broad set of statement_restrictions +// public APIs. This catches any refactoring that accidentally changes observable +// behaviour for *any* combination of restriction types. +// +// Restriction fragments (15 independent bits, 2^15 = 32768 combinations): +// bit 0: pk1 = 1 +// bit 1: pk2 = 2 +// bit 2: ck1 = 3 (single-column EQ) +// bit 3: ck2 > 4 (single-column slice) +// bit 4: ck1 IN (3, 6) (single-column IN; includes CK1_EQ value 3) +// bit 5: (ck1, ck2) = (7, 8) (multi-column EQ) +// bit 6: (ck1, ck2) > (9, 10) (multi-column slice) +// bit 7: (ck1, ck2) IN ((11, 12), (13, 14)) (multi-column IN) +// bit 8: v1 = 15 (global index comb_v1, target: regular_values) +// bit 9: v3 = 16 (local index comb_v3_local, target: regular_values) +// bit 10: s1 CONTAINS 17 (global index comb_s1, target: keys — set) +// bit 11: m1 CONTAINS 'alpha' (global index comb_m1_values, target: collection_values) +// bit 12: m2 CONTAINS KEY 18 (global index comb_m2_keys, target: keys — map) +// bit 13: m3[19] = 'beta' (global index comb_m3_entries, target: keys_and_values) +// bit 14: fs = {20, 21} (global index comb_fs, target: full — frozen collection) +SEASTAR_TEST_CASE(combinatorial_restrictions) { + // ASAN's fake-stack shadow buffer for the large lambda below is ~248 KiB; + // bump the thread stack so it doesn't overflow under sanitized builds. + seastar::thread_attributes tattr; +#if defined(__SANITIZE_ADDRESS__) || __has_feature(address_sanitizer) + tattr.stack_size = 2 * 1024 * 1024; +#endif + return do_with_cql_env_thread([](cql_test_env& e) { + cquery_nofail(e, "CREATE TABLE ks.comb (" + " pk1 int, pk2 int," + " ck1 int, ck2 int," + " v1 int, v2 int, v3 int," + " s1 set," + " m1 map," + " m2 map," + " m3 map," + " fs frozen>," + " PRIMARY KEY ((pk1, pk2), ck1, ck2)" + ")"); + cquery_nofail(e, "CREATE INDEX comb_pk1 ON ks.comb(pk1)"); + cquery_nofail(e, "CREATE INDEX comb_v1 ON ks.comb(v1)"); + cquery_nofail(e, "CREATE INDEX comb_v3_local ON ks.comb((pk1,pk2), v3)"); + cquery_nofail(e, "CREATE INDEX comb_s1 ON ks.comb(s1)"); + cquery_nofail(e, "CREATE INDEX comb_ck1 ON ks.comb(ck1)"); + cquery_nofail(e, "CREATE INDEX comb_m1_values ON ks.comb(VALUES(m1))"); + cquery_nofail(e, "CREATE INDEX comb_m2_keys ON ks.comb(KEYS(m2))"); + cquery_nofail(e, "CREATE INDEX comb_m3_entries ON ks.comb(ENTRIES(m3))"); + cquery_nofail(e, "CREATE INDEX comb_fs ON ks.comb(FULL(fs))"); + + auto schema = e.local_db().find_schema("ks", "comb"); + auto& sim = e.data_dictionary().find_column_family(schema).get_index_manager(); + const auto& pk1_def = *schema->get_column_definition("pk1"); + const auto& pk2_def = *schema->get_column_definition("pk2"); + const auto& ck1_def = *schema->get_column_definition("ck1"); + const auto& ck2_def = *schema->get_column_definition("ck2"); + const auto& v1_def = *schema->get_column_definition("v1"); + const auto& v3_def = *schema->get_column_definition("v3"); + const auto& s1_def = *schema->get_column_definition("s1"); + const auto& m1_def = *schema->get_column_definition("m1"); + const auto& m2_def = *schema->get_column_definition("m2"); + const auto& m3_def = *schema->get_column_definition("m3"); + const auto& fs_def = *schema->get_column_definition("fs"); + + // Every restriction fragment is an independent bit in the mask. + // This includes CK restriction variants, giving us exhaustive + // coverage of all 2^15 = 32768 combinations. + enum frag : unsigned { + PK1 = 1u << 0, // pk1 = 1 (global index, regular_values) + PK2 = 1u << 1, // pk2 = 2 (no index) + CK1_EQ = 1u << 2, // ck1 = 3 + CK2_SLICE = 1u << 3, // ck2 > 4 + CK1_IN = 1u << 4, // ck1 IN (3, 6) + MULTI_EQ = 1u << 5, // (ck1, ck2) = (7, 8) + MULTI_SLICE= 1u << 6, // (ck1, ck2) > (9, 10) + MULTI_IN = 1u << 7, // (ck1, ck2) IN ((11, 12), (13, 14)) + V1 = 1u << 8, // v1 = 15 (global index, regular_values) + V3 = 1u << 9, // v3 = 16 (local index, regular_values) + S1 = 1u << 10, // s1 CONTAINS 17 (global index, keys — set) + M_VAL = 1u << 11, // m1 CONTAINS 'alpha' (global index, collection_values) + M_KEY = 1u << 12, // m2 CONTAINS KEY 18 (global index, keys — map) + M_ENT = 1u << 13, // m3[19] = 'beta' (global index, keys_and_values) + FS = 1u << 14, // fs = {20, 21} (global index, full) + }; + constexpr unsigned N_FRAG = 15; + constexpr unsigned FRAG_TOTAL = 1u << N_FRAG; + + constexpr unsigned SINGLE_CK_MASK = CK1_EQ | CK2_SLICE | CK1_IN; + constexpr unsigned MULTI_CK_MASK = MULTI_EQ | MULTI_SLICE | MULTI_IN; + + struct fragment_info { + unsigned bit; + const char* clause; + }; + // Each fragment uses unique values so that conjunction intersections + // are predictable. CK1_IN includes the CK1_EQ value (3) to ensure + // the intersection is non-empty when both are present. + const fragment_info fragments[] = { + {PK1, "pk1 = 1"}, + {PK2, "pk2 = 2"}, + {CK1_EQ, "ck1 = 3"}, + {CK2_SLICE, "ck2 > 4"}, + {CK1_IN, "ck1 IN (3, 6)"}, + {MULTI_EQ, "(ck1, ck2) = (7, 8)"}, + {MULTI_SLICE, "(ck1, ck2) > (9, 10)"}, + {MULTI_IN, "(ck1, ck2) IN ((11, 12), (13, 14))"}, + {V1, "v1 = 15"}, + {V3, "v3 = 16"}, + {S1, "s1 CONTAINS 17"}, + {M_VAL, "m1 CONTAINS 'alpha'"}, + {M_KEY, "m2 CONTAINS KEY 18"}, + {M_ENT, "m3[19] = 'beta'"}, + {FS, "fs = {20, 21}"}, + }; + + unsigned total_tested = 0; + unsigned total_illegal = 0; + + for (unsigned mask = 0; mask < FRAG_TOTAL; ++mask) { + // --- Illegality detection --- + // Rule 1: Mixing single-column and multi-column CK restrictions + // is always illegal. + // Rule 2: At most one multi-column CK restriction type allowed. + bool has_single_ck = (mask & SINGLE_CK_MASK) != 0; + bool has_multi_ck = (mask & MULTI_CK_MASK) != 0; + unsigned multi_ck_count = std::popcount(mask & MULTI_CK_MASK); + bool is_illegal = (has_single_ck && has_multi_ck) + || (multi_ck_count > 1); + + // Build WHERE clause from all set bits. + std::string where_clause; + for (auto& f : fragments) { + if (mask & f.bit) { + if (!where_clause.empty()) { + where_clause += " AND "; + } + where_clause += f.clause; + } + } + + auto ctx_msg = [&](std::string_view api) { + return fmt::format("mask=0x{:04x} WHERE [{}]: {}", + mask, where_clause, api); + }; + + prepare_context ctx; + auto where_expr = where_clause.empty() + ? expr::expression(expr::conjunction{}) + : cql3::util::where_clause_to_relations(where_clause, cql3::dialect{}); + + std::optional sr; + try { + sr.emplace(restrictions::analyze_statement_restrictions( + e.data_dictionary(), + schema, + statements::statement_type::SELECT, + where_expr, + ctx, + /*contains_only_static_columns=*/false, + /*for_view=*/false, + /*allow_filtering=*/true, + restrictions::check_indexes::yes)); + } catch (const exceptions::invalid_request_exception&) { + } + + if (is_illegal) { + BOOST_CHECK_MESSAGE(!sr, + ctx_msg("expected exception for illegal CK combination")); + ++total_illegal; + ++total_tested; + continue; + } + BOOST_REQUIRE_MESSAGE(sr, + ctx_msg("unexpected exception for legal CK combination")); + + // --- Derived CK properties --- + bool has_multi_column = has_multi_ck; + + // Which CK columns are restricted? + bool has_ck1 = (mask & (CK1_EQ | CK1_IN | MULTI_EQ | MULTI_SLICE | MULTI_IN)) != 0; + bool has_ck2 = (mask & (CK2_SLICE | MULTI_EQ | MULTI_SLICE | MULTI_IN)) != 0; + bool has_any_ck = has_ck1 || has_ck2; + unsigned ck_count = (has_ck1 ? 1u : 0u) + (has_ck2 ? 1u : 0u); + + // clustering_key_restrictions_has_IN: any IN binop present + bool has_ck_in = (mask & (CK1_IN | MULTI_IN)) != 0; + + // clustering_key_restrictions_has_only_eq: no non-EQ binop in CK + // restrictions. Vacuously true when no CK restrictions. + bool has_only_eq = !(mask & (CK2_SLICE | CK1_IN | MULTI_SLICE | MULTI_IN)); + + // clustering_key_restrictions_need_filtering (internal predicate, + // before ORing with has_partition_key_unrestricted_components): + // For multi-column restrictions, always false. + // For single-column: true when there's a CK gap (ck2 restricted + // without ck1 being restricted by EQ or IN). + bool ck_need_filtering_internal = !has_multi_column + && (mask & CK2_SLICE) + && !(mask & (CK1_EQ | CK1_IN)); + + // has_eq_restriction_on_column: recognizes column_value and + // tuple_constructor LHS with oper_t::EQ. + // CK1_EQ → true for ck1. MULTI_EQ → true for both ck1 and ck2. + // IN, slice → false. + bool ck1_has_eq = (mask & CK1_EQ) || (mask & MULTI_EQ); + bool ck2_has_eq = (mask & MULTI_EQ) != 0; + + // comb_pk1 index selection: + // Requires: pk1 restricted with EQ and PK incomplete + // (_is_key_range). PK restrictions are iterated first in + // _index_restrictions, so comb_pk1 (global, score 1) beats + // all same-score indexes that come later. + bool selects_comb_pk1 = (mask & PK1) && !(mask & PK2); + + // comb_ck1 index selection: + // Requires: !full_pk, single-column EQ on ck1 (not IN — index + // only supports EQ), no CK1_IN (makes conjunction unsupported), + // no multi-column. + // In legal combos, CK1_EQ set → no MULTI_* possible. + // When pk1 is also restricted, comb_pk1 takes priority (PK + // group comes before CK group in _index_restrictions). + bool selects_comb_ck1 = (mask & CK1_EQ) && !(mask & CK1_IN); + + // Index clustering range multiplier: + // CK1_IN alone produces 2 IN values → 2 ranges. + // CK1_EQ + CK1_IN: intersection narrows to 1. + // Multi-column: not added to prefix, multiplier 1. + unsigned idx_range_multiplier = ((mask & CK1_IN) && !(mask & CK1_EQ)) ? 2 : 1; + + // --- Partition key APIs --- + bool has_pk1 = (mask & PK1) != 0; + bool has_pk2 = (mask & PK2) != 0; + bool full_pk = has_pk1 && has_pk2; + + BOOST_CHECK_MESSAGE( + sr->partition_key_restrictions_is_empty() == (!has_pk1 && !has_pk2), + ctx_msg("partition_key_restrictions_is_empty")); + + BOOST_CHECK_MESSAGE( + sr->partition_key_restrictions_is_all_eq() == true, + ctx_msg("partition_key_restrictions_is_all_eq")); + + BOOST_CHECK_MESSAGE( + sr->has_partition_key_unrestricted_components() == (!has_pk1 || !has_pk2), + ctx_msg("has_partition_key_unrestricted_components")); + + unsigned pk_restricted = (has_pk1 ? 1u : 0u) + (has_pk2 ? 1u : 0u); + BOOST_CHECK_MESSAGE( + sr->partition_key_restrictions_size() == pk_restricted, + ctx_msg(fmt::format("partition_key_restrictions_size: got {} want {}", + sr->partition_key_restrictions_size(), pk_restricted))); + + BOOST_CHECK_MESSAGE( + sr->has_token_restrictions() == false, + ctx_msg("has_token_restrictions")); + + BOOST_CHECK_MESSAGE( + sr->key_is_in_relation() == false, + ctx_msg("key_is_in_relation")); + + // is_key_range: true unless full PK is specified with EQ + BOOST_CHECK_MESSAGE( + sr->is_key_range() == !full_pk, + ctx_msg("is_key_range")); + + // --- Clustering key APIs --- + BOOST_CHECK_MESSAGE( + sr->has_clustering_columns_restriction() == has_any_ck, + ctx_msg("has_clustering_columns_restriction")); + + BOOST_CHECK_MESSAGE( + sr->clustering_columns_restrictions_size() == ck_count, + ctx_msg(fmt::format("clustering_columns_restrictions_size: got {} want {}", + sr->clustering_columns_restrictions_size(), ck_count))); + + BOOST_CHECK_MESSAGE( + sr->has_unrestricted_clustering_columns() == (ck_count < 2), + ctx_msg("has_unrestricted_clustering_columns")); + + BOOST_CHECK_MESSAGE( + sr->clustering_key_restrictions_has_IN() == has_ck_in, + ctx_msg("clustering_key_restrictions_has_IN")); + + BOOST_CHECK_MESSAGE( + sr->clustering_key_restrictions_has_only_eq() == has_only_eq, + ctx_msg("clustering_key_restrictions_has_only_eq")); + + // ck_restrictions_need_filtering: + // = has_any_ck && (!full_pk || ck_need_filtering_internal) + // The internal predicate captures column-gap / non-prefix issues; + // has_partition_key_unrestricted_components() is ORed in by the + // outer ck_restrictions_need_filtering(). + bool ck_needs_filter = has_any_ck && (!full_pk || ck_need_filtering_internal); + BOOST_CHECK_MESSAGE( + sr->ck_restrictions_need_filtering() == ck_needs_filter, + ctx_msg("ck_restrictions_need_filtering")); + + // --- Non-primary-key APIs --- + bool has_v1 = (mask & V1) != 0; + bool has_v3 = (mask & V3) != 0; + bool has_s1 = (mask & S1) != 0; + bool has_m_val = (mask & M_VAL) != 0; + bool has_m_key = (mask & M_KEY) != 0; + bool has_m_ent = (mask & M_ENT) != 0; + bool has_fs = (mask & FS) != 0; + bool has_nonpk = has_v1 || has_v3 || has_s1 + || has_m_val || has_m_key || has_m_ent || has_fs; + + BOOST_CHECK_MESSAGE( + sr->has_non_primary_key_restriction() == has_nonpk, + ctx_msg("has_non_primary_key_restriction")); + + // --- Per-column restriction checks --- + BOOST_CHECK_MESSAGE( + sr->is_restricted(&pk1_def) == has_pk1, + ctx_msg("is_restricted(pk1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&pk2_def) == has_pk2, + ctx_msg("is_restricted(pk2)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&ck1_def) == has_ck1, + ctx_msg("is_restricted(ck1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&ck2_def) == has_ck2, + ctx_msg("is_restricted(ck2)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&v1_def) == has_v1, + ctx_msg("is_restricted(v1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&v3_def) == has_v3, + ctx_msg("is_restricted(v3)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&s1_def) == has_s1, + ctx_msg("is_restricted(s1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&m1_def) == has_m_val, + ctx_msg("is_restricted(m1)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&m2_def) == has_m_key, + ctx_msg("is_restricted(m2)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&m3_def) == has_m_ent, + ctx_msg("is_restricted(m3)")); + BOOST_CHECK_MESSAGE( + sr->is_restricted(&fs_def) == has_fs, + ctx_msg("is_restricted(fs)")); + + // has_eq_restriction_on_column: + // pk1/pk2 always EQ when present. + // ck1/ck2 depend on the CK restriction type. + // v1/v3/fs are EQ, s1 is CONTAINS, m1 CONTAINS, m2 CONTAINS KEY, + // m3[1]='a' is subscript EQ (not recognized), fs is regular EQ. + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(pk1_def) == has_pk1, + ctx_msg("has_eq_restriction_on_column(pk1)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(pk2_def) == has_pk2, + ctx_msg("has_eq_restriction_on_column(pk2)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(ck1_def) == ck1_has_eq, + ctx_msg("has_eq_restriction_on_column(ck1)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(ck2_def) == ck2_has_eq, + ctx_msg("has_eq_restriction_on_column(ck2)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(v1_def) == has_v1, + ctx_msg("has_eq_restriction_on_column(v1)")); + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(v3_def) == has_v3, + ctx_msg("has_eq_restriction_on_column(v3)")); + // s1 CONTAINS is not EQ: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(s1_def) == false, + ctx_msg("has_eq_restriction_on_column(s1)")); + // m1 CONTAINS is not EQ: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(m1_def) == false, + ctx_msg("has_eq_restriction_on_column(m1)")); + // m2 CONTAINS KEY is not EQ: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(m2_def) == false, + ctx_msg("has_eq_restriction_on_column(m2)")); + // m3[1] = 'a' is a subscript EQ — not recognized by + // has_eq_restriction_on_column (needs column_value/tuple_constructor LHS). + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(m3_def) == false, + ctx_msg("has_eq_restriction_on_column(m3)")); + // fs = {1,2} is a regular EQ on frozen collection: + BOOST_CHECK_MESSAGE( + sr->has_eq_restriction_on_column(fs_def) == has_fs, + ctx_msg("has_eq_restriction_on_column(fs)")); + + // --- Index selection --- + auto [idx_opt, idx_expr] = sr->find_idx(sim); + + // Determine expected index. The scoring algorithm: + // - do_find_idx iterates _index_restrictions (PK group, then + // CK group, then non-PK group in WHERE-clause order). + // - Multi-column restrictions are skipped (line 1358). + // - Score: local index with full PK = 2, global = 1, local + // without full PK = 0. + // - Strict > for tiebreaking → first with highest score wins. + // + // The PK index (comb_pk1) can be selected when: + // (a) pk1 is restricted with EQ, and + // (b) PK is incomplete (_is_key_range), which triggers + // _uses_secondary_indexing via _has_queriable_pk_index. + // PK restrictions are iterated first in _index_restrictions, so + // comb_pk1 (score 1) beats all later global indexes (tie → first + // wins). + // + // The CK index (comb_ck1) can only be selected when: + // (a) ck1 is restricted with single-column EQ (not IN — the + // index only supports EQ, not IN), + // (b) PK is incomplete (_is_key_range), and + // (c) no multi-column CK restriction (_has_multi_column blocks + // the CK-index path at line 1151). + // (d) CK1_EQ + CK1_IN conjunction makes the index unsupported + // (IN child fails is_supported_by). + // (e) pk1 is NOT restricted (otherwise comb_pk1 wins first). + // + // When comb_ck1 qualifies it is iterated after PK but before + // non-PK in _index_restrictions, and its score 1 ties with any + // non-PK global, so it wins. + + std::optional expected_idx; + + if (selects_comb_pk1) { + expected_idx = "comb_pk1"; + } else if (selects_comb_ck1 && !full_pk) { + expected_idx = "comb_ck1"; + } else if (full_pk && has_v3) { + // Local index scores 2, beats any global (score 1). + expected_idx = "comb_v3_local"; + } else if (has_v1) { + expected_idx = "comb_v1"; + } else if (has_s1) { + // v3 without full_pk scores 0 and is skipped. + expected_idx = "comb_s1"; + } else if (has_m_val) { + expected_idx = "comb_m1_values"; + } else if (has_m_key) { + expected_idx = "comb_m2_keys"; + } else if (has_m_ent) { + expected_idx = "comb_m3_entries"; + } else if (has_fs) { + expected_idx = "comb_fs"; + } + // else: no indexable column (v3 alone without full_pk scores 0) + + bool uses_idx = expected_idx.has_value(); + + BOOST_CHECK_MESSAGE( + (idx_opt ? std::optional(idx_opt->metadata().name()) : std::nullopt) == expected_idx, + ctx_msg(fmt::format("find_idx: got {} want {}", + idx_opt ? idx_opt->metadata().name() : "(none)", + expected_idx.value_or("(none)")))); + BOOST_CHECK_MESSAGE( + sr->uses_secondary_indexing() == uses_idx, + ctx_msg(fmt::format("uses_secondary_indexing: got {} want {}", + sr->uses_secondary_indexing(), uses_idx))); + + // --- need_filtering --- + // Filtering is needed when: + // - PK is not fully specified (partial PK needs filtering unless + // using index) + // - CK has a gap (ck2 without ck1) needs filtering + // - Non-PK restrictions that aren't consumed by the index need + // filtering + // - When using an index, remaining restrictions beyond the + // indexed one need filtering + // - Multi-column CK restrictions that can't be converted to + // bounds need filtering + // The exact logic is complex; we check invariants. + bool need_filt = sr->need_filtering(); + + // 1. If no restrictions at all, no filtering needed. + if (mask == 0) { + BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: empty should be false")); + } + // 2. If only the full PK is specified (no CK, no non-PK), no filtering. + if (mask == (PK1 | PK2)) { + BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: full PK only should be false")); + } + // 3. Single indexed column (no CK): no filtering needed. + if (mask == PK1 || mask == V1 || mask == S1) { + BOOST_CHECK_MESSAGE(!need_filt, ctx_msg("need_filtering: single indexed column should be false")); + } + // 4. If using index + has extra non-PK restrictions, filtering is needed. + if (uses_idx) { + int non_pk_indexed_count = (has_v1 ? 1 : 0) + (has_v3 ? 1 : 0) + (has_s1 ? 1 : 0) + + (has_m_val ? 1 : 0) + (has_m_key ? 1 : 0) + (has_m_ent ? 1 : 0) + + (has_fs ? 1 : 0); + if (non_pk_indexed_count > 1) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: multiple non-PK restrictions with index should need filtering")); + } + } + // 5. Partial PK with no index needs filtering. + if ((has_pk1 != has_pk2) && !uses_idx) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: partial PK without index should need filtering")); + } + // 6. CK gap (ck2 without ck1, single-column only) needs filtering. + if (ck_need_filtering_internal) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: CK gap should need filtering")); + } + // 7. Non-PK restriction without index needs filtering. + if (has_nonpk && !uses_idx) { + BOOST_CHECK_MESSAGE(need_filt, + ctx_msg("need_filtering: non-PK restriction without index should need filtering")); + } + + // --- pk_restrictions_need_filtering --- + bool pk_needs_filter = sr->pk_restrictions_need_filtering(); + if (!uses_idx && has_pk1 != has_pk2) { + BOOST_CHECK_MESSAGE(pk_needs_filter, + ctx_msg("pk_restrictions_need_filtering: partial PK should need filtering")); + } + if (!has_pk1 && !has_pk2) { + BOOST_CHECK_MESSAGE(!pk_needs_filter, + ctx_msg("pk_restrictions_need_filtering: no PK should be false")); + } + if (full_pk) { + BOOST_CHECK_MESSAGE(!pk_needs_filter, + ctx_msg("pk_restrictions_need_filtering: full PK should be false")); + } + + // --- is_empty --- + BOOST_CHECK_MESSAGE( + sr->is_empty() == (mask == 0), + ctx_msg("is_empty")); + + // --- get_not_null_columns: none of our fragments use IS NOT NULL --- + BOOST_CHECK_MESSAGE( + sr->get_not_null_columns().empty(), + ctx_msg("get_not_null_columns should be empty")); + + // --- get_partition_key_ranges --- + // Always returns exactly 1 range. Full PK → singular range + // (specific partition). Otherwise → open-ended range. + { + auto pk_ranges = sr->get_partition_key_ranges(query_options({})); + BOOST_CHECK_MESSAGE(pk_ranges.size() == 1, + ctx_msg(fmt::format("get_partition_key_ranges: {} ranges, want 1", + pk_ranges.size()))); + if (full_pk) { + BOOST_CHECK_MESSAGE(pk_ranges.size() == 1 && pk_ranges[0].is_singular(), + ctx_msg("get_partition_key_ranges: full PK should yield singular range")); + } else { + BOOST_CHECK_MESSAGE(pk_ranges.size() == 1 && !pk_ranges[0].is_singular(), + ctx_msg("get_partition_key_ranges: incomplete PK should yield non-singular range")); + } + } + + // --- get_clustering_bounds --- + // Expected range count: + // Empty CK restrictions → 1 open-ended range. + // Multi-column: MULTI_IN → 2 singular, else → 1. + // Single-column: CK1_IN without CK1_EQ → 2 (IN values expand), + // else → 1 (CK1_EQ narrows intersection to 1 value). + { + auto ck_bounds = sr->get_clustering_bounds(query_options({})); + unsigned expected_ck_bounds; + if (!(mask & (SINGLE_CK_MASK | MULTI_CK_MASK))) { + // No CK restrictions → 1 open-ended range. + expected_ck_bounds = 1; + } else if (mask & MULTI_CK_MASK) { + expected_ck_bounds = (mask & MULTI_IN) ? 2 : 1; + } else { + expected_ck_bounds = ((mask & CK1_IN) && !(mask & CK1_EQ)) ? 2 : 1; + } + BOOST_CHECK_MESSAGE(ck_bounds.size() == expected_ck_bounds, + ctx_msg(fmt::format("get_clustering_bounds: {} ranges, want {}", + ck_bounds.size(), expected_ck_bounds))); + // With no CK restrictions the range should be open-ended. + if (!(mask & (SINGLE_CK_MASK | MULTI_CK_MASK))) { + BOOST_CHECK_MESSAGE( + ck_bounds.size() == 1 + && !ck_bounds[0].start() + && !ck_bounds[0].end(), + ctx_msg("get_clustering_bounds: no CK should be open-ended")); + } + } + + // --- Index table range APIs --- + if (uses_idx) { + bool is_local_idx = (expected_idx == "comb_v3_local"); + const auto& view_schema = *sr->get_view_schema(); + + if (is_local_idx) { + // --- get_local_index_clustering_ranges --- + // Local index CK prefix = (indexed_col, base_ck1, ...). + // The indexed column is always EQ (1 value); CK IN values + // multiply via the base CK appended to the prefix. + auto local_ranges = sr->get_local_index_clustering_ranges(query_options({}), view_schema); + unsigned expected_local = 1 * idx_range_multiplier; + BOOST_CHECK_MESSAGE(!local_ranges.empty(), + ctx_msg("get_local_index_clustering_ranges should not be empty")); + BOOST_CHECK_MESSAGE(local_ranges.size() == expected_local, + ctx_msg(fmt::format("get_local_index_clustering_ranges: {} ranges, want {}", + local_ranges.size(), expected_local))); + } else { + // --- get_global_index_clustering_ranges --- + // Global index CK prefix = (token, pk1, pk2, ...base CK...). + // With full PK: CK IN values expand the prefix. + // Without full PK: prefix is empty → 1 open-ended range. + auto global_ranges = sr->get_global_index_clustering_ranges(query_options({}), view_schema); + BOOST_CHECK_MESSAGE(!global_ranges.empty(), + ctx_msg("get_global_index_clustering_ranges should not be empty")); + if (full_pk) { + unsigned expected_global = 1 * idx_range_multiplier; + BOOST_CHECK_MESSAGE(global_ranges.size() == expected_global, + ctx_msg(fmt::format( + "get_global_index_clustering_ranges (full PK): {} ranges, want {}", + global_ranges.size(), expected_global))); + } else { + // Without full PK the prefix has no token/PK entries, + // so we get 1 open-ended range. + BOOST_CHECK_MESSAGE(global_ranges.size() == 1, + ctx_msg(fmt::format( + "get_global_index_clustering_ranges (!full PK): {} ranges, want 1", + global_ranges.size()))); + } + + // --- get_global_index_token_clustering_ranges --- + // For modern (non-v1) indexes the token column is + // long_type, so this dispatches to the same + // get_single_column_clustering_bounds as + // get_global_index_clustering_ranges. + auto token_ranges = sr->get_global_index_token_clustering_ranges(query_options({}), view_schema); + BOOST_CHECK_MESSAGE(token_ranges.size() == global_ranges.size(), + ctx_msg(fmt::format( + "get_global_index_token_clustering_ranges: {} ranges, want {} (same as global)", + token_ranges.size(), global_ranges.size()))); + } + } + + ++total_tested; + } + + BOOST_TEST_MESSAGE(fmt::format("Tested {} restriction combinations ({} legal, {} illegal, 2^{} = {} total)", + total_tested, total_tested - total_illegal, total_illegal, N_FRAG, FRAG_TOTAL)); + }, {}, tattr); +} + + // Currently expression doesn't have operator==(). // Implementing it is ugly, because there are shared pointers and the term base class. // For testing purposes checking stringified expressions is enough.