From cf372ba87b32ca18c900b18eba42e4959a7dd405 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20Hudobski?=
Date: Thu, 23 Apr 2026 10:49:23 +0200
Subject: [PATCH 1/2] index: fix local vector index locality detection after schema reload

When index metadata was deserialized from system tables during schema reload, target_parser::is_local() failed to recognize local vector indexes. It only handled the non-vector JSON format {"pk": [...], "ck": [...]}, but vector indexes serialize their targets as {"pk": [...], "tc": "..."}. As a result, every local vector index was incorrectly marked as global after a schema reload.

Fix this by introducing vector_index::is_local() that recognizes the vector-specific target format, and dispatching to it from the schema deserialization code based on the index class name. This keeps target_parser as secondary-index-specific and follows the same dispatch pattern already used for target serialization.

Also remove the now-unused has_vector_index_on_column() helper (its callers were removed by #29407).
--- db/schema_tables.cc | 7 ++++++- index/vector_index.cc | 16 ++++++++++------ index/vector_index.hh | 2 +- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/db/schema_tables.cc b/db/schema_tables.cc index 63db6d6e17..f480620f26 100644 --- a/db/schema_tables.cc +++ b/db/schema_tables.cc @@ -71,6 +71,7 @@ #include "data_dictionary/user_types_metadata.hh" #include "index/target_parser.hh" +#include "index/vector_index.hh" #include "lang/lua.hh" #include "lang/manager.hh" @@ -2473,7 +2474,11 @@ static index_metadata create_index_from_index_row(const query::result_set_row& r } index_metadata_kind kind = deserialize_index_kind(row.get_nonnull("kind")); sstring target_string = options.at(cql3::statements::index_target::target_option_name); - const index_metadata::is_local_index is_local(secondary_index::target_parser::is_local(target_string)); + auto class_it = options.find("class_name"); + bool is_vector = class_it != options.end() && class_it->second == "vector_index"; + const index_metadata::is_local_index is_local( + is_vector ? 
secondary_index::vector_index::is_local(target_string) + : secondary_index::target_parser::is_local(target_string)); return index_metadata{index_name, options, kind, is_local}; } diff --git a/index/vector_index.cc b/index/vector_index.cc index e2184f1233..23c7ee2c1b 100644 --- a/index/vector_index.cc +++ b/index/vector_index.cc @@ -327,13 +327,17 @@ void vector_index::validate(const schema &schema, const cql3::statements::index_ check_index_options(properties); } -bool vector_index::has_vector_index_on_column(const schema& s, const sstring& target_name) { - for (const auto& index : s.indices()) { - if (is_vector_index_on_column(index, target_name)) { - return true; - } +bool vector_index::is_local(const sstring& target_string) { + std::optional json_value = rjson::try_parse(target_string); + if (!json_value || !json_value->IsObject()) { + return false; } - return false; + rjson::value* pk = rjson::find(*json_value, PK_TARGET_KEY); + if (!pk || !pk->IsArray() || pk->Empty()) { + return false; + } + rjson::value* tc = rjson::find(*json_value, TC_TARGET_KEY); + return tc && tc->IsString(); } bool vector_index::is_vector_index_on_column(const index_metadata& im, const sstring& target_name) { diff --git a/index/vector_index.hh b/index/vector_index.hh index 2665561793..a61068a26d 100644 --- a/index/vector_index.hh +++ b/index/vector_index.hh @@ -31,7 +31,7 @@ public: const std::vector<::shared_ptr> &targets, const gms::feature_service& fs, const data_dictionary::database& db) const override; static bool has_index(const schema& s) { return has_index_impl(s); } - static bool has_vector_index_on_column(const schema& s, const sstring& target_name); + static bool is_local(const sstring& target_string); static bool is_vector_index_on_column(const index_metadata& im, const sstring& target_name); static void check_cdc_options(const schema& s) { check_cdc_options_impl(s); From 119ef942f86fc1e21b2bd32280f59d6c327d7e9d Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Micha=C5=82=20Hudobski?= Date: Thu, 23 Apr 2026 10:49:15 +0200 Subject: [PATCH 2/2] test/cqlpy: add tests for global and local vector index coexistence Add integration tests verifying that both a global and a local vector index can be created on the same column without triggering a spurious "duplicate custom index" error. This was fixed by #29407. Tests cover: - Creating global+local and local+global index pairs on the same column. - Duplicate detection still rejects a second index of the same locality. - IF NOT EXISTS is a no-op for a duplicate same-locality index (and verifies no extra index is created). - IF NOT EXISTS with a different locality creates both indexes. - Two indexes with the same name on different tables are rejected (partially validates VECTOR-643). Fixes: SCYLLADB-987 --- test/cqlpy/test_vector_index.py | 61 +++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/test/cqlpy/test_vector_index.py b/test/cqlpy/test_vector_index.py index 0a6e390e62..9ffdeded30 100644 --- a/test/cqlpy/test_vector_index.py +++ b/test/cqlpy/test_vector_index.py @@ -496,6 +496,15 @@ def test_two_same_name_indexes_on_different_tables_with_if_not_exists(cql, test_ assert rows1[0].index_name == 'ann_index' assert len(rows2) == 0 +def test_two_same_name_indexes_on_different_tables(cql, test_keyspace, skip_on_scylla_vnodes): + schema = "p int primary key, v vector" + with new_test_table(cql, test_keyspace, schema) as table: + schema = "p int primary key, v vector" + with new_test_table(cql, test_keyspace, schema) as table2: + cql.execute(f"CREATE CUSTOM INDEX ann_index ON {table}(v) USING 'sai'") + with pytest.raises(InvalidRequest, match="already exists"): + cql.execute(f"CREATE CUSTOM INDEX ann_index ON {table2}(v) USING 'sai'") + def test_two_same_name_indexes_on_different_columns_with_if_not_exists(cql, test_keyspace, skip_on_scylla_vnodes): schema = "p int primary key, v vector, v2 vector" @@ -512,6 +521,58 @@ def 
test_two_same_name_indexes_on_different_columns_with_if_not_exists(cql, test assert rows[0].index_name == 'ann_index' +def test_global_and_local_vector_indexes_on_same_column(cql, test_keyspace, scylla_only, skip_without_tablets): + schema = 'p1 int, p2 int, v vector, PRIMARY KEY ((p1, p2))' + with new_test_table(cql, test_keyspace, schema) as table: + # Create global index first, then local — should not raise. + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai'") + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1, p2), v) USING 'sai'") + +def test_local_and_global_vector_indexes_on_same_column(cql, test_keyspace, scylla_only, skip_without_tablets): + schema = 'p1 int, p2 int, v vector, PRIMARY KEY ((p1, p2))' + with new_test_table(cql, test_keyspace, schema) as table: + # Create local index first, then global — should not raise. + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1, p2), v) USING 'sai'") + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai'") + +# Even with both local and global indexes, creating a second index of the +# same locality on the same column should still be rejected as duplicate. 
+def test_duplicate_global_vector_index_rejected(cql, test_keyspace, skip_on_scylla_vnodes): + schema = 'p1 int, p2 int, v vector, PRIMARY KEY ((p1, p2))' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai'") + with pytest.raises(InvalidRequest, match="duplicate"): + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai'") + +def test_duplicate_local_vector_index_rejected(cql, test_keyspace, scylla_only, skip_without_tablets): + schema = 'p1 int, p2 int, v vector, PRIMARY KEY ((p1, p2))' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1, p2), v) USING 'sai'") + with pytest.raises(InvalidRequest, match="duplicate"): + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1, p2), v) USING 'sai'") + +def test_duplicate_local_vector_index_if_not_exists(cql, test_keyspace, scylla_only, skip_without_tablets): + schema = 'p1 int, p2 int, v vector, PRIMARY KEY ((p1, p2))' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1, p2), v) USING 'sai'") + cql.execute(f"CREATE CUSTOM INDEX IF NOT EXISTS ON {table}((p1, p2), v) USING 'sai'") + # Verify that only one index exists — the IF NOT EXISTS should not create a duplicate. + ks, cf = table.split(".") + indexes = list(cql.execute(f"SELECT index_name FROM system_schema.indexes WHERE keyspace_name = '{ks}' AND table_name = '{cf}'")) + assert len(indexes) == 1 + +# IF NOT EXISTS with a different locality than the existing index should +# create the second index, not silently skip it. 
+def test_global_then_local_vector_index_if_not_exists(cql, test_keyspace, scylla_only, skip_without_tablets): + schema = 'p1 int, p2 int, v vector, PRIMARY KEY ((p1, p2))' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai'") + cql.execute(f"CREATE CUSTOM INDEX IF NOT EXISTS ON {table}((p1, p2), v) USING 'sai'") + # Verify both indexes exist. + indexes = list(cql.execute(f"SELECT index_name FROM system_schema.indexes WHERE keyspace_name = '{test_keyspace}' AND table_name = '{table.split('.')[1]}'")) + assert len(indexes) == 2 + + ############################################################################### # SAI (StorageAttachedIndex) compatibility tests #