Merge 'index: fix local vector index locality detection after schema reload' from Michał Hudobski

After schema reload, `target_parser::is_local()` did not recognize the
vector-index local target format `{"pk": [...], "tc": "..."}`, causing
local vector indexes to be treated as global. This broke duplicate
detection when both a global and a local vector index existed on the same
column. Fix by introducing `vector_index::is_local()` and dispatching
to it from `create_index_from_index_row()` based on the index class.
Also adds tests for local/global vector index coexistence.

Fixes: SCYLLADB-987

backport reasoning: we added local vector index support in 2026.1

Closes scylladb/scylladb#29492

* github.com:scylladb/scylladb:
  test/cqlpy: add tests for global and local vector index coexistence
  index: fix local vector index locality detection after schema reload
This commit is contained in:
Nadav Har'El
2026-05-27 15:34:57 +03:00
4 changed files with 78 additions and 8 deletions

View File

@@ -71,6 +71,7 @@
#include "data_dictionary/user_types_metadata.hh"
#include "index/target_parser.hh"
#include "index/vector_index.hh"
#include "lang/lua.hh"
#include "lang/manager.hh"
@@ -2470,7 +2471,11 @@ static index_metadata create_index_from_index_row(const query::result_set_row& r
}
index_metadata_kind kind = deserialize_index_kind(row.get_nonnull<sstring>("kind"));
sstring target_string = options.at(cql3::statements::index_target::target_option_name);
const index_metadata::is_local_index is_local(secondary_index::target_parser::is_local(target_string));
auto class_it = options.find("class_name");
bool is_vector = class_it != options.end() && class_it->second == "vector_index";
const index_metadata::is_local_index is_local(
is_vector ? secondary_index::vector_index::is_local(target_string)
: secondary_index::target_parser::is_local(target_string));
return index_metadata{index_name, options, kind, is_local};
}

View File

@@ -338,13 +338,17 @@ void vector_index::validate(const schema &schema, const cql3::statements::index_
check_index_options(properties);
}
bool vector_index::has_vector_index_on_column(const schema& s, const sstring& target_name) {
for (const auto& index : s.indices()) {
if (is_vector_index_on_column(index, target_name)) {
return true;
}
// Reports whether a serialized vector-index target string describes a local
// index.
//
// The target counts as local only when it parses as a JSON object that holds
// a non-empty array under PK_TARGET_KEY and a string under TC_TARGET_KEY
// (presumably the `{"pk": [...], "tc": "..."}` form -- confirm against the
// key constants). Unparsable text, non-object JSON, or a missing/mistyped
// key all yield false.
bool vector_index::is_local(const sstring& target_string) {
    std::optional<rjson::value> json_value = rjson::try_parse(target_string);
    if (!json_value || !json_value->IsObject()) {
        return false;
    }
    // No unconditional return here: the key checks below must stay reachable,
    // otherwise every target would be classified as non-local.
    rjson::value* pk = rjson::find(*json_value, PK_TARGET_KEY);
    if (!pk || !pk->IsArray() || pk->Empty()) {
        return false;
    }
    rjson::value* tc = rjson::find(*json_value, TC_TARGET_KEY);
    return tc && tc->IsString();
}
bool vector_index::is_vector_index_on_column(const index_metadata& im, const sstring& target_name) {

View File

@@ -31,7 +31,7 @@ public:
const std::vector<::shared_ptr<cql3::statements::index_target>> &targets, const gms::feature_service& fs,
const data_dictionary::database& db) const override;
// Returns the result of has_index_impl<vector_index> for schema `s`.
static bool has_index(const schema& s) { return has_index_impl<vector_index>(s); }
static bool has_vector_index_on_column(const schema& s, const sstring& target_name);
// Returns whether the serialized index target `target_string` denotes a
// local vector index target. The decision is made by parsing the string as
// JSON; a string that does not parse as a JSON object is reported as not local.
static bool is_local(const sstring& target_string);
static bool is_vector_index_on_column(const index_metadata& im, const sstring& target_name);
static void check_cdc_options(const schema& s) {
check_cdc_options_impl<vector_index>(s);

View File

@@ -496,6 +496,15 @@ def test_two_same_name_indexes_on_different_tables_with_if_not_exists(cql, test_
assert rows1[0].index_name == 'ann_index'
assert len(rows2) == 0
def test_two_same_name_indexes_on_different_tables(cql, test_keyspace, skip_on_scylla_vnodes):
    # Creating 'ann_index' on a second table of the same keyspace must fail
    # with "already exists" once the first table already owns that name.
    columns = "p int primary key, v vector<float, 3>"
    with new_test_table(cql, test_keyspace, columns) as first_table, \
            new_test_table(cql, test_keyspace, columns) as second_table:
        cql.execute(f"CREATE CUSTOM INDEX ann_index ON {first_table}(v) USING 'sai'")
        with pytest.raises(InvalidRequest, match="already exists"):
            cql.execute(f"CREATE CUSTOM INDEX ann_index ON {second_table}(v) USING 'sai'")
def test_two_same_name_indexes_on_different_columns_with_if_not_exists(cql, test_keyspace, skip_on_scylla_vnodes):
schema = "p int primary key, v vector<float, 3>, v2 vector<float, 3>"
@@ -512,6 +521,58 @@ def test_two_same_name_indexes_on_different_columns_with_if_not_exists(cql, test
assert rows[0].index_name == 'ann_index'
def test_global_and_local_vector_indexes_on_same_column(cql, test_keyspace, scylla_only, skip_without_tablets):
    # Order under test: global index first, local index second.
    # Neither statement is expected to raise.
    columns = 'p1 int, p2 int, v vector<float, 3>, PRIMARY KEY ((p1, p2))'
    with new_test_table(cql, test_keyspace, columns) as tbl:
        create_global = f"CREATE CUSTOM INDEX ON {tbl}(v) USING 'sai'"
        create_local = f"CREATE CUSTOM INDEX ON {tbl}((p1, p2), v) USING 'sai'"
        for statement in (create_global, create_local):
            cql.execute(statement)
def test_local_and_global_vector_indexes_on_same_column(cql, test_keyspace, scylla_only, skip_without_tablets):
    # Order under test: local index first, global index second.
    # Neither statement is expected to raise.
    columns = 'p1 int, p2 int, v vector<float, 3>, PRIMARY KEY ((p1, p2))'
    with new_test_table(cql, test_keyspace, columns) as tbl:
        create_local = f"CREATE CUSTOM INDEX ON {tbl}((p1, p2), v) USING 'sai'"
        create_global = f"CREATE CUSTOM INDEX ON {tbl}(v) USING 'sai'"
        for statement in (create_local, create_global):
            cql.execute(statement)
# Even with both local and global indexes, creating a second index of the
# same locality on the same column should still be rejected as duplicate.
def test_duplicate_global_vector_index_rejected(cql, test_keyspace, skip_on_scylla_vnodes):
    # Issuing the same global index creation twice: the second attempt must
    # be refused with an error mentioning "duplicate".
    columns = 'p1 int, p2 int, v vector<float, 3>, PRIMARY KEY ((p1, p2))'
    with new_test_table(cql, test_keyspace, columns) as tbl:
        create_global = f"CREATE CUSTOM INDEX ON {tbl}(v) USING 'sai'"
        cql.execute(create_global)
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(create_global)
def test_duplicate_local_vector_index_rejected(cql, test_keyspace, scylla_only, skip_without_tablets):
    # Issuing the same local index creation twice: the second attempt must
    # be refused with an error mentioning "duplicate".
    columns = 'p1 int, p2 int, v vector<float, 3>, PRIMARY KEY ((p1, p2))'
    with new_test_table(cql, test_keyspace, columns) as tbl:
        create_local = f"CREATE CUSTOM INDEX ON {tbl}((p1, p2), v) USING 'sai'"
        cql.execute(create_local)
        with pytest.raises(InvalidRequest, match="duplicate"):
            cql.execute(create_local)
def test_duplicate_local_vector_index_if_not_exists(cql, test_keyspace, scylla_only, skip_without_tablets):
    # A repeated local index creation guarded by IF NOT EXISTS must neither
    # raise nor leave a second row in system_schema.indexes.
    columns = 'p1 int, p2 int, v vector<float, 3>, PRIMARY KEY ((p1, p2))'
    with new_test_table(cql, test_keyspace, columns) as tbl:
        cql.execute(f"CREATE CUSTOM INDEX ON {tbl}((p1, p2), v) USING 'sai'")
        cql.execute(f"CREATE CUSTOM INDEX IF NOT EXISTS ON {tbl}((p1, p2), v) USING 'sai'")
        # Exactly one index row should be recorded for this table.
        ks, cf = tbl.split(".")
        lookup = f"SELECT index_name FROM system_schema.indexes WHERE keyspace_name = '{ks}' AND table_name = '{cf}'"
        index_rows = list(cql.execute(lookup))
        assert len(index_rows) == 1
# IF NOT EXISTS with a different locality than the existing index should
# create the second index, not silently skip it.
def test_global_then_local_vector_index_if_not_exists(cql, test_keyspace, scylla_only, skip_without_tablets):
    # With a global index already present, a local index created with
    # IF NOT EXISTS must actually be created rather than skipped.
    columns = 'p1 int, p2 int, v vector<float, 3>, PRIMARY KEY ((p1, p2))'
    with new_test_table(cql, test_keyspace, columns) as tbl:
        cql.execute(f"CREATE CUSTOM INDEX ON {tbl}(v) USING 'sai'")
        cql.execute(f"CREATE CUSTOM INDEX IF NOT EXISTS ON {tbl}((p1, p2), v) USING 'sai'")
        # Both the global and the local index should now be listed.
        cf = tbl.split('.')[1]
        lookup = f"SELECT index_name FROM system_schema.indexes WHERE keyspace_name = '{test_keyspace}' AND table_name = '{cf}'"
        index_rows = list(cql.execute(lookup))
        assert len(index_rows) == 2
###############################################################################
# SAI (StorageAttachedIndex) compatibility tests
#