From a8c7c9d561fc77fa65593e5f93bc8dcfe5286b4e Mon Sep 17 00:00:00 2001 From: Pawel Pery Date: Fri, 6 Mar 2026 12:32:54 +0100 Subject: [PATCH] vector-store: fix creating local vector search indexes with a part of the partition key Users ought to be able to create a local index for Vector Search based on only a part of the partition key. This commit provides this by removing the 'full partition key only' requirement for custom local vector indexes. The commit updates the docs to explain that a local vector index can use only a part of the partition key. The commit adds a cqlpy test to check the fixed functionality. Fixes: SCYLLADB-953 Needs to be backported to 2026.1 as it is a fix for local vector indexes. Closes scylladb/scylladb#28931 (cherry picked from commit 7883f161bb8ea1c3719a28b52bad4eb6be13a7f8) Closes scylladb/scylladb#29543 --- cql3/statements/create_index_statement.cc | 6 +++++- docs/cql/secondary-indexes.rst | 9 +++++---- test/cqlpy/test_vector_index.py | 8 ++++++++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/cql3/statements/create_index_statement.cc b/cql3/statements/create_index_statement.cc index e2a113dedd..dff65fb251 100644 --- a/cql3/statements/create_index_statement.cc +++ b/cql3/statements/create_index_statement.cc @@ -459,11 +459,15 @@ void create_index_statement::validate_for_local_index(const schema& schema) cons auto base_pk_identifiers = *index_pk | std::views::transform([&schema] (const ::shared_ptr& raw_ident) { return raw_ident->prepare_column_identifier(schema); }); + auto const is_vector_index = _idx_properties->custom_class && is_vector_capable_class(*_idx_properties->custom_class); auto remaining_base_pk_columns = schema.partition_key_columns(); auto next_expected_base_column = remaining_base_pk_columns.begin(); for (const auto& ident : base_pk_identifiers) { auto it = schema.columns_by_name().find(ident->name()); if (it == schema.columns_by_name().end() || !it->second->is_partition_key()) { + if (is_vector_index) { + 
throw exceptions::invalid_request_exception(format("Local vector index definition must contain partition key's columns only. Redundant column: {}", ident->to_string())); + } throw exceptions::invalid_request_exception(format("Local index definition must contain full partition key only. Redundant column: {}", ident->to_string())); } if (next_expected_base_column == remaining_base_pk_columns.end()) { @@ -474,7 +478,7 @@ void create_index_statement::validate_for_local_index(const schema& schema) cons } ++next_expected_base_column; } - if (next_expected_base_column != remaining_base_pk_columns.end()) { + if (!is_vector_index && next_expected_base_column != remaining_base_pk_columns.end()) { throw exceptions::invalid_request_exception(format("Local index definition must contain full partition key only. Missing column: {}", next_expected_base_column->name_as_text())); } if (_raw_targets.size() == 1) { diff --git a/docs/cql/secondary-indexes.rst b/docs/cql/secondary-indexes.rst index 0303956c7c..ade21fa7c0 100644 --- a/docs/cql/secondary-indexes.rst +++ b/docs/cql/secondary-indexes.rst @@ -148,8 +148,8 @@ the ``CUSTOM`` keyword and specifying the index type as ``vector_index``. It is add additional columns to the index for filtering the search results. The partition column specified in the global vector index definition must be the vector column, and any subsequent columns are treated as filtering columns. The local vector index requires that the partition key -of the base table is also the partition key of the index and the vector column is the first one -from the following columns. +of the index is a subset of the table's partition key columns and the vector column is the first +one from the following columns. Example of a simple index: @@ -187,8 +187,9 @@ Example of a local vector index: The vector column (``embedding``) is indexed for similarity search (a local index) and additional columns are added for filtering the search results. 
The filtering is possible on ``category``, ``info`` and all primary key columns of -the base table. The columns ``id`` and ``created_at`` must be the partition key -of the base table. +the base table. The partition key of the base table must contain both columns +``id`` and ``created_at``. It is allowed to create a local vector index using +only a part of the partition key columns of the base table. Vector indexes support additional filtering columns of native data types (excluding counter and duration). The indexed column itself must be a vector diff --git a/test/cqlpy/test_vector_index.py b/test/cqlpy/test_vector_index.py index ba0219cf05..88caf71c48 100644 --- a/test/cqlpy/test_vector_index.py +++ b/test/cqlpy/test_vector_index.py @@ -91,6 +91,14 @@ def test_create_vector_search_local_index_with_filtering_columns(cql, test_keysp with new_test_table(cql, test_keyspace, schema) as table: cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1, p2), v, f1, f2) USING 'vector_index'") +def test_create_vector_search_local_index_with_part_of_partition_key(cql, test_keyspace, scylla_only, skip_without_tablets): + schema = 'p1 int, p2 int, c1 int, c2 int, v1 vector, v2 vector, f1 int, f2 int, primary key ((p1, p2), c1, c2)' + with new_test_table(cql, test_keyspace, schema) as table: + with pytest.raises(InvalidRequest, match="Local vector index definition must contain partition key's columns only. Redundant column: c1"): + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1, c1), v, f1, f2) USING 'vector_index'") + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1), v1, f1, f2) USING 'vector_index'") + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p2), v2, f1, f2) USING 'vector_index'") + def test_create_vector_search_local_index_with_filtering_columns_on_nonvector_column(cql, test_keyspace, scylla_only, skip_without_tablets): schema = 'p1 int, p2 int, c1 int, c2 int, v int, f1 int, f2 int, primary key ((p1, p2), c1, c2)' with new_test_table(cql, test_keyspace, schema) as table: