diff --git a/cql3/statements/create_index_statement.cc b/cql3/statements/create_index_statement.cc index e2a113dedd..dff65fb251 100644 --- a/cql3/statements/create_index_statement.cc +++ b/cql3/statements/create_index_statement.cc @@ -459,11 +459,15 @@ void create_index_statement::validate_for_local_index(const schema& schema) cons auto base_pk_identifiers = *index_pk | std::views::transform([&schema] (const ::shared_ptr& raw_ident) { return raw_ident->prepare_column_identifier(schema); }); + auto const is_vector_index = _idx_properties->custom_class && is_vector_capable_class(*_idx_properties->custom_class); auto remaining_base_pk_columns = schema.partition_key_columns(); auto next_expected_base_column = remaining_base_pk_columns.begin(); for (const auto& ident : base_pk_identifiers) { auto it = schema.columns_by_name().find(ident->name()); if (it == schema.columns_by_name().end() || !it->second->is_partition_key()) { + if (is_vector_index) { + throw exceptions::invalid_request_exception(format("Local vector index definition must contain partition key's columns only. Redundant column: {}", ident->to_string())); + } throw exceptions::invalid_request_exception(format("Local index definition must contain full partition key only. Redundant column: {}", ident->to_string())); } if (next_expected_base_column == remaining_base_pk_columns.end()) { @@ -474,7 +478,7 @@ void create_index_statement::validate_for_local_index(const schema& schema) cons } ++next_expected_base_column; } - if (next_expected_base_column != remaining_base_pk_columns.end()) { + if (!is_vector_index && next_expected_base_column != remaining_base_pk_columns.end()) { throw exceptions::invalid_request_exception(format("Local index definition must contain full partition key only. 
Missing column: {}", next_expected_base_column->name_as_text())); } if (_raw_targets.size() == 1) { diff --git a/docs/cql/secondary-indexes.rst b/docs/cql/secondary-indexes.rst index 0303956c7c..ade21fa7c0 100644 --- a/docs/cql/secondary-indexes.rst +++ b/docs/cql/secondary-indexes.rst @@ -148,8 +148,8 @@ the ``CUSTOM`` keyword and specifying the index type as ``vector_index``. It is add additional columns to the index for filtering the search results. The partition column specified in the global vector index definition must be the vector column, and any subsequent columns are treated as filtering columns. The local vector index requires that the partition key -of the base table is also the partition key of the index and the vector column is the first one -from the following columns. +of the index is a subset of the table's partition key columns and the vector column is the first +one from the following columns. Example of a simple index: @@ -187,8 +187,9 @@ Example of a local vector index: The vector column (``embedding``) is indexed for similarity search (a local index) and additional columns are added for filtering the search results. The filtering is possible on ``category``, ``info`` and all primary key columns of -the base table. The columns ``id`` and ``created_at`` must be the partition key -of the base table. +the base table. The partition key of the base table must contain both columns +``id`` and ``created_at``. It is allowed to create a local vector index using +only a part of the partition key columns of the base table. Vector indexes support additional filtering columns of native data types (excluding counter and duration). 
The indexed column itself must be a vector diff --git a/test/cqlpy/test_vector_index.py b/test/cqlpy/test_vector_index.py index ba0219cf05..88caf71c48 100644 --- a/test/cqlpy/test_vector_index.py +++ b/test/cqlpy/test_vector_index.py @@ -91,6 +91,14 @@ def test_create_vector_search_local_index_with_filtering_columns(cql, test_keysp with new_test_table(cql, test_keyspace, schema) as table: cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1, p2), v, f1, f2) USING 'vector_index'") +def test_create_vector_search_local_index_with_part_of_partition_key(cql, test_keyspace, scylla_only, skip_without_tablets): + schema = 'p1 int, p2 int, c1 int, c2 int, v1 vector, v2 vector, f1 int, f2 int, primary key ((p1, p2), c1, c2)' + with new_test_table(cql, test_keyspace, schema) as table: + with pytest.raises(InvalidRequest, match="Local vector index definition must contain partition key's columns only. Redundant column: c1"): + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1, c1), v, f1, f2) USING 'vector_index'") + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1), v1, f1, f2) USING 'vector_index'") + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p2), v2, f1, f2) USING 'vector_index'") + def test_create_vector_search_local_index_with_filtering_columns_on_nonvector_column(cql, test_keyspace, scylla_only, skip_without_tablets): schema = 'p1 int, p2 int, c1 int, c2 int, v int, f1 int, f2 int, primary key ((p1, p2), c1, c2)' with new_test_table(cql, test_keyspace, schema) as table: