mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-26 03:20:37 +00:00
vector-store: fix creating local vector search indexes with a part of the partition key
Users ought to have the possibility to create a local index for Vector Search
based only on a part of the partition key. This commit provides this by
removing the 'full partition key only' requirement for custom local indexes.
The commit updates the docs to explain that a local vector index can use only
a part of the partition key.
The commit adds a cqlpy test to check the fixed functionality.
Fixes: SCYLLADB-953
Needs to be backported to 2026.1 as it is a fix for local vector indexes.
Closes scylladb/scylladb#28931
(cherry picked from commit 7883f161bb)
Closes scylladb/scylladb#29543
This commit is contained in:
@@ -459,11 +459,15 @@ void create_index_statement::validate_for_local_index(const schema& schema) cons
|
||||
auto base_pk_identifiers = *index_pk | std::views::transform([&schema] (const ::shared_ptr<column_identifier::raw>& raw_ident) {
|
||||
return raw_ident->prepare_column_identifier(schema);
|
||||
});
|
||||
auto const is_vector_index = _idx_properties->custom_class && is_vector_capable_class(*_idx_properties->custom_class);
|
||||
auto remaining_base_pk_columns = schema.partition_key_columns();
|
||||
auto next_expected_base_column = remaining_base_pk_columns.begin();
|
||||
for (const auto& ident : base_pk_identifiers) {
|
||||
auto it = schema.columns_by_name().find(ident->name());
|
||||
if (it == schema.columns_by_name().end() || !it->second->is_partition_key()) {
|
||||
if (is_vector_index) {
|
||||
throw exceptions::invalid_request_exception(format("Local vector index definition must contain partition key's columns only. Redundant column: {}", ident->to_string()));
|
||||
}
|
||||
throw exceptions::invalid_request_exception(format("Local index definition must contain full partition key only. Redundant column: {}", ident->to_string()));
|
||||
}
|
||||
if (next_expected_base_column == remaining_base_pk_columns.end()) {
|
||||
@@ -474,7 +478,7 @@ void create_index_statement::validate_for_local_index(const schema& schema) cons
|
||||
}
|
||||
++next_expected_base_column;
|
||||
}
|
||||
if (next_expected_base_column != remaining_base_pk_columns.end()) {
|
||||
if (!is_vector_index && next_expected_base_column != remaining_base_pk_columns.end()) {
|
||||
throw exceptions::invalid_request_exception(format("Local index definition must contain full partition key only. Missing column: {}", next_expected_base_column->name_as_text()));
|
||||
}
|
||||
if (_raw_targets.size() == 1) {
|
||||
|
||||
@@ -148,8 +148,8 @@ the ``CUSTOM`` keyword and specifying the index type as ``vector_index``. It is
|
||||
add additional columns to the index for filtering the search results. The partition column
|
||||
specified in the global vector index definition must be the vector column, and any subsequent
|
||||
columns are treated as filtering columns. The local vector index requires that the partition key
|
||||
of the base table is also the partition key of the index and the vector column is the first one
|
||||
from the following columns.
|
||||
of the index is a subset of the table's partition key columns and the vector column is the first
|
||||
one from the following columns.
|
||||
|
||||
Example of a simple index:
|
||||
|
||||
@@ -187,8 +187,9 @@ Example of a local vector index:
|
||||
The vector column (``embedding``) is indexed for similarity search (a local
|
||||
index) and additional columns are added for filtering the search results. The
|
||||
filtering is possible on ``category``, ``info`` and all primary key columns of
|
||||
the base table. The columns ``id`` and ``created_at`` must be the partition key
|
||||
of the base table.
|
||||
the base table. The partition key of the base table must contain both columns
|
||||
``id`` and ``created_at``. It is allowed to create a local vector index using
|
||||
only a part of the partition key columns of the base table.
|
||||
|
||||
Vector indexes support additional filtering columns of native data types
|
||||
(excluding counter and duration). The indexed column itself must be a vector
|
||||
|
||||
@@ -91,6 +91,14 @@ def test_create_vector_search_local_index_with_filtering_columns(cql, test_keysp
|
||||
with new_test_table(cql, test_keyspace, schema) as table:
|
||||
cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1, p2), v, f1, f2) USING 'vector_index'")
|
||||
|
||||
def test_create_vector_search_local_index_with_part_of_partition_key(cql, test_keyspace, scylla_only, skip_without_tablets):
    """Check local vector index creation keyed on part of the partition key.

    The base table has the composite partition key (p1, p2). The test checks that:
      * naming a non-partition-key column (c1) in the index partition key is
        rejected with the vector-specific "Redundant column" error;
      * an index keyed on only p1, and another keyed on only p2, can be created.
    """
    schema = 'p1 int, p2 int, c1 int, c2 int, v1 vector<float, 3>, v2 vector<float, 3>, f1 int, f2 int, primary key ((p1, p2), c1, c2)'
    with new_test_table(cql, test_keyspace, schema) as table:
        # Negative case: c1 is a clustering column, so it must be reported as
        # redundant. The indexed column is v1 (which exists in the schema) so
        # that the partition-key violation is the only thing wrong with the
        # statement and the expected error does not depend on validation order.
        with pytest.raises(InvalidRequest, match="Local vector index definition must contain partition key's columns only. Redundant column: c1"):
            cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1, c1), v1, f1, f2) USING 'vector_index'")

        # Positive cases: each partition key column alone is accepted as the
        # index partition key; a different vector column is used per index.
        cql.execute(f"CREATE CUSTOM INDEX ON {table}((p1), v1, f1, f2) USING 'vector_index'")
        cql.execute(f"CREATE CUSTOM INDEX ON {table}((p2), v2, f1, f2) USING 'vector_index'")
|
||||
|
||||
def test_create_vector_search_local_index_with_filtering_columns_on_nonvector_column(cql, test_keyspace, scylla_only, skip_without_tablets):
|
||||
schema = 'p1 int, p2 int, c1 int, c2 int, v int, f1 int, f2 int, primary key ((p1, p2), c1, c2)'
|
||||
with new_test_table(cql, test_keyspace, schema) as table:
|
||||
|
||||
Reference in New Issue
Block a user