Files
scylladb/test/cqlpy/test_vector_index.py
Pawel Pery f49c9e896a vector_search: allow full secondary indexes syntax while creating the vector index
Vector Search feature needs to support creating vector indexes with additional
filtering column. There will be two types of indexes: global which indexes
vectors per table, and local which indexes vectors per partition key. The new
syntaxes are based on ScyllaDB's Global Secondary Index and Local Secondary
Index. Vector indexes don't use any secondary index functionality -
all indexing, filtering and processing data will be done on Vector Store side.

This patch allows creating vector indexes using this CQL syntax:

```
CREATE TABLE IF NOT EXISTS cycling.comments_vs (
  commenter text,
  comment text,
  comment_vector VECTOR <FLOAT, 5>,
  created_at timestamp,
  discussion_board_id int,
  country text,
  lang text,
  PRIMARY KEY ((commenter, discussion_board_id), created_at)
);

CREATE CUSTOM INDEX IF NOT EXISTS global_ann_index
  ON cycling.comments_vs(comment_vector, country, lang) USING 'vector_index'
  WITH OPTIONS = { 'similarity_function': 'DOT_PRODUCT' };

CREATE CUSTOM INDEX IF NOT EXISTS local_ann_index
  ON cycling.comments_vs((commenter, discussion_board_id), comment_vector, country, lang)
  USING 'vector_index'
  WITH OPTIONS = { 'similarity_function': 'DOT_PRODUCT' };
```

Currently, if we run these queries to create indexes, we receive the following errors:

```
InvalidRequest: Error from server: code=2200 [Invalid query] message="Vector index can only be created on a single column"
InvalidRequest: Error from server: code=2200 [Invalid query] message="Local index definition must contain full partition key only. Redundant column: XYZ"
```

This commit refactors `vector_index::check_target` to correctly validate
columns building the index. Vector-store currently supports filtering by native
types, so the type of columns is checked. The first column from the list must
be a vector (to build index based on these vectors), so it is also checked.

Allowed types for columns are native types without counter (it is not possible
to create a table with counter and vector) and without duration (it is not
possible to correctly compare durations; this type is not even allowed in
secondary indexes).

This commit adds a cqlpy test to check errors while creating indexes.

Fixes: SCYLLADB-298

This needs to be backported to version 2026.1 as this is a fix for filtering support.

Closes scylladb/scylladb#28366
2026-01-30 01:14:31 +02:00

464 lines
25 KiB
Python

# Copyright 2025-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
###############################################################################
# Tests for vector indexes
###############################################################################
import pytest
from .util import new_test_table, is_scylla, unique_name
from cassandra.protocol import InvalidRequest, ConfigurationException
# CQL column types that the tests in this file expect to be accepted as
# filtering columns of a vector index. A type's position in the list decides
# the name of the column generated for it (s0, s1, ...), so keep the order.
supported_filtering_types = [
    'ascii', 'bigint', 'blob', 'boolean',
    'date', 'decimal', 'double', 'float',
    'inet', 'int', 'smallint',
    'text', 'varchar',
    'time', 'timestamp', 'timeuuid', 'tinyint',
    'uuid', 'varint',
]
# CQL column types that the tests in this file expect to be rejected as
# filtering columns of a vector index. A type's position in the list decides
# the name of the column generated for it (u0, u1, ...), so keep the order.
unsupported_filtering_types = [
    'duration',
    # Non-frozen collections, tuples and vectors.
    'map<int, int>', 'list<int>', 'set<int>',
    'tuple<int, int>',
    'vector<float, 3>',
    # Frozen collections and tuples.
    'frozen<map<int, int>>', 'frozen<list<int>>', 'frozen<set<int>>',
    'frozen<tuple<int, int>>',
]
# Creating a 'vector_index' custom index on a vector<float, 3> column is
# expected to succeed (the test passes if no exception is raised).
def test_create_vector_search_index(cql, test_keyspace, scylla_only, skip_without_tablets):
    table_schema = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        create_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'"
        cql.execute(create_stmt)
# CREATE INDEX ... USING '<class>' (without the CUSTOM keyword) is expected to
# succeed. The index class is 'vector_index' when is_scylla() reports Scylla
# and 'sai' otherwise.
def test_create_vector_search_index_without_custom_keyword(cql, test_keyspace, skip_without_tablets):
    table_schema = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        index_class = 'vector_index' if is_scylla(cql) else 'sai'
        cql.execute(f"CREATE INDEX ON {table}(v) USING '{index_class}'")
# Naming an unknown custom index class must be rejected. Two alternative
# exception types and error messages are accepted by the check below.
def test_create_custom_index_with_invalid_class(cql, test_keyspace):
    expected_error = r"Non-supported custom class|Unable to find"
    table_schema = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        bogus_class = "invalid.custom.class"
        create_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING '{bogus_class}'"
        with pytest.raises((InvalidRequest, ConfigurationException), match=expected_error):
            cql.execute(create_stmt)
# CREATE CUSTOM INDEX without a USING clause must be rejected. Two alternative
# exception types and error messages are accepted by the check below.
def test_create_custom_index_without_custom_class(cql, test_keyspace):
    expected_error = r"CUSTOM index requires specifying|Unable to find"
    table_schema = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        create_stmt = f"CREATE CUSTOM INDEX ON {table}(v)"
        with pytest.raises((InvalidRequest, ConfigurationException), match=expected_error):
            cql.execute(create_stmt)
# A vector index on a plain int column must be rejected with the
# "only supported on columns of vectors of floats" error.
def test_create_vector_search_index_on_nonvector_column(cql, test_keyspace, scylla_only, skip_without_tablets):
    expected_error = "Vector indexes are only supported on columns of vectors of floats"
    table_schema = 'p int primary key, v int'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        create_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'"
        with pytest.raises(InvalidRequest, match=expected_error):
            cql.execute(create_stmt)
# Global vector index syntax: the vector column followed by additional
# filtering columns (v, f1, f2). Creation is expected to succeed.
def test_create_vector_search_global_index_with_filtering_columns(cql, test_keyspace, scylla_only, skip_without_tablets):
    table_schema = 'p1 int, p2 int, c1 int, c2 int, v vector<float, 3>, f1 int, f2 int, primary key ((p1, p2), c1, c2)'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        create_stmt = f"CREATE CUSTOM INDEX ON {table}(v, f1, f2) USING 'vector_index'"
        cql.execute(create_stmt)
# Local vector index syntax: the full partition key in parentheses, then the
# vector column, then filtering columns. Creation is expected to succeed.
def test_create_vector_search_local_index_with_filtering_columns(cql, test_keyspace, scylla_only, skip_without_tablets):
    table_schema = 'p1 int, p2 int, c1 int, c2 int, v vector<float, 3>, f1 int, f2 int, primary key ((p1, p2), c1, c2)'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        create_stmt = f"CREATE CUSTOM INDEX ON {table}((p1, p2), v, f1, f2) USING 'vector_index'"
        cql.execute(create_stmt)
# Local vector index syntax where the column in the vector position (v) is an
# int: creation must be rejected with the "vectors of floats" error.
def test_create_vector_search_local_index_with_filtering_columns_on_nonvector_column(cql, test_keyspace, scylla_only, skip_without_tablets):
    expected_error = "Vector indexes are only supported on columns of vectors of floats"
    table_schema = 'p1 int, p2 int, c1 int, c2 int, v int, f1 int, f2 int, primary key ((p1, p2), c1, c2)'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        create_stmt = f"CREATE CUSTOM INDEX ON {table}((p1, p2), v, f1, f2) USING 'vector_index'"
        with pytest.raises(InvalidRequest, match=expected_error):
            cql.execute(create_stmt)
# Builds one table holding a column of every type from
# supported_filtering_types (s0, s1, ...) and unsupported_filtering_types
# (u0, u1, ...). Then, for both the global and the local index syntax:
#  * every s<N> column must be usable as a filtering column (the index is
#    created and dropped again so the index name can be reused),
#  * every u<N> column must be rejected with InvalidRequest.
def test_create_vector_search_index_with_supported_and_unsupported_filtering_columns(cql, test_keyspace, scylla_only, skip_without_tablets):
    supported_columns = ', '.join(f's{pos} {cql_type}' for pos, cql_type in enumerate(supported_filtering_types))
    unsupported_columns = ', '.join(f'u{pos} {cql_type}' for pos, cql_type in enumerate(unsupported_filtering_types))
    schema = f'p int, c int, v vector<float, 3>, {supported_columns}, {unsupported_columns}, primary key (p, c)'
    with new_test_table(cql, test_keyspace, schema) as table:
        for pos, _ in enumerate(supported_filtering_types):
            # (index name, index target list) for the global and local variants.
            variants = (
                ('global_idx', f'v, s{pos}'),
                ('local_idx', f'(p), v, s{pos}'),
            )
            for index_name, targets in variants:
                cql.execute(f"CREATE CUSTOM INDEX {index_name} ON {table}({targets}) USING 'vector_index'")
                cql.execute(f"DROP INDEX {test_keyspace}.{index_name}")
        for pos, _ in enumerate(unsupported_filtering_types):
            expected_error = f"Unsupported vector index filtering column u{pos} type|Secondary indexes are not supported"
            variants = (
                ('global_idx', f'v, u{pos}'),
                ('local_idx', f'(p), v, u{pos}'),
            )
            for index_name, targets in variants:
                with pytest.raises(InvalidRequest, match=expected_error):
                    cql.execute(f"CREATE CUSTOM INDEX {index_name} ON {table}({targets}) USING 'vector_index'")
# For every type in unsupported_filtering_types, try to use it as the
# partition key of a table carrying a local vector index. An InvalidRequest
# mentioning "Unsupported" or "Invalid" is required each time.
def test_create_vector_search_local_index_with_unsupported_partition_columns(cql, test_keyspace, scylla_only, skip_without_tablets):
    for partition_type in unsupported_filtering_types:
        table_schema = f'p {partition_type}, c int, v vector<float, 3>, f int, primary key (p, c)'
        # The raises block encloses new_test_table() as well as the CREATE
        # INDEX statement, so an InvalidRequest coming from either one
        # satisfies the check.
        with pytest.raises(InvalidRequest, match="Unsupported|Invalid"):
            with new_test_table(cql, test_keyspace, table_schema) as table:
                create_stmt = f"CREATE CUSTOM INDEX ON {table}((p), v, f) USING 'vector_index'"
                cql.execute(create_stmt)
# A partition key column used as a filtering column, or the same filtering
# column listed twice, must be rejected - in both the global and the local
# index syntax.
def test_create_vector_search_index_with_duplicated_columns(cql, test_keyspace, scylla_only, skip_without_tablets):
    partition_key_error = "Cannot create secondary index on partition key column p"
    duplicate_error = "Duplicate column x in index target list"
    # (index name, index target list, expected error), executed in this order.
    cases = (
        ('global_idx', 'v, p', partition_key_error),
        ('global_idx', 'v, x, x', duplicate_error),
        ('local_idx', '(p), v, p', partition_key_error),
        ('local_idx', '(p), v, x, x', duplicate_error),
    )
    table_schema = 'p int, c int, v vector<float, 3>, x int, primary key (p, c)'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        for index_name, targets, expected_error in cases:
            with pytest.raises(InvalidRequest, match=expected_error):
                cql.execute(f"CREATE CUSTOM INDEX {index_name} ON {table}({targets}) USING 'vector_index'")
# An option name unknown to the vector index must be rejected with an
# "Unsupported option" error.
def test_create_vector_search_index_with_bad_options(cql, test_keyspace, scylla_only, skip_without_tablets):
    table_schema = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        create_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'bad_option': 'bad_value'}}"
        with pytest.raises(InvalidRequest, match="Unsupported option"):
            cql.execute(create_stmt)
# Numeric index options must be rejected when the value is outside the
# accepted range or is not an integer at all.
def test_create_vector_search_index_with_bad_numeric_value(cql, test_keyspace, scylla_only, skip_without_tablets):
    out_of_range = "out of valid range"
    not_an_integer = "not an integer"
    # (option name, option value, expected error), executed in this order.
    cases = (
        ('maximum_node_connections', '-1', out_of_range),
        ('maximum_node_connections', '513', out_of_range),
        ('maximum_node_connections', 'dog', not_an_integer),
        ('maximum_node_connections', '123dog', not_an_integer),
        ('construction_beam_width', '5000', out_of_range),
    )
    table_schema = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        for option, value, expected_error in cases:
            with pytest.raises(InvalidRequest, match=expected_error):
                cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'{option}': '{value}' }}")
# An unknown value for the 'similarity_function' option must be rejected.
def test_create_vector_search_index_with_bad_similarity_value(cql, test_keyspace, scylla_only, skip_without_tablets):
    expected_error = "Invalid value in option 'similarity_function'"
    table_schema = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        create_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'similarity_function': 'bad_similarity_function'}}"
        with pytest.raises(InvalidRequest, match=expected_error):
            cql.execute(create_stmt)
# A vector index on a vector<int, 3> column must be rejected: the element
# type has to be float.
def test_create_vector_search_index_on_nonfloat_vector_column(cql, test_keyspace, scylla_only, skip_without_tablets):
    expected_error = "Vector indexes are only supported on columns of vectors of floats"
    table_schema = 'p int primary key, v vector<int, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        create_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'"
        with pytest.raises(InvalidRequest, match=expected_error):
            cql.execute(create_stmt)
# A vector index named abc must not leave an 'abc_index' row in
# system_schema.views, whereas a regular index named def must leave exactly
# one 'def_index' row there.
def test_no_view_for_vector_search_index(cql, test_keyspace, scylla_only, skip_without_tablets):
    def count_views(view_name):
        # Number of system_schema.views rows for this keyspace and view name.
        rows = cql.execute(f"SELECT * FROM system_schema.views WHERE keyspace_name = '{test_keyspace}' AND view_name = '{view_name}'")
        return len(rows.current_rows)

    table_schema = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        cql.execute(f"CREATE CUSTOM INDEX abc ON {table}(v) USING 'vector_index'")
        assert count_views('abc_index') == 0, "Vector search index should not create a view in system_schema.views"
    with new_test_table(cql, test_keyspace, table_schema) as table:
        cql.execute(f"CREATE INDEX def ON {table}(v)")
        assert count_views('def_index') == 1, "Regular index should create a view in system_schema.views"
# DESC INDEX must report a "CREATE CUSTOM INDEX ..." statement for an index
# built with a custom class, whether or not the CUSTOM keyword was used when
# the index was created.
def test_describe_custom_index(cql, test_keyspace, skip_without_tablets):
    table_schema = 'p int primary key, v1 vector<float, 3>, v2 vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        # Cassandra inserts a space between the table name and parentheses,
        # Scylla doesn't. This difference doesn't matter because both are
        # valid CQL commands.
        # Scylla doesn't support the sai custom class.
        if is_scylla(cql):
            maybe_space, custom_class = '', 'vector_index'
        else:
            maybe_space, custom_class = ' ', 'sai'

        # Create both indexes first, then describe them, then compare.
        for create_stmt in (
            f"CREATE INDEX custom ON {table}(v1) USING '{custom_class}'",
            f"CREATE CUSTOM INDEX custom1 ON {table}(v2) USING '{custom_class}'",
        ):
            cql.execute(create_stmt)

        described = {
            index_name: cql.execute(f"DESC INDEX {test_keyspace}.{index_name}").one().create_statement
            for index_name in ('custom', 'custom1')
        }

        assert f"CREATE CUSTOM INDEX custom ON {table}{maybe_space}(v1) USING '{custom_class}'" in described['custom']
        assert f"CREATE CUSTOM INDEX custom1 ON {table}{maybe_space}(v2) USING '{custom_class}'" in described['custom1']
# The 'index_version' option stored for a vector index must equal the base
# table's schema version from before the index was created, and must change
# when the index is dropped and created again with the same parameters.
def test_vector_index_version_on_recreate(cql, test_keyspace, scylla_only, skip_without_tablets):
    table_schema = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        _, table_name = table.split('.')
        base_table_version_query = f"SELECT version FROM system_schema.scylla_tables WHERE keyspace_name = '{test_keyspace}' AND table_name = '{table_name}'"
        index_version_query = f"SELECT * FROM system_schema.indexes WHERE keyspace_name = '{test_keyspace}' AND table_name = '{table_name}' AND index_name = 'abc'"
        create_stmt = f"CREATE CUSTOM INDEX abc ON {table}(v) USING 'vector_index'"

        def fetch_index_version():
            # Exactly one row for index 'abc' is expected.
            rows = cql.execute(index_version_query).current_rows
            assert len(rows) == 1
            return rows[0].options['index_version']

        # Remember the base table version as it was before the index existed.
        base_version = str(cql.execute(base_table_version_query).one().version)

        # A freshly created index carries that base table version.
        cql.execute(create_stmt)
        assert fetch_index_version() == base_version

        # Re-creating the index with identical parameters yields a new version.
        cql.execute(f"DROP INDEX {test_keyspace}.abc")
        cql.execute(create_stmt)
        assert fetch_index_version() != base_version
# The 'index_version' option stored for a vector index must stay the same
# after the base table is altered (here: a column is added).
def test_vector_index_version_unaffected_by_alter(cql, test_keyspace, scylla_only, skip_without_tablets):
    table_schema = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        _, table_name = table.split('.')
        base_table_version_query = f"SELECT version FROM system_schema.scylla_tables WHERE keyspace_name = '{test_keyspace}' AND table_name = '{table_name}'"
        index_version_query = f"SELECT * FROM system_schema.indexes WHERE keyspace_name = '{test_keyspace}' AND table_name = '{table_name}' AND index_name = 'abc'"

        def fetch_index_version():
            # Exactly one row for index 'abc' is expected.
            rows = cql.execute(index_version_query).current_rows
            assert len(rows) == 1
            return rows[0].options['index_version']

        # Remember the base table version as it was before the index existed.
        base_version = str(cql.execute(base_table_version_query).one().version)

        # A freshly created index carries that base table version.
        cql.execute(f"CREATE CUSTOM INDEX abc ON {table}(v) USING 'vector_index'")
        assert fetch_index_version() == base_version

        # Altering the base table must not touch the index version.
        cql.execute(f"ALTER TABLE {table} ADD v2 vector<float, 3>")
        assert fetch_index_version() == base_version
# 'index_version' may not be supplied by the user in WITH OPTIONS; doing so
# must be rejected.
def test_vector_index_version_fail_given_as_option(cql, test_keyspace, scylla_only):
    expected_error = "Cannot specify index_version as a CUSTOM option"
    table_schema = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, table_schema) as table:
        create_stmt = f"CREATE CUSTOM INDEX abc ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'index_version': '18ad2003-05ea-17d9-1855-0325ac0a755d'}}"
        with pytest.raises(InvalidRequest, match=expected_error):
            cql.execute(create_stmt)
# Only one vector index may exist on a column: a second identical CREATE must
# fail, while the IF NOT EXISTS form must be accepted without an error.
def test_one_vector_index_on_column(cql, test_keyspace, skip_without_tablets):
    table_schema = "p int primary key, v vector<float, 3>"
    index_class = 'vector_index' if is_scylla(cql) else 'sai'
    duplicate_error = r"There exists a duplicate custom index|Cannot create more than one storage-attached index on the same column"

    with new_test_table(cql, test_keyspace, table_schema) as table:
        create_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING '{index_class}'"
        cql.execute(create_stmt)
        with pytest.raises(InvalidRequest, match=duplicate_error):
            cql.execute(create_stmt)
        cql.execute(f"CREATE CUSTOM INDEX IF NOT EXISTS ON {table}(v) USING '{index_class}'")
# Reproduces issue #26672
# Issues CREATE CUSTOM INDEX IF NOT EXISTS with the same index name against
# two different tables of one keyspace; neither statement may raise.
def test_two_same_name_indexes_on_different_tables_with_if_not_exists(cql, test_keyspace, scylla_only, skip_without_tablets):
    table_schema = "p int primary key, v vector<float, 3>"
    with new_test_table(cql, test_keyspace, table_schema) as first_table:
        with new_test_table(cql, test_keyspace, table_schema) as second_table:
            for target_table in (first_table, second_table):
                cql.execute(f"CREATE CUSTOM INDEX IF NOT EXISTS ann_index ON {target_table}(v) USING 'vector_index'")
###############################################################################
# Tests for CDC with vector indexes
#
# The following tests verify that the constraints between Vector Search
# and CDC settings are properly enforced.
#
# If a vector index is created, CDC may only be enabled with options meeting
# the Vector Search requirements:
# - CDC's TTL must be at least 24 hours (86400 seconds) OR set to 0 (infinite).
# - CDC's delta mode must be set to 'full' or CDC's postimage must be enabled.
#
# We test that:
# * Enabling CDC with default or valid options succeeds.
# * Enabling CDC with invalid TTL (< 24h) or invalid delta and postimage
# disabled is rejected.
# * Preimage options can be freely toggled.
# * Disabling CDC is not allowed when a vector index already exists.
#
# We also verify that creating a vector index is forbidden
# if CDC is enabled but uses invalid options or CDC is explicitly disabled
# (set to false by the user), and allowed only when CDC is either undeclared
# or configured to satisfy the minimal Vector Search requirements.
###############################################################################
# Minimal non-zero CDC TTL accepted alongside a vector index: 24 hours,
# expressed in seconds.
VS_TTL_SECONDS = 24 * 60 * 60
# Run "ALTER TABLE <table> WITH cdc = <options>", where options is a Python
# dict interpolated into the statement as-is.
# Returns True if the statement succeeded and False if it was rejected with
# the "CDC log must meet the minimal requirements of Vector Search" error.
# An InvalidRequest carrying any other message fails the pytest.raises match
# check, and with it the calling test.
def alter_cdc(cql, table, options):
    statement = f"ALTER TABLE {table} WITH cdc = {options}"
    try:
        cql.execute(statement)
    except InvalidRequest as err:
        # Re-raise inside pytest.raises purely to verify the error message.
        with pytest.raises(InvalidRequest, match="CDC log must meet the minimal requirements of Vector Search"):
            raise err
        return False
    else:
        return True
# Try to create a uniquely named vector index on table(column).
# Returns False if creation was rejected with the "CDC log must meet the
# minimal requirements of Vector Search" error; an InvalidRequest carrying
# any other message fails the pytest.raises match check. On success the index
# is dropped again, so the helper can be called repeatedly, and True is
# returned.
def create_index(cql, test_keyspace, table, column):
    index_name = f"{column}_idx_{unique_name()}"
    create_stmt = f"CREATE INDEX {index_name} ON {table} ({column}) USING 'vector_index'"
    try:
        cql.execute(create_stmt)
    except InvalidRequest as err:
        # Re-raise inside pytest.raises purely to verify the error message.
        with pytest.raises(InvalidRequest, match="CDC log must meet the minimal requirements of Vector Search"):
            raise err
        return False
    else:
        cql.execute(f"DROP INDEX {test_keyspace}.{index_name}")
        return True
# With a vector index already present, walk through a sequence of
# ALTER TABLE ... WITH cdc = {...} statements and check which are accepted.
# The vector index requires CDC to be enabled with specific options:
# - TTL must be at least 24 hours (86400 seconds), or 0 (infinite)
# - delta mode must be set to 'full' or postimage must be enabled.
def test_try_create_cdc_with_vector_search_enabled(scylla_only, cql, test_keyspace, skip_without_tablets):
    # (expected alter_cdc() result, CDC options), applied in this order.
    steps = [
        # Allow creating CDC log table with default options.
        (True, {'enabled': True}),
        # Disallow changing CDC's TTL to less than 24 hours.
        (False, {'enabled': True, 'ttl': 1}),
        (True, {'enabled': True, 'ttl': VS_TTL_SECONDS}),
        # Allow changing CDC's TTL to 0 (infinite).
        (True, {'enabled': True, 'ttl': 0}),
        # Disallow changing CDC's delta to 'keys' if postimage is disabled.
        (False, {'enabled': True, 'delta': 'keys'}),
        (True, {'enabled': True, 'delta': 'full'}),
        # Allow creating CDC with postimage enabled instead of delta set to 'full'.
        (True, {'enabled': True, 'postimage': True, 'delta': 'keys'}),
        # Allow changing CDC's preimage and enabling postimage freely.
        (True, {'enabled': True, 'preimage': True}),
        (True, {'enabled': True, 'preimage': False}),
        (True, {'enabled': True, 'postimage': True}),
        (True, {'enabled': True, 'preimage': True, 'postimage': True}),
        # Disallow changing CDC's postimage to false when delta is 'keys'.
        (False, {'enabled': True, 'delta': 'keys', 'postimage': False}),
    ]
    table_schema = "pk int primary key, v vector<float, 3>"
    with new_test_table(cql, test_keyspace, table_schema) as table:
        # Enable Vector Search by creating a vector index.
        cql.execute(f"CREATE INDEX v_idx ON {table} (v) USING 'vector_index'")
        for should_succeed, cdc_options in steps:
            assert alter_cdc(cql, table, cdc_options) == should_succeed, cdc_options
# Once a vector index exists on the table, explicitly disabling CDC must be
# rejected.
def test_try_disable_cdc_with_vector_search_enabled(scylla_only, cql, test_keyspace, skip_without_tablets):
    expected_error = "Cannot disable CDC when Vector Search is enabled on the table"
    table_schema = "pk int primary key, v vector<float, 3>"
    with new_test_table(cql, test_keyspace, table_schema) as table:
        # Enable Vector Search by creating a vector index.
        cql.execute(f"CREATE INDEX v_idx ON {table} (v) USING 'vector_index'")
        # Disallow disabling CDC when Vector Search is enabled.
        disable_stmt = f"ALTER TABLE {table} WITH cdc = {{'enabled': False}}"
        with pytest.raises(InvalidRequest, match=expected_error):
            cql.execute(disable_stmt)
# With CDC already enabled, check which CDC configurations allow a vector
# index to be created. Every ALTER itself must succeed (no index exists while
# it runs, because create_index() drops what it creates).
# The vector index requires CDC to be enabled with specific options:
# - TTL must be at least 24 hours (86400 seconds), or 0 (infinite)
# - delta mode must be set to 'full' or postimage must be enabled.
def test_try_enable_vector_search_with_cdc_enabled(scylla_only, cql, test_keyspace, skip_without_tablets):
    # (CDC options to apply, expected create_index() result), in this order.
    steps = [
        # Disallow creating the vector index when CDC's TTL is less than 24h.
        ({'enabled': True, 'ttl': 1}, False),
        # Allow creating the vector index when CDC's TTL is 0 (infinite).
        ({'enabled': True, 'ttl': 0}, True),
        # Disallow creating the vector index when CDC's delta is set to 'keys'.
        ({'enabled': True, 'delta': 'keys'}, False),
        # Allow creating the vector index when CDC's postimage is enabled instead of delta set to 'full'.
        ({'enabled': True, 'delta': 'keys', 'postimage': True}, True),
        # Allow creating the vector index when CDC's options fulfill the minimal requirements of Vector Search.
        ({'enabled': True, 'ttl': 172800, 'delta': 'full', 'preimage': True, 'postimage': True}, True),
    ]
    table_schema = "pk int primary key, v vector<float, 3>"
    with new_test_table(cql, test_keyspace, table_schema) as table:
        for cdc_options, index_allowed in steps:
            assert alter_cdc(cql, table, cdc_options), cdc_options
            assert create_index(cql, test_keyspace, table, "v") == index_allowed, cdc_options
# A vector index cannot be created while CDC is explicitly disabled on the
# table, and can be created once CDC is enabled again.
def test_try_enable_vector_search_with_cdc_disabled(scylla_only, cql, test_keyspace, skip_without_tablets):
    expected_error = "Cannot create the vector index when CDC is explicitly disabled."
    table_schema = "pk int primary key, v vector<float, 3>"
    with new_test_table(cql, test_keyspace, table_schema) as table:
        # Disallow creating the vector index when CDC is explicitly disabled.
        disabled_options = {'enabled': False, 'ttl': 172800, 'postimage' : True}
        assert alter_cdc(cql, table, disabled_options)
        create_stmt = f"CREATE INDEX v_idx ON {table} (v) USING 'vector_index'"
        with pytest.raises(InvalidRequest, match=expected_error):
            cql.execute(create_stmt)
        # Allow creating the vector index when CDC is enabled again.
        assert alter_cdc(cql, table, {'enabled': True})
        assert create_index(cql, test_keyspace, table, "v")
# This test reproduces VECTOR-179.
# It performs a vector search with tracing enabled. An exception is expected
# because the vector store node is not configured. However, due to the bug,
# Scylla crashes instead of returning an error.
def test_vector_search_when_tracing_is_enabled(cql, test_keyspace, scylla_only, skip_without_tablets):
    table_schema = "p int primary key, v vector<float, 3>"
    with new_test_table(cql, test_keyspace, table_schema) as table:
        cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'")
        ann_query = f"SELECT * FROM {table} ORDER BY v ANN OF [0.2,0.3,0.4] LIMIT 1"
        # The query must fail cleanly with InvalidRequest, even with tracing on.
        with pytest.raises(InvalidRequest, match="Vector Store is disabled"):
            cql.execute(ann_query, trace=True)
# An ANN query restricted on the primary key must return only the matching
# row, while an ANN query restricted on a non-indexed regular column (q) must
# be rejected with InvalidRequest.
@pytest.mark.xfail(reason="""We do not support primary key filtering yet, see VECTOR-374,
additionally this test will fail when that issue is fixed because pytest does not run the vector search backend.
It does pass on Cassandra however, so we keep it xfailed for future reference.""")
def test_ann_query_with_restriction_works_only_on_pk(cql, test_keyspace):
    schema = 'p int primary key, q int, v vector<float, 3>'
    custom_index = 'vector_index' if is_scylla(cql) else 'sai'
    with new_test_table(cql, test_keyspace, schema) as table:
        cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING '{custom_index}'")
        # Three rows with identical vectors, so only the WHERE clause can
        # tell them apart.
        cql.execute(f"INSERT INTO {table} (p, q, v) VALUES (1, 1, [1.0, 1.0, 1.0])")
        cql.execute(f"INSERT INTO {table} (p, q, v) VALUES (2, 2, [1.0, 1.0, 1.0])")
        cql.execute(f"INSERT INTO {table} (p, q, v) VALUES (3, 3, [1.0, 1.0, 1.0])")

        # Restricting on the partition key is allowed and narrows the result
        # to the single matching row.
        result = cql.execute(f"SELECT * FROM {table} WHERE p = 1 ORDER BY v ANN OF [1.0, 1.0, 1.0] LIMIT 3")
        assert len(result.current_rows) == 1
        assert result.current_rows[0].p == 1

        # pytest.raises() applies `match` with re.search(), so the
        # parentheses in "column(s)" must be escaped: unescaped, "(s)" is a
        # capture group and the pattern would match "columns" instead of the
        # literal "column(s)".
        with pytest.raises(InvalidRequest, match=r"ANN ordering by vector requires all restricted column\(s\) to be indexed"):
            cql.execute(f"SELECT * FROM {table} WHERE q = 1 ORDER BY v ANN OF [1.0, 1.0, 1.0] LIMIT 3")