diff --git a/cql3/statements/create_index_statement.cc b/cql3/statements/create_index_statement.cc index e2a113dedd..53254429c2 100644 --- a/cql3/statements/create_index_statement.cc +++ b/cql3/statements/create_index_statement.cc @@ -36,10 +36,12 @@ #include "db/schema_tables.hh" #include "index/secondary_index_manager.hh" #include "types/concrete_types.hh" +#include "types/vector.hh" #include "db/tags/extension.hh" #include "tombstone_gc_extension.hh" #include "index/secondary_index.hh" +#include #include namespace cql3 { @@ -118,8 +120,51 @@ static data_type type_for_computed_column(cql3::statements::index_target::target } } +// Cassandra SAI compatibility: detect the StorageAttachedIndex class name +// used by Cassandra to create vector and metadata indexes. +static bool is_sai_class_name(const sstring& class_name) { + return class_name == "org.apache.cassandra.index.sai.StorageAttachedIndex" + || boost::iequals(class_name, "storageattachedindex") + || boost::iequals(class_name, "sai"); +} + +// Returns true if the custom class name refers to a vector-capable index +// (either ScyllaDB's native vector_index or Cassandra's SAI). static bool is_vector_capable_class(const sstring& class_name) { - return boost::iequals(class_name, "vector_index"); + return class_name == "vector_index" || is_sai_class_name(class_name); +} + +// When the custom class is SAI, verify that at least one target is a +// vector column and rewrite the class to ScyllaDB's native "vector_index". +// Non-vector single-column targets and multi-column (local-index partition +// key) targets are skipped — they are treated as filtering columns by +// vector_index::check_target(). +static void maybe_rewrite_sai_to_vector_index( + const schema& schema, + const std::vector<::shared_ptr>& targets, + index_specific_prop_defs& props) { + if (!props.custom_class || !is_sai_class_name(*props.custom_class)) { + return; + } + for (const auto& target : targets) { + auto* ident = std::get_if<::shared_ptr>(&target->value); + if (!ident) { + // Multi-column target (local-index partition key) — skip. + continue; + } + auto cd = schema.get_column_definition((*ident)->name()); + if (!cd) { + // Nonexistent column — skip; vector_index::validate() will catch it. + continue; + } + if (dynamic_cast(cd->type.get())) { + props.custom_class = "vector_index"; + return; + } + } + throw exceptions::invalid_request_exception( + "StorageAttachedIndex (SAI) is only supported on vector columns; " + "use a secondary index for non-vector columns"); } static bool is_vector_index(const index_options_map& options) { @@ -276,7 +321,7 @@ create_index_statement::validate(query_processor& qp, const service::client_stat _idx_properties->validate(); - // FIXME: This is ugly and can be improved. + const bool is_vector_index = _idx_properties->custom_class && is_vector_capable_class(*_idx_properties->custom_class); const bool uses_view_properties = _view_properties.properties()->count() > 0 || _view_properties.use_compact_storage() @@ -363,6 +408,8 @@ create_index_statement::validate_while_executing(data_dictionary::database db, l targets.emplace_back(raw_target->prepare(*schema)); } + maybe_rewrite_sai_to_vector_index(*schema, targets, *_idx_properties); + if (_idx_properties && _idx_properties->custom_class) { auto custom_index_factory = secondary_index::secondary_index_manager::get_custom_class_factory(*_idx_properties->custom_class); if (!custom_index_factory) { diff --git a/docs/cql/secondary-indexes.rst b/docs/cql/secondary-indexes.rst index c662dc5628..e5093b9479 100644 --- a/docs/cql/secondary-indexes.rst +++ b/docs/cql/secondary-indexes.rst @@ -263,6 +263,44 @@ The following options are supported for vector indexes. All of them are optional +------------------------------+----------------------------------------------------------------------------------------------------------+---------------+ +.. _cassandra-sai-compatibility: + +Cassandra SAI Compatibility for Vector Search +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +ScyllaDB accepts the Cassandra ``StorageAttachedIndex`` (SAI) class name in ``CREATE CUSTOM INDEX`` +statements **for vector columns**. Cassandra libraries such as +`CassIO `_ and `LangChain `_ use SAI to create +vector indexes; ScyllaDB recognizes these statements for compatibility. + +When ScyllaDB encounters an SAI class name on a **vector column**, the index is automatically +created as a native ``vector_index``. The following class names are recognized: + +* ``org.apache.cassandra.index.sai.StorageAttachedIndex`` (exact case required) +* ``StorageAttachedIndex`` (case-insensitive) +* ``SAI`` (case-insensitive) + +Example:: + + -- Cassandra SAI statement accepted by ScyllaDB: + CREATE CUSTOM INDEX ON my_table (embedding) + USING 'org.apache.cassandra.index.sai.StorageAttachedIndex' + WITH OPTIONS = {'similarity_function': 'COSINE'}; + + -- Equivalent to: + CREATE CUSTOM INDEX ON my_table (embedding) + USING 'vector_index' + WITH OPTIONS = {'similarity_function': 'COSINE'}; + +The ``similarity_function`` option is supported by both Cassandra SAI and ScyllaDB. + +.. note:: + + SAI class names are only supported on **vector columns**. Using an SAI class name on a + non-vector column (e.g., ``text`` or ``int``) will result in an error. General SAI + indexing of non-vector columns is not supported by ScyllaDB; use a + :doc:`secondary index ` instead. + .. _drop-index-statement: DROP INDEX diff --git a/docs/using-scylla/cassandra-compatibility.rst b/docs/using-scylla/cassandra-compatibility.rst index c6d1afa066..a6626f6df6 100644 --- a/docs/using-scylla/cassandra-compatibility.rst +++ b/docs/using-scylla/cassandra-compatibility.rst @@ -227,13 +227,19 @@ Security Indexing and Caching ^^^^^^^^^^^^^^^^^^^^^ -+--------------------------------------------------------------+--------------------------------------------------------------------------------------+ -| Options | Support | -+==============================================================+======================================================================================+ -|:doc:`Secondary Index ` | |v| | -+--------------------------------------------------------------+--------------------------------------------------------------------------------------+ -|:doc:`Materialized Views ` | |v| | -+--------------------------------------------------------------+--------------------------------------------------------------------------------------+ ++----------------------------------------------------------------+--------------------------------------------------------------------------------------+ +| Options | Support | ++================================================================+======================================================================================+ +|:doc:`Secondary Index ` | |v| | ++----------------------------------------------------------------+--------------------------------------------------------------------------------------+ +|StorageAttachedIndex (SAI) | |x| | ++----------------------------------------------------------------+--------------------------------------------------------------------------------------+ +|:ref:`SAI for vector search ` | |v| :sup:`*` | ++----------------------------------------------------------------+--------------------------------------------------------------------------------------+ +|:doc:`Materialized Views ` | |v| | ++----------------------------------------------------------------+--------------------------------------------------------------------------------------+ + +:sup:`*` SAI class name on vector columns is rewritten to native ``vector_index`` Additional Features diff --git a/index/secondary_index_manager.cc b/index/secondary_index_manager.cc index 1b02b94790..fbf78ec77a 100644 --- a/index/secondary_index_manager.cc +++ b/index/secondary_index_manager.cc @@ -202,6 +202,10 @@ std::optional secondary_index_manager::custom_index_class(const schema& // This function returns a factory, as the custom index class should be lightweight, preferably not holding any state. // We prefer this over a static custom index class instance, as it allows us to avoid any issues with thread safety. +// +// Note: SAI class names (StorageAttachedIndex, sai) are not listed here +// because maybe_rewrite_sai_to_vector_index() in create_index_statement.cc +// rewrites them to "vector_index" before the index metadata is persisted. std::optional()>> secondary_index_manager::get_custom_class_factory(const sstring& class_name) { sstring lower_class_name = class_name; std::transform(lower_class_name.begin(), lower_class_name.end(), lower_class_name.begin(), ::tolower); diff --git a/test/cqlpy/conftest.py b/test/cqlpy/conftest.py index 829bcaeccc..645db82304 100644 --- a/test/cqlpy/conftest.py +++ b/test/cqlpy/conftest.py @@ -252,6 +252,14 @@ def skip_without_tablets(scylla_only, has_tablets): if not has_tablets: pytest.skip("Test needs tablets experimental feature on") + +# Like skip_without_tablets but does not require scylla_only, so Cassandra +# tests using this fixture will run (Cassandra has no tablet prerequisite). +@pytest.fixture(scope="function") +def skip_on_scylla_vnodes(cql, has_tablets): + if is_scylla(cql) and not has_tablets: + pytest.skip("Test needs tablets experimental feature on") + # Recent versions of Scylla deprecated the "WITH COMPACT STORAGE" feature, # but it can be enabled temporarily for a test. So to keep our old compact # storage tests alive for a while longer (at least until this feature is diff --git a/test/cqlpy/test_vector_index.py b/test/cqlpy/test_vector_index.py index 5bf7b7b84e..15f5ffce7d 100644 --- a/test/cqlpy/test_vector_index.py +++ b/test/cqlpy/test_vector_index.py @@ -4,6 +4,20 @@ ############################################################################### # Tests for vector indexes +# +# Index class usage: +# - Tests that exercise only features common to both Cassandra SAI and +# ScyllaDB (basic vector creation, similarity_function, IF NOT EXISTS, +# error on nonexistent columns, etc.) use USING 'sai' so they run +# against both backends. +# - Tests that exercise ScyllaDB-specific options or features (quantization, +# rescoring, oversampling, CDC integration, etc.) +# use USING 'vector_index' and are marked scylla_only. +# +# Note: pytest does not run the vector search backend, so ANN queries that +# pass CQL validation will fail with "Vector Store is disabled" rather than +# returning results. Tests use this error to confirm that CQL parsing and +# validation succeeded. ############################################################################### import pytest @@ -47,20 +61,34 @@ unsupported_filtering_types = [ ] def test_create_vector_search_index(cql, test_keyspace, scylla_only, skip_without_tablets): + """ScyllaDB-only: basic vector index creation using the native + 'vector_index' class (non-SAI) to verify the ScyllaDB-specific path.""" schema = 'p int primary key, v vector' with new_test_table(cql, test_keyspace, schema) as table: cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'") -def test_create_vector_search_index_without_custom_keyword(cql, test_keyspace, skip_without_tablets): +def test_create_vector_search_index_uppercase_class(cql, test_keyspace, scylla_only, skip_without_tablets): + """Custom index class name lookup is case-insensitive, so 'VECTOR_INDEX' + should be accepted just like 'vector_index'.""" schema = 'p int primary key, v vector' with new_test_table(cql, test_keyspace, schema) as table: - if is_scylla(cql): - custom_class = 'vector_index' - else: - custom_class = 'sai' - - cql.execute(f"CREATE INDEX ON {table}(v) USING '{custom_class}'") + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'VECTOR_INDEX'") + + +def test_create_vector_search_index_using_sai(cql, test_keyspace, skip_on_scylla_vnodes): + """SAI class name should be accepted for creating a vector index on a + VECTOR column. On ScyllaDB the class is translated to the + native 'vector_index'; on Cassandra SAI handles it natively.""" + schema = 'p int primary key, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai'") + + +def test_create_vector_search_index_without_custom_keyword(cql, test_keyspace, skip_on_scylla_vnodes): + schema = 'p int primary key, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute(f"CREATE INDEX ON {table}(v) USING 'sai'") def test_create_custom_index_with_invalid_class(cql, test_keyspace): schema = 'p int primary key, v vector' @@ -76,6 +104,8 @@ def test_create_custom_index_without_custom_class(cql, test_keyspace): cql.execute(f"CREATE CUSTOM INDEX ON {table}(v)") def test_create_vector_search_index_on_nonvector_column(cql, test_keyspace, scylla_only, skip_without_tablets): + """ScyllaDB-only: the native vector_index class rejects non-vector columns. + Cassandra SAI accepts all column types, so this rejection is ScyllaDB-specific.""" schema = 'p int primary key, v int' with new_test_table(cql, test_keyspace, schema) as table: with pytest.raises(InvalidRequest, match="Vector indexes are only supported on columns of vectors of floats"): @@ -132,29 +162,34 @@ def test_create_vector_search_index_with_duplicated_columns(cql, test_keyspace, with pytest.raises(InvalidRequest, match=f"Duplicate column x in index target list"): cql.execute(f"CREATE CUSTOM INDEX local_idx ON {table}((p), v, x, x) USING 'vector_index'") -def test_create_vector_search_index_with_bad_options(cql, test_keyspace, scylla_only, skip_without_tablets): +def test_create_vector_search_index_with_bad_options(cql, test_keyspace, skip_on_scylla_vnodes): schema = 'p int primary key, v vector' with new_test_table(cql, test_keyspace, schema) as table: - with pytest.raises(InvalidRequest, match="Unsupported option"): - cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'bad_option': 'bad_value'}}") + with pytest.raises((InvalidRequest, ConfigurationException), match='Unsupported option|not understood by'): + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai' WITH OPTIONS = {{'bad_option': 'bad_value'}}") -def test_create_vector_search_index_with_bad_numeric_value(cql, test_keyspace, scylla_only, skip_without_tablets): +def test_create_vector_search_index_with_bad_numeric_value(cql, test_keyspace, skip_on_scylla_vnodes): schema = 'p int primary key, v vector' with new_test_table(cql, test_keyspace, schema) as table: - for val in ['-1', '513']: - with pytest.raises(InvalidRequest, match="out of valid range"): - cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'maximum_node_connections': '{val}' }}") - for val in ['dog', '123dog']: - with pytest.raises(InvalidRequest, match="not an integer"): - cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'maximum_node_connections': '{val}' }}") - with pytest.raises(InvalidRequest, match="out of valid range"): - cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'construction_beam_width': '5000' }}") + def check_bad_option(option, bad_range_vals): + for val in bad_range_vals: + with pytest.raises(InvalidRequest, match='out of valid range|cannot be <= 0'): + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai' WITH OPTIONS = {{'{option}': '{val}' }}") + for val in ['dog', '123dog']: + with pytest.raises(InvalidRequest, match='not.*integer'): + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai' WITH OPTIONS = {{'{option}': '{val}' }}") -def test_create_vector_search_index_with_bad_similarity_value(cql, test_keyspace, scylla_only, skip_without_tablets): + check_bad_option('maximum_node_connections', ['-1', '513']) + check_bad_option('construction_beam_width', ['-1', '5000']) + # search_beam_width is a ScyllaDB-specific option (not in Cassandra SAI). + if is_scylla(cql): + check_bad_option('search_beam_width', ['-1', '5000']) + +def test_create_vector_search_index_with_bad_similarity_value(cql, test_keyspace, skip_on_scylla_vnodes): schema = 'p int primary key, v vector' with new_test_table(cql, test_keyspace, schema) as table: - with pytest.raises(InvalidRequest, match="Invalid value in option 'similarity_function'"): - cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'similarity_function': 'bad_similarity_function'}}") + with pytest.raises(InvalidRequest, match="Invalid value in option 'similarity_function'|was not recognized"): + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai' WITH OPTIONS = {{'similarity_function': 'bad_similarity_function'}}") def test_create_vector_search_index_on_nonfloat_vector_column(cql, test_keyspace, scylla_only, skip_without_tablets): schema = 'p int primary key, v vector' @@ -173,23 +208,23 @@ def test_no_view_for_vector_search_index(cql, test_keyspace, scylla_only, skip_w result = cql.execute(f"SELECT * FROM system_schema.views WHERE keyspace_name = '{test_keyspace}' AND view_name = 'def_index'") assert len(result.current_rows) == 1, "Regular index should create a view in system_schema.views" -def test_describe_custom_index(cql, test_keyspace, skip_without_tablets): +def test_describe_custom_index(cql, test_keyspace, skip_on_scylla_vnodes): schema = 'p int primary key, v1 vector, v2 vector' with new_test_table(cql, test_keyspace, schema) as table: # Cassandra inserts a space between the table name and parentheses, # Scylla doesn't. This difference doesn't matter because both are - # valid CQL commands - # Scylla doesn't support sai custom class. + # valid CQL commands. + # ScyllaDB translates SAI to vector_index, so DESCRIBE shows + # 'vector_index'; Cassandra shows 'sai'. if is_scylla(cql): maybe_space = '' - custom_class = 'vector_index' + described_class = 'vector_index' else: maybe_space = ' ' - custom_class = 'sai' + described_class = 'sai' - - create_idx_a = f"CREATE INDEX custom ON {table}(v1) USING '{custom_class}'" - create_idx_b = f"CREATE CUSTOM INDEX custom1 ON {table}(v2) USING '{custom_class}'" + create_idx_a = f"CREATE INDEX custom ON {table}(v1) USING 'sai'" + create_idx_b = f"CREATE CUSTOM INDEX custom1 ON {table}(v2) USING 'sai'" cql.execute(create_idx_a) cql.execute(create_idx_b) @@ -197,8 +232,8 @@ def test_describe_custom_index(cql, test_keyspace, skip_without_tablets): a_desc = cql.execute(f"DESC INDEX {test_keyspace}.custom").one().create_statement b_desc = cql.execute(f"DESC INDEX {test_keyspace}.custom1").one().create_statement - assert f"CREATE CUSTOM INDEX custom ON {table}{maybe_space}(v1) USING '{custom_class}'" in a_desc - assert f"CREATE CUSTOM INDEX custom1 ON {table}{maybe_space}(v2) USING '{custom_class}'" in b_desc + assert f"CREATE CUSTOM INDEX custom ON {table}{maybe_space}(v1) USING '{described_class}'" in a_desc + assert f"CREATE CUSTOM INDEX custom1 ON {table}{maybe_space}(v2) USING '{described_class}'" in b_desc def test_describe_vector_index_with_filtering_columns(cql, test_keyspace, scylla_only, skip_without_tablets): schema = 'p int primary key, v vector, f1 int, f2 int' @@ -353,17 +388,13 @@ def test_vector_index_target_serialization_local_index_with_filtering_columns(cq assert len(res) == 1 assert json.loads(res[0].options['target']) == {"tc": "v", "pk": ["p1", "p2"], "fc": ["f1", "f2"]} -def test_one_vector_index_on_column(cql, test_keyspace, skip_without_tablets): +def test_one_vector_index_on_column(cql, test_keyspace, skip_on_scylla_vnodes): schema = "p int primary key, v vector" - if is_scylla(cql): - custom_class = 'vector_index' - else: - custom_class = 'sai' with new_test_table(cql, test_keyspace, schema) as table: - cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING '{custom_class}'") - with pytest.raises(InvalidRequest, match=r"There exists a duplicate custom index|Cannot create more than one storage-attached index on the same column"): - cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING '{custom_class}'") - cql.execute(f"CREATE CUSTOM INDEX IF NOT EXISTS ON {table}(v) USING '{custom_class}'") + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai'") + with pytest.raises(InvalidRequest, match=r"There exists a duplicate custom index|Cannot create more than one storage-attached index on the same column|is a duplicate of existing index"): + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai'") + cql.execute(f"CREATE CUSTOM INDEX IF NOT EXISTS ON {table}(v) USING 'sai'") # Validates fix for issue #26672 def test_two_same_name_indexes_on_different_tables_with_if_not_exists(cql, test_keyspace, scylla_only, skip_without_tablets): @@ -374,6 +405,132 @@ def test_two_same_name_indexes_on_different_tables_with_if_not_exists(cql, test_ cql.execute(f"CREATE CUSTOM INDEX IF NOT EXISTS ann_index ON {table}(v) USING 'vector_index'") cql.execute(f"CREATE CUSTOM INDEX IF NOT EXISTS ann_index ON {table2}(v) USING 'vector_index'") +############################################################################### +# SAI (StorageAttachedIndex) compatibility tests +# +# Cassandra SAI is accepted by ScyllaDB for vector columns and translated +# to the native 'vector_index'. These tests validate all accepted SAI +# class name variants and that non-vector SAI targets are properly rejected. +############################################################################### + + +def test_sai_vector_index_with_similarity_function(cql, test_keyspace, skip_on_scylla_vnodes): + """SAI vector index should accept the similarity_function option + (supported on both ScyllaDB and Cassandra).""" + schema = 'p int PRIMARY KEY, v vector' + for func in ['cosine', 'euclidean', 'dot_product']: + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute( + f"CREATE CUSTOM INDEX ON {table}(v) USING 'sai' " + f"WITH OPTIONS = {{'similarity_function': '{func}'}}" + ) + + +def test_sai_vector_index_if_not_exists(cql, test_keyspace, skip_on_scylla_vnodes): + """IF NOT EXISTS should work with SAI class name.""" + schema = 'p int PRIMARY KEY, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + idx = unique_name() + cql.execute( + f"CREATE CUSTOM INDEX IF NOT EXISTS {idx} ON {table}(v) USING 'sai'" + ) + # Creating the same index again with IF NOT EXISTS should not fail + cql.execute( + f"CREATE CUSTOM INDEX IF NOT EXISTS {idx} ON {table}(v) USING 'sai'" + ) + + +def test_sai_fully_qualified_class_name(cql, test_keyspace, skip_on_scylla_vnodes): + """The fully qualified Cassandra SAI class name should be accepted.""" + schema = 'p int PRIMARY KEY, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute( + f"CREATE CUSTOM INDEX ON {table}(v) " + f"USING 'org.apache.cassandra.index.sai.StorageAttachedIndex'" + ) + + +def test_sai_short_class_name(cql, test_keyspace, skip_on_scylla_vnodes): + """The short class name 'StorageAttachedIndex' should also be accepted.""" + schema = 'p int PRIMARY KEY, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'StorageAttachedIndex'") + + +def test_sai_short_class_uppercase_name(cql, test_keyspace, skip_on_scylla_vnodes): + """The short class name 'StorageAttachedIndex' should also be accepted.""" + schema = 'p int PRIMARY KEY, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'SAI'") + + +def test_sai_short_class_name_case_insensitive(cql, test_keyspace, skip_on_scylla_vnodes): + """Short SAI class name matching should be case-insensitive.""" + schema = 'p int PRIMARY KEY, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute( + f"CREATE CUSTOM INDEX ON {table}(v) " + f"USING 'STORAGEATTACHEDINDEX'" + ) + + +def test_sai_fqn_requires_exact_case(cql, test_keyspace, skip_without_tablets): + """The fully qualified SAI class name requires exact casing. + Only 'org.apache.cassandra.index.sai.StorageAttachedIndex' is accepted.""" + schema = 'p int PRIMARY KEY, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + with pytest.raises(InvalidRequest, match=r"Non-supported custom class|Unable to find"): + cql.execute( + f"CREATE CUSTOM INDEX ON {table}(v) " + f"USING 'org.apache.cassandra.index.sai.STORAGEATTACHEDINDEX'" + ) + + +def test_sai_local_index_with_vector_column(cql, test_keyspace, skip_without_tablets): + """SAI with a multi-column (local index) target like ((p), v) should + succeed — vector_index supports local indexes.""" + schema = 'p int, q int, v vector, PRIMARY KEY (p, q)' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute(f"CREATE CUSTOM INDEX ON {table}((p), v) USING 'sai'") + + +# --- SAI on non-vector columns: rejected by ScyllaDB, accepted by Cassandra --- +# +# Cassandra SAI natively supports indexing regular and collection columns. +# ScyllaDB only accepts SAI for vector columns and rejects non-vector targets +# with an explicit error. These tests validate the ScyllaDB rejection; they +# would pass on Cassandra but are expected to fail on ScyllaDB (scylla_only). + + +def test_sai_on_regular_column_rejected(cql, test_keyspace, scylla_only): + """SAI on a regular (non-vector) column is rejected by ScyllaDB. + On Cassandra this would succeed since SAI supports arbitrary columns.""" + schema = 'p int PRIMARY KEY, x text' + with new_test_table(cql, test_keyspace, schema) as table: + with pytest.raises(InvalidRequest, match='SAI.*only supported on vector columns'): + cql.execute(f"CREATE CUSTOM INDEX ON {table}(x) USING 'sai'") + + +def test_sai_entries_on_map_rejected(cql, test_keyspace, scylla_only): + """SAI ENTRIES index on a MAP column is rejected by ScyllaDB. + On Cassandra this is a common pattern for metadata filtering.""" + schema = 'p int PRIMARY KEY, metadata_s map' + with new_test_table(cql, test_keyspace, schema) as table: + with pytest.raises(InvalidRequest, match='SAI.*only supported on vector columns'): + cql.execute( + f"CREATE CUSTOM INDEX ON {table}(ENTRIES(metadata_s)) " + f"USING 'sai'" + ) + + +def test_sai_on_nonexistent_column(cql, test_keyspace, skip_on_scylla_vnodes): + """SAI on a non-existent column should fail with an appropriate error.""" + schema = 'p int PRIMARY KEY, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + with pytest.raises(InvalidRequest, match='No column definition found|Undefined column name|SAI.*only supported on vector columns'): + cql.execute(f"CREATE CUSTOM INDEX ON {table}(nonexistent) USING 'sai'") + + ############################################################################### # Tests for CDC with vector indexes #