Merge 'index: implement schema management layer for vector search indexes' from null

This pull request adds support for creating custom indexes (at a metadata level) as long as a supported custom class is provided (currently only vector search).

The patch contains:

- a change to the CREATE INDEX statement that allows the USING keyword to be present as long as one of the supported classes is used
- support for describing custom indexes in the DESCRIBE statement
- unit tests

Co-authored by: @Balwancia

Closes scylladb/scylladb#23720

* github.com:scylladb/scylladb:
  test/cqlpy: add custom index tests
  index: support storing metadata for custom indices
This commit is contained in:
Nadav Har'El
2025-05-22 12:19:36 +03:00
9 changed files with 139 additions and 20 deletions

View File

@@ -711,11 +711,6 @@ SEASTAR_TEST_CASE(test_secondary_index_create_custom_index) {
// "exceptions::invalid_request_exception: CUSTOM index requires
// specifying the index class"
assert_that_failed(e.execute_cql("create custom index on cf (a)"));
// It's also a syntax error to try to specify a "USING" without
// specifying CUSTOM. We expect the exception:
// "exceptions::invalid_request_exception: Cannot specify index class
// for a non-CUSTOM index"
assert_that_failed(e.execute_cql("create index on cf (a) using 'org.apache.cassandra.index.sasi.SASIIndex'"));
});
}

View File

@@ -0,0 +1,77 @@
# Copyright 2025-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
###############################################################################
# Tests for vector indexes
###############################################################################
import pytest
from .util import new_test_table, is_scylla
from cassandra.protocol import InvalidRequest, ConfigurationException
# Creating a CUSTOM index with the 'vector_index' class on a
# vector<float, 3> column should succeed: the test passes as long as the
# CREATE statement does not raise.
# NOTE(review): the scylla_only fixture presumably restricts this test to
# Scylla - confirm against conftest.
def test_create_vector_search_index(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, 'p int primary key, v vector<float, 3>') as table:
        cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'")
# A plain "CREATE INDEX ... USING '<class>'" (no CUSTOM keyword) should be
# accepted. The class name depends on the backend under test:
# 'vector_index' when is_scylla(cql) is true, 'sai' otherwise.
def test_create_vector_search_index_without_custom_keyword(cql, test_keyspace):
    columns = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, columns) as table:
        custom_class = 'vector_index' if is_scylla(cql) else 'sai'
        cql.execute(f"CREATE INDEX ON {table}(v) USING '{custom_class}'")
# Creating a CUSTOM index with an unknown class name must be rejected.
# Either InvalidRequest or ConfigurationException is accepted, and the error
# message has to match one of two alternatives.
# NOTE(review): the two exception types / messages presumably correspond to
# Scylla and Cassandra respectively - confirm.
def test_create_custom_index_with_invalid_class(cql, test_keyspace):
    invalid_custom_class = "invalid.custom.class"
    accepted_errors = (InvalidRequest, ConfigurationException)
    message_pattern = r"Non-supported custom class|Unable to find"
    with new_test_table(cql, test_keyspace, 'p int primary key, v vector<float, 3>') as table:
        with pytest.raises(accepted_errors, match=message_pattern):
            cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING '{invalid_custom_class}'")
# "CREATE CUSTOM INDEX" without a USING clause must be rejected. Either
# InvalidRequest or ConfigurationException is accepted, and the error
# message has to match one of two alternatives.
def test_create_custom_index_without_custom_class(cql, test_keyspace):
    accepted_errors = (InvalidRequest, ConfigurationException)
    message_pattern = r"CUSTOM index requires specifying|Unable to find"
    with new_test_table(cql, test_keyspace, 'p int primary key, v vector<float, 3>') as table:
        with pytest.raises(accepted_errors, match=message_pattern):
            cql.execute(f"CREATE CUSTOM INDEX ON {table}(v)")
# Building a 'vector_index' on a non-vector (int) column is expected to
# raise InvalidRequest. The test is marked xfail: per the reason string
# below, Scylla does not validate vector indexes yet.
@pytest.mark.xfail(reason="Scylla doesn't validate vector indexes, as they are not implemented yet.")
def test_create_vector_search_index_on_nonvector_column(cql, test_keyspace, scylla_only):
    with new_test_table(cql, test_keyspace, 'p int primary key, v int') as table:
        with pytest.raises(InvalidRequest):
            cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'")
# DESC INDEX on a custom index must return a "CREATE CUSTOM INDEX ... USING"
# statement, regardless of whether the index was created with or without
# the CUSTOM keyword.
def test_describe_custom_index(cql, test_keyspace):
    columns = 'p int primary key, v1 vector<float, 3>, v2 vector<float, 3>'
    with new_test_table(cql, test_keyspace, columns) as table:
        # Two backend-specific differences are handled here:
        #  * Cassandra prints a space between the table name and the opening
        #    parenthesis while Scylla does not; both forms are valid CQL.
        #  * Scylla doesn't support the 'sai' custom class, so it uses
        #    'vector_index' instead.
        maybe_space, custom_class = ('', 'vector_index') if is_scylla(cql) else (' ', 'sai')

        # Both statements are built first and then executed in order: one
        # index without the CUSTOM keyword, one with it.
        for create_stmt in (
            f"CREATE INDEX custom ON {table}(v1) USING '{custom_class}'",
            f"CREATE CUSTOM INDEX custom1 ON {table}(v2) USING '{custom_class}'",
        ):
            cql.execute(create_stmt)

        a_desc = cql.execute(f"DESC INDEX {test_keyspace}.custom").one().create_statement
        b_desc = cql.execute(f"DESC INDEX {test_keyspace}.custom1").one().create_statement

        # Both descriptions are expected to contain the CUSTOM form.
        assert f"CREATE CUSTOM INDEX custom ON {table}{maybe_space}(v1) USING '{custom_class}'" in a_desc
        assert f"CREATE CUSTOM INDEX custom1 ON {table}{maybe_space}(v2) USING '{custom_class}'" in b_desc