diff --git a/cql3/statements/create_index_statement.cc b/cql3/statements/create_index_statement.cc index e164a20042..0cff5bb2e4 100644 --- a/cql3/statements/create_index_statement.cc +++ b/cql3/statements/create_index_statement.cc @@ -12,6 +12,7 @@ #include "create_index_statement.hh" #include "exceptions/exceptions.hh" #include "prepared_statement.hh" +#include "types/types.hh" #include "validation.hh" #include "service/storage_proxy.hh" #include "service/migration_manager.hh" @@ -101,6 +102,15 @@ std::vector<::shared_ptr> create_index_statement::validate_while_e targets.emplace_back(raw_target->prepare(*schema)); } + if (_properties && _properties->custom_class) { + if (!secondary_index::secondary_index_manager::is_custom_class_supported(db.features(), *_properties->custom_class)) { + throw exceptions::invalid_request_exception(format("Non-supported custom class \'{}\' provided", *(_properties->custom_class))); + } + + //TODO: run custom index class validation once added + //See https://github.com/scylladb/vector-store/issues/115 + } + if (targets.size() > 1) { validate_targets_for_multi_column_index(targets); } @@ -348,9 +358,9 @@ std::optional create_index_s } index_metadata_kind kind; index_options_map index_options; - if (_properties->is_custom) { - kind = index_metadata_kind::custom; + if (_properties->custom_class) { index_options = _properties->get_options(); + kind = index_metadata_kind::custom; } else { kind = schema->is_compound() ? index_metadata_kind::composites : index_metadata_kind::keys; } diff --git a/cql3/statements/index_prop_defs.cc b/cql3/statements/index_prop_defs.cc index 1eb3355ef8..9b61add734 100644 --- a/cql3/statements/index_prop_defs.cc +++ b/cql3/statements/index_prop_defs.cc @@ -22,10 +22,7 @@ void cql3::statements::index_prop_defs::validate() { if (is_custom && !custom_class) { throw exceptions::invalid_request_exception("CUSTOM index requires specifying the index class"); } - - if (!is_custom && custom_class) { - throw exceptions::invalid_request_exception("Cannot specify index class for a non-CUSTOM index"); - } + if (!is_custom && !_properties.empty()) { throw exceptions::invalid_request_exception("Cannot specify options for a non-CUSTOM index"); } @@ -36,15 +33,6 @@ void cql3::statements::index_prop_defs::validate() { db::index::secondary_index::custom_index_option_name)); } - // Currently, Scylla does not support *any* class of custom index - // implementation. If in the future we do (e.g., SASI, or something - // new), we'll need to check for valid values here. - if (is_custom && custom_class) { - throw exceptions::invalid_request_exception( - format("Unsupported CUSTOM INDEX class {}. Note that currently, Scylla does not support SASI or any other CUSTOM INDEX class.", - *custom_class)); - - } } index_options_map diff --git a/index/secondary_index_manager.cc b/index/secondary_index_manager.cc index d2a4b3c336..dba81989bb 100644 --- a/index/secondary_index_manager.cc +++ b/index/secondary_index_manager.cc @@ -8,13 +8,18 @@ * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0) */ +#include +#include #include +#include +#include #include "index/secondary_index_manager.hh" #include "cql3/statements/index_target.hh" #include "cql3/expr/expression.hh" #include "index/target_parser.hh" +#include "schema/schema.hh" #include "schema/schema_builder.hh" #include "db/view/view.hh" #include "concrete_types.hh" @@ -341,4 +346,27 @@ bool secondary_index_manager::is_global_index(const schema& s) const { }); } +std::optional secondary_index_manager::custom_index_class(const schema& s) const { + + auto idx = _indices.find(index_name_from_table_name(s.cf_name())); + + if (idx == _indices.end() || !(*idx).second.metadata().options().contains(cql3::statements::index_target::custom_index_option_name)) { + return std::nullopt; + } else { + return (*idx).second.metadata().options().at(cql3::statements::index_target::custom_index_option_name); + } +} + +// We pass the feature_service as the supported custom classes will depend on the features +bool secondary_index_manager::is_custom_class_supported(const gms::feature_service& fs, const sstring& class_name) { + + // TODO: Change this set to a map to implementation + // when https://github.com/scylladb/vector-store/issues/115 is done + + const static std::unordered_set classes = { + "vector_index", + }; + return classes.contains(class_name); +} + } diff --git a/index/secondary_index_manager.hh b/index/secondary_index_manager.hh index 38ab2ac6d9..2df6cedac6 100644 --- a/index/secondary_index_manager.hh +++ b/index/secondary_index_manager.hh @@ -10,11 +10,13 @@ #pragma once +#include "gms/feature_service.hh" #include "schema/schema.hh" #include "data_dictionary/data_dictionary.hh" #include "cql3/statements/index_target.hh" +#include #include namespace cql3::expr { @@ -99,6 +101,8 @@ public: bool is_index(view_ptr) const; bool is_index(const schema& s) const; bool is_global_index(const schema& s) const; + std::optional custom_index_class(const schema& s) const; + static bool is_custom_class_supported(const gms::feature_service& fs, const sstring& class_name); private: void add_index(const index_metadata& im); }; diff --git a/replica/schema_describe_helper.hh b/replica/schema_describe_helper.hh index 7b02618ff5..21bb23ff14 100644 --- a/replica/schema_describe_helper.hh +++ b/replica/schema_describe_helper.hh @@ -11,6 +11,8 @@ #include "data_dictionary/data_dictionary.hh" #include "index/secondary_index_manager.hh" #include "schema/schema.hh" +#include +#include namespace replica { @@ -27,6 +29,10 @@ public: return _db.find_column_family(base_id).get_index_manager().is_index(view_s); } + virtual std::optional custom_index_class(const table_id& base_id, const schema& view_s) const override { + return _db.find_column_family(base_id).get_index_manager().custom_index_class(view_s); + } + virtual schema_ptr find_schema(const table_id& id) const override { return _db.find_schema(id); } diff --git a/schema/schema.cc b/schema/schema.cc index dde5a94721..788185a94c 100644 --- a/schema/schema.cc +++ b/schema/schema.cc @@ -1010,11 +1010,21 @@ sstring schema::get_create_statement(const schema_describe_helper& helper, bool if (is_view()) { if (helper.is_index(view_info()->base_id(), *this)) { auto is_local = !helper.is_global_index(view_info()->base_id(), *this); + auto custom_index_class = helper.custom_index_class(view_info()->base_id(), *this); + + if (custom_index_class) { + os << "CUSTOM "; + } os << "INDEX " << cql3::util::maybe_quote(secondary_index::index_name_from_table_name(cf_name())) << " ON " << cql3::util::maybe_quote(ks_name()) << "." << cql3::util::maybe_quote(view_info()->base_name()); describe_index_columns(os, is_local, *this, helper.find_schema(view_info()->base_id())); + + if (custom_index_class) { + os << " USING '" << *custom_index_class << "'"; + } + os << ";\n"; return std::move(os).str(); diff --git a/schema/schema.hh b/schema/schema.hh index b9ce882ac1..e96da56e38 100644 --- a/schema/schema.hh +++ b/schema/schema.hh @@ -506,6 +506,7 @@ class schema_describe_helper { public: virtual bool is_global_index(const table_id& base_id, const schema& view_s) const = 0; virtual bool is_index(const table_id& base_id, const schema& view_s) const = 0; + virtual std::optional custom_index_class(const table_id& base_id, const schema& view_s) const = 0; virtual schema_ptr find_schema(const table_id& id) const = 0; virtual ~schema_describe_helper() = default; }; diff --git a/test/boost/secondary_index_test.cc b/test/boost/secondary_index_test.cc index 2e7554fb25..22e4453078 100644 --- a/test/boost/secondary_index_test.cc +++ b/test/boost/secondary_index_test.cc @@ -711,11 +711,6 @@ SEASTAR_TEST_CASE(test_secondary_index_create_custom_index) { // "exceptions::invalid_request_exception: CUSTOM index requires // specifying the index class" assert_that_failed(e.execute_cql("create custom index on cf (a)")); - // It's also a syntax error to try to specify a "USING" without - // specifying CUSTOM. We expect the exception: - // "exceptions::invalid_request_exception: Cannot specify index class - // for a non-CUSTOM index" - assert_that_failed(e.execute_cql("create index on cf (a) using 'org.apache.cassandra.index.sasi.SASIIndex'")); }); } diff --git a/test/cqlpy/test_vector_index.py b/test/cqlpy/test_vector_index.py new file mode 100644 index 0000000000..6ba09bf19a --- /dev/null +++ b/test/cqlpy/test_vector_index.py @@ -0,0 +1,77 @@ +# Copyright 2025-present ScyllaDB +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 + +############################################################################### +# Tests for vector indexes +############################################################################### + +import pytest +from .util import new_test_table, is_scylla +from cassandra.protocol import InvalidRequest, ConfigurationException + + +def test_create_vector_search_index(cql, test_keyspace, scylla_only): + schema = 'p int primary key, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'") + + + +def test_create_vector_search_index_without_custom_keyword(cql, test_keyspace): + schema = 'p int primary key, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + if is_scylla(cql): + custom_class = 'vector_index' + else: + custom_class = 'sai' + + cql.execute(f"CREATE INDEX ON {table}(v) USING '{custom_class}'") + +def test_create_custom_index_with_invalid_class(cql, test_keyspace): + schema = 'p int primary key, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + invalid_custom_class = "invalid.custom.class" + with pytest.raises((InvalidRequest, ConfigurationException), match=r"Non-supported custom class|Unable to find"): + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING '{invalid_custom_class}'") + +def test_create_custom_index_without_custom_class(cql, test_keyspace): + schema = 'p int primary key, v vector' + with new_test_table(cql, test_keyspace, schema) as table: + with pytest.raises((InvalidRequest, ConfigurationException), match=r"CUSTOM index requires specifying|Unable to find"): + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v)") + +@pytest.mark.xfail(reason="Scylla doesn't validate vector indexes, as they are not implemented yet.") +def test_create_vector_search_index_on_nonvector_column(cql, test_keyspace, scylla_only): + schema = 'p int primary key, v int' + with new_test_table(cql, test_keyspace, schema) as table: + with pytest.raises(InvalidRequest): + cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'") + + +def test_describe_custom_index(cql, test_keyspace): + schema = 'p int primary key, v1 vector, v2 vector' + with new_test_table(cql, test_keyspace, schema) as table: + # Cassandra inserts a space between the table name and parentheses, + # Scylla doesn't. This difference doesn't matter because both are + # valid CQL commands + # Scylla doesn't support sai custom class. + if is_scylla(cql): + maybe_space = '' + custom_class = 'vector_index' + else: + maybe_space = ' ' + custom_class = 'sai' + + + create_idx_a = f"CREATE INDEX custom ON {table}(v1) USING '{custom_class}'" + create_idx_b = f"CREATE CUSTOM INDEX custom1 ON {table}(v2) USING '{custom_class}'" + + cql.execute(create_idx_a) + cql.execute(create_idx_b) + + a_desc = cql.execute(f"DESC INDEX {test_keyspace}.custom").one().create_statement + b_desc = cql.execute(f"DESC INDEX {test_keyspace}.custom1").one().create_statement + + assert f"CREATE CUSTOM INDEX custom ON {table}{maybe_space}(v1) USING '{custom_class}'" in a_desc + assert f"CREATE CUSTOM INDEX custom1 ON {table}{maybe_space}(v2) USING '{custom_class}'" in b_desc \ No newline at end of file