Merge 'index: implement schema management layer for vector search indexes' from null
This pull request adds support for creating custom indexes (at the metadata level), provided that a supported custom class is specified (currently only vector search).

The patch contains:
- a change to the CREATE INDEX statement that allows the USING keyword to be present as long as one of the supported classes is used
- support for describing custom indexes in the DESCRIBE statement
- unit tests

Co-authored-by: @Balwancia

Closes scylladb/scylladb#23720

* github.com:scylladb/scylladb:
  test/cqlpy: add custom index tests
  index: support storing metadata for custom indices
This commit is contained in:
@@ -12,6 +12,7 @@
|
||||
#include "create_index_statement.hh"
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "prepared_statement.hh"
|
||||
#include "types/types.hh"
|
||||
#include "validation.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
@@ -101,6 +102,15 @@ std::vector<::shared_ptr<index_target>> create_index_statement::validate_while_e
|
||||
targets.emplace_back(raw_target->prepare(*schema));
|
||||
}
|
||||
|
||||
if (_properties && _properties->custom_class) {
|
||||
if (!secondary_index::secondary_index_manager::is_custom_class_supported(db.features(), *_properties->custom_class)) {
|
||||
throw exceptions::invalid_request_exception(format("Non-supported custom class \'{}\' provided", *(_properties->custom_class)));
|
||||
}
|
||||
|
||||
//TODO: run custom index class validation once added
|
||||
//See https://github.com/scylladb/vector-store/issues/115
|
||||
}
|
||||
|
||||
if (targets.size() > 1) {
|
||||
validate_targets_for_multi_column_index(targets);
|
||||
}
|
||||
@@ -348,9 +358,9 @@ std::optional<create_index_statement::base_schema_with_new_index> create_index_s
|
||||
}
|
||||
index_metadata_kind kind;
|
||||
index_options_map index_options;
|
||||
if (_properties->is_custom) {
|
||||
kind = index_metadata_kind::custom;
|
||||
if (_properties->custom_class) {
|
||||
index_options = _properties->get_options();
|
||||
kind = index_metadata_kind::custom;
|
||||
} else {
|
||||
kind = schema->is_compound() ? index_metadata_kind::composites : index_metadata_kind::keys;
|
||||
}
|
||||
|
||||
@@ -22,10 +22,7 @@ void cql3::statements::index_prop_defs::validate() {
|
||||
if (is_custom && !custom_class) {
|
||||
throw exceptions::invalid_request_exception("CUSTOM index requires specifying the index class");
|
||||
}
|
||||
|
||||
if (!is_custom && custom_class) {
|
||||
throw exceptions::invalid_request_exception("Cannot specify index class for a non-CUSTOM index");
|
||||
}
|
||||
|
||||
if (!is_custom && !_properties.empty()) {
|
||||
throw exceptions::invalid_request_exception("Cannot specify options for a non-CUSTOM index");
|
||||
}
|
||||
@@ -36,15 +33,6 @@ void cql3::statements::index_prop_defs::validate() {
|
||||
db::index::secondary_index::custom_index_option_name));
|
||||
}
|
||||
|
||||
// Currently, Scylla does not support *any* class of custom index
|
||||
// implementation. If in the future we do (e.g., SASI, or something
|
||||
// new), we'll need to check for valid values here.
|
||||
if (is_custom && custom_class) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
format("Unsupported CUSTOM INDEX class {}. Note that currently, Scylla does not support SASI or any other CUSTOM INDEX class.",
|
||||
*custom_class));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
index_options_map
|
||||
|
||||
@@ -8,13 +8,18 @@
|
||||
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
||||
*/
|
||||
|
||||
#include <optional>
|
||||
#include <ranges>
|
||||
#include <seastar/core/shared_ptr.hh>
|
||||
#include <string_view>
|
||||
#include <unordered_set>
|
||||
|
||||
#include "index/secondary_index_manager.hh"
|
||||
|
||||
#include "cql3/statements/index_target.hh"
|
||||
#include "cql3/expr/expression.hh"
|
||||
#include "index/target_parser.hh"
|
||||
#include "schema/schema.hh"
|
||||
#include "schema/schema_builder.hh"
|
||||
#include "db/view/view.hh"
|
||||
#include "concrete_types.hh"
|
||||
@@ -341,4 +346,27 @@ bool secondary_index_manager::is_global_index(const schema& s) const {
|
||||
});
|
||||
}
|
||||
|
||||
std::optional<sstring> secondary_index_manager::custom_index_class(const schema& s) const {
|
||||
|
||||
auto idx = _indices.find(index_name_from_table_name(s.cf_name()));
|
||||
|
||||
if (idx == _indices.end() || !(*idx).second.metadata().options().contains(cql3::statements::index_target::custom_index_option_name)) {
|
||||
return std::nullopt;
|
||||
} else {
|
||||
return (*idx).second.metadata().options().at(cql3::statements::index_target::custom_index_option_name);
|
||||
}
|
||||
}
|
||||
|
||||
// We pass the feature_service as the supported custom classes will depend on the features
|
||||
bool secondary_index_manager::is_custom_class_supported(const gms::feature_service& fs, const sstring& class_name) {
|
||||
|
||||
// TODO: Change this set to a map to implementation
|
||||
// when https://github.com/scylladb/vector-store/issues/115 is done
|
||||
|
||||
const static std::unordered_set<std::string_view> classes = {
|
||||
"vector_index",
|
||||
};
|
||||
return classes.contains(class_name);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -10,11 +10,13 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "gms/feature_service.hh"
|
||||
#include "schema/schema.hh"
|
||||
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "cql3/statements/index_target.hh"
|
||||
|
||||
#include <string_view>
|
||||
#include <vector>
|
||||
|
||||
namespace cql3::expr {
|
||||
@@ -99,6 +101,8 @@ public:
|
||||
bool is_index(view_ptr) const;
|
||||
bool is_index(const schema& s) const;
|
||||
bool is_global_index(const schema& s) const;
|
||||
std::optional<sstring> custom_index_class(const schema& s) const;
|
||||
static bool is_custom_class_supported(const gms::feature_service& fs, const sstring& class_name);
|
||||
private:
|
||||
void add_index(const index_metadata& im);
|
||||
};
|
||||
|
||||
@@ -11,6 +11,8 @@
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "index/secondary_index_manager.hh"
|
||||
#include "schema/schema.hh"
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include <optional>
|
||||
|
||||
namespace replica {
|
||||
|
||||
@@ -27,6 +29,10 @@ public:
|
||||
return _db.find_column_family(base_id).get_index_manager().is_index(view_s);
|
||||
}
|
||||
|
||||
virtual std::optional<sstring> custom_index_class(const table_id& base_id, const schema& view_s) const override {
|
||||
return _db.find_column_family(base_id).get_index_manager().custom_index_class(view_s);
|
||||
}
|
||||
|
||||
// Delegates to `_db.find_schema` to resolve the schema for the given table id.
virtual schema_ptr find_schema(const table_id& id) const override {
    schema_ptr found = _db.find_schema(id);
    return found;
}
|
||||
|
||||
@@ -1010,11 +1010,21 @@ sstring schema::get_create_statement(const schema_describe_helper& helper, bool
|
||||
if (is_view()) {
|
||||
if (helper.is_index(view_info()->base_id(), *this)) {
|
||||
auto is_local = !helper.is_global_index(view_info()->base_id(), *this);
|
||||
auto custom_index_class = helper.custom_index_class(view_info()->base_id(), *this);
|
||||
|
||||
if (custom_index_class) {
|
||||
os << "CUSTOM ";
|
||||
}
|
||||
|
||||
os << "INDEX " << cql3::util::maybe_quote(secondary_index::index_name_from_table_name(cf_name())) << " ON "
|
||||
<< cql3::util::maybe_quote(ks_name()) << "." << cql3::util::maybe_quote(view_info()->base_name());
|
||||
|
||||
describe_index_columns(os, is_local, *this, helper.find_schema(view_info()->base_id()));
|
||||
|
||||
if (custom_index_class) {
|
||||
os << " USING '" << *custom_index_class << "'";
|
||||
}
|
||||
|
||||
os << ";\n";
|
||||
|
||||
return std::move(os).str();
|
||||
|
||||
@@ -506,6 +506,7 @@ class schema_describe_helper {
|
||||
public:
|
||||
virtual bool is_global_index(const table_id& base_id, const schema& view_s) const = 0;
|
||||
virtual bool is_index(const table_id& base_id, const schema& view_s) const = 0;
|
||||
virtual std::optional<sstring> custom_index_class(const table_id& base_id, const schema& view_s) const = 0;
|
||||
virtual schema_ptr find_schema(const table_id& id) const = 0;
|
||||
virtual ~schema_describe_helper() = default;
|
||||
};
|
||||
|
||||
@@ -711,11 +711,6 @@ SEASTAR_TEST_CASE(test_secondary_index_create_custom_index) {
|
||||
// "exceptions::invalid_request_exception: CUSTOM index requires
|
||||
// specifying the index class"
|
||||
assert_that_failed(e.execute_cql("create custom index on cf (a)"));
|
||||
// It's also a syntax error to try to specify a "USING" without
|
||||
// specifying CUSTOM. We expect the exception:
|
||||
// "exceptions::invalid_request_exception: Cannot specify index class
|
||||
// for a non-CUSTOM index"
|
||||
assert_that_failed(e.execute_cql("create index on cf (a) using 'org.apache.cassandra.index.sasi.SASIIndex'"));
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
77
test/cqlpy/test_vector_index.py
Normal file
77
test/cqlpy/test_vector_index.py
Normal file
@@ -0,0 +1,77 @@
|
||||
# Copyright 2025-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
|
||||
###############################################################################
|
||||
# Tests for vector indexes
|
||||
###############################################################################
|
||||
|
||||
import pytest
|
||||
from .util import new_test_table, is_scylla
|
||||
from cassandra.protocol import InvalidRequest, ConfigurationException
|
||||
|
||||
|
||||
def test_create_vector_search_index(cql, test_keyspace, scylla_only):
    """Create a CUSTOM index with the 'vector_index' class on a vector column.

    The test passes as long as the CREATE statement does not raise.
    """
    columns = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, columns) as table:
        statement = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'"
        cql.execute(statement)
|
||||
|
||||
|
||||
|
||||
def test_create_vector_search_index_without_custom_keyword(cql, test_keyspace):
    """Create an index with a USING clause but without the CUSTOM keyword.

    The class name is 'vector_index' when running against Scylla and 'sai'
    otherwise. The test passes as long as the statement does not raise.
    """
    columns = 'p int primary key, v vector<float, 3>'
    with new_test_table(cql, test_keyspace, columns) as table:
        custom_class = 'vector_index' if is_scylla(cql) else 'sai'
        statement = f"CREATE INDEX ON {table}(v) USING '{custom_class}'"
        cql.execute(statement)
|
||||
|
||||
def test_create_custom_index_with_invalid_class(cql, test_keyspace):
    """Creating a CUSTOM index with an unknown class name must be rejected.

    Either InvalidRequest or ConfigurationException is accepted, with a
    message matching one of the two expected error texts.
    """
    columns = 'p int primary key, v vector<float, 3>'
    accepted_errors = (InvalidRequest, ConfigurationException)
    accepted_message = r"Non-supported custom class|Unable to find"
    with new_test_table(cql, test_keyspace, columns) as table:
        invalid_custom_class = "invalid.custom.class"
        with pytest.raises(accepted_errors, match=accepted_message):
            cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING '{invalid_custom_class}'")
|
||||
|
||||
def test_create_custom_index_without_custom_class(cql, test_keyspace):
    """CREATE CUSTOM INDEX without a USING clause must be rejected.

    Either InvalidRequest or ConfigurationException is accepted, with a
    message matching one of the two expected error texts.
    """
    columns = 'p int primary key, v vector<float, 3>'
    accepted_errors = (InvalidRequest, ConfigurationException)
    accepted_message = r"CUSTOM index requires specifying|Unable to find"
    with new_test_table(cql, test_keyspace, columns) as table:
        with pytest.raises(accepted_errors, match=accepted_message):
            cql.execute(f"CREATE CUSTOM INDEX ON {table}(v)")
|
||||
|
||||
@pytest.mark.xfail(reason="Scylla doesn't validate vector indexes, as they are not implemented yet.")
def test_create_vector_search_index_on_nonvector_column(cql, test_keyspace, scylla_only):
    """A 'vector_index' on a plain int column is expected to raise InvalidRequest.

    Marked xfail: see the reason given in the decorator.
    """
    columns = 'p int primary key, v int'
    with new_test_table(cql, test_keyspace, columns) as table:
        statement = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'"
        with pytest.raises(InvalidRequest):
            cql.execute(statement)
|
||||
|
||||
|
||||
def test_describe_custom_index(cql, test_keyspace):
    """DESC INDEX must render custom indexes as CREATE CUSTOM INDEX ... USING.

    Two indexes are created, one with and one without the CUSTOM keyword, and
    both descriptions are checked for the CUSTOM form with the class name.
    """
    columns = 'p int primary key, v1 vector<float, 3>, v2 vector<float, 3>'
    with new_test_table(cql, test_keyspace, columns) as table:
        # Cassandra puts a space between the table name and the parentheses
        # while Scylla does not; both forms are valid CQL, so the difference
        # is irrelevant. Scylla also does not support the sai custom class.
        if is_scylla(cql):
            maybe_space, custom_class = '', 'vector_index'
        else:
            maybe_space, custom_class = ' ', 'sai'

        create_idx_a = f"CREATE INDEX custom ON {table}(v1) USING '{custom_class}'"
        create_idx_b = f"CREATE CUSTOM INDEX custom1 ON {table}(v2) USING '{custom_class}'"
        cql.execute(create_idx_a)
        cql.execute(create_idx_b)

        a_desc = cql.execute(f"DESC INDEX {test_keyspace}.custom").one().create_statement
        b_desc = cql.execute(f"DESC INDEX {test_keyspace}.custom1").one().create_statement

        expected_a = f"CREATE CUSTOM INDEX custom ON {table}{maybe_space}(v1) USING '{custom_class}'"
        expected_b = f"CREATE CUSTOM INDEX custom1 ON {table}{maybe_space}(v2) USING '{custom_class}'"
        assert expected_a in a_desc
        assert expected_b in b_desc
|
||||
Reference in New Issue
Block a user