index: fix vector index with filtering target column

The secondary index mechanism is currently used to determine the target column of a vector index.
This mechanism works incorrectly for vector indexes with filtering columns because
it returns the last specified column as the target (vectors) column.
However, the syntax for a vector index requires the first column to be the target:
```
CREATE CUSTOM INDEX ON t(vectors, users) USING 'vector_index';
```

This discrepancy eventually leads to the following exception when performing an
ANN search on a vector index with filtering columns:
````
ANN ordering by vector requires the column to be indexed using 'vector_index'
````

This commit fixes the issue by introducing dedicated logic for vector indexes
to correctly identify the target (vectors) column.

Fixes: SCYLLADB-635
This commit is contained in:
Karol Nowacki
2026-02-19 12:15:16 +01:00
parent ba7f314cdc
commit 15788c3734
4 changed files with 56 additions and 8 deletions

View File

@@ -2004,9 +2004,7 @@ static std::optional<ann_ordering_info> get_ann_ordering_info(
auto indexes = sim.list_indexes();
auto it = std::find_if(indexes.begin(), indexes.end(), [&prepared_ann_ordering](const auto& ind) {
return (ind.metadata().options().contains(db::index::secondary_index::custom_class_option_name) &&
ind.metadata().options().at(db::index::secondary_index::custom_class_option_name) == ANN_CUSTOM_INDEX_OPTION) &&
(ind.target_column() == prepared_ann_ordering.first->name_as_text());
return secondary_index::vector_index::is_vector_index_on_column(ind.metadata(), prepared_ann_ordering.first->name_as_text());
});
if (it == indexes.end()) {

View File

@@ -16,6 +16,7 @@
#include "index/vector_index.hh"
#include "index/secondary_index.hh"
#include "index/secondary_index_manager.hh"
#include "index/target_parser.hh"
#include "types/concrete_types.hh"
#include "types/types.hh"
#include "utils/managed_string.hh"
@@ -104,6 +105,19 @@ const static std::unordered_map<sstring, std::function<void(const sstring&, cons
{"rescoring", std::bind_front(validate_enumerated_option, boolean_values)},
};
/// Resolves the name of the target (vectors) column from the serialized `targets`
/// option of an index.
///
/// When `targets` parses as a JSON object holding a non-empty "pk" array, the first
/// element of that array is returned. In every other case (not valid JSON, not a JSON
/// object, "pk" missing, not an array, or empty) the lookup is delegated to
/// target_parser::get_target_column_name_from_string().
sstring get_vector_index_target_column(const sstring& targets) {
    std::optional<rjson::value> parsed = rjson::try_parse(targets);
    const bool is_json_object = parsed.has_value() && parsed->IsObject();
    if (is_json_object) {
        rjson::value* partition_columns = rjson::find(*parsed, "pk");
        const bool has_columns = partition_columns != nullptr && partition_columns->IsArray() && !partition_columns->Empty();
        if (has_columns) {
            // The first listed column is the target one.
            return sstring(rjson::to_string_view(partition_columns->GetArray()[0]));
        }
    }
    // Anything that is not the JSON form handled above goes through the generic parser.
    return target_parser::get_target_column_name_from_string(targets);
}
bool vector_index::is_rescoring_enabled(const index_options_map& properties) {
auto q = properties.find("quantization");
auto r = properties.find("rescoring");
@@ -320,16 +334,23 @@ bool vector_index::has_vector_index(const schema& s) {
/// Checks whether the schema `s` has a vector index whose target column is `target_name`.
/// Walks `s.indices()` and returns true as soon as is_vector_index_on_column() accepts
/// an index; returns false when the loop finishes without a match.
// NOTE(review): this hunk appears to interleave the pre-change lines (the inline
// class/target lookup that compared the raw target option with `target_name`) with the
// post-change delegation to is_vector_index_on_column() - confirm against the applied file.
bool vector_index::has_vector_index_on_column(const schema& s, const sstring& target_name) {
for (const auto& index : s.indices()) {
auto class_it = index.options().find(db::index::secondary_index::custom_class_option_name);
auto target_it = index.options().find(cql3_parser::index_target::target_option_name);
if (class_it != index.options().end() && target_it != index.options().end()) {
auto custom_class = secondary_index_manager::get_custom_class_factory(class_it->second);
return custom_class && dynamic_cast<vector_index*>((*custom_class)().get()) && target_it->second == target_name;
if (is_vector_index_on_column(index, target_name)) {
return true;
}
}
return false;
}
/// Tells whether the index described by `im` is a vector index built on `target_name`.
///
/// Returns true only when all of the following hold:
///  - the index options contain both the custom class option and the target option,
///  - a custom class factory is registered for that class name and the object it
///    creates is a `vector_index`,
///  - the target column resolved by get_vector_index_target_column() equals `target_name`.
/// Otherwise returns false.
bool vector_index::is_vector_index_on_column(const index_metadata& im, const sstring& target_name) {
    const auto& index_options = im.options();
    const auto class_entry = index_options.find(db::index::secondary_index::custom_class_option_name);
    const auto target_entry = index_options.find(cql3_parser::index_target::target_option_name);
    if (class_entry == index_options.end() || target_entry == index_options.end()) {
        return false;
    }

    auto factory = secondary_index_manager::get_custom_class_factory(class_entry->second);
    if (!factory) {
        return false;
    }
    // Instantiate the custom index only to inspect its dynamic type.
    const bool is_vector_class = dynamic_cast<vector_index*>((*factory)().get()) != nullptr;
    return is_vector_class && get_vector_index_target_column(target_entry->second) == target_name;
}
/// Returns the schema version of the base table at which the index was created.
/// This is used to determine if the index needs to be rebuilt after a schema change.
/// The CREATE INDEX and DROP INDEX statements do change the schema version.

View File

@@ -34,6 +34,7 @@ public:
table_schema_version index_version(const schema& schema) override;
static bool has_vector_index(const schema& s);
/// Returns true if any index in `s.indices()` is a vector index whose target column is `target_name`.
static bool has_vector_index_on_column(const schema& s, const sstring& target_name);
/// Returns true if `im` describes a vector index whose target (vectors) column is `target_name`.
static bool is_vector_index_on_column(const index_metadata& im, const sstring& target_name);
static void check_cdc_options(const schema& schema);
static bool is_rescoring_enabled(const index_options_map& properties);

View File

@@ -1184,3 +1184,31 @@ SEASTAR_TEST_CASE(vector_store_client_abort_due_to_query_timeout) {
co_await server->stop();
}));
}
// Regression test for SCYLLADB-635.
// Creates a vector index that lists an additional filtering column after the vector column.
// The index target column used to be resolved with the local secondary index logic, which
// picked the last listed column instead of the first (vectors) one, so the SELECT query
// below failed with:
// ANN ordering by vector requires the column to be indexed using 'vector_index'.
SEASTAR_TEST_CASE(vector_store_client_vector_index_with_additional_filtering_column) {
    auto mock_server = co_await make_vs_mock_server();
    auto test_config = make_config();
    test_config.db_config->vector_store_primary_uri.set(format("http://server.node:{}", mock_server->port()));

    co_await do_with_cql_env(
            [&](cql_test_env& env) -> future<> {
                auto table_schema = co_await create_test_table(env, "ks", "test");
                auto& store_client = env.local_qp().vector_store_client();
                configure(store_client).with_dns({{"server.node", std::vector<std::string>{mock_server->host()}}});
                store_client.start_background_tasks();

                // `embedding` is the indexed vector column; `ck1` follows it to support filtered ANN search.
                auto create_index_result = co_await env.execute_cql("CREATE CUSTOM INDEX idx ON ks.test (embedding, ck1) USING 'vector_index'");

                // The ANN query on `embedding` must not throw.
                BOOST_CHECK_NO_THROW(co_await env.execute_cql("SELECT * FROM ks.test ORDER BY embedding ANN OF [0.1, 0.2, 0.3] LIMIT 5;"));
            },
            test_config)
            .finally(seastar::coroutine::lambda([&] -> future<> {
                co_await mock_server->stop();
            }));
}