Files
scylladb/index/vector_index.hh
Karol Nowacki 30487e8854 index: fix vector index with filtering target column
The secondary index mechanism is currently used to determine the target column.
This mechanism works incorrectly for vector indexes with filtering because
it returns the last specified column as the target (vectors) column.
However, the syntax for a vector index requires the first column to be the target:
```
CREATE CUSTOM INDEX ON t(vectors, users) USING 'vector_index';
```

This discrepancy eventually leads to the following exception when performing an
ANN search on a vector index with filtering columns:
````
ANN ordering by vector requires the column to be indexed using 'vector_index'
````

This commit fixes the issue by introducing dedicated logic for vector indexes
to correctly identify the target(vectors) column.

Fixes: SCYLLADB-635

Closes scylladb/scylladb#28740
2026-03-02 18:47:58 +02:00

52 lines
2.1 KiB
C++

/*
* Copyright 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include "schema/schema.hh"
#include "data_dictionary/data_dictionary.hh"
#include "cql3/statements/index_target.hh"
#include "index/secondary_index_manager.hh"
#include <vector>
namespace secondary_index {
class vector_index: public custom_index {
public:
// The minimal TTL for the CDC used by Vector Search.
// Required to ensure that the data is not deleted until the vector index is fully built.
static constexpr int VS_TTL_SECONDS = 86400; // 24 hours
vector_index() = default;
~vector_index() override = default;
std::optional<cql3::description> describe(const index_metadata& im, const schema& base_schema) const override;
bool view_should_exist() const override;
void validate(const schema &schema, const cql3::statements::index_specific_prop_defs &properties,
const std::vector<::shared_ptr<cql3::statements::index_target>> &targets, const gms::feature_service& fs,
const data_dictionary::database& db) const override;
table_schema_version index_version(const schema& schema) override;
static bool has_vector_index(const schema& s);
static bool has_vector_index_on_column(const schema& s, const sstring& target_name);
static bool is_vector_index_on_column(const index_metadata& im, const sstring& target_name);
static void check_cdc_options(const schema& schema);
static bool is_rescoring_enabled(const index_options_map& properties);
static float get_oversampling(const index_options_map& properties);
static sstring get_cql_similarity_function_name(const index_options_map& properties);
private:
void check_uses_tablets(const schema& schema, const data_dictionary::database& db) const;
void check_cdc_not_explicitly_disabled(const schema& schema) const;
void check_target(const schema& schema, const std::vector<::shared_ptr<cql3::statements::index_target>>& targets) const;
void check_index_options(const cql3::statements::index_specific_prop_defs& properties) const;
};
std::unique_ptr<secondary_index::custom_index> vector_index_factory();
}