Files
scylladb/index/vector_index.hh
Szymon Malewski c89957b725 vector_index: rescoring: Add hidden similarity score column
Rescoring consists of recalculating the similarity score and reordering the results based on it.
In this patch we add the calculation of the similarity score as a hidden (non-serialized) column; the following patch will add reordering.
Normal ordering uses `add_column_for_post_processing`; however, this works only for regular columns, not functions.
So we create it together with user requested columns (this also forces the use of `selection_with_processing`) and hide the column later.
This also requires special handling for 'SELECT *' case - we need to manually add all columns before adding similarity column.

If the user already asks for the similarity score in the SELECT clause, this value will be calculated twice - it should be optimized in future patches.
2026-01-22 15:38:40 +01:00

51 lines
2.1 KiB
C++

/*
* Copyright 2025-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#pragma once
#include "schema/schema.hh"
#include "data_dictionary/data_dictionary.hh"
#include "cql3/statements/index_target.hh"
#include "index/secondary_index_manager.hh"
#include <vector>
namespace secondary_index {
class vector_index: public custom_index {
public:
// The minimal TTL for the CDC used by Vector Search.
// Required to ensure that the data is not deleted until the vector index is fully built.
static constexpr int VS_TTL_SECONDS = 86400; // 24 hours
vector_index() = default;
~vector_index() override = default;
std::optional<cql3::description> describe(const index_metadata& im, const schema& base_schema) const override;
bool view_should_exist() const override;
void validate(const schema &schema, const cql3::statements::index_specific_prop_defs &properties,
const std::vector<::shared_ptr<cql3::statements::index_target>> &targets, const gms::feature_service& fs,
const data_dictionary::database& db) const override;
table_schema_version index_version(const schema& schema) override;
static bool has_vector_index(const schema& s);
static bool has_vector_index_on_column(const schema& s, const sstring& target_name);
static void check_cdc_options(const schema& schema);
static bool is_rescoring_enabled(const index_options_map& properties);
static float get_oversampling(const index_options_map& properties);
static sstring get_cql_similarity_function_name(const index_options_map& properties);
private:
void check_uses_tablets(const schema& schema, const data_dictionary::database& db) const;
void check_cdc_not_explicitly_disabled(const schema& schema) const;
void check_target(const schema& schema, const std::vector<::shared_ptr<cql3::statements::index_target>>& targets) const;
void check_index_options(const cql3::statements::index_specific_prop_defs& properties) const;
};
std::unique_ptr<secondary_index::custom_index> vector_index_factory();
}