Rescoring consists of recalculating the similarity score and reordering results based on it. In this patch we add the calculation of the similarity score as a hidden (non-serialized) column; a following patch will add the reordering. Normal ordering uses `add_column_for_post_processing`; however, this works only for regular columns, not functions. So we create it together with the user-requested columns (this also forces the use of `selection_with_processing`) and hide the column later. This also requires special handling of the 'SELECT *' case - we need to manually add all columns before adding the similarity column. If the user already asks for the similarity score in the SELECT clause, this value will be calculated twice - this should be optimized in future patches.
51 lines
2.1 KiB
C++
51 lines
2.1 KiB
C++
/*
|
|
* Copyright 2025-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "schema/schema.hh"
|
|
|
|
#include "data_dictionary/data_dictionary.hh"
|
|
#include "cql3/statements/index_target.hh"
|
|
#include "index/secondary_index_manager.hh"
|
|
|
|
#include <vector>
|
|
|
|
namespace secondary_index {
|
|
|
|
// Custom index type for vector indexes; derives from custom_index.
// This header only declares the interface: overrides of the custom_index
// hooks, static queries over a schema, static accessors that read values from
// an index_options_map, and private validation helpers. All definitions live
// outside this header.
class vector_index: public custom_index {
|
|
public:
|
|
// The minimal TTL for the CDC used by Vector Search.
|
|
// Required to ensure that the data is not deleted until the vector index is fully built.
|
|
static constexpr int VS_TTL_SECONDS = 86400; // 24 hours
|
|
|
|
// Defaulted constructor and (overriding) destructor.
vector_index() = default;
|
|
~vector_index() override = default;
|
|
// custom_index override: returns an optional CQL description for the given
// index metadata and base schema.
std::optional<cql3::description> describe(const index_metadata& im, const schema& base_schema) const override;
|
|
// custom_index override: reports whether a view should exist for this index
// type. The returned value is not visible from this header.
bool view_should_exist() const override;
|
|
// custom_index override: receives the base schema, the index-specific
// properties, the index targets, the feature service and the database.
// NOTE(review): presumably rejects invalid index definitions, likely via the
// private check_* helpers declared below - confirm in the definition.
void validate(const schema &schema, const cql3::statements::index_specific_prop_defs &properties,
|
|
const std::vector<::shared_ptr<cql3::statements::index_target>> &targets, const gms::feature_service& fs,
|
|
const data_dictionary::database& db) const override;
|
|
// custom_index override (non-const): returns a table_schema_version for the
// given schema.
table_schema_version index_version(const schema& schema) override;
|
|
// Static schema queries.
// NOTE(review): judging by the names, these report whether the schema has any
// vector index / a vector index on the named target column - confirm.
static bool has_vector_index(const schema& s);
|
|
static bool has_vector_index_on_column(const schema& s, const sstring& target_name);
|
|
// NOTE(review): presumably checks the schema's CDC options against Vector
// Search requirements (see VS_TTL_SECONDS) - confirm in the definition.
static void check_cdc_options(const schema& schema);
|
|
|
|
// Static accessors that derive a value from an index's options map:
// a bool for rescoring, a float for oversampling, and the name of a CQL
// similarity function as an sstring.
// NOTE(review): option keys and defaults are not visible in this header.
static bool is_rescoring_enabled(const index_options_map& properties);
|
|
static float get_oversampling(const index_options_map& properties);
|
|
static sstring get_cql_similarity_function_name(const index_options_map& properties);
|
|
private:
|
|
// Private validation helpers. Each takes (part of) the arguments accepted by
// validate() and returns nothing.
// NOTE(review): presumably each signals failure by throwing - confirm.
void check_uses_tablets(const schema& schema, const data_dictionary::database& db) const;
|
|
void check_cdc_not_explicitly_disabled(const schema& schema) const;
|
|
void check_target(const schema& schema, const std::vector<::shared_ptr<cql3::statements::index_target>>& targets) const;
|
|
void check_index_options(const cql3::statements::index_specific_prop_defs& properties) const;
|
|
};
|
|
|
|
// Factory function: returns a uniquely-owned custom_index.
// NOTE(review): presumably the returned object is a vector_index - confirm in the definition.
std::unique_ptr<secondary_index::custom_index> vector_index_factory();
|
|
}
|