mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-23 08:12:08 +00:00
The vector store returns for every ANN search, in addition to the keys of the matching items, two additional vectors - "distances" and "similarity_scores". The "distances" are raw distance metrics - lower scores are better matches, while "similarity_scores" are modified such that higher scores are better matches. Traditionally, search scores in systems like Cassandra and OpenSearch use the "similarity scores" approach (higher is better, results are returned in decreasing similarity order), so this is the more interesting vector of the two. But before this patch, our vector_store_client::ann() inspected only "distances". But... then, it didn't return even that to the caller :-) So in this patch, we: 1. Ignore "distances" and instead look at "similarity scores", which is what users really want based on their experience with other vector and non-vector search engines. 2. Return the similarity score of each match together with the match. We already have this score (the vector store returns it) and we can add it to the existing primary_key structure of each result. So each result is a "struct primary_key" which has fields partition, clustering, and after this patch - similarity. Existing callers in CQL and Alternator vector search will ignore this "similarity" field in each result, and not notice it was added. But in the next patch, we'll allow Alternator's vector search to return this similarity in each result. The existing unit tests for vector_store_client.cc mocked vector-store responses with "distances", without "similarity_scores", so no longer represent what we actually expect the vector store to do. So this patch also contains modifications for these tests, to mock and to test "similarity_scores" - not "distances". The more interesting tests, in the next patch, use the real vector store and check that we really do get a "similarity_scores" response from it. 
This patch also handles a small corner case for DOT_PRODUCT, which is the only unbounded similarity function. If the similarity overflows the 32-bit float, the vector store returns a JSON "null" instead of a JSON number (since JSON doesn't support infinite numbers). Our existing vector-store client code errored out when it saw this "null", which is wrong - the request should be allowed to proceed. So in this patch when we see a "null" JSON for similarity, we return +Inf. This is usually correct because the top results really have +Inf, not -Inf, but if we ask for all items we can reach those with similarity -Inf and incorrectly assign +Inf to them (we have a test for this case in the next patch). But this problem won't happen when Limit is low, and in any case it's better than aborting the request after it had already succeeded. Signed-off-by: Nadav Har'El <nyh@scylladb.com>
120 lines
4.3 KiB
C++
120 lines
4.3 KiB
C++
/*
|
|
* Copyright (C) 2025-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "dht/decorated_key.hh"
|
|
#include "keys/keys.hh"
|
|
#include "seastarx.hh"
|
|
#include "error.hh"
|
|
#include "utils/rjson.hh"
|
|
#include <seastar/core/shared_future.hh>
|
|
#include <seastar/core/shared_ptr.hh>
|
|
#include <seastar/http/reply.hh>
|
|
#include <seastar/core/sharded.hh>
|
|
#include <expected>
|
|
|
|
class schema;
|
|
namespace db {
|
|
class config;
|
|
}
|
|
|
|
namespace seastar::net {
|
|
class inet_address;
|
|
}
|
|
|
|
namespace vector_search {
|
|
|
|
struct primary_key {
|
|
dht::decorated_key partition;
|
|
clustering_key_prefix clustering;
|
|
/// The similarity score returned by the vector store (higher = more
|
|
/// similar, and earlier in the result set). Similarity is in the range
|
|
/// [0.0, 1.0] for cosine and euclidean; unbounded for dot product on
|
|
/// non-normalized vectors.
|
|
float similarity = 0.0f;
|
|
};
|
|
|
|
/// A client with the vector-store service.
|
|
class vector_store_client final : public seastar::peering_sharded_service<vector_store_client> {
|
|
struct impl;
|
|
std::unique_ptr<impl> _impl;
|
|
|
|
public:
|
|
using config = db::config;
|
|
using vs_vector = std::vector<float>;
|
|
using host_name = sstring;
|
|
using index_name = sstring;
|
|
using keyspace_name = sstring;
|
|
using limit = std::size_t;
|
|
using port_number = std::uint16_t;
|
|
using primary_keys = std::vector<primary_key>;
|
|
using schema_ptr = lw_shared_ptr<schema const>;
|
|
using status_type = http::reply::status_type;
|
|
|
|
using disabled = disabled_error;
|
|
using aborted = aborted_error;
|
|
using addr_unavailable = addr_unavailable_error;
|
|
using service_unavailable = service_unavailable_error;
|
|
using service_error = service_error;
|
|
using service_reply_format_error = service_reply_format_error;
|
|
|
|
using ann_error = std::variant<disabled, aborted, addr_unavailable, service_unavailable, service_error, service_reply_format_error>;
|
|
using ann_error_visitor = error_visitor;
|
|
|
|
explicit vector_store_client(config const& cfg);
|
|
~vector_store_client();
|
|
|
|
/// Start background tasks.
|
|
void start_background_tasks();
|
|
|
|
/// Stop the service.
|
|
auto stop() -> future<>;
|
|
|
|
/// Check if the vector_store_client is disabled.
|
|
auto is_disabled() const -> bool;
|
|
|
|
/// The operational status of a single vector index, as reported by the vector store.
|
|
enum class index_status {
|
|
/// The index is not yet ready: initializing, not yet discovered, or the
|
|
/// vector store is unreachable.
|
|
creating,
|
|
/// The index is performing the initial full scan of the base table
|
|
/// (backfilling). Queries may be served but results are incomplete.
|
|
backfilling,
|
|
/// The index has completed the initial scan and is fully operational.
|
|
serving,
|
|
};
|
|
|
|
/// Query the vector store for the current status of a specific vector index.
|
|
auto get_index_status(keyspace_name keyspace, index_name name, abort_source& as) -> future<index_status>;
|
|
|
|
/// Request the vector store service for the primary keys of the nearest
|
|
/// neighbors. Each returned primary_key has its similarity field set to
|
|
/// the similarity score returned by the vector store, which sorts the
|
|
/// results in decreasing similarity order (higher similarity score = more
|
|
/// similar).
|
|
auto ann(keyspace_name keyspace, index_name name, schema_ptr schema, vs_vector vs_vector, limit limit, const rjson::value& filter, abort_source& as)
|
|
-> future<std::expected<primary_keys, ann_error>>;
|
|
|
|
private:
|
|
friend struct vector_store_client_tester;
|
|
};
|
|
|
|
/// A tester for the vector_store_client, used for testing purposes.
|
|
struct vector_store_client_tester {
|
|
static void set_dns_refresh_interval(vector_store_client& vsc, std::chrono::milliseconds interval);
|
|
static void set_wait_for_client_timeout(vector_store_client& vsc, std::chrono::milliseconds timeout);
|
|
static void set_dns_resolver(vector_store_client& vsc, std::function<future<std::vector<net::inet_address>>(sstring const&)> resolver);
|
|
static void trigger_dns_resolver(vector_store_client& vsc);
|
|
static auto resolve_hostname(vector_store_client& vsc, abort_source& as) -> future<std::vector<net::inet_address>>;
|
|
static unsigned truststore_reload_count(vector_store_client& vsc);
|
|
};
|
|
|
|
} // namespace vector_search
|