mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-30 11:36:54 +00:00
test/vector_search: migrate wildcard select rescoring test to pytest
Migrate wildcard_select_is_correctly_rescored from rescoring_test.cc to pytest. The test verifies that SELECT * with rescoring returns rows in the correct similarity order with correct embedding values, covering a slightly different processing path from the explicit-column SELECT test. No semantic change.
This commit is contained in:
@@ -194,3 +194,21 @@ def test_similarity_function_returns_correctly_rescored_results(cql, test_keyspa
|
||||
for row, d_row in zip(rows, expected):
|
||||
assert row.similarity == pytest.approx(d_row.expected_similarity, abs=0.01)
|
||||
assert len(rows[0]) == 2
|
||||
|
||||
|
||||
# SELECT * takes a slightly different processing path than the explicit-column
# SELECT exercised by test_result_returned_by_vector_store_is_rescored.
# This test checks that a wildcard SELECT with rescoring still returns rows in
# the correct similarity order and with the correct embedding values.
def test_wildcard_select_is_correctly_rescored(cql, test_keyspace, vector_store_mock, skip_without_tablets):
    # Repeat the scenario once per entry of TEST_DATA; the key is passed to the
    # table as its "similarity_function" option.
    for similarity_function, dataset in TEST_DATA.items():
        table_options = {"similarity_function": similarity_function}
        with rescoring_test_table(cql, test_keyspace, dataset, extra_options=table_options) as table:
            # Queue the mock's next ANN reply (HTTP 200).
            # NOTE(review): reversed_ann_response() presumably lists the keys in
            # reverse similarity order, so correct ordering below can only come
            # from rescoring - confirm against the helper.
            vector_store_mock.set_next_ann_response(200, reversed_ann_response(dataset))

            query = f"SELECT * FROM {table} ORDER BY embedding ANN OF {ANN_QUERY_VECTOR_LITERAL} LIMIT 2"
            result_rows = list(cql.execute(query))

            # LIMIT 2: the result is compared against the first two dataset entries.
            top_two = dataset[:2]
            returned_ids = [row.id for row in result_rows]
            expected_ids = [item.id for item in top_two]
            assert returned_ids == expected_ids

            for row, item in zip(result_rows, top_two):
                assert list(row.embedding) == pytest.approx(item.embedding)

            # A wildcard select must yield exactly two columns per row.
            assert len(result_rows[0]) == 2
|
||||
|
||||
@@ -107,42 +107,6 @@ struct print_log_value<std::vector<float>> {
|
||||
};
|
||||
}
|
||||
|
||||
/// Rescoring must also work for "SELECT * ...", which goes through a slightly
/// different processing path than a SELECT with an explicit column list.
SEASTAR_TEST_CASE(wildcard_select_is_correctly_rescored) {
    for (const auto& params : test_data) {
        // One mock vector-store server per parameter set; it is stopped in the
        // finally() continuation below.
        auto server = co_await make_vs_mock_server();

        co_await do_with_cql_env(
                [&](cql_test_env& env) -> future<> {
                    // Resolve "server.node" to the mock server and start the client.
                    configure(env.local_qp().vector_store_client())
                            .with_dns({{"server.node", std::vector<std::string>{server->host()}}});
                    env.local_qp().vector_store_client().start_background_tasks();
                    co_await create_index_and_insert_data(env, params);

                    // Mock response: all keys, but in REVERSE similarity order.
                    server->next_ann_response({http::reply::status_type::ok, R"({
                        "primary_keys": { "id": [4, 3, 2, 1] },
                        "similarity_scores": [0, 0, 0, 0]
                    })"});

                    auto response = co_await env.execute_cql(
                            "SELECT * FROM ks.cf ORDER BY embedding ANN OF [0.1, 0.1] LIMIT 2;");
                    auto rows_msg = dynamic_pointer_cast<cql_transport::messages::result_message::rows>(response);
                    BOOST_REQUIRE(rows_msg);

                    const auto& result_set = rows_msg->rs().result_set();
                    const auto& rows = result_set.rows();

                    // REQUIRE guards the .at() accesses below; CHECK pins the exact count.
                    BOOST_REQUIRE(rows.size() >= 2);
                    BOOST_CHECK_EQUAL(rows.size(), 2);
                    BOOST_CHECK_EQUAL(result_set.get_metadata().column_count(), 2);

                    const auto& first_row = rows.at(0);
                    BOOST_CHECK_EQUAL(get_id_col_value(first_row), test_ids[0]);
                    BOOST_CHECK_EQUAL(get_embedding_col_value(first_row), params.vectors[0]);

                    // NOTE(review): row 1 is compared against the literal 2 while row 0
                    // uses test_ids[0]; presumably 2 == test_ids[1] - confirm.
                    const auto& second_row = rows.at(1);
                    BOOST_CHECK_EQUAL(get_id_col_value(second_row), 2);
                    BOOST_CHECK_EQUAL(get_embedding_col_value(second_row), params.vectors[1]);
                },
                make_config(format("http://server.node:{}", server->port())))
                .finally(seastar::coroutine::lambda([&] -> future<> {
                    co_await server->stop();
                }));
    }
}
|
||||
|
||||
SEASTAR_TEST_CASE(select_similarity_function_other_than_ann_ordering) {
|
||||
// Another tricky case with similarity column with argument different from ANN ordering vector.
|
||||
// Especially if we use prepared statement and the difference is only seen at execution time.
|
||||
|
||||
Reference in New Issue
Block a user