test/vector_search: migrate wildcard select rescoring test to pytest

Migrate wildcard_select_is_correctly_rescored from rescoring_test.cc to
pytest. The test verifies that SELECT * with rescoring returns rows in
the correct similarity order with correct embedding values, covering a
slightly different processing path from the explicit-column SELECT test.
No semantic change.
This commit is contained in:
Szymon Malewski
2026-05-14 11:28:38 +02:00
parent cae816a8c6
commit 0cb557695a
2 changed files with 18 additions and 36 deletions

View File

@@ -194,3 +194,21 @@ def test_similarity_function_returns_correctly_rescored_results(cql, test_keyspa
for row, d_row in zip(rows, expected):
assert row.similarity == pytest.approx(d_row.expected_similarity, abs=0.01)
assert len(rows[0]) == 2
# Wildcard variant of the rescoring check: a "SELECT *" ANN query must come
# back ordered by similarity and carry the stored embedding values. The
# wildcard goes through a slightly different processing path than the
# explicit-column SELECT used in test_result_returned_by_vector_store_is_rescored.
def test_wildcard_select_is_correctly_rescored(cql, test_keyspace, vector_store_mock, skip_without_tablets):
    for similarity_function, dataset in TEST_DATA.items():
        table_options = {"similarity_function": similarity_function}
        with rescoring_test_table(cql, test_keyspace, dataset, extra_options=table_options) as table:
            # Prime the mocked vector store with its next ANN reply
            # (presumably the keys in reverse similarity order, judging by the
            # helper's name), so a correct result requires rescoring.
            vector_store_mock.set_next_ann_response(200, reversed_ann_response(dataset))
            query = f"SELECT * FROM {table} ORDER BY embedding ANN OF {ANN_QUERY_VECTOR_LITERAL} LIMIT 2"
            result = list(cql.execute(query))

            # The first two dataset entries are the expected answer, in order.
            top_two = dataset[:2]
            returned_ids = [r.id for r in result]
            wanted_ids = [d.id for d in top_two]
            assert returned_ids == wanted_ids
            for r, d in zip(result, top_two):
                assert list(r.embedding) == pytest.approx(d.embedding)
            # The first returned row must expose exactly two fields.
            assert len(result[0]) == 2

View File

@@ -107,42 +107,6 @@ struct print_log_value<std::vector<float>> {
};
}
SEASTAR_TEST_CASE(wildcard_select_is_correctly_rescored) {
    // "SELECT * ..." follows a slightly different processing path than a SELECT
    // with an explicit column list, so it gets its own rescoring check.
    for (const auto& params : test_data) {
        auto server = co_await make_vs_mock_server();
        co_await do_with_cql_env(
                [&](cql_test_env& env) -> future<> {
                    // Point the vector store client at the mock server and start it.
                    configure(env.local_qp().vector_store_client()).with_dns({{"server.node", std::vector<std::string>{server->host()}}});
                    env.local_qp().vector_store_client().start_background_tasks();
                    co_await create_index_and_insert_data(env, params);

                    // The mock answers with every key, but in REVERSE similarity order
                    // and with zeroed scores.
                    server->next_ann_response({http::reply::status_type::ok, R"({
"primary_keys": { "id": [4, 3, 2, 1] },
"similarity_scores": [0, 0, 0, 0]
})"});

                    auto response = co_await env.execute_cql("SELECT * FROM ks.cf ORDER BY embedding ANN OF [0.1, 0.1] LIMIT 2;");
                    auto rows_msg = dynamic_pointer_cast<cql_transport::messages::result_message::rows>(response);
                    BOOST_REQUIRE(rows_msg);

                    const auto& result_set = rows_msg->rs().result_set();
                    const auto& result_rows = result_set.rows();
                    // Hard requirement first, so the .at() accesses below cannot throw.
                    BOOST_REQUIRE(result_rows.size() >= 2);
                    BOOST_CHECK_EQUAL(result_rows.size(), 2);
                    BOOST_CHECK_EQUAL(result_set.get_metadata().column_count(), 2);

                    const auto& first_row = result_rows.at(0);
                    BOOST_CHECK_EQUAL(get_id_col_value(first_row), test_ids[0]);
                    BOOST_CHECK_EQUAL(get_embedding_col_value(first_row), params.vectors[0]);

                    const auto& second_row = result_rows.at(1);
                    // NOTE(review): the literal 2 presumably equals test_ids[1] - confirm and
                    // consider using test_ids[1] for consistency with the first-row check.
                    BOOST_CHECK_EQUAL(get_id_col_value(second_row), 2);
                    BOOST_CHECK_EQUAL(get_embedding_col_value(second_row), params.vectors[1]);
                },
                make_config(format("http://server.node:{}", server->port())))
                .finally(seastar::coroutine::lambda([&] -> future<> {
                    // Stop the mock server once the CQL environment call has resolved.
                    co_await server->stop();
                }));
    }
}
SEASTAR_TEST_CASE(select_similarity_function_other_than_ann_ordering) {
// Another tricky case with similarity column with argument different from ANN ordering vector.
// Especially if we use prepared statement and the difference is only seen at execution time.