test/vector_search: migrate non-ANN similarity argument rescoring test to pytest

Migrate select_similarity_function_other_than_ann_ordering from rescoring_test.cc to pytest. The test verifies that similarity scores in SELECT are computed against the explicitly supplied argument vector rather than the ANN ordering vector. No semantic change.
2026-05-30 03:30:49 +00:00 · 2026-05-14 11:29:23 +02:00
parent 0cb557695a
commit 63d9b7445f
2 changed files with 24 additions and 39 deletions
--- a/test/cqlpy/test_vector_search_rescoring_with_mock.py
+++ b/test/cqlpy/test_vector_search_rescoring_with_mock.py
@@ -212,3 +212,27 @@ def test_wildcard_select_is_correctly_rescored(cql, test_keyspace, vector_store_
            for row, d_row in zip(rows, expected):
                assert list(row.embedding) == pytest.approx(d_row.embedding)
            assert len(rows[0]) == 2
+
+
+# Verifies that when the similarity function argument in SELECT differs from the
+# ANN ordering vector, similarity is computed against the SELECT argument. Uses a
+# prepared statement so the argument difference is only visible at execution time.
+def test_select_similarity_function_other_than_ann_ordering(cql, test_keyspace, vector_store_mock, skip_without_tablets):
+    data = TEST_DATA["cosine"]
+    with rescoring_test_table(cql, test_keyspace, data) as table:
+        vector_store_mock.set_next_ann_response(200, reversed_ann_response(data))  # mocked ANN reply; presumably candidates in reverse similarity order - confirm in reversed_ann_response
+
+        prepared = cql.prepare(
+            f"SELECT id, similarity_cosine(embedding, ?) AS similarity FROM {table} "
+            f"ORDER BY embedding ANN OF ? LIMIT 2")
+        # Bind values follow marker order: the similarity argument (data[1].embedding), then the ANN ordering vector.
+        rows = list(cql.execute(prepared, [data[1].embedding, ANN_QUERY_VECTOR]))
+
+        expected = data[:2]
+        assert [row.id for row in rows] == [d_row.id for d_row in expected]
+        # Similarity is computed against data[1].embedding, not the ANN query vector.
+        # id=1 (rescored rank 0): similarity(data[0].embedding, data[1].embedding) = 0.97.
+        # id=2 (rescored rank 1): similarity(data[1].embedding, data[1].embedding) = 1 (self-similarity).
+        assert rows[0].similarity == pytest.approx(0.97, abs=0.01)
+        assert rows[1].similarity == pytest.approx(1, abs=0.01)
+        assert len(rows[0]) == 2  # only the two selected columns (id, similarity) are returned
--- a/test/vector_search/rescoring_test.cc
+++ b/test/vector_search/rescoring_test.cc
@@ -107,45 +107,6 @@ struct print_log_value<std::vector<float>> {
 };
 }

-SEASTAR_TEST_CASE(select_similarity_function_other_than_ann_ordering) {
-    // Another tricky case with similarity column with argument different from ANN ordering vector.
-    // Especially if we use prepared statement and the difference is only seen at execution time.
-    const auto& params = test_data[0];
-    auto server = co_await make_vs_mock_server();
-    co_await do_with_cql_env(
-            [&](cql_test_env& env) -> future<> {
-                configure(env.local_qp().vector_store_client()).with_dns({{"server.node", std::vector<std::string>{server->host()}}});
-                env.local_qp().vector_store_client().start_background_tasks();
-                co_await create_index_and_insert_data(env, params);
-
-                // Mock Response: Return all keys but in REVERSE similarity order.
-                server->next_ann_response({http::reply::status_type::ok, R"({
-                    "primary_keys": { "id": [4, 3, 2, 1] },
-                    "similarity_scores": [0, 0, 0, 0]
-                })"});
-                auto prep = co_await env.prepare(fmt::format(
-                        "SELECT id, similarity_{}(embedding, ?) FROM ks.cf ORDER BY embedding ANN OF ? LIMIT 2;", params.function_name));
-                auto msg = co_await env.execute_prepared(prep, cql3::raw_value_vector_with_unset{
-                    cql3::raw_value::make_value(to_embedding(params.vectors[1])),
-                    cql3::raw_value::make_value(to_embedding({0.1f, 0.1f}))});
-
-                auto rms = dynamic_pointer_cast<cql_transport::messages::result_message::rows>(msg);
-                BOOST_REQUIRE(rms);
-                const auto& rows = rms->rs().result_set().rows();
-                BOOST_REQUIRE(rows.size() >= 2);
-                BOOST_CHECK_EQUAL(rows.size(), 2);
-                BOOST_CHECK_EQUAL(rms->rs().result_set().get_metadata().column_count(), 2);
-                BOOST_CHECK_EQUAL(get_id_col_value(rows.at(0)), 1);
-                BOOST_CHECK(is_similarity_eq(get_similarity_col_value(rows.at(0)), params.expected_similarity[1]));
-                BOOST_CHECK_EQUAL(get_id_col_value(rows.at(1)), 2);
-                BOOST_CHECK(is_similarity_eq(get_similarity_col_value(rows.at(1)), params.expected_similarity[0]));
-            },
-            make_config(format("http://server.node:{}", server->port())))
-            .finally(seastar::coroutine::lambda([&] -> future<> {
-                co_await server->stop();
-            }));
-}
-
 // Rescoring does not filter out NULL embeddings yet, but they should be sorted as last.
 // So this test is expected to report error on result set size, but passes if the first element is correct.
 SEASTAR_TEST_CASE(no_nulls_in_rescored_results, *boost::unit_test::expected_failures(3)) {