From 0cb557695aeb50f03dcd511e7d119a71e5d839d6 Mon Sep 17 00:00:00 2001 From: Szymon Malewski Date: Thu, 14 May 2026 11:28:38 +0200 Subject: [PATCH] test/vector_search: migrate wildcard select rescoring test to pytest Migrate wildcard_select_is_correctly_rescored from rescoring_test.cc to pytest. The test verifies that SELECT * with rescoring returns rows in the correct similarity order with correct embedding values, covering a slightly different processing path from the explicit-column SELECT test. No semantic change. --- .../test_vector_search_rescoring_with_mock.py | 18 ++++++++++ test/vector_search/rescoring_test.cc | 36 ------------------- 2 files changed, 18 insertions(+), 36 deletions(-) diff --git a/test/cqlpy/test_vector_search_rescoring_with_mock.py b/test/cqlpy/test_vector_search_rescoring_with_mock.py index e41e0d993f..721d5ca36d 100644 --- a/test/cqlpy/test_vector_search_rescoring_with_mock.py +++ b/test/cqlpy/test_vector_search_rescoring_with_mock.py @@ -194,3 +194,21 @@ def test_similarity_function_returns_correctly_rescored_results(cql, test_keyspa for row, d_row in zip(rows, expected): assert row.similarity == pytest.approx(d_row.expected_similarity, abs=0.01) assert len(rows[0]) == 2 + + +# Verifies that SELECT * with rescoring returns rows in the correct similarity +# order with correct embedding values. Tests a slightly different processing +# path compared to the explicit column SELECT in test_result_returned_by_vector_store_is_rescored. +def test_wildcard_select_is_correctly_rescored(cql, test_keyspace, vector_store_mock, skip_without_tablets): + for func_name, data in TEST_DATA.items(): + with rescoring_test_table(cql, test_keyspace, data, + extra_options={"similarity_function": func_name}) as table: + vector_store_mock.set_next_ann_response(200, reversed_ann_response(data)) + rows = list(cql.execute( + f"SELECT * FROM {table} ORDER BY embedding ANN OF {ANN_QUERY_VECTOR_LITERAL} LIMIT 2")) + + expected = data[:2] + assert [row.id for row in rows] == [d_row.id for d_row in expected] + for row, d_row in zip(rows, expected): + assert list(row.embedding) == pytest.approx(d_row.embedding) + assert len(rows[0]) == 2 diff --git a/test/vector_search/rescoring_test.cc b/test/vector_search/rescoring_test.cc index adf1cc8b1a..bccc77e460 100644 --- a/test/vector_search/rescoring_test.cc +++ b/test/vector_search/rescoring_test.cc @@ -107,42 +107,6 @@ struct print_log_value> { }; } -SEASTAR_TEST_CASE(wildcard_select_is_correctly_rescored) { - // Another case with slightly different path of processing is "SELECT * ...". - - for (const auto& params : test_data) { - auto server = co_await make_vs_mock_server(); - co_await do_with_cql_env( - [&](cql_test_env& env) -> future<> { - configure(env.local_qp().vector_store_client()).with_dns({{"server.node", std::vector{server->host()}}}); - env.local_qp().vector_store_client().start_background_tasks(); - co_await create_index_and_insert_data(env, params); - - // Mock Response: Return all keys but in REVERSE similarity order. - server->next_ann_response({http::reply::status_type::ok, R"({ - "primary_keys": { "id": [4, 3, 2, 1] }, - "similarity_scores": [0, 0, 0, 0] - })"}); - auto msg = co_await env.execute_cql("SELECT * FROM ks.cf ORDER BY embedding ANN OF [0.1, 0.1] LIMIT 2;"); - - auto rms = dynamic_pointer_cast(msg); - BOOST_REQUIRE(rms); - const auto& rows = rms->rs().result_set().rows(); - BOOST_REQUIRE(rows.size() >= 2); - BOOST_CHECK_EQUAL(rows.size(), 2); - BOOST_CHECK_EQUAL(rms->rs().result_set().get_metadata().column_count(), 2); - BOOST_CHECK_EQUAL(get_id_col_value(rows.at(0)), test_ids[0]); - BOOST_CHECK_EQUAL(get_embedding_col_value(rows.at(0)), params.vectors[0]); - BOOST_CHECK_EQUAL(get_id_col_value(rows.at(1)), 2); - BOOST_CHECK_EQUAL(get_embedding_col_value(rows.at(1)), params.vectors[1]); - }, - make_config(format("http://server.node:{}", server->port()))) - .finally(seastar::coroutine::lambda([&] -> future<> { - co_await server->stop(); - })); - } -} - SEASTAR_TEST_CASE(select_similarity_function_other_than_ann_ordering) { // Another tricky case with similarity column with argument different from ANN ordering vector. // Especially if we use prepared statement and the difference is only seen at execution time.