test/vector_search: migrate vector_index option validation tests to pytest

CREATE INDEX option tests for quantization, oversampling, and rescoring
are moved from rescoring_test.cc to test_vector_index.py alongside the
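existing index option tests. These tests exercise only option parsing and
validation, so no vector store mock is needed. No intended semantic change;
a few inputs are adapted to the pytest form (e.g. ' 10 ' becomes ' 9 ' and
'inf' becomes INFINITY), and the rejection checks now also match the error
message.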
This commit is contained in:
Szymon Malewski
2026-05-14 11:25:06 +02:00
parent 1d17d2144f
commit 9f632182fb
2 changed files with 28 additions and 86 deletions

View File

@@ -894,3 +894,31 @@ def test_create_vector_index_accepts_255_key_columns(cql, test_keyspace, scylla_
schema = f'{pk_cols}, v vector<float, 3>, PRIMARY KEY (({pk_names}))'
with new_test_table(cql, test_keyspace, schema) as table:
cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'")
# Tests for vector_index-specific CREATE INDEX options: quantization, oversampling, rescoring.
# All tests use scylla_only because these options are ScyllaDB-specific.
def test_vector_index_option_quantization_validation(cql, test_keyspace, scylla_only, skip_without_tablets):
    """Check how CREATE CUSTOM INDEX ... USING 'vector_index' validates 'quantization'.

    Each listed quantization name is submitted in lower and in upper case and
    must be accepted. An unknown name must be rejected with InvalidRequest
    carrying the option-specific error message.
    """
    accepted_values = ['f32', 'f16', 'bf16', 'i8', 'b1', 'F32', 'F16', 'BF16', 'I8', 'B1']
    expected_error = "Invalid value in option 'quantization' for vector index"
    with new_test_table(cql, test_keyspace, 'p int primary key, v vector<float, 2>') as table:
        # Every accepted value creates one more index on the same table and column.
        for quantization in accepted_values:
            create_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'quantization': '{quantization}'}}"
            cql.execute(create_stmt)
        # A name outside the list above must fail validation.
        rejected_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'quantization': 'invalid_value'}}"
        with pytest.raises(InvalidRequest, match=expected_error):
            cql.execute(rejected_stmt)
def test_vector_index_option_oversampling_validation(cql, test_keyspace, scylla_only, skip_without_tablets):
    """Check how CREATE CUSTOM INDEX ... USING 'vector_index' validates 'oversampling'.

    The accepted list covers several spellings of numbers between 1 and 100;
    the rejected list covers numbers just outside that span, zero, a negative
    number, non-numeric strings, NaN and INFINITY. Rejections must surface as
    InvalidRequest with the option-specific error message.

    Each entry is pasted into the statement verbatim, so entries that should
    reach the server as CQL strings carry their own single quotes.
    NOTE(review): ' 9 ' is pasted unquoted, so its surrounding spaces are
    presumably consumed by the CQL parser and never reach option validation -
    confirm whether a quoted variant was intended.
    """
    accepted_factors = ['1.0', '50.5', '100.0', '10', '1e1', '1000000e-4', '0x10', ' 9 ']
    rejected_factors = ['0.9', '100.1', '0', '-5', "'abc'", "' '", "NaN", "INFINITY"]
    expected_error = "Invalid value in option 'oversampling' for vector index"
    with new_test_table(cql, test_keyspace, 'p int primary key, v vector<float, 2>') as table:
        # Every accepted value creates one more index on the same table and column.
        for factor in accepted_factors:
            create_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'oversampling': {factor}}}"
            cql.execute(create_stmt)
        for factor in rejected_factors:
            rejected_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'oversampling': {factor}}}"
            with pytest.raises(InvalidRequest, match=expected_error):
                cql.execute(rejected_stmt)
def test_vector_index_option_rescoring_validation(cql, test_keyspace, scylla_only, skip_without_tablets):
    """Check how CREATE CUSTOM INDEX ... USING 'vector_index' validates 'rescoring'.

    'true' and 'false' must be accepted in lower, capitalized and upper case.
    Any other string - including '0', '1' and boolean words with leading or
    trailing whitespace inside the quotes - must be rejected with
    InvalidRequest carrying the option-specific error message.
    """
    accepted_flags = ['true', 'false', 'True', 'False', 'TRUE', 'FALSE']
    rejected_flags = ['invalid_value', '0', '1', ' true', 'false ']
    expected_error = "Invalid value in option 'rescoring' for vector index"
    with new_test_table(cql, test_keyspace, 'p int primary key, v vector<float, 2>') as table:
        # Every accepted value creates one more index on the same table and column.
        for rescoring in accepted_flags:
            create_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'rescoring': '{rescoring}'}}"
            cql.execute(create_stmt)
        for rescoring in rejected_flags:
            rejected_stmt = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'rescoring': '{rescoring}'}}"
            with pytest.raises(InvalidRequest, match=expected_error):
                cql.execute(rejected_stmt)

View File

@@ -107,92 +107,6 @@ struct print_log_value<std::vector<float>> {
};
}
// CREATE INDEX ... USING 'vector_index' must accept every listed quantization
// name, spelled in lower case as well as in upper case.
SEASTAR_TEST_CASE(index_option_quantization_valid_values) {
    const std::vector<sstring> accepted = {"f32", "f16", "bf16", "i8", "b1", "F32", "F16", "BF16", "I8", "B1"};
    // One do_with_cql_env() invocation (and one create_test_table() call) per value.
    for (const sstring& value : accepted) {
        auto create_index = [&value](cql_test_env& env) -> future<> {
            auto schema = co_await create_test_table(env, "ks", "cf");
            const auto statement =
                    fmt::format("CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={{'quantization': '{}'}};", value);
            BOOST_REQUIRE_NO_THROW(co_await env.execute_cql(statement));
        };
        co_await do_with_cql_env(create_index, make_config());
    }
}
// CREATE INDEX ... USING 'vector_index' must reject an unknown quantization
// name by throwing exceptions::invalid_request_exception.
SEASTAR_TEST_CASE(index_option_quantization_invalid_value) {
    auto create_index = [](cql_test_env& env) -> future<> {
        auto schema = co_await create_test_table(env, "ks", "cf");
        const auto statement = "CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={'quantization': 'invalid_value'};";
        BOOST_REQUIRE_THROW(co_await env.execute_cql(statement), exceptions::invalid_request_exception);
    };
    co_await do_with_cql_env(create_index, make_config());
}
// CREATE INDEX ... USING 'vector_index' must accept each listed oversampling
// factor. The factor is inserted into the statement as-is, without quotes.
SEASTAR_TEST_CASE(index_option_oversampling_valid_values) {
    const std::vector<sstring> accepted = {"1.0", "50.5", "100.0", "10", "1e1", "1000000e-4", "0x10", " 10 "};
    // One do_with_cql_env() invocation (and one create_test_table() call) per factor.
    for (const sstring& value : accepted) {
        auto create_index = [&value](cql_test_env& env) -> future<> {
            auto schema = co_await create_test_table(env, "ks", "cf");
            const auto statement =
                    fmt::format("CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={{'oversampling': {}}};", value);
            BOOST_REQUIRE_NO_THROW(co_await env.execute_cql(statement));
        };
        co_await do_with_cql_env(create_index, make_config());
    }
}
// CREATE INDEX ... USING 'vector_index' must reject each listed oversampling
// factor by throwing exceptions::invalid_request_exception. Unlike in the
// valid-values case, the factor is wrapped in single quotes in the statement.
SEASTAR_TEST_CASE(index_option_oversampling_invalid_values) {
    const std::vector<sstring> rejected = {"0.9", "100.1", "0", "-5", "abc", " ", "NaN", "inf"};
    // One do_with_cql_env() invocation (and one create_test_table() call) per factor.
    for (const sstring& value : rejected) {
        auto create_index = [&value](cql_test_env& env) -> future<> {
            auto schema = co_await create_test_table(env, "ks", "cf");
            const auto statement =
                    fmt::format("CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={{'oversampling': '{}'}};", value);
            BOOST_REQUIRE_THROW(co_await env.execute_cql(statement), exceptions::invalid_request_exception);
        };
        co_await do_with_cql_env(create_index, make_config());
    }
}
// CREATE INDEX ... USING 'vector_index' must accept "true" and "false" for the
// rescoring option in lower, capitalized and upper case.
SEASTAR_TEST_CASE(index_option_rescoring_valid_values) {
    const std::vector<sstring> accepted = {"true", "false", "True", "False", "TRUE", "FALSE"};
    // One do_with_cql_env() invocation (and one create_test_table() call) per value.
    for (const sstring& value : accepted) {
        auto create_index = [&value](cql_test_env& env) -> future<> {
            auto schema = co_await create_test_table(env, "ks", "cf");
            const auto statement =
                    fmt::format("CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={{'rescoring': '{}'}};", value);
            BOOST_REQUIRE_NO_THROW(co_await env.execute_cql(statement));
        };
        co_await do_with_cql_env(create_index, make_config());
    }
}
// CREATE INDEX ... USING 'vector_index' must reject each listed rescoring value
// (non-boolean words, "0"/"1", and boolean words padded with a space inside the
// quotes) by throwing exceptions::invalid_request_exception.
SEASTAR_TEST_CASE(index_option_rescoring_invalid_value) {
    const std::vector<sstring> rejected = {"invalid_value", "0", "1", " true", "false "};
    // One do_with_cql_env() invocation (and one create_test_table() call) per value.
    for (const sstring& value : rejected) {
        auto create_index = [&value](cql_test_env& env) -> future<> {
            auto schema = co_await create_test_table(env, "ks", "cf");
            const auto statement =
                    fmt::format("CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={{'rescoring': '{}'}};", value);
            BOOST_REQUIRE_THROW(co_await env.execute_cql(statement), exceptions::invalid_request_exception);
        };
        co_await do_with_cql_env(create_index, make_config());
    }
}
SEASTAR_TEST_CASE(oversampling_multiplies_limit_for_vector_store_query) {
auto server = co_await make_vs_mock_server();
co_await do_with_cql_env(