mirror of
https://github.com/scylladb/scylladb.git
synced 2026-06-09 16:33:35 +00:00
test/vector_search: migrate vector_index option validation tests to pytest
The CREATE INDEX option tests for quantization, oversampling, and rescoring are moved from rescoring_test.cc to test_vector_index.py, alongside the existing index option tests. These tests exercise only option parsing and validation, so no vector store mock is needed. No semantic change.
This commit is contained in:
@@ -894,3 +894,31 @@ def test_create_vector_index_accepts_255_key_columns(cql, test_keyspace, scylla_
|
||||
schema = f'{pk_cols}, v vector<float, 3>, PRIMARY KEY (({pk_names}))'
|
||||
with new_test_table(cql, test_keyspace, schema) as table:
|
||||
cql.execute(f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index'")
|
||||
|
||||
# Tests for vector_index-specific CREATE INDEX options: quantization, oversampling, rescoring.
|
||||
# All tests use scylla_only because these options are ScyllaDB-specific.
|
||||
def test_vector_index_option_quantization_validation(cql, test_keyspace, scylla_only, skip_without_tablets):
    """Check validation of the 'quantization' option of a vector index.

    Each listed spelling (lower- and upper-case) is expected to be accepted
    by CREATE CUSTOM INDEX, while an unknown value is expected to fail with
    InvalidRequest and the option-specific error message.
    """
    # NOTE(review): block nesting was reconstructed from a flattened diff
    # view - confirm against the upstream test file.
    accepted_values = ['f32', 'f16', 'bf16', 'i8', 'b1', 'F32', 'F16', 'BF16', 'I8', 'B1']
    expected_error = "Invalid value in option 'quantization' for vector index"
    schema = 'p int primary key, v vector<float, 2>'
    with new_test_table(cql, test_keyspace, schema) as table:
        for quantization in accepted_values:
            statement = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'quantization': '{quantization}'}}"
            cql.execute(statement)
        rejected_statement = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'quantization': 'invalid_value'}}"
        with pytest.raises(InvalidRequest, match=expected_error):
            cql.execute(rejected_statement)
|
||||
|
||||
def test_vector_index_option_oversampling_validation(cql, test_keyspace, scylla_only, skip_without_tablets):
    """Check validation of the 'oversampling' option of a vector index.

    The accepted factors are spliced into the statement verbatim (unquoted),
    so they reach the server exactly as written here, including the
    exponent, hexadecimal and space-padded spellings. Every rejected factor
    is expected to fail with InvalidRequest and the option-specific message.
    """
    # NOTE(review): block nesting was reconstructed from a flattened diff
    # view - confirm against the upstream test file.
    accepted_factors = ['1.0', '50.5', '100.0', '10', '1e1', '1000000e-4', '0x10', ' 9 ']
    # Rejected inputs mix numeric literals, quoted strings and the bare
    # NaN / INFINITY literals; each entry is inserted as-is.
    rejected_factors = ['0.9', '100.1', '0', '-5', "'abc'", "' '", "NaN", "INFINITY"]
    expected_error = "Invalid value in option 'oversampling' for vector index"
    schema = 'p int primary key, v vector<float, 2>'
    with new_test_table(cql, test_keyspace, schema) as table:
        for factor in accepted_factors:
            statement = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'oversampling': {factor}}}"
            cql.execute(statement)
        for factor in rejected_factors:
            statement = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'oversampling': {factor}}}"
            with pytest.raises(InvalidRequest, match=expected_error):
                cql.execute(statement)
|
||||
|
||||
def test_vector_index_option_rescoring_validation(cql, test_keyspace, scylla_only, skip_without_tablets):
    """Check validation of the 'rescoring' option of a vector index.

    The boolean spellings in three capitalizations are expected to be
    accepted. Anything else, including '0', '1' and values padded with
    whitespace, is expected to fail with InvalidRequest and the
    option-specific error message.
    """
    # NOTE(review): block nesting was reconstructed from a flattened diff
    # view - confirm against the upstream test file.
    accepted_values = ['true', 'false', 'True', 'False', 'TRUE', 'FALSE']
    rejected_values = ['invalid_value', '0', '1', ' true', 'false ']
    expected_error = "Invalid value in option 'rescoring' for vector index"
    schema = 'p int primary key, v vector<float, 2>'
    with new_test_table(cql, test_keyspace, schema) as table:
        for rescoring in accepted_values:
            statement = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'rescoring': '{rescoring}'}}"
            cql.execute(statement)
        for rescoring in rejected_values:
            statement = f"CREATE CUSTOM INDEX ON {table}(v) USING 'vector_index' WITH OPTIONS = {{'rescoring': '{rescoring}'}}"
            with pytest.raises(InvalidRequest, match=expected_error):
                cql.execute(statement)
|
||||
|
||||
@@ -107,92 +107,6 @@ struct print_log_value<std::vector<float>> {
|
||||
};
|
||||
}
|
||||
|
||||
// Checks that CREATE INDEX with the 'quantization' option does not throw
// for each listed value, in lower- and upper-case spellings.
// do_with_cql_env is invoked once per value.
SEASTAR_TEST_CASE(index_option_quantization_valid_values) {
    const std::vector<sstring> accepted_values = {"f32", "f16", "bf16", "i8", "b1", "F32", "F16", "BF16", "I8", "B1"};
    for (const auto& value : accepted_values) {
        co_await do_with_cql_env(
                [&value](cql_test_env& cql_env) -> future<> {
                    auto table_schema = co_await create_test_table(cql_env, "ks", "cf");

                    // The statement is built inside the macro so that any
                    // exception raised while running it fails the check.
                    BOOST_REQUIRE_NO_THROW(co_await cql_env.execute_cql(
                            fmt::format("CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={{'quantization': '{}'}};", value)));
                },
                make_config());
    }
}
|
||||
|
||||
// Checks that CREATE INDEX with an unknown 'quantization' value throws
// exceptions::invalid_request_exception.
SEASTAR_TEST_CASE(index_option_quantization_invalid_value) {
    co_await do_with_cql_env(
            [](cql_test_env& cql_env) -> future<> {
                auto table_schema = co_await create_test_table(cql_env, "ks", "cf");

                BOOST_REQUIRE_THROW(
                        co_await cql_env.execute_cql(
                                "CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={'quantization': 'invalid_value'};"),
                        exceptions::invalid_request_exception);
            },
            make_config());
}
|
||||
|
||||
// Checks that CREATE INDEX with the 'oversampling' option does not throw
// for each listed factor. The factor text is inserted into the statement
// unquoted, exactly as spelled in the list (including the exponent,
// hexadecimal and space-padded forms). do_with_cql_env is invoked once
// per factor.
SEASTAR_TEST_CASE(index_option_oversampling_valid_values) {
    const std::vector<sstring> accepted_factors = {"1.0", "50.5", "100.0", "10", "1e1", "1000000e-4", "0x10", " 10 "};
    for (const auto& value : accepted_factors) {
        co_await do_with_cql_env(
                [&value](cql_test_env& cql_env) -> future<> {
                    auto table_schema = co_await create_test_table(cql_env, "ks", "cf");

                    BOOST_REQUIRE_NO_THROW(co_await cql_env.execute_cql(
                            fmt::format("CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={{'oversampling': {}}};", value)));
                },
                make_config());
    }
}
|
||||
|
||||
// Checks that CREATE INDEX with the 'oversampling' option throws
// exceptions::invalid_request_exception for each listed factor. Here the
// factor text is wrapped in single quotes inside the statement.
// do_with_cql_env is invoked once per factor.
SEASTAR_TEST_CASE(index_option_oversampling_invalid_values) {
    const std::vector<sstring> rejected_factors = {"0.9", "100.1", "0", "-5", "abc", " ", "NaN", "inf"};
    for (const auto& value : rejected_factors) {
        co_await do_with_cql_env(
                [&value](cql_test_env& cql_env) -> future<> {
                    auto table_schema = co_await create_test_table(cql_env, "ks", "cf");

                    BOOST_REQUIRE_THROW(
                            co_await cql_env.execute_cql(
                                    fmt::format("CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={{'oversampling': '{}'}};", value)),
                            exceptions::invalid_request_exception);
                },
                make_config());
    }
}
|
||||
|
||||
// Checks that CREATE INDEX with the 'rescoring' option does not throw for
// "true" / "false" in each of the three listed capitalizations.
// do_with_cql_env is invoked once per value.
SEASTAR_TEST_CASE(index_option_rescoring_valid_values) {
    const std::vector<sstring> accepted_values = {"true", "false", "True", "False", "TRUE", "FALSE"};
    for (const auto& value : accepted_values) {
        co_await do_with_cql_env(
                [&value](cql_test_env& cql_env) -> future<> {
                    auto table_schema = co_await create_test_table(cql_env, "ks", "cf");

                    BOOST_REQUIRE_NO_THROW(co_await cql_env.execute_cql(
                            fmt::format("CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={{'rescoring': '{}'}};", value)));
                },
                make_config());
    }
}
|
||||
|
||||
// Checks that CREATE INDEX with the 'rescoring' option throws
// exceptions::invalid_request_exception for each listed value, which
// includes "0", "1" and boolean spellings padded with whitespace.
// do_with_cql_env is invoked once per value.
SEASTAR_TEST_CASE(index_option_rescoring_invalid_value) {
    const std::vector<sstring> rejected_values = {"invalid_value", "0", "1", " true", "false "};
    for (const auto& value : rejected_values) {
        co_await do_with_cql_env(
                [&value](cql_test_env& cql_env) -> future<> {
                    auto table_schema = co_await create_test_table(cql_env, "ks", "cf");

                    BOOST_REQUIRE_THROW(
                            co_await cql_env.execute_cql(
                                    fmt::format("CREATE INDEX idx ON ks.cf (embedding) USING 'vector_index' WITH OPTIONS={{'rescoring': '{}'}};", value)),
                            exceptions::invalid_request_exception);
                },
                make_config());
    }
}
|
||||
|
||||
SEASTAR_TEST_CASE(oversampling_multiplies_limit_for_vector_store_query) {
|
||||
auto server = co_await make_vs_mock_server();
|
||||
co_await do_with_cql_env(
|
||||
|
||||
Reference in New Issue
Block a user