mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-21 09:00:35 +00:00
index: fix DESC INDEX for vector index
The `DESC INDEX` command returned incorrect results for local vector indexes and for vector indexes that included filtering columns. This patch corrects the implementation to ensure `DESCRIBE INDEX` accurately reflects the index configuration. This was a pre-existing issue, not a regression from recent serialization schema changes for vector index target options.
This commit is contained in:
committed by
Marcin Maliszkiewicz
parent
dae73f4781
commit
ec46a8a7d3
@@ -20,6 +20,7 @@
|
||||
#include "types/concrete_types.hh"
|
||||
#include "types/types.hh"
|
||||
#include "utils/managed_string.hh"
|
||||
#include <ranges>
|
||||
#include <seastar/core/sstring.hh>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
@@ -103,12 +104,53 @@ const static std::unordered_map<sstring, std::function<void(const sstring&, cons
|
||||
{"oversampling", std::bind_front(validate_factor_option, 1.0f, 100.0f)},
|
||||
// 'rescoring' enables recalculating of similarity scores of candidates retrieved from vector store when quantization is used.
|
||||
{"rescoring", std::bind_front(validate_enumerated_option, boolean_values)},
|
||||
};
|
||||
};
|
||||
|
||||
// Keys of the JSON object that stores the serialized vector index targets.
static constexpr const char* TC_TARGET_KEY = "tc"; // target (vector) column
static constexpr const char* PK_TARGET_KEY = "pk"; // partition key columns (local index)
static constexpr const char* FC_TARGET_KEY = "fc"; // filtering columns
|
||||
|
||||
// Convert a serialized targets string (as produced by serialize_targets())
|
||||
// back into the CQL column list used inside CREATE INDEX ... ON table(<here>).
|
||||
//
|
||||
// JSON examples:
|
||||
// {"tc":"v","fc":["f1","f2"]} -> "v, f1, f2"
|
||||
// {"tc":"v","pk":["p1","p2"]} -> "(p1, p2), v"
|
||||
// {"tc":"v","pk":["p1","p2"],"fc":["f1"]} -> "(p1, p2), v, f1"
|
||||
static sstring targets_to_cql(const sstring& targets) {
|
||||
std::optional<rjson::value> json_value = rjson::try_parse(targets);
|
||||
if (!json_value || !json_value->IsObject()) {
|
||||
return cql3::util::maybe_quote(cql3::statements::index_target::column_name_from_target_string(targets));
|
||||
}
|
||||
|
||||
sstring result;
|
||||
|
||||
const rjson::value* pk = rjson::find(*json_value, PK_TARGET_KEY);
|
||||
if (pk && pk->IsArray() && !pk->Empty()) {
|
||||
result += "(";
|
||||
auto pk_cols = std::views::all(pk->GetArray()) | std::views::transform([&](const rjson::value& col) {
|
||||
return cql3::util::maybe_quote(sstring(rjson::to_string_view(col)));
|
||||
}) | std::ranges::to<std::vector<sstring>>();
|
||||
result += boost::algorithm::join(pk_cols, ", ");
|
||||
result += "), ";
|
||||
}
|
||||
|
||||
const rjson::value* tc = rjson::find(*json_value, TC_TARGET_KEY);
|
||||
if (tc && tc->IsString()) {
|
||||
result += cql3::util::maybe_quote(sstring(rjson::to_string_view(*tc)));
|
||||
}
|
||||
|
||||
const rjson::value* fc = rjson::find(*json_value, FC_TARGET_KEY);
|
||||
if (fc && fc->IsArray()) {
|
||||
for (rapidjson::SizeType i = 0; i < fc->Size(); ++i) {
|
||||
result += ", ";
|
||||
result += cql3::util::maybe_quote(sstring(rjson::to_string_view((*fc)[i])));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Serialize vector index targets into a format using:
|
||||
// "tc" for the target (vector) column,
|
||||
// "pk" for partition key columns (local index),
|
||||
@@ -209,9 +251,8 @@ bool vector_index::view_should_exist() const {
|
||||
|
||||
std::optional<cql3::description> vector_index::describe(const index_metadata& im, const schema& base_schema) const {
|
||||
fragmented_ostringstream os;
|
||||
os << "CREATE CUSTOM INDEX " << cql3::util::maybe_quote(im.name()) << " ON "
|
||||
<< cql3::util::maybe_quote(base_schema.ks_name()) << "." << cql3::util::maybe_quote(base_schema.cf_name())
|
||||
<< "(" << cql3::util::maybe_quote(im.options().at(cql3::statements::index_target::target_option_name)) << ")"
|
||||
os << "CREATE CUSTOM INDEX " << cql3::util::maybe_quote(im.name()) << " ON " << cql3::util::maybe_quote(base_schema.ks_name()) << "."
|
||||
<< cql3::util::maybe_quote(base_schema.cf_name()) << "(" << targets_to_cql(im.options().at(cql3::statements::index_target::target_option_name)) << ")"
|
||||
<< " USING 'vector_index'";
|
||||
|
||||
return cql3::description{
|
||||
|
||||
@@ -200,6 +200,36 @@ def test_describe_custom_index(cql, test_keyspace, skip_without_tablets):
|
||||
assert f"CREATE CUSTOM INDEX custom ON {table}{maybe_space}(v1) USING '{custom_class}'" in a_desc
|
||||
assert f"CREATE CUSTOM INDEX custom1 ON {table}{maybe_space}(v2) USING '{custom_class}'" in b_desc
|
||||
|
||||
# DESC INDEX on a vector index created with additional filtering columns must
# list those columns after the vector column, as in the CREATE statement.
def test_describe_vector_index_with_filtering_columns(cql, test_keyspace, scylla_only, skip_without_tablets):
    table_columns = 'p int primary key, v vector<float, 3>, f1 int, f2 int'
    with new_test_table(cql, test_keyspace, table_columns) as table:
        index_name = unique_name()
        cql.execute(f"CREATE CUSTOM INDEX {index_name} ON {table}(v, f1, f2) USING 'vector_index'")

        described = cql.execute(f"DESC INDEX {test_keyspace}.{index_name}").one()
        expected = f"CREATE CUSTOM INDEX {index_name} ON {table}(v, f1, f2) USING 'vector_index'"
        assert expected in described.create_statement
|
||||
|
||||
# DESC INDEX on a local vector index must show the parenthesized partition key
# group in front of the vector column, as in the CREATE statement.
def test_describe_vector_index_local(cql, test_keyspace, scylla_only, skip_without_tablets):
    table_columns = 'p1 int, p2 int, c int, v vector<float, 3>, PRIMARY KEY ((p1, p2), c)'
    with new_test_table(cql, test_keyspace, table_columns) as table:
        index_name = unique_name()
        cql.execute(f"CREATE CUSTOM INDEX {index_name} ON {table}((p1, p2), v) USING 'vector_index'")

        described = cql.execute(f"DESC INDEX {test_keyspace}.{index_name}").one()
        expected = f"CREATE CUSTOM INDEX {index_name} ON {table}((p1, p2), v) USING 'vector_index'"
        assert expected in described.create_statement
|
||||
|
||||
# DESC INDEX on a local vector index that also has filtering columns must show
# the partition key group, then the vector column, then the filtering columns.
def test_describe_vector_index_local_with_filtering_columns(cql, test_keyspace, scylla_only, skip_without_tablets):
    table_columns = 'p1 int, p2 int, c int, v vector<float, 3>, f1 text, f2 text, PRIMARY KEY ((p1, p2), c)'
    with new_test_table(cql, test_keyspace, table_columns) as table:
        index_name = unique_name()
        cql.execute(f"CREATE CUSTOM INDEX {index_name} ON {table}((p1, p2), v, f1, f2) USING 'vector_index'")

        described = cql.execute(f"DESC INDEX {test_keyspace}.{index_name}").one()
        expected = f"CREATE CUSTOM INDEX {index_name} ON {table}((p1, p2), v, f1, f2) USING 'vector_index'"
        assert expected in described.create_statement
|
||||
|
||||
|
||||
def test_vector_index_version_on_recreate(cql, test_keyspace, scylla_only, skip_without_tablets):
|
||||
schema = 'p int primary key, v vector<float, 3>'
|
||||
|
||||
Reference in New Issue
Block a user