mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-19 16:15:07 +00:00
alternator: implement UpdateTable with a vector index
After an earlier patch allowed CreateTable to create vector indexes together with a table, in this patch we add to UpdateTable the ability to add a new vector index to an existing table, as well as the ability to delete a vector index from an existing table. The implementation is inspired by DynamoDB's syntax for GSI - just like GSI has GlobalSecondaryIndexUpdates with "Create" and "Delete" operations, for vector indexes we have VectorIndexUpdates supporting Create and Delete. "Update" is not yet supported - we didn't implement yet any parameter that can be updated - but we can easily implement it in the future.
This commit is contained in:
@@ -487,6 +487,10 @@ static std::string view_name(std::string_view table_name, std::string_view index
|
||||
return ret;
|
||||
}
|
||||
|
||||
static std::string gsi_name(std::string_view table_name, std::string_view index_name, bool validate_len = true) {
|
||||
return view_name(table_name, index_name, ":", validate_len);
|
||||
}
|
||||
|
||||
static std::string lsi_name(std::string_view table_name, std::string_view index_name, bool validate_len = true) {
|
||||
return view_name(table_name, index_name, "!:", validate_len);
|
||||
}
|
||||
@@ -1789,6 +1793,23 @@ static future<> mark_view_schemas_as_built(utils::chunked_vector<mutation>& out,
|
||||
}
|
||||
}
|
||||
|
||||
// Returns true if the given attribute name is already the target of any vector
|
||||
// index on the schema. Analogous to schema::has_index(), but looks up by the
|
||||
// indexed attribute name rather than the index name.
|
||||
static bool has_vector_index_on_attribute(const schema& s, std::string_view attribute_name) {
|
||||
for (const index_metadata& im : s.indices()) {
|
||||
// No need to check if the secondary index is a vector index, because
|
||||
// Alternator doesn't use secondary indexes for anything else (GSI and
|
||||
// LSI are implemented as materialized views, not secondary indexes).
|
||||
const auto& opts = im.options();
|
||||
auto target_it = opts.find(cql3::statements::index_target::target_option_name);
|
||||
if (target_it != opts.end() && target_it->second == attribute_name) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns the validated "Dimensions" value from a VectorAttribute JSON object
|
||||
// or throws api_error::validation if invalid. The "source" parameter is used
|
||||
// in error messages (e.g., "VectorIndexes" or "VectorIndexUpdates").
|
||||
@@ -2319,6 +2340,129 @@ future<executor::request_return_type> executor::update_table(client_state& clien
|
||||
}
|
||||
}
|
||||
|
||||
// Support VectorIndexUpdates to add or delete a vector index,
|
||||
// similar to GlobalSecondaryIndexUpdates above. We handle this
|
||||
// before builder.build() so we can use builder directly.
|
||||
rjson::value* vector_index_updates = rjson::find(request, "VectorIndexUpdates");
|
||||
if (vector_index_updates) {
|
||||
if (!vector_index_updates->IsArray()) {
|
||||
co_return api_error::validation("VectorIndexUpdates must be an array");
|
||||
}
|
||||
if (vector_index_updates->Size() > 1) {
|
||||
// VectorIndexUpdates mirrors GlobalSecondaryIndexUpdates.
|
||||
// Since DynamoDB artifically limits the latter to just a
|
||||
// single operation (one Create or one Delete), we also
|
||||
// place the same artificial limit on VectorIndexUpdates,
|
||||
// and throw the same LimitExceeded error if the client
|
||||
// tries to pass more than one operation.
|
||||
co_return api_error::limit_exceeded("VectorIndexUpdates only allows one index creation or deletion");
|
||||
}
|
||||
}
|
||||
if (vector_index_updates && vector_index_updates->Size() == 1) {
|
||||
empty_request = false;
|
||||
if (!(*vector_index_updates)[0].IsObject() || (*vector_index_updates)[0].MemberCount() != 1) {
|
||||
co_return api_error::validation("VectorIndexUpdates array must contain one object with a Create or Delete operation");
|
||||
}
|
||||
auto it = (*vector_index_updates)[0].MemberBegin();
|
||||
const std::string_view op = rjson::to_string_view(it->name);
|
||||
if (!it->value.IsObject()) {
|
||||
co_return api_error::validation("VectorIndexUpdates entries must be objects");
|
||||
}
|
||||
const rjson::value* index_name_v = rjson::find(it->value, "IndexName");
|
||||
if (!index_name_v || !index_name_v->IsString()) {
|
||||
co_return api_error::validation("VectorIndexUpdates operation must have IndexName");
|
||||
}
|
||||
sstring index_name = rjson::to_sstring(*index_name_v);
|
||||
if (op == "Create") {
|
||||
if (!p.local().local_db().find_keyspace(tab->ks_name()).get_replication_strategy().uses_tablets()) {
|
||||
co_return api_error::validation("Vector indexes are not supported on tables using vnodes.");
|
||||
}
|
||||
validate_table_name(index_name, "VectorIndexUpdates IndexName");
|
||||
// Check for duplicate index name against existing vector indexes, GSIs and LSIs.
|
||||
if (tab->has_index(index_name)) {
|
||||
// Alternator only uses a secondary index for vector
|
||||
// search (GSI and LSI are implemented as materialized
|
||||
// views, not secondary indexes), so the error message
|
||||
// can refer to a "Vector index".
|
||||
co_return api_error::validation(fmt::format(
|
||||
"Vector index {} already exists in table {}", index_name, tab->cf_name()));
|
||||
}
|
||||
if (p.local().data_dictionary().has_schema(tab->ks_name(), gsi_name(tab->cf_name(), index_name, false)) ||
|
||||
p.local().data_dictionary().has_schema(tab->ks_name(), lsi_name(tab->cf_name(), index_name, false))) {
|
||||
co_return api_error::validation(fmt::format(
|
||||
"GSI or LSI {} already exists in table {}, cannot reuse the name for a vector index", index_name, tab->cf_name()));
|
||||
}
|
||||
const rjson::value* vector_attribute_v = rjson::find(it->value, "VectorAttribute");
|
||||
if (!vector_attribute_v || !vector_attribute_v->IsObject()) {
|
||||
co_return api_error::validation("VectorIndexUpdates Create VectorAttribute must be an object.");
|
||||
}
|
||||
const rjson::value* attribute_name_v = rjson::find(*vector_attribute_v, "AttributeName");
|
||||
if (!attribute_name_v || !attribute_name_v->IsString()) {
|
||||
co_return api_error::validation("VectorIndexUpdates Create AttributeName must be a string.");
|
||||
}
|
||||
std::string_view attribute_name = rjson::to_string_view(*attribute_name_v);
|
||||
validate_attr_name_length("VectorIndexUpdates", attribute_name.size(), /*is_key=*/false, "AttributeName ");
|
||||
// attribute_name must not be a key column of the base
|
||||
// table or any of its GSIs or LSIs, because those have
|
||||
// mandatory types (defined in AttributeDefinitions) which
|
||||
// will never be a vector.
|
||||
for (const column_definition& cdef : tab->primary_key_columns()) {
|
||||
if (cdef.name_as_text() == attribute_name) {
|
||||
co_return api_error::validation(fmt::format(
|
||||
"VectorIndexUpdates AttributeName '{}' is a key column and cannot be used as a vector index target.", attribute_name));
|
||||
}
|
||||
}
|
||||
for (const auto& view : p.local().data_dictionary().find_column_family(tab).views()) {
|
||||
for (const column_definition& cdef : view->primary_key_columns()) {
|
||||
if (cdef.name_as_text() == attribute_name) {
|
||||
co_return api_error::validation(fmt::format(
|
||||
"VectorIndexUpdates AttributeName '{}' is a key column of a GSI or LSI and cannot be used as a vector index target.", attribute_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
// attribute_name must not already be the target of an
|
||||
// existing vector index.
|
||||
if (has_vector_index_on_attribute(*tab, attribute_name)) {
|
||||
co_return api_error::validation(fmt::format(
|
||||
"VectorIndexUpdates AttributeName '{}' is already the target of an existing vector index.", attribute_name));
|
||||
}
|
||||
int dimensions = get_dimensions(*vector_attribute_v, "VectorIndexUpdates");
|
||||
// The optional Projection parameter is only supported with
|
||||
// ProjectionType=KEYS_ONLY. Other values are not yet supported.
|
||||
const rjson::value* projection_v = rjson::find(it->value, "Projection");
|
||||
if (projection_v) {
|
||||
if (!projection_v->IsObject()) {
|
||||
co_return api_error::validation("VectorIndexUpdates Projection must be an object.");
|
||||
}
|
||||
const rjson::value* projection_type_v = rjson::find(*projection_v, "ProjectionType");
|
||||
if (!projection_type_v || !projection_type_v->IsString() ||
|
||||
rjson::to_string_view(*projection_type_v) != "KEYS_ONLY") {
|
||||
co_return api_error::validation("VectorIndexUpdates Projection: only ProjectionType=KEYS_ONLY is currently supported.");
|
||||
}
|
||||
}
|
||||
// A vector index will use CDC on this table, so the CDC
|
||||
// log table name will need to fit our length limits
|
||||
validate_cdc_log_name_length(builder.cf_name());
|
||||
index_options_map index_options;
|
||||
index_options[db::index::secondary_index::custom_class_option_name] = "vector_index";
|
||||
index_options[cql3::statements::index_target::target_option_name] = sstring(attribute_name);
|
||||
index_options["dimensions"] = std::to_string(dimensions);
|
||||
builder.with_index(index_metadata{index_name, index_options,
|
||||
index_metadata_kind::custom, index_metadata::is_local_index(false)});
|
||||
} else if (op == "Delete") {
|
||||
if (!tab->has_index(index_name)) {
|
||||
co_return api_error::resource_not_found(fmt::format(
|
||||
"No vector index {} in table {}", index_name, tab->cf_name()));
|
||||
}
|
||||
builder.without_index(index_name);
|
||||
} else {
|
||||
// Update operation not yet supported, as we don't yet
|
||||
// have any updatable properties of vector indexes.
|
||||
co_return api_error::validation(fmt::format(
|
||||
"VectorIndexUpdates supports a Create or Delete operation, saw '{}'", op));
|
||||
}
|
||||
}
|
||||
|
||||
schema = builder.build();
|
||||
std::vector<view_ptr> new_views;
|
||||
std::vector<std::string> dropped_views;
|
||||
@@ -2335,6 +2479,10 @@ future<executor::request_return_type> executor::update_table(client_state& clien
|
||||
// a LimitExceededException if this is attempted.
|
||||
co_return api_error::limit_exceeded("GlobalSecondaryIndexUpdates only allows one index creation or deletion");
|
||||
}
|
||||
if (vector_index_updates && vector_index_updates->IsArray() &&
|
||||
vector_index_updates->Size() && gsi_updates->Size()) {
|
||||
co_return api_error::limit_exceeded("UpdateTable cannot have both VectorIndexUpdates and GlobalSecondaryIndexUpdates in the same request");
|
||||
}
|
||||
if (gsi_updates->Size() == 1) {
|
||||
empty_request = false;
|
||||
if (!(*gsi_updates)[0].IsObject() || (*gsi_updates)[0].MemberCount() != 1) {
|
||||
@@ -2451,7 +2599,7 @@ future<executor::request_return_type> executor::update_table(client_state& clien
|
||||
}
|
||||
|
||||
if (empty_request) {
|
||||
co_return api_error::validation("UpdateTable requires one of GlobalSecondaryIndexUpdates, StreamSpecification or BillingMode to be specified");
|
||||
co_return api_error::validation("UpdateTable requires one of GlobalSecondaryIndexUpdates, VectorIndexUpdates, StreamSpecification or BillingMode to be specified");
|
||||
}
|
||||
|
||||
co_await verify_permission(enforce_authorization, warn_authorization, client_state_other_shard.get(), schema, auth::permission::ALTER, e.local()._stats);
|
||||
|
||||
Reference in New Issue
Block a user