From 82de16f92cff17ffcfd7285dc735fe895a85473e Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 15 Mar 2026 18:59:39 +0200 Subject: [PATCH] alternator: implement UpdateTable with a vector index After an earlier patch allowed CreateTable to create vector indexes together with a table, in this patch we add to UpdateTable the ability to add a new vector index to an existing table, as well as the ability to delete a vector index from an existing table. The implementation is inspired by DynamoDB's syntax for GSI - just like GSI has GlobalSecondaryIndexUpdates with "Create" and "Delete" operations, for vector indexes we have VectorIndexUpdates supporting Create and Delete. "Update" is not yet supported - we didn't implement yet any parameter that can be updated - but we can easily implement it in the future. --- alternator/executor.cc | 150 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 149 insertions(+), 1 deletion(-) diff --git a/alternator/executor.cc b/alternator/executor.cc index 4918745274..e293b9cd9b 100644 --- a/alternator/executor.cc +++ b/alternator/executor.cc @@ -487,6 +487,10 @@ static std::string view_name(std::string_view table_name, std::string_view index return ret; } +static std::string gsi_name(std::string_view table_name, std::string_view index_name, bool validate_len = true) { + return view_name(table_name, index_name, ":", validate_len); +} + static std::string lsi_name(std::string_view table_name, std::string_view index_name, bool validate_len = true) { return view_name(table_name, index_name, "!:", validate_len); } @@ -1789,6 +1793,23 @@ static future<> mark_view_schemas_as_built(utils::chunked_vector& out, } } +// Returns true if the given attribute name is already the target of any vector +// index on the schema. Analogous to schema::has_index(), but looks up by the +// indexed attribute name rather than the index name. +static bool has_vector_index_on_attribute(const schema& s, std::string_view attribute_name) { + for (const index_metadata& im : s.indices()) { + // No need to check if the secondary index is a vector index, because + // Alternator doesn't use secondary indexes for anything else (GSI and + // LSI are implemented as materialized views, not secondary indexes). + const auto& opts = im.options(); + auto target_it = opts.find(cql3::statements::index_target::target_option_name); + if (target_it != opts.end() && target_it->second == attribute_name) { + return true; + } + } + return false; +} + // Returns the validated "Dimensions" value from a VectorAttribute JSON object // or throws api_error::validation if invalid. The "source" parameter is used // in error messages (e.g., "VectorIndexes" or "VectorIndexUpdates"). @@ -2319,6 +2340,129 @@ future executor::update_table(client_state& clien } } + // Support VectorIndexUpdates to add or delete a vector index, + // similar to GlobalSecondaryIndexUpdates above. We handle this + // before builder.build() so we can use builder directly. + rjson::value* vector_index_updates = rjson::find(request, "VectorIndexUpdates"); + if (vector_index_updates) { + if (!vector_index_updates->IsArray()) { + co_return api_error::validation("VectorIndexUpdates must be an array"); + } + if (vector_index_updates->Size() > 1) { + // VectorIndexUpdates mirrors GlobalSecondaryIndexUpdates. + // Since DynamoDB artifically limits the latter to just a + // single operation (one Create or one Delete), we also + // place the same artificial limit on VectorIndexUpdates, + // and throw the same LimitExceeded error if the client + // tries to pass more than one operation. + co_return api_error::limit_exceeded("VectorIndexUpdates only allows one index creation or deletion"); + } + } + if (vector_index_updates && vector_index_updates->Size() == 1) { + empty_request = false; + if (!(*vector_index_updates)[0].IsObject() || (*vector_index_updates)[0].MemberCount() != 1) { + co_return api_error::validation("VectorIndexUpdates array must contain one object with a Create or Delete operation"); + } + auto it = (*vector_index_updates)[0].MemberBegin(); + const std::string_view op = rjson::to_string_view(it->name); + if (!it->value.IsObject()) { + co_return api_error::validation("VectorIndexUpdates entries must be objects"); + } + const rjson::value* index_name_v = rjson::find(it->value, "IndexName"); + if (!index_name_v || !index_name_v->IsString()) { + co_return api_error::validation("VectorIndexUpdates operation must have IndexName"); + } + sstring index_name = rjson::to_sstring(*index_name_v); + if (op == "Create") { + if (!p.local().local_db().find_keyspace(tab->ks_name()).get_replication_strategy().uses_tablets()) { + co_return api_error::validation("Vector indexes are not supported on tables using vnodes."); + } + validate_table_name(index_name, "VectorIndexUpdates IndexName"); + // Check for duplicate index name against existing vector indexes, GSIs and LSIs. + if (tab->has_index(index_name)) { + // Alternator only uses a secondary index for vector + // search (GSI and LSI are implemented as materialized + // views, not secondary indexes), so the error message + // can refer to a "Vector index". + co_return api_error::validation(fmt::format( + "Vector index {} already exists in table {}", index_name, tab->cf_name())); + } + if (p.local().data_dictionary().has_schema(tab->ks_name(), gsi_name(tab->cf_name(), index_name, false)) || + p.local().data_dictionary().has_schema(tab->ks_name(), lsi_name(tab->cf_name(), index_name, false))) { + co_return api_error::validation(fmt::format( + "GSI or LSI {} already exists in table {}, cannot reuse the name for a vector index", index_name, tab->cf_name())); + } + const rjson::value* vector_attribute_v = rjson::find(it->value, "VectorAttribute"); + if (!vector_attribute_v || !vector_attribute_v->IsObject()) { + co_return api_error::validation("VectorIndexUpdates Create VectorAttribute must be an object."); + } + const rjson::value* attribute_name_v = rjson::find(*vector_attribute_v, "AttributeName"); + if (!attribute_name_v || !attribute_name_v->IsString()) { + co_return api_error::validation("VectorIndexUpdates Create AttributeName must be a string."); + } + std::string_view attribute_name = rjson::to_string_view(*attribute_name_v); + validate_attr_name_length("VectorIndexUpdates", attribute_name.size(), /*is_key=*/false, "AttributeName "); + // attribute_name must not be a key column of the base + // table or any of its GSIs or LSIs, because those have + // mandatory types (defined in AttributeDefinitions) which + // will never be a vector. + for (const column_definition& cdef : tab->primary_key_columns()) { + if (cdef.name_as_text() == attribute_name) { + co_return api_error::validation(fmt::format( + "VectorIndexUpdates AttributeName '{}' is a key column and cannot be used as a vector index target.", attribute_name)); + } + } + for (const auto& view : p.local().data_dictionary().find_column_family(tab).views()) { + for (const column_definition& cdef : view->primary_key_columns()) { + if (cdef.name_as_text() == attribute_name) { + co_return api_error::validation(fmt::format( + "VectorIndexUpdates AttributeName '{}' is a key column of a GSI or LSI and cannot be used as a vector index target.", attribute_name)); + } + } + } + // attribute_name must not already be the target of an + // existing vector index. + if (has_vector_index_on_attribute(*tab, attribute_name)) { + co_return api_error::validation(fmt::format( + "VectorIndexUpdates AttributeName '{}' is already the target of an existing vector index.", attribute_name)); + } + int dimensions = get_dimensions(*vector_attribute_v, "VectorIndexUpdates"); + // The optional Projection parameter is only supported with + // ProjectionType=KEYS_ONLY. Other values are not yet supported. + const rjson::value* projection_v = rjson::find(it->value, "Projection"); + if (projection_v) { + if (!projection_v->IsObject()) { + co_return api_error::validation("VectorIndexUpdates Projection must be an object."); + } + const rjson::value* projection_type_v = rjson::find(*projection_v, "ProjectionType"); + if (!projection_type_v || !projection_type_v->IsString() || + rjson::to_string_view(*projection_type_v) != "KEYS_ONLY") { + co_return api_error::validation("VectorIndexUpdates Projection: only ProjectionType=KEYS_ONLY is currently supported."); + } + } + // A vector index will use CDC on this table, so the CDC + // log table name will need to fit our length limits + validate_cdc_log_name_length(builder.cf_name()); + index_options_map index_options; + index_options[db::index::secondary_index::custom_class_option_name] = "vector_index"; + index_options[cql3::statements::index_target::target_option_name] = sstring(attribute_name); + index_options["dimensions"] = std::to_string(dimensions); + builder.with_index(index_metadata{index_name, index_options, + index_metadata_kind::custom, index_metadata::is_local_index(false)}); + } else if (op == "Delete") { + if (!tab->has_index(index_name)) { + co_return api_error::resource_not_found(fmt::format( + "No vector index {} in table {}", index_name, tab->cf_name())); + } + builder.without_index(index_name); + } else { + // Update operation not yet supported, as we don't yet + // have any updatable properties of vector indexes. + co_return api_error::validation(fmt::format( + "VectorIndexUpdates supports a Create or Delete operation, saw '{}'", op)); + } + } + schema = builder.build(); std::vector new_views; std::vector dropped_views; @@ -2335,6 +2479,10 @@ future executor::update_table(client_state& clien // a LimitExceededException if this is attempted. co_return api_error::limit_exceeded("GlobalSecondaryIndexUpdates only allows one index creation or deletion"); } + if (vector_index_updates && vector_index_updates->IsArray() && + vector_index_updates->Size() && gsi_updates->Size()) { + co_return api_error::limit_exceeded("UpdateTable cannot have both VectorIndexUpdates and GlobalSecondaryIndexUpdates in the same request"); + } if (gsi_updates->Size() == 1) { empty_request = false; if (!(*gsi_updates)[0].IsObject() || (*gsi_updates)[0].MemberCount() != 1) { @@ -2451,7 +2599,7 @@ future executor::update_table(client_state& clien } if (empty_request) { - co_return api_error::validation("UpdateTable requires one of GlobalSecondaryIndexUpdates, StreamSpecification or BillingMode to be specified"); + co_return api_error::validation("UpdateTable requires one of GlobalSecondaryIndexUpdates, VectorIndexUpdates, StreamSpecification or BillingMode to be specified"); } co_await verify_permission(enforce_authorization, warn_authorization, client_state_other_shard.get(), schema, auth::permission::ALTER, e.local()._stats);