alternator: implement UpdateTable with a vector index

After an earlier patch allowed CreateTable to create vector indexes
together with a table, in this patch we add to UpdateTable the ability
to add a new vector index to an existing table, as well as the ability
to delete a vector index from an existing table.

The implementation is inspired by DynamoDB's syntax for GSI - just like
GSI has GlobalSecondaryIndexUpdates with "Create" and "Delete" operations,
for vector indexes we have VectorIndexUpdates supporting Create and
Delete. "Update" is not yet supported - we haven't yet implemented any
parameter that can be updated - but we can easily add it in the
future.
This commit is contained in:
Nadav Har'El
2026-03-15 18:59:39 +02:00
parent 217090a996
commit 82de16f92c

View File

@@ -487,6 +487,10 @@ static std::string view_name(std::string_view table_name, std::string_view index
return ret;
}
// Computes the schema name used for a GSI called index_name on the table
// table_name. The work is delegated to view_name() with ":" passed as the
// delimiter argument; validate_len is forwarded to view_name() unchanged
// (callers that only look up an existing name pass false).
static std::string gsi_name(std::string_view table_name, std::string_view index_name, bool validate_len = true) {
    std::string name = view_name(table_name, index_name, ":", validate_len);
    return name;
}
// Computes the schema name used for an LSI called index_name on the table
// table_name. The work is delegated to view_name() with "!:" passed as the
// delimiter argument; validate_len is forwarded to view_name() unchanged
// (callers that only look up an existing name pass false).
static std::string lsi_name(std::string_view table_name, std::string_view index_name, bool validate_len = true) {
    std::string name = view_name(table_name, index_name, "!:", validate_len);
    return name;
}
@@ -1789,6 +1793,23 @@ static future<> mark_view_schemas_as_built(utils::chunked_vector<mutation>& out,
}
}
// Returns true if the given attribute name is already the target of any vector
// index on the schema. Analogous to schema::has_index(), but looks up by the
// indexed attribute name rather than the index name.
static bool has_vector_index_on_attribute(const schema& s, std::string_view attribute_name) {
for (const index_metadata& im : s.indices()) {
// No need to check if the secondary index is a vector index, because
// Alternator doesn't use secondary indexes for anything else (GSI and
// LSI are implemented as materialized views, not secondary indexes).
const auto& opts = im.options();
auto target_it = opts.find(cql3::statements::index_target::target_option_name);
if (target_it != opts.end() && target_it->second == attribute_name) {
return true;
}
}
return false;
}
// Returns the validated "Dimensions" value from a VectorAttribute JSON object
// or throws api_error::validation if invalid. The "source" parameter is used
// in error messages (e.g., "VectorIndexes" or "VectorIndexUpdates").
@@ -2319,6 +2340,129 @@ future<executor::request_return_type> executor::update_table(client_state& clien
}
}
// Support VectorIndexUpdates to add or delete a vector index,
// similar to GlobalSecondaryIndexUpdates above. We handle this
// before builder.build() so we can use builder directly.
rjson::value* vector_index_updates = rjson::find(request, "VectorIndexUpdates");
if (vector_index_updates) {
if (!vector_index_updates->IsArray()) {
co_return api_error::validation("VectorIndexUpdates must be an array");
}
if (vector_index_updates->Size() > 1) {
// VectorIndexUpdates mirrors GlobalSecondaryIndexUpdates.
// Since DynamoDB artificially limits the latter to just a
// single operation (one Create or one Delete), we also
// place the same artificial limit on VectorIndexUpdates,
// and throw the same LimitExceeded error if the client
// tries to pass more than one operation.
co_return api_error::limit_exceeded("VectorIndexUpdates only allows one index creation or deletion");
}
}
if (vector_index_updates && vector_index_updates->Size() == 1) {
empty_request = false;
if (!(*vector_index_updates)[0].IsObject() || (*vector_index_updates)[0].MemberCount() != 1) {
co_return api_error::validation("VectorIndexUpdates array must contain one object with a Create or Delete operation");
}
auto it = (*vector_index_updates)[0].MemberBegin();
const std::string_view op = rjson::to_string_view(it->name);
if (!it->value.IsObject()) {
co_return api_error::validation("VectorIndexUpdates entries must be objects");
}
const rjson::value* index_name_v = rjson::find(it->value, "IndexName");
if (!index_name_v || !index_name_v->IsString()) {
co_return api_error::validation("VectorIndexUpdates operation must have IndexName");
}
sstring index_name = rjson::to_sstring(*index_name_v);
if (op == "Create") {
if (!p.local().local_db().find_keyspace(tab->ks_name()).get_replication_strategy().uses_tablets()) {
co_return api_error::validation("Vector indexes are not supported on tables using vnodes.");
}
validate_table_name(index_name, "VectorIndexUpdates IndexName");
// Check for duplicate index name against existing vector indexes, GSIs and LSIs.
if (tab->has_index(index_name)) {
// Alternator only uses a secondary index for vector
// search (GSI and LSI are implemented as materialized
// views, not secondary indexes), so the error message
// can refer to a "Vector index".
co_return api_error::validation(fmt::format(
"Vector index {} already exists in table {}", index_name, tab->cf_name()));
}
if (p.local().data_dictionary().has_schema(tab->ks_name(), gsi_name(tab->cf_name(), index_name, false)) ||
p.local().data_dictionary().has_schema(tab->ks_name(), lsi_name(tab->cf_name(), index_name, false))) {
co_return api_error::validation(fmt::format(
"GSI or LSI {} already exists in table {}, cannot reuse the name for a vector index", index_name, tab->cf_name()));
}
const rjson::value* vector_attribute_v = rjson::find(it->value, "VectorAttribute");
if (!vector_attribute_v || !vector_attribute_v->IsObject()) {
co_return api_error::validation("VectorIndexUpdates Create VectorAttribute must be an object.");
}
const rjson::value* attribute_name_v = rjson::find(*vector_attribute_v, "AttributeName");
if (!attribute_name_v || !attribute_name_v->IsString()) {
co_return api_error::validation("VectorIndexUpdates Create AttributeName must be a string.");
}
std::string_view attribute_name = rjson::to_string_view(*attribute_name_v);
validate_attr_name_length("VectorIndexUpdates", attribute_name.size(), /*is_key=*/false, "AttributeName ");
// attribute_name must not be a key column of the base
// table or any of its GSIs or LSIs, because those have
// mandatory types (defined in AttributeDefinitions) which
// will never be a vector.
for (const column_definition& cdef : tab->primary_key_columns()) {
if (cdef.name_as_text() == attribute_name) {
co_return api_error::validation(fmt::format(
"VectorIndexUpdates AttributeName '{}' is a key column and cannot be used as a vector index target.", attribute_name));
}
}
for (const auto& view : p.local().data_dictionary().find_column_family(tab).views()) {
for (const column_definition& cdef : view->primary_key_columns()) {
if (cdef.name_as_text() == attribute_name) {
co_return api_error::validation(fmt::format(
"VectorIndexUpdates AttributeName '{}' is a key column of a GSI or LSI and cannot be used as a vector index target.", attribute_name));
}
}
}
// attribute_name must not already be the target of an
// existing vector index.
if (has_vector_index_on_attribute(*tab, attribute_name)) {
co_return api_error::validation(fmt::format(
"VectorIndexUpdates AttributeName '{}' is already the target of an existing vector index.", attribute_name));
}
int dimensions = get_dimensions(*vector_attribute_v, "VectorIndexUpdates");
// The optional Projection parameter is only supported with
// ProjectionType=KEYS_ONLY. Other values are not yet supported.
const rjson::value* projection_v = rjson::find(it->value, "Projection");
if (projection_v) {
if (!projection_v->IsObject()) {
co_return api_error::validation("VectorIndexUpdates Projection must be an object.");
}
const rjson::value* projection_type_v = rjson::find(*projection_v, "ProjectionType");
if (!projection_type_v || !projection_type_v->IsString() ||
rjson::to_string_view(*projection_type_v) != "KEYS_ONLY") {
co_return api_error::validation("VectorIndexUpdates Projection: only ProjectionType=KEYS_ONLY is currently supported.");
}
}
// A vector index will use CDC on this table, so the CDC
// log table name will need to fit our length limits
validate_cdc_log_name_length(builder.cf_name());
index_options_map index_options;
index_options[db::index::secondary_index::custom_class_option_name] = "vector_index";
index_options[cql3::statements::index_target::target_option_name] = sstring(attribute_name);
index_options["dimensions"] = std::to_string(dimensions);
builder.with_index(index_metadata{index_name, index_options,
index_metadata_kind::custom, index_metadata::is_local_index(false)});
} else if (op == "Delete") {
if (!tab->has_index(index_name)) {
co_return api_error::resource_not_found(fmt::format(
"No vector index {} in table {}", index_name, tab->cf_name()));
}
builder.without_index(index_name);
} else {
// Update operation not yet supported, as we don't yet
// have any updatable properties of vector indexes.
co_return api_error::validation(fmt::format(
"VectorIndexUpdates supports a Create or Delete operation, saw '{}'", op));
}
}
schema = builder.build();
std::vector<view_ptr> new_views;
std::vector<std::string> dropped_views;
@@ -2335,6 +2479,10 @@ future<executor::request_return_type> executor::update_table(client_state& clien
// a LimitExceededException if this is attempted.
co_return api_error::limit_exceeded("GlobalSecondaryIndexUpdates only allows one index creation or deletion");
}
if (vector_index_updates && vector_index_updates->IsArray() &&
vector_index_updates->Size() && gsi_updates->Size()) {
co_return api_error::limit_exceeded("UpdateTable cannot have both VectorIndexUpdates and GlobalSecondaryIndexUpdates in the same request");
}
if (gsi_updates->Size() == 1) {
empty_request = false;
if (!(*gsi_updates)[0].IsObject() || (*gsi_updates)[0].MemberCount() != 1) {
@@ -2451,7 +2599,7 @@ future<executor::request_return_type> executor::update_table(client_state& clien
}
if (empty_request) {
co_return api_error::validation("UpdateTable requires one of GlobalSecondaryIndexUpdates, StreamSpecification or BillingMode to be specified");
co_return api_error::validation("UpdateTable requires one of GlobalSecondaryIndexUpdates, VectorIndexUpdates, StreamSpecification or BillingMode to be specified");
}
co_await verify_permission(enforce_authorization, warn_authorization, client_state_other_shard.get(), schema, auth::permission::ALTER, e.local()._stats);