filer: add ObjectTransaction for atomic multi-entry object writes (#9646)

A versioned object write touches several entries that must change together: the
main object, a delete marker or version file, and the latest pointer on the
.versions directory. Holding a distributed lock across separate RPCs to do this
is what the per-path lock was meant to replace, but a single CreateEntry only
covers one entry.

Add ObjectTransaction: a request carries a lock_key (the object path), an
optional WriteCondition, and an ordered list of mutations (PUT / DELETE /
PATCH_EXTENDED). The filer holds the per-path lock on lock_key for the whole
call, checks the condition against the entry at lock_key, then applies the
mutations in order. Callers route the object's writes to its owner filer so the
lock is authoritative across all of the object's entries.

DELETE and PATCH_EXTENDED of an absent entry are no-ops, so a replayed
transaction is idempotent. PUT entries are metadata-scoped; data-bearing writes
(chunks) are written before the transaction, as today.
This commit is contained in:
Chris Lu
2026-05-23 17:34:30 -07:00
committed by GitHub
parent b18d3dc96c
commit bf022ca018
6 changed files with 1133 additions and 410 deletions

View File

@@ -31,6 +31,9 @@ service SeaweedFiler {
rpc DeleteEntry (DeleteEntryRequest) returns (DeleteEntryResponse) {
}
// ObjectTransaction applies an ordered list of entry mutations as one call.
// The filer holds the per-path lock on the request's lock_key for the whole
// call, checks the optional WriteCondition against the entry at lock_key, and
// then applies the mutations in order. Callers must route the object's writes
// to its owner filer for the lock to be authoritative.
rpc ObjectTransaction (ObjectTransactionRequest) returns (ObjectTransactionResponse) {
}
rpc AtomicRenameEntry (AtomicRenameEntryRequest) returns (AtomicRenameEntryResponse) {
}
rpc StreamRenameEntry (StreamRenameEntryRequest) returns (stream StreamRenameEntryResponse) {
@@ -271,6 +274,46 @@ enum FilerError {
PRECONDITION_FAILED = 6; // WriteCondition not satisfied
}
// ObjectMutation describes a single entry-level change carried by an
// ObjectTransaction. Every mutation of a transaction is applied in list order
// while one per-path lock (keyed by the request's lock_key) is held, which
// lets the gateway express a multi-entry object operation as a single request
// rather than holding a distributed lock across several RPCs.
//
// Mutations are metadata-scoped: data-bearing writes (entries with chunks)
// should be written before the transaction is sent.
message ObjectMutation {
  // Kind of change to apply to the target entry.
  //
  // NOTE(review): PUT is the zero value, so a mutation whose type is left
  // unset decodes as PUT -- confirm callers always set type explicitly.
  enum Type {
    // Create or replace the entry, using the entry field.
    PUT = 0;
    // Delete the entry at directory/name; an absent entry is not an error.
    DELETE = 1;
    // Merge set_extended into the entry and remove the delete_extended keys.
    PATCH_EXTENDED = 2;
  }

  // Which change this mutation performs.
  Type type = 1;

  // Directory part of the target path (DELETE addresses directory/name).
  string directory = 2;

  // Entry name for DELETE / PATCH_EXTENDED.
  string name = 3;

  // Full entry for PUT.
  Entry entry = 4;

  // PATCH_EXTENDED: keys to set.
  map<string, bytes> set_extended = 5;

  // PATCH_EXTENDED: keys to remove.
  repeated string delete_extended = 6;

  // DELETE: also delete chunk data.
  bool is_delete_data = 7;

  // DELETE: recurse into a directory.
  bool is_recursive = 8;
}
// ObjectTransactionRequest asks the filer to apply an ordered list of
// mutations atomically with respect to other writers of the same object.
//
// The filer holds its per-path lock on lock_key for the whole transaction.
// When a condition is supplied it is checked first, against the entry at
// lock_key. The lock is only authoritative if callers route the object's
// writes to its owner filer.
message ObjectTransactionRequest {
  // Object path to lock and to evaluate the condition against.
  string lock_key = 1;

  // Optional precondition, checked under the lock.
  WriteCondition condition = 2;

  // Mutations to apply, in list order.
  repeated ObjectMutation mutations = 3;

  // NOTE(review): undocumented in this message; presumably has the same
  // meaning as the same-named field on other filer write requests -- confirm.
  bool is_from_other_cluster = 4;

  // NOTE(review): undocumented in this message; presumably has the same
  // meaning as the same-named field on other filer write requests -- confirm.
  repeated int32 signatures = 5;
}
// ObjectTransactionResponse is the reply to an ObjectTransaction call.
message ObjectTransactionResponse {
  // Error message text.
  // NOTE(review): presumably empty when the transaction succeeded -- confirm.
  string error = 1;

  // Structured error code from FilerError.
  // NOTE(review): presumably PRECONDITION_FAILED when the request's
  // WriteCondition is not satisfied -- confirm against the server handler.
  FilerError error_code = 2;
}
message CreateEntryResponse {
string error = 1; // kept for human readability + backward compat
SubscribeMetadataResponse metadata_event = 2;