From 83c1e564c44b996cb882f0f92c10f2f15b82cbba Mon Sep 17 00:00:00 2001 From: Catherine Date: Tue, 21 Oct 2025 00:49:27 +0000 Subject: [PATCH] Add `stored_size` (size after deduplication) to manifest. --- src/manifest.go | 24 ++++++++++++++++-------- src/schema.pb.go | 20 +++++++++++++++----- src/schema.proto | 3 ++- 3 files changed, 33 insertions(+), 14 deletions(-) diff --git a/src/manifest.go b/src/manifest.go index c387b1c..839865e 100644 --- a/src/manifest.go +++ b/src/manifest.go @@ -180,29 +180,37 @@ func StoreManifest(ctx context.Context, name string, manifest *Manifest) (*Manif // Replace inline files over certain size with references to external data. extManifest := Manifest{ - RepoUrl: manifest.RepoUrl, - Branch: manifest.Branch, - Commit: manifest.Commit, - Contents: make(map[string]*Entry), - Redirects: manifest.Redirects, - Problems: manifest.Problems, - TotalSize: proto.Int64(0), + RepoUrl: manifest.RepoUrl, + Branch: manifest.Branch, + Commit: manifest.Commit, + Contents: make(map[string]*Entry), + Redirects: manifest.Redirects, + Problems: manifest.Problems, + TotalSize: proto.Int64(0), + StoredSize: proto.Int64(0), } + extObjectMap := make(map[string]int64) for name, entry := range manifest.Contents { cannotBeInlined := entry.GetType() == Type_InlineFile && entry.GetSize() > int64(config.Limits.MaxInlineFileSize.Bytes()) if cannotBeInlined { + dataHash := sha256.Sum256(entry.Data) extManifest.Contents[name] = &Entry{ Type: Type_ExternalFile.Enum(), Size: entry.Size, - Data: fmt.Appendf(nil, "sha256-%x", sha256.Sum256(entry.Data)), + Data: fmt.Appendf(nil, "sha256-%x", dataHash), Xfrm: entry.Xfrm, } + extObjectMap[string(dataHash[:])] = *entry.Size } else { extManifest.Contents[name] = entry } *extManifest.TotalSize += entry.GetSize() } + // `extObjectMap` stores size once per object, deduplicating it + for _, storedSize := range extObjectMap { + *extManifest.StoredSize += storedSize + } // Upload the resulting manifest and the blob it references. extManifestData := EncodeManifest(&extManifest) diff --git a/src/schema.pb.go b/src/schema.pb.go index b040af6..677c223 100644 --- a/src/schema.pb.go +++ b/src/schema.pb.go @@ -1,6 +1,6 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.36.9 +// protoc-gen-go v1.36.10 // protoc v6.30.2 // source: schema.proto @@ -336,8 +336,9 @@ type Manifest struct { Branch *string `protobuf:"bytes,2,opt,name=branch" json:"branch,omitempty"` Commit *string `protobuf:"bytes,3,opt,name=commit" json:"commit,omitempty"` // Contents - Contents map[string]*Entry `protobuf:"bytes,4,rep,name=contents" json:"contents,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` - TotalSize *int64 `protobuf:"varint,5,opt,name=total_size,json=totalSize" json:"total_size,omitempty"` + Contents map[string]*Entry `protobuf:"bytes,4,rep,name=contents" json:"contents,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` + TotalSize *int64 `protobuf:"varint,5,opt,name=total_size,json=totalSize" json:"total_size,omitempty"` + StoredSize *int64 `protobuf:"varint,8,opt,name=stored_size,json=storedSize" json:"stored_size,omitempty"` // after deduplication // Netlify-style `_redirects` Redirects []*Redirect `protobuf:"bytes,6,rep,name=redirects" json:"redirects,omitempty"` // Diagnostics for non-fatal errors @@ -411,6 +412,13 @@ func (x *Manifest) GetTotalSize() int64 { return 0 } +func (x *Manifest) GetStoredSize() int64 { + if x != nil && x.StoredSize != nil { + return *x.StoredSize + } + return 0 +} + func (x *Manifest) GetRedirects() []*Redirect { if x != nil { return x.Redirects @@ -443,14 +451,16 @@ const file_schema_proto_rawDesc = "" + "\x05force\x18\x04 \x01(\bR\x05force\"3\n" + "\aProblem\x12\x12\n" + "\x04path\x18\x01 \x01(\tR\x04path\x12\x14\n" + - "\x05cause\x18\x02 \x01(\tR\x05cause\"\xbd\x02\n" + + "\x05cause\x18\x02 \x01(\tR\x05cause\"\xde\x02\n" + "\bManifest\x12\x19\n" + "\brepo_url\x18\x01 \x01(\tR\arepoUrl\x12\x16\n" + "\x06branch\x18\x02 \x01(\tR\x06branch\x12\x16\n" + "\x06commit\x18\x03 \x01(\tR\x06commit\x123\n" + "\bcontents\x18\x04 \x03(\v2\x17.Manifest.ContentsEntryR\bcontents\x12\x1d\n" + "\n" + - "total_size\x18\x05 \x01(\x03R\ttotalSize\x12'\n" + + "total_size\x18\x05 \x01(\x03R\ttotalSize\x12\x1f\n" + + "\vstored_size\x18\b \x01(\x03R\n" + + "storedSize\x12'\n" + "\tredirects\x18\x06 \x03(\v2\t.RedirectR\tredirects\x12$\n" + "\bproblems\x18\a \x03(\v2\b.ProblemR\bproblems\x1aC\n" + "\rContentsEntry\x12\x10\n" + diff --git a/src/schema.proto b/src/schema.proto index b97ca95..c31d480 100644 --- a/src/schema.proto +++ b/src/schema.proto @@ -61,7 +61,8 @@ message Manifest { // Contents map contents = 4; - int64 total_size = 5; + int64 total_size = 5; // simple sum of each `entry.size` + int64 stored_size = 8; // external objects, after deduplication // Netlify-style `_redirects` repeated Redirect redirects = 6;