Add original (decompressed) size to site manifest.

This size is not used by git-pages itself, and is not representative of
storage needs, but may be used for estimating how large a site would
be if downloaded in its entirety.
This commit is contained in:
Catherine
2025-11-16 19:27:02 +00:00
parent 770ff5c416
commit 91dc7e0c54
3 changed files with 45 additions and 34 deletions

View File

@@ -167,7 +167,7 @@ func CompressFiles(ctx context.Context, manifest *Manifest) {
span, _ := ObserveFunction(ctx, "CompressFiles")
defer span.Finish()
var originalSize, transformedSize int64
var originalSize, compressedSize int64
for _, entry := range manifest.Contents {
if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_None {
mtype := getMediaType(entry.GetContentType())
@@ -181,15 +181,17 @@ func CompressFiles(ctx context.Context, manifest *Manifest) {
entry.Size = proto.Int64(int64(len(entry.Data)))
entry.Transform = Transform_Zstandard.Enum()
}
transformedSize += entry.GetSize()
compressedSize += entry.GetSize()
}
}
manifest.OriginalSize = proto.Int64(originalSize)
manifest.CompressedSize = proto.Int64(compressedSize)
spaceSaving := (float64(originalSize) - float64(transformedSize)) / float64(originalSize)
spaceSaving := (float64(originalSize) - float64(compressedSize)) / float64(originalSize)
log.Printf("compress: saved %.2f percent (%s to %s)",
spaceSaving*100.0,
datasize.ByteSize(originalSize).HR(),
datasize.ByteSize(transformedSize).HR(),
datasize.ByteSize(compressedSize).HR(),
)
siteCompressionSpaceSaving.
Observe(spaceSaving)
@@ -232,17 +234,18 @@ func StoreManifest(ctx context.Context, name string, manifest *Manifest) (*Manif
// Replace inline files over certain size with references to external data.
extManifest := Manifest{
RepoUrl: manifest.RepoUrl,
Branch: manifest.Branch,
Commit: manifest.Commit,
Contents: make(map[string]*Entry),
Redirects: manifest.Redirects,
Headers: manifest.Headers,
Problems: manifest.Problems,
TotalSize: proto.Int64(0),
StoredSize: proto.Int64(0),
RepoUrl: manifest.RepoUrl,
Branch: manifest.Branch,
Commit: manifest.Commit,
Contents: make(map[string]*Entry),
Redirects: manifest.Redirects,
Headers: manifest.Headers,
Problems: manifest.Problems,
OriginalSize: manifest.OriginalSize,
CompressedSize: manifest.CompressedSize,
StoredSize: proto.Int64(0),
}
extObjectMap := make(map[string]int64)
extObjectSizes := make(map[string]int64)
for name, entry := range manifest.Contents {
cannotBeInlined := entry.GetType() == Type_InlineFile &&
entry.GetSize() > int64(config.Limits.MaxInlineFileSize.Bytes())
@@ -255,14 +258,13 @@ func StoreManifest(ctx context.Context, name string, manifest *Manifest) (*Manif
Transform: entry.Transform,
ContentType: entry.ContentType,
}
extObjectMap[string(dataHash[:])] = *entry.Size
extObjectSizes[string(dataHash[:])] = entry.GetSize()
} else {
extManifest.Contents[name] = entry
}
*extManifest.TotalSize += entry.GetSize()
}
// `extObjectSizes` stores size once per object, deduplicating it
for _, storedSize := range extObjectMap {
for _, storedSize := range extObjectSizes {
*extManifest.StoredSize += storedSize
}

View File

@@ -7,12 +7,11 @@
package git_pages
import (
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
reflect "reflect"
sync "sync"
unsafe "unsafe"
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
)
const (
@@ -452,9 +451,10 @@ type Manifest struct {
Branch *string `protobuf:"bytes,2,opt,name=branch" json:"branch,omitempty"`
Commit *string `protobuf:"bytes,3,opt,name=commit" json:"commit,omitempty"`
// Contents
Contents map[string]*Entry `protobuf:"bytes,4,rep,name=contents" json:"contents,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
TotalSize *int64 `protobuf:"varint,5,opt,name=total_size,json=totalSize" json:"total_size,omitempty"` // simple sum of each `entry.size`
StoredSize *int64 `protobuf:"varint,8,opt,name=stored_size,json=storedSize" json:"stored_size,omitempty"` // external objects, after deduplication
Contents map[string]*Entry `protobuf:"bytes,4,rep,name=contents" json:"contents,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"`
OriginalSize *int64 `protobuf:"varint,10,opt,name=original_size,json=originalSize" json:"original_size,omitempty"` // total size of entries before compression
CompressedSize *int64 `protobuf:"varint,5,opt,name=compressed_size,json=compressedSize" json:"compressed_size,omitempty"` // simple sum of each `entry.size`
StoredSize *int64 `protobuf:"varint,8,opt,name=stored_size,json=storedSize" json:"stored_size,omitempty"` // total size of (deduplicated) external objects
// Netlify-style `_redirects` and `_headers`
Redirects []*RedirectRule `protobuf:"bytes,6,rep,name=redirects" json:"redirects,omitempty"`
Headers []*HeaderRule `protobuf:"bytes,9,rep,name=headers" json:"headers,omitempty"`
@@ -522,9 +522,16 @@ func (x *Manifest) GetContents() map[string]*Entry {
return nil
}
func (x *Manifest) GetTotalSize() int64 {
if x != nil && x.TotalSize != nil {
return *x.TotalSize
// GetOriginalSize returns the value pointed to by OriginalSize, or 0 when
// the receiver is nil or the field is unset.
func (x *Manifest) GetOriginalSize() int64 {
	if x == nil || x.OriginalSize == nil {
		return 0
	}
	return *x.OriginalSize
}
// GetCompressedSize returns the value pointed to by CompressedSize, or 0
// when the receiver is nil or the field is unset.
func (x *Manifest) GetCompressedSize() int64 {
	if x == nil || x.CompressedSize == nil {
		return 0
	}
	return *x.CompressedSize
}
@@ -584,14 +591,15 @@ const file_schema_proto_rawDesc = "" +
"header_map\x18\x02 \x03(\v2\a.HeaderR\theaderMap\"3\n" +
"\aProblem\x12\x12\n" +
"\x04path\x18\x01 \x01(\tR\x04path\x12\x14\n" +
"\x05cause\x18\x02 \x01(\tR\x05cause\"\x89\x03\n" +
"\x05cause\x18\x02 \x01(\tR\x05cause\"\xb8\x03\n" +
"\bManifest\x12\x19\n" +
"\brepo_url\x18\x01 \x01(\tR\arepoUrl\x12\x16\n" +
"\x06branch\x18\x02 \x01(\tR\x06branch\x12\x16\n" +
"\x06commit\x18\x03 \x01(\tR\x06commit\x123\n" +
"\bcontents\x18\x04 \x03(\v2\x17.Manifest.ContentsEntryR\bcontents\x12\x1d\n" +
"\n" +
"total_size\x18\x05 \x01(\x03R\ttotalSize\x12\x1f\n" +
"\bcontents\x18\x04 \x03(\v2\x17.Manifest.ContentsEntryR\bcontents\x12#\n" +
"\roriginal_size\x18\n" +
" \x01(\x03R\foriginalSize\x12'\n" +
"\x0fcompressed_size\x18\x05 \x01(\x03R\x0ecompressedSize\x12\x1f\n" +
"\vstored_size\x18\b \x01(\x03R\n" +
"storedSize\x12+\n" +
"\tredirects\x18\x06 \x03(\v2\r.RedirectRuleR\tredirects\x12%\n" +
@@ -609,7 +617,7 @@ const file_schema_proto_rawDesc = "" +
"\aSymlink\x10\x04*$\n" +
"\tTransform\x12\b\n" +
"\x04None\x10\x00\x12\r\n" +
"\tZstandard\x10\x01B'Z%codeberg.org/git-pages/git-pages/mainb\beditionsp\xe8\a"
"\tZstandard\x10\x01B,Z*codeberg.org/git-pages/git-pages/git_pagesb\beditionsp\xe8\a"
var (
file_schema_proto_rawDescOnce sync.Once

View File

@@ -1,6 +1,6 @@
edition = "2023";
option go_package = "codeberg.org/git-pages/git-pages/main";
option go_package = "codeberg.org/git-pages/git-pages/git_pages";
enum Type {
// Invalid entry.
@@ -75,8 +75,9 @@ message Manifest {
// Contents
map<string, Entry> contents = 4;
int64 total_size = 5; // simple sum of each `entry.size`
int64 stored_size = 8; // external objects, after deduplication
int64 original_size = 10; // total size of entries before compression
int64 compressed_size = 5; // simple sum of each `entry.size`
int64 stored_size = 8; // total size of (deduplicated) external objects
// Netlify-style `_redirects` and `_headers`
repeated RedirectRule redirects = 6;