mirror of
https://codeberg.org/git-pages/git-pages.git
synced 2026-05-23 15:41:36 +00:00
Compress files with Zstandard.
This can save as much as 30% of storage space while adding negligible CPU overhead.
This commit is contained in:
@@ -13,6 +13,7 @@ import (
|
||||
"sync"
|
||||
|
||||
"github.com/c2h5oh/datasize"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
"google.golang.org/protobuf/encoding/protojson"
|
||||
"google.golang.org/protobuf/proto"
|
||||
)
|
||||
@@ -120,6 +121,29 @@ again:
|
||||
}
|
||||
}
|
||||
|
||||
// Compress contents of inline files.
|
||||
func CompressFiles(manifest *Manifest) {
|
||||
var originalSize, transformedSize uint32
|
||||
var encoder, _ = zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
|
||||
for _, entry := range manifest.Contents {
|
||||
if entry.GetType() == Type_InlineFile && entry.GetXfrm() == Transform_None {
|
||||
originalSize += entry.GetSize()
|
||||
compressedData := encoder.EncodeAll(entry.GetData(), make([]byte, 0, entry.GetSize()))
|
||||
if len(compressedData) < int(*entry.Size) {
|
||||
entry.Data = compressedData
|
||||
entry.Size = proto.Uint32(uint32(len(entry.Data)))
|
||||
entry.Xfrm = Transform_Zstandard.Enum()
|
||||
}
|
||||
transformedSize += entry.GetSize()
|
||||
}
|
||||
}
|
||||
log.Printf("compress: saved %.2f%% (%s to %s)",
|
||||
(float32(originalSize)-float32(transformedSize))/float32(originalSize)*100.0,
|
||||
datasize.ByteSize(originalSize).HR(),
|
||||
datasize.ByteSize(transformedSize).HR(),
|
||||
)
|
||||
}
|
||||
|
||||
// Apply post-processing steps to the manifest.
|
||||
// At the moment, there isn't a good way to report errors except to log them on the terminal.
|
||||
// (Perhaps in the future they could be exposed at `.git-pages/status.txt`?)
|
||||
@@ -131,6 +155,8 @@ func PrepareManifest(manifest *Manifest) error {
|
||||
log.Printf("redirects ok: %d rules\n", len(manifest.Redirects))
|
||||
}
|
||||
|
||||
CompressFiles(manifest)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -157,6 +183,7 @@ func StoreManifest(name string, manifest *Manifest) (*Manifest, error) {
|
||||
Type: Type_ExternalFile.Enum(),
|
||||
Size: entry.Size,
|
||||
Data: fmt.Appendf(nil, "sha256-%x", sha256.Sum256(entry.Data)),
|
||||
Xfrm: entry.Xfrm,
|
||||
}
|
||||
} else {
|
||||
extManifest.Contents[name] = entry
|
||||
|
||||
22
src/pages.go
22
src/pages.go
@@ -16,6 +16,7 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/klauspost/compress/zstd"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
)
|
||||
@@ -60,6 +61,10 @@ func reportSiteUpdate(via string, result *UpdateResult) {
|
||||
}
|
||||
}
|
||||
|
||||
// The `klauspost/compress/zstd` package recommends reusing a decompressor to avoid repeated
|
||||
// allocations of internal buffers.
|
||||
var zstdDecoder, _ = zstd.NewReader(nil)
|
||||
|
||||
func getPage(w http.ResponseWriter, r *http.Request) error {
|
||||
var err error
|
||||
var sitePath string
|
||||
@@ -198,6 +203,23 @@ func getPage(w http.ResponseWriter, r *http.Request) error {
|
||||
defer closer.Close()
|
||||
}
|
||||
|
||||
switch entry.GetXfrm() {
|
||||
case Transform_None:
|
||||
// nothing to do
|
||||
case Transform_Zstandard:
|
||||
// Ideally, we would serve zstd-compressed data to a client that indicates support with
|
||||
// an `Accept-Encoding: zstd` header. Unfortunately we can't because we rely on MIME
|
||||
// type detection done in `http.ServeContent`.
|
||||
compressedData, _ := io.ReadAll(reader)
|
||||
decompressedData, err := zstdDecoder.DecodeAll(compressedData, []byte{})
|
||||
if err != nil {
|
||||
w.WriteHeader(http.StatusInternalServerError)
|
||||
fmt.Fprintf(w, "internal server error: %s\n", err)
|
||||
return err
|
||||
}
|
||||
reader = bytes.NewReader(decompressedData)
|
||||
}
|
||||
|
||||
// decide on the HTTP status
|
||||
if status != 200 {
|
||||
w.WriteHeader(status)
|
||||
|
||||
108
src/schema.pb.go
108
src/schema.pb.go
@@ -81,10 +81,59 @@ func (Type) EnumDescriptor() ([]byte, []int) {
|
||||
return file_schema_proto_rawDescGZIP(), []int{0}
|
||||
}
|
||||
|
||||
type Transform int32
|
||||
|
||||
const (
|
||||
// No transformation.
|
||||
Transform_None Transform = 0
|
||||
// Zstandard compression.
|
||||
Transform_Zstandard Transform = 1
|
||||
)
|
||||
|
||||
// Enum value maps for Transform.
|
||||
var (
|
||||
Transform_name = map[int32]string{
|
||||
0: "None",
|
||||
1: "Zstandard",
|
||||
}
|
||||
Transform_value = map[string]int32{
|
||||
"None": 0,
|
||||
"Zstandard": 1,
|
||||
}
|
||||
)
|
||||
|
||||
func (x Transform) Enum() *Transform {
|
||||
p := new(Transform)
|
||||
*p = x
|
||||
return p
|
||||
}
|
||||
|
||||
func (x Transform) String() string {
|
||||
return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
|
||||
}
|
||||
|
||||
// Descriptor returns the enum descriptor of the entry at index 1 of
// file_schema_proto_enumTypes, the slot used for Transform.
func (Transform) Descriptor() protoreflect.EnumDescriptor {
|
||||
return file_schema_proto_enumTypes[1].Descriptor()
|
||||
}
|
||||
|
||||
// Type returns a pointer to the entry at index 1 of file_schema_proto_enumTypes
// as a protoreflect.EnumType.
func (Transform) Type() protoreflect.EnumType {
|
||||
return &file_schema_proto_enumTypes[1]
|
||||
}
|
||||
|
||||
// Number returns x converted to a protoreflect.EnumNumber.
func (x Transform) Number() protoreflect.EnumNumber {
|
||||
return protoreflect.EnumNumber(x)
|
||||
}
|
||||
|
||||
// EnumDescriptor returns the result of file_schema_proto_rawDescGZIP together
// with the index path []int{1}.
//
// Deprecated: Use Transform.Descriptor instead.
|
||||
func (Transform) EnumDescriptor() ([]byte, []int) {
|
||||
return file_schema_proto_rawDescGZIP(), []int{1}
|
||||
}
|
||||
|
||||
type Entry struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
Type *Type `protobuf:"varint,1,opt,name=type,enum=Type" json:"type,omitempty"`
|
||||
// Only present for `type == InlineFile` and `type == ExternalFile`
|
||||
// Only present for `type == InlineFile` and `type == ExternalFile`.
|
||||
// For transformed entries, refers to the post-transformation (compressed) size.
|
||||
Size *uint32 `protobuf:"varint,2,opt,name=size" json:"size,omitempty"`
|
||||
// Meaning depends on `type`:
|
||||
// - If `type == InlineFile`, contains file data.
|
||||
@@ -92,7 +141,10 @@ type Entry struct {
|
||||
// cryptographically secure content hash).
|
||||
// - If `type == Symlink`, contains link target.
|
||||
// - Otherwise not present.
|
||||
Data []byte `protobuf:"bytes,3,opt,name=data" json:"data,omitempty"`
|
||||
Data []byte `protobuf:"bytes,3,opt,name=data" json:"data,omitempty"`
|
||||
// Only present for `type == InlineFile` and `type == ExternalFile` that
|
||||
// have been transformed.
|
||||
Xfrm *Transform `protobuf:"varint,4,opt,name=xfrm,enum=Transform" json:"xfrm,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
@@ -148,6 +200,13 @@ func (x *Entry) GetData() []byte {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *Entry) GetXfrm() Transform {
|
||||
if x != nil && x.Xfrm != nil {
|
||||
return *x.Xfrm
|
||||
}
|
||||
return Transform_None
|
||||
}
|
||||
|
||||
// See https://docs.netlify.com/manage/routing/redirects/overview/ for details.
|
||||
// Only a subset of the Netlify specification is representable here.
|
||||
type Redirect struct {
|
||||
@@ -370,11 +429,13 @@ var File_schema_proto protoreflect.FileDescriptor
|
||||
|
||||
const file_schema_proto_rawDesc = "" +
|
||||
"\n" +
|
||||
"\fschema.proto\"J\n" +
|
||||
"\fschema.proto\"j\n" +
|
||||
"\x05Entry\x12\x19\n" +
|
||||
"\x04type\x18\x01 \x01(\x0e2\x05.TypeR\x04type\x12\x12\n" +
|
||||
"\x04size\x18\x02 \x01(\rR\x04size\x12\x12\n" +
|
||||
"\x04data\x18\x03 \x01(\fR\x04data\"\\\n" +
|
||||
"\x04data\x18\x03 \x01(\fR\x04data\x12\x1e\n" +
|
||||
"\x04xfrm\x18\x04 \x01(\x0e2\n" +
|
||||
".TransformR\x04xfrm\"\\\n" +
|
||||
"\bRedirect\x12\x12\n" +
|
||||
"\x04from\x18\x01 \x01(\tR\x04from\x12\x0e\n" +
|
||||
"\x02to\x18\x02 \x01(\tR\x02to\x12\x16\n" +
|
||||
@@ -401,7 +462,10 @@ const file_schema_proto_rawDesc = "" +
|
||||
"\n" +
|
||||
"InlineFile\x10\x02\x12\x10\n" +
|
||||
"\fExternalFile\x10\x03\x12\v\n" +
|
||||
"\aSymlink\x10\x04B'Z%codeberg.org/git-pages/git-pages/mainb\beditionsp\xe8\a"
|
||||
"\aSymlink\x10\x04*$\n" +
|
||||
"\tTransform\x12\b\n" +
|
||||
"\x04None\x10\x00\x12\r\n" +
|
||||
"\tZstandard\x10\x01B'Z%codeberg.org/git-pages/git-pages/mainb\beditionsp\xe8\a"
|
||||
|
||||
var (
|
||||
file_schema_proto_rawDescOnce sync.Once
|
||||
@@ -415,27 +479,29 @@ func file_schema_proto_rawDescGZIP() []byte {
|
||||
return file_schema_proto_rawDescData
|
||||
}
|
||||
|
||||
var file_schema_proto_enumTypes = make([]protoimpl.EnumInfo, 1)
|
||||
var file_schema_proto_enumTypes = make([]protoimpl.EnumInfo, 2)
|
||||
var file_schema_proto_msgTypes = make([]protoimpl.MessageInfo, 5)
|
||||
var file_schema_proto_goTypes = []any{
|
||||
(Type)(0), // 0: Type
|
||||
(*Entry)(nil), // 1: Entry
|
||||
(*Redirect)(nil), // 2: Redirect
|
||||
(*Problem)(nil), // 3: Problem
|
||||
(*Manifest)(nil), // 4: Manifest
|
||||
nil, // 5: Manifest.ContentsEntry
|
||||
(Transform)(0), // 1: Transform
|
||||
(*Entry)(nil), // 2: Entry
|
||||
(*Redirect)(nil), // 3: Redirect
|
||||
(*Problem)(nil), // 4: Problem
|
||||
(*Manifest)(nil), // 5: Manifest
|
||||
nil, // 6: Manifest.ContentsEntry
|
||||
}
|
||||
var file_schema_proto_depIdxs = []int32{
|
||||
0, // 0: Entry.type:type_name -> Type
|
||||
5, // 1: Manifest.contents:type_name -> Manifest.ContentsEntry
|
||||
2, // 2: Manifest.redirects:type_name -> Redirect
|
||||
3, // 3: Manifest.problems:type_name -> Problem
|
||||
1, // 4: Manifest.ContentsEntry.value:type_name -> Entry
|
||||
5, // [5:5] is the sub-list for method output_type
|
||||
5, // [5:5] is the sub-list for method input_type
|
||||
5, // [5:5] is the sub-list for extension type_name
|
||||
5, // [5:5] is the sub-list for extension extendee
|
||||
0, // [0:5] is the sub-list for field type_name
|
||||
1, // 1: Entry.xfrm:type_name -> Transform
|
||||
6, // 2: Manifest.contents:type_name -> Manifest.ContentsEntry
|
||||
3, // 3: Manifest.redirects:type_name -> Redirect
|
||||
4, // 4: Manifest.problems:type_name -> Problem
|
||||
2, // 5: Manifest.ContentsEntry.value:type_name -> Entry
|
||||
6, // [6:6] is the sub-list for method output_type
|
||||
6, // [6:6] is the sub-list for method input_type
|
||||
6, // [6:6] is the sub-list for extension type_name
|
||||
6, // [6:6] is the sub-list for extension extendee
|
||||
0, // [0:6] is the sub-list for field type_name
|
||||
}
|
||||
|
||||
func init() { file_schema_proto_init() }
|
||||
@@ -448,7 +514,7 @@ func file_schema_proto_init() {
|
||||
File: protoimpl.DescBuilder{
|
||||
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
|
||||
RawDescriptor: unsafe.Slice(unsafe.StringData(file_schema_proto_rawDesc), len(file_schema_proto_rawDesc)),
|
||||
NumEnums: 1,
|
||||
NumEnums: 2,
|
||||
NumMessages: 5,
|
||||
NumExtensions: 0,
|
||||
NumServices: 0,
|
||||
|
||||
@@ -15,9 +15,17 @@ enum Type {
|
||||
Symlink = 4;
|
||||
}
|
||||
|
||||
enum Transform {
|
||||
// No transformation.
|
||||
None = 0;
|
||||
// Zstandard compression.
|
||||
Zstandard = 1;
|
||||
}
|
||||
|
||||
message Entry {
|
||||
Type type = 1;
|
||||
// Only present for `type == InlineFile` and `type == ExternalFile`
|
||||
// Only present for `type == InlineFile` and `type == ExternalFile`.
|
||||
// For transformed entries, refers to the post-transformation (compressed) size.
|
||||
uint32 size = 2;
|
||||
// Meaning depends on `type`:
|
||||
// * If `type == InlineFile`, contains file data.
|
||||
@@ -26,6 +34,9 @@ message Entry {
|
||||
// * If `type == Symlink`, contains link target.
|
||||
// * Otherwise not present.
|
||||
bytes data = 3;
|
||||
// Only present for `type == InlineFile` and `type == ExternalFile` that
|
||||
// have been transformed.
|
||||
Transform xfrm = 4;
|
||||
}
|
||||
|
||||
// See https://docs.netlify.com/manage/routing/redirects/overview/ for details.
|
||||
|
||||
Reference in New Issue
Block a user