From 25f7ea08c9069b80a2a1d52765c1e020cc09d7ba Mon Sep 17 00:00:00 2001 From: Catherine Date: Tue, 21 Oct 2025 03:40:29 +0000 Subject: [PATCH] Sniff `Content-Type` during site update. This isn't yet used in the code responding to GET requests because we do not yet have a migration path for legacy code. --- src/manifest.go | 36 ++++++++++++++++++++++++++++++------ src/pages.go | 2 +- src/schema.pb.go | 33 ++++++++++++++++++++++----------- src/schema.proto | 5 ++++- 4 files changed, 57 insertions(+), 19 deletions(-) diff --git a/src/manifest.go b/src/manifest.go index 839865e..225f673 100644 --- a/src/manifest.go +++ b/src/manifest.go @@ -9,7 +9,10 @@ import ( "errors" "fmt" "log" + "mime" + "net/http" "path" + "path/filepath" "strings" "sync" @@ -122,6 +125,24 @@ again: } } +// Sniff content type using the same algorithm as `http.ServeContent`. +func DetectContentType(manifest *Manifest) { + for path, entry := range manifest.Contents { + if entry.GetType() == Type_Directory || entry.GetType() == Type_Symlink { + // no Content-Type + } else if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_None { + contentType := mime.TypeByExtension(filepath.Ext(path)) + if contentType == "" { + contentType = http.DetectContentType(entry.Data[:512]) + } + entry.ContentType = proto.String(contentType) + } else { + panic(fmt.Errorf("DetectContentType encountered invalid entry: %v, %v", + entry.GetType(), entry.GetTransform())) + } + } +} + // The `clauspost/compress/zstd` package recommends reusing a compressor to avoid repeated // allocations of internal buffers. var zstdEncoder, _ = zstd.NewWriter(nil, zstd.WithEncoderLevel(zstd.SpeedBetterCompression)) @@ -133,13 +154,13 @@ func CompressFiles(ctx context.Context, manifest *Manifest) { var originalSize, transformedSize int64 for _, entry := range manifest.Contents { - if entry.GetType() == Type_InlineFile && entry.GetXfrm() == Transform_None { + if entry.GetType() == Type_InlineFile && entry.GetTransform() == Transform_None { originalSize += entry.GetSize() compressedData := zstdEncoder.EncodeAll(entry.GetData(), make([]byte, 0, entry.GetSize())) if len(compressedData) < int(*entry.Size) { entry.Data = compressedData entry.Size = proto.Int64(int64(len(entry.Data))) - entry.Xfrm = Transform_Zstandard.Enum() + entry.Transform = Transform_Zstandard.Enum() } transformedSize += entry.GetSize() } @@ -163,6 +184,8 @@ func PrepareManifest(ctx context.Context, manifest *Manifest) error { log.Printf("redirects ok: %d rules\n", len(manifest.Redirects)) } + DetectContentType(manifest) + if config.Feature("compress") { CompressFiles(ctx, manifest) } @@ -196,10 +219,11 @@ func StoreManifest(ctx context.Context, name string, manifest *Manifest) (*Manif if cannotBeInlined { dataHash := sha256.Sum256(entry.Data) extManifest.Contents[name] = &Entry{ - Type: Type_ExternalFile.Enum(), - Size: entry.Size, - Data: fmt.Appendf(nil, "sha256-%x", dataHash), - Xfrm: entry.Xfrm, + Type: Type_ExternalFile.Enum(), + Size: entry.Size, + Data: fmt.Appendf(nil, "sha256-%x", dataHash), + Transform: entry.Transform, + ContentType: entry.ContentType, } extObjectMap[string(dataHash[:])] = *entry.Size } else { diff --git a/src/pages.go b/src/pages.go index 6f4000e..4ca9672 100644 --- a/src/pages.go +++ b/src/pages.go @@ -223,7 +223,7 @@ func getPage(w http.ResponseWriter, r *http.Request) error { defer closer.Close() } - switch entry.GetXfrm() { + switch entry.GetTransform() { case Transform_None: // nothing to do case Transform_Zstandard: diff --git a/src/schema.pb.go b/src/schema.pb.go index 677c223..4366f72 100644 --- a/src/schema.pb.go +++ b/src/schema.pb.go @@ -144,7 +144,10 @@ type Entry struct { Data []byte `protobuf:"bytes,3,opt,name=data" json:"data,omitempty"` // Only present for `type == InlineFile` and `type == ExternalFile` that // have been transformed. - Xfrm *Transform `protobuf:"varint,4,opt,name=xfrm,enum=Transform" json:"xfrm,omitempty"` + Transform *Transform `protobuf:"varint,4,opt,name=transform,enum=Transform" json:"transform,omitempty"` + // Only present for `type == InlineFile` and `type == ExternalFile`. + // Currently, optional (not present on certain legacy manifests). + ContentType *string `protobuf:"bytes,5,opt,name=content_type,json=contentType" json:"content_type,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -200,13 +203,20 @@ func (x *Entry) GetData() []byte { return nil } -func (x *Entry) GetXfrm() Transform { - if x != nil && x.Xfrm != nil { - return *x.Xfrm +func (x *Entry) GetTransform() Transform { + if x != nil && x.Transform != nil { + return *x.Transform } return Transform_None } +func (x *Entry) GetContentType() string { + if x != nil && x.ContentType != nil { + return *x.ContentType + } + return "" +} + // See https://docs.netlify.com/manage/routing/redirects/overview/ for details. // Only a subset of the Netlify specification is representable here. type Redirect struct { @@ -337,8 +347,8 @@ type Manifest struct { Commit *string `protobuf:"bytes,3,opt,name=commit" json:"commit,omitempty"` // Contents Contents map[string]*Entry `protobuf:"bytes,4,rep,name=contents" json:"contents,omitempty" protobuf_key:"bytes,1,opt,name=key" protobuf_val:"bytes,2,opt,name=value"` - TotalSize *int64 `protobuf:"varint,5,opt,name=total_size,json=totalSize" json:"total_size,omitempty"` - StoredSize *int64 `protobuf:"varint,8,opt,name=stored_size,json=storedSize" json:"stored_size,omitempty"` // after deduplication + TotalSize *int64 `protobuf:"varint,5,opt,name=total_size,json=totalSize" json:"total_size,omitempty"` // simple sum of each `entry.size` + StoredSize *int64 `protobuf:"varint,8,opt,name=stored_size,json=storedSize" json:"stored_size,omitempty"` // external objects, after deduplication // Netlify-style `_redirects` Redirects []*Redirect `protobuf:"bytes,6,rep,name=redirects" json:"redirects,omitempty"` // Diagnostics for non-fatal errors @@ -437,13 +447,14 @@ var File_schema_proto protoreflect.FileDescriptor const file_schema_proto_rawDesc = "" + "\n" + - "\fschema.proto\"j\n" + + "\fschema.proto\"\x97\x01\n" + "\x05Entry\x12\x19\n" + "\x04type\x18\x01 \x01(\x0e2\x05.TypeR\x04type\x12\x12\n" + "\x04size\x18\x02 \x01(\x03R\x04size\x12\x12\n" + - "\x04data\x18\x03 \x01(\fR\x04data\x12\x1e\n" + - "\x04xfrm\x18\x04 \x01(\x0e2\n" + - ".TransformR\x04xfrm\"\\\n" + + "\x04data\x18\x03 \x01(\fR\x04data\x12(\n" + + "\ttransform\x18\x04 \x01(\x0e2\n" + + ".TransformR\ttransform\x12!\n" + + "\fcontent_type\x18\x05 \x01(\tR\vcontentType\"\\\n" + "\bRedirect\x12\x12\n" + "\x04from\x18\x01 \x01(\tR\x04from\x12\x0e\n" + "\x02to\x18\x02 \x01(\tR\x02to\x12\x16\n" + @@ -502,7 +513,7 @@ var file_schema_proto_goTypes = []any{ } var file_schema_proto_depIdxs = []int32{ 0, // 0: Entry.type:type_name -> Type - 1, // 1: Entry.xfrm:type_name -> Transform + 1, // 1: Entry.transform:type_name -> Transform 6, // 2: Manifest.contents:type_name -> Manifest.ContentsEntry 3, // 3: Manifest.redirects:type_name -> Redirect 4, // 4: Manifest.problems:type_name -> Problem diff --git a/src/schema.proto b/src/schema.proto index c31d480..fbd4257 100644 --- a/src/schema.proto +++ b/src/schema.proto @@ -36,7 +36,10 @@ message Entry { bytes data = 3; // Only present for `type == InlineFile` and `type == ExternalFile` that // have been transformed. - Transform xfrm = 4; + Transform transform = 4; + // Only present for `type == InlineFile` and `type == ExternalFile`. + // Currently, optional (not present on certain legacy manifests). + string content_type = 5; } // See https://docs.netlify.com/manage/routing/redirects/overview/ for details.