// httpAcceptEncodingRegexp matches a single element at the start of an
// Accept-Encoding field value, including the comma that terminates it (if
// any). Submatch 1 is the content coding token (or "*"); submatch 2 is the
// quality value, and is empty when the element carries no weight.
var httpAcceptEncodingRegexp = regexp.MustCompile(`` +
	// token optionally prefixed by whitespace
	`^[ \t]*([a-zA-Z0-9!#$%&'*+.^_\x60|~-]+)` +
	// quality value prefixed by a semicolon optionally surrounded by
	// whitespace; ABNF string literals are case-insensitive, so both "q="
	// and "Q=" are accepted
	`(?:[ \t]*;[ \t]*[qQ]=(0(?:\.[0-9]{1,3})?|1(?:\.0{1,3})?))?` +
	// optional whitespace followed by comma or end of line
	`[ \t]*(?:,|$)`,
)

// httpEncoding is one content coding together with its weight.
type httpEncoding struct {
	code string  // lowercased coding name, or "*" for the wildcard
	qval float64 // weight; 1.0 when the header element did not specify one
}

// httpEncodings is the parsed form of an Accept-Encoding field value, with
// elements kept in the order they appeared in the header.
type httpEncodings struct {
	encodings []httpEncoding
}

// parseHTTPEncodings parses the value of an Accept-Encoding header field.
//
// Coding names are lowercased. If any part of headerValue does not match the
// expected syntax, the whole value is discarded and an empty httpEncodings is
// returned, which is the same result as for an empty headerValue.
func parseHTTPEncodings(headerValue string) httpEncodings {
	var result httpEncodings
	for headerValue != "" {
		matches := httpAcceptEncodingRegexp.FindStringSubmatch(headerValue)
		if matches == nil {
			return httpEncodings{}
		}
		enc := httpEncoding{code: strings.ToLower(matches[1]), qval: 1.0}
		if matches[2] != "" {
			qval, err := strconv.ParseFloat(matches[2], 64)
			if err != nil {
				// The pattern only admits well-formed decimals, so this is
				// not expected to happen; treat it like any other
				// malformed header.
				return httpEncodings{}
			}
			enc.qval = qval
		}
		result.encodings = append(result.encodings, enc)
		// Every match consumes at least the one-character token, so the
		// remaining input strictly shrinks.
		headerValue = headerValue[len(matches[0]):]
	}
	// RFC 9110 says (https://httpwg.org/specs/rfc9110.html#field.accept-encoding):
	//   "If no Accept-Encoding header field is in the request, any content
	//   coding is considered acceptable by the user agent."
	// In practice, no client expects to receive a compressed response
	// without having sent Accept-Encoding in the request, so an empty result
	// is deliberately left empty rather than expanded to a wildcard.
	return result
}

// Negotiate returns the most preferred encoding among codes that is
// acceptable to the client, or an empty string if none of them is.
//
// The weight of each candidate is determined as follows:
//   - a candidate listed by name in the header uses the weight given there
//     (the last occurrence wins), regardless of where "*" appears;
//   - otherwise, if the header contains "*", the candidate uses the
//     wildcard's weight;
//   - otherwise "identity" is acceptable but ranked below every other
//     acceptable candidate, and any other candidate is unacceptable.
//
// A weight of zero means "not acceptable". Candidates with equal weight are
// ranked in the order they were passed in codes.
func (e *httpEncodings) Negotiate(codes ...string) string {
	// Collect the weights the client stated explicitly for the candidates,
	// and the wildcard weight separately, so that the result does not depend
	// on whether "*" comes before or after a named coding.
	explicit := make(map[string]float64, len(codes))
	candidate := make(map[string]bool, len(codes))
	for _, code := range codes {
		candidate[code] = true
	}
	wildcardQval, hasWildcard := 0.0, false
	for _, enc := range e.encodings {
		switch {
		case enc.code == "*":
			wildcardQval, hasWildcard = enc.qval, true
		case candidate[enc.code]:
			explicit[enc.code] = enc.qval
		}
	}

	ranked := make([]httpEncoding, len(codes))
	for idx, code := range codes {
		qval, listed := explicit[code]
		switch {
		case listed:
			// keep the explicitly stated weight
		case hasWildcard:
			qval = wildcardQval
		case code == "identity":
			qval = -1 // implicitly acceptable; sort last
		}
		ranked[idx] = httpEncoding{code: code, qval: qval}
	}

	// Highest weight first; the stable sort preserves caller order on ties.
	slices.SortStableFunc(ranked, func(a, b httpEncoding) int {
		return cmp.Compare(b.qval, a.qval)
	})
	for _, enc := range ranked {
		if enc.qval != 0 {
			return enc.code
		}
	}
	return ""
}
- compressedData, _ := io.ReadAll(reader) - decompressedData, err := zstdDecoder.DecodeAll(compressedData, []byte{}) - if err != nil { - w.WriteHeader(http.StatusInternalServerError) - fmt.Fprintf(w, "internal server error: %s\n", err) - return err + if acceptedEncodings.Negotiate("identity") != "identity" { + negotiatedEncoding = false } - reader = bytes.NewReader(decompressedData) + case Transform_Zstandard: + supported := []string{"zstd", "identity"} + if entry.ContentType == nil { + // If Content-Type is unset, `http.ServeContent` will try to sniff + // the file contents. That won't work if it's compressed. + supported = []string{"identity"} + } + switch acceptedEncodings.Negotiate(supported...) { + case "zstd": + // Set Content-Length ourselves since `http.ServeContent` only sets + // it if Content-Encoding is unset or if it's a range request. + w.Header().Set("Content-Length", strconv.FormatInt(*entry.Size, 10)) + w.Header().Set("Content-Encoding", "zstd") + case "identity": + compressedData, _ := io.ReadAll(reader) + decompressedData, err := zstdDecoder.DecodeAll(compressedData, []byte{}) + if err != nil { + w.WriteHeader(http.StatusInternalServerError) + fmt.Fprintf(w, "internal server error: %s\n", err) + return err + } + reader = bytes.NewReader(decompressedData) + default: + negotiatedEncoding = false + } + } + if !negotiatedEncoding { + w.WriteHeader(http.StatusNotAcceptable) + return fmt.Errorf("no supported content encodings (accept-encoding: %q)", + r.Header.Get("Accept-Encoding")) } // decide on the HTTP status @@ -253,6 +277,11 @@ func getPage(w http.ResponseWriter, r *http.Request) error { io.Copy(w, reader) } } else { + if entry.ContentType != nil { + // don't let http.ServeContent mime-sniff compressed data + w.Header().Set("Content-Type", *entry.ContentType) + } + // allow the use of multi-threading in WebAssembly w.Header().Set("Cross-Origin-Embedder-Policy", "credentialless") w.Header().Set("Cross-Origin-Opener-Policy", "same-origin") @@ 
-265,7 +294,7 @@ func getPage(w http.ResponseWriter, r *http.Request) error { w.Header().Set("Cache-Control", "max-age=60, stale-while-revalidate=3600") // see https://web.dev/articles/stale-while-revalidate for details - // http.ServeContent handles content type and caching + // http.ServeContent handles conditional requests and range requests http.ServeContent(w, r, entryPath, mtime, reader) } return nil