From bd44f65b514e700e454d1739873e699b0c20d860 Mon Sep 17 00:00:00 2001 From: Catherine Date: Fri, 5 Dec 2025 18:20:14 +0000 Subject: [PATCH] Add handling of `Accept: application/vnd.git-pages.unresolved`. This will be used for incremental archive updates. --- README.md | 3 ++ src/http.go | 85 ++++++++++++++++++++++++++++++++++++++-------------- src/pages.go | 35 +++++++++++++++++----- 3 files changed, 92 insertions(+), 31 deletions(-) diff --git a/README.md b/README.md index 1f00821..520cdeb 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,9 @@ Features * Files with a certain name, when placed in the root of a site, have special functions: - [Netlify `_redirects`][_redirects] file can be used to specify HTTP redirect and rewrite rules. The _git-pages_ implementation currently does not support placeholders, query parameters, or conditions, and may differ from Netlify in other minor ways. If you find that a supported `_redirects` file feature does not work the same as on Netlify, please file an issue. (Note that _git-pages_ does not perform URL normalization; `/foo` and `/foo/` are *not* the same, unlike with Netlify.) - [Netlify `_headers`][_headers] file can be used to specify custom HTTP response headers (if allowlisted by configuration). In particular, this is useful to enable [CORS requests][cors]. The _git-pages_ implementation may differ from Netlify in minor ways; if you find that a `_headers` file feature does not work the same as on Netlify, please file an issue. +* Incremental updates can be made using `PUT` or `PATCH` requests where the body contains an archive (both tar and zip are supported). + - Any archive entry that is a symlink to `/git/pages/<git-sha256>` is replaced with an existing manifest entry for the same site whose git blob hash matches `<git-sha256>`. If there is no existing manifest entry with the specified git hash, the update fails with a `422 Unprocessable Entity`. 
+ - For this error response only, if the negotiated content type is `application/vnd.git-pages.unresolved`, the response will contain the `<git-sha256>` of each unresolved reference, one per line. * Support for SHA-256 Git hashes is [limited by go-git][go-git-sha256]; once go-git implements the required features, _git-pages_ will automatically gain support for SHA-256 Git hashes. Note that shallow clones (used by _git-pages_ to conserve bandwidth if available) aren't supported yet in the Git protocol as of 2025. [_redirects]: https://docs.netlify.com/manage/routing/redirects/overview/ diff --git a/src/http.go b/src/http.go index 4e6d1ef..b3d833d 100644 --- a/src/http.go +++ b/src/http.go @@ -10,7 +10,7 @@ import ( var httpAcceptRegexp = regexp.MustCompile(`` + // token optionally prefixed by whitespace - `^[ \t]*([a-zA-Z0-9$!#$%&'*+.^_\x60|~-]+)` + + `^[ \t]*([a-zA-Z0-9$!#$%&'*+./^_\x60|~-]+)` + // quality value prefixed by a semicolon optionally surrounded by whitespace `(?:[ \t]*;[ \t]*q=(0(?:\.[0-9]{1,3})?|1(?:\.0{1,3})?))?` + // optional whitespace followed by comma or end of line @@ -22,23 +22,70 @@ type httpAcceptOffer struct { qval float64 } +func parseGenericAcceptHeader(headerValue string) (result []httpAcceptOffer) { + for headerValue != "" { + matches := httpAcceptRegexp.FindStringSubmatch(headerValue) + if matches == nil { + return + } + offer := httpAcceptOffer{strings.ToLower(matches[1]), 1.0} + if matches[2] != "" { + offer.qval, _ = strconv.ParseFloat(matches[2], 64) + } + result = append(result, offer) + headerValue = headerValue[len(matches[0]):] + } + return +} + +func preferredAcceptOffer(offers []httpAcceptOffer) string { + slices.SortStableFunc(offers, func(a, b httpAcceptOffer) int { + return -cmp.Compare(a.qval, b.qval) + }) + for _, offer := range offers { + if offer.qval != 0 { + return offer.code + } + } + return "" +} + +type HTTPContentTypes struct { + contentTypes []httpAcceptOffer +} + +func ParseAcceptHeader(headerValue string) (result 
HTTPContentTypes) { + result = HTTPContentTypes{parseGenericAcceptHeader(headerValue)} + return +} + +func (e *HTTPContentTypes) Negotiate(offers ...string) string { + prefs := make(map[string]float64, len(offers)) + for _, code := range offers { + prefs[code] = 0 + } + for _, ctyp := range e.contentTypes { + if ctyp.code == "*" || ctyp.code == "*/*" { + for code := range prefs { + prefs[code] = ctyp.qval + } + } else if _, ok := prefs[ctyp.code]; ok { + prefs[ctyp.code] = ctyp.qval + } + } + ctyps := make([]httpAcceptOffer, len(offers)) + for idx, code := range offers { + ctyps[idx] = httpAcceptOffer{code, prefs[code]} + } + return preferredAcceptOffer(ctyps) +} + type HTTPEncodings struct { encodings []httpAcceptOffer } -func ParseHTTPAcceptEncoding(headerValue string) (result HTTPEncodings) { - for headerValue != "" { - matches := httpAcceptRegexp.FindStringSubmatch(headerValue) - if matches == nil { - return HTTPEncodings{} - } - enc := httpAcceptOffer{strings.ToLower(matches[1]), 1.0} - if matches[2] != "" { - enc.qval, _ = strconv.ParseFloat(matches[2], 64) - } - result.encodings = append(result.encodings, enc) - headerValue = headerValue[len(matches[0]):] - } +func ParseAcceptEncodingHeader(headerValue string) (result HTTPEncodings) { + result = HTTPEncodings{parseGenericAcceptHeader(headerValue)} if len(result.encodings) == 0 { // RFC 9110 says (https://httpwg.org/specs/rfc9110.html#field.accept-encoding): // "If no Accept-Encoding header field is in the request, any content @@ -77,13 +124,5 @@ func (e *HTTPEncodings) Negotiate(offers ...string) string { for idx, code := range offers { encs[idx] = httpAcceptOffer{code, prefs[code]} } - slices.SortStableFunc(encs, func(a, b httpAcceptOffer) int { - return -cmp.Compare(a.qval, b.qval) - }) - for _, enc := range encs { - if enc.qval != 0 { - return enc.code - } - } - return "" + return preferredAcceptOffer(encs) } diff --git a/src/pages.go b/src/pages.go index d8f9f65..875e335 100644 --- a/src/pages.go +++ 
b/src/pages.go @@ -214,7 +214,7 @@ func getPage(w http.ResponseWriter, r *http.Request) error { // we only offer `/.git-pages/archive.tar` and not the `.tar.gz`/`.tar.zst` variants // because HTTP can already request compression using the `Content-Encoding` mechanism - acceptedEncodings := ParseHTTPAcceptEncoding(r.Header.Get("Accept-Encoding")) + acceptedEncodings := ParseAcceptEncodingHeader(r.Header.Get("Accept-Encoding")) negotiated := acceptedEncodings.Negotiate("zstd", "gzip", "identity") if negotiated != "" { w.Header().Set("Content-Encoding", negotiated) @@ -322,8 +322,8 @@ func getPage(w http.ResponseWriter, r *http.Request) error { defer closer.Close() } - offeredEncodings := []string{} - acceptedEncodings := ParseHTTPAcceptEncoding(r.Header.Get("Accept-Encoding")) + var offeredEncodings []string + acceptedEncodings := ParseAcceptEncodingHeader(r.Header.Get("Accept-Encoding")) negotiatedEncoding := true switch entry.GetTransform() { case Transform_Identity: @@ -379,7 +379,7 @@ func getPage(w http.ResponseWriter, r *http.Request) error { if !negotiatedEncoding { w.Header().Set("Accept-Encoding", strings.Join(offeredEncodings, ", ")) w.WriteHeader(http.StatusNotAcceptable) - return fmt.Errorf("no supported content encodings (Accept-Encoding: %q)", + return fmt.Errorf("no supported content encodings (Accept-Encoding: %s)", r.Header.Get("Accept-Encoding")) } @@ -506,7 +506,7 @@ func putPage(w http.ResponseWriter, r *http.Request) error { result = UpdateFromArchive(ctx, webRoot, contentType, reader) } - return reportUpdateResult(w, result) + return reportUpdateResult(w, r, result) } func patchPage(w http.ResponseWriter, r *http.Request) error { @@ -569,13 +569,32 @@ func patchPage(w http.ResponseWriter, r *http.Request) error { contentType := getMediaType(r.Header.Get("Content-Type")) reader := http.MaxBytesReader(w, r.Body, int64(config.Limits.MaxSiteSize.Bytes())) result := PartialUpdateFromArchive(ctx, webRoot, contentType, reader, parents) - return 
reportUpdateResult(w, result) + return reportUpdateResult(w, r, result) } -func reportUpdateResult(w http.ResponseWriter, result UpdateResult) error { +func reportUpdateResult(w http.ResponseWriter, r *http.Request, result UpdateResult) error { + var unresolvedRefErr UnresolvedRefError + if result.outcome == UpdateError && errors.As(result.err, &unresolvedRefErr) { + offeredContentTypes := []string{"application/vnd.git-pages.unresolved", "text/plain"} + acceptedContentTypes := ParseAcceptHeader(r.Header.Get("Accept")) + switch acceptedContentTypes.Negotiate(offeredContentTypes...) { + default: + w.Header().Set("Accept", strings.Join(offeredContentTypes, ", ")) + w.WriteHeader(http.StatusNotAcceptable) + return fmt.Errorf("no supported content types (Accept: %s)", r.Header.Get("Accept")) + case "application/vnd.git-pages.unresolved": + w.WriteHeader(http.StatusUnprocessableEntity) + for _, missingRef := range unresolvedRefErr.missing { + fmt.Fprintln(w, missingRef) + } + return nil + case "text/plain": + // handled below + } + } + switch result.outcome { case UpdateError: - var unresolvedRefErr UnresolvedRefError if errors.Is(result.err, ErrManifestTooLarge) { w.WriteHeader(http.StatusRequestEntityTooLarge) } else if errors.Is(result.err, errArchiveFormat) {