diff --git a/README.md b/README.md index a4977ab..1f59aa6 100644 --- a/README.md +++ b/README.md @@ -65,10 +65,11 @@ Features - If the URL matches `https:///...` and the previous rule did not apply, the index site is selected. - Site paths starting with `.git-pages/...` are reserved. - The `.git-pages/manifest.json` path returns a [ProtoJSON](https://protobuf.dev/programming-guides/json/) representation of the deployed site manifest. -* In response to a `PUT` or `POST` request, the server performs a shallow clone of the indicated git repository into a temporary location, checks out the relevant branch, and atomically updates a site. The URL of the request must be the root URL of the site that is being published. - - The `PUT` method requires an `application/x-www-form-urlencoded` body. The body contains the repository URL to be cloned. The `X-Pages-Branch` header contains the branch to be checked out; the `pages` branch is used if the header is absent.s - - The `POST` method requires an `application/json` body containing a Forgejo/Gitea/Gogs/GitHub webhook event payload. Requests where the `ref` key contains anything other than `refs/heads/pages` are ignored, and only the `pages` branch is used. The `repository.clone_url` key contains the repository URL to be cloned. - - If the checked out commit is empty, performs the same action as `DELETE`. +* In response to a `PUT` or `POST` request, the server updates a site with new content. The URL of the request must be the root URL of the site that is being published. + - If the `PUT` method receives an `application/x-www-form-urlencoded` body, it contains a repository URL to be shallowly cloned. The `X-Pages-Branch` header contains the branch to be checked out; the `pages` branch is used if the header is absent. + - If the `PUT` method receives an `application/x-tar` or `application/zip` body, it contains an archive to be extracted. 
+ - The `POST` method requires an `application/json` body containing a Forgejo/Gitea/Gogs/GitHub webhook event payload. Requests where the `ref` key contains anything other than `refs/heads/pages` are ignored, and only the `pages` branch is used. The `repository.clone_url` key contains a repository URL to be shallowly cloned. + - If the received content is empty, performs the same action as `DELETE`. * In response to a `DELETE` request, the server unpublishes a site. The URL of the request must be the root URL of the site that is being unpublished. Site data remains stored for an indeterminate period of time, but becomes completely inaccessible. * All updates to site content are atomic (subject to consistency guarantees of the storage backend). That is, there is an instantaneous moment during an update before which the server will return the old content and after which it will return the new content. @@ -82,10 +83,10 @@ The authorization flow for content updates (`PUT`, `DELETE`, `POST` requests) pr 1. **Development Mode:** If the environment variable `INSECURE` is set to the value `very`, the request is authorized. 2. **DNS Challenge:** If the method is `PUT`, `DELETE`, `POST`, and a well-formed `Authorization:` header is provided containing a ``, and a TXT record lookup at `_git-pages-challenge.` returns a record whose concatenated value equals `SHA256(" ")`, the request is authorized. - - **Pages scheme:** Request includes an `Authorization: Pages ` header. - - **Basic scheme:** Request includes an `Authorization: Basic ` header, where `` is equal to `Base64("Pages:")`. (Useful for non-Forgejo forges.) -3. **DNS Allowlist:** If the method is `PUT` or `POST`, and a TXT record lookup at `_git-pages-repository.` returns a set of well-formed absolute URLs, and the requested clone URLs is contained in this set of URLs, the request is authorized. -4. 
**Wildcard Match (Site):** If the method is `POST`, and a `[wildcard]` configuration section is present, and the suffix of a hostname (compared label-wise) is equal to `[wildcard].domain`, and the requested clone URL is a *matching* clone URL, the request is authorized. + - **`Pages` scheme:** Request includes an `Authorization: Pages ` header. + - **`Basic` scheme:** Request includes an `Authorization: Basic ` header, where `` is equal to `Base64("Pages:")`. (Useful for non-Forgejo forges.) +3. **DNS Allowlist:** If the method is `PUT` or `POST`, and a TXT record lookup at `_git-pages-repository.` returns a set of well-formed absolute URLs, and (for `PUT` requests) the body contains a repository URL, and the requested clone URL is contained in this set of URLs, the request is authorized. +4. **Wildcard Match (Site):** If the method is `POST`, and a `[wildcard]` configuration section is present, and the suffix of a hostname (compared label-wise) is equal to `[wildcard].domain`, and (for `PUT` requests) the body contains a repository URL, and the requested clone URL is a *matching* clone URL, the request is authorized. - **Index repository:** If the request URL is `scheme://./`, a *matching* clone URL is computed by templating `[wildcard.clone-url]` with `` and ``, where `` is computed by templating each element of `[wildcard].index-repos` with ``. - **Project repository:** If the request URL is `scheme://.//`, a *matching* clone URL is computed by templating `[wildcard.clone-url]` with `` and ``. 5. **Default Deny:** Otherwise, the request is not authorized. 
diff --git a/src/auth.go b/src/auth.go index 3f5add7..46bfc31 100644 --- a/src/auth.go +++ b/src/auth.go @@ -205,7 +205,7 @@ func authorizeWildcardMatchSite(r *http.Request) (*Authorization, error) { } } -func AuthorizeMetadata(r *http.Request) (*Authorization, error) { +func AuthorizeMetadataRetrieval(r *http.Request) (*Authorization, error) { causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}} if InsecureMode() { @@ -240,7 +240,7 @@ func AuthorizeMetadata(r *http.Request) (*Authorization, error) { // Returns `repoURLs, err` where if `err == nil` then the request is authorized to clone from // any repository URL included in `repoURLs` (by case-insensitive comparison), or any URL at all // if `repoURLs == nil`. -func AuthorizeUpdate(r *http.Request) (*Authorization, error) { +func AuthorizeUpdateFromRepository(r *http.Request) (*Authorization, error) { causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}} if InsecureMode() { @@ -330,3 +330,25 @@ func AuthorizeBranch(branch string, auth *Authorization) error { } } } + +func AuthorizeUpdateFromArchive(r *http.Request) (*Authorization, error) { + causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}} + + if InsecureMode() { + log.Println("auth: INSECURE mode") + return &Authorization{}, nil // for testing only + } + + // DNS challenge gives absolute authority. + auth, err := authorizeDNSChallenge(r) + if err != nil && IsUnauthorized(err) { + causes = append(causes, err) + } else if err != nil { // bad request + return nil, err + } else { + log.Println("auth: DNS challenge") + return auth, nil + } + + return nil, errors.Join(causes...) 
+} diff --git a/src/extract.go b/src/extract.go new file mode 100644 index 0000000..5b6bb96 --- /dev/null +++ b/src/extract.go @@ -0,0 +1,111 @@ +package main + +import ( + "archive/tar" + "archive/zip" + "bytes" + "errors" + "fmt" + "io" + "strings" + + "google.golang.org/protobuf/proto" +) + +func ExtractTar(reader io.Reader) (*Manifest, error) { + archive := tar.NewReader(reader) + + manifest := Manifest{ + Contents: map[string]*Entry{ + "": {Type: Type_Directory.Enum()}, + }, + } + for { + header, err := archive.Next() + if err == io.EOF { + break + } else if err != nil { + return nil, err + } + + manifestEntry := Entry{} + switch header.Typeflag { + case tar.TypeReg: + fileData := make([]byte, header.Size) + length, err := archive.Read(fileData) + if !(length == int(header.Size) && err == io.EOF) { + return nil, fmt.Errorf("tar: read: %w (expected %d bytes, read %d)", + err, header.Size, length) + } + + manifestEntry.Type = Type_InlineFile.Enum() + manifestEntry.Size = proto.Uint32(uint32(header.Size)) + manifestEntry.Data = fileData + + case tar.TypeSymlink: + manifestEntry.Type = Type_Symlink.Enum() + manifestEntry.Size = proto.Uint32(uint32(header.Size)) + manifestEntry.Data = []byte(header.Linkname) + + case tar.TypeDir: + manifestEntry.Type = Type_Directory.Enum() + + default: + manifestEntry.Type = Type_Invalid.Enum() + } + manifest.Contents[strings.TrimSuffix(header.Name, "/")] = &manifestEntry + } + return &manifest, nil +} + +var errZipBomb = errors.New("zip file size limit exceeded") + +func ExtractZip(reader io.Reader) (*Manifest, error) { + data, err := io.ReadAll(reader) + if err != nil { + return nil, err + } + + archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data))) + if err != nil { + return nil, err + } + + // Detect and defuse zipbombs. 
+ var totalSize uint64 + for _, file := range archive.File { + totalSize += file.UncompressedSize64 + } + if totalSize > SiteSizeMax { + return nil, fmt.Errorf("%w: %d > %d bytes", errZipBomb, totalSize, SiteSizeMax) + } + + manifest := Manifest{ + Contents: map[string]*Entry{ + "": {Type: Type_Directory.Enum()}, + }, + } + for _, file := range archive.File { + manifestEntry := Entry{} + if !strings.HasSuffix(file.Name, "/") { + fileReader, err := file.Open() + if err != nil { + return nil, err + } + defer fileReader.Close() + + fileData, err := io.ReadAll(fileReader) + if err != nil { + return nil, fmt.Errorf("zip: read: %w", err) + } + + manifestEntry.Type = Type_InlineFile.Enum() + manifestEntry.Size = proto.Uint32(uint32(file.UncompressedSize64)) + manifestEntry.Data = fileData + } else { + manifestEntry.Type = Type_Directory.Enum() + } + manifest.Contents[strings.TrimSuffix(file.Name, "/")] = &manifestEntry + } + return &manifest, nil +} diff --git a/src/fetch.go b/src/fetch.go index 877f435..9e501c7 100644 --- a/src/fetch.go +++ b/src/fetch.go @@ -62,12 +62,13 @@ func FetchRepository(ctx context.Context, repoURL string, branch string) (*Manif defer walker.Close() manifest := Manifest{ - RepoUrl: proto.String(repoURL), - Branch: proto.String(branch), - Commit: proto.String(ref.Hash().String()), - Contents: make(map[string]*Entry), + RepoUrl: proto.String(repoURL), + Branch: proto.String(branch), + Commit: proto.String(ref.Hash().String()), + Contents: map[string]*Entry{ + "": {Type: Type_Directory.Enum()}, + }, } - manifest.Contents[""] = &Entry{Type: Type_Directory.Enum()} for { name, entry, err := walker.Next() if err == io.EOF { @@ -86,6 +87,7 @@ func FetchRepository(ctx context.Context, repoURL string, branch string) (*Manif if err != nil { return nil, fmt.Errorf("git blob open: %w", err) } + defer reader.Close() data, err := io.ReadAll(reader) if err != nil { diff --git a/src/manifest.go b/src/manifest.go index a0346b8..6888221 100644 --- 
a/src/manifest.go +++ b/src/manifest.go @@ -62,7 +62,7 @@ func DecodeManifest(data []byte) (*Manifest, error) { return &manifest, err } -func ManifestDebugJSON(manifest *Manifest) []byte { +func ManifestDebugJSON(manifest *Manifest) string { result, err := protojson.MarshalOptions{ Multiline: true, EmitDefaultValues: true, @@ -70,7 +70,7 @@ func ManifestDebugJSON(manifest *Manifest) []byte { if err != nil { panic(err) } - return result + return string(result) } const maxSymlinkLevels int = 128 @@ -148,17 +148,20 @@ func ExternalizeFiles(manifest *Manifest) *Manifest { const ManifestSizeMax int = 1048576 +var errManifestTooLarge = errors.New("manifest size limit exceeded") + // Uploads inline file data over certain size to the storage backend. Returns a copy of // the manifest updated to refer to an external content-addressable store. func StoreManifest(name string, manifest *Manifest) (*Manifest, error) { extManifest := ExternalizeFiles(manifest) extManifestData := EncodeManifest(extManifest) if len(extManifestData) > ManifestSizeMax { - return nil, fmt.Errorf("manifest too big: %d > %d bytes", extManifestData, ManifestSizeMax) + return nil, fmt.Errorf("%w: %d > %d bytes", + errManifestTooLarge, extManifestData, ManifestSizeMax) } if err := backend.StageManifest(extManifest); err != nil { - return nil, fmt.Errorf("stage: %w", err) + return nil, fmt.Errorf("stage manifest: %w", err) } wg := sync.WaitGroup{} @@ -180,7 +183,7 @@ func StoreManifest(name string, manifest *Manifest) (*Manifest, error) { } if err := backend.CommitManifest(name, extManifest); err != nil { - return nil, fmt.Errorf("commit: %w", err) + return nil, fmt.Errorf("commit manifest: %w", err) } return extManifest, nil diff --git a/src/pages.go b/src/pages.go index ad18165..3d3ee8b 100644 --- a/src/pages.go +++ b/src/pages.go @@ -58,7 +58,7 @@ func getPage(w http.ResponseWriter, r *http.Request) error { if metadataPath, found := strings.CutPrefix(sitePath, ".git-pages/"); found { // metadata 
requests require authorization to avoid making pushes from private // repositories enumerable - _, err := AuthorizeMetadata(r) + _, err := AuthorizeMetadataRetrieval(r) if err != nil { return err } @@ -67,7 +67,7 @@ func getPage(w http.ResponseWriter, r *http.Request) error { case "manifest.json": w.Header().Add("Content-Type", "application/json; charset=utf-8") w.WriteHeader(http.StatusOK) - w.Write(ManifestDebugJSON(manifest)) + w.Write([]byte(ManifestDebugJSON(manifest))) default: w.WriteHeader(http.StatusNotFound) fmt.Fprintf(w, "not found\n") @@ -158,11 +158,10 @@ func getPage(w http.ResponseWriter, r *http.Request) error { return nil } +const SiteSizeMax = 512 * 1048576 + func putPage(w http.ResponseWriter, r *http.Request) error { - auth, err := AuthorizeUpdate(r) - if err != nil { - return err - } + var result UpdateResult host := GetHost(r) @@ -171,34 +170,59 @@ func putPage(w http.ResponseWriter, r *http.Request) error { return err } - // URLs have no length limit, but 64K seems enough for a repository URL - requestBody, err := io.ReadAll(http.MaxBytesReader(w, r.Body, 65536)) - if err != nil { - return fmt.Errorf("body read: %w", err) - } - webRoot := makeWebRoot(host, projectName) - // request body contains git repository URL - repoURL := string(requestBody) - if err := AuthorizeRepository(repoURL, auth); err != nil { - return err + contentType := r.Header.Get("Content-Type") + if contentType == "application/x-www-form-urlencoded" { + auth, err := AuthorizeUpdateFromRepository(r) + if err != nil { + return err + } + + // URLs have no length limit, but 64K seems enough for a repository URL + requestBody, err := io.ReadAll(http.MaxBytesReader(w, r.Body, 65536)) + if err != nil { + return fmt.Errorf("body read: %w", err) + } + + repoURL := string(requestBody) + if err := AuthorizeRepository(repoURL, auth); err != nil { + return err + } + + branch := "pages" + if customBranch := r.Header.Get("X-Pages-Branch"); customBranch != "" { + branch = customBranch + 
} + if err := AuthorizeBranch(branch, auth); err != nil { + return err + } + + ctx, cancel := context.WithTimeout(r.Context(), updateTimeout) + defer cancel() + result = UpdateFromRepository(ctx, webRoot, repoURL, branch) + } else { + _, err := AuthorizeUpdateFromArchive(r) + if err != nil { + return err + } + + // request body contains archive + reader := http.MaxBytesReader(w, r.Body, SiteSizeMax) + result = UpdateFromArchive(webRoot, contentType, reader) } - branch := "pages" - if customBranch := r.Header.Get("X-Pages-Branch"); customBranch != "" { - branch = customBranch - } - if err := AuthorizeBranch(branch, auth); err != nil { - return err - } - - ctx, cancel := context.WithTimeout(r.Context(), updateTimeout) - defer cancel() - result := Update(ctx, webRoot, repoURL, branch) switch result.outcome { case UpdateError: - w.WriteHeader(http.StatusServiceUnavailable) + if errors.Is(result.err, errManifestTooLarge) { + w.WriteHeader(http.StatusRequestEntityTooLarge) + } else if errors.Is(result.err, errArchiveFormat) { + w.WriteHeader(http.StatusUnsupportedMediaType) + } else if errors.Is(result.err, errZipBomb) { + w.WriteHeader(http.StatusRequestEntityTooLarge) + } else { + w.WriteHeader(http.StatusServiceUnavailable) + } case UpdateTimeout: w.WriteHeader(http.StatusGatewayTimeout) case UpdateNoChange: @@ -211,7 +235,9 @@ func putPage(w http.ResponseWriter, r *http.Request) error { w.Header().Add("X-Pages-Outcome", "deleted") } if result.manifest != nil { - fmt.Fprintln(w, *result.manifest.Commit) + if result.manifest.Commit != nil { + fmt.Fprintln(w, *result.manifest.Commit) + } } else if result.err != nil { fmt.Fprintln(w, result.err) } else { @@ -221,7 +247,7 @@ func putPage(w http.ResponseWriter, r *http.Request) error { } func deletePage(w http.ResponseWriter, r *http.Request) error { - _, err := AuthorizeUpdate(r) + _, err := AuthorizeUpdateFromRepository(r) if err != nil { return err } @@ -246,7 +272,7 @@ func deletePage(w http.ResponseWriter, r 
*http.Request) error { } func postPage(w http.ResponseWriter, r *http.Request) error { - auth, err := AuthorizeUpdate(r) + auth, err := AuthorizeUpdateFromRepository(r) if err != nil { return err } @@ -258,6 +284,8 @@ func postPage(w http.ResponseWriter, r *http.Request) error { return err } + webRoot := makeWebRoot(host, projectName) + eventName := "" for _, header := range []string{ "X-Forgejo-Event", @@ -307,8 +335,6 @@ func postPage(w http.ResponseWriter, r *http.Request) error { return nil } - webRoot := makeWebRoot(host, projectName) - repoURL := event["repository"].(map[string]any)["clone_url"].(string) if err := AuthorizeRepository(repoURL, auth); err != nil { return err @@ -316,7 +342,7 @@ func postPage(w http.ResponseWriter, r *http.Request) error { ctx, cancel := context.WithTimeout(r.Context(), updateTimeout) defer cancel() - result := Update(ctx, webRoot, repoURL, "pages") + result := UpdateFromRepository(ctx, webRoot, repoURL, "pages") switch result.outcome { case UpdateError: w.WriteHeader(http.StatusServiceUnavailable) @@ -341,7 +367,7 @@ func postPage(w http.ResponseWriter, r *http.Request) error { } func ServePages(w http.ResponseWriter, r *http.Request) { - log.Println("pages:", r.Method, r.Host, r.URL) + log.Println("pages:", r.Method, r.Host, r.URL, r.Header.Get("Content-Type")) if region := os.Getenv("FLY_REGION"); region != "" { w.Header().Add("Server", fmt.Sprintf("git-pages (fly.io; %s)", region)) } else { diff --git a/src/update.go b/src/update.go index 407c99c..beca052 100644 --- a/src/update.go +++ b/src/update.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "io" "log" ) @@ -25,42 +26,35 @@ type UpdateResult struct { } func Update( - ctx context.Context, webRoot string, - repoURL string, - branch string, + manifest *Manifest, ) UpdateResult { - var fetchManifest, oldManifest, newManifest *Manifest + var oldManifest, newManifest *Manifest var err error - log.Println("update:", webRoot, repoURL, branch) - outcome := UpdateError - 
fetchManifest, err = FetchRepository(ctx, repoURL, branch) - if errors.Is(err, context.DeadlineExceeded) { - outcome = UpdateTimeout - err = fmt.Errorf("update timeout") - } else if err == nil { - oldManifest, _ = backend.GetManifest(webRoot) - if IsManifestEmpty(fetchManifest) { - newManifest, err = fetchManifest, backend.DeleteManifest(webRoot) - if err == nil { - if oldManifest == nil { - outcome = UpdateNoChange - } else { - outcome = UpdateDeleted - } + oldManifest, _ = backend.GetManifest(webRoot) + // log.Println("OLD", ManifestDebugJSON(oldManifest)) + if IsManifestEmpty(manifest) { + newManifest, err = manifest, backend.DeleteManifest(webRoot) + // log.Println("NEW", ManifestDebugJSON(newManifest)) + if err == nil { + if oldManifest == nil { + outcome = UpdateNoChange + } else { + outcome = UpdateDeleted } - } else if err = PrepareManifest(fetchManifest); err == nil { - newManifest, err = StoreManifest(webRoot, fetchManifest) - if err == nil { - if oldManifest == nil { - outcome = UpdateCreated - } else if CompareManifest(oldManifest, newManifest) { - outcome = UpdateNoChange - } else { - outcome = UpdateReplaced - } + } + } else if err = PrepareManifest(manifest); err == nil { + newManifest, err = StoreManifest(webRoot, manifest) + // log.Println("NEW", ManifestDebugJSON(newManifest)) + if err == nil { + if oldManifest == nil { + outcome = UpdateCreated + } else if CompareManifest(oldManifest, newManifest) { + outcome = UpdateNoChange + } else { + outcome = UpdateReplaced } } } @@ -77,10 +71,61 @@ func Update( case UpdateNoChange: status = "unchanged" } - log.Printf("update ok: %s %s %s", webRoot, *newManifest.Commit, status) + if newManifest.Commit != nil { + log.Printf("update %s ok: %s %s", webRoot, status, *newManifest.Commit) + } else { + log.Printf("update %s ok: %s", webRoot, status) + } } else { - log.Printf("update err: %s %s", webRoot, err) + log.Printf("update %s err: %s", webRoot, err) } return UpdateResult{outcome, newManifest, err} } + +func 
UpdateFromRepository( + ctx context.Context, + webRoot string, + repoURL string, + branch string, +) UpdateResult { + log.Printf("update %s: %s %s\n", webRoot, repoURL, branch) + + manifest, err := FetchRepository(ctx, repoURL, branch) + if errors.Is(err, context.DeadlineExceeded) { + return UpdateResult{UpdateTimeout, nil, fmt.Errorf("update timeout")} + } else if err != nil { + return UpdateResult{UpdateError, nil, err} + } else { + return Update(webRoot, manifest) + } +} + +var errArchiveFormat = errors.New("unsupported archive format") + +func UpdateFromArchive( + webRoot string, + contentType string, + reader io.Reader, +) UpdateResult { + var manifest *Manifest + var err error + + switch contentType { + case "application/x-tar": + log.Printf("update %s: (tar)", webRoot) + manifest, err = ExtractTar(reader) // yellow? definitely yellow. + case "application/zip": + log.Printf("update %s: (zip)", webRoot) + manifest, err = ExtractZip(reader) + default: + err = errArchiveFormat + } + + if err != nil { + log.Printf("update %s err: %s", webRoot, err) + return UpdateResult{UpdateError, nil, err} + } else { + return Update(webRoot, manifest) + } +}