Allow zip and tar archive uploads via PUT request.

This commit is contained in:
Catherine
2025-09-20 07:05:19 +00:00
parent 95814dd3f3
commit 15b2f1ea39
7 changed files with 297 additions and 87 deletions

View File

@@ -205,7 +205,7 @@ func authorizeWildcardMatchSite(r *http.Request) (*Authorization, error) {
}
}
func AuthorizeMetadata(r *http.Request) (*Authorization, error) {
func AuthorizeMetadataRetrieval(r *http.Request) (*Authorization, error) {
causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}}
if InsecureMode() {
@@ -240,7 +240,7 @@ func AuthorizeMetadata(r *http.Request) (*Authorization, error) {
// Returns `repoURLs, err` where if `err == nil` then the request is authorized to clone from
// any repository URL included in `repoURLs` (by case-insensitive comparison), or any URL at all
// if `repoURLs == nil`.
func AuthorizeUpdate(r *http.Request) (*Authorization, error) {
func AuthorizeUpdateFromRepository(r *http.Request) (*Authorization, error) {
causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}}
if InsecureMode() {
@@ -330,3 +330,25 @@ func AuthorizeBranch(branch string, auth *Authorization) error {
}
}
}
// AuthorizeUpdateFromArchive decides whether the request may update a site from an
// uploaded archive.
//
// In insecure mode every request is accepted with an empty Authorization. Otherwise
// the only accepted credential is a DNS challenge: on success its Authorization is
// returned; an "unauthorized" failure is joined with the generic 401 cause; any other
// failure (a bad request) is returned unchanged.
func AuthorizeUpdateFromArchive(r *http.Request) (*Authorization, error) {
	if InsecureMode() {
		log.Println("auth: INSECURE mode")
		return &Authorization{}, nil // for testing only
	}

	failures := []error{AuthError{http.StatusUnauthorized, "unauthorized"}}

	// DNS challenge gives absolute authority.
	grant, dnsErr := authorizeDNSChallenge(r)
	switch {
	case dnsErr == nil:
		log.Println("auth: DNS challenge")
		return grant, nil
	case IsUnauthorized(dnsErr):
		failures = append(failures, dnsErr)
	default: // bad request
		return nil, dnsErr
	}

	return nil, errors.Join(failures...)
}

111
src/extract.go Normal file
View File

@@ -0,0 +1,111 @@
package main
import (
"archive/tar"
"archive/zip"
"bytes"
"errors"
"fmt"
"io"
"strings"
"google.golang.org/protobuf/proto"
)
// ExtractTar reads a tar stream from reader and builds an in-memory Manifest from it.
//
// The manifest always contains a directory entry under the empty name. Every archive
// member is stored under its header name with one trailing "/" removed:
//   - regular files become inline files holding their complete contents;
//   - symlinks store the link target as their data;
//   - directories become directory entries;
//   - any other member type is recorded as an invalid entry.
//
// An error is returned when the archive cannot be parsed or when the contents of
// a regular file cannot be read in full.
func ExtractTar(reader io.Reader) (*Manifest, error) {
	archive := tar.NewReader(reader)

	manifest := Manifest{
		Contents: map[string]*Entry{
			"": {Type: Type_Directory.Enum()},
		},
	}

	for {
		header, err := archive.Next()
		if err == io.EOF {
			break
		} else if err != nil {
			return nil, err
		}

		manifestEntry := Entry{}
		switch header.Typeflag {
		case tar.TypeReg:
			// NOTE(review): the buffer is sized from the archive header before any data
			// is read; consider capping header.Size against the site size limit.
			fileData := make([]byte, header.Size)
			// A single Read is allowed to return fewer bytes than requested (typical when
			// the archive is streamed from the network), so keep reading until the
			// buffer is full instead of failing on a short read.
			length, err := io.ReadFull(archive, fileData)
			if err != nil {
				return nil, fmt.Errorf("tar: read: %w (expected %d bytes, read %d)",
					err, header.Size, length)
			}

			manifestEntry.Type = Type_InlineFile.Enum()
			manifestEntry.Size = proto.Uint32(uint32(header.Size))
			manifestEntry.Data = fileData

		case tar.TypeSymlink:
			manifestEntry.Type = Type_Symlink.Enum()
			manifestEntry.Size = proto.Uint32(uint32(header.Size))
			manifestEntry.Data = []byte(header.Linkname)

		case tar.TypeDir:
			manifestEntry.Type = Type_Directory.Enum()

		default:
			manifestEntry.Type = Type_Invalid.Enum()
		}
		manifest.Contents[strings.TrimSuffix(header.Name, "/")] = &manifestEntry
	}

	return &manifest, nil
}
// errZipBomb is returned (wrapped) when the sizes declared by a zip archive add up to
// more than SiteSizeMax.
var errZipBomb = errors.New("zip file size limit exceeded")

// ExtractZip reads a complete zip archive from reader and builds an in-memory Manifest
// from it.
//
// The whole archive is buffered in memory first, because the zip reader needs random
// access. Before anything is decompressed, the uncompressed sizes declared by all
// entries are summed and the archive is rejected with an error wrapping errZipBomb if
// the total exceeds SiteSizeMax.
//
// The manifest always contains a directory entry under the empty name. Entries whose
// name ends in "/" become directories; all other entries become inline files holding
// their decompressed contents. Names are stored with one trailing "/" removed.
func ExtractZip(reader io.Reader) (*Manifest, error) {
	data, err := io.ReadAll(reader)
	if err != nil {
		return nil, err
	}

	archive, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))
	if err != nil {
		return nil, err
	}

	// Detect and defuse zipbombs.
	var totalSize uint64
	for _, file := range archive.File {
		sum := totalSize + file.UncompressedSize64
		if sum < totalSize {
			// The addition wrapped around; saturate so that crafted sizes cannot
			// overflow the counter back under the limit.
			sum = ^uint64(0)
		}
		totalSize = sum
	}
	if totalSize > SiteSizeMax {
		return nil, fmt.Errorf("%w: %d > %d bytes", errZipBomb, totalSize, SiteSizeMax)
	}

	manifest := Manifest{
		Contents: map[string]*Entry{
			"": {Type: Type_Directory.Enum()},
		},
	}

	for _, file := range archive.File {
		manifestEntry := Entry{}
		if !strings.HasSuffix(file.Name, "/") {
			fileData, err := readZipFile(file)
			if err != nil {
				return nil, err
			}

			manifestEntry.Type = Type_InlineFile.Enum()
			manifestEntry.Size = proto.Uint32(uint32(file.UncompressedSize64))
			manifestEntry.Data = fileData
		} else {
			manifestEntry.Type = Type_Directory.Enum()
		}
		manifest.Contents[strings.TrimSuffix(file.Name, "/")] = &manifestEntry
	}

	return &manifest, nil
}

// readZipFile returns the decompressed contents of a single archive entry. Keeping this
// in its own function ensures the entry reader is closed as soon as the entry has been
// read, rather than when the whole archive has been processed.
func readZipFile(file *zip.File) ([]byte, error) {
	fileReader, err := file.Open()
	if err != nil {
		return nil, err
	}
	defer fileReader.Close()

	fileData, err := io.ReadAll(fileReader)
	if err != nil {
		return nil, fmt.Errorf("zip: read: %w", err)
	}
	return fileData, nil
}

View File

@@ -62,12 +62,13 @@ func FetchRepository(ctx context.Context, repoURL string, branch string) (*Manif
defer walker.Close()
manifest := Manifest{
RepoUrl: proto.String(repoURL),
Branch: proto.String(branch),
Commit: proto.String(ref.Hash().String()),
Contents: make(map[string]*Entry),
RepoUrl: proto.String(repoURL),
Branch: proto.String(branch),
Commit: proto.String(ref.Hash().String()),
Contents: map[string]*Entry{
"": {Type: Type_Directory.Enum()},
},
}
manifest.Contents[""] = &Entry{Type: Type_Directory.Enum()}
for {
name, entry, err := walker.Next()
if err == io.EOF {
@@ -86,6 +87,7 @@ func FetchRepository(ctx context.Context, repoURL string, branch string) (*Manif
if err != nil {
return nil, fmt.Errorf("git blob open: %w", err)
}
defer reader.Close()
data, err := io.ReadAll(reader)
if err != nil {

View File

@@ -62,7 +62,7 @@ func DecodeManifest(data []byte) (*Manifest, error) {
return &manifest, err
}
func ManifestDebugJSON(manifest *Manifest) []byte {
func ManifestDebugJSON(manifest *Manifest) string {
result, err := protojson.MarshalOptions{
Multiline: true,
EmitDefaultValues: true,
@@ -70,7 +70,7 @@ func ManifestDebugJSON(manifest *Manifest) []byte {
if err != nil {
panic(err)
}
return result
return string(result)
}
const maxSymlinkLevels int = 128
@@ -148,17 +148,20 @@ func ExternalizeFiles(manifest *Manifest) *Manifest {
const ManifestSizeMax int = 1048576
var errManifestTooLarge = errors.New("manifest size limit exceeded")
// Uploads inline file data over certain size to the storage backend. Returns a copy of
// the manifest updated to refer to an external content-addressable store.
func StoreManifest(name string, manifest *Manifest) (*Manifest, error) {
extManifest := ExternalizeFiles(manifest)
extManifestData := EncodeManifest(extManifest)
if len(extManifestData) > ManifestSizeMax {
return nil, fmt.Errorf("manifest too big: %d > %d bytes", extManifestData, ManifestSizeMax)
return nil, fmt.Errorf("%w: %d > %d bytes",
errManifestTooLarge, extManifestData, ManifestSizeMax)
}
if err := backend.StageManifest(extManifest); err != nil {
return nil, fmt.Errorf("stage: %w", err)
return nil, fmt.Errorf("stage manifest: %w", err)
}
wg := sync.WaitGroup{}
@@ -180,7 +183,7 @@ func StoreManifest(name string, manifest *Manifest) (*Manifest, error) {
}
if err := backend.CommitManifest(name, extManifest); err != nil {
return nil, fmt.Errorf("commit: %w", err)
return nil, fmt.Errorf("commit manifest: %w", err)
}
return extManifest, nil

View File

@@ -58,7 +58,7 @@ func getPage(w http.ResponseWriter, r *http.Request) error {
if metadataPath, found := strings.CutPrefix(sitePath, ".git-pages/"); found {
// metadata requests require authorization to avoid making pushes from private
// repositories enumerable
_, err := AuthorizeMetadata(r)
_, err := AuthorizeMetadataRetrieval(r)
if err != nil {
return err
}
@@ -67,7 +67,7 @@ func getPage(w http.ResponseWriter, r *http.Request) error {
case "manifest.json":
w.Header().Add("Content-Type", "application/json; charset=utf-8")
w.WriteHeader(http.StatusOK)
w.Write(ManifestDebugJSON(manifest))
w.Write([]byte(ManifestDebugJSON(manifest)))
default:
w.WriteHeader(http.StatusNotFound)
fmt.Fprintf(w, "not found\n")
@@ -158,11 +158,10 @@ func getPage(w http.ResponseWriter, r *http.Request) error {
return nil
}
const SiteSizeMax = 512 * 1048576
func putPage(w http.ResponseWriter, r *http.Request) error {
auth, err := AuthorizeUpdate(r)
if err != nil {
return err
}
var result UpdateResult
host := GetHost(r)
@@ -171,34 +170,59 @@ func putPage(w http.ResponseWriter, r *http.Request) error {
return err
}
// URLs have no length limit, but 64K seems enough for a repository URL
requestBody, err := io.ReadAll(http.MaxBytesReader(w, r.Body, 65536))
if err != nil {
return fmt.Errorf("body read: %w", err)
}
webRoot := makeWebRoot(host, projectName)
// request body contains git repository URL
repoURL := string(requestBody)
if err := AuthorizeRepository(repoURL, auth); err != nil {
return err
contentType := r.Header.Get("Content-Type")
if contentType == "application/x-www-form-urlencoded" {
auth, err := AuthorizeUpdateFromRepository(r)
if err != nil {
return err
}
// URLs have no length limit, but 64K seems enough for a repository URL
requestBody, err := io.ReadAll(http.MaxBytesReader(w, r.Body, 65536))
if err != nil {
return fmt.Errorf("body read: %w", err)
}
repoURL := string(requestBody)
if err := AuthorizeRepository(repoURL, auth); err != nil {
return err
}
branch := "pages"
if customBranch := r.Header.Get("X-Pages-Branch"); customBranch != "" {
branch = customBranch
}
if err := AuthorizeBranch(branch, auth); err != nil {
return err
}
ctx, cancel := context.WithTimeout(r.Context(), updateTimeout)
defer cancel()
result = UpdateFromRepository(ctx, webRoot, repoURL, branch)
} else {
_, err := AuthorizeUpdateFromArchive(r)
if err != nil {
return err
}
// request body contains archive
reader := http.MaxBytesReader(w, r.Body, SiteSizeMax)
result = UpdateFromArchive(webRoot, contentType, reader)
}
branch := "pages"
if customBranch := r.Header.Get("X-Pages-Branch"); customBranch != "" {
branch = customBranch
}
if err := AuthorizeBranch(branch, auth); err != nil {
return err
}
ctx, cancel := context.WithTimeout(r.Context(), updateTimeout)
defer cancel()
result := Update(ctx, webRoot, repoURL, branch)
switch result.outcome {
case UpdateError:
w.WriteHeader(http.StatusServiceUnavailable)
if errors.Is(result.err, errManifestTooLarge) {
w.WriteHeader(http.StatusRequestEntityTooLarge)
} else if errors.Is(result.err, errArchiveFormat) {
w.WriteHeader(http.StatusUnsupportedMediaType)
} else if errors.Is(result.err, errZipBomb) {
w.WriteHeader(http.StatusRequestEntityTooLarge)
} else {
w.WriteHeader(http.StatusServiceUnavailable)
}
case UpdateTimeout:
w.WriteHeader(http.StatusGatewayTimeout)
case UpdateNoChange:
@@ -211,7 +235,9 @@ func putPage(w http.ResponseWriter, r *http.Request) error {
w.Header().Add("X-Pages-Outcome", "deleted")
}
if result.manifest != nil {
fmt.Fprintln(w, *result.manifest.Commit)
if result.manifest.Commit != nil {
fmt.Fprintln(w, *result.manifest.Commit)
}
} else if result.err != nil {
fmt.Fprintln(w, result.err)
} else {
@@ -221,7 +247,7 @@ func putPage(w http.ResponseWriter, r *http.Request) error {
}
func deletePage(w http.ResponseWriter, r *http.Request) error {
_, err := AuthorizeUpdate(r)
_, err := AuthorizeUpdateFromRepository(r)
if err != nil {
return err
}
@@ -246,7 +272,7 @@ func deletePage(w http.ResponseWriter, r *http.Request) error {
}
func postPage(w http.ResponseWriter, r *http.Request) error {
auth, err := AuthorizeUpdate(r)
auth, err := AuthorizeUpdateFromRepository(r)
if err != nil {
return err
}
@@ -258,6 +284,8 @@ func postPage(w http.ResponseWriter, r *http.Request) error {
return err
}
webRoot := makeWebRoot(host, projectName)
eventName := ""
for _, header := range []string{
"X-Forgejo-Event",
@@ -307,8 +335,6 @@ func postPage(w http.ResponseWriter, r *http.Request) error {
return nil
}
webRoot := makeWebRoot(host, projectName)
repoURL := event["repository"].(map[string]any)["clone_url"].(string)
if err := AuthorizeRepository(repoURL, auth); err != nil {
return err
@@ -316,7 +342,7 @@ func postPage(w http.ResponseWriter, r *http.Request) error {
ctx, cancel := context.WithTimeout(r.Context(), updateTimeout)
defer cancel()
result := Update(ctx, webRoot, repoURL, "pages")
result := UpdateFromRepository(ctx, webRoot, repoURL, "pages")
switch result.outcome {
case UpdateError:
w.WriteHeader(http.StatusServiceUnavailable)
@@ -341,7 +367,7 @@ func postPage(w http.ResponseWriter, r *http.Request) error {
}
func ServePages(w http.ResponseWriter, r *http.Request) {
log.Println("pages:", r.Method, r.Host, r.URL)
log.Println("pages:", r.Method, r.Host, r.URL, r.Header.Get("Content-Type"))
if region := os.Getenv("FLY_REGION"); region != "" {
w.Header().Add("Server", fmt.Sprintf("git-pages (fly.io; %s)", region))
} else {

View File

@@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"io"
"log"
)
@@ -25,42 +26,35 @@ type UpdateResult struct {
}
func Update(
ctx context.Context,
webRoot string,
repoURL string,
branch string,
manifest *Manifest,
) UpdateResult {
var fetchManifest, oldManifest, newManifest *Manifest
var oldManifest, newManifest *Manifest
var err error
log.Println("update:", webRoot, repoURL, branch)
outcome := UpdateError
fetchManifest, err = FetchRepository(ctx, repoURL, branch)
if errors.Is(err, context.DeadlineExceeded) {
outcome = UpdateTimeout
err = fmt.Errorf("update timeout")
} else if err == nil {
oldManifest, _ = backend.GetManifest(webRoot)
if IsManifestEmpty(fetchManifest) {
newManifest, err = fetchManifest, backend.DeleteManifest(webRoot)
if err == nil {
if oldManifest == nil {
outcome = UpdateNoChange
} else {
outcome = UpdateDeleted
}
oldManifest, _ = backend.GetManifest(webRoot)
// log.Println("OLD", ManifestDebugJSON(oldManifest))
if IsManifestEmpty(manifest) {
newManifest, err = manifest, backend.DeleteManifest(webRoot)
// log.Println("NEW", ManifestDebugJSON(newManifest))
if err == nil {
if oldManifest == nil {
outcome = UpdateNoChange
} else {
outcome = UpdateDeleted
}
} else if err = PrepareManifest(fetchManifest); err == nil {
newManifest, err = StoreManifest(webRoot, fetchManifest)
if err == nil {
if oldManifest == nil {
outcome = UpdateCreated
} else if CompareManifest(oldManifest, newManifest) {
outcome = UpdateNoChange
} else {
outcome = UpdateReplaced
}
}
} else if err = PrepareManifest(manifest); err == nil {
newManifest, err = StoreManifest(webRoot, manifest)
// log.Println("NEW", ManifestDebugJSON(newManifest))
if err == nil {
if oldManifest == nil {
outcome = UpdateCreated
} else if CompareManifest(oldManifest, newManifest) {
outcome = UpdateNoChange
} else {
outcome = UpdateReplaced
}
}
}
@@ -77,10 +71,61 @@ func Update(
case UpdateNoChange:
status = "unchanged"
}
log.Printf("update ok: %s %s %s", webRoot, *newManifest.Commit, status)
if newManifest.Commit != nil {
log.Printf("update %s ok: %s %s", webRoot, status, *newManifest.Commit)
} else {
log.Printf("update %s ok: %s", webRoot, status)
}
} else {
log.Printf("update err: %s %s", webRoot, err)
log.Printf("update %s err: %s", webRoot, err)
}
return UpdateResult{outcome, newManifest, err}
}
// UpdateFromRepository fetches the given branch of repoURL and, if the fetch succeeds,
// hands the resulting manifest to Update for webRoot.
//
// A fetch that fails because the context deadline was exceeded yields an UpdateTimeout
// result; any other fetch failure yields an UpdateError result carrying that error.
func UpdateFromRepository(
	ctx context.Context,
	webRoot string,
	repoURL string,
	branch string,
) UpdateResult {
	log.Printf("update %s: %s %s\n", webRoot, repoURL, branch)

	fetched, fetchErr := FetchRepository(ctx, repoURL, branch)
	switch {
	case errors.Is(fetchErr, context.DeadlineExceeded):
		return UpdateResult{UpdateTimeout, nil, fmt.Errorf("update timeout")}
	case fetchErr != nil:
		return UpdateResult{UpdateError, nil, fetchErr}
	}

	return Update(webRoot, fetched)
}
// errArchiveFormat is reported when an upload's content type is neither of the
// archive types handled by UpdateFromArchive.
var errArchiveFormat = errors.New("unsupported archive format")

// UpdateFromArchive extracts the archive supplied by reader, choosing the extractor by
// exact match on contentType ("application/x-tar" or "application/zip"), and hands the
// resulting manifest to Update for webRoot.
//
// An unrecognized content type or a failed extraction is logged and returned as an
// UpdateError result carrying the error.
func UpdateFromArchive(
	webRoot string,
	contentType string,
	reader io.Reader,
) UpdateResult {
	var (
		extracted  *Manifest
		extractErr error
	)

	if contentType == "application/x-tar" {
		log.Printf("update %s: (tar)", webRoot)
		extracted, extractErr = ExtractTar(reader)
	} else if contentType == "application/zip" {
		log.Printf("update %s: (zip)", webRoot)
		extracted, extractErr = ExtractZip(reader)
	} else {
		extractErr = errArchiveFormat
	}

	if extractErr == nil {
		return Update(webRoot, extracted)
	}

	log.Printf("update %s err: %s", webRoot, extractErr)
	return UpdateResult{UpdateError, nil, extractErr}
}