diff --git a/flake.nix b/flake.nix index 05459ae..7e05933 100644 --- a/flake.nix +++ b/flake.nix @@ -43,7 +43,7 @@ "-s -w" ]; - vendorHash = "sha256-UQl8AeijqJd2qpVZBDuHT/+Dtd3+Uwrf4w4yAOaFs98="; + vendorHash = "sha256-oVXELOXbRTzzU8pUGNE4K552thlZXGAX7qpv6ETwz6o="; }; in { diff --git a/go.mod b/go.mod index 8ec4edb..17ed55c 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module codeberg.org/git-pages/git-pages go 1.25.0 require ( - codeberg.org/git-pages/go-headers v1.0.0 + codeberg.org/git-pages/go-headers v1.1.0 github.com/KimMachineGun/automemlimit v0.7.5 github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 github.com/creasty/defaults v1.8.0 diff --git a/go.sum b/go.sum index 85b247e..d6b54de 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,7 @@ codeberg.org/git-pages/go-headers v1.0.0 h1:hvGU97hQdXaT5HwCpZJWQdg7akvtOBCSUNL4u2a5uTs= codeberg.org/git-pages/go-headers v1.0.0/go.mod h1:N4gwH0U3YPwmuyxqH7xBA8j44fTPX+vOEP7ejJVBPts= +codeberg.org/git-pages/go-headers v1.1.0 h1:rk7/SOSsn+XuL7PUQZFYUaWKHEaj6K8mXmUV9rF2VxE= +codeberg.org/git-pages/go-headers v1.1.0/go.mod h1:N4gwH0U3YPwmuyxqH7xBA8j44fTPX+vOEP7ejJVBPts= github.com/KimMachineGun/automemlimit v0.7.5 h1:RkbaC0MwhjL1ZuBKunGDjE/ggwAX43DwZrJqVwyveTk= github.com/KimMachineGun/automemlimit v0.7.5/go.mod h1:QZxpHaGOQoYvFhv/r4u3U0JTC2ZcOwbSr11UZF46UBM= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= diff --git a/src/auth.go b/src/auth.go index 8898dc7..6689b21 100644 --- a/src/auth.go +++ b/src/auth.go @@ -310,6 +310,7 @@ func authorizeCodebergPagesV2(r *http.Request) (*Authorization, error) { } } +// Checks whether an operation that enables enumerating site contents is allowed. 
func AuthorizeMetadataRetrieval(r *http.Request) (*Authorization, error) { causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}} diff --git a/src/collect.go b/src/collect.go new file mode 100644 index 0000000..64e7175 --- /dev/null +++ b/src/collect.go @@ -0,0 +1,126 @@ +package git_pages + +import ( + "archive/tar" + "context" + "fmt" + "io" + "time" +) + +type Flusher interface { + Flush() error +} + +// Inverse of `ExtractTar`. +func CollectTar( + context context.Context, writer io.Writer, manifest *Manifest, manifestMtime time.Time, +) ( + err error, +) { + archive := tar.NewWriter(writer) + + appendFile := func(header *tar.Header, data []byte, transform Transform) (err error) { + switch transform { + case Transform_None: + case Transform_Zstandard: + data, err = zstdDecoder.DecodeAll(data, []byte{}) + if err != nil { + return err + } + default: + return fmt.Errorf("unexpected transform") + } + header.Size = int64(len(data)) + + err = archive.WriteHeader(header) + if err != nil { + return + } + _, err = archive.Write(data) + return + } + + for fileName, entry := range manifest.Contents { + var header tar.Header + if fileName == "" { + continue + } + header.Name = fileName + + switch entry.GetType() { + case Type_Directory: + header.Typeflag = tar.TypeDir + header.Mode = 0755 + header.ModTime = manifestMtime + err = appendFile(&header, nil, Transform_None) + + case Type_InlineFile: + header.Typeflag = tar.TypeReg + header.Mode = 0644 + header.ModTime = manifestMtime + err = appendFile(&header, entry.GetData(), entry.GetTransform()) + + case Type_ExternalFile: + var blobReader io.Reader + var blobMtime time.Time + var blobData []byte + blobReader, _, blobMtime, err = backend.GetBlob(context, string(entry.Data)) + if err != nil { + return + } + blobData, _ = io.ReadAll(blobReader) + header.Typeflag = tar.TypeReg + header.Mode = 0644 + header.ModTime = blobMtime + err = appendFile(&header, blobData, entry.GetTransform()) + + case Type_Symlink: + 
header.Typeflag = tar.TypeSymlink + header.Linkname = string(entry.GetData()) + header.ModTime = manifestMtime + err = appendFile(&header, nil, Transform_None) + + default: + return fmt.Errorf("unexpected entry type") + } + if err != nil { + return err + } + } + + if redirects := CollectRedirectsFile(manifest); redirects != "" { + err = appendFile(&tar.Header{ + Name: RedirectsFileName, + Typeflag: tar.TypeReg, + Mode: 0644, + ModTime: manifestMtime, + }, []byte(redirects), Transform_None) + if err != nil { + return err + } + } + + if headers := CollectHeadersFile(manifest); headers != "" { + err = appendFile(&tar.Header{ + Name: HeadersFileName, + Typeflag: tar.TypeReg, + Mode: 0644, + ModTime: manifestMtime, + }, []byte(headers), Transform_None) + if err != nil { + return err + } + } + + err = archive.Close() + if err != nil { + return err + } + + flusher, ok := writer.(Flusher) + if ok { + err = flusher.Flush() + } + return err +} diff --git a/src/headers.go b/src/headers.go index 9668542..1385835 100644 --- a/src/headers.go +++ b/src/headers.go @@ -15,7 +15,7 @@ import ( var ErrHeaderNotAllowed = errors.New("custom header not allowed") -const headersFileName string = "_headers" +const HeadersFileName string = "_headers" // Lifted from https://docs.netlify.com/manage/routing/headers/, except for `Set-Cookie` // the rationale for which does not apply in our environment. @@ -86,24 +86,24 @@ func validateHeaderRule(rule headers.Rule) error { // Parses redirects file and injects rules into the manifest. 
func ProcessHeadersFile(manifest *Manifest) error { - headersEntry := manifest.Contents[headersFileName] - delete(manifest.Contents, headersFileName) + headersEntry := manifest.Contents[HeadersFileName] + delete(manifest.Contents, HeadersFileName) if headersEntry == nil { return nil } else if headersEntry.GetType() != Type_InlineFile { - return AddProblem(manifest, headersFileName, + return AddProblem(manifest, HeadersFileName, "not a regular file") } rules, err := headers.ParseString(string(headersEntry.GetData())) if err != nil { - return AddProblem(manifest, headersFileName, + return AddProblem(manifest, HeadersFileName, "syntax error: %s", err) } for index, rule := range rules { if err := validateHeaderRule(rule); err != nil { - AddProblem(manifest, headersFileName, + AddProblem(manifest, HeadersFileName, "rule #%d %q: %s", index+1, rule.Path, err) continue } @@ -122,6 +122,21 @@ func ProcessHeadersFile(manifest *Manifest) error { return nil } +func CollectHeadersFile(manifest *Manifest) string { + var headersRules []headers.Rule + for _, manifestRule := range manifest.GetHeaders() { + headersRule := headers.Rule{ + Path: manifestRule.GetPath(), + Headers: http.Header{}, + } + for _, manifestHeader := range manifestRule.GetHeaderMap() { + headersRule.Headers[manifestHeader.GetName()] = manifestHeader.GetValues() + } + headersRules = append(headersRules, headersRule) + } + return headers.Must(headers.UnparseString(headersRules)) +} + func ApplyHeaderRules(manifest *Manifest, url *url.URL) (headers http.Header, err error) { headers = http.Header{} fromSegments := pathSegments(url.Path) diff --git a/src/main.go b/src/main.go index 0eb06e5..9732818 100644 --- a/src/main.go +++ b/src/main.go @@ -69,6 +69,18 @@ func serve(listener net.Listener, handler http.Handler) { } } +func webRootArg(arg string) string { + switch strings.Count(arg, "/") { + case 0: + return arg + "/.index" + case 1: + return arg + default: + log.Fatalf("webroot argument must be either 
'domain.tld' or 'domain.tld/dir'") + return "" + } +} + func Main() { printConfigEnvVars := flag.Bool("print-config-env-vars", false, "print every recognized configuration environment variable and exit") @@ -80,16 +92,28 @@ func Main() { "run without configuration file (configure via environment variables)") runMigration := flag.String("run-migration", "", "run a specific store migration (available: \"create-domain-markers\")") - getManifest := flag.String("get-manifest", "", - "write manifest for `webroot` (either 'domain.tld' or 'domain.tld/dir') to stdout as ProtoJSON") getBlob := flag.String("get-blob", "", - "write `blob` ('sha256-xxxxxxx...xxx') to stdout") + "write contents of `blob-ref` ('sha256-xxxxxxx...xxx') to stdout") + getManifest := flag.String("get-manifest", "", + "write manifest for `site-name` (either 'domain.tld' or 'domain.tld/dir') to stdout as ProtoJSON") + getArchive := flag.String("get-archive", "", + "write archive for `site-name` (either 'domain.tld' or 'domain.tld/dir') to stdout in tar format") updateSite := flag.String("update-site", "", - "update site for `webroot` (either 'domain.tld' or 'domain.tld/dir') from archive or repository URL") + "update site for `site-name` (either 'domain.tld' or 'domain.tld/dir') from archive or repository URL") flag.Parse() - if *getManifest != "" && *getBlob != "" { - log.Fatalln("-get-manifest and -get-blob are mutually exclusive") + var cliOperations int + if *getBlob != "" { + cliOperations += 1 + } + if *getManifest != "" { + cliOperations += 1 + } + if *getArchive != "" { + cliOperations += 1 + } + if cliOperations > 1 { + log.Fatalln("-get-blob, -get-manifest, and -get-archive are mutually exclusive") } if *configTomlPath != "" && *noConfig { @@ -150,22 +174,6 @@ func Main() { log.Fatalln(err) } - case *getManifest != "": - if err := ConfigureBackend(&config.Storage); err != nil { - log.Fatalln(err) - } - - webRoot := *getManifest - if !strings.Contains(webRoot, "/") { - webRoot += "/.index" - } - 
- manifest, _, err := backend.GetManifest(context.Background(), webRoot, GetManifestOptions{}) - if err != nil { - log.Fatalln(err) - } - fmt.Println(ManifestDebugJSON(manifest)) - case *getBlob != "": if err := ConfigureBackend(&config.Storage); err != nil { log.Fatalln(err) @@ -178,6 +186,31 @@ func Main() { io.Copy(os.Stdout, reader) + case *getManifest != "": + if err := ConfigureBackend(&config.Storage); err != nil { + log.Fatalln(err) + } + + webRoot := webRootArg(*getManifest) + manifest, _, err := backend.GetManifest(context.Background(), webRoot, GetManifestOptions{}) + if err != nil { + log.Fatalln(err) + } + fmt.Println(ManifestDebugJSON(manifest)) + + case *getArchive != "": + if err := ConfigureBackend(&config.Storage); err != nil { + log.Fatalln(err) + } + + webRoot := webRootArg(*getArchive) + manifest, manifestMtime, err := + backend.GetManifest(context.Background(), webRoot, GetManifestOptions{}) + if err != nil { + log.Fatalln(err) + } + CollectTar(context.Background(), os.Stdout, manifest, manifestMtime) + case *updateSite != "": if err := ConfigureBackend(&config.Storage); err != nil { log.Fatalln(err) diff --git a/src/pages.go b/src/pages.go index 95f47ce..2ee310f 100644 --- a/src/pages.go +++ b/src/pages.go @@ -2,6 +2,7 @@ package git_pages import ( "bytes" + "compress/gzip" "context" "encoding/json" "errors" @@ -159,13 +160,14 @@ func getPage(w http.ResponseWriter, r *http.Request) error { } if metadataPath, found := strings.CutPrefix(sitePath, ".git-pages/"); found { lastModified := manifestMtime.UTC().Format(http.TimeFormat) - switch metadataPath { - case "health": + switch { + case metadataPath == "health": w.Header().Add("Last-Modified", lastModified) w.WriteHeader(http.StatusOK) fmt.Fprintf(w, "ok\n") + return nil - case "manifest.json": + case metadataPath == "manifest.json": // metadata requests require authorization to avoid making pushes from private // repositories enumerable _, err := AuthorizeMetadataRetrieval(r) @@ -177,12 
+179,42 @@ func getPage(w http.ResponseWriter, r *http.Request) error { w.Header().Add("Last-Modified", lastModified) w.WriteHeader(http.StatusOK) w.Write([]byte(ManifestDebugJSON(manifest))) + return nil + + case metadataPath == "archive.tar" && config.Feature("archive-site"): + // same as above + _, err := AuthorizeMetadataRetrieval(r) + if err != nil { + return err + } + + // we only offer `/.git-pages/archive.tar` and not the `.tar.gz`/`.tar.zst` variants + // because HTTP can already request compression using the `Content-Encoding` mechanism + acceptedEncodings := parseHTTPEncodings(r.Header.Get("Accept-Encoding")) + negotiated := acceptedEncodings.Negotiate("zstd", "gzip", "identity") + if negotiated != "" { + w.Header().Set("Content-Encoding", negotiated) + } + w.Header().Add("Content-Type", "application/x-tar") + w.Header().Add("Last-Modified", lastModified) + w.Header().Add("Transfer-Encoding", "chunked") + w.WriteHeader(http.StatusOK) + var iow io.Writer + switch negotiated { + case "", "identity": + iow = w + case "gzip": + iow = gzip.NewWriter(w) + case "zstd": + iow, _ = zstd.NewWriter(w) + } + return CollectTar(r.Context(), iow, manifest, manifestMtime) default: w.WriteHeader(http.StatusNotFound) fmt.Fprintf(w, "not found\n") + return nil } - return nil } entryPath := sitePath @@ -297,6 +329,8 @@ func getPage(w http.ResponseWriter, r *http.Request) error { default: negotiatedEncoding = false } + default: + return fmt.Errorf("unexpected transform") } if !negotiatedEncoding { w.WriteHeader(http.StatusNotAcceptable) diff --git a/src/redirects.go b/src/redirects.go index 5dde399..a0568e2 100644 --- a/src/redirects.go +++ b/src/redirects.go @@ -11,7 +11,7 @@ import ( "google.golang.org/protobuf/proto" ) -const redirectsFileName string = "_redirects" +const RedirectsFileName string = "_redirects" func unparseRule(rule redirects.Rule) string { var statusPart string @@ -87,24 +87,24 @@ func validateRedirectRule(rule redirects.Rule) error { // Parses redirects 
file and injects rules into the manifest. func ProcessRedirectsFile(manifest *Manifest) error { - redirectsEntry := manifest.Contents[redirectsFileName] - delete(manifest.Contents, redirectsFileName) + redirectsEntry := manifest.Contents[RedirectsFileName] + delete(manifest.Contents, RedirectsFileName) if redirectsEntry == nil { return nil } else if redirectsEntry.GetType() != Type_InlineFile { - return AddProblem(manifest, redirectsFileName, + return AddProblem(manifest, RedirectsFileName, "not a regular file") } rules, err := redirects.ParseString(string(redirectsEntry.GetData())) if err != nil { - return AddProblem(manifest, redirectsFileName, + return AddProblem(manifest, RedirectsFileName, "syntax error: %s", err) } for index, rule := range rules { if err := validateRedirectRule(rule); err != nil { - AddProblem(manifest, redirectsFileName, + AddProblem(manifest, RedirectsFileName, "rule #%d %q: %s", index+1, unparseRule(rule), err) continue } @@ -118,6 +118,19 @@ func ProcessRedirectsFile(manifest *Manifest) error { return nil } +func CollectRedirectsFile(manifest *Manifest) string { + var rules []string + for _, rule := range manifest.GetRedirects() { + rules = append(rules, unparseRule(redirects.Rule{ + From: rule.GetFrom(), + To: rule.GetTo(), + Status: int(rule.GetStatus()), + Force: rule.GetForce(), + })+"\n") + } + return strings.Join(rules, "") +} + func pathSegments(path string) []string { return strings.Split(strings.TrimPrefix(path, "/"), "/") }