mirror of
https://codeberg.org/git-pages/git-pages.git
synced 2026-05-14 03:01:48 +00:00
Allow downloading entire site via CLI or HTTP.
The HTTP endpoint is `/.git-pages/archive.tar` and it is gated behind a feature flag `archive-site`. It serially downloads every blob and writes it to the client in a chunked response, optionally compressed with gzip or zstd as per `Accept-Encoding:`. It is authorized the same as `/.git-pages/manifest.json`, for the same reasons. The CLI operation is `-get-archive <site-name>` and it writes a tar archive to stdout. This could be useful for an administrator to review the contents of a site in response to a report. Both `_headers` and `_redirects` files are present in the output, reconstituted from the manifest.
This commit is contained in:
@@ -43,7 +43,7 @@
|
||||
"-s -w"
|
||||
];
|
||||
|
||||
vendorHash = "sha256-UQl8AeijqJd2qpVZBDuHT/+Dtd3+Uwrf4w4yAOaFs98=";
|
||||
vendorHash = "sha256-oVXELOXbRTzzU8pUGNE4K552thlZXGAX7qpv6ETwz6o=";
|
||||
};
|
||||
in
|
||||
{
|
||||
|
||||
2
go.mod
2
go.mod
@@ -3,7 +3,7 @@ module codeberg.org/git-pages/git-pages
|
||||
go 1.25.0
|
||||
|
||||
require (
|
||||
codeberg.org/git-pages/go-headers v1.0.0
|
||||
codeberg.org/git-pages/go-headers v1.1.0
|
||||
github.com/KimMachineGun/automemlimit v0.7.5
|
||||
github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500
|
||||
github.com/creasty/defaults v1.8.0
|
||||
|
||||
2
go.sum
2
go.sum
@@ -1,5 +1,7 @@
|
||||
codeberg.org/git-pages/go-headers v1.0.0 h1:hvGU97hQdXaT5HwCpZJWQdg7akvtOBCSUNL4u2a5uTs=
|
||||
codeberg.org/git-pages/go-headers v1.0.0/go.mod h1:N4gwH0U3YPwmuyxqH7xBA8j44fTPX+vOEP7ejJVBPts=
|
||||
codeberg.org/git-pages/go-headers v1.1.0 h1:rk7/SOSsn+XuL7PUQZFYUaWKHEaj6K8mXmUV9rF2VxE=
|
||||
codeberg.org/git-pages/go-headers v1.1.0/go.mod h1:N4gwH0U3YPwmuyxqH7xBA8j44fTPX+vOEP7ejJVBPts=
|
||||
github.com/KimMachineGun/automemlimit v0.7.5 h1:RkbaC0MwhjL1ZuBKunGDjE/ggwAX43DwZrJqVwyveTk=
|
||||
github.com/KimMachineGun/automemlimit v0.7.5/go.mod h1:QZxpHaGOQoYvFhv/r4u3U0JTC2ZcOwbSr11UZF46UBM=
|
||||
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
|
||||
|
||||
@@ -310,6 +310,7 @@ func authorizeCodebergPagesV2(r *http.Request) (*Authorization, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// Checks whether an operation that enables enumerating site contents is allowed.
|
||||
func AuthorizeMetadataRetrieval(r *http.Request) (*Authorization, error) {
|
||||
causes := []error{AuthError{http.StatusUnauthorized, "unauthorized"}}
|
||||
|
||||
|
||||
126
src/collect.go
Normal file
126
src/collect.go
Normal file
@@ -0,0 +1,126 @@
|
||||
package git_pages
|
||||
|
||||
import (
|
||||
"archive/tar"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Flusher is implemented by writers that can push pending output downstream on request.
// Unlike `http.Flusher`, its `Flush` method reports an error. `CollectTar` calls `Flush`
// on its destination once the archive is written, if the destination implements this.
type Flusher interface {
	Flush() error
}
|
||||
|
||||
// Inverse of `ExtractTar`.
|
||||
func CollectTar(
|
||||
context context.Context, writer io.Writer, manifest *Manifest, manifestMtime time.Time,
|
||||
) (
|
||||
err error,
|
||||
) {
|
||||
archive := tar.NewWriter(writer)
|
||||
|
||||
appendFile := func(header *tar.Header, data []byte, transform Transform) (err error) {
|
||||
switch transform {
|
||||
case Transform_None:
|
||||
case Transform_Zstandard:
|
||||
data, err = zstdDecoder.DecodeAll(data, []byte{})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unexpected transform")
|
||||
}
|
||||
header.Size = int64(len(data))
|
||||
|
||||
err = archive.WriteHeader(header)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
_, err = archive.Write(data)
|
||||
return
|
||||
}
|
||||
|
||||
for fileName, entry := range manifest.Contents {
|
||||
var header tar.Header
|
||||
if fileName == "" {
|
||||
continue
|
||||
}
|
||||
header.Name = fileName
|
||||
|
||||
switch entry.GetType() {
|
||||
case Type_Directory:
|
||||
header.Typeflag = tar.TypeDir
|
||||
header.Mode = 0755
|
||||
header.ModTime = manifestMtime
|
||||
err = appendFile(&header, nil, Transform_None)
|
||||
|
||||
case Type_InlineFile:
|
||||
header.Typeflag = tar.TypeReg
|
||||
header.Mode = 0644
|
||||
header.ModTime = manifestMtime
|
||||
err = appendFile(&header, entry.GetData(), entry.GetTransform())
|
||||
|
||||
case Type_ExternalFile:
|
||||
var blobReader io.Reader
|
||||
var blobMtime time.Time
|
||||
var blobData []byte
|
||||
blobReader, _, blobMtime, err = backend.GetBlob(context, string(entry.Data))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
blobData, _ = io.ReadAll(blobReader)
|
||||
header.Typeflag = tar.TypeReg
|
||||
header.Mode = 0644
|
||||
header.ModTime = blobMtime
|
||||
err = appendFile(&header, blobData, entry.GetTransform())
|
||||
|
||||
case Type_Symlink:
|
||||
header.Typeflag = tar.TypeSymlink
|
||||
header.Mode = 0644
|
||||
header.ModTime = manifestMtime
|
||||
err = appendFile(&header, entry.GetData(), Transform_None)
|
||||
|
||||
default:
|
||||
return fmt.Errorf("unexpected entry type")
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if redirects := CollectRedirectsFile(manifest); redirects != "" {
|
||||
err = appendFile(&tar.Header{
|
||||
Name: RedirectsFileName,
|
||||
Typeflag: tar.TypeReg,
|
||||
Mode: 0644,
|
||||
ModTime: manifestMtime,
|
||||
}, []byte(redirects), Transform_None)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if headers := CollectHeadersFile(manifest); headers != "" {
|
||||
err = appendFile(&tar.Header{
|
||||
Name: HeadersFileName,
|
||||
Typeflag: tar.TypeReg,
|
||||
Mode: 0644,
|
||||
ModTime: manifestMtime,
|
||||
}, []byte(headers), Transform_None)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
err = archive.Flush()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
flusher, ok := writer.(Flusher)
|
||||
if ok {
|
||||
err = flusher.Flush()
|
||||
}
|
||||
return err
|
||||
}
|
||||
@@ -15,7 +15,7 @@ import (
|
||||
|
||||
var ErrHeaderNotAllowed = errors.New("custom header not allowed")
|
||||
|
||||
const headersFileName string = "_headers"
|
||||
const HeadersFileName string = "_headers"
|
||||
|
||||
// Lifted from https://docs.netlify.com/manage/routing/headers/, except for `Set-Cookie`
|
||||
// the rationale for which does not apply in our environment.
|
||||
@@ -86,24 +86,24 @@ func validateHeaderRule(rule headers.Rule) error {
|
||||
|
||||
// Parses headers file and injects rules into the manifest.
|
||||
func ProcessHeadersFile(manifest *Manifest) error {
|
||||
headersEntry := manifest.Contents[headersFileName]
|
||||
delete(manifest.Contents, headersFileName)
|
||||
headersEntry := manifest.Contents[HeadersFileName]
|
||||
delete(manifest.Contents, HeadersFileName)
|
||||
if headersEntry == nil {
|
||||
return nil
|
||||
} else if headersEntry.GetType() != Type_InlineFile {
|
||||
return AddProblem(manifest, headersFileName,
|
||||
return AddProblem(manifest, HeadersFileName,
|
||||
"not a regular file")
|
||||
}
|
||||
|
||||
rules, err := headers.ParseString(string(headersEntry.GetData()))
|
||||
if err != nil {
|
||||
return AddProblem(manifest, headersFileName,
|
||||
return AddProblem(manifest, HeadersFileName,
|
||||
"syntax error: %s", err)
|
||||
}
|
||||
|
||||
for index, rule := range rules {
|
||||
if err := validateHeaderRule(rule); err != nil {
|
||||
AddProblem(manifest, headersFileName,
|
||||
AddProblem(manifest, HeadersFileName,
|
||||
"rule #%d %q: %s", index+1, rule.Path, err)
|
||||
continue
|
||||
}
|
||||
@@ -122,6 +122,21 @@ func ProcessHeadersFile(manifest *Manifest) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func CollectHeadersFile(manifest *Manifest) string {
|
||||
var headersRules []headers.Rule
|
||||
for _, manifestRule := range manifest.GetHeaders() {
|
||||
headersRule := headers.Rule{
|
||||
Path: manifestRule.GetPath(),
|
||||
Headers: http.Header{},
|
||||
}
|
||||
for _, manifestHeader := range manifestRule.GetHeaderMap() {
|
||||
headersRule.Headers[manifestHeader.GetName()] = manifestHeader.GetValues()
|
||||
}
|
||||
headersRules = append(headersRules, headersRule)
|
||||
}
|
||||
return headers.Must(headers.UnparseString(headersRules))
|
||||
}
|
||||
|
||||
func ApplyHeaderRules(manifest *Manifest, url *url.URL) (headers http.Header, err error) {
|
||||
headers = http.Header{}
|
||||
fromSegments := pathSegments(url.Path)
|
||||
|
||||
77
src/main.go
77
src/main.go
@@ -69,6 +69,18 @@ func serve(listener net.Listener, handler http.Handler) {
|
||||
}
|
||||
}
|
||||
|
||||
// webRootArg converts a site name given on the command line into a webroot.
//
// A bare 'domain.tld' refers to the index site of that domain and is expanded to
// 'domain.tld/.index'; a 'domain.tld/dir' argument is returned unchanged. An argument
// with more than one slash is rejected and terminates the process.
func webRootArg(arg string) string {
	switch strings.Count(arg, "/") {
	case 0:
		return arg + "/.index"
	case 1:
		return arg
	default:
		// The message is fixed text, so it is logged verbatim rather than being
		// interpreted as a format string.
		log.Fatalln("webroot argument must be either 'domain.tld' or 'domain.tld/dir'")
		return "" // unreachable; `log.Fatalln` exits the process
	}
}
|
||||
|
||||
func Main() {
|
||||
printConfigEnvVars := flag.Bool("print-config-env-vars", false,
|
||||
"print every recognized configuration environment variable and exit")
|
||||
@@ -80,16 +92,28 @@ func Main() {
|
||||
"run without configuration file (configure via environment variables)")
|
||||
runMigration := flag.String("run-migration", "",
|
||||
"run a specific store migration (available: \"create-domain-markers\")")
|
||||
getManifest := flag.String("get-manifest", "",
|
||||
"write manifest for `webroot` (either 'domain.tld' or 'domain.tld/dir') to stdout as ProtoJSON")
|
||||
getBlob := flag.String("get-blob", "",
|
||||
"write `blob` ('sha256-xxxxxxx...xxx') to stdout")
|
||||
"write contents of `blob-ref` ('sha256-xxxxxxx...xxx') to stdout")
|
||||
getManifest := flag.String("get-manifest", "",
|
||||
"write manifest for `site-name` (either 'domain.tld' or 'domain.tld/dir') to stdout as ProtoJSON")
|
||||
getArchive := flag.String("get-archive", "",
|
||||
"write archive for `site-name` (either 'domain.tld' or 'domain.tld/dir') to stdout in tar format")
|
||||
updateSite := flag.String("update-site", "",
|
||||
"update site for `webroot` (either 'domain.tld' or 'domain.tld/dir') from archive or repository URL")
|
||||
"update site for `site-name` (either 'domain.tld' or 'domain.tld/dir') from archive or repository URL")
|
||||
flag.Parse()
|
||||
|
||||
if *getManifest != "" && *getBlob != "" {
|
||||
log.Fatalln("-get-manifest and -get-blob are mutually exclusive")
|
||||
var cliOperations int
|
||||
if *getBlob != "" {
|
||||
cliOperations += 1
|
||||
}
|
||||
if *getManifest != "" {
|
||||
cliOperations += 1
|
||||
}
|
||||
if *getArchive != "" {
|
||||
cliOperations += 1
|
||||
}
|
||||
if cliOperations > 1 {
|
||||
log.Fatalln("-get-blob, -get-manifest, and -get-archive are mutually exclusive")
|
||||
}
|
||||
|
||||
if *configTomlPath != "" && *noConfig {
|
||||
@@ -150,22 +174,6 @@ func Main() {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
case *getManifest != "":
|
||||
if err := ConfigureBackend(&config.Storage); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
webRoot := *getManifest
|
||||
if !strings.Contains(webRoot, "/") {
|
||||
webRoot += "/.index"
|
||||
}
|
||||
|
||||
manifest, _, err := backend.GetManifest(context.Background(), webRoot, GetManifestOptions{})
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
fmt.Println(ManifestDebugJSON(manifest))
|
||||
|
||||
case *getBlob != "":
|
||||
if err := ConfigureBackend(&config.Storage); err != nil {
|
||||
log.Fatalln(err)
|
||||
@@ -178,6 +186,31 @@ func Main() {
|
||||
|
||||
io.Copy(os.Stdout, reader)
|
||||
|
||||
case *getManifest != "":
|
||||
if err := ConfigureBackend(&config.Storage); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
webRoot := webRootArg(*getManifest)
|
||||
manifest, _, err := backend.GetManifest(context.Background(), webRoot, GetManifestOptions{})
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
fmt.Println(ManifestDebugJSON(manifest))
|
||||
|
||||
case *getArchive != "":
|
||||
if err := ConfigureBackend(&config.Storage); err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
|
||||
webRoot := webRootArg(*getArchive)
|
||||
manifest, manifestMtime, err :=
|
||||
backend.GetManifest(context.Background(), webRoot, GetManifestOptions{})
|
||||
if err != nil {
|
||||
log.Fatalln(err)
|
||||
}
|
||||
CollectTar(context.Background(), os.Stdout, manifest, manifestMtime)
|
||||
|
||||
case *updateSite != "":
|
||||
if err := ConfigureBackend(&config.Storage); err != nil {
|
||||
log.Fatalln(err)
|
||||
|
||||
42
src/pages.go
42
src/pages.go
@@ -2,6 +2,7 @@ package git_pages
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"compress/gzip"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
@@ -159,13 +160,14 @@ func getPage(w http.ResponseWriter, r *http.Request) error {
|
||||
}
|
||||
if metadataPath, found := strings.CutPrefix(sitePath, ".git-pages/"); found {
|
||||
lastModified := manifestMtime.UTC().Format(http.TimeFormat)
|
||||
switch metadataPath {
|
||||
case "health":
|
||||
switch {
|
||||
case metadataPath == "health":
|
||||
w.Header().Add("Last-Modified", lastModified)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
fmt.Fprintf(w, "ok\n")
|
||||
return nil
|
||||
|
||||
case "manifest.json":
|
||||
case metadataPath == "manifest.json":
|
||||
// metadata requests require authorization to avoid making pushes from private
|
||||
// repositories enumerable
|
||||
_, err := AuthorizeMetadataRetrieval(r)
|
||||
@@ -177,12 +179,42 @@ func getPage(w http.ResponseWriter, r *http.Request) error {
|
||||
w.Header().Add("Last-Modified", lastModified)
|
||||
w.WriteHeader(http.StatusOK)
|
||||
w.Write([]byte(ManifestDebugJSON(manifest)))
|
||||
return nil
|
||||
|
||||
case metadataPath == "archive.tar" && config.Feature("archive-site"):
|
||||
// same as above
|
||||
_, err := AuthorizeMetadataRetrieval(r)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// we only offer `/.git-pages/archive.tar` and not the `.tar.gz`/`.tar.zst` variants
|
||||
// because HTTP can already request compression using the `Content-Encoding` mechanism
|
||||
acceptedEncodings := parseHTTPEncodings(r.Header.Get("Accept-Encoding"))
|
||||
negotiated := acceptedEncodings.Negotiate("zstd", "gzip", "identity")
|
||||
if negotiated != "" {
|
||||
w.Header().Set("Content-Encoding", negotiated)
|
||||
}
|
||||
w.Header().Add("Content-Type", "application/x-tar")
|
||||
w.Header().Add("Last-Modified", lastModified)
|
||||
w.Header().Add("Transfer-Encoding", "chunked")
|
||||
w.WriteHeader(http.StatusOK)
|
||||
var iow io.Writer
|
||||
switch negotiated {
|
||||
case "", "identity":
|
||||
iow = w
|
||||
case "gzip":
|
||||
iow = gzip.NewWriter(w)
|
||||
case "zstd":
|
||||
iow, _ = zstd.NewWriter(w)
|
||||
}
|
||||
return CollectTar(r.Context(), iow, manifest, manifestMtime)
|
||||
|
||||
default:
|
||||
w.WriteHeader(http.StatusNotFound)
|
||||
fmt.Fprintf(w, "not found\n")
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
entryPath := sitePath
|
||||
@@ -297,6 +329,8 @@ func getPage(w http.ResponseWriter, r *http.Request) error {
|
||||
default:
|
||||
negotiatedEncoding = false
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unexpected transform")
|
||||
}
|
||||
if !negotiatedEncoding {
|
||||
w.WriteHeader(http.StatusNotAcceptable)
|
||||
|
||||
@@ -11,7 +11,7 @@ import (
|
||||
"google.golang.org/protobuf/proto"
|
||||
)
|
||||
|
||||
const redirectsFileName string = "_redirects"
|
||||
const RedirectsFileName string = "_redirects"
|
||||
|
||||
func unparseRule(rule redirects.Rule) string {
|
||||
var statusPart string
|
||||
@@ -87,24 +87,24 @@ func validateRedirectRule(rule redirects.Rule) error {
|
||||
|
||||
// Parses redirects file and injects rules into the manifest.
|
||||
func ProcessRedirectsFile(manifest *Manifest) error {
|
||||
redirectsEntry := manifest.Contents[redirectsFileName]
|
||||
delete(manifest.Contents, redirectsFileName)
|
||||
redirectsEntry := manifest.Contents[RedirectsFileName]
|
||||
delete(manifest.Contents, RedirectsFileName)
|
||||
if redirectsEntry == nil {
|
||||
return nil
|
||||
} else if redirectsEntry.GetType() != Type_InlineFile {
|
||||
return AddProblem(manifest, redirectsFileName,
|
||||
return AddProblem(manifest, RedirectsFileName,
|
||||
"not a regular file")
|
||||
}
|
||||
|
||||
rules, err := redirects.ParseString(string(redirectsEntry.GetData()))
|
||||
if err != nil {
|
||||
return AddProblem(manifest, redirectsFileName,
|
||||
return AddProblem(manifest, RedirectsFileName,
|
||||
"syntax error: %s", err)
|
||||
}
|
||||
|
||||
for index, rule := range rules {
|
||||
if err := validateRedirectRule(rule); err != nil {
|
||||
AddProblem(manifest, redirectsFileName,
|
||||
AddProblem(manifest, RedirectsFileName,
|
||||
"rule #%d %q: %s", index+1, unparseRule(rule), err)
|
||||
continue
|
||||
}
|
||||
@@ -118,6 +118,19 @@ func ProcessRedirectsFile(manifest *Manifest) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func CollectRedirectsFile(manifest *Manifest) string {
|
||||
var rules []string
|
||||
for _, rule := range manifest.GetRedirects() {
|
||||
rules = append(rules, unparseRule(redirects.Rule{
|
||||
From: rule.GetFrom(),
|
||||
To: rule.GetTo(),
|
||||
Status: int(rule.GetStatus()),
|
||||
Force: rule.GetForce(),
|
||||
})+"\n")
|
||||
}
|
||||
return strings.Join(rules, "")
|
||||
}
|
||||
|
||||
// pathSegments splits a slash-separated path into its segments after dropping at most one
// leading slash. An empty or root path yields a single empty segment, and a trailing slash
// yields a trailing empty segment.
func pathSegments(path string) []string {
	relative := strings.TrimPrefix(path, "/")
	segments := strings.Split(relative, "/")
	return segments
}
|
||||
|
||||
Reference in New Issue
Block a user