// git-pages/src/main.go
package git_pages
import (
"cmp"
"context"
"crypto/tls"
"errors"
"flag"
"fmt"
"io"
"log"
"log/slog"
"net"
"net/http"
"net/http/httputil"
"net/url"
"os"
"path"
"runtime/debug"
"slices"
"strings"
"time"
automemlimit "github.com/KimMachineGun/automemlimit/memlimit"
"github.com/c2h5oh/datasize"
"github.com/fatih/color"
"github.com/kankanreno/go-snowflake"
"github.com/prometheus/client_golang/prometheus/promhttp"
"google.golang.org/protobuf/proto"
)
// Process-wide state, initialized in Main and the configure* helpers below.
var config *Config                   // active configuration; replaced wholesale on reload (see OnReload in Main)
var wildcards []*WildcardPattern     // compiled wildcard patterns derived from config.Wildcard
var fallback http.Handler            // optional reverse proxy for unmatched requests; nil when no fallback is configured
var backend Backend                  // storage backend; created once (cache preserved across config reloads)
// configureFeatures logs the list of enabled feature flags, if any.
func configureFeatures(ctx context.Context) (err error) {
	if features := config.Features; len(features) != 0 {
		logc.Println(ctx, "features:", strings.Join(features, ", "))
	}
	return
}
// configureMemLimit sets the Go soft memory limit (GOMEMLIMIT) from the
// cgroup or system memory, scaled by config.Limits.MaxHeapSizeRatio, and
// logs the before/after values.
func configureMemLimit(ctx context.Context) (err error) {
	// Avoid being OOM killed by not garbage collecting early enough.
	// SetMemoryLimit(-1) does not change the limit; it returns the current one.
	memlimitBefore := datasize.ByteSize(debug.SetMemoryLimit(-1))
	automemlimit.SetGoMemLimitWithOpts(
		// Discard automemlimit's own logging; we report the result ourselves below.
		automemlimit.WithLogger(slog.New(slog.DiscardHandler)),
		automemlimit.WithProvider(
			// Prefer the cgroup limit (containers); fall back to total system memory.
			automemlimit.ApplyFallback(
				automemlimit.FromCgroup,
				automemlimit.FromSystem,
			),
		),
		automemlimit.WithRatio(float64(config.Limits.MaxHeapSizeRatio)),
	)
	memlimitAfter := datasize.ByteSize(debug.SetMemoryLimit(-1))
	if memlimitBefore == memlimitAfter {
		logc.Println(ctx, "memlimit: now", memlimitBefore.HR())
	} else {
		logc.Println(ctx, "memlimit: was", memlimitBefore.HR(), "now", memlimitAfter.HR())
	}
	return
}
// configureConcurrency sizes the semaphore that bounds concurrent blob uploads.
// Can only be safely called during initial configuration.
// (Replacing the channel while uploads hold slots would leak permits.)
func configureConcurrency(_ context.Context) (err error) {
	putBlobSemaphore = make(chan struct{}, config.Limits.ConcurrentUploads)
	return
}
// configureWildcards compiles config.Wildcard into the package-level
// wildcards slice. On error the previous wildcards are left untouched,
// which keeps the old configuration live during a failed reload.
func configureWildcards(_ context.Context) (err error) {
	// Early return instead of else-after-return, per Go convention.
	newWildcards, err := TranslateWildcards(config.Wildcard)
	if err != nil {
		return err
	}
	wildcards = newWildcards
	return nil
}
// configureFallback installs a reverse proxy as the fallback handler when
// config.Fallback.ProxyTo is set; otherwise the fallback is left unchanged.
func configureFallback(_ context.Context) (err error) {
	proxyTo := config.Fallback.ProxyTo
	if proxyTo == nil {
		return
	}
	target := &proxyTo.URL
	proxy := &httputil.ReverseProxy{
		Rewrite: func(r *httputil.ProxyRequest) {
			// Route to the configured target but keep the original Host
			// and X-Forwarded-For so the upstream sees the real client.
			r.SetURL(target)
			r.Out.Host = r.In.Host
			r.Out.Header["X-Forwarded-For"] = r.In.Header["X-Forwarded-For"]
		},
		Transport: &http.Transport{
			TLSClientConfig: &tls.Config{
				InsecureSkipVerify: config.Fallback.Insecure,
			},
		},
	}
	fallback = proxy
	return
}
// configureAudit initializes the snowflake ID generator used for audit
// record IDs: a fixed epoch plus a per-node machine ID from the config.
// Thread-unsafe, must be called only during initial configuration.
func configureAudit(_ context.Context) (err error) {
	// Epoch for snowflake timestamps; changing this would break ordering
	// against IDs generated with the old epoch.
	snowflake.SetStartTime(time.Date(2025, 12, 1, 0, 0, 0, 0, time.UTC))
	snowflake.SetMachineID(config.Audit.NodeID)
	return
}
// listen opens a listener described by an endpoint of the form
// "protocol/address" (e.g. "tcp/:8080"). The special value "-" disables
// the listener and returns nil. Any other malformed or failing endpoint
// is fatal; name is used only to label error messages.
func listen(ctx context.Context, name string, listen string) net.Listener {
	if listen == "-" {
		// Explicitly disabled endpoint.
		return nil
	}
	protocol, address, ok := strings.Cut(listen, "/")
	if !ok {
		logc.Fatalf(ctx, "%s: %s: malformed endpoint", name, listen)
	}
	listener, listenErr := net.Listen(protocol, address)
	if listenErr != nil {
		logc.Fatalf(ctx, "%s: %s\n", name, listenErr)
	}
	return listener
}
// panicHandler wraps an http.Handler so that a panic in the handler is
// logged with a stack trace and converted into a 500 response, instead of
// killing the connection. http.ErrAbortHandler is re-raised untouched, as
// net/http treats it as a deliberate request abort.
func panicHandler(handler http.Handler) http.Handler {
	return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		defer func() {
			recovered := recover()
			if recovered == nil {
				return
			}
			if abortErr, ok := recovered.(error); ok && errors.Is(abortErr, http.ErrAbortHandler) {
				// Propagate the sentinel so the server aborts quietly.
				panic(http.ErrAbortHandler)
			}
			logc.Printf(r.Context(), "panic: %s %s %s: %s\n%s",
				r.Method, r.Host, r.URL.Path, recovered, string(debug.Stack()))
			http.Error(w,
				fmt.Sprintf("internal server error: %s", recovered),
				http.StatusInternalServerError,
			)
		}()
		handler.ServeHTTP(w, r)
	})
}
// serve runs an HTTP/1 (and unencrypted HTTP/2) server on the listener,
// blocking until the server stops, which is fatal. A nil listener (a
// disabled endpoint, see listen) is a no-op.
func serve(ctx context.Context, listener net.Listener, handler http.Handler) {
	if listener == nil {
		return
	}
	protocols := new(http.Protocols)
	protocols.SetHTTP1(true)
	protocols.SetUnencryptedHTTP2(true)
	server := http.Server{Handler: handler, Protocols: protocols}
	logc.Fatalln(ctx, server.Serve(listener))
}
// webRootArg normalizes a site argument into a web root: a bare domain
// ("domain.tld") maps to its default project "domain.tld/.index", while
// "domain.tld/dir" is used as-is. Anything with more than one slash is
// fatal.
func webRootArg(arg string) string {
	switch strings.Count(arg, "/") {
	case 0:
		return arg + "/.index"
	case 1:
		return arg
	default:
		// Fixed: the original message was missing the closing quote
		// after 'domain.tld/dir'.
		logc.Fatalln(context.Background(),
			"webroot argument must be either 'domain.tld' or 'domain.tld/dir'")
		return ""
	}
}
// fileOutputArg returns the destination for command output: the file named
// by the first positional argument, or standard output when no argument
// was given. Failure to create the file is fatal.
func fileOutputArg() (writer io.WriteCloser) {
	if flag.NArg() == 0 {
		return os.Stdout
	}
	file, err := os.Create(flag.Arg(0))
	if err != nil {
		logc.Fatalln(context.Background(), err)
	}
	return file
}
// usage prints the command synopsis grouped by purpose, followed by the
// flag defaults. Installed as flag.Usage in Main.
func usage() {
	fmt.Fprintf(os.Stderr, "Usage:\n")
	fmt.Fprintf(os.Stderr, "(server) "+
		"git-pages [-config <file>|-no-config]\n")
	fmt.Fprintf(os.Stderr, "(info) "+
		"git-pages {-version|-print-config-env-vars|-print-config}\n")
	fmt.Fprintf(os.Stderr, "(debug) "+
		"git-pages {-list-blobs|-list-manifests}\n")
	fmt.Fprintf(os.Stderr, "(debug) "+
		"git-pages {-get-blob|-get-manifest|-get-archive|-update-site} <ref> [file]\n")
	fmt.Fprintf(os.Stderr, "(admin) "+
		"git-pages {-freeze-domain <domain>|-unfreeze-domain <domain>}\n")
	// Fixed: -audit-rollback exists as a flag and is listed in the
	// mutual-exclusion error, but was missing from the usage text.
	fmt.Fprintf(os.Stderr, "(audit) "+
		"git-pages {-audit-log|-audit-read <id>|-audit-rollback <id>|"+
		"-audit-server <endpoint> <program> [args...]}\n")
	fmt.Fprintf(os.Stderr, "(maint) "+
		"git-pages {-run-migration <name>|-trace-garbage|-size-histogram {original|stored}}\n")
	flag.PrintDefaults()
}
// Main is the entry point for the git-pages binary. It parses the command
// line, loads configuration, and then either performs exactly one CLI
// operation (debug/admin/audit/maintenance) or — in the default case —
// starts the long-running HTTP server. versionInfo is printed verbatim by
// the -version flag.
func Main(versionInfo string) {
	ctx := context.Background()
	flag.Usage = usage
	// Flag definitions. The "operation" flags below are mutually exclusive;
	// this is enforced after parsing.
	configTomlPath := flag.String("config", "",
		"load configuration from `filename` (default: 'config.toml')")
	noConfig := flag.Bool("no-config", false,
		"run without configuration file (configure via environment variables)")
	printConfigEnvVars := flag.Bool("print-config-env-vars", false,
		"print every recognized configuration environment variable and exit")
	printConfig := flag.Bool("print-config", false,
		"print configuration as JSON and exit")
	listBlobs := flag.Bool("list-blobs", false,
		"enumerate every blob with its metadata")
	listManifests := flag.Bool("list-manifests", false,
		"enumerate every manifest with its metadata")
	getBlob := flag.String("get-blob", "",
		"write contents of `blob` ('sha256-xxxxxxx...xxx')")
	getManifest := flag.String("get-manifest", "",
		"write manifest for `site` (either 'domain.tld' or 'domain.tld/dir') as ProtoJSON")
	getArchive := flag.String("get-archive", "",
		"write archive for `site` (either 'domain.tld' or 'domain.tld/dir') in tar format")
	updateSite := flag.String("update-site", "",
		"update `site` (either 'domain.tld' or 'domain.tld/dir') from archive or repository URL")
	freezeDomain := flag.String("freeze-domain", "",
		"prevent any site uploads to a given `domain`")
	unfreezeDomain := flag.String("unfreeze-domain", "",
		"allow site uploads to a `domain` again after it has been frozen")
	auditLog := flag.Bool("audit-log", false,
		"display audit log")
	auditRead := flag.String("audit-read", "",
		"extract contents of audit record `id` to files '<id>-*'")
	auditRollback := flag.String("audit-rollback", "",
		"restore site from contents of audit record `id`")
	auditServer := flag.String("audit-server", "",
		"listen for notifications on `endpoint` and spawn a process for each audit event")
	runMigration := flag.String("run-migration", "",
		"run a store `migration` (one of: create-domain-markers)")
	sizeHistogram := flag.String("size-histogram", "",
		"display histogram of `size-type` (original or stored) per domain")
	traceGarbage := flag.Bool("trace-garbage", false,
		"estimate total size of unreachable blobs")
	version := flag.Bool("version", false,
		"display version")
	flag.Parse()
	if *version {
		fmt.Printf("git-pages %s\n", versionInfo)
		os.Exit(0)
	}
	// Count how many of the mutually exclusive operations were requested.
	// The order here mirrors the switch below and the error message.
	var cliOperations int
	for _, selected := range []bool{
		*listBlobs,
		*listManifests,
		*getBlob != "",
		*getManifest != "",
		*getArchive != "",
		*updateSite != "",
		*freezeDomain != "",
		*unfreezeDomain != "",
		*auditLog,
		*auditRead != "",
		*auditRollback != "",
		*auditServer != "",
		*runMigration != "",
		*sizeHistogram != "",
		*traceGarbage,
	} {
		if selected {
			cliOperations++
		}
	}
	if cliOperations > 1 {
		logc.Fatalln(ctx, "-list-blobs, -list-manifests, -get-blob, -get-manifest, -get-archive, "+
			"-update-site, -freeze-domain, -unfreeze-domain, -audit-log, -audit-read, "+
			"-audit-rollback, -audit-server, -run-migration, -size-histogram, "+
			"and -trace-garbage are mutually exclusive")
	}
	if *configTomlPath != "" && *noConfig {
		logc.Fatalln(ctx, "-no-config and -config are mutually exclusive")
	}
	if *printConfigEnvVars {
		PrintConfigEnvVars()
		return
	}
	var err error
	// Default to ./config.toml unless explicitly running configless.
	if *configTomlPath == "" && !*noConfig {
		*configTomlPath = "config.toml"
	}
	if config, err = Configure(*configTomlPath); err != nil {
		logc.Fatalln(ctx, "config:", err)
	}
	if *printConfig {
		fmt.Println(config.TOML())
		return
	}
	InitObservability()
	defer FiniObservability()
	// Apply every configure* step; errors.Join surfaces all failures at once.
	if err = errors.Join(
		configureFeatures(ctx),
		configureMemLimit(ctx),
		configureConcurrency(ctx),
		configureWildcards(ctx),
		configureFallback(ctx),
		configureAudit(ctx),
	); err != nil {
		logc.Fatalln(ctx, err)
	}
	// The server has its own logic for creating the backend.
	if cliOperations > 0 {
		if backend, err = CreateBackend(ctx, &config.Storage); err != nil {
			logc.Fatalln(ctx, err)
		}
	}
	// Dispatch on the selected operation; the default case runs the server.
	switch {
	case *listBlobs:
		for metadata, err := range backend.EnumerateBlobs(ctx) {
			if err != nil {
				logc.Fatalln(ctx, err)
			}
			fmt.Fprintf(color.Output, "%s %s %s\n",
				metadata.Name,
				color.HiWhiteString(metadata.LastModified.UTC().Format(time.RFC3339)),
				color.HiGreenString(fmt.Sprint(metadata.Size)),
			)
		}
	case *listManifests:
		for metadata, err := range backend.EnumerateManifests(ctx) {
			if err != nil {
				logc.Fatalln(ctx, err)
			}
			fmt.Fprintf(color.Output, "%s %s %s\n",
				metadata.Name,
				color.HiWhiteString(metadata.LastModified.UTC().Format(time.RFC3339)),
				color.HiGreenString(fmt.Sprint(metadata.Size)),
			)
		}
	case *getBlob != "":
		reader, _, err := backend.GetBlob(ctx, *getBlob)
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		// NOTE(review): io.Copy error is ignored here — a short write to the
		// output would go unnoticed; consider checking it.
		io.Copy(fileOutputArg(), reader)
	case *getManifest != "":
		webRoot := webRootArg(*getManifest)
		manifest, _, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		fmt.Fprintln(fileOutputArg(), string(ManifestJSON(manifest)))
	case *getArchive != "":
		webRoot := webRootArg(*getArchive)
		manifest, metadata, err :=
			backend.GetManifest(ctx, webRoot, GetManifestOptions{})
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		if err = CollectTar(ctx, fileOutputArg(), manifest, metadata); err != nil {
			logc.Fatalln(ctx, err)
		}
	case *updateSite != "":
		// CLI updates run with admin privileges attached to the context.
		ctx = WithPrincipal(ctx)
		GetPrincipal(ctx).CliAdmin = proto.Bool(true)
		if flag.NArg() != 1 {
			logc.Fatalln(ctx, "update source must be provided as the argument")
		}
		sourceURL, err := url.Parse(flag.Arg(0))
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		var result UpdateResult
		if sourceURL.Scheme == "" {
			// No scheme: treat the argument as a local archive file and
			// infer the upload content type from its extension.
			file, err := os.Open(sourceURL.Path)
			if err != nil {
				logc.Fatalln(ctx, err)
			}
			defer file.Close()
			var contentType string
			switch {
			case strings.HasSuffix(sourceURL.Path, ".zip"):
				contentType = "application/zip"
			case strings.HasSuffix(sourceURL.Path, ".tar"):
				contentType = "application/x-tar"
			case strings.HasSuffix(sourceURL.Path, ".tar.gz"):
				contentType = "application/x-tar+gzip"
			case strings.HasSuffix(sourceURL.Path, ".tar.zst"):
				contentType = "application/x-tar+zstd"
			default:
				// NOTE(review): uses stdlib log.Fatalf, unlike logc elsewhere
				// in this function — confirm whether this is intentional.
				log.Fatalf("cannot determine content type from filename %q\n", sourceURL)
			}
			webRoot := webRootArg(*updateSite)
			result = UpdateFromArchive(ctx, webRoot, contentType, file)
		} else {
			// With a scheme: update from a git repository URL. The URL
			// fragment, if present, selects the branch (default "pages").
			branch := "pages"
			if sourceURL.Fragment != "" {
				branch, sourceURL.Fragment = sourceURL.Fragment, ""
			}
			webRoot := webRootArg(*updateSite)
			result = UpdateFromRepository(ctx, webRoot, sourceURL.String(), branch)
		}
		// Map the update outcome to an exit status / message. Errors exit 2,
		// timeouts exit 1, everything else exits 0.
		switch result.outcome {
		case UpdateError:
			logc.Printf(ctx, "error: %s\n", result.err)
			os.Exit(2)
		case UpdateTimeout:
			logc.Println(ctx, "timeout")
			os.Exit(1)
		case UpdateCreated:
			logc.Println(ctx, "created")
		case UpdateReplaced:
			logc.Println(ctx, "replaced")
		case UpdateDeleted:
			logc.Println(ctx, "deleted")
		case UpdateNoChange:
			logc.Println(ctx, "no-change")
		}
	case *freezeDomain != "" || *unfreezeDomain != "":
		ctx = WithPrincipal(ctx)
		GetPrincipal(ctx).CliAdmin = proto.Bool(true)
		var domain string
		var freeze bool
		if *freezeDomain != "" {
			domain = *freezeDomain
			freeze = true
		} else {
			domain = *unfreezeDomain
			freeze = false
		}
		if freeze {
			if err = backend.FreezeDomain(ctx, domain); err != nil {
				logc.Fatalln(ctx, err)
			}
			logc.Println(ctx, "frozen")
		} else {
			if err = backend.UnfreezeDomain(ctx, domain); err != nil {
				logc.Fatalln(ctx, err)
			}
			logc.Println(ctx, "thawed")
		}
	case *auditLog:
		// Collect all records, then sort by ID for stable chronological
		// output (snowflake IDs are time-ordered by construction).
		records := []*AuditRecord{}
		ids := backend.SearchAuditLog(ctx, SearchAuditLogOptions{})
		for record, err := range backend.GetAuditLogRecords(ctx, ids) {
			if err != nil {
				logc.Fatalln(ctx, err)
			}
			records = append(records, record)
		}
		slices.SortFunc(records, func(a, b *AuditRecord) int {
			return cmp.Compare(a.GetAuditID(), b.GetAuditID())
		})
		for _, record := range records {
			fmt.Fprintf(color.Output, "%s %s %s %s %s\n",
				record.GetAuditID().String(),
				color.HiWhiteString(record.GetTimestamp().AsTime().UTC().Format(time.RFC3339)),
				color.HiMagentaString(record.DescribePrincipal()),
				color.HiGreenString(record.DescribeResource()),
				record.GetEvent(),
			)
		}
	case *auditRead != "":
		id, err := ParseAuditID(*auditRead)
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		record, err := backend.QueryAuditLog(ctx, id)
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		if err = ExtractAuditRecord(ctx, id, record, "."); err != nil {
			logc.Fatalln(ctx, err)
		}
	case *auditRollback != "":
		ctx = WithPrincipal(ctx)
		GetPrincipal(ctx).CliAdmin = proto.Bool(true)
		id, err := ParseAuditID(*auditRollback)
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		record, err := backend.QueryAuditLog(ctx, id)
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		// Rollback needs the full manifest plus its site coordinates.
		if record.GetManifest() == nil || record.GetDomain() == "" || record.GetProject() == "" {
			logc.Fatalln(ctx, "no manifest in audit record")
		}
		webRoot := path.Join(record.GetDomain(), record.GetProject())
		// Stage first, then commit: mirrors the normal update flow.
		err = backend.StageManifest(ctx, record.GetManifest())
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		err = backend.CommitManifest(ctx, webRoot, record.GetManifest(), ModifyManifestOptions{})
		if err != nil {
			logc.Fatalln(ctx, err)
		}
	case *auditServer != "":
		if flag.NArg() < 1 {
			logc.Fatalln(ctx, "handler path not provided")
		}
		processor, err := AuditEventProcessor(flag.Arg(0), flag.Args()[1:])
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		serve(ctx, listen(ctx, "audit", *auditServer), ObserveHTTPHandler(processor))
	case *runMigration != "":
		if err = RunMigration(ctx, *runMigration); err != nil {
			logc.Fatalln(ctx, err)
		}
	case *sizeHistogram != "":
		// Select which per-domain size metric to plot; the zero-returning
		// default is never used because unknown types are fatal below.
		extractSize := func(s *DomainStatistics) int64 { return 0 }
		switch *sizeHistogram {
		case "original":
			// Displays a size histogram using the `manifest.OriginalSize`, which is useful to see
			// which site is the closest to hitting the size limit (checked against apparent size).
			// This apparent size does not have any direct relationship with used storage.
			extractSize = func(s *DomainStatistics) int64 { return s.OriginalSize }
		case "stored":
			// Displays a size histogram using the `manifest.StoredSize`, which is useful to see
			// which site consumes the most resources. The site is keeping at least this many
			// bytes worth of blobs alive, but removing it may not free any space because
			// deduplication is global.
			extractSize = func(s *DomainStatistics) int64 { return s.StoredSize }
		default:
			logc.Fatalln(ctx, "unknown histogram type")
		}
		histogram, err := SizeHistogram(ctx)
		if err != nil {
			logc.Fatalln(ctx, err)
		}
		slices.SortFunc(histogram, func(a *DomainStatistics, b *DomainStatistics) int {
			return cmp.Compare(extractSize(a), extractSize(b))
		})
		if len(histogram) > 0 {
			// Scale ASCII bars so the largest domain spans the full width;
			// max(..., 1) guards against division by zero when all sizes are 0.
			fullScaleSize := max(extractSize(histogram[len(histogram)-1]), 1)
			fullScaleWidth := int64(40)
			for _, statistics := range histogram {
				size := extractSize(statistics)
				barWidth := size * fullScaleWidth / fullScaleSize
				spaceWidth := fullScaleWidth - barWidth
				bar := strings.Repeat("*", int(barWidth)) + strings.Repeat(" ", int(spaceWidth))
				fmt.Fprintf(color.Output, "%s %s %s\n",
					color.HiBlackString(fmt.Sprint("|", bar, "|")),
					statistics.Domain,
					color.HiGreenString(datasize.ByteSize(extractSize(statistics)).HR()),
				)
			}
		}
	case *traceGarbage:
		if err = TraceGarbage(ctx); err != nil {
			logc.Fatalln(ctx, err)
		}
	default:
		// Hook a signal (SIGHUP on *nix, nothing on Windows) for reloading the configuration
		// at runtime. This is useful because it preserves S3 backend cache contents. Failed
		// configuration reloads will not crash the process; you may want to check the syntax
		// first with `git-pages -config ... -print-config` since there is no other feedback.
		//
		// Note that not all of the configuration is updated on reload. Listeners are kept as-is.
		// The backend is not recreated (this is intentional as it allows preserving the cache).
		OnReload(func() {
			if newConfig, err := Configure(*configTomlPath); err != nil {
				logc.Println(ctx, "config: reload err:", err)
			} else {
				// From https://go.dev/ref/mem:
				// > A read r of a memory location x holding a value that is not larger than
				// > a machine word must observe some write w such that r does not happen before
				// > w and there is no write w' such that w happens before w' and w' happens
				// > before r. That is, each read must observe a value written by a preceding or
				// > concurrent write.
				config = newConfig
				if err = errors.Join(
					configureFeatures(ctx),
					configureMemLimit(ctx),
					configureWildcards(ctx),
					configureFallback(ctx),
				); err != nil {
					// At this point the configuration is in an in-between, corrupted state, so
					// the only reasonable choice is to crash.
					logc.Fatalln(ctx, "config: reload fail:", err)
				} else {
					logc.Println(ctx, "config: reload ok")
				}
			}
		})
		// Start listening on all ports before initializing the backend, otherwise if the backend
		// spends some time initializing (which the S3 backend does) a proxy like Caddy can race
		// with git-pages on startup and return errors for requests that would have been served
		// just 0.5s later.
		pagesListener := listen(ctx, "pages", config.Server.Pages)
		caddyListener := listen(ctx, "caddy", config.Server.Caddy)
		metricsListener := listen(ctx, "metrics", config.Server.Metrics)
		if backend, err = CreateBackend(ctx, &config.Storage); err != nil {
			logc.Fatalln(ctx, err)
		}
		backend = NewObservedBackend(backend)
		// Outermost middleware runs first: panic recovery wraps everything.
		middleware := chainHTTPMiddleware(
			panicHandler,
			remoteAddrMiddleware,
			ObserveHTTPHandler,
		)
		go serve(ctx, pagesListener, middleware(http.HandlerFunc(ServePages)))
		go serve(ctx, caddyListener, middleware(http.HandlerFunc(ServeCaddy)))
		go serve(ctx, metricsListener, promhttp.Handler())
		if config.Insecure {
			logc.Println(ctx, "serve: ready (INSECURE)")
		} else {
			logc.Println(ctx, "serve: ready")
		}
		WaitForInterrupt()
		logc.Println(ctx, "serve: exiting")
	}
}