Add basic garbage tracer.

This isn't a concurrent GC and it cannot provide a reliable result;
the output is just an estimate.
This commit is contained in:
Catherine
2025-12-06 01:21:17 +00:00
parent 7c3b2248c9
commit 82aebb70bf
6 changed files with 141 additions and 40 deletions

View File

@@ -43,7 +43,7 @@
"-s -w"
];
vendorHash = "sha256-D5v6LpJZ+a2Dzdir/YzyFBwY/K4laTr58beywzXOsTM=";
vendorHash = "sha256-wwsxHEwCySO2Ykttf6C+GZupMWczVYkAhSVwaVZHNko=";
};
in
{

1
go.mod
View File

@@ -8,6 +8,7 @@ require (
github.com/KimMachineGun/automemlimit v0.7.5
github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500
github.com/creasty/defaults v1.8.0
github.com/dghubble/trie v0.1.0
github.com/fatih/color v1.18.0
github.com/getsentry/sentry-go v0.40.0
github.com/getsentry/sentry-go/slog v0.40.0

2
go.sum
View File

@@ -27,6 +27,8 @@ github.com/cyphar/filepath-securejoin v0.6.1/go.mod h1:A8hd4EnAeyujCJRrICiOWqjS1
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dghubble/trie v0.1.0 h1:kJnjBLFFElBwS60N4tkPvnLhnpcDxbBjIulgI8CpNGM=
github.com/dghubble/trie v0.1.0/go.mod h1:sOmnzfBNH7H92ow2292dDFWNsVQuh/izuD7otCYb1ak=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o=

View File

@@ -780,7 +780,7 @@ func (s3 *S3Backend) SearchAuditLog(
ctx context.Context, opts SearchAuditLogOptions,
) iter.Seq2[AuditID, error] {
return func(yield func(AuditID, error) bool) {
logc.Printf(ctx, "s3: query audit\n")
logc.Printf(ctx, "s3: search audit\n")
ctx, cancel := context.WithCancel(ctx)
defer cancel()

87
src/garbage.go Normal file
View File

@@ -0,0 +1,87 @@
package git_pages
import (
"context"
"fmt"
"github.com/c2h5oh/datasize"
"github.com/dghubble/trie"
)
// trieReduce walks data and returns the number of entries visited (items)
// together with the sum of their values (total). Every value stored in data
// must be a non-nil *int64; any other value makes the type assertion or the
// dereference panic.
func trieReduce(data trie.Trier) (items, total int64) {
	accumulate := func(_ string, value any) error {
		items++
		size := value.(*int64)
		total += *size
		return nil
	}
	// The callback above always returns nil; the result of Walk is discarded.
	data.Walk(accumulate)
	return items, total
}
// TraceGarbage enumerates every blob known to the backend, marks as live each
// blob referenced as an external file by a site manifest or by a manifest
// attached to an audit record, and then logs the count and cumulative size of
// both the complete set and the live set.
//
// The function only reads from the backend. It returns an error as soon as the
// backend reports one, or when a manifest references a blob that was not seen
// during blob enumeration ("dangling reference").
func TraceGarbage(ctx context.Context) error {
	known := trie.NewRuneTrie() // blob name -> *int64 size, for every enumerated blob
	live := trie.NewRuneTrie()  // entries of known that at least one manifest refers to

	// markLive copies each external file entry of manifest from known to live.
	// owner identifies the manifest in the dangling reference error message.
	markLive := func(owner string, manifest *Manifest) error {
		for _, entry := range manifest.GetContents() {
			if entry.GetType() != Type_ExternalFile {
				continue
			}
			name := string(entry.Data)
			size := known.Get(name)
			if size == nil {
				return fmt.Errorf("%s: dangling reference %s", owner, name)
			}
			live.Put(name, size)
		}
		return nil
	}

	// Pass 1: record the size of every blob.
	for blob, err := range backend.EnumerateBlobs(ctx) {
		if err != nil {
			return fmt.Errorf("trace blobs err: %w", err)
		}
		known.Put(blob.Name, &blob.Size)
	}

	// Pass 2: blobs kept alive by site manifests.
	for site, err := range backend.EnumerateManifests(ctx) {
		if err != nil {
			return fmt.Errorf("trace sites err: %w", err)
		}
		manifest, _, getErr := backend.GetManifest(ctx, site.Name, GetManifestOptions{})
		if getErr != nil {
			return fmt.Errorf("trace sites err: %w", getErr)
		}
		if markErr := markLive(site.Name, manifest); markErr != nil {
			return fmt.Errorf("trace sites err: %w", markErr)
		}
	}

	// Pass 3: blobs kept alive by manifests stored in audit records.
	for id, err := range backend.SearchAuditLog(ctx, SearchAuditLogOptions{}) {
		if err != nil {
			return fmt.Errorf("trace audit err: %w", err)
		}
		record, queryErr := backend.QueryAuditLog(ctx, id)
		if queryErr != nil {
			return fmt.Errorf("trace audit err: %w", queryErr)
		}
		if record.Manifest == nil {
			// This audit record carries no manifest, so it references no blobs.
			continue
		}
		if markErr := markLive(id.String(), record.Manifest); markErr != nil {
			return fmt.Errorf("trace audit err: %w", markErr)
		}
	}

	// Report both sets; the difference between them is the estimated garbage.
	knownCount, knownBytes := trieReduce(known)
	logc.Printf(ctx, "trace all: %d blobs, %s",
		knownCount, datasize.ByteSize(knownBytes).HR())

	liveCount, liveBytes := trieReduce(live)
	logc.Printf(ctx, "trace live: %d blobs, %s",
		liveCount, datasize.ByteSize(liveBytes).HR())

	return nil
}

View File

@@ -170,16 +170,18 @@ func usage() {
fmt.Fprintf(os.Stderr, "Usage:\n")
fmt.Fprintf(os.Stderr, "(server) "+
"git-pages [-config <file>|-no-config]\n")
fmt.Fprintf(os.Stderr, "(info) "+
"git-pages {-print-config-env-vars|-print-config}\n")
fmt.Fprintf(os.Stderr, "(debug) "+
"git-pages {-list-blobs|-list-manifests}\n")
fmt.Fprintf(os.Stderr, "(debug) "+
"git-pages {-get-blob|-get-manifest|-get-archive|-update-site} <ref> [file]\n")
fmt.Fprintf(os.Stderr, "(admin) "+
"git-pages {-run-migration <name>|-freeze-domain <domain>|-unfreeze-domain <domain>}\n")
"git-pages {-freeze-domain <domain>|-unfreeze-domain <domain>}\n")
fmt.Fprintf(os.Stderr, "(audit) "+
"git-pages {-audit-log|-audit-read <id>|-audit-server <endpoint> <program> [args...]}\n")
fmt.Fprintf(os.Stderr, "(info) "+
"git-pages {-print-config-env-vars|-print-config}\n")
fmt.Fprintf(os.Stderr, "(maint) "+
"git-pages {-run-migration <name>|-trace-garbage}\n")
flag.PrintDefaults()
}
@@ -187,24 +189,22 @@ func Main() {
ctx := context.Background()
flag.Usage = usage
printConfigEnvVars := flag.Bool("print-config-env-vars", false,
"print every recognized configuration environment variable and exit")
printConfig := flag.Bool("print-config", false,
"print configuration as JSON and exit")
configTomlPath := flag.String("config", "",
"load configuration from `filename` (default: 'config.toml')")
noConfig := flag.Bool("no-config", false,
"run without configuration file (configure via environment variables)")
runMigration := flag.String("run-migration", "",
"run a store `migration` (one of: create-domain-markers)")
getBlob := flag.String("get-blob", "",
"write contents of `blob` ('sha256-xxxxxxx...xxx')")
printConfigEnvVars := flag.Bool("print-config-env-vars", false,
"print every recognized configuration environment variable and exit")
printConfig := flag.Bool("print-config", false,
"print configuration as JSON and exit")
listBlobs := flag.Bool("list-blobs", false,
"enumerate every blob with its metadata")
getManifest := flag.String("get-manifest", "",
"write manifest for `site` (either 'domain.tld' or 'domain.tld/dir') as ProtoJSON")
listManifests := flag.Bool("list-manifests", false,
"enumerate every manifest with its metadata")
getBlob := flag.String("get-blob", "",
"write contents of `blob` ('sha256-xxxxxxx...xxx')")
getManifest := flag.String("get-manifest", "",
"write manifest for `site` (either 'domain.tld' or 'domain.tld/dir') as ProtoJSON")
getArchive := flag.String("get-archive", "",
"write archive for `site` (either 'domain.tld' or 'domain.tld/dir') in tar format")
updateSite := flag.String("update-site", "",
@@ -219,15 +219,18 @@ func Main() {
"extract contents of audit record `id` to files '<id>-*'")
auditServer := flag.String("audit-server", "",
"listen for notifications on `endpoint` and spawn a process for each audit event")
runMigration := flag.String("run-migration", "",
"run a store `migration` (one of: create-domain-markers)")
traceGarbage := flag.Bool("trace-garbage", false,
"estimate total size of unreachable blobs")
flag.Parse()
var cliOperations int
for _, selected := range []bool{
*runMigration != "",
*getBlob != "",
*listBlobs,
*getManifest != "",
*listManifests,
*getBlob != "",
*getManifest != "",
*getArchive != "",
*updateSite != "",
*freezeDomain != "",
@@ -235,14 +238,17 @@ func Main() {
*auditLog,
*auditRead != "",
*auditServer != "",
*runMigration != "",
*traceGarbage,
} {
if selected {
cliOperations++
}
}
if cliOperations > 1 {
logc.Fatalln(ctx, "-get-blob, -get-manifest, -get-archive, -update-site, "+
"-freeze, -unfreeze, -audit-log, and -audit-read are mutually exclusive")
logc.Fatalln(ctx, "-list-blobs, -list-manifests, -get-blob, -get-manifest, -get-archive, "+
"-update-site, -freeze-domain, -unfreeze-domain, -audit-log, -audit-read, "+
"-audit-server, -run-migration, and -trace-garbage are mutually exclusive")
}
if *configTomlPath != "" && *noConfig {
@@ -288,18 +294,6 @@ func Main() {
}
switch {
case *runMigration != "":
if err := RunMigration(ctx, *runMigration); err != nil {
logc.Fatalln(ctx, err)
}
case *getBlob != "":
reader, _, err := backend.GetBlob(ctx, *getBlob)
if err != nil {
logc.Fatalln(ctx, err)
}
io.Copy(fileOutputArg(), reader)
case *listBlobs:
for metadata, err := range backend.EnumerateBlobs(ctx) {
if err != nil {
@@ -312,14 +306,6 @@ func Main() {
)
}
case *getManifest != "":
webRoot := webRootArg(*getManifest)
manifest, _, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
if err != nil {
logc.Fatalln(ctx, err)
}
fmt.Fprintln(fileOutputArg(), string(ManifestJSON(manifest)))
case *listManifests:
for metadata, err := range backend.EnumerateManifests(ctx) {
if err != nil {
@@ -332,6 +318,21 @@ func Main() {
)
}
case *getBlob != "":
reader, _, err := backend.GetBlob(ctx, *getBlob)
if err != nil {
logc.Fatalln(ctx, err)
}
io.Copy(fileOutputArg(), reader)
case *getManifest != "":
webRoot := webRootArg(*getManifest)
manifest, _, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
if err != nil {
logc.Fatalln(ctx, err)
}
fmt.Fprintln(fileOutputArg(), string(ManifestJSON(manifest)))
case *getArchive != "":
webRoot := webRootArg(*getArchive)
manifest, metadata, err :=
@@ -491,6 +492,16 @@ func Main() {
serve(ctx, listen(ctx, "audit", *auditServer), ObserveHTTPHandler(processor))
case *runMigration != "":
if err = RunMigration(ctx, *runMigration); err != nil {
logc.Fatalln(ctx, err)
}
case *traceGarbage:
if err = TraceGarbage(ctx); err != nil {
logc.Fatalln(ctx, err)
}
default:
// Hook a signal (SIGHUP on *nix, nothing on Windows) for reloading the configuration
// at runtime. This is useful because it preserves S3 backend cache contents. Failed