mirror of
https://codeberg.org/git-pages/git-pages.git
synced 2026-05-14 03:01:48 +00:00
Add basic garbage tracer.
This isn't a concurrent GC and it cannot provide a reliable result; the output is just an estimate.
This commit is contained in:
@@ -43,7 +43,7 @@
|
||||
"-s -w"
|
||||
];
|
||||
|
||||
vendorHash = "sha256-D5v6LpJZ+a2Dzdir/YzyFBwY/K4laTr58beywzXOsTM=";
|
||||
vendorHash = "sha256-wwsxHEwCySO2Ykttf6C+GZupMWczVYkAhSVwaVZHNko=";
|
||||
};
|
||||
in
|
||||
{
|
||||
|
||||
1
go.mod
1
go.mod
@@ -8,6 +8,7 @@ require (
|
||||
github.com/KimMachineGun/automemlimit v0.7.5
|
||||
github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500
|
||||
github.com/creasty/defaults v1.8.0
|
||||
github.com/dghubble/trie v0.1.0
|
||||
github.com/fatih/color v1.18.0
|
||||
github.com/getsentry/sentry-go v0.40.0
|
||||
github.com/getsentry/sentry-go/slog v0.40.0
|
||||
|
||||
2
go.sum
2
go.sum
@@ -27,6 +27,8 @@ github.com/cyphar/filepath-securejoin v0.6.1/go.mod h1:A8hd4EnAeyujCJRrICiOWqjS1
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dghubble/trie v0.1.0 h1:kJnjBLFFElBwS60N4tkPvnLhnpcDxbBjIulgI8CpNGM=
|
||||
github.com/dghubble/trie v0.1.0/go.mod h1:sOmnzfBNH7H92ow2292dDFWNsVQuh/izuD7otCYb1ak=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o=
|
||||
|
||||
@@ -780,7 +780,7 @@ func (s3 *S3Backend) SearchAuditLog(
|
||||
ctx context.Context, opts SearchAuditLogOptions,
|
||||
) iter.Seq2[AuditID, error] {
|
||||
return func(yield func(AuditID, error) bool) {
|
||||
logc.Printf(ctx, "s3: query audit\n")
|
||||
logc.Printf(ctx, "s3: search audit\n")
|
||||
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
87
src/garbage.go
Normal file
87
src/garbage.go
Normal file
@@ -0,0 +1,87 @@
|
||||
package git_pages
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"github.com/c2h5oh/datasize"
|
||||
"github.com/dghubble/trie"
|
||||
)
|
||||
|
||||
func trieReduce(data trie.Trier) (items, total int64) {
|
||||
data.Walk(func(key string, value any) error {
|
||||
items += 1
|
||||
total += *value.(*int64)
|
||||
return nil
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func TraceGarbage(ctx context.Context) error {
|
||||
allBlobs := trie.NewRuneTrie()
|
||||
liveBlobs := trie.NewRuneTrie()
|
||||
|
||||
traceManifest := func(manifestName string, manifest *Manifest) error {
|
||||
for _, entry := range manifest.GetContents() {
|
||||
if entry.GetType() == Type_ExternalFile {
|
||||
blobName := string(entry.Data)
|
||||
if size := allBlobs.Get(blobName); size == nil {
|
||||
return fmt.Errorf("%s: dangling reference %s", manifestName, blobName)
|
||||
} else {
|
||||
liveBlobs.Put(blobName, size)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Enumerate all blobs.
|
||||
for metadata, err := range backend.EnumerateBlobs(ctx) {
|
||||
if err != nil {
|
||||
return fmt.Errorf("trace blobs err: %w", err)
|
||||
}
|
||||
allBlobs.Put(metadata.Name, &metadata.Size)
|
||||
}
|
||||
|
||||
// Enumerate blobs live via site manifests.
|
||||
for metadata, err := range backend.EnumerateManifests(ctx) {
|
||||
if err != nil {
|
||||
return fmt.Errorf("trace sites err: %w", err)
|
||||
}
|
||||
manifest, _, err := backend.GetManifest(ctx, metadata.Name, GetManifestOptions{})
|
||||
if err != nil {
|
||||
return fmt.Errorf("trace sites err: %w", err)
|
||||
}
|
||||
err = traceManifest(metadata.Name, manifest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("trace sites err: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Enumerate blobs live via audit records.
|
||||
for auditID, err := range backend.SearchAuditLog(ctx, SearchAuditLogOptions{}) {
|
||||
if err != nil {
|
||||
return fmt.Errorf("trace audit err: %w", err)
|
||||
}
|
||||
auditRecord, err := backend.QueryAuditLog(ctx, auditID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("trace audit err: %w", err)
|
||||
}
|
||||
if auditRecord.Manifest != nil {
|
||||
err = traceManifest(auditID.String(), auditRecord.Manifest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("trace audit err: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
allBlobsCount, allBlobsSize := trieReduce(allBlobs)
|
||||
logc.Printf(ctx, "trace all: %d blobs, %s",
|
||||
allBlobsCount, datasize.ByteSize(allBlobsSize).HR())
|
||||
|
||||
liveBlobsCount, liveBlobsSize := trieReduce(liveBlobs)
|
||||
logc.Printf(ctx, "trace live: %d blobs, %s",
|
||||
liveBlobsCount, datasize.ByteSize(liveBlobsSize).HR())
|
||||
|
||||
return nil
|
||||
}
|
||||
87
src/main.go
87
src/main.go
@@ -170,16 +170,18 @@ func usage() {
|
||||
fmt.Fprintf(os.Stderr, "Usage:\n")
|
||||
fmt.Fprintf(os.Stderr, "(server) "+
|
||||
"git-pages [-config <file>|-no-config]\n")
|
||||
fmt.Fprintf(os.Stderr, "(info) "+
|
||||
"git-pages {-print-config-env-vars|-print-config}\n")
|
||||
fmt.Fprintf(os.Stderr, "(debug) "+
|
||||
"git-pages {-list-blobs|-list-manifests}\n")
|
||||
fmt.Fprintf(os.Stderr, "(debug) "+
|
||||
"git-pages {-get-blob|-get-manifest|-get-archive|-update-site} <ref> [file]\n")
|
||||
fmt.Fprintf(os.Stderr, "(admin) "+
|
||||
"git-pages {-run-migration <name>|-freeze-domain <domain>|-unfreeze-domain <domain>}\n")
|
||||
"git-pages {-freeze-domain <domain>|-unfreeze-domain <domain>}\n")
|
||||
fmt.Fprintf(os.Stderr, "(audit) "+
|
||||
"git-pages {-audit-log|-audit-read <id>|-audit-server <endpoint> <program> [args...]}\n")
|
||||
fmt.Fprintf(os.Stderr, "(info) "+
|
||||
"git-pages {-print-config-env-vars|-print-config}\n")
|
||||
fmt.Fprintf(os.Stderr, "(maint) "+
|
||||
"git-pages {-run-migration <name>|-trace-garbage}\n")
|
||||
flag.PrintDefaults()
|
||||
}
|
||||
|
||||
@@ -187,24 +189,22 @@ func Main() {
|
||||
ctx := context.Background()
|
||||
|
||||
flag.Usage = usage
|
||||
printConfigEnvVars := flag.Bool("print-config-env-vars", false,
|
||||
"print every recognized configuration environment variable and exit")
|
||||
printConfig := flag.Bool("print-config", false,
|
||||
"print configuration as JSON and exit")
|
||||
configTomlPath := flag.String("config", "",
|
||||
"load configuration from `filename` (default: 'config.toml')")
|
||||
noConfig := flag.Bool("no-config", false,
|
||||
"run without configuration file (configure via environment variables)")
|
||||
runMigration := flag.String("run-migration", "",
|
||||
"run a store `migration` (one of: create-domain-markers)")
|
||||
getBlob := flag.String("get-blob", "",
|
||||
"write contents of `blob` ('sha256-xxxxxxx...xxx')")
|
||||
printConfigEnvVars := flag.Bool("print-config-env-vars", false,
|
||||
"print every recognized configuration environment variable and exit")
|
||||
printConfig := flag.Bool("print-config", false,
|
||||
"print configuration as JSON and exit")
|
||||
listBlobs := flag.Bool("list-blobs", false,
|
||||
"enumerate every blob with its metadata")
|
||||
getManifest := flag.String("get-manifest", "",
|
||||
"write manifest for `site` (either 'domain.tld' or 'domain.tld/dir') as ProtoJSON")
|
||||
listManifests := flag.Bool("list-manifests", false,
|
||||
"enumerate every manifest with its metadata")
|
||||
getBlob := flag.String("get-blob", "",
|
||||
"write contents of `blob` ('sha256-xxxxxxx...xxx')")
|
||||
getManifest := flag.String("get-manifest", "",
|
||||
"write manifest for `site` (either 'domain.tld' or 'domain.tld/dir') as ProtoJSON")
|
||||
getArchive := flag.String("get-archive", "",
|
||||
"write archive for `site` (either 'domain.tld' or 'domain.tld/dir') in tar format")
|
||||
updateSite := flag.String("update-site", "",
|
||||
@@ -219,15 +219,18 @@ func Main() {
|
||||
"extract contents of audit record `id` to files '<id>-*'")
|
||||
auditServer := flag.String("audit-server", "",
|
||||
"listen for notifications on `endpoint` and spawn a process for each audit event")
|
||||
runMigration := flag.String("run-migration", "",
|
||||
"run a store `migration` (one of: create-domain-markers)")
|
||||
traceGarbage := flag.Bool("trace-garbage", false,
|
||||
"estimate total size of unreachable blobs")
|
||||
flag.Parse()
|
||||
|
||||
var cliOperations int
|
||||
for _, selected := range []bool{
|
||||
*runMigration != "",
|
||||
*getBlob != "",
|
||||
*listBlobs,
|
||||
*getManifest != "",
|
||||
*listManifests,
|
||||
*getBlob != "",
|
||||
*getManifest != "",
|
||||
*getArchive != "",
|
||||
*updateSite != "",
|
||||
*freezeDomain != "",
|
||||
@@ -235,14 +238,17 @@ func Main() {
|
||||
*auditLog,
|
||||
*auditRead != "",
|
||||
*auditServer != "",
|
||||
*runMigration != "",
|
||||
*traceGarbage,
|
||||
} {
|
||||
if selected {
|
||||
cliOperations++
|
||||
}
|
||||
}
|
||||
if cliOperations > 1 {
|
||||
logc.Fatalln(ctx, "-get-blob, -get-manifest, -get-archive, -update-site, "+
|
||||
"-freeze, -unfreeze, -audit-log, and -audit-read are mutually exclusive")
|
||||
logc.Fatalln(ctx, "-list-blobs, -list-manifests, -get-blob, -get-manifest, -get-archive, "+
|
||||
"-update-site, -freeze-domain, -unfreeze-domain, -audit-log, -audit-read, "+
|
||||
"-audit-server, -run-migration, and -trace-garbage are mutually exclusive")
|
||||
}
|
||||
|
||||
if *configTomlPath != "" && *noConfig {
|
||||
@@ -288,18 +294,6 @@ func Main() {
|
||||
}
|
||||
|
||||
switch {
|
||||
case *runMigration != "":
|
||||
if err := RunMigration(ctx, *runMigration); err != nil {
|
||||
logc.Fatalln(ctx, err)
|
||||
}
|
||||
|
||||
case *getBlob != "":
|
||||
reader, _, err := backend.GetBlob(ctx, *getBlob)
|
||||
if err != nil {
|
||||
logc.Fatalln(ctx, err)
|
||||
}
|
||||
io.Copy(fileOutputArg(), reader)
|
||||
|
||||
case *listBlobs:
|
||||
for metadata, err := range backend.EnumerateBlobs(ctx) {
|
||||
if err != nil {
|
||||
@@ -312,14 +306,6 @@ func Main() {
|
||||
)
|
||||
}
|
||||
|
||||
case *getManifest != "":
|
||||
webRoot := webRootArg(*getManifest)
|
||||
manifest, _, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
|
||||
if err != nil {
|
||||
logc.Fatalln(ctx, err)
|
||||
}
|
||||
fmt.Fprintln(fileOutputArg(), string(ManifestJSON(manifest)))
|
||||
|
||||
case *listManifests:
|
||||
for metadata, err := range backend.EnumerateManifests(ctx) {
|
||||
if err != nil {
|
||||
@@ -332,6 +318,21 @@ func Main() {
|
||||
)
|
||||
}
|
||||
|
||||
case *getBlob != "":
|
||||
reader, _, err := backend.GetBlob(ctx, *getBlob)
|
||||
if err != nil {
|
||||
logc.Fatalln(ctx, err)
|
||||
}
|
||||
io.Copy(fileOutputArg(), reader)
|
||||
|
||||
case *getManifest != "":
|
||||
webRoot := webRootArg(*getManifest)
|
||||
manifest, _, err := backend.GetManifest(ctx, webRoot, GetManifestOptions{})
|
||||
if err != nil {
|
||||
logc.Fatalln(ctx, err)
|
||||
}
|
||||
fmt.Fprintln(fileOutputArg(), string(ManifestJSON(manifest)))
|
||||
|
||||
case *getArchive != "":
|
||||
webRoot := webRootArg(*getArchive)
|
||||
manifest, metadata, err :=
|
||||
@@ -491,6 +492,16 @@ func Main() {
|
||||
|
||||
serve(ctx, listen(ctx, "audit", *auditServer), ObserveHTTPHandler(processor))
|
||||
|
||||
case *runMigration != "":
|
||||
if err = RunMigration(ctx, *runMigration); err != nil {
|
||||
logc.Fatalln(ctx, err)
|
||||
}
|
||||
|
||||
case *traceGarbage:
|
||||
if err = TraceGarbage(ctx); err != nil {
|
||||
logc.Fatalln(ctx, err)
|
||||
}
|
||||
|
||||
default:
|
||||
// Hook a signal (SIGHUP on *nix, nothing on Windows) for reloading the configuration
|
||||
// at runtime. This is useful because it preserves S3 backend cache contents. Failed
|
||||
|
||||
Reference in New Issue
Block a user