mirror of
https://codeberg.org/git-pages/git-pages.git
synced 2026-05-14 03:01:48 +00:00
Significantly improve efficiency of tracing.
I thought I was being smart by using a trie to record blob existence and sizes. I was not. The trie approach had roughly 5 times lower throughput (if not worse) and consumed an entirely unreasonable amount of RAM. A hashmap works just fine here.
This commit is contained in:
1
go.mod
1
go.mod
@@ -10,7 +10,6 @@ require (
|
||||
github.com/bits-and-blooms/bloom/v3 v3.7.1
|
||||
github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500
|
||||
github.com/creasty/defaults v1.8.0
|
||||
github.com/dghubble/trie v0.1.0
|
||||
github.com/fatih/color v1.19.0
|
||||
github.com/go-git/go-billy/v6 v6.0.0-20260410103409-85b6241850b5
|
||||
github.com/go-git/go-git/v6 v6.0.0-alpha.2
|
||||
|
||||
2
go.sum
2
go.sum
@@ -33,8 +33,6 @@ github.com/cyphar/filepath-securejoin v0.6.1/go.mod h1:A8hd4EnAeyujCJRrICiOWqjS1
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dghubble/trie v0.1.0 h1:kJnjBLFFElBwS60N4tkPvnLhnpcDxbBjIulgI8CpNGM=
|
||||
github.com/dghubble/trie v0.1.0/go.mod h1:sOmnzfBNH7H92ow2292dDFWNsVQuh/izuD7otCYb1ak=
|
||||
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
|
||||
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
|
||||
github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc=
|
||||
|
||||
@@ -46,9 +46,6 @@ schema = 3
|
||||
[mod."github.com/davecgh/go-spew"]
|
||||
version = "v1.1.1"
|
||||
hash = "sha256-nhzSUrE1fCkN0+RL04N4h8jWmRFPPPWbCuDc7Ss0akI="
|
||||
[mod."github.com/dghubble/trie"]
|
||||
version = "v0.1.0"
|
||||
hash = "sha256-hVh7uYylpMCCSPcxl70hJTmzSwaA1MxBmJFBO5Xdncc="
|
||||
[mod."github.com/dustin/go-humanize"]
|
||||
version = "v1.0.1"
|
||||
hash = "sha256-yuvxYYngpfVkUg9yAmG99IUVmADTQA0tMbBXe0Fq0Mc="
|
||||
|
||||
@@ -5,30 +5,29 @@ import (
|
||||
"fmt"
|
||||
|
||||
"github.com/c2h5oh/datasize"
|
||||
"github.com/dghubble/trie"
|
||||
)
|
||||
|
||||
func trieReduce(data trie.Trier) (items, total int64) {
|
||||
data.Walk(func(key string, value any) error {
|
||||
items += 1
|
||||
total += *value.(*int64)
|
||||
return nil
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
func TraceGarbage(ctx context.Context) error {
|
||||
allBlobs := trie.NewRuneTrie()
|
||||
liveBlobs := trie.NewRuneTrie()
|
||||
allBlobs := map[string]int64{}
|
||||
liveBlobs := map[string]int64{}
|
||||
|
||||
traceManifest := func(manifestName string, manifest *Manifest) error {
|
||||
reduceBlobs := func(data map[string]int64) (items, total int64) {
|
||||
for _, value := range data {
|
||||
items += 1
|
||||
total += value
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
traceManifest := func(manifestKind string, manifestName string, manifest *Manifest) error {
|
||||
for _, entry := range manifest.GetContents() {
|
||||
if entry.GetType() == Type_ExternalFile {
|
||||
blobName := string(entry.Data)
|
||||
if size := allBlobs.Get(blobName); size == nil {
|
||||
return fmt.Errorf("%s: dangling reference %s", manifestName, blobName)
|
||||
if size, ok := allBlobs[blobName]; ok {
|
||||
liveBlobs[blobName] = size
|
||||
} else {
|
||||
liveBlobs.Put(blobName, size)
|
||||
logc.Printf(ctx, "trace manifest: %s/%s: dangling reference %s",
|
||||
manifestKind, manifestName, blobName)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -36,42 +35,44 @@ func TraceGarbage(ctx context.Context) error {
|
||||
}
|
||||
|
||||
// Enumerate all blobs.
|
||||
logc.Printf(ctx, "trace: enumerating blobs")
|
||||
for metadata, err := range backend.EnumerateBlobs(ctx) {
|
||||
if err != nil {
|
||||
return fmt.Errorf("trace blobs err: %w", err)
|
||||
}
|
||||
allBlobs.Put(metadata.Name, &metadata.Size)
|
||||
allBlobs[metadata.Name] = metadata.Size
|
||||
}
|
||||
|
||||
// Enumerate blobs live via site manifests.
|
||||
logc.Printf(ctx, "trace: enumerating manifests")
|
||||
for item, err := range backend.GetAllManifests(ctx) {
|
||||
metadata, manifest := item.Splat()
|
||||
if err != nil {
|
||||
return fmt.Errorf("trace sites err: %w", err)
|
||||
}
|
||||
err = traceManifest(metadata.Name, manifest)
|
||||
err = traceManifest("site", metadata.Name, manifest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("trace sites err: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Enumerate blobs live via audit records.
|
||||
|
||||
logc.Printf(ctx, "trace: enumerating audit records")
|
||||
auditIDs := backend.SearchAuditLog(ctx, SearchAuditLogOptions{})
|
||||
for record, err := range backend.GetAuditLogRecords(ctx, auditIDs) {
|
||||
if err != nil {
|
||||
logc.Fatalln(ctx, err)
|
||||
return fmt.Errorf("trace audit err: %w", err)
|
||||
}
|
||||
if record.Manifest != nil {
|
||||
err = traceManifest(record.GetAuditID().String(), record.Manifest)
|
||||
err = traceManifest("audit", record.GetAuditID().String(), record.Manifest)
|
||||
if err != nil {
|
||||
return fmt.Errorf("trace audit err: %w", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
allBlobsCount, allBlobsSize := trieReduce(allBlobs)
|
||||
liveBlobsCount, liveBlobsSize := trieReduce(liveBlobs)
|
||||
allBlobsCount, allBlobsSize := reduceBlobs(allBlobs)
|
||||
liveBlobsCount, liveBlobsSize := reduceBlobs(liveBlobs)
|
||||
logc.Printf(ctx, "trace all: %d blobs, %s",
|
||||
allBlobsCount, datasize.ByteSize(allBlobsSize).HR())
|
||||
logc.Printf(ctx, "trace live: %d blobs, %s",
|
||||
|
||||
Reference in New Issue
Block a user