diff --git a/src/histogram.go b/src/histogram.go
new file mode 100644
index 0000000..ef08e12
--- /dev/null
+++ b/src/histogram.go
@@ -0,0 +1,47 @@
+package git_pages
+
+import (
+	"context"
+	"fmt"
+	"maps"
+	"slices"
+	"strings"
+)
+
+// DomainStatistics holds manifest sizes summed over all manifests of one domain.
+type DomainStatistics struct {
+	// Domain is the part of the manifest name before the first "/".
+	Domain string
+	// The size fields are sums of the corresponding manifest getters
+	// (GetOriginalSize, GetCompressedSize, GetStoredSize).
+	OriginalSize   int64
+	CompressedSize int64
+	StoredSize     int64
+}
+
+// SizeHistogram fetches every manifest enumerated by the backend and totals its sizes
+// per domain. The result is collected from a map, so its order is unspecified; callers
+// that display it should sort it. The first enumeration or fetch error aborts the scan.
+func SizeHistogram(ctx context.Context) ([]*DomainStatistics, error) {
+	statisticsMap := map[string]*DomainStatistics{}
+	for metadata, err := range backend.EnumerateManifests(ctx) {
+		if err != nil {
+			return nil, fmt.Errorf("size histogram: enumerate manifests: %w", err)
+		}
+		manifest, _, err := backend.GetManifest(ctx, metadata.Name, GetManifestOptions{})
+		if err != nil {
+			return nil, fmt.Errorf("size histogram: get manifest %q: %w", metadata.Name, err)
+		}
+		// Group by everything before the first "/" of the manifest name.
+		domain, _, _ := strings.Cut(metadata.Name, "/")
+		statistics, found := statisticsMap[domain]
+		if !found {
+			statistics = &DomainStatistics{Domain: domain}
+			statisticsMap[domain] = statistics
+		}
+		statistics.OriginalSize += manifest.GetOriginalSize()
+		statistics.CompressedSize += manifest.GetCompressedSize()
+		statistics.StoredSize += manifest.GetStoredSize()
+	}
+	return slices.Collect(maps.Values(statisticsMap)), nil
+}
diff --git a/src/main.go b/src/main.go
index 90d63a9..714a11b 100644
--- a/src/main.go
+++ b/src/main.go
@@ -1,6 +1,7 @@
 package git_pages
 
 import (
+	"cmp"
 	"context"
 	"crypto/tls"
 	"errors"
@@ -16,6 +17,7 @@ import (
 	"os"
 	"path"
 	"runtime/debug"
+	"slices"
 	"strings"
 	"time"
 
@@ -188,7 +190,7 @@ func usage() {
 	fmt.Fprintf(os.Stderr, "(audit) "+
 		"git-pages {-audit-log|-audit-read |-audit-server [args...]}\n")
 	fmt.Fprintf(os.Stderr, "(maint) "+
-		"git-pages {-run-migration |-trace-garbage}\n")
+		"git-pages {-run-migration |-trace-garbage|-size-histogram {original|stored}}\n")
 	flag.PrintDefaults()
 }
 
@@ -230,6 +232,8 @@ func Main(versionInfo string) {
 		"listen for notifications on `endpoint` and spawn a process for each audit event")
 	runMigration := flag.String("run-migration", "",
 		"run a store `migration` (one of: create-domain-markers)")
+	sizeHistogram := flag.String("size-histogram", "",
+		"display histogram of `size-type` (original or stored) per domain")
 	traceGarbage := flag.Bool("trace-garbage", false,
 		"estimate total size of unreachable blobs")
 	version := flag.Bool("version", false,
@@ -256,6 +260,7 @@ func Main(versionInfo string) {
 		*auditRollback != "",
 		*auditServer != "",
 		*runMigration != "",
+		*sizeHistogram != "",
 		*traceGarbage,
 	} {
 		if selected {
@@ -265,8 +270,8 @@ func Main(versionInfo string) {
 	if cliOperations > 1 {
 		logc.Fatalln(ctx, "-list-blobs, -list-manifests, -get-blob, -get-manifest, -get-archive, "+
 			"-update-site, -freeze-domain, -unfreeze-domain, -audit-log, -audit-read, "+
-			"-audit-rollback, -audit-server, -run-migration, and -trace-garbage are "+
-			"mutually exclusive")
+			"-audit-rollback, -audit-server, -run-migration, -size-histogram, "+
+			"and -trace-garbage are mutually exclusive")
 	}
 
 	if *configTomlPath != "" && *noConfig {
@@ -547,6 +552,59 @@ func Main(versionInfo string) {
 			logc.Fatalln(ctx, err)
 		}
 
+	case *sizeHistogram != "":
+		extractSize := func(s *DomainStatistics) int64 { return 0 }
+		switch *sizeHistogram {
+		case "original":
+			// Displays a size histogram using the `manifest.OriginalSize`, which is useful to see
+			// which site is the closest to hitting the size limit (checked against apparent size).
+			// This apparent size does not have any direct relationship with used storage.
+			extractSize = func(s *DomainStatistics) int64 { return s.OriginalSize }
+		case "stored":
+			// Displays a size histogram using the `manifest.StoredSize`, which is useful to see
+			// which site consumes the most resources. The site is keeping at least this many
+			// bytes worth of blobs alive, but removing it may not free any space because
+			// deduplication is global.
+			extractSize = func(s *DomainStatistics) int64 { return s.StoredSize }
+		default:
+			logc.Fatalln(ctx, fmt.Sprintf(
+				"unknown histogram type %q (expected original or stored)", *sizeHistogram))
+		}
+
+		histogram, err := SizeHistogram(ctx)
+		if err != nil {
+			logc.Fatalln(ctx, err)
+		}
+		// Sort ascending by size with the domain as tiebreak. SizeHistogram returns entries in
+		// map iteration order and SortFunc is not stable, so without the tiebreak domains of
+		// equal size would be printed in a different order from run to run.
+		slices.SortFunc(histogram, func(a *DomainStatistics, b *DomainStatistics) int {
+			return cmp.Or(
+				cmp.Compare(extractSize(a), extractSize(b)),
+				cmp.Compare(a.Domain, b.Domain),
+			)
+		})
+
+		if len(histogram) > 0 {
+			// The largest entry is last after sorting and defines the full-scale bar; the floor
+			// of 1 avoids dividing by zero when every size is 0.
+			fullScaleSize := max(extractSize(histogram[len(histogram)-1]), 1)
+			fullScaleWidth := int64(40)
+			for _, statistics := range histogram {
+				size := extractSize(statistics)
+				// Clamp to the bar area: strings.Repeat panics on a negative count, which a
+				// negative size or an overflowing multiplication would otherwise produce.
+				barWidth := min(max(size*fullScaleWidth/fullScaleSize, 0), fullScaleWidth)
+				spaceWidth := fullScaleWidth - barWidth
+				bar := strings.Repeat("*", int(barWidth)) + strings.Repeat(" ", int(spaceWidth))
+				fmt.Fprintf(color.Output, "%s %s %s\n",
+					color.HiBlackString(fmt.Sprint("|", bar, "|")),
+					statistics.Domain,
+					color.HiGreenString(datasize.ByteSize(size).HR()),
+				)
+			}
+		}
+
 	case *traceGarbage:
 		if err = TraceGarbage(ctx); err != nil {
 			logc.Fatalln(ctx, err)