Files
at-container-registry/cmd/usage-report/main.go

760 lines
20 KiB
Go

// usage-report queries a hold service and generates a storage usage report
// grouped by user, with unique layers and totals.
//
// Usage:
//
// go run ./cmd/usage-report --hold https://hold01.atcr.io
// go run ./cmd/usage-report --hold https://hold01.atcr.io --from-manifests
// go run ./cmd/usage-report --hold https://hold01.atcr.io --list-blobs
package main
import (
"encoding/json"
"flag"
"fmt"
"io"
"net/http"
"net/url"
"os"
"sort"
"strings"
"time"
)
// LayerRecord matches the io.atcr.hold.layer record structure
type LayerRecord struct {
	Type      string `json:"$type"`     // record type discriminator
	Digest    string `json:"digest"`    // blob digest; used as the dedup key throughout this tool
	Size      int64  `json:"size"`      // blob size in bytes
	MediaType string `json:"mediaType"` // OCI media type of the layer
	Manifest  string `json:"manifest"`  // presumably the manifest referencing this layer — not read by this tool
	UserDID   string `json:"userDid"`   // DID of the user the layer is attributed to; may be empty
	CreatedAt string `json:"createdAt"` // creation timestamp, kept as the raw string
}
// ManifestRecord matches the io.atcr.manifest record structure
type ManifestRecord struct {
	Type       string `json:"$type"`      // record type discriminator
	Repository string `json:"repository"` // repository name the manifest belongs to
	Digest     string `json:"digest"`     // manifest digest
	HoldDID    string `json:"holdDid"`    // DID of the hold storing this manifest's blobs
	// Config is the image config blob; nil when the manifest has none
	// (e.g. an index/manifest-list).
	Config *struct {
		Digest string `json:"digest"`
		Size   int64  `json:"size"`
	} `json:"config"`
	// Layers are the content blobs referenced by this manifest.
	Layers []struct {
		Digest    string `json:"digest"`
		Size      int64  `json:"size"`
		MediaType string `json:"mediaType"`
	} `json:"layers"`
	// Manifests lists sub-manifests (index entries); not counted toward
	// usage by this tool.
	Manifests []struct {
		Digest string `json:"digest"`
		Size   int64  `json:"size"`
	} `json:"manifests"`
	CreatedAt string `json:"createdAt"` // creation timestamp, kept as the raw string
}
// CrewRecord matches the io.atcr.hold.crew record structure
type CrewRecord struct {
	Member      string   `json:"member"` // crew member DID; the only field this tool reads
	Role        string   `json:"role"`
	Permissions []string `json:"permissions"`
	AddedAt     string   `json:"addedAt"`
}
// ListRecordsResponse is the response from com.atproto.repo.listRecords
type ListRecordsResponse struct {
	Records []struct {
		URI string `json:"uri"`
		CID string `json:"cid"`
		// Value is left raw so each caller can decode it into its own
		// collection-specific record type.
		Value json.RawMessage `json:"value"`
	} `json:"records"`
	// Cursor is the pagination token; empty means the final page.
	Cursor string `json:"cursor,omitempty"`
}
// UserUsage tracks storage for a single user
type UserUsage struct {
	DID          string
	Handle       string           // resolved handle; falls back to the DID when resolution fails
	UniqueLayers map[string]int64 // digest -> size
	TotalSize    int64            // sum of sizes of all entries in UniqueLayers
	LayerCount   int              // number of entries in UniqueLayers
	Repositories map[string]bool  // unique repos
}
// client is the shared HTTP client for all requests; the timeout bounds
// every call against holds, PDSes, and the PLC directory.
var client = &http.Client{Timeout: 30 * time.Second}
// BlobInfo represents a single blob with its metadata
type BlobInfo struct {
	Digest    string
	Size      int64
	MediaType string
	UserDID   string // DID from the first layer record seen with one; may be empty
	Handle    string // resolved handle for UserDID; empty until resolved
}
// main parses flags, resolves the hold's DID, computes per-user usage
// via the selected strategy (layer records, manifests, or blob listing),
// and prints a human-readable report followed by a CSV section.
// Exits with status 1 on any fatal error.
func main() {
	holdURL := flag.String("hold", "https://hold01.atcr.io", "Hold service URL")
	fromManifests := flag.Bool("from-manifests", false, "Calculate usage from user manifests instead of hold layer records (more accurate but slower)")
	listBlobs := flag.Bool("list-blobs", false, "List all individual blobs sorted by size (largest first)")
	flag.Parse()
	// Normalize URL
	baseURL := strings.TrimSuffix(*holdURL, "/")
	fmt.Printf("Querying %s...\n\n", baseURL)
	// First, get the hold's DID
	holdDID, err := getHoldDID(baseURL)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to get hold DID: %v\n", err)
		os.Exit(1)
	}
	fmt.Printf("Hold DID: %s\n\n", holdDID)
	// If --list-blobs flag is set, run blob listing mode
	if *listBlobs {
		listAllBlobs(baseURL, holdDID)
		return
	}
	var userUsage map[string]*UserUsage
	if *fromManifests {
		fmt.Println("=== Calculating from user manifests (bypasses layer record bug) ===")
		userUsage, err = calculateFromManifests(baseURL, holdDID)
	} else {
		fmt.Println("=== Calculating from hold layer records ===")
		fmt.Println("NOTE: May undercount app-password users due to layer record bug")
		fmt.Println(" Use --from-manifests for more accurate results")
		userUsage, err = calculateFromLayerRecords(baseURL, holdDID)
	}
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to calculate usage: %v\n", err)
		os.Exit(1)
	}
	// Resolve DIDs to handles
	fmt.Println("\n\nResolving DIDs to handles...")
	for _, usage := range userUsage {
		handle, err := resolveDIDToHandle(usage.DID)
		if err != nil {
			// Resolution failure is non-fatal: fall back to the raw DID.
			usage.Handle = usage.DID
		} else {
			usage.Handle = handle
		}
	}
	// Convert to slice and sort by total size (descending)
	var sorted []*UserUsage
	for _, u := range userUsage {
		sorted = append(sorted, u)
	}
	sort.Slice(sorted, func(i, j int) bool {
		return sorted[i].TotalSize > sorted[j].TotalSize
	})
	// Print report
	fmt.Println("\n========================================")
	fmt.Println("STORAGE USAGE REPORT")
	fmt.Println("========================================")
	var grandTotal int64
	var grandLayers int
	for _, u := range sorted {
		grandTotal += u.TotalSize
		grandLayers += u.LayerCount
	}
	fmt.Printf("\nTotal Users: %d\n", len(sorted))
	fmt.Printf("Total Unique Layers: %d\n", grandLayers)
	fmt.Printf("Total Storage: %s\n\n", humanSize(grandTotal))
	fmt.Println("BY USER (sorted by storage):")
	fmt.Println("----------------------------------------")
	for i, u := range sorted {
		fmt.Printf("%3d. %s\n", i+1, u.Handle)
		fmt.Printf(" DID: %s\n", u.DID)
		fmt.Printf(" Unique Layers: %d\n", u.LayerCount)
		fmt.Printf(" Total Size: %s\n", humanSize(u.TotalSize))
		if len(u.Repositories) > 0 {
			var repos []string
			for r := range u.Repositories {
				repos = append(repos, r)
			}
			// Sort for deterministic output (map iteration order is random).
			sort.Strings(repos)
			fmt.Printf(" Repositories: %s\n", strings.Join(repos, ", "))
		}
		pct := float64(0)
		if grandTotal > 0 {
			pct = float64(u.TotalSize) / float64(grandTotal) * 100
		}
		fmt.Printf(" Share: %.1f%%\n\n", pct)
	}
	// Output CSV format for easy analysis
	fmt.Println("\n========================================")
	fmt.Println("CSV FORMAT")
	fmt.Println("========================================")
	fmt.Println("handle,did,unique_layers,total_bytes,total_human,repositories")
	for _, u := range sorted {
		var repos []string
		for r := range u.Repositories {
			repos = append(repos, r)
		}
		sort.Strings(repos)
		// Repositories are quoted and semicolon-joined so the list stays
		// in one CSV column regardless of repo count.
		fmt.Printf("%s,%s,%d,%d,%s,\"%s\"\n", u.Handle, u.DID, u.LayerCount, u.TotalSize, humanSize(u.TotalSize), strings.Join(repos, ";"))
	}
}
// listAllBlobs fetches all blobs and lists them sorted by size (largest first)
//
// It deduplicates the hold's layer records by digest, resolves each owner
// DID to a handle (caching lookups), and prints a human-readable report
// followed by a CSV section. Exits with status 1 if records cannot be
// fetched.
func listAllBlobs(baseURL, holdDID string) {
	fmt.Println("=== Fetching all blob records ===")
	layers, err := fetchAllLayerRecords(baseURL, holdDID)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Failed to fetch layer records: %v\n", err)
		os.Exit(1)
	}
	fmt.Printf("Fetched %d layer records\n", len(layers))
	// Deduplicate by digest, keeping track of first seen user
	blobMap := make(map[string]*BlobInfo)
	for _, layer := range layers {
		if existing, exists := blobMap[layer.Digest]; exists {
			// If we have a record with a user DID and existing doesn't, prefer this one
			if existing.UserDID == "" && layer.UserDID != "" {
				existing.UserDID = layer.UserDID
			}
			continue
		}
		blobMap[layer.Digest] = &BlobInfo{
			Digest:    layer.Digest,
			Size:      layer.Size,
			MediaType: layer.MediaType,
			UserDID:   layer.UserDID,
		}
	}
	// Convert to slice
	var blobs []*BlobInfo
	for _, b := range blobMap {
		blobs = append(blobs, b)
	}
	// Sort by size (largest first)
	sort.Slice(blobs, func(i, j int) bool {
		return blobs[i].Size > blobs[j].Size
	})
	fmt.Printf("Found %d unique blobs\n\n", len(blobs))
	// Resolve DIDs to handles, caching so each DID is looked up once.
	fmt.Println("Resolving DIDs to handles...")
	didToHandle := make(map[string]string)
	for _, b := range blobs {
		if b.UserDID == "" {
			continue
		}
		if _, exists := didToHandle[b.UserDID]; !exists {
			handle, err := resolveDIDToHandle(b.UserDID)
			if err != nil {
				// Non-fatal: fall back to the raw DID.
				didToHandle[b.UserDID] = b.UserDID
			} else {
				didToHandle[b.UserDID] = handle
			}
		}
		b.Handle = didToHandle[b.UserDID]
	}
	// Calculate total
	var totalSize int64
	for _, b := range blobs {
		totalSize += b.Size
	}
	// Print report
	fmt.Println("\n========================================")
	fmt.Println("BLOB SIZE REPORT (sorted largest to smallest)")
	fmt.Println("========================================")
	fmt.Printf("\nTotal Unique Blobs: %d\n", len(blobs))
	fmt.Printf("Total Storage: %s\n\n", humanSize(totalSize))
	fmt.Println("BLOBS:")
	fmt.Println("----------------------------------------")
	for i, b := range blobs {
		pct := float64(0)
		if totalSize > 0 {
			pct = float64(b.Size) / float64(totalSize) * 100
		}
		owner := b.Handle
		if owner == "" {
			owner = "(unknown)"
		}
		fmt.Printf("%4d. %s\n", i+1, humanSize(b.Size))
		fmt.Printf(" Digest: %s\n", b.Digest)
		fmt.Printf(" Owner: %s\n", owner)
		if b.MediaType != "" {
			fmt.Printf(" Type: %s\n", b.MediaType)
		}
		fmt.Printf(" Share: %.2f%%\n\n", pct)
	}
	// Output CSV format
	fmt.Println("\n========================================")
	fmt.Println("CSV FORMAT")
	fmt.Println("========================================")
	fmt.Println("rank,size_bytes,size_human,digest,owner,media_type,share_pct")
	for i, b := range blobs {
		pct := float64(0)
		if totalSize > 0 {
			pct = float64(b.Size) / float64(totalSize) * 100
		}
		// An unresolved owner is deliberately left as an empty CSV cell
		// (no "(unknown)" placeholder here, unlike the report above).
		fmt.Printf("%d,%d,%s,%s,%s,%s,%.2f\n", i+1, b.Size, humanSize(b.Size), b.Digest, b.Handle, b.MediaType, pct)
	}
}
// calculateFromLayerRecords uses the hold's layer records (original method)
//
// Builds one UserUsage per DID, counting each layer digest only once per
// user. Records without a user DID are skipped.
func calculateFromLayerRecords(baseURL, holdDID string) (map[string]*UserUsage, error) {
	records, err := fetchAllLayerRecords(baseURL, holdDID)
	if err != nil {
		return nil, err
	}
	fmt.Printf("Fetched %d layer records\n", len(records))
	byUser := make(map[string]*UserUsage)
	for _, rec := range records {
		did := rec.UserDID
		if did == "" {
			// Can't attribute this layer to anyone.
			continue
		}
		u := byUser[did]
		if u == nil {
			u = &UserUsage{
				DID:          did,
				UniqueLayers: make(map[string]int64),
				Repositories: make(map[string]bool),
			}
			byUser[did] = u
		}
		// Count each digest once per user.
		if _, dup := u.UniqueLayers[rec.Digest]; dup {
			continue
		}
		u.UniqueLayers[rec.Digest] = rec.Size
		u.TotalSize += rec.Size
		u.LayerCount++
	}
	return byUser, nil
}
// calculateFromManifests queries crew members and fetches their manifests from their PDSes
//
// For each user (crew plus captain) it resolves the user's PDS, lists
// that user's io.atcr.manifest records referencing this hold, and sums
// unique config/layer blobs per user. Users whose PDS or manifests can't
// be fetched are skipped with a progress note rather than failing the run.
func calculateFromManifests(baseURL, holdDID string) (map[string]*UserUsage, error) {
	// Get all crew members
	crewDIDs, err := fetchCrewMembers(baseURL, holdDID)
	if err != nil {
		return nil, fmt.Errorf("failed to fetch crew: %w", err)
	}
	// Also get captain; a lookup failure just means no captain is added.
	captainDID, err := fetchCaptain(baseURL, holdDID)
	if err == nil && captainDID != "" {
		// Add captain to list if not already there
		found := false
		for _, d := range crewDIDs {
			if d == captainDID {
				found = true
				break
			}
		}
		if !found {
			crewDIDs = append(crewDIDs, captainDID)
		}
	}
	fmt.Printf("Found %d users (crew + captain)\n", len(crewDIDs))
	userUsage := make(map[string]*UserUsage)
	for _, did := range crewDIDs {
		fmt.Printf(" Checking manifests for %s...", did)
		// Resolve DID to PDS
		pdsEndpoint, err := resolveDIDToPDS(did)
		if err != nil {
			fmt.Printf(" (failed to resolve PDS: %v)\n", err)
			continue
		}
		// Fetch manifests that use this hold
		manifests, err := fetchUserManifestsForHold(pdsEndpoint, did, holdDID)
		if err != nil {
			fmt.Printf(" (failed to fetch manifests: %v)\n", err)
			continue
		}
		if len(manifests) == 0 {
			fmt.Printf(" 0 manifests\n")
			continue
		}
		// Calculate unique layers across all manifests
		usage := &UserUsage{
			DID:          did,
			UniqueLayers: make(map[string]int64),
			Repositories: make(map[string]bool),
		}
		for _, m := range manifests {
			usage.Repositories[m.Repository] = true
			// Add config blob (nil for index/manifest-list records).
			if m.Config != nil {
				if _, seen := usage.UniqueLayers[m.Config.Digest]; !seen {
					usage.UniqueLayers[m.Config.Digest] = m.Config.Size
					usage.TotalSize += m.Config.Size
					usage.LayerCount++
				}
			}
			// Add layers, counting each digest once per user.
			for _, layer := range m.Layers {
				if _, seen := usage.UniqueLayers[layer.Digest]; !seen {
					usage.UniqueLayers[layer.Digest] = layer.Size
					usage.TotalSize += layer.Size
					usage.LayerCount++
				}
			}
		}
		fmt.Printf(" %d manifests, %d unique layers, %s\n", len(manifests), usage.LayerCount, humanSize(usage.TotalSize))
		// Only keep users that actually store something.
		if usage.LayerCount > 0 {
			userUsage[did] = usage
		}
	}
	return userUsage, nil
}
// fetchCrewMembers gets all crew member DIDs from the hold
//
// Pages through io.atcr.hold.crew records 100 at a time, deduplicating
// member DIDs. Records that fail to parse are skipped.
func fetchCrewMembers(baseURL, holdDID string) ([]string, error) {
	var dids []string
	seen := make(map[string]bool)
	cursor := ""
	for {
		u := fmt.Sprintf("%s/xrpc/com.atproto.repo.listRecords", baseURL)
		params := url.Values{}
		params.Set("repo", holdDID)
		params.Set("collection", "io.atcr.hold.crew")
		params.Set("limit", "100")
		if cursor != "" {
			params.Set("cursor", cursor)
		}
		resp, err := client.Get(u + "?" + params.Encode())
		if err != nil {
			return nil, err
		}
		// Fail on non-200 responses instead of decoding an error body as
		// an empty record list (matches fetchUserManifestsForHold).
		if resp.StatusCode != http.StatusOK {
			resp.Body.Close()
			return nil, fmt.Errorf("status %d", resp.StatusCode)
		}
		var listResp ListRecordsResponse
		if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil {
			resp.Body.Close()
			return nil, err
		}
		resp.Body.Close()
		for _, rec := range listResp.Records {
			var crew CrewRecord
			if err := json.Unmarshal(rec.Value, &crew); err != nil {
				continue
			}
			if crew.Member != "" && !seen[crew.Member] {
				seen[crew.Member] = true
				dids = append(dids, crew.Member)
			}
		}
		// A missing cursor or a short page both mean we're done.
		if listResp.Cursor == "" || len(listResp.Records) < 100 {
			break
		}
		cursor = listResp.Cursor
	}
	return dids, nil
}
// fetchCaptain gets the captain DID from the hold
//
// Reads the io.atcr.hold.captain/self record via getRecord and returns
// its owner field.
func fetchCaptain(baseURL, holdDID string) (string, error) {
	endpoint := fmt.Sprintf("%s/xrpc/com.atproto.repo.getRecord?repo=%s&collection=io.atcr.hold.captain&rkey=self",
		baseURL, url.QueryEscape(holdDID))
	res, err := client.Get(endpoint)
	if err != nil {
		return "", err
	}
	defer res.Body.Close()
	if res.StatusCode != http.StatusOK {
		return "", fmt.Errorf("status %d", res.StatusCode)
	}
	var record struct {
		Value struct {
			Owner string `json:"owner"`
		} `json:"value"`
	}
	if decodeErr := json.NewDecoder(res.Body).Decode(&record); decodeErr != nil {
		return "", decodeErr
	}
	return record.Value.Owner, nil
}
// fetchUserManifestsForHold fetches all manifests from a user's PDS that use the specified hold
//
// Pages through the user's io.atcr.manifest collection and keeps only
// records whose holdDid matches. Unparseable records are skipped.
func fetchUserManifestsForHold(pdsEndpoint, userDID, holdDID string) ([]ManifestRecord, error) {
	// fetchPage retrieves one page of up to 100 records.
	fetchPage := func(cursor string) (*ListRecordsResponse, error) {
		q := url.Values{}
		q.Set("repo", userDID)
		q.Set("collection", "io.atcr.manifest")
		q.Set("limit", "100")
		if cursor != "" {
			q.Set("cursor", cursor)
		}
		endpoint := fmt.Sprintf("%s/xrpc/com.atproto.repo.listRecords", pdsEndpoint)
		resp, err := client.Get(endpoint + "?" + q.Encode())
		if err != nil {
			return nil, err
		}
		defer resp.Body.Close()
		if resp.StatusCode != http.StatusOK {
			return nil, fmt.Errorf("status %d", resp.StatusCode)
		}
		var page ListRecordsResponse
		if err := json.NewDecoder(resp.Body).Decode(&page); err != nil {
			return nil, err
		}
		return &page, nil
	}

	var matched []ManifestRecord
	cursor := ""
	for {
		page, err := fetchPage(cursor)
		if err != nil {
			return nil, err
		}
		for _, rec := range page.Records {
			var m ManifestRecord
			if json.Unmarshal(rec.Value, &m) != nil {
				continue
			}
			// Only include manifests for this hold
			if m.HoldDID == holdDID {
				matched = append(matched, m)
			}
		}
		// A missing cursor or a short page signals the final page.
		if page.Cursor == "" || len(page.Records) < 100 {
			break
		}
		cursor = page.Cursor
	}
	return matched, nil
}
// getHoldDID fetches the hold's DID from /.well-known/atproto-did
func getHoldDID(baseURL string) (string, error) {
resp, err := http.Get(baseURL + "/.well-known/atproto-did")
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("unexpected status: %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
return "", err
}
return strings.TrimSpace(string(body)), nil
}
// fetchAllLayerRecords fetches all layer records with pagination
//
// Pages through io.atcr.hold.layer on the hold's repo, 100 records at a
// time, logging each request URL and running total. Records that fail to
// parse produce a warning on stderr and are skipped rather than aborting
// the whole run.
func fetchAllLayerRecords(baseURL, holdDID string) ([]LayerRecord, error) {
	var allLayers []LayerRecord
	cursor := ""
	limit := 100
	for {
		u := fmt.Sprintf("%s/xrpc/com.atproto.repo.listRecords", baseURL)
		params := url.Values{}
		params.Set("repo", holdDID)
		params.Set("collection", "io.atcr.hold.layer")
		params.Set("limit", fmt.Sprintf("%d", limit))
		if cursor != "" {
			params.Set("cursor", cursor)
		}
		fullURL := u + "?" + params.Encode()
		fmt.Printf(" Fetching: %s\n", fullURL)
		resp, err := client.Get(fullURL)
		if err != nil {
			return nil, fmt.Errorf("request failed: %w", err)
		}
		if resp.StatusCode != http.StatusOK {
			// Include the error body in the message to aid debugging.
			body, _ := io.ReadAll(resp.Body)
			resp.Body.Close()
			return nil, fmt.Errorf("unexpected status %d: %s", resp.StatusCode, string(body))
		}
		var listResp ListRecordsResponse
		if err := json.NewDecoder(resp.Body).Decode(&listResp); err != nil {
			resp.Body.Close()
			return nil, fmt.Errorf("decode failed: %w", err)
		}
		resp.Body.Close()
		for _, rec := range listResp.Records {
			var layer LayerRecord
			if err := json.Unmarshal(rec.Value, &layer); err != nil {
				fmt.Fprintf(os.Stderr, "Warning: failed to parse layer record: %v\n", err)
				continue
			}
			allLayers = append(allLayers, layer)
		}
		fmt.Printf(" Got %d records (total: %d)\n", len(listResp.Records), len(allLayers))
		// A missing cursor or a short page both signal the final page.
		if listResp.Cursor == "" || len(listResp.Records) < limit {
			break
		}
		cursor = listResp.Cursor
	}
	return allLayers, nil
}
// resolveDIDToHandle resolves a DID to a handle using the PLC directory or did:web
//
// did:web DIDs map directly to their domain; did:plc DIDs are looked up
// in the PLC directory and the first at:// alias is returned. DIDs with
// any other method, or PLC documents without an at:// alias, resolve to
// the DID itself with a nil error.
func resolveDIDToHandle(did string) (string, error) {
	if strings.HasPrefix(did, "did:web:") {
		return strings.TrimPrefix(did, "did:web:"), nil
	}
	if !strings.HasPrefix(did, "did:plc:") {
		// Unknown method: display the DID as-is.
		return did, nil
	}
	resp, err := client.Get("https://plc.directory/" + did)
	if err != nil {
		return "", fmt.Errorf("PLC query failed: %w", err)
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("PLC returned status %d", resp.StatusCode)
	}
	var doc struct {
		AlsoKnownAs []string `json:"alsoKnownAs"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&doc); err != nil {
		return "", fmt.Errorf("failed to parse PLC response: %w", err)
	}
	for _, alias := range doc.AlsoKnownAs {
		if strings.HasPrefix(alias, "at://") {
			return strings.TrimPrefix(alias, "at://"), nil
		}
	}
	// No at:// alias found; fall back to the DID.
	return did, nil
}
// resolveDIDToPDS resolves a DID to its PDS endpoint
//
// did:web DIDs are mapped directly to a URL; did:plc DIDs are looked up
// in the PLC directory and the AtprotoPersonalDataServer service endpoint
// is returned. Other DID methods are unsupported.
func resolveDIDToPDS(did string) (string, error) {
	if strings.HasPrefix(did, "did:web:") {
		// did:web:example.com -> https://example.com
		// did:web:host%3A8080 -> http://host:8080
		domain := strings.TrimPrefix(did, "did:web:")
		// Percent-encoding is case-insensitive (RFC 3986), so accept
		// %3a as well as %3A for the encoded port colon.
		domain = strings.ReplaceAll(domain, "%3A", ":")
		domain = strings.ReplaceAll(domain, "%3a", ":")
		// Heuristic: an explicit port marks a local/dev host served
		// over plain HTTP.
		scheme := "https"
		if strings.Contains(domain, ":") {
			scheme = "http"
		}
		return scheme + "://" + domain, nil
	}
	if strings.HasPrefix(did, "did:plc:") {
		plcURL := "https://plc.directory/" + did
		resp, err := client.Get(plcURL)
		if err != nil {
			return "", fmt.Errorf("PLC query failed: %w", err)
		}
		defer resp.Body.Close()
		if resp.StatusCode != http.StatusOK {
			return "", fmt.Errorf("PLC returned status %d", resp.StatusCode)
		}
		var plcDoc struct {
			Service []struct {
				ID              string `json:"id"`
				Type            string `json:"type"`
				ServiceEndpoint string `json:"serviceEndpoint"`
			} `json:"service"`
		}
		if err := json.NewDecoder(resp.Body).Decode(&plcDoc); err != nil {
			return "", fmt.Errorf("failed to parse PLC response: %w", err)
		}
		for _, svc := range plcDoc.Service {
			if svc.Type == "AtprotoPersonalDataServer" {
				return svc.ServiceEndpoint, nil
			}
		}
		return "", fmt.Errorf("no PDS found in DID document")
	}
	return "", fmt.Errorf("unsupported DID method")
}
// humanSize converts bytes to human-readable format
//
// Values are scaled by binary (1024-based) units with two decimal places;
// anything below 1 KiB (including negatives) is printed as raw bytes.
func humanSize(bytes int64) string {
	const unit int64 = 1024
	thresholds := []struct {
		limit int64
		label string
	}{
		{unit * unit * unit * unit, "TB"},
		{unit * unit * unit, "GB"},
		{unit * unit, "MB"},
		{unit, "KB"},
	}
	for _, t := range thresholds {
		if bytes >= t.limit {
			return fmt.Sprintf("%.2f %s", float64(bytes)/float64(t.limit), t.label)
		}
	}
	return fmt.Sprintf("%d B", bytes)
}