mirror of
https://tangled.org/evan.jarrett.net/at-container-registry
synced 2026-04-24 18:30:34 +00:00
617 lines
16 KiB
Go
617 lines
16 KiB
Go
// relay-compare compares ATProto relays by querying listReposByCollection
// for all io.atcr.* record types and showing what's missing from each relay.
//
// Usage:
//
//	go run ./cmd/relay-compare https://relay1.us-east.bsky.network https://relay1.us-west.bsky.network
|
|
package main
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"flag"
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"os"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/bluesky-social/indigo/atproto/identity"
|
|
"github.com/bluesky-social/indigo/atproto/syntax"
|
|
"github.com/bluesky-social/indigo/xrpc"
|
|
)
|
|
|
|
// ANSI escape sequences used to colorize terminal output. They are variables
// rather than constants so that disableColors can blank them out (main does
// so for --no-color or a non-empty NO_COLOR environment variable).
var (
	cRed    = "\033[31m"
	cGreen  = "\033[32m"
	cYellow = "\033[33m"
	cCyan   = "\033[36m"
	cBold   = "\033[1m"
	cDim    = "\033[2m"
	cReset  = "\033[0m"
)

// disableColors replaces every color escape sequence with the empty string,
// so all later formatted output is free of ANSI codes.
func disableColors() {
	for _, code := range []*string{&cRed, &cGreen, &cYellow, &cCyan, &cBold, &cDim, &cReset} {
		*code = ""
	}
}
|
|
|
|
// allCollections lists every io.atcr.* collection NSID that is compared when
// no single collection is selected on the command line. The order here is the
// order in which collections are reported.
var allCollections = []string{
	"io.atcr.manifest",
	"io.atcr.tag",
	"io.atcr.sailor.profile",
	"io.atcr.sailor.star",
	"io.atcr.repo.page",
	"io.atcr.hold.captain",
	"io.atcr.hold.crew",
	"io.atcr.hold.layer",
	"io.atcr.hold.stats",
	"io.atcr.hold.scan",
}
|
|
|
|
// summaryRow is one line of the final summary table, describing how a single
// collection compares across all relays.
type summaryRow struct {
	// collection is the collection NSID this row reports on.
	collection string
	// counts holds the number of DIDs each relay returned, one entry per relay.
	counts []int
	// status is one of "sync", "diff" or "error".
	status string
	// diffCount is the total number of missing DID entries summed over relays.
	diffCount int

	// The remaining counters are only filled in when diffs are verified
	// against the PDS.
	realGaps    int // record exists on the PDS but the relay is missing it
	ghosts      int // record does not exist on the PDS; a relay has a stale entry
	deactivated int // account is deactivated/deleted on the PDS
}
|
|
|
|
// verifyResult is the outcome of checking one (DID, collection) pair against
// the account's PDS.
type verifyResult struct {
	// exists reports whether the PDS returned at least one record.
	exists bool
	// deactivated reports that the account is deactivated/deleted on the PDS.
	deactivated bool
	// err is set when the check itself could not be completed.
	err error
}
|
|
|
|
// key is the composite map key used for result lookups. The first field is
// always a collection; the second is either a relay URL (fetch results) or a
// DID (verification results).
type key struct {
	col   string
	relay string
}
|
|
|
|
// diffEntry records that a DID present on at least one relay for a collection
// is absent from one particular relay.
type diffEntry struct {
	// did is the DID that is missing.
	did string
	// collection is the collection NSID the DID is missing for.
	collection string
	// relayIdx is the index of the relay that lacks the DID.
	relayIdx int
}
|
|
|
|
// listReposByCollectionResult is the JSON body decoded from a
// com.atproto.sync.listReposByCollection response.
type listReposByCollectionResult struct {
	// Repos is the page of repositories returned by the relay.
	Repos []repoRef `json:"repos"`
	// Cursor, when non-empty, is passed back to request the next page.
	Cursor string `json:"cursor,omitempty"`
}

// repoRef is a single entry of listReposByCollectionResult.Repos.
type repoRef struct {
	// DID identifies the repository.
	DID string `json:"did"`
}
|
|
|
|
// listRecordsResult is the JSON body decoded from a
// com.atproto.repo.listRecords response. Records are kept as raw JSON because
// only their count is inspected.
type listRecordsResult struct {
	// Records is the page of records, left undecoded.
	Records []json.RawMessage `json:"records"`
	// Cursor is the pagination cursor, if the server returned one.
	Cursor string `json:"cursor,omitempty"`
}
|
|
|
|
// Shared identity directory for DID resolution
|
|
var dir identity.Directory
|
|
|
|
func main() {
|
|
noColor := flag.Bool("no-color", false, "disable colored output")
|
|
verify := flag.Bool("verify", false, "verify diffs against PDS to distinguish real gaps from ghost entries")
|
|
hideGhosts := flag.Bool("hide-ghosts", false, "with --verify, hide ghost and deactivated entries from output")
|
|
collection := flag.String("collection", "", "compare only this collection")
|
|
timeout := flag.Duration("timeout", 2*time.Minute, "timeout for all relay queries")
|
|
flag.Usage = func() {
|
|
fmt.Fprintf(os.Stderr, "Compare ATProto relays by querying listReposByCollection for io.atcr.* records.\n\n")
|
|
fmt.Fprintf(os.Stderr, "Usage:\n relay-compare [flags] <relay-url> <relay-url> [relay-url...]\n\n")
|
|
fmt.Fprintf(os.Stderr, "Example:\n")
|
|
fmt.Fprintf(os.Stderr, " go run ./cmd/relay-compare https://relay1.us-east.bsky.network https://relay1.us-west.bsky.network\n\n")
|
|
fmt.Fprintf(os.Stderr, "Flags:\n")
|
|
flag.PrintDefaults()
|
|
}
|
|
flag.Parse()
|
|
|
|
if *noColor || os.Getenv("NO_COLOR") != "" {
|
|
disableColors()
|
|
}
|
|
|
|
relays := flag.Args()
|
|
if len(relays) < 2 {
|
|
flag.Usage()
|
|
os.Exit(1)
|
|
}
|
|
|
|
for i, r := range relays {
|
|
relays[i] = strings.TrimRight(r, "/")
|
|
}
|
|
|
|
cols := allCollections
|
|
if *collection != "" {
|
|
cols = []string{*collection}
|
|
}
|
|
|
|
ctx, cancel := context.WithTimeout(context.Background(), *timeout)
|
|
defer cancel()
|
|
|
|
dir = identity.DefaultDirectory()
|
|
|
|
// Short display names for each relay
|
|
names := make([]string, len(relays))
|
|
maxNameLen := 0
|
|
for i, r := range relays {
|
|
names[i] = shortName(r)
|
|
if len(names[i]) > maxNameLen {
|
|
maxNameLen = len(names[i])
|
|
}
|
|
}
|
|
|
|
fmt.Printf("%sFetching %d collections from %d relays...%s\n", cDim, len(cols), len(relays), cReset)
|
|
|
|
// Fetch all data in parallel: every (collection, relay) pair concurrently
|
|
type fetchResult struct {
|
|
dids map[string]struct{}
|
|
err error
|
|
}
|
|
allResults := make(map[key]fetchResult)
|
|
var mu sync.Mutex
|
|
var wg sync.WaitGroup
|
|
|
|
for _, col := range cols {
|
|
for _, relay := range relays {
|
|
wg.Add(1)
|
|
go func(col, relay string) {
|
|
defer wg.Done()
|
|
dids, err := fetchAllDIDs(ctx, relay, col)
|
|
mu.Lock()
|
|
allResults[key{col, relay}] = fetchResult{dids, err}
|
|
mu.Unlock()
|
|
}(col, relay)
|
|
}
|
|
}
|
|
wg.Wait()
|
|
|
|
// Collect all diffs across collections (for optional verification)
|
|
var allDiffs []diffEntry
|
|
|
|
// First pass: compute diffs per collection
|
|
type colDiffs struct {
|
|
hasError bool
|
|
counts []int
|
|
// per-relay missing DIDs (sorted)
|
|
missing [][]string
|
|
}
|
|
colResults := make(map[string]*colDiffs)
|
|
|
|
for _, col := range cols {
|
|
cd := &colDiffs{counts: make([]int, len(relays)), missing: make([][]string, len(relays))}
|
|
colResults[col] = cd
|
|
|
|
for ri, relay := range relays {
|
|
r := allResults[key{col, relay}]
|
|
if r.err != nil {
|
|
cd.hasError = true
|
|
} else {
|
|
cd.counts[ri] = len(r.dids)
|
|
}
|
|
}
|
|
|
|
if cd.hasError {
|
|
continue
|
|
}
|
|
|
|
// Build union of all DIDs across relays
|
|
union := make(map[string]struct{})
|
|
for _, relay := range relays {
|
|
for did := range allResults[key{col, relay}].dids {
|
|
union[did] = struct{}{}
|
|
}
|
|
}
|
|
|
|
for ri, relay := range relays {
|
|
var missing []string
|
|
for did := range union {
|
|
if _, ok := allResults[key{col, relay}].dids[did]; !ok {
|
|
missing = append(missing, did)
|
|
}
|
|
}
|
|
sort.Strings(missing)
|
|
cd.missing[ri] = missing
|
|
for _, did := range missing {
|
|
allDiffs = append(allDiffs, diffEntry{did: did, collection: col, relayIdx: ri})
|
|
}
|
|
}
|
|
}
|
|
|
|
// Optionally verify diffs against PDS
|
|
verified := make(map[key]verifyResult)
|
|
if *verify && len(allDiffs) > 0 {
|
|
verified = verifyDiffs(ctx, allDiffs)
|
|
}
|
|
|
|
// Display per-collection diffs and collect summary
|
|
var summary []summaryRow
|
|
totalMissing := 0
|
|
totalRealGaps := 0
|
|
totalGhosts := 0
|
|
totalDeactivated := 0
|
|
|
|
for _, col := range cols {
|
|
fmt.Printf("\n%s%s━━━ %s ━━━%s\n", cBold, cCyan, col, cReset)
|
|
|
|
cd := colResults[col]
|
|
row := summaryRow{collection: col, counts: cd.counts}
|
|
|
|
if cd.hasError {
|
|
for ri, relay := range relays {
|
|
r := allResults[key{col, relay}]
|
|
if r.err != nil {
|
|
fmt.Printf(" %-*s %s%serror%s: %v\n", maxNameLen, names[ri], cBold, cRed, cReset, r.err)
|
|
} else {
|
|
fmt.Printf(" %-*s %s%d%s DIDs\n", maxNameLen, names[ri], cBold, len(r.dids), cReset)
|
|
}
|
|
}
|
|
row.status = "error"
|
|
summary = append(summary, row)
|
|
continue
|
|
}
|
|
|
|
// Show counts per relay
|
|
for ri := range relays {
|
|
fmt.Printf(" %-*s %s%d%s DIDs\n", maxNameLen, names[ri], cBold, cd.counts[ri], cReset)
|
|
}
|
|
|
|
// Show missing DIDs per relay
|
|
inSync := true
|
|
for ri := range relays {
|
|
missing := cd.missing[ri]
|
|
if len(missing) == 0 {
|
|
continue
|
|
}
|
|
|
|
inSync = false
|
|
totalMissing += len(missing)
|
|
row.diffCount += len(missing)
|
|
|
|
fmt.Printf("\n %sMissing from %s (%d):%s\n", cRed, names[ri], len(missing), cReset)
|
|
for _, did := range missing {
|
|
suffix := ""
|
|
skip := false
|
|
if *verify {
|
|
vr, ok := verified[key{col, did}]
|
|
if !ok {
|
|
suffix = fmt.Sprintf(" %s(verify: unknown)%s", cDim, cReset)
|
|
} else if vr.err != nil {
|
|
suffix = fmt.Sprintf(" %s(verify: %s)%s", cDim, vr.err, cReset)
|
|
} else if vr.deactivated {
|
|
suffix = fmt.Sprintf(" %s← deactivated%s", cDim, cReset)
|
|
row.deactivated++
|
|
totalDeactivated++
|
|
skip = *hideGhosts
|
|
} else if vr.exists {
|
|
suffix = fmt.Sprintf(" %s← real gap%s", cRed, cReset)
|
|
row.realGaps++
|
|
totalRealGaps++
|
|
} else {
|
|
suffix = fmt.Sprintf(" %s← ghost (not on PDS)%s", cDim, cReset)
|
|
row.ghosts++
|
|
totalGhosts++
|
|
skip = *hideGhosts
|
|
}
|
|
}
|
|
if !skip {
|
|
fmt.Printf(" %s- %s%s%s\n", cRed, did, cReset, suffix)
|
|
}
|
|
}
|
|
}
|
|
|
|
// When verifying, ghost/deactivated-only diffs are considered in sync
|
|
if !inSync && *verify && row.realGaps == 0 {
|
|
inSync = true
|
|
}
|
|
|
|
if inSync {
|
|
notes := ""
|
|
if !*hideGhosts {
|
|
notes = formatSyncNotes(row.ghosts, row.deactivated)
|
|
}
|
|
if notes != "" {
|
|
fmt.Printf(" %s✓ in sync%s %s(%s)%s\n", cGreen, cReset, cDim, notes, cReset)
|
|
} else {
|
|
fmt.Printf(" %s✓ in sync%s\n", cGreen, cReset)
|
|
}
|
|
row.status = "sync"
|
|
} else {
|
|
row.status = "diff"
|
|
}
|
|
summary = append(summary, row)
|
|
}
|
|
|
|
// Summary table
|
|
printSummary(summary, names, maxNameLen, totalMissing, *verify, *hideGhosts, totalRealGaps, totalGhosts, totalDeactivated)
|
|
}
|
|
|
|
func printSummary(rows []summaryRow, names []string, maxNameLen, totalMissing int, showVerify, hideGhosts bool, totalRealGaps, totalGhosts, totalDeactivated int) {
|
|
fmt.Printf("\n%s%s━━━ Summary ━━━%s\n\n", cBold, cCyan, cReset)
|
|
|
|
// Build short labels (A, B, C, ...) for compact columns
|
|
labels := make([]string, len(names))
|
|
for i, name := range names {
|
|
labels[i] = string(rune('A' + i))
|
|
fmt.Printf(" %s%s%s: %s\n", cBold, labels[i], cReset, name)
|
|
}
|
|
fmt.Println()
|
|
|
|
colW := len("Collection")
|
|
for _, row := range rows {
|
|
if len(row.collection) > colW {
|
|
colW = len(row.collection)
|
|
}
|
|
}
|
|
relayW := 6
|
|
|
|
// Header
|
|
fmt.Printf(" %-*s", colW, "Collection")
|
|
for _, label := range labels {
|
|
fmt.Printf(" %*s", relayW, label)
|
|
}
|
|
fmt.Printf(" Status\n")
|
|
|
|
// Separator
|
|
fmt.Printf(" %s", strings.Repeat("─", colW))
|
|
for range labels {
|
|
fmt.Printf(" %s", strings.Repeat("─", relayW))
|
|
}
|
|
fmt.Printf(" %s\n", strings.Repeat("─", 14))
|
|
|
|
// Data rows
|
|
for _, row := range rows {
|
|
fmt.Printf(" %-*s", colW, row.collection)
|
|
for _, c := range row.counts {
|
|
switch row.status {
|
|
case "error":
|
|
fmt.Printf(" %*s", relayW, fmt.Sprintf("%s—%s", cDim, cReset))
|
|
default:
|
|
fmt.Printf(" %*d", relayW, c)
|
|
}
|
|
}
|
|
switch row.status {
|
|
case "sync":
|
|
notes := ""
|
|
if !hideGhosts {
|
|
notes = formatSyncNotes(row.ghosts, row.deactivated)
|
|
}
|
|
if notes != "" {
|
|
fmt.Printf(" %s✓ in sync%s %s(%s)%s", cGreen, cReset, cDim, notes, cReset)
|
|
} else {
|
|
fmt.Printf(" %s✓ in sync%s", cGreen, cReset)
|
|
}
|
|
case "diff":
|
|
if showVerify {
|
|
if hideGhosts {
|
|
fmt.Printf(" %s≠ %d missing%s", cYellow, row.realGaps, cReset)
|
|
} else {
|
|
notes := formatSyncNotes(row.ghosts, row.deactivated)
|
|
if notes != "" {
|
|
notes = ", " + notes
|
|
}
|
|
fmt.Printf(" %s≠ %d missing%s %s(%d real%s)%s",
|
|
cYellow, row.realGaps, cReset, cDim, row.realGaps, notes, cReset)
|
|
}
|
|
} else {
|
|
fmt.Printf(" %s≠ %d missing%s", cYellow, row.diffCount, cReset)
|
|
}
|
|
case "error":
|
|
fmt.Printf(" %s✗ error%s", cRed, cReset)
|
|
}
|
|
fmt.Println()
|
|
}
|
|
|
|
// Footer
|
|
fmt.Println()
|
|
if totalMissing > 0 {
|
|
if showVerify && totalRealGaps == 0 {
|
|
if hideGhosts {
|
|
fmt.Printf("%s✓ All relays in sync%s\n", cGreen, cReset)
|
|
} else {
|
|
notes := formatSyncNotes(totalGhosts, totalDeactivated)
|
|
fmt.Printf("%s✓ All relays in sync%s %s(%s)%s\n", cGreen, cReset, cDim, notes, cReset)
|
|
}
|
|
} else {
|
|
if showVerify {
|
|
fmt.Printf("%s%d real gaps across relays%s", cYellow, totalRealGaps, cReset)
|
|
if !hideGhosts {
|
|
notes := formatSyncNotes(totalGhosts, totalDeactivated)
|
|
if notes != "" {
|
|
fmt.Printf(" %s(%s)%s", cDim, notes, cReset)
|
|
}
|
|
}
|
|
fmt.Println()
|
|
} else {
|
|
fmt.Printf("%s%d total missing DID-collection pairs across relays%s\n", cYellow, totalMissing, cReset)
|
|
}
|
|
}
|
|
} else {
|
|
fmt.Printf("%s✓ All relays fully in sync%s\n", cGreen, cReset)
|
|
}
|
|
}
|
|
|
|
// formatSyncNotes renders the ghost and deactivated counts as a short,
// comma-separated note such as "2 ghost, 1 deactivated". Counts that are not
// positive are left out; if neither count is positive the result is "".
func formatSyncNotes(ghosts, deactivated int) string {
	var b strings.Builder
	if ghosts > 0 {
		fmt.Fprintf(&b, "%d ghost", ghosts)
	}
	if deactivated > 0 {
		// Separate from the ghost note only if one was written.
		if b.Len() > 0 {
			b.WriteString(", ")
		}
		fmt.Fprintf(&b, "%d deactivated", deactivated)
	}
	return b.String()
}
|
|
|
|
// verifyDiffs resolves each diff DID to its PDS and checks if records actually exist.
|
|
func verifyDiffs(ctx context.Context, diffs []diffEntry) map[key]verifyResult {
|
|
// Collect unique (DID, collection) pairs to verify
|
|
type didCol struct{ did, col string }
|
|
unique := make(map[didCol]struct{})
|
|
for _, d := range diffs {
|
|
unique[didCol{d.did, d.collection}] = struct{}{}
|
|
}
|
|
|
|
// Resolve unique DIDs to PDS endpoints (deduplicate across collections)
|
|
uniqueDIDs := make(map[string]struct{})
|
|
for dc := range unique {
|
|
uniqueDIDs[dc.did] = struct{}{}
|
|
}
|
|
|
|
fmt.Printf("\n%sVerifying %d DID-collection pairs (%d unique DIDs)...%s\n", cDim, len(unique), len(uniqueDIDs), cReset)
|
|
|
|
pdsEndpoints := make(map[string]string) // DID → PDS URL
|
|
pdsErrors := make(map[string]error) // DID → resolution error
|
|
var mu sync.Mutex
|
|
var wg sync.WaitGroup
|
|
sem := make(chan struct{}, 10) // concurrency limit
|
|
|
|
for did := range uniqueDIDs {
|
|
wg.Add(1)
|
|
go func(did string) {
|
|
defer wg.Done()
|
|
sem <- struct{}{}
|
|
defer func() { <-sem }()
|
|
|
|
pds, err := resolveDIDToPDS(ctx, did)
|
|
mu.Lock()
|
|
if err != nil {
|
|
pdsErrors[did] = err
|
|
} else {
|
|
pdsEndpoints[did] = pds
|
|
}
|
|
mu.Unlock()
|
|
}(did)
|
|
}
|
|
wg.Wait()
|
|
|
|
// Check each (DID, collection) pair against the resolved PDS
|
|
results := make(map[key]verifyResult)
|
|
|
|
for dc := range unique {
|
|
wg.Add(1)
|
|
go func(dc didCol) {
|
|
defer wg.Done()
|
|
sem <- struct{}{}
|
|
defer func() { <-sem }()
|
|
|
|
k := key{dc.col, dc.did}
|
|
|
|
// Check if DID resolution failed — could mean account is deactivated/tombstoned
|
|
if err, ok := pdsErrors[dc.did]; ok {
|
|
errStr := err.Error()
|
|
if strings.Contains(errStr, "no PDS endpoint") ||
|
|
strings.Contains(errStr, "not found") {
|
|
mu.Lock()
|
|
results[k] = verifyResult{deactivated: true}
|
|
mu.Unlock()
|
|
} else {
|
|
mu.Lock()
|
|
results[k] = verifyResult{err: fmt.Errorf("DID resolution failed: %w", err)}
|
|
mu.Unlock()
|
|
}
|
|
return
|
|
}
|
|
|
|
pds := pdsEndpoints[dc.did]
|
|
client := &xrpc.Client{Host: pds, Client: http.DefaultClient}
|
|
var listResult listRecordsResult
|
|
err := client.LexDo(ctx, "GET", "", "com.atproto.repo.listRecords", map[string]any{
|
|
"repo": dc.did,
|
|
"collection": dc.col,
|
|
"limit": 1,
|
|
}, nil, &listResult)
|
|
mu.Lock()
|
|
if err != nil {
|
|
errStr := err.Error()
|
|
if strings.Contains(errStr, "Could not find repo") ||
|
|
strings.Contains(errStr, "RepoDeactivated") ||
|
|
strings.Contains(errStr, "RepoTakendown") ||
|
|
strings.Contains(errStr, "RepoSuspended") {
|
|
results[k] = verifyResult{deactivated: true}
|
|
} else {
|
|
results[k] = verifyResult{err: err}
|
|
}
|
|
} else {
|
|
results[k] = verifyResult{exists: len(listResult.Records) > 0}
|
|
}
|
|
mu.Unlock()
|
|
}(dc)
|
|
}
|
|
wg.Wait()
|
|
|
|
return results
|
|
}
|
|
|
|
// resolveDIDToPDS resolves a DID to its PDS endpoint using the shared identity directory.
|
|
func resolveDIDToPDS(ctx context.Context, did string) (string, error) {
|
|
didParsed, err := syntax.ParseDID(did)
|
|
if err != nil {
|
|
return "", fmt.Errorf("invalid DID: %w", err)
|
|
}
|
|
|
|
ident, err := dir.LookupDID(ctx, didParsed)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to resolve DID: %w", err)
|
|
}
|
|
|
|
pdsEndpoint := ident.PDSEndpoint()
|
|
if pdsEndpoint == "" {
|
|
return "", fmt.Errorf("no PDS endpoint found for DID")
|
|
}
|
|
|
|
return pdsEndpoint, nil
|
|
}
|
|
|
|
// fetchAllDIDs paginates through listReposByCollection to collect all DIDs.
|
|
func fetchAllDIDs(ctx context.Context, relay, collection string) (map[string]struct{}, error) {
|
|
client := &xrpc.Client{Host: relay, Client: http.DefaultClient}
|
|
dids := make(map[string]struct{})
|
|
var cursor string
|
|
|
|
for {
|
|
params := map[string]any{
|
|
"collection": collection,
|
|
"limit": 1000,
|
|
}
|
|
if cursor != "" {
|
|
params["cursor"] = cursor
|
|
}
|
|
|
|
var result listReposByCollectionResult
|
|
err := client.LexDo(ctx, "GET", "", "com.atproto.sync.listReposByCollection", params, nil, &result)
|
|
if err != nil {
|
|
return dids, fmt.Errorf("listReposByCollection failed: %w", err)
|
|
}
|
|
|
|
for _, repo := range result.Repos {
|
|
dids[repo.DID] = struct{}{}
|
|
}
|
|
|
|
if result.Cursor == "" {
|
|
break
|
|
}
|
|
cursor = result.Cursor
|
|
}
|
|
|
|
return dids, nil
|
|
}
|
|
|
|
// shortName returns the hostname (without port) of relayURL for display.
// If relayURL cannot be parsed, or parsing yields no hostname (for example
// when the scheme is omitted, as in "relay.example.com"), relayURL itself is
// returned so that the display name is never empty.
func shortName(relayURL string) string {
	u, err := url.Parse(relayURL)
	if err != nil {
		return relayURL
	}
	if host := u.Hostname(); host != "" {
		return host
	}
	return relayURL
}
|