From 3409af6c67fc0ccdd7ec79b7fcce89df1d4b844d Mon Sep 17 00:00:00 2001 From: Evan Jarrett Date: Wed, 7 Jan 2026 22:41:14 -0600 Subject: [PATCH] implement hold discovery dropdown in settings. implement a data privacy export feature --- cmd/appview/serve.go | 1 + cmd/hold/main.go | 2 +- docs/DIRECT_HOLD_ACCESS.md | 304 +++ docs/HOLD_DISCOVERY.md | 1721 +++++++++++++++++ docs/HOLD_XRPC_ENDPOINTS.md | 27 +- lexicons/io/atcr/hold/captain.json | 5 - pkg/appview/db/export.go | 393 ++++ pkg/appview/db/hold_store.go | 283 ++- pkg/appview/db/hold_store_test.go | 14 - .../0008_add_hold_crew_members.yaml | 19 + pkg/appview/db/schema.sql | 19 +- pkg/appview/handlers/export.go | 230 +++ pkg/appview/handlers/settings.go | 160 +- pkg/appview/jetstream/backfill.go | 77 +- pkg/appview/jetstream/processor.go | 78 + pkg/appview/jetstream/worker.go | 62 + pkg/appview/routes/routes.go | 16 +- pkg/appview/templates/pages/settings.html | 297 ++- pkg/atproto/cbor_gen.go | 43 +- pkg/atproto/endpoints.go | 6 + pkg/atproto/lexicon.go | 3 +- pkg/auth/hold_local_test.go | 8 +- pkg/auth/hold_remote.go | 16 +- pkg/auth/hold_remote_test.go | 1 - pkg/hold/config.go | 18 + pkg/hold/metadata.go | 65 + pkg/hold/oci/xrpc_test.go | 2 +- pkg/hold/pds/captain.go | 3 +- pkg/hold/pds/captain_test.go | 13 +- pkg/hold/pds/layer.go | 82 + pkg/hold/pds/layer_test.go | 2 +- pkg/hold/pds/server.go | 7 +- pkg/hold/pds/server_test.go | 26 +- pkg/hold/pds/stats.go | 23 + pkg/hold/pds/status_test.go | 2 +- pkg/hold/pds/xrpc.go | 138 ++ pkg/hold/pds/xrpc_test.go | 8 +- pkg/logging/logger.go | 54 +- pkg/logging/logger_test.go | 55 + 39 files changed, 4124 insertions(+), 159 deletions(-) create mode 100644 docs/DIRECT_HOLD_ACCESS.md create mode 100644 docs/HOLD_DISCOVERY.md create mode 100644 pkg/appview/db/export.go create mode 100644 pkg/appview/db/migrations/0008_add_hold_crew_members.yaml create mode 100644 pkg/appview/handlers/export.go create mode 100644 pkg/hold/metadata.go diff --git a/cmd/appview/serve.go 
b/cmd/appview/serve.go index 71e6245..d5c71d2 100644 --- a/cmd/appview/serve.go +++ b/cmd/appview/serve.go @@ -204,6 +204,7 @@ func serveRegistry(cmd *cobra.Command, args []string) error { HealthChecker: healthChecker, ReadmeFetcher: readmeFetcher, Templates: uiTemplates, + DefaultHoldDID: defaultHoldDID, }) } } diff --git a/cmd/hold/main.go b/cmd/hold/main.go index 4df8b4d..f321d0a 100644 --- a/cmd/hold/main.go +++ b/cmd/hold/main.go @@ -64,7 +64,7 @@ func main() { } // Bootstrap PDS with captain record, hold owner as first crew member, and profile - if err := holdPDS.Bootstrap(ctx, driver, cfg.Registration.OwnerDID, cfg.Server.Public, cfg.Registration.AllowAllCrew, cfg.Registration.ProfileAvatarURL); err != nil { + if err := holdPDS.Bootstrap(ctx, driver, cfg.Registration.OwnerDID, cfg.Server.Public, cfg.Registration.AllowAllCrew, cfg.Registration.ProfileAvatarURL, cfg.Registration.Region); err != nil { slog.Error("Failed to bootstrap PDS", "error", err) os.Exit(1) } diff --git a/docs/DIRECT_HOLD_ACCESS.md b/docs/DIRECT_HOLD_ACCESS.md new file mode 100644 index 0000000..40cd58c --- /dev/null +++ b/docs/DIRECT_HOLD_ACCESS.md @@ -0,0 +1,304 @@ +# Accessing Hold Data Without AppView + +This document explains how to retrieve your data directly from a hold service without going through the ATCR AppView. This is useful for: +- GDPR data export requests +- Backup and migration +- Debugging and development +- Building alternative clients + +## Quick Start: App Passwords (Recommended) + +The simplest way to authenticate is using an ATProto app password. This avoids the complexity of OAuth + DPoP. + +### Step 1: Create an App Password + +1. Go to your Bluesky settings: https://bsky.app/settings/app-passwords +2. Create a new app password +3. 
Save it securely (you'll only see it once) + +### Step 2: Get a Session Token + +```bash +# Replace with your handle and app password +HANDLE="yourhandle.bsky.social" +APP_PASSWORD="xxxx-xxxx-xxxx-xxxx" + +# Create session with your PDS +SESSION=$(curl -s -X POST "https://bsky.social/xrpc/com.atproto.server.createSession" \ + -H "Content-Type: application/json" \ + -d "{\"identifier\": \"$HANDLE\", \"password\": \"$APP_PASSWORD\"}") + +# Extract tokens +ACCESS_JWT=$(echo "$SESSION" | jq -r '.accessJwt') +DID=$(echo "$SESSION" | jq -r '.did') +PDS=$(echo "$SESSION" | jq -r '.didDoc.service[0].serviceEndpoint') + +echo "DID: $DID" +echo "PDS: $PDS" +``` + +### Step 3: Get a Service Token for the Hold + +```bash +# The hold DID you want to access (e.g., did:web:hold01.atcr.io) +HOLD_DID="did:web:hold01.atcr.io" + +# Get a service token from your PDS +SERVICE_TOKEN=$(curl -s -X GET "$PDS/xrpc/com.atproto.server.getServiceAuth?aud=$HOLD_DID" \ + -H "Authorization: Bearer $ACCESS_JWT" | jq -r '.token') + +echo "Service Token: $SERVICE_TOKEN" +``` + +### Step 4: Call Hold Endpoints + +Now you can call any authenticated hold endpoint with the service token: + +```bash +# Export your data from the hold +curl -s "https://hold01.atcr.io/xrpc/io.atcr.hold.exportUserData" \ + -H "Authorization: Bearer $SERVICE_TOKEN" | jq . +``` + +### Complete Script + +Here's a complete script that does all the above: + +```bash +#!/bin/bash +# export-hold-data.sh - Export your data from an ATCR hold + +set -e + +# Configuration +HANDLE="${1:-yourhandle.bsky.social}" +APP_PASSWORD="${2:-xxxx-xxxx-xxxx-xxxx}" +HOLD_DID="${3:-did:web:hold01.atcr.io}" + +# Default PDS (Bluesky's main PDS) +DEFAULT_PDS="https://bsky.social" + +echo "Authenticating as $HANDLE..." 
+ +# Step 1: Create session +SESSION=$(curl -s -X POST "$DEFAULT_PDS/xrpc/com.atproto.server.createSession" \ + -H "Content-Type: application/json" \ + -d "{\"identifier\": \"$HANDLE\", \"password\": \"$APP_PASSWORD\"}") + +# Check for errors +if echo "$SESSION" | jq -e '.error' > /dev/null 2>&1; then + echo "Error: $(echo "$SESSION" | jq -r '.message')" + exit 1 +fi + +ACCESS_JWT=$(echo "$SESSION" | jq -r '.accessJwt') +DID=$(echo "$SESSION" | jq -r '.did') + +# Try to get PDS from didDoc, fall back to default +PDS=$(echo "$SESSION" | jq -r '.didDoc.service[] | select(.id == "#atproto_pds") | .serviceEndpoint' 2>/dev/null || echo "$DEFAULT_PDS") +if [ "$PDS" = "null" ] || [ -z "$PDS" ]; then + PDS="$DEFAULT_PDS" +fi + +echo "Authenticated as $DID" +echo "PDS: $PDS" + +# Step 2: Get service token for the hold +echo "Getting service token for $HOLD_DID..." +SERVICE_RESPONSE=$(curl -s -X GET "$PDS/xrpc/com.atproto.server.getServiceAuth?aud=$HOLD_DID" \ + -H "Authorization: Bearer $ACCESS_JWT") + +if echo "$SERVICE_RESPONSE" | jq -e '.error' > /dev/null 2>&1; then + echo "Error getting service token: $(echo "$SERVICE_RESPONSE" | jq -r '.message')" + exit 1 +fi + +SERVICE_TOKEN=$(echo "$SERVICE_RESPONSE" | jq -r '.token') + +# Step 3: Resolve hold DID to URL +if [[ "$HOLD_DID" == did:web:* ]]; then + # did:web:example.com -> https://example.com + HOLD_HOST="${HOLD_DID#did:web:}" + HOLD_URL="https://$HOLD_HOST" +else + echo "Error: Only did:web holds are currently supported for direct resolution" + exit 1 +fi + +echo "Hold URL: $HOLD_URL" + +# Step 4: Export data +echo "Exporting data from $HOLD_URL..." +curl -s "$HOLD_URL/xrpc/io.atcr.hold.exportUserData" \ + -H "Authorization: Bearer $SERVICE_TOKEN" | jq . 
+``` + +Usage: +```bash +chmod +x export-hold-data.sh +./export-hold-data.sh yourhandle.bsky.social xxxx-xxxx-xxxx-xxxx did:web:hold01.atcr.io +``` + +--- + +## Available Hold Endpoints + +Once you have a service token, you can call these endpoints: + +### Data Export (GDPR) +```bash +GET /xrpc/io.atcr.hold.exportUserData +Authorization: Bearer {service_token} +``` + +Returns all your data stored on that hold: +- Layer records (blobs you've pushed) +- Crew membership status +- Usage statistics +- Whether you're the hold captain + +### Quota Information +```bash +GET /xrpc/io.atcr.hold.getQuota?userDid={your_did} +# No auth required - just needs your DID +``` + +### Blob Download (if you have read access) +```bash +GET /xrpc/com.atproto.sync.getBlob?did={owner_did}&cid={blob_digest} +Authorization: Bearer {service_token} +``` + +Returns a presigned URL to download the blob directly from storage. + +--- + +## OAuth + DPoP (Advanced) + +App passwords are the simplest option, but OAuth with DPoP is the "proper" way to authenticate in ATProto. However, it's significantly more complex because: + +1. **DPoP (Demonstrating Proof of Possession)** - Every request requires a cryptographically signed JWT proving you control a specific key +2. **PAR (Pushed Authorization Requests)** - Authorization parameters are sent server-to-server +3. **PKCE (Proof Key for Code Exchange)** - Prevents authorization code interception + +### Why DPoP Makes Curl Impractical + +Each request requires a fresh DPoP proof JWT with: +- Unique `jti` (request ID) +- Current `iat` timestamp +- HTTP method and URL bound to the request +- Server-provided `nonce` +- Signature using your P-256 private key + +Example DPoP proof structure: +```json +{ + "alg": "ES256", + "typ": "dpop+jwt", + "jwk": { "kty": "EC", "crv": "P-256", "x": "...", "y": "..." 
} +} +{ + "htm": "GET", + "htu": "https://bsky.social/xrpc/com.atproto.server.getServiceAuth", + "jti": "550e8400-e29b-41d4-a716-446655440000", + "iat": 1735689100, + "nonce": "server-provided-nonce" +} +``` + +### If You Need OAuth + +If you need OAuth (e.g., for a production application), you'll want to use a library: + +**Go:** +```go +import "github.com/bluesky-social/indigo/atproto/auth/oauth" +``` + +**TypeScript/JavaScript:** +```bash +npm install @atproto/oauth-client-node +``` + +**Python:** +```bash +pip install atproto +``` + +These libraries handle all the DPoP complexity for you. + +### High-Level OAuth Flow + +For documentation purposes, here's what the flow looks like: + +1. **Resolve identity**: `handle` → `DID` → `PDS endpoint` +2. **Discover OAuth server**: `GET {pds}/.well-known/oauth-authorization-server` +3. **Generate DPoP key**: Create P-256 key pair +4. **PAR request**: Send authorization parameters (with DPoP proof) +5. **User authorization**: Browser-based login +6. **Token exchange**: Exchange code for tokens (with DPoP proof) +7. **Use tokens**: All subsequent requests include DPoP proofs + +Each step after #3 requires generating a fresh DPoP proof JWT, which is why libraries are essential. + +--- + +## Troubleshooting + +### "Invalid token" or "Token expired" + +Service tokens are only valid for ~60 seconds. Get a fresh one: +```bash +SERVICE_TOKEN=$(curl -s "$PDS/xrpc/com.atproto.server.getServiceAuth?aud=$HOLD_DID" \ + -H "Authorization: Bearer $ACCESS_JWT" | jq -r '.token') +``` + +### "Session expired" + +Your access JWT from `createSession` has expired. Create a new session: +```bash +SESSION=$(curl -s -X POST "$PDS/xrpc/com.atproto.server.createSession" ...) +ACCESS_JWT=$(echo "$SESSION" | jq -r '.accessJwt') +``` + +### "Audience mismatch" + +The service token is scoped to a specific hold. Make sure `HOLD_DID` matches exactly what's in the `aud` claim of your token. 
+ +### "Access denied: user is not a crew member" + +You don't have access to this hold. You need to either: +- Be the hold captain (owner) +- Be a crew member with appropriate permissions + +### Finding Your Hold DID + +Check your sailor profile to find your default hold: +```bash +curl -s "$PDS/xrpc/com.atproto.repo.getRecord?repo=$DID&collection=io.atcr.sailor.profile&rkey=self" \ + -H "Authorization: Bearer $ACCESS_JWT" | jq -r '.value.defaultHold' +``` + +Or check your manifest records for the hold where your images are stored: +```bash +curl -s "$PDS/xrpc/com.atproto.repo.listRecords?repo=$DID&collection=io.atcr.manifest&limit=1" \ + -H "Authorization: Bearer $ACCESS_JWT" | jq -r '.records[0].value.holdDid' +``` + +--- + +## Security Notes + +- **App passwords** are scoped tokens that can be revoked without changing your main password +- **Service tokens** are short-lived (~60 seconds) and scoped to a specific hold +- **Never share** your app password or access tokens +- Service tokens can only be used for the specific hold they were requested for (`aud` claim) + +--- + +## References + +- [ATProto OAuth Specification](https://atproto.com/specs/oauth) +- [DPoP RFC 9449](https://datatracker.ietf.org/doc/html/rfc9449) +- [Bluesky OAuth Guide](https://docs.bsky.app/docs/advanced-guides/oauth-client) +- [ATCR BYOS Documentation](./BYOS.md) diff --git a/docs/HOLD_DISCOVERY.md b/docs/HOLD_DISCOVERY.md new file mode 100644 index 0000000..b80c066 --- /dev/null +++ b/docs/HOLD_DISCOVERY.md @@ -0,0 +1,1721 @@ +# Hold Discovery + +This document describes how AppView discovers available holds and presents them to users for selection. + +## TL;DR + +**Problem:** Users currently enter hold URLs manually in a text field. They don't know what holds exist or which ones they can access. + +**Solution:** +1. Subscribe to Jetstream for `io.atcr.hold.captain` and `io.atcr.hold.crew` collections +2. 
Cache discovered holds and crew memberships in SQLite +3. Replace the text input with a dropdown showing available holds grouped by access level + +**Key Changes:** +- New table: `hold_crew_members` (hold_did, member_did, rkey, permissions, ...) +- Jetstream collections: `io.atcr.hold.captain`, `io.atcr.hold.crew` +- Settings UI: Text input → ` + Leave empty to use AppView default storage + + + + +``` + +**Problems with the current approach:** + +1. **Users must know hold URLs** - Requires users to manually find and copy hold endpoint URLs +2. **No validation** - Users can enter invalid or inaccessible URLs +3. **No discovery** - Users don't know what holds are available to them +4. **Poor UX** - Text input is error-prone and unfriendly +5. **No membership visibility** - Users can't see which holds they're crew on + +### Proposed Change: Dropdown with Discovered Holds + +Replace the text input with a ` + + + {{if .OwnedHolds}} + + {{range .OwnedHolds}} + + {{end}} + + {{end}} + + {{if .CrewHolds}} + + {{range .CrewHolds}} + + {{end}} + + {{end}} + + {{if .EligibleHolds}} + + {{range .EligibleHolds}} + + {{end}} + + {{end}} + + {{if .PublicHolds}} + + {{range .PublicHolds}} + + {{end}} + + {{end}} + + Your images will be stored on the selected hold + + + + + +
+ + + + +``` + +### Dropdown Option Groups + +The dropdown organizes holds into logical groups based on user's relationship: + +| Group | Description | Access Level | +|-------|-------------|--------------| +| **Your Holds** | Holds where user is the captain (owner) | Full control | +| **Crew Member** | Holds where user has explicit crew membership | Based on permissions | +| **Open Registration** | Holds with `allowAllCrew=true` | Can self-register | +| **Public Holds** | Holds with `public=true` | Anyone can use | + +### Visual Indicators + +Each option should show relevant context: + +``` +┌─ Storage Hold: ─────────────────────────────────────┐ +│ ▼ hold01.atcr.io (us-east) │ +├─────────────────────────────────────────────────────┤ +│ AppView Default (hold01.atcr.io) │ +│ ───────────────────────────────────── │ +│ Your Holds │ +│ my-hold.fly.dev (us-west) │ +│ ───────────────────────────────────── │ +│ Crew Member │ +│ team-hold.company.com (eu-central) │ +│ shared-hold.org (asia-pacific) [read-only] │ +│ ───────────────────────────────────── │ +│ Open Registration │ +│ community-hold.dev (us-east) │ +│ ───────────────────────────────────── │ +│ Public Holds │ +│ public-hold.example.com (global) │ +└─────────────────────────────────────────────────────┘ +``` + +### Form Submission Change + +The form now submits `hold_did` (a DID) instead of `hold_endpoint` (a URL): + +**Before:** +``` +POST /api/profile/default-hold +Content-Type: application/x-www-form-urlencoded + +hold_endpoint=https://hold01.atcr.io +``` + +**After:** +``` +POST /api/profile/default-hold +Content-Type: application/x-www-form-urlencoded + +hold_did=did:web:hold01.atcr.io +``` + +The `UpdateDefaultHoldHandler` needs to be updated to accept DIDs: + +```go +// pkg/appview/handlers/settings.go + +func (h *UpdateDefaultHoldHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + user := middleware.GetUser(r) + if user == nil { + http.Error(w, "Unauthorized", http.StatusUnauthorized) + return 
+ } + + // Accept DID (new) or endpoint (legacy/fallback) + holdDID := r.FormValue("hold_did") + if holdDID == "" { + // Fallback for legacy form submissions + holdDID = r.FormValue("hold_endpoint") + } + + // Validate the hold DID if provided + if holdDID != "" { + // Check it's in our discovered holds cache + _, err := h.DB.GetCaptainRecord(holdDID) + if err != nil { + http.Error(w, "Unknown hold: "+holdDID, http.StatusBadRequest) + return + } + + // Verify user has access to this hold + available, err := db.GetAvailableHolds(h.DB, user.DID) + if err != nil { + http.Error(w, "Failed to check hold access", http.StatusInternalServerError) + return + } + + hasAccess := false + for _, hold := range available { + if hold.DID == holdDID { + hasAccess = true + break + } + } + + if !hasAccess { + http.Error(w, "You don't have access to this hold", http.StatusForbidden) + return + } + } + + // ... rest of profile update logic +} +``` + +### Settings Handler + +Update the settings handler to include available holds: + +```go +// pkg/appview/handlers/settings.go + +func (h *Handler) SettingsPage(w http.ResponseWriter, r *http.Request) { + ctx := r.Context() + userDID := auth.GetDID(ctx) + + // Get user's current profile + profile, err := h.storage.GetProfile(ctx, userDID) + if err != nil { + // Handle error + } + + // Get available holds for dropdown + availableHolds, err := db.GetAvailableHolds(h.db, userDID) + if err != nil { + // Handle error + } + + data := SettingsPageData{ + Profile: profile, + AvailableHolds: availableHolds, + CurrentHoldDID: profile.DefaultHold, + } + + h.renderTemplate(w, "settings.html", data) +} +``` + +### Settings Template + +```html + + +
+

Default Hold

+

+ Select where your container images will be stored by default. +

+ +
+ + + +
+
+``` + +### Template Data Preparation + +```go +// pkg/appview/handlers/settings.go + +type SettingsPageData struct { + Profile *atproto.SailorProfile + CurrentHoldDID string + OwnedHolds []HoldDisplay + CrewHolds []HoldDisplay + EligibleHolds []HoldDisplay + PublicHolds []HoldDisplay +} + +type HoldDisplay struct { + DID string + DisplayName string // Derived from DID or endpoint + Region string + Provider string + Permissions []string +} + +func (h *Handler) prepareSettingsData(userDID string, holds []db.AvailableHold, currentHold string) SettingsPageData { + data := SettingsPageData{ + CurrentHoldDID: currentHold, + } + + for _, hold := range holds { + display := HoldDisplay{ + DID: hold.DID, + DisplayName: deriveDisplayName(hold.DID, hold.Endpoint), + Region: hold.Region, + Provider: hold.Provider, + Permissions: hold.Permissions, + } + + switch hold.Membership { + case "owner": + data.OwnedHolds = append(data.OwnedHolds, display) + case "crew": + data.CrewHolds = append(data.CrewHolds, display) + case "eligible": + data.EligibleHolds = append(data.EligibleHolds, display) + case "public": + data.PublicHolds = append(data.PublicHolds, display) + } + } + + return data +} + +func deriveDisplayName(did, endpoint string) string { + // For did:web, extract the domain + if strings.HasPrefix(did, "did:web:") { + return strings.TrimPrefix(did, "did:web:") + } + + // For did:plc, use the endpoint hostname if available + if endpoint != "" { + if u, err := url.Parse(endpoint); err == nil { + return u.Host + } + } + + // Fallback to truncated DID + if len(did) > 20 { + return did[:20] + "..." 
+ } + return did +} +``` + +### CSS Styles + +Add styles for the hold dropdown and details panel: + +```css +/* pkg/appview/templates/pages/settings.html - add to diff --git a/pkg/atproto/cbor_gen.go b/pkg/atproto/cbor_gen.go index 1af73ea..755f653 100644 --- a/pkg/atproto/cbor_gen.go +++ b/pkg/atproto/cbor_gen.go @@ -342,16 +342,12 @@ func (t *CaptainRecord) MarshalCBOR(w io.Writer) error { } cw := cbg.NewCborWriter(w) - fieldCount := 8 + fieldCount := 7 if t.Region == "" { fieldCount-- } - if t.Provider == "" { - fieldCount-- - } - if _, err := cw.Write(cbg.CborEncodeMajorType(cbg.MajMap, uint64(fieldCount))); err != nil { return err } @@ -444,32 +440,6 @@ func (t *CaptainRecord) MarshalCBOR(w io.Writer) error { } } - // t.Provider (string) (string) - if t.Provider != "" { - - if len("provider") > 8192 { - return xerrors.Errorf("Value in field \"provider\" was too long") - } - - if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len("provider"))); err != nil { - return err - } - if _, err := cw.WriteString(string("provider")); err != nil { - return err - } - - if len(t.Provider) > 8192 { - return xerrors.Errorf("Value in field t.Provider was too long") - } - - if err := cw.WriteMajorTypeHeader(cbg.MajTextString, uint64(len(t.Provider))); err != nil { - return err - } - if _, err := cw.WriteString(string(t.Provider)); err != nil { - return err - } - } - // t.DeployedAt (string) (string) if len("deployedAt") > 8192 { return xerrors.Errorf("Value in field \"deployedAt\" was too long") @@ -619,17 +589,6 @@ func (t *CaptainRecord) UnmarshalCBOR(r io.Reader) (err error) { t.Region = string(sval) } - // t.Provider (string) (string) - case "provider": - - { - sval, err := cbg.ReadStringWithMax(cr, 8192) - if err != nil { - return err - } - - t.Provider = string(sval) - } // t.DeployedAt (string) (string) case "deployedAt": diff --git a/pkg/atproto/endpoints.go b/pkg/atproto/endpoints.go index ead1071..ec993fb 100644 --- a/pkg/atproto/endpoints.go +++ 
b/pkg/atproto/endpoints.go @@ -57,6 +57,12 @@ const ( // Query: userDid={did} // Response: {"userDid": "...", "uniqueBlobs": 10, "totalSize": 1073741824} HoldGetQuota = "/xrpc/io.atcr.hold.getQuota" + + // HoldExportUserData exports all user data from a hold service (GDPR compliance). + // Method: GET + // Query: userDid={did} + // Response: JSON containing all user data stored by the hold + HoldExportUserData = "/xrpc/io.atcr.hold.exportUserData" ) // Hold service crew management endpoints (io.atcr.hold.*) diff --git a/pkg/atproto/lexicon.go b/pkg/atproto/lexicon.go index c3cfd2b..7f30ac1 100644 --- a/pkg/atproto/lexicon.go +++ b/pkg/atproto/lexicon.go @@ -580,8 +580,7 @@ type CaptainRecord struct { AllowAllCrew bool `json:"allowAllCrew" cborgen:"allowAllCrew"` // Allow any authenticated user to register as crew EnableBlueskyPosts bool `json:"enableBlueskyPosts" cborgen:"enableBlueskyPosts"` // Enable Bluesky posts when manifests are pushed (overrides env var) DeployedAt string `json:"deployedAt" cborgen:"deployedAt"` // RFC3339 timestamp - Region string `json:"region,omitempty" cborgen:"region,omitempty"` // S3 region (optional) - Provider string `json:"provider,omitempty" cborgen:"provider,omitempty"` // Deployment provider (optional) + Region string `json:"region,omitempty" cborgen:"region,omitempty"` // Deployment region (optional) } // CrewRecord represents a crew member in the hold diff --git a/pkg/auth/hold_local_test.go b/pkg/auth/hold_local_test.go index e3ff9a5..1e51e71 100644 --- a/pkg/auth/hold_local_test.go +++ b/pkg/auth/hold_local_test.go @@ -43,7 +43,7 @@ func TestMain(m *testing.M) { if err != nil { panic(err) } - err = sharedPublicPDS.Bootstrap(ctx, nil, "did:plc:owner123", true, false, "") + err = sharedPublicPDS.Bootstrap(ctx, nil, "did:plc:owner123", true, false, "", "") if err != nil { panic(err) } @@ -54,7 +54,7 @@ func TestMain(m *testing.M) { if err != nil { panic(err) } - err = sharedPrivatePDS.Bootstrap(ctx, nil, "did:plc:owner123", 
false, false, "") + err = sharedPrivatePDS.Bootstrap(ctx, nil, "did:plc:owner123", false, false, "", "") if err != nil { panic(err) } @@ -65,7 +65,7 @@ func TestMain(m *testing.M) { if err != nil { panic(err) } - err = sharedAllowCrewPDS.Bootstrap(ctx, nil, "did:plc:owner123", false, true, "") + err = sharedAllowCrewPDS.Bootstrap(ctx, nil, "did:plc:owner123", false, true, "", "") if err != nil { panic(err) } @@ -93,7 +93,7 @@ func createTestHoldPDS(t *testing.T, ownerDID string, public bool, allowAllCrew // Bootstrap with owner if provided if ownerDID != "" { - err = holdPDS.Bootstrap(ctx, nil, ownerDID, public, allowAllCrew, "") + err = holdPDS.Bootstrap(ctx, nil, ownerDID, public, allowAllCrew, "", "") if err != nil { t.Fatalf("Failed to bootstrap HoldPDS: %v", err) } diff --git a/pkg/auth/hold_remote.go b/pkg/auth/hold_remote.go index 96f6b1d..437a4cc 100644 --- a/pkg/auth/hold_remote.go +++ b/pkg/auth/hold_remote.go @@ -144,13 +144,13 @@ type captainRecordWithMeta struct { // getCachedCaptainRecord retrieves a captain record from database cache func (a *RemoteHoldAuthorizer) getCachedCaptainRecord(holdDID string) (*captainRecordWithMeta, error) { query := ` - SELECT owner_did, public, allow_all_crew, deployed_at, region, provider, updated_at + SELECT owner_did, public, allow_all_crew, deployed_at, region, updated_at FROM hold_captain_records WHERE hold_did = ? 
` var record atproto.CaptainRecord - var deployedAt, region, provider sql.NullString + var deployedAt, region sql.NullString var updatedAt time.Time err := a.db.QueryRow(query, holdDID).Scan( @@ -159,7 +159,6 @@ func (a *RemoteHoldAuthorizer) getCachedCaptainRecord(holdDID string) (*captainR &record.AllowAllCrew, &deployedAt, &region, - &provider, &updatedAt, ) @@ -178,9 +177,6 @@ func (a *RemoteHoldAuthorizer) getCachedCaptainRecord(holdDID string) (*captainR if region.Valid { record.Region = region.String } - if provider.Valid { - record.Provider = provider.String - } return &captainRecordWithMeta{ CaptainRecord: &record, @@ -193,15 +189,14 @@ func (a *RemoteHoldAuthorizer) setCachedCaptainRecord(holdDID string, record *at query := ` INSERT INTO hold_captain_records ( hold_did, owner_did, public, allow_all_crew, - deployed_at, region, provider, updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?) + deployed_at, region, updated_at + ) VALUES (?, ?, ?, ?, ?, ?, ?) ON CONFLICT(hold_did) DO UPDATE SET owner_did = excluded.owner_did, public = excluded.public, allow_all_crew = excluded.allow_all_crew, deployed_at = excluded.deployed_at, region = excluded.region, - provider = excluded.provider, updated_at = excluded.updated_at ` @@ -212,7 +207,6 @@ func (a *RemoteHoldAuthorizer) setCachedCaptainRecord(holdDID string, record *at record.AllowAllCrew, nullString(record.DeployedAt), nullString(record.Region), - nullString(record.Provider), time.Now(), ) @@ -256,7 +250,6 @@ func (a *RemoteHoldAuthorizer) fetchCaptainRecordFromXRPC(ctx context.Context, h AllowAllCrew bool `json:"allowAllCrew"` DeployedAt string `json:"deployedAt"` Region string `json:"region,omitempty"` - Provider string `json:"provider,omitempty"` } `json:"value"` } @@ -272,7 +265,6 @@ func (a *RemoteHoldAuthorizer) fetchCaptainRecordFromXRPC(ctx context.Context, h AllowAllCrew: xrpcResp.Value.AllowAllCrew, DeployedAt: xrpcResp.Value.DeployedAt, Region: xrpcResp.Value.Region, - Provider: xrpcResp.Value.Provider, } 
return record, nil diff --git a/pkg/auth/hold_remote_test.go b/pkg/auth/hold_remote_test.go index 602e59e..99aa141 100644 --- a/pkg/auth/hold_remote_test.go +++ b/pkg/auth/hold_remote_test.go @@ -129,7 +129,6 @@ func TestGetCaptainRecord_CacheHit(t *testing.T) { AllowAllCrew: false, DeployedAt: "2025-10-28T00:00:00Z", Region: "us-east-1", - Provider: "fly.io", } err := remote.setCachedCaptainRecord(holdDID, captainRecord) diff --git a/pkg/hold/config.go b/pkg/hold/config.go index acad177..1d91bd8 100644 --- a/pkg/hold/config.go +++ b/pkg/hold/config.go @@ -7,8 +7,10 @@ package hold import ( "bytes" + "context" "encoding/json" "fmt" + "log/slog" "net/http" "net/url" "os" @@ -54,6 +56,9 @@ type RegistrationConfig struct { // If true, creates posts when users push images // Synced to captain record's enableBlueskyPosts field on startup EnableBlueskyPosts bool `yaml:"enable_bluesky_posts"` + + // Region is the deployment region, auto-detected from cloud metadata or S3 config + Region string `yaml:"region"` } // StorageConfig wraps distribution's storage configuration @@ -148,6 +153,18 @@ func LoadConfigFromEnv() (*Config, error) { // Admin panel configuration cfg.Admin.Enabled = os.Getenv("HOLD_ADMIN_ENABLED") == "true" + // Detect region from cloud metadata or S3 config + if meta, err := DetectCloudMetadata(context.Background()); err == nil && meta != nil { + cfg.Registration.Region = meta.Region + slog.Info("Detected cloud metadata", "region", meta.Region) + } else { + // Fall back to S3 region + if storageType == "s3" { + cfg.Registration.Region = getEnvOrDefault("AWS_REGION", "us-east-1") + slog.Info("Using S3 region", "region", cfg.Registration.Region) + } + } + return cfg, nil } @@ -200,6 +217,7 @@ func getEnvOrDefault(key, defaultValue string) string { return defaultValue } + // RequestCrawl sends a crawl request to the ATProto relay for the given hostname. // This makes the hold's PDS discoverable by the relay network. 
func RequestCrawl(relayEndpoint, publicURL string) error { diff --git a/pkg/hold/metadata.go b/pkg/hold/metadata.go new file mode 100644 index 0000000..e4d5a2c --- /dev/null +++ b/pkg/hold/metadata.go @@ -0,0 +1,65 @@ +package hold + +import ( + "context" + "encoding/json" + "fmt" + "net/http" + "time" +) + +// CloudMetadata contains region info from cloud metadata service +type CloudMetadata struct { + Region string +} + +// DetectCloudMetadata queries the instance metadata service (169.254.169.254) +// Currently supports UpCloud. Others can be added via PR. +func DetectCloudMetadata(ctx context.Context) (*CloudMetadata, error) { + ctx, cancel := context.WithTimeout(ctx, 2*time.Second) + defer cancel() + + // Try UpCloud metadata format + if meta, err := detectUpCloud(ctx); err == nil { + return meta, nil + } + + // Add other providers here (AWS, GCP, Azure, DigitalOcean, etc.) + // Contributors welcome! + + return nil, nil // No metadata available +} + +// detectUpCloud queries UpCloud's metadata service +func detectUpCloud(ctx context.Context) (*CloudMetadata, error) { + req, err := http.NewRequestWithContext(ctx, "GET", "http://169.254.169.254/metadata/v1.json", nil) + if err != nil { + return nil, err + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return nil, fmt.Errorf("metadata returned %d", resp.StatusCode) + } + + var data struct { + CloudName string `json:"cloud_name"` + Region string `json:"region"` + } + if err := json.NewDecoder(resp.Body).Decode(&data); err != nil { + return nil, err + } + + if data.CloudName != "upcloud" { + return nil, fmt.Errorf("not upcloud: %s", data.CloudName) + } + + return &CloudMetadata{ + Region: data.Region, + }, nil +} diff --git a/pkg/hold/oci/xrpc_test.go b/pkg/hold/oci/xrpc_test.go index 8aa08b5..9834455 100644 --- a/pkg/hold/oci/xrpc_test.go +++ b/pkg/hold/oci/xrpc_test.go @@ -111,7 +111,7 @@ func setupTestOCIHandler(t 
*testing.T) (*XRPCHandler, context.Context) { r, w, _ := os.Pipe() os.Stdout = w - err = holdPDS.Bootstrap(ctx, nil, ownerDID, true, false, "") + err = holdPDS.Bootstrap(ctx, nil, ownerDID, true, false, "", "") // Restore stdout w.Close() diff --git a/pkg/hold/pds/captain.go b/pkg/hold/pds/captain.go index 8a2b2de..64a5f9c 100644 --- a/pkg/hold/pds/captain.go +++ b/pkg/hold/pds/captain.go @@ -17,7 +17,7 @@ const ( // CreateCaptainRecord creates the captain record for the hold (first-time only). // This will FAIL if the captain record already exists. Use UpdateCaptainRecord to modify. -func (p *HoldPDS) CreateCaptainRecord(ctx context.Context, ownerDID string, public bool, allowAllCrew bool, enableBlueskyPosts bool) (cid.Cid, error) { +func (p *HoldPDS) CreateCaptainRecord(ctx context.Context, ownerDID string, public bool, allowAllCrew bool, enableBlueskyPosts bool, region string) (cid.Cid, error) { captainRecord := &atproto.CaptainRecord{ Type: atproto.CaptainCollection, Owner: ownerDID, @@ -25,6 +25,7 @@ func (p *HoldPDS) CreateCaptainRecord(ctx context.Context, ownerDID string, publ AllowAllCrew: allowAllCrew, EnableBlueskyPosts: enableBlueskyPosts, DeployedAt: time.Now().Format(time.RFC3339), + Region: region, } // Use repomgr.PutRecord - creates with explicit rkey, fails if already exists diff --git a/pkg/hold/pds/captain_test.go b/pkg/hold/pds/captain_test.go index f2298c9..0bb463c 100644 --- a/pkg/hold/pds/captain_test.go +++ b/pkg/hold/pds/captain_test.go @@ -55,7 +55,7 @@ func setupTestPDSWithBootstrap(t *testing.T, ownerDID string, public, allowAllCr r, w, _ := os.Pipe() os.Stdout = w - err := pds.Bootstrap(ctx, nil, ownerDID, public, allowAllCrew, "") + err := pds.Bootstrap(ctx, nil, ownerDID, public, allowAllCrew, "", "") w.Close() os.Stdout = oldStdout @@ -114,7 +114,7 @@ func TestCreateCaptainRecord(t *testing.T) { defer pds.Close() // Create captain record - recordCID, err := pds.CreateCaptainRecord(ctx, tt.ownerDID, tt.public, tt.allowAllCrew, 
tt.enableBlueskyPosts) + recordCID, err := pds.CreateCaptainRecord(ctx, tt.ownerDID, tt.public, tt.allowAllCrew, tt.enableBlueskyPosts, "") if err != nil { t.Fatalf("CreateCaptainRecord failed: %v", err) } @@ -164,7 +164,7 @@ func TestGetCaptainRecord(t *testing.T) { ownerDID := "did:plc:alice123" // Create captain record - createdCID, err := pds.CreateCaptainRecord(ctx, ownerDID, true, false, false) + createdCID, err := pds.CreateCaptainRecord(ctx, ownerDID, true, false, false, "") if err != nil { t.Fatalf("CreateCaptainRecord failed: %v", err) } @@ -221,7 +221,7 @@ func TestUpdateCaptainRecord(t *testing.T) { ownerDID := "did:plc:alice123" // Create initial captain record (public=false, allowAllCrew=false, enableBlueskyPosts=false) - _, err := pds.CreateCaptainRecord(ctx, ownerDID, false, false, false) + _, err := pds.CreateCaptainRecord(ctx, ownerDID, false, false, false, "") if err != nil { t.Fatalf("CreateCaptainRecord failed: %v", err) } @@ -343,7 +343,6 @@ func TestCaptainRecord_CBORRoundtrip(t *testing.T) { AllowAllCrew: true, DeployedAt: "2025-10-16T12:00:00Z", Region: "us-west-2", - Provider: "fly.io", }, }, { @@ -355,7 +354,6 @@ func TestCaptainRecord_CBORRoundtrip(t *testing.T) { AllowAllCrew: true, DeployedAt: "2025-10-16T12:00:00Z", Region: "", - Provider: "", }, }, } @@ -400,9 +398,6 @@ func TestCaptainRecord_CBORRoundtrip(t *testing.T) { if decoded.Region != tt.record.Region { t.Errorf("Region mismatch: expected %s, got %s", tt.record.Region, decoded.Region) } - if decoded.Provider != tt.record.Provider { - t.Errorf("Provider mismatch: expected %s, got %s", tt.record.Provider, decoded.Provider) - } }) } } diff --git a/pkg/hold/pds/layer.go b/pkg/hold/pds/layer.go index 783a387..9edd5f2 100644 --- a/pkg/hold/pds/layer.go +++ b/pkg/hold/pds/layer.go @@ -212,3 +212,85 @@ func (p *HoldPDS) getCrewTier(ctx context.Context, userDID string) string { return "" } + +// ListLayerRecordsForUser returns all layer records uploaded by a specific user +// Used for 
GDPR data export to return all layers a user has pushed to this hold +func (p *HoldPDS) ListLayerRecordsForUser(ctx context.Context, userDID string) ([]*atproto.LayerRecord, error) { + if p.recordsIndex == nil { + return nil, fmt.Errorf("records index not available") + } + + // Get session for reading record data + session, err := p.carstore.ReadOnlySession(p.uid) + if err != nil { + return nil, fmt.Errorf("failed to create session: %w", err) + } + + head, err := p.carstore.GetUserRepoHead(ctx, p.uid) + if err != nil { + return nil, fmt.Errorf("failed to get repo head: %w", err) + } + + if !head.Defined() { + // Empty repo - return empty list + return []*atproto.LayerRecord{}, nil + } + + repoHandle, err := repo.OpenRepo(ctx, session, head) + if err != nil { + return nil, fmt.Errorf("failed to open repo: %w", err) + } + + var records []*atproto.LayerRecord + + // Iterate all layer records via the index + cursor := "" + batchSize := 1000 // Process in batches + + for { + indexRecords, nextCursor, err := p.recordsIndex.ListRecords(atproto.LayerCollection, batchSize, cursor, true) + if err != nil { + return nil, fmt.Errorf("failed to list layer records: %w", err) + } + + for _, rec := range indexRecords { + // Construct record path and get the record data + recordPath := rec.Collection + "/" + rec.Rkey + + _, recBytes, err := repoHandle.GetRecordBytes(ctx, recordPath) + if err != nil { + // Skip records we can't read + continue + } + + // Decode the layer record + recordValue, err := lexutil.CborDecodeValue(*recBytes) + if err != nil { + continue + } + + layerRecord, ok := recordValue.(*atproto.LayerRecord) + if !ok { + continue + } + + // Filter by userDID + if layerRecord.UserDID != userDID { + continue + } + + records = append(records, layerRecord) + } + + if nextCursor == "" { + break + } + cursor = nextCursor + } + + if records == nil { + records = []*atproto.LayerRecord{} + } + + return records, nil +} diff --git a/pkg/hold/pds/layer_test.go 
b/pkg/hold/pds/layer_test.go index 372d0b9..d054f2e 100644 --- a/pkg/hold/pds/layer_test.go +++ b/pkg/hold/pds/layer_test.go @@ -308,7 +308,7 @@ func setupTestPDSWithIndex(t *testing.T, ownerDID string) (*HoldPDS, func()) { } // Bootstrap with owner - if err := pds.Bootstrap(ctx, nil, ownerDID, true, false, ""); err != nil { + if err := pds.Bootstrap(ctx, nil, ownerDID, true, false, "", ""); err != nil { t.Fatalf("Failed to bootstrap PDS: %v", err) } diff --git a/pkg/hold/pds/server.go b/pkg/hold/pds/server.go index 5f6e6e1..5cc6fd4 100644 --- a/pkg/hold/pds/server.go +++ b/pkg/hold/pds/server.go @@ -153,7 +153,7 @@ func (p *HoldPDS) UID() models.Uid { } // Bootstrap initializes the hold with the captain record, owner as first crew member, and profile -func (p *HoldPDS) Bootstrap(ctx context.Context, storageDriver driver.StorageDriver, ownerDID string, public bool, allowAllCrew bool, avatarURL string) error { +func (p *HoldPDS) Bootstrap(ctx context.Context, storageDriver driver.StorageDriver, ownerDID string, public bool, allowAllCrew bool, avatarURL, region string) error { if ownerDID == "" { return nil } @@ -185,7 +185,7 @@ func (p *HoldPDS) Bootstrap(ctx context.Context, storageDriver driver.StorageDri } // Create captain record (hold ownership and settings) - _, err = p.CreateCaptainRecord(ctx, ownerDID, public, allowAllCrew, p.enableBlueskyPosts) + _, err = p.CreateCaptainRecord(ctx, ownerDID, public, allowAllCrew, p.enableBlueskyPosts, region) if err != nil { return fmt.Errorf("failed to create captain record: %w", err) } @@ -193,7 +193,8 @@ func (p *HoldPDS) Bootstrap(ctx context.Context, storageDriver driver.StorageDri slog.Info("Created captain record", "public", public, "allowAllCrew", allowAllCrew, - "enableBlueskyPosts", p.enableBlueskyPosts) + "enableBlueskyPosts", p.enableBlueskyPosts, + "region", region) // Add hold owner as first crew member with admin role _, err = p.AddCrewMember(ctx, ownerDID, "admin", []string{"blob:read", "blob:write", 
"crew:admin"}) diff --git a/pkg/hold/pds/server_test.go b/pkg/hold/pds/server_test.go index 6fd6851..593204b 100644 --- a/pkg/hold/pds/server_test.go +++ b/pkg/hold/pds/server_test.go @@ -69,7 +69,7 @@ func TestNewHoldPDS_ExistingRepo(t *testing.T) { // Bootstrap with a captain record ownerDID := "did:plc:owner123" - if err := pds1.Bootstrap(ctx, nil, ownerDID, true, false, ""); err != nil { + if err := pds1.Bootstrap(ctx, nil, ownerDID, true, false, "", ""); err != nil { t.Fatalf("Bootstrap failed: %v", err) } @@ -129,7 +129,7 @@ func TestBootstrap_NewRepo(t *testing.T) { publicAccess := true allowAllCrew := false - err = pds.Bootstrap(ctx, nil, ownerDID, publicAccess, allowAllCrew, "") + err = pds.Bootstrap(ctx, nil, ownerDID, publicAccess, allowAllCrew, "", "") if err != nil { t.Fatalf("Bootstrap failed: %v", err) } @@ -204,7 +204,7 @@ func TestBootstrap_Idempotent(t *testing.T) { ownerDID := "did:plc:alice123" // First bootstrap - err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "") + err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "", "") if err != nil { t.Fatalf("First bootstrap failed: %v", err) } @@ -223,7 +223,7 @@ func TestBootstrap_Idempotent(t *testing.T) { crewCount1 := len(crew1) // Second bootstrap (should be idempotent - skip creation) - err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "") + err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "", "") if err != nil { t.Fatalf("Second bootstrap failed: %v", err) } @@ -268,7 +268,7 @@ func TestBootstrap_EmptyOwner(t *testing.T) { defer pds.Close() // Bootstrap with empty owner DID (should be no-op) - err = pds.Bootstrap(ctx, nil, "", true, false, "") + err = pds.Bootstrap(ctx, nil, "", true, false, "", "") if err != nil { t.Fatalf("Bootstrap with empty owner should not error: %v", err) } @@ -302,7 +302,7 @@ func TestLexiconTypeRegistration(t *testing.T) { // Bootstrap to create captain record ownerDID := "did:plc:alice123" - if err := pds.Bootstrap(ctx, nil, ownerDID, true, false, ""); 
err != nil { + if err := pds.Bootstrap(ctx, nil, ownerDID, true, false, "", ""); err != nil { t.Fatalf("Bootstrap failed: %v", err) } @@ -355,7 +355,7 @@ func TestBootstrap_DidWebOwner(t *testing.T) { publicAccess := true allowAllCrew := false - err = pds.Bootstrap(ctx, nil, ownerDID, publicAccess, allowAllCrew, "") + err = pds.Bootstrap(ctx, nil, ownerDID, publicAccess, allowAllCrew, "", "") if err != nil { t.Fatalf("Bootstrap failed with did:web owner: %v", err) } @@ -414,7 +414,7 @@ func TestBootstrap_MixedDIDs(t *testing.T) { // Bootstrap with did:plc owner plcOwner := "did:plc:alice123" - err = pds.Bootstrap(ctx, nil, plcOwner, true, false, "") + err = pds.Bootstrap(ctx, nil, plcOwner, true, false, "", "") if err != nil { t.Fatalf("Bootstrap failed: %v", err) } @@ -509,7 +509,7 @@ func TestBootstrap_CrewWithoutCaptain(t *testing.T) { } // Bootstrap should create captain record - err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "") + err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "", "") if err != nil { t.Fatalf("Bootstrap failed: %v", err) } @@ -559,7 +559,7 @@ func TestBootstrap_CaptainWithoutCrew(t *testing.T) { // Create captain record WITHOUT crew (unusual state) ownerDID := "did:plc:alice123" - _, err = pds.CreateCaptainRecord(ctx, ownerDID, true, false, false) + _, err = pds.CreateCaptainRecord(ctx, ownerDID, true, false, false, "") if err != nil { t.Fatalf("CreateCaptainRecord failed: %v", err) } @@ -584,7 +584,7 @@ func TestBootstrap_CaptainWithoutCrew(t *testing.T) { // Bootstrap should be idempotent but notice missing crew // Currently Bootstrap skips if captain exists, so crew won't be added - err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "") + err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "", "") if err != nil { t.Fatalf("Bootstrap failed: %v", err) } @@ -856,7 +856,7 @@ func TestHoldPDS_BackfillRecordsIndex(t *testing.T) { // Bootstrap to create some records in MST (captain + crew) ownerDID := "did:plc:testowner" - err = 
pds.Bootstrap(ctx, nil, ownerDID, true, false, "") + err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "", "") if err != nil { t.Fatalf("Bootstrap failed: %v", err) } @@ -921,7 +921,7 @@ func TestHoldPDS_BackfillRecordsIndex_SkipsWhenSynced(t *testing.T) { defer pds.Close() // Bootstrap to create records - err = pds.Bootstrap(ctx, nil, "did:plc:testowner", true, false, "") + err = pds.Bootstrap(ctx, nil, "did:plc:testowner", true, false, "", "") if err != nil { t.Fatalf("Bootstrap failed: %v", err) } diff --git a/pkg/hold/pds/stats.go b/pkg/hold/pds/stats.go index 59a95e6..0317928 100644 --- a/pkg/hold/pds/stats.go +++ b/pkg/hold/pds/stats.go @@ -216,3 +216,26 @@ func (p *HoldPDS) ListStats(ctx context.Context) ([]*atproto.StatsRecord, error) return stats, nil } + +// ListStatsRecordsForUser returns all stats records where the user is the repository owner +// Used for GDPR data export to return all stats for repositories owned by the user +func (p *HoldPDS) ListStatsRecordsForUser(ctx context.Context, userDID string) ([]*atproto.StatsRecord, error) { + // Get all stats records and filter by ownerDID + allStats, err := p.ListStats(ctx) + if err != nil { + return nil, err + } + + var userStats []*atproto.StatsRecord + for _, stat := range allStats { + if stat.OwnerDID == userDID { + userStats = append(userStats, stat) + } + } + + if userStats == nil { + userStats = []*atproto.StatsRecord{} + } + + return userStats, nil +} diff --git a/pkg/hold/pds/status_test.go b/pkg/hold/pds/status_test.go index c58a07a..15851a7 100644 --- a/pkg/hold/pds/status_test.go +++ b/pkg/hold/pds/status_test.go @@ -277,7 +277,7 @@ func TestMain(m *testing.M) { // Bootstrap once ownerDID := "did:plc:testowner123" - err = sharedPDS.Bootstrap(sharedCtx, nil, ownerDID, true, false, "") + err = sharedPDS.Bootstrap(sharedCtx, nil, ownerDID, true, false, "", "") if err != nil { panic(fmt.Sprintf("Failed to bootstrap shared PDS: %v", err)) } diff --git a/pkg/hold/pds/xrpc.go 
b/pkg/hold/pds/xrpc.go index 23fb6f7..60ec551 100644 --- a/pkg/hold/pds/xrpc.go +++ b/pkg/hold/pds/xrpc.go @@ -195,6 +195,8 @@ func (h *XRPCHandler) RegisterHandlers(r chi.Router) { r.Group(func(r chi.Router) { r.Use(h.requireAuth) r.Post(atproto.HoldRequestCrew, h.HandleRequestCrew) + // GDPR data export endpoint (TODO: implement) + r.Get("/xrpc/io.atcr.hold.exportUserData", h.HandleExportUserData) }) // Public quota endpoint (no auth - quota is per-user, just needs userDid param) @@ -1492,3 +1494,139 @@ func (h *XRPCHandler) HandleGetQuota(w http.ResponseWriter, r *http.Request) { render.JSON(w, r, stats) } + +// HoldUserDataExport represents the GDPR data export from a hold service +type HoldUserDataExport struct { + ExportedAt time.Time `json:"exported_at"` + HoldDID string `json:"hold_did"` + UserDID string `json:"user_did"` + IsCaptain bool `json:"is_captain"` + CrewRecord *CrewExport `json:"crew_record,omitempty"` + LayerRecords []LayerExport `json:"layer_records"` + StatsRecords []StatsExport `json:"stats_records"` +} + +// CrewExport represents a sanitized crew record for export +type CrewExport struct { + Role string `json:"role"` + Permissions []string `json:"permissions"` + Tier string `json:"tier,omitempty"` + AddedAt string `json:"added_at"` +} + +// LayerExport represents a layer record for export +type LayerExport struct { + Digest string `json:"digest"` + Size int64 `json:"size"` + MediaType string `json:"media_type"` + Manifest string `json:"manifest"` + CreatedAt string `json:"created_at"` +} + +// StatsExport represents a stats record for export +type StatsExport struct { + Repository string `json:"repository"` + PullCount int64 `json:"pull_count"` + PushCount int64 `json:"push_count"` + LastPull string `json:"last_pull,omitempty"` + LastPush string `json:"last_push,omitempty"` + UpdatedAt string `json:"updated_at"` +} + +// HandleExportUserData handles GDPR data export requests for a specific user. 
+// This endpoint returns all records stored on this hold's PDS that reference +// the authenticated user's DID. +// +// Returns: +// - io.atcr.hold.layer records where userDid matches +// - io.atcr.hold.crew record for the DID (if exists) +// - io.atcr.hold.stats records where ownerDid matches +// - Whether the user is the hold captain +// +// Authentication: Requires valid service token from user's PDS +func (h *XRPCHandler) HandleExportUserData(w http.ResponseWriter, r *http.Request) { + // Get authenticated user from context + user := getUserFromContext(r) + if user == nil { + http.Error(w, "authentication required", http.StatusUnauthorized) + return + } + + slog.Info("GDPR data export requested", + "requester_did", user.DID, + "hold_did", h.pds.DID()) + + export := HoldUserDataExport{ + ExportedAt: time.Now().UTC(), + HoldDID: h.pds.DID(), + UserDID: user.DID, + LayerRecords: []LayerExport{}, + StatsRecords: []StatsExport{}, + } + + // Check if user is captain + _, captain, err := h.pds.GetCaptainRecord(r.Context()) + if err == nil && captain != nil && captain.Owner == user.DID { + export.IsCaptain = true + } + + // Get crew record for user + _, crewRecord, err := h.pds.GetCrewMemberByDID(r.Context(), user.DID) + if err == nil && crewRecord != nil { + export.CrewRecord = &CrewExport{ + Role: crewRecord.Role, + Permissions: crewRecord.Permissions, + Tier: crewRecord.Tier, + AddedAt: crewRecord.AddedAt, + } + } + + // Get layer records for user + layerRecords, err := h.pds.ListLayerRecordsForUser(r.Context(), user.DID) + if err != nil { + slog.Warn("Failed to get layer records for export", + "user_did", user.DID, + "error", err) + // Continue with empty list - don't fail entire export + } else { + for _, layer := range layerRecords { + export.LayerRecords = append(export.LayerRecords, LayerExport{ + Digest: layer.Digest, + Size: layer.Size, + MediaType: layer.MediaType, + Manifest: layer.Manifest, + CreatedAt: layer.CreatedAt, + }) + } + } + + // Get stats 
records for user + statsRecords, err := h.pds.ListStatsRecordsForUser(r.Context(), user.DID) + if err != nil { + slog.Warn("Failed to get stats records for export", + "user_did", user.DID, + "error", err) + // Continue with empty list - don't fail entire export + } else { + for _, stat := range statsRecords { + export.StatsRecords = append(export.StatsRecords, StatsExport{ + Repository: stat.Repository, + PullCount: stat.PullCount, + PushCount: stat.PushCount, + LastPull: stat.LastPull, + LastPush: stat.LastPush, + UpdatedAt: stat.UpdatedAt, + }) + } + } + + slog.Info("GDPR data export completed", + "user_did", user.DID, + "hold_did", h.pds.DID(), + "is_captain", export.IsCaptain, + "has_crew_record", export.CrewRecord != nil, + "layer_count", len(export.LayerRecords), + "stats_count", len(export.StatsRecords)) + + render.JSON(w, r, export) +} diff --git a/pkg/hold/pds/xrpc_test.go b/pkg/hold/pds/xrpc_test.go index 64152df..3a56acb 100644 --- a/pkg/hold/pds/xrpc_test.go +++ b/pkg/hold/pds/xrpc_test.go @@ -58,7 +58,7 @@ func setupTestXRPCHandler(t *testing.T) (*XRPCHandler, context.Context) { r, w, _ := os.Pipe() os.Stdout = w - err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "") + err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "", "") // Restore stdout w.Close() @@ -116,7 +116,7 @@ func setupTestXRPCHandlerWithIndex(t *testing.T) (*XRPCHandler, context.Context) r, w, _ := os.Pipe() os.Stdout = w - err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "") + err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "", "") // Restore stdout w.Close() @@ -1986,7 +1986,7 @@ func setupTestXRPCHandlerWithBlobs(t *testing.T) (*XRPCHandler, *mockS3Service, r, w, _ := os.Pipe() os.Stdout = w - err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "") + err = pds.Bootstrap(ctx, nil, ownerDID, true, false, "", "") // Restore stdout w.Close() @@ -2429,7 +2429,7 @@ func TestRequireOwnerOrCrewAdmin_Authorized(t *testing.T) { // Clean up - recreate captain record if it was 
deleted if w.Code == http.StatusOK { - handler.pds.Bootstrap(ctx, nil, "did:plc:testowner123", true, false, "") + handler.pds.Bootstrap(ctx, nil, "did:plc:testowner123", true, false, "", "") } } diff --git a/pkg/logging/logger.go b/pkg/logging/logger.go index 38571c9..4a2ba64 100644 --- a/pkg/logging/logger.go +++ b/pkg/logging/logger.go @@ -7,6 +7,7 @@ package logging import ( + "fmt" "io" "log/slog" "os" @@ -56,7 +57,16 @@ func InitLogger(level string) { levelVar.Set(logLevel) opts := &slog.HandlerOptions{ - Level: levelVar, + Level: levelVar, + AddSource: true, + ReplaceAttr: func(groups []string, a slog.Attr) slog.Attr { + if a.Key == slog.SourceKey { + if src, ok := a.Value.Any().(*slog.Source); ok { + a.Value = slog.StringValue(shortenSource(src.File, src.Line)) + } + } + return a + }, } handler := slog.NewTextHandler(os.Stdout, opts) @@ -127,6 +137,48 @@ func autoRevert() { "trigger", "auto-revert") } +// shortenSource shortens file paths for cleaner log output. +// - Our code (atcr.io/): shows pkg/appview/jetstream/processor.go:73 +// - Library code (/pkg/mod/): shows indigo/atproto/identity/handle.go:225 +// - Other: shows last 3 path components +func shortenSource(file string, line int) string { + // Our code: strip everything up to and including atcr.io/ + if idx := strings.Index(file, "atcr.io/"); idx != -1 { + return fmt.Sprintf("%s:%d", file[idx+8:], line) // 8 = len("atcr.io/") + } + + // Library code in go mod cache: extract module name + relative path + // Example: /go/pkg/mod/github.com/bluesky-social/indigo@v0.0.0-.../atproto/identity/handle.go + // becomes: indigo/atproto/identity/handle.go:225 + if idx := strings.Index(file, "/pkg/mod/"); idx != -1 { + modPath := file[idx+9:] // 9 = len("/pkg/mod/") + if atIdx := strings.Index(modPath, "@"); atIdx != -1 { + // Get module path before @ + modFullPath := modPath[:atIdx] + parts := strings.Split(modFullPath, "/") + + // Get module name - skip version suffix like "v3" if present + modName := 
parts[len(parts)-1] + if len(parts) >= 2 && len(modName) >= 2 && modName[0] == 'v' && modName[1] >= '0' && modName[1] <= '9' { + modName = parts[len(parts)-2] + } + + // Get path after version + afterAt := modPath[atIdx+1:] + if slashIdx := strings.Index(afterAt, "/"); slashIdx != -1 { + return fmt.Sprintf("%s%s:%d", modName, afterAt[slashIdx:], line) + } + } + } + + // Fallback: show last 3 path components + parts := strings.Split(file, "/") + if len(parts) > 3 { + parts = parts[len(parts)-3:] + } + return fmt.Sprintf("%s:%d", strings.Join(parts, "/"), line) +} + func levelToString(l slog.Level) string { switch l { case slog.LevelDebug: diff --git a/pkg/logging/logger_test.go b/pkg/logging/logger_test.go index ea9c69e..0d28460 100644 --- a/pkg/logging/logger_test.go +++ b/pkg/logging/logger_test.go @@ -395,3 +395,58 @@ func ExampleSetupTestLogger() { // cleanup() will restore the original logger when defer runs } + +func TestShortenSource(t *testing.T) { + tests := []struct { + name string + file string + line int + expected string + }{ + { + name: "our code", + file: "/app/atcr.io/pkg/appview/jetstream/processor.go", + line: 73, + expected: "pkg/appview/jetstream/processor.go:73", + }, + { + name: "indigo library", + file: "/go/pkg/mod/github.com/bluesky-social/indigo@v0.0.0-20251218205144-034a2c019e64/atproto/identity/handle.go", + line: 225, + expected: "indigo/atproto/identity/handle.go:225", + }, + { + name: "distribution with v3 suffix", + file: "/go/pkg/mod/github.com/distribution/distribution/v3@v3.0.0-rc.3/registry/storage/driver.go", + line: 123, + expected: "distribution/registry/storage/driver.go:123", + }, + { + name: "chi router", + file: "/go/pkg/mod/github.com/go-chi/chi/v5@v5.0.10/mux.go", + line: 42, + expected: "chi/mux.go:42", + }, + { + name: "simple module without version suffix", + file: "/go/pkg/mod/github.com/ipfs/go-cid@v0.4.1/cid.go", + line: 99, + expected: "go-cid/cid.go:99", + }, + { + name: "fallback - unknown path", + file: 
"/some/random/path/to/file.go", + line: 10, + expected: "path/to/file.go:10", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := shortenSource(tt.file, tt.line) + if result != tt.expected { + t.Errorf("shortenSource(%q, %d) = %q, want %q", tt.file, tt.line, result, tt.expected) + } + }) + } +}