mirror of
https://tangled.org/evan.jarrett.net/at-container-registry
synced 2026-04-20 16:40:29 +00:00
362 lines
13 KiB
Go
362 lines
13 KiB
Go
package auth
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"log/slog"
|
|
"net/http"
|
|
"net/url"
|
|
"time"
|
|
|
|
"atcr.io/pkg/atproto"
|
|
"atcr.io/pkg/auth/oauth"
|
|
"github.com/bluesky-social/indigo/atproto/atclient"
|
|
indigo_oauth "github.com/bluesky-social/indigo/atproto/auth/oauth"
|
|
)
|
|
|
|
// getErrorHint provides context-specific troubleshooting hints based on API error type
|
|
func getErrorHint(apiErr *atclient.APIError) string {
|
|
switch apiErr.Name {
|
|
case "use_dpop_nonce":
|
|
return "DPoP nonce mismatch - indigo library should automatically retry with new nonce. If this persists, check for concurrent request issues or PDS session corruption."
|
|
case "invalid_client":
|
|
if apiErr.Message != "" && apiErr.Message == "Validation of \"client_assertion\" failed: \"iat\" claim timestamp check failed (it should be in the past)" {
|
|
return "JWT timestamp validation failed - system clock on AppView may be ahead of PDS clock. Check NTP sync with: timedatectl status"
|
|
}
|
|
return "OAuth client authentication failed - check client key configuration and PDS OAuth server status"
|
|
case "invalid_token", "invalid_grant":
|
|
return "OAuth tokens expired or invalidated - user will need to re-authenticate via OAuth flow"
|
|
case "server_error":
|
|
if apiErr.StatusCode == 500 {
|
|
return "PDS returned internal server error - this may occur after repeated DPoP nonce failures or other PDS-side issues. Check PDS logs for root cause."
|
|
}
|
|
return "PDS server error - check PDS health and logs"
|
|
case "invalid_dpop_proof":
|
|
return "DPoP proof validation failed - check system clock sync and DPoP key configuration"
|
|
default:
|
|
if apiErr.StatusCode == 401 || apiErr.StatusCode == 403 {
|
|
return "Authentication/authorization failed - OAuth session may be expired or revoked"
|
|
}
|
|
return "PDS rejected the request - see errorName and errorMessage for details"
|
|
}
|
|
}
|
|
|
|
// GetOrFetchServiceToken gets a service token for hold authentication.
|
|
// Checks cache first, then fetches from PDS with OAuth/DPoP if needed.
|
|
// This is the canonical implementation used by both middleware and crew registration.
|
|
//
|
|
// IMPORTANT: Uses DoWithSession() to hold a per-DID lock through the entire PDS interaction.
|
|
// This prevents DPoP nonce race conditions when multiple Docker layers upload concurrently.
|
|
func GetOrFetchServiceToken(
|
|
ctx context.Context,
|
|
refresher *oauth.Refresher,
|
|
did, holdDID, pdsEndpoint string,
|
|
) (string, error) {
|
|
if refresher == nil {
|
|
return "", fmt.Errorf("refresher is nil (OAuth session required for service tokens)")
|
|
}
|
|
|
|
// Check cache first to avoid unnecessary PDS calls on every request
|
|
cachedToken, expiresAt := GetServiceToken(did, holdDID)
|
|
|
|
// Use cached token if it exists and has > 10s remaining
|
|
if cachedToken != "" && time.Until(expiresAt) > 10*time.Second {
|
|
slog.Debug("Using cached service token",
|
|
"did", did,
|
|
"expiresIn", time.Until(expiresAt).Round(time.Second))
|
|
return cachedToken, nil
|
|
}
|
|
|
|
// Cache miss or expiring soon - validate OAuth and get new service token
|
|
if cachedToken == "" {
|
|
slog.Debug("Service token cache miss, fetching new token", "did", did)
|
|
} else {
|
|
slog.Debug("Service token expiring soon, proactively renewing", "did", did)
|
|
}
|
|
|
|
// Use DoWithSession to hold the lock through the entire PDS interaction.
|
|
// This prevents DPoP nonce races when multiple goroutines try to fetch service tokens.
|
|
var serviceToken string
|
|
var fetchErr error
|
|
|
|
err := refresher.DoWithSession(ctx, did, func(session *indigo_oauth.ClientSession) error {
|
|
// Double-check cache after acquiring lock - another goroutine may have
|
|
// populated it while we were waiting (classic double-checked locking pattern)
|
|
cachedToken, expiresAt := GetServiceToken(did, holdDID)
|
|
if cachedToken != "" && time.Until(expiresAt) > 10*time.Second {
|
|
slog.Debug("Service token cache hit after lock acquisition",
|
|
"did", did,
|
|
"expiresIn", time.Until(expiresAt).Round(time.Second))
|
|
serviceToken = cachedToken
|
|
return nil
|
|
}
|
|
|
|
// Cache still empty/expired - proceed with PDS call
|
|
// Request 5-minute expiry (PDS may grant less)
|
|
// exp must be absolute Unix timestamp, not relative duration
|
|
// Note: OAuth scope includes #atcr_hold fragment, but service auth aud must be bare DID
|
|
expiryTime := time.Now().Unix() + 300 // 5 minutes from now
|
|
serviceAuthURL := fmt.Sprintf("%s%s?aud=%s&lxm=%s&exp=%d",
|
|
pdsEndpoint,
|
|
atproto.ServerGetServiceAuth,
|
|
url.QueryEscape(holdDID),
|
|
url.QueryEscape("com.atproto.repo.getRecord"),
|
|
expiryTime,
|
|
)
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", serviceAuthURL, nil)
|
|
if err != nil {
|
|
fetchErr = fmt.Errorf("failed to create service auth request: %w", err)
|
|
return fetchErr
|
|
}
|
|
|
|
// Use OAuth session to authenticate to PDS (with DPoP)
|
|
// The lock is held, so DPoP nonce negotiation is serialized per-DID
|
|
resp, err := session.DoWithAuth(session.Client, req, "com.atproto.server.getServiceAuth")
|
|
if err != nil {
|
|
// Auth error - may indicate expired tokens or corrupted session
|
|
InvalidateServiceToken(did, holdDID)
|
|
|
|
// Inspect the error to extract detailed information from indigo's APIError
|
|
var apiErr *atclient.APIError
|
|
if errors.As(err, &apiErr) {
|
|
// Log detailed API error information
|
|
slog.Error("OAuth authentication failed during service token request",
|
|
"component", "token/servicetoken",
|
|
"did", did,
|
|
"holdDID", holdDID,
|
|
"pdsEndpoint", pdsEndpoint,
|
|
"url", serviceAuthURL,
|
|
"error", err,
|
|
"httpStatus", apiErr.StatusCode,
|
|
"errorName", apiErr.Name,
|
|
"errorMessage", apiErr.Message,
|
|
"hint", getErrorHint(apiErr))
|
|
} else {
|
|
// Fallback for non-API errors (network errors, etc.)
|
|
slog.Error("OAuth authentication failed during service token request",
|
|
"component", "token/servicetoken",
|
|
"did", did,
|
|
"holdDID", holdDID,
|
|
"pdsEndpoint", pdsEndpoint,
|
|
"url", serviceAuthURL,
|
|
"error", err,
|
|
"errorType", fmt.Sprintf("%T", err),
|
|
"hint", "Network error or unexpected failure during OAuth request")
|
|
}
|
|
|
|
fetchErr = fmt.Errorf("OAuth validation failed: %w", err)
|
|
return fetchErr
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
// Service auth failed
|
|
bodyBytes, _ := io.ReadAll(resp.Body)
|
|
InvalidateServiceToken(did, holdDID)
|
|
slog.Error("Service token request returned non-200 status",
|
|
"component", "token/servicetoken",
|
|
"did", did,
|
|
"holdDID", holdDID,
|
|
"pdsEndpoint", pdsEndpoint,
|
|
"statusCode", resp.StatusCode,
|
|
"responseBody", string(bodyBytes),
|
|
"hint", "PDS rejected the service token request - check PDS logs for details")
|
|
fetchErr = fmt.Errorf("service auth failed with status %d: %s", resp.StatusCode, string(bodyBytes))
|
|
return fetchErr
|
|
}
|
|
|
|
// Parse response to get service token
|
|
var result struct {
|
|
Token string `json:"token"`
|
|
}
|
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
|
fetchErr = fmt.Errorf("failed to decode service auth response: %w", err)
|
|
return fetchErr
|
|
}
|
|
|
|
if result.Token == "" {
|
|
fetchErr = fmt.Errorf("empty token in service auth response")
|
|
return fetchErr
|
|
}
|
|
|
|
serviceToken = result.Token
|
|
return nil
|
|
})
|
|
|
|
if err != nil {
|
|
// DoWithSession failed (session load or callback error)
|
|
InvalidateServiceToken(did, holdDID)
|
|
|
|
// Try to extract detailed error information
|
|
var apiErr *atclient.APIError
|
|
if errors.As(err, &apiErr) {
|
|
slog.Error("Failed to get OAuth session for service token",
|
|
"component", "token/servicetoken",
|
|
"did", did,
|
|
"holdDID", holdDID,
|
|
"pdsEndpoint", pdsEndpoint,
|
|
"error", err,
|
|
"httpStatus", apiErr.StatusCode,
|
|
"errorName", apiErr.Name,
|
|
"errorMessage", apiErr.Message,
|
|
"hint", getErrorHint(apiErr))
|
|
} else if fetchErr == nil {
|
|
// Session load failed (not a fetch error)
|
|
slog.Error("Failed to get OAuth session for service token",
|
|
"component", "token/servicetoken",
|
|
"did", did,
|
|
"holdDID", holdDID,
|
|
"pdsEndpoint", pdsEndpoint,
|
|
"error", err,
|
|
"errorType", fmt.Sprintf("%T", err),
|
|
"hint", "OAuth session not found in database or token refresh failed")
|
|
}
|
|
|
|
// Delete the stale OAuth session to force re-authentication
|
|
// This also invalidates the UI session automatically
|
|
if delErr := refresher.DeleteSession(ctx, did); delErr != nil {
|
|
slog.Warn("Failed to delete stale OAuth session",
|
|
"component", "token/servicetoken",
|
|
"did", did,
|
|
"error", delErr)
|
|
}
|
|
|
|
if fetchErr != nil {
|
|
return "", fetchErr
|
|
}
|
|
return "", fmt.Errorf("failed to get OAuth session: %w", err)
|
|
}
|
|
|
|
// Cache the token (parses JWT to extract actual expiry)
|
|
if err := SetServiceToken(did, holdDID, serviceToken); err != nil {
|
|
slog.Warn("Failed to cache service token", "error", err, "did", did, "holdDID", holdDID)
|
|
// Non-fatal - we have the token, just won't be cached
|
|
}
|
|
|
|
slog.Debug("OAuth validation succeeded, service token obtained", "did", did)
|
|
return serviceToken, nil
|
|
}
|
|
|
|
// GetOrFetchServiceTokenWithAppPassword gets a service token using app-password Bearer authentication.
|
|
// Used when auth method is app_password instead of OAuth.
|
|
func GetOrFetchServiceTokenWithAppPassword(
|
|
ctx context.Context,
|
|
did, holdDID, pdsEndpoint string,
|
|
) (string, error) {
|
|
// Check cache first to avoid unnecessary PDS calls on every request
|
|
cachedToken, expiresAt := GetServiceToken(did, holdDID)
|
|
|
|
// Use cached token if it exists and has > 10s remaining
|
|
if cachedToken != "" && time.Until(expiresAt) > 10*time.Second {
|
|
slog.Debug("Using cached service token (app-password)",
|
|
"did", did,
|
|
"expiresIn", time.Until(expiresAt).Round(time.Second))
|
|
return cachedToken, nil
|
|
}
|
|
|
|
// Cache miss or expiring soon - get app-password token and fetch new service token
|
|
if cachedToken == "" {
|
|
slog.Debug("Service token cache miss, fetching new token with app-password", "did", did)
|
|
} else {
|
|
slog.Debug("Service token expiring soon, proactively renewing with app-password", "did", did)
|
|
}
|
|
|
|
// Get app-password access token from cache
|
|
accessToken, ok := GetGlobalTokenCache().Get(did)
|
|
if !ok {
|
|
InvalidateServiceToken(did, holdDID)
|
|
slog.Error("No app-password access token found in cache",
|
|
"component", "token/servicetoken",
|
|
"did", did,
|
|
"holdDID", holdDID,
|
|
"hint", "User must re-authenticate with docker login")
|
|
return "", fmt.Errorf("no app-password access token available for DID %s", did)
|
|
}
|
|
|
|
// Call com.atproto.server.getServiceAuth on the user's PDS with Bearer token
|
|
// Request 5-minute expiry (PDS may grant less)
|
|
// exp must be absolute Unix timestamp, not relative duration
|
|
expiryTime := time.Now().Unix() + 300 // 5 minutes from now
|
|
serviceAuthURL := fmt.Sprintf("%s%s?aud=%s&lxm=%s&exp=%d",
|
|
pdsEndpoint,
|
|
atproto.ServerGetServiceAuth,
|
|
url.QueryEscape(holdDID),
|
|
url.QueryEscape("com.atproto.repo.getRecord"),
|
|
expiryTime,
|
|
)
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", serviceAuthURL, nil)
|
|
if err != nil {
|
|
return "", fmt.Errorf("failed to create service auth request: %w", err)
|
|
}
|
|
|
|
// Set Bearer token authentication (app-password)
|
|
req.Header.Set("Authorization", "Bearer "+accessToken)
|
|
|
|
// Make request with standard HTTP client
|
|
resp, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
InvalidateServiceToken(did, holdDID)
|
|
slog.Error("App-password service token request failed",
|
|
"component", "token/servicetoken",
|
|
"did", did,
|
|
"holdDID", holdDID,
|
|
"pdsEndpoint", pdsEndpoint,
|
|
"error", err)
|
|
return "", fmt.Errorf("failed to request service token: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode == http.StatusUnauthorized {
|
|
// App-password token is invalid or expired - clear from cache
|
|
GetGlobalTokenCache().Delete(did)
|
|
InvalidateServiceToken(did, holdDID)
|
|
slog.Error("App-password token rejected by PDS",
|
|
"component", "token/servicetoken",
|
|
"did", did,
|
|
"hint", "User must re-authenticate with docker login")
|
|
return "", fmt.Errorf("app-password authentication failed: token expired or invalid")
|
|
}
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
// Service auth failed
|
|
bodyBytes, _ := io.ReadAll(resp.Body)
|
|
InvalidateServiceToken(did, holdDID)
|
|
slog.Error("Service token request returned non-200 status (app-password)",
|
|
"component", "token/servicetoken",
|
|
"did", did,
|
|
"holdDID", holdDID,
|
|
"pdsEndpoint", pdsEndpoint,
|
|
"statusCode", resp.StatusCode,
|
|
"responseBody", string(bodyBytes))
|
|
return "", fmt.Errorf("service auth failed with status %d: %s", resp.StatusCode, string(bodyBytes))
|
|
}
|
|
|
|
// Parse response to get service token
|
|
var result struct {
|
|
Token string `json:"token"`
|
|
}
|
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
|
return "", fmt.Errorf("failed to decode service auth response: %w", err)
|
|
}
|
|
|
|
if result.Token == "" {
|
|
return "", fmt.Errorf("empty token in service auth response")
|
|
}
|
|
|
|
serviceToken := result.Token
|
|
|
|
// Cache the token (parses JWT to extract actual expiry)
|
|
if err := SetServiceToken(did, holdDID, serviceToken); err != nil {
|
|
slog.Warn("Failed to cache service token", "error", err, "did", did, "holdDID", holdDID)
|
|
// Non-fatal - we have the token, just won't be cached
|
|
}
|
|
|
|
slog.Debug("App-password validation succeeded, service token obtained", "did", did)
|
|
return serviceToken, nil
|
|
}
|