From 6cfb14df2b7c4c7fe574c39ea7b220cc94d1f752 Mon Sep 17 00:00:00 2001 From: Evan Jarrett Date: Sat, 11 Oct 2025 09:23:47 -0500 Subject: [PATCH 01/10] Revert "fix presigned urls" This reverts commit f3748abf3177592979ae0ff76b7de1f3630b7ab2. --- cmd/hold/main.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmd/hold/main.go b/cmd/hold/main.go index 9099fbe..6718b1a 100644 --- a/cmd/hold/main.go +++ b/cmd/hold/main.go @@ -114,7 +114,10 @@ func (s *HoldService) initS3Client() error { } // Extract S3 configuration from storage parameters - params := s.config.Storage.Parameters() + params, ok := s.config.Storage.Parameters()["s3"].(configuration.Parameters) + if !ok { + return fmt.Errorf("failed to get S3 parameters from storage config") + } // Extract required S3 configuration region, _ := params["region"].(string) From 31276d800771329b4092b1b6cd1f31e31f36c1b1 Mon Sep 17 00:00:00 2001 From: Evan Jarrett Date: Sat, 11 Oct 2025 12:11:06 -0500 Subject: [PATCH 02/10] checkpoint currently works but not uploading through presigned --- cmd/hold/main.go | 65 ++++++++++++++++++++++++++------- pkg/storage/proxy_blob_store.go | 65 +++++++++++++++++++++++++++++++-- 2 files changed, 113 insertions(+), 17 deletions(-) diff --git a/cmd/hold/main.go b/cmd/hold/main.go index 6718b1a..a5212af 100644 --- a/cmd/hold/main.go +++ b/cmd/hold/main.go @@ -114,10 +114,7 @@ func (s *HoldService) initS3Client() error { } // Extract S3 configuration from storage parameters - params, ok := s.config.Storage.Parameters()["s3"].(configuration.Parameters) - if !ok { - return fmt.Errorf("failed to get S3 parameters from storage config") - } + params := s.config.Storage.Parameters() // Extract required S3 configuration region, _ := params["region"].(string) @@ -164,10 +161,7 @@ func (s *HoldService) initS3Client() error { s.s3PathPrefix = strings.TrimPrefix(rootDir, "/") } - log.Printf("S3 presigned URLs enabled for bucket: %s", s.bucket) - if s.s3PathPrefix != "" { - 
log.Printf("S3 path prefix: %s", s.s3PathPrefix) - } + log.Printf("✅ S3 presigned URLs enabled") return nil } @@ -210,8 +204,15 @@ func (s *HoldService) HandleGetPresignedURL(w http.ResponseWriter, r *http.Reque return } + log.Printf("📨 [HandleGetPresignedURL] Request received:") + log.Printf(" DID: %s", req.DID) + log.Printf(" Digest: %s", req.Digest) + log.Printf(" Remote: %s", r.RemoteAddr) + log.Printf(" s3Client nil? %v", s.s3Client == nil) + // Validate DID authorization for READ if !s.isAuthorizedRead(req.DID) { + log.Printf("❌ [HandleGetPresignedURL] Authorization FAILED") if req.DID == "" { // Anonymous request to private hold http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) @@ -230,10 +231,13 @@ func (s *HoldService) HandleGetPresignedURL(w http.ResponseWriter, r *http.Reque // In production, this would use driver-specific presigned URLs url, err := s.getDownloadURL(ctx, req.Digest, req.DID) if err != nil { + log.Printf("❌ [HandleGetPresignedURL] getDownloadURL failed: %v", err) http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) return } + log.Printf("✅ [HandleGetPresignedURL] Returning URL to client") + resp := GetPresignedURLResponse{ URL: url, ExpiresAt: expiry, @@ -301,14 +305,21 @@ func (s *HoldService) HandleProxyGet(w http.ResponseWriter, r *http.Request) { return } + log.Printf("📥 [HandleProxyGet] Blob download request:") + log.Printf(" Method: %s", r.Method) + log.Printf(" Digest: %s", digest) + log.Printf(" Remote: %s", r.RemoteAddr) + // Get DID from query param or header did := r.URL.Query().Get("did") if did == "" { did = r.Header.Get("X-ATCR-DID") } + log.Printf(" DID: %s", did) // Authorize READ access if !s.isAuthorizedRead(did) { + log.Printf("❌ [HandleProxyGet] Authorization FAILED") if did == "" { http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) } else { @@ -316,6 +327,7 @@ func (s *HoldService) HandleProxyGet(w http.ResponseWriter, 
r *http.Request) { } return } + log.Printf("✅ [HandleProxyGet] Authorization SUCCESS") ctx := r.Context() path := blobPath(digest) @@ -404,8 +416,14 @@ func (s *HoldService) HandleProxyPut(w http.ResponseWriter, r *http.Request) { did = r.Header.Get("X-ATCR-DID") } + log.Printf("🔐 [HandleProxyPut] Authorization check:") + log.Printf(" Path: %s", digest) + log.Printf(" DID: %s", did) + log.Printf(" Owner DID: %s", s.config.Registration.OwnerDID) + // Authorize WRITE access if !s.isAuthorizedWrite(did) { + log.Printf("❌ [HandleProxyPut] Authorization FAILED") if did == "" { http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) } else { @@ -414,6 +432,8 @@ func (s *HoldService) HandleProxyPut(w http.ResponseWriter, r *http.Request) { return } + log.Printf("✅ [HandleProxyPut] Authorization SUCCESS") + // Stream blob to storage (no buffering) ctx := r.Context() path := blobPath(digest) @@ -584,18 +604,31 @@ func (s *HoldService) getDownloadURL(ctx context.Context, digest string, did str } // Generate presigned GET URL + // Note: Don't use ResponseContentType - not supported by all S3-compatible services req, _ := s.s3Client.GetObjectRequest(&s3.GetObjectInput{ Bucket: aws.String(s.bucket), Key: aws.String(s3Key), }) + log.Printf("🔍 [getDownloadURL] Before Presign:") + log.Printf(" Digest: %s", digest) + log.Printf(" S3 Key: %s", s3Key) + log.Printf(" Bucket: %s", s.bucket) + log.Printf(" Request Operation: %s", req.Operation.Name) + log.Printf(" Request HTTPMethod: %s", req.Operation.HTTPMethod) + url, err := req.Presign(15 * time.Minute) if err != nil { - log.Printf("WARN: Presigned URL generation failed for %s, falling back to proxy: %v", digest, err) + log.Printf("❌ [getDownloadURL] Presign FAILED: %v", err) + log.Printf(" Falling back to proxy URL") return s.getProxyDownloadURL(digest, did), nil } - log.Printf("Generated presigned download URL for %s (expires in 15min)", digest) + log.Printf("✅ [getDownloadURL] Presigned URL generated 
successfully") + log.Printf(" URL: %s", url) + log.Printf(" URL Length: %d chars", len(url)) + log.Printf(" Expires: 15min") + return url, nil } @@ -620,10 +653,11 @@ func (s *HoldService) getUploadURL(ctx context.Context, digest string, size int6 s3Key = s.s3PathPrefix + "/" + s3Key } - // Generate presigned PUT URL + // Generate presigned PUT URL with Content-Type in signature req, _ := s.s3Client.PutObjectRequest(&s3.PutObjectInput{ - Bucket: aws.String(s.bucket), - Key: aws.String(s3Key), + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + ContentType: aws.String("application/octet-stream"), }) url, err := req.Presign(15 * time.Minute) @@ -632,7 +666,10 @@ func (s *HoldService) getUploadURL(ctx context.Context, digest string, size int6 return s.getProxyUploadURL(digest, did), nil } - log.Printf("Generated presigned upload URL for %s (expires in 15min)", digest) + log.Printf("🔑 Generated presigned upload URL for %s (expires in 15min)", digest) + log.Printf(" S3 Key: %s", s3Key) + log.Printf(" Bucket: %s", s.bucket) + log.Printf(" Size: %d bytes", size) return url, nil } diff --git a/pkg/storage/proxy_blob_store.go b/pkg/storage/proxy_blob_store.go index 5d8f08c..6e8b6e6 100644 --- a/pkg/storage/proxy_blob_store.go +++ b/pkg/storage/proxy_blob_store.go @@ -142,26 +142,43 @@ func (p *ProxyBlobStore) Put(ctx context.Context, mediaType string, content []by // Get upload URL url, err := p.getUploadURL(ctx, dgst, int64(len(content))) if err != nil { + fmt.Printf("❌ [proxy_blob_store/Put] Failed to get upload URL: digest=%s, error=%v\n", dgst, err) return distribution.Descriptor{}, err } + fmt.Printf("📤 [proxy_blob_store/Put] Starting PUT request:\n") + fmt.Printf(" Digest: %s\n", dgst) + fmt.Printf(" Size: %d bytes\n", len(content)) + fmt.Printf(" MediaType: %s\n", mediaType) + fmt.Printf(" URL: %s\n", url) + fmt.Printf(" Headers: Content-Type=application/octet-stream\n") + // Upload the blob req, err := http.NewRequestWithContext(ctx, "PUT", url, 
bytes.NewReader(content)) if err != nil { + fmt.Printf("❌ [proxy_blob_store/Put] Failed to create request: %v\n", err) return distribution.Descriptor{}, err } req.Header.Set("Content-Type", "application/octet-stream") resp, err := p.httpClient.Do(req) if err != nil { + fmt.Printf("❌ [proxy_blob_store/Put] HTTP request failed: %v\n", err) return distribution.Descriptor{}, err } defer resp.Body.Close() + fmt.Printf("📥 [proxy_blob_store/Put] Response:\n") + fmt.Printf(" Status: %d %s\n", resp.StatusCode, resp.Status) + if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { - return distribution.Descriptor{}, fmt.Errorf("upload failed with status %d", resp.StatusCode) + bodyBytes, _ := io.ReadAll(resp.Body) + fmt.Printf(" Error Body: %s\n", string(bodyBytes)) + return distribution.Descriptor{}, fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, string(bodyBytes)) } + fmt.Printf("✅ [proxy_blob_store/Put] Upload successful: digest=%s, size=%d\n", dgst, len(content)) + return distribution.Descriptor{ Digest: dgst, Size: int64(len(content)), @@ -177,7 +194,36 @@ func (p *ProxyBlobStore) Delete(ctx context.Context, dgst digest.Digest) error { // ServeBlob serves a blob via HTTP redirect func (p *ProxyBlobStore) ServeBlob(ctx context.Context, w http.ResponseWriter, r *http.Request, dgst digest.Digest) error { - // Get presigned download URL + // For HEAD requests, handle directly without redirect + // S3 presigned URLs are method-specific, and Docker sends HEAD to verify blobs exist + if r.Method == http.MethodHead { + // Check if blob exists via hold service HEAD request + url := fmt.Sprintf("%s/blobs/%s?did=%s", p.storageEndpoint, dgst.String(), p.did) + req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil) + if err != nil { + return distribution.ErrBlobUnknown + } + + resp, err := p.httpClient.Do(req) + if err != nil { + return distribution.ErrBlobUnknown + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + 
return distribution.ErrBlobUnknown + } + + // Copy headers from hold service response + if contentLength := resp.Header.Get("Content-Length"); contentLength != "" { + w.Header().Set("Content-Length", contentLength) + } + w.Header().Set("Content-Type", "application/octet-stream") + w.WriteHeader(http.StatusOK) + return nil + } + + // For GET requests, redirect to presigned URL url, err := p.getDownloadURL(ctx, dgst) if err != nil { return err @@ -190,6 +236,10 @@ func (p *ProxyBlobStore) ServeBlob(ctx context.Context, w http.ResponseWriter, r // Create returns a blob writer for uploading func (p *ProxyBlobStore) Create(ctx context.Context, options ...distribution.BlobCreateOption) (distribution.BlobWriter, error) { + fmt.Printf("🔧 [proxy_blob_store/Create] Starting streaming upload (NOT presigned URL)\n") + fmt.Printf(" Storage endpoint: %s\n", p.storageEndpoint) + fmt.Printf(" Repository: %s\n", p.repository) + // Parse options var opts distribution.CreateOptions for _, option := range options { @@ -198,6 +248,12 @@ func (p *ProxyBlobStore) Create(ctx context.Context, options ...distribution.Blo } } + fmt.Printf(" Mount: %v\n", opts.Mount.ShouldMount) + if opts.Mount.ShouldMount { + fmt.Printf(" Mount from: %s\n", opts.Mount.From.Name()) + fmt.Printf(" Mount digest: %s\n", opts.Mount.Stat.Digest) + } + // Create pipe for streaming upload pipeReader, pipeWriter := io.Pipe() uploadErr := make(chan error, 1) @@ -228,7 +284,10 @@ func (p *ProxyBlobStore) Create(ctx context.Context, options ...distribution.Blo tempPath := fmt.Sprintf("uploads/temp-%s", writer.id) // No leading slash url := fmt.Sprintf("%s/blobs/%s?did=%s", p.storageEndpoint, tempPath, p.did) - fmt.Printf("DEBUG [goroutine]: Starting upload to temp: url=%s\n", url) + fmt.Printf("📦 [goroutine]: Starting streaming upload to temp location\n") + fmt.Printf(" Temp path: %s\n", tempPath) + fmt.Printf(" URL: %s\n", url) + fmt.Printf(" This is a PROXY upload (not presigned URL)\n") // Use context with timeout to 
prevent hanging forever uploadCtx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) From f2d921b73c6f180e348a52f3182eaff06571b0c7 Mon Sep 17 00:00:00 2001 From: Evan Jarrett Date: Sat, 11 Oct 2025 17:41:07 -0500 Subject: [PATCH 03/10] re-implement multipart. seems to be working --- cmd/hold/main.go | 329 ++++++++++++++++++++++++++ pkg/storage/proxy_blob_store.go | 406 ++++++++++++++++++++++---------- 2 files changed, 611 insertions(+), 124 deletions(-) diff --git a/cmd/hold/main.go b/cmd/hold/main.go index a5212af..da28d4e 100644 --- a/cmd/hold/main.go +++ b/cmd/hold/main.go @@ -191,6 +191,53 @@ type PutPresignedURLResponse struct { ExpiresAt time.Time `json:"expires_at"` } +// StartMultipartUploadRequest initiates a multipart upload +type StartMultipartUploadRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` +} + +// StartMultipartUploadResponse contains the multipart upload ID +type StartMultipartUploadResponse struct { + UploadID string `json:"upload_id"` + ExpiresAt time.Time `json:"expires_at"` +} + +// GetPartURLRequest requests a presigned URL for a specific part +type GetPartURLRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` + UploadID string `json:"upload_id"` + PartNumber int `json:"part_number"` +} + +// GetPartURLResponse contains the presigned URL for a part +type GetPartURLResponse struct { + URL string `json:"url"` + ExpiresAt time.Time `json:"expires_at"` +} + +// CompleteMultipartRequest completes a multipart upload +type CompleteMultipartRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` + UploadID string `json:"upload_id"` + Parts []CompletedPart `json:"parts"` +} + +// CompletedPart represents an uploaded part with its ETag +type CompletedPart struct { + PartNumber int `json:"part_number"` + ETag string `json:"etag"` +} + +// AbortMultipartRequest aborts an in-progress upload +type AbortMultipartRequest struct { + DID string `json:"did"` + Digest string 
`json:"digest"` + UploadID string `json:"upload_id"` +} + // HandleGetPresignedURL handles requests for download URLs func (s *HoldService) HandleGetPresignedURL(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { @@ -682,6 +729,282 @@ func (s *HoldService) getProxyUploadURL(digest, did string) string { return fmt.Sprintf("%s/blobs/%s?did=%s", s.config.Server.PublicURL, digest, did) } +// startMultipartUpload initiates a multipart upload and returns upload ID +func (s *HoldService) startMultipartUpload(ctx context.Context, digest string) (string, error) { + if s.s3Client == nil { + return "", fmt.Errorf("S3 not configured") + } + + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + result, err := s.s3Client.CreateMultipartUploadWithContext(ctx, &s3.CreateMultipartUploadInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + }) + if err != nil { + return "", err + } + + log.Printf("Started multipart upload: digest=%s, uploadID=%s", digest, *result.UploadId) + return *result.UploadId, nil +} + +// getPartPresignedURL generates presigned URL for a specific part +func (s *HoldService) getPartPresignedURL(ctx context.Context, digest, uploadID string, partNumber int) (string, error) { + if s.s3Client == nil { + return "", fmt.Errorf("S3 not configured") + } + + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + req, _ := s.s3Client.UploadPartRequest(&s3.UploadPartInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + UploadId: aws.String(uploadID), + PartNumber: aws.Int64(int64(partNumber)), + }) + + url, err := req.Presign(15 * time.Minute) + if err != nil { + return "", err + } + + log.Printf("Generated part presigned URL: digest=%s, uploadID=%s, part=%d", digest, uploadID, partNumber) + return url, nil +} + +// completeMultipartUpload finalizes 
the multipart upload +func (s *HoldService) completeMultipartUpload(ctx context.Context, digest, uploadID string, parts []CompletedPart) error { + if s.s3Client == nil { + return fmt.Errorf("S3 not configured") + } + + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + // Convert to S3 CompletedPart format + s3Parts := make([]*s3.CompletedPart, len(parts)) + for i, p := range parts { + s3Parts[i] = &s3.CompletedPart{ + PartNumber: aws.Int64(int64(p.PartNumber)), + ETag: aws.String(p.ETag), + } + } + + _, err := s.s3Client.CompleteMultipartUploadWithContext(ctx, &s3.CompleteMultipartUploadInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + UploadId: aws.String(uploadID), + MultipartUpload: &s3.CompletedMultipartUpload{ + Parts: s3Parts, + }, + }) + + if err != nil { + log.Printf("Failed to complete multipart upload: digest=%s, uploadID=%s, err=%v", digest, uploadID, err) + return err + } + + log.Printf("Completed multipart upload: digest=%s, uploadID=%s, parts=%d", digest, uploadID, len(parts)) + return nil +} + +// abortMultipartUpload aborts an in-progress multipart upload +func (s *HoldService) abortMultipartUpload(ctx context.Context, digest, uploadID string) error { + if s.s3Client == nil { + return fmt.Errorf("S3 not configured") + } + + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + _, err := s.s3Client.AbortMultipartUploadWithContext(ctx, &s3.AbortMultipartUploadInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + UploadId: aws.String(uploadID), + }) + + if err != nil { + log.Printf("Failed to abort multipart upload: digest=%s, uploadID=%s, err=%v", digest, uploadID, err) + return err + } + + log.Printf("Aborted multipart upload: digest=%s, uploadID=%s", digest, uploadID) + return nil +} + +// HandleStartMultipart initiates a multipart upload +func 
(s *HoldService) HandleStartMultipart(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req StartMultipartUploadRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + // Validate DID authorization for WRITE + if !s.isAuthorizedWrite(req.DID) { + if req.DID == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + // Start multipart upload + ctx := r.Context() + uploadID, err := s.startMultipartUpload(ctx, req.Digest) + if err != nil { + http.Error(w, fmt.Sprintf("failed to start multipart upload: %v", err), http.StatusInternalServerError) + return + } + + expiry := time.Now().Add(24 * time.Hour) // Multipart uploads can take longer + + resp := StartMultipartUploadResponse{ + UploadID: uploadID, + ExpiresAt: expiry, + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} + +// HandleGetPartURL generates a presigned URL for uploading a specific part +func (s *HoldService) HandleGetPartURL(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req GetPartURLRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + // Validate DID authorization for WRITE + if !s.isAuthorizedWrite(req.DID) { + if req.DID == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + // Get presigned URL for this part + ctx := r.Context() + url, 
err := s.getPartPresignedURL(ctx, req.Digest, req.UploadID, req.PartNumber) + if err != nil { + http.Error(w, fmt.Sprintf("failed to generate part URL: %v", err), http.StatusInternalServerError) + return + } + + expiry := time.Now().Add(15 * time.Minute) + + resp := GetPartURLResponse{ + URL: url, + ExpiresAt: expiry, + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} + +// HandleCompleteMultipart completes a multipart upload +func (s *HoldService) HandleCompleteMultipart(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req CompleteMultipartRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + // Validate DID authorization for WRITE + if !s.isAuthorizedWrite(req.DID) { + if req.DID == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + // Complete multipart upload + ctx := r.Context() + if err := s.completeMultipartUpload(ctx, req.Digest, req.UploadID, req.Parts); err != nil { + http.Error(w, fmt.Sprintf("failed to complete multipart upload: %v", err), http.StatusInternalServerError) + return + } + + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]string{ + "status": "completed", + }) +} + +// HandleAbortMultipart aborts an in-progress multipart upload +func (s *HoldService) HandleAbortMultipart(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req AbortMultipartRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid 
request: %v", err), http.StatusBadRequest) + return + } + + // Validate DID authorization for WRITE + if !s.isAuthorizedWrite(req.DID) { + if req.DID == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + // Abort multipart upload + ctx := r.Context() + if err := s.abortMultipartUpload(ctx, req.Digest, req.UploadID); err != nil { + http.Error(w, fmt.Sprintf("failed to abort multipart upload: %v", err), http.StatusInternalServerError) + return + } + + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]string{ + "status": "aborted", + }) +} + // RegisterRequest represents a request to register this hold in a user's PDS type RegisterRequest struct { DID string `json:"did"` @@ -800,6 +1123,12 @@ func main() { mux.HandleFunc("/put-presigned-url", service.HandlePutPresignedURL) mux.HandleFunc("/move", service.HandleMove) + // Multipart upload endpoints + mux.HandleFunc("/start-multipart", service.HandleStartMultipart) + mux.HandleFunc("/part-presigned-url", service.HandleGetPartURL) + mux.HandleFunc("/complete-multipart", service.HandleCompleteMultipart) + mux.HandleFunc("/abort-multipart", service.HandleAbortMultipart) + // Pre-register OAuth callback route (will be populated by auto-registration) var oauthCallbackHandler http.HandlerFunc mux.HandleFunc("/auth/oauth/callback", func(w http.ResponseWriter, r *http.Request) { diff --git a/pkg/storage/proxy_blob_store.go b/pkg/storage/proxy_blob_store.go index 6e8b6e6..5f18845 100644 --- a/pkg/storage/proxy_blob_store.go +++ b/pkg/storage/proxy_blob_store.go @@ -234,9 +234,9 @@ func (p *ProxyBlobStore) ServeBlob(ctx context.Context, w http.ResponseWriter, r return nil } -// Create returns a blob writer for uploading +// Create returns a blob writer for uploading using multipart upload func (p *ProxyBlobStore) 
Create(ctx context.Context, options ...distribution.BlobCreateOption) (distribution.BlobWriter, error) { - fmt.Printf("🔧 [proxy_blob_store/Create] Starting streaming upload (NOT presigned URL)\n") + fmt.Printf("🔧 [proxy_blob_store/Create] Starting multipart upload\n") fmt.Printf(" Storage endpoint: %s\n", p.storageEndpoint) fmt.Printf(" Repository: %s\n", p.repository) @@ -248,26 +248,28 @@ func (p *ProxyBlobStore) Create(ctx context.Context, options ...distribution.Blo } } - fmt.Printf(" Mount: %v\n", opts.Mount.ShouldMount) - if opts.Mount.ShouldMount { - fmt.Printf(" Mount from: %s\n", opts.Mount.From.Name()) - fmt.Printf(" Mount digest: %s\n", opts.Mount.Stat.Digest) + // Generate unique writer ID + writerID := fmt.Sprintf("upload-%d", time.Now().UnixNano()) + + // Use temp digest for upload location (will be moved to final digest on commit) + tempDigest := fmt.Sprintf("uploads/temp-%s", writerID) + + // Start multipart upload via hold service + uploadID, err := p.startMultipartUpload(ctx, tempDigest) + if err != nil { + return nil, fmt.Errorf("failed to start multipart upload: %w", err) } - // Create pipe for streaming upload - pipeReader, pipeWriter := io.Pipe() - uploadErr := make(chan error, 1) - digestChan := make(chan string, 1) + fmt.Printf(" Started multipart upload: uploadID=%s\n", uploadID) - // Create writer writer := &ProxyBlobWriter{ store: p, options: opts, - pipeWriter: pipeWriter, - pipeReader: pipeReader, - digestChan: digestChan, - uploadErr: uploadErr, - id: fmt.Sprintf("upload-%d", time.Now().UnixNano()), + uploadID: uploadID, + parts: make([]CompletedPart, 0), + partNumber: 1, + buffer: bytes.NewBuffer(make([]byte, 0, maxChunkSize)), // 5MB buffer + id: writerID, startedAt: time.Now(), } @@ -276,68 +278,6 @@ func (p *ProxyBlobStore) Create(ctx context.Context, options ...distribution.Blo globalUploads[writer.id] = writer globalUploadsMu.Unlock() - // Start background goroutine that streams to temp location immediately - go func() { - defer 
pipeReader.Close() - - // Stream to temp location immediately to avoid pipe deadlock - tempPath := fmt.Sprintf("uploads/temp-%s", writer.id) // No leading slash - url := fmt.Sprintf("%s/blobs/%s?did=%s", p.storageEndpoint, tempPath, p.did) - - fmt.Printf("📦 [goroutine]: Starting streaming upload to temp location\n") - fmt.Printf(" Temp path: %s\n", tempPath) - fmt.Printf(" URL: %s\n", url) - fmt.Printf(" This is a PROXY upload (not presigned URL)\n") - - // Use context with timeout to prevent hanging forever - uploadCtx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) - defer cancel() - - req, err := http.NewRequestWithContext(uploadCtx, "PUT", url, pipeReader) - if err != nil { - fmt.Printf("DEBUG [goroutine]: Failed to create request: %v\n", err) - // Consume digest channel even on error - <-digestChan - uploadErr <- fmt.Errorf("failed to create request: %w", err) - return - } - req.Header.Set("Content-Type", "application/octet-stream") - - fmt.Printf("DEBUG [goroutine]: Sending PUT request...\n") - // Stream to temp location (this will block until all data is written) - resp, err := p.httpClient.Do(req) - if err != nil { - fmt.Printf("DEBUG [goroutine]: PUT failed: %v\n", err) - <-digestChan - uploadErr <- fmt.Errorf("failed to upload to temp: %w", err) - return - } - defer resp.Body.Close() - - fmt.Printf("DEBUG [goroutine]: Got response status=%d\n", resp.StatusCode) - - if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { - bodyBytes, _ := io.ReadAll(resp.Body) - fmt.Printf("DEBUG [goroutine]: Upload failed with status %d, body=%s\n", resp.StatusCode, string(bodyBytes)) - <-digestChan - uploadErr <- fmt.Errorf("upload to temp failed: status %d, body: %s", resp.StatusCode, string(bodyBytes)) - return - } - - fmt.Printf("DEBUG [goroutine]: Upload to temp succeeded, waiting for digest...\n") - // Upload to temp succeeded, now wait for digest from Commit() - digest, ok := <-digestChan - if !ok { - uploadErr <- 
fmt.Errorf("upload cancelled after streaming to temp") - return - } - - fmt.Printf("DEBUG [goroutine]: Got digest=%s, signaling completion\n", digest) - // Store digest for Commit() to use in move operation - writer.finalDigest = digest - uploadErr <- nil - }() - return writer, nil } @@ -352,7 +292,7 @@ func (p *ProxyBlobStore) Resume(ctx context.Context, id string) (distribution.Bl return nil, distribution.ErrBlobUploadUnknown } - // With streaming, no flush needed - just return the writer + // Just return the writer - parts are buffered and flushed on demand return writer, nil } @@ -439,19 +379,176 @@ func (p *ProxyBlobStore) getUploadURL(ctx context.Context, dgst digest.Digest, s return result.URL, nil } -// ProxyBlobWriter implements distribution.BlobWriter for proxy uploads +// startMultipartUpload initiates a multipart upload via hold service +func (p *ProxyBlobStore) startMultipartUpload(ctx context.Context, digest string) (string, error) { + reqBody := map[string]any{ + "did": p.did, + "digest": digest, + } + + body, err := json.Marshal(reqBody) + if err != nil { + return "", err + } + + url := fmt.Sprintf("%s/start-multipart", p.storageEndpoint) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) + if err != nil { + return "", err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := p.httpClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("start multipart failed: status %d, body: %s", resp.StatusCode, string(bodyBytes)) + } + + var result struct { + UploadID string `json:"upload_id"` + } + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return "", err + } + + return result.UploadID, nil +} + +// getPartPresignedURL gets a presigned URL for uploading a specific part +func (p *ProxyBlobStore) getPartPresignedURL(ctx context.Context, digest, uploadID 
string, partNumber int) (string, error) { + reqBody := map[string]any{ + "did": p.did, + "digest": digest, + "upload_id": uploadID, + "part_number": partNumber, + } + + body, err := json.Marshal(reqBody) + if err != nil { + return "", err + } + + url := fmt.Sprintf("%s/part-presigned-url", p.storageEndpoint) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) + if err != nil { + return "", err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := p.httpClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + return "", fmt.Errorf("get part URL failed: status %d, body: %s", resp.StatusCode, string(bodyBytes)) + } + + var result struct { + URL string `json:"url"` + } + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return "", err + } + + return result.URL, nil +} + +// completeMultipartUpload completes a multipart upload via hold service +func (p *ProxyBlobStore) completeMultipartUpload(ctx context.Context, digest, uploadID string, parts []CompletedPart) error { + reqBody := map[string]any{ + "did": p.did, + "digest": digest, + "upload_id": uploadID, + "parts": parts, + } + + body, err := json.Marshal(reqBody) + if err != nil { + return err + } + + url := fmt.Sprintf("%s/complete-multipart", p.storageEndpoint) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := p.httpClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + return fmt.Errorf("complete multipart failed: status %d, body: %s", resp.StatusCode, string(bodyBytes)) + } + + return nil +} + +// abortMultipartUpload aborts a multipart upload via hold service +func (p *ProxyBlobStore) 
abortMultipartUpload(ctx context.Context, digest, uploadID string) error { + reqBody := map[string]any{ + "did": p.did, + "digest": digest, + "upload_id": uploadID, + } + + body, err := json.Marshal(reqBody) + if err != nil { + return err + } + + url := fmt.Sprintf("%s/abort-multipart", p.storageEndpoint) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := p.httpClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + bodyBytes, _ := io.ReadAll(resp.Body) + return fmt.Errorf("abort multipart failed: status %d, body: %s", resp.StatusCode, string(bodyBytes)) + } + + return nil +} + +// CompletedPart represents an uploaded part with its ETag +type CompletedPart struct { + PartNumber int `json:"part_number"` + ETag string `json:"etag"` +} + +// ProxyBlobWriter implements distribution.BlobWriter for proxy uploads using multipart upload type ProxyBlobWriter struct { store *ProxyBlobStore options distribution.CreateOptions - pipeWriter *io.PipeWriter // Streams directly to hold service - pipeReader *io.PipeReader - digestChan chan string // Sends digest to upload goroutine - uploadErr chan error // Receives upload result from goroutine - finalDigest string // Final digest for move operation - size int64 + uploadID string // S3 multipart upload ID + parts []CompletedPart // Track uploaded parts with ETags + partNumber int // Current part number (starts at 1) + buffer *bytes.Buffer // Buffer for current part + size int64 // Total bytes written closed bool - id string // Distribution's upload ID + id string // Distribution's upload ID (for state) startedAt time.Time + finalDigest string // Set on Commit } // ID returns the upload ID @@ -465,22 +562,79 @@ func (w *ProxyBlobWriter) StartedAt() time.Time { } // Write writes data to the upload -// Streams directly to hold service via 
pipe +// Buffers data and flushes when buffer reaches 5MB func (w *ProxyBlobWriter) Write(p []byte) (int, error) { if w.closed { return 0, fmt.Errorf("writer closed") } - // Write to pipe - streams immediately to hold service - n, err := w.pipeWriter.Write(p) - if err != nil { - // If write fails (client disconnected), close pipe to unblock goroutine - w.pipeWriter.CloseWithError(err) - return n, err - } + n, err := w.buffer.Write(p) w.size += int64(n) - return n, nil + // Flush if buffer reaches 5MB (S3 minimum part size) + if w.buffer.Len() >= maxChunkSize { + if err := w.flushPart(); err != nil { + return n, err + } + } + + return n, err +} + +// flushPart uploads the current buffer as a part +func (w *ProxyBlobWriter) flushPart() error { + if w.buffer.Len() == 0 { + return nil + } + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + // Get presigned URL for this part + tempDigest := fmt.Sprintf("uploads/temp-%s", w.id) + url, err := w.store.getPartPresignedURL(ctx, tempDigest, w.uploadID, w.partNumber) + if err != nil { + return fmt.Errorf("failed to get part presigned URL: %w", err) + } + + fmt.Printf("📤 [flushPart] Uploading part %d: size=%d bytes\n", w.partNumber, w.buffer.Len()) + + // Upload part to S3 + req, err := http.NewRequestWithContext(ctx, "PUT", url, bytes.NewReader(w.buffer.Bytes())) + if err != nil { + return err + } + req.Header.Set("Content-Type", "application/octet-stream") + + resp, err := w.store.httpClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { + bodyBytes, _ := io.ReadAll(resp.Body) + return fmt.Errorf("part upload failed: status %d, body: %s", resp.StatusCode, string(bodyBytes)) + } + + // Store ETag for completion + etag := resp.Header.Get("ETag") + if etag == "" { + return fmt.Errorf("no ETag in response") + } + + w.parts = append(w.parts, CompletedPart{ + PartNumber: w.partNumber, 
+ ETag: etag, + }) + + fmt.Printf("✅ [flushPart] Part %d uploaded successfully: ETag=%s\n", w.partNumber, etag) + + // Reset buffer and increment part number + w.buffer.Reset() + w.partNumber++ + + return nil } // ReadFrom reads from a reader @@ -518,40 +672,47 @@ func (w *ProxyBlobWriter) Size() int64 { return w.size } -// Commit finalizes the upload +// Commit finalizes the upload by completing multipart upload and moving to final location func (w *ProxyBlobWriter) Commit(ctx context.Context, desc distribution.Descriptor) (distribution.Descriptor, error) { if w.closed { return distribution.Descriptor{}, fmt.Errorf("writer closed") } w.closed = true + fmt.Printf("📝 [Commit] Starting commit: digest=%s, size=%d\n", desc.Digest, w.size) + // Remove from global uploads map globalUploadsMu.Lock() delete(globalUploads, w.id) globalUploadsMu.Unlock() - // Close pipe to signal EOF to upload goroutine - if err := w.pipeWriter.Close(); err != nil { - return distribution.Descriptor{}, fmt.Errorf("failed to close pipe: %w", err) + // Flush any remaining buffered data + if w.buffer.Len() > 0 { + fmt.Printf("📤 [Commit] Flushing final buffer: %d bytes\n", w.buffer.Len()) + if err := w.flushPart(); err != nil { + // Try to abort multipart on error + tempDigest := fmt.Sprintf("uploads/temp-%s", w.id) + w.store.abortMultipartUpload(ctx, tempDigest, w.uploadID) + return distribution.Descriptor{}, fmt.Errorf("failed to flush final part: %w", err) + } } - // Send digest to upload goroutine (it's waiting after temp upload completes) - w.digestChan <- desc.Digest.String() - close(w.digestChan) - - // Wait for upload goroutine to complete - if err := <-w.uploadErr; err != nil { - return distribution.Descriptor{}, fmt.Errorf("upload to temp failed: %w", err) + // Complete multipart upload at temp location + tempDigest := fmt.Sprintf("uploads/temp-%s", w.id) + fmt.Printf("🔒 [Commit] Completing multipart upload: uploadID=%s, parts=%d\n", w.uploadID, len(w.parts)) + if err := 
w.store.completeMultipartUpload(ctx, tempDigest, w.uploadID, w.parts); err != nil { + return distribution.Descriptor{}, fmt.Errorf("failed to complete multipart upload: %w", err) } - // Now move temp → final location - tempPath := fmt.Sprintf("uploads/temp-%s", w.id) // No leading slash + // Move from temp → final location (server-side S3 copy) + tempPath := fmt.Sprintf("uploads/temp-%s", w.id) finalPath := desc.Digest.String() + fmt.Printf("🚚 [Commit] Moving blob: %s → %s\n", tempPath, finalPath) moveURL := fmt.Sprintf("%s/move?from=%s&to=%s&did=%s", w.store.storageEndpoint, tempPath, finalPath, w.store.did) - req, err := http.NewRequestWithContext(context.Background(), "POST", moveURL, nil) + req, err := http.NewRequestWithContext(ctx, "POST", moveURL, nil) if err != nil { return distribution.Descriptor{}, fmt.Errorf("failed to create move request: %w", err) } @@ -567,7 +728,7 @@ func (w *ProxyBlobWriter) Commit(ctx context.Context, desc distribution.Descript return distribution.Descriptor{}, fmt.Errorf("move blob failed: status %d, body: %s", resp.StatusCode, string(bodyBytes)) } - fmt.Printf("DEBUG [proxy_blob_store]: Committed upload: digest=%s, size=%d (moved from temp)\n", desc.Digest, w.size) + fmt.Printf("✅ [Commit] Upload completed successfully: digest=%s, size=%d, parts=%d\n", desc.Digest, w.size, len(w.parts)) return distribution.Descriptor{ Digest: desc.Digest, @@ -576,34 +737,31 @@ func (w *ProxyBlobWriter) Commit(ctx context.Context, desc distribution.Descript }, nil } -// Cancel cancels the upload +// Cancel cancels the upload by aborting the multipart upload func (w *ProxyBlobWriter) Cancel(ctx context.Context) error { w.closed = true + fmt.Printf("❌ [Cancel] Cancelling upload: id=%s\n", w.id) + // Remove from global uploads map globalUploadsMu.Lock() delete(globalUploads, w.id) globalUploadsMu.Unlock() - // Close digest channel without sending digest - close(w.digestChan) - - // Close pipe with error to stop streaming - if w.pipeWriter != nil { - 
w.pipeWriter.CloseWithError(fmt.Errorf("upload cancelled")) + // Abort multipart upload + tempDigest := fmt.Sprintf("uploads/temp-%s", w.id) + if err := w.store.abortMultipartUpload(ctx, tempDigest, w.uploadID); err != nil { + fmt.Printf("⚠️ [Cancel] Failed to abort multipart upload: %v\n", err) + // Continue anyway - we want to mark upload as cancelled } - // Wait for goroutine to finish - <-w.uploadErr - - fmt.Printf("DEBUG [proxy_blob_store]: Cancelled upload: id=%s\n", w.id) + fmt.Printf("✅ [Cancel] Upload cancelled: id=%s\n", w.id) return nil } // Close closes the writer -// Just returns - streaming continues via pipe +// Parts are flushed on demand, so this is a no-op func (w *ProxyBlobWriter) Close() error { - // Don't close pipe here - that happens in Commit() or Cancel() // Don't set w.closed = true - allow resuming for next PATCH return nil } From eccb3b233783f8c47c97eb6fd4077c059966a37b Mon Sep 17 00:00:00 2001 From: Evan Jarrett Date: Sat, 11 Oct 2025 17:56:45 -0500 Subject: [PATCH 04/10] head presigned urls --- cmd/hold/main.go | 104 ++++++++++++++++++++++++++++++++ pkg/storage/proxy_blob_store.go | 75 +++++++++++++++-------- 2 files changed, 155 insertions(+), 24 deletions(-) diff --git a/cmd/hold/main.go b/cmd/hold/main.go index da28d4e..79fef79 100644 --- a/cmd/hold/main.go +++ b/cmd/hold/main.go @@ -178,6 +178,18 @@ type GetPresignedURLResponse struct { ExpiresAt time.Time `json:"expires_at"` } +// HeadPresignedURLRequest represents a request for a presigned HEAD URL +type HeadPresignedURLRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` +} + +// HeadPresignedURLResponse contains the presigned HEAD URL +type HeadPresignedURLResponse struct { + URL string `json:"url"` + ExpiresAt time.Time `json:"expires_at"` +} + // PutPresignedURLRequest represents a request for a presigned upload URL type PutPresignedURLRequest struct { DID string `json:"did"` @@ -294,6 +306,59 @@ func (s *HoldService) HandleGetPresignedURL(w 
http.ResponseWriter, r *http.Reque json.NewEncoder(w).Encode(resp) } +// HandleHeadPresignedURL handles requests for HEAD URLs +func (s *HoldService) HandleHeadPresignedURL(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req HeadPresignedURLRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + log.Printf("📨 [HandleHeadPresignedURL] Request received:") + log.Printf(" DID: %s", req.DID) + log.Printf(" Digest: %s", req.Digest) + log.Printf(" Remote: %s", r.RemoteAddr) + + // Validate DID authorization for READ + if !s.isAuthorizedRead(req.DID) { + log.Printf("❌ [HandleHeadPresignedURL] Authorization FAILED") + if req.DID == "" { + // Anonymous request to private hold + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + // Authenticated but not authorized + http.Error(w, "forbidden: access denied", http.StatusForbidden) + } + return + } + + // Generate presigned HEAD URL (15 minute expiry) + ctx := context.Background() + expiry := time.Now().Add(15 * time.Minute) + + url, err := s.getHeadURL(ctx, req.Digest, req.DID) + if err != nil { + log.Printf("❌ [HandleHeadPresignedURL] getHeadURL failed: %v", err) + http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) + return + } + + log.Printf("✅ [HandleHeadPresignedURL] Returning URL to client") + + resp := HeadPresignedURLResponse{ + URL: url, + ExpiresAt: expiry, + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} + // HandlePutPresignedURL handles requests for upload URLs func (s *HoldService) HandlePutPresignedURL(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { @@ -683,6 +748,44 @@ func (s *HoldService) getDownloadURL(ctx context.Context, digest 
string, did str return s.getProxyDownloadURL(digest, did), nil } +// getHeadURL generates a HEAD URL for a blob +func (s *HoldService) getHeadURL(ctx context.Context, digest string, did string) (string, error) { + // Check if blob exists + path := blobPath(digest) + _, err := s.driver.Stat(ctx, path) + if err != nil { + return "", fmt.Errorf("blob not found: %w", err) + } + + // If S3 client available, generate presigned HEAD URL + if s.s3Client != nil { + // Build S3 key from blob path + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + // Generate presigned HEAD URL + req, _ := s.s3Client.HeadObjectRequest(&s3.HeadObjectInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + }) + + url, err := req.Presign(15 * time.Minute) + if err != nil { + log.Printf("❌ [getHeadURL] Presign FAILED: %v", err) + log.Printf(" Falling back to proxy URL") + return s.getProxyDownloadURL(digest, did), nil + } + + log.Printf("✅ [getHeadURL] Presigned HEAD URL generated: digest=%s", digest) + return url, nil + } + + // Fallback: return proxy URL through this service + return s.getProxyDownloadURL(digest, did), nil +} + // getProxyDownloadURL returns a proxy URL for blob download (fallback when presigned URLs unavailable) func (s *HoldService) getProxyDownloadURL(digest, did string) string { return fmt.Sprintf("%s/blobs/%s?did=%s", s.config.Server.PublicURL, digest, did) @@ -1120,6 +1223,7 @@ func main() { mux.HandleFunc("/health", service.HealthHandler) mux.HandleFunc("/register", service.HandleRegister) mux.HandleFunc("/get-presigned-url", service.HandleGetPresignedURL) + mux.HandleFunc("/head-presigned-url", service.HandleHeadPresignedURL) mux.HandleFunc("/put-presigned-url", service.HandlePutPresignedURL) mux.HandleFunc("/move", service.HandleMove) diff --git a/pkg/storage/proxy_blob_store.go b/pkg/storage/proxy_blob_store.go index 5f18845..01f342c 100644 --- a/pkg/storage/proxy_blob_store.go +++ 
b/pkg/storage/proxy_blob_store.go @@ -59,8 +59,13 @@ func NewProxyBlobStore(storageEndpoint, did string, database DatabaseMetrics, re // Stat returns the descriptor for a blob func (p *ProxyBlobStore) Stat(ctx context.Context, dgst digest.Digest) (distribution.Descriptor, error) { - // Quick HEAD request to hold service to check if blob exists - url := fmt.Sprintf("%s/blobs/%s?did=%s", p.storageEndpoint, dgst.String(), p.did) + // Get presigned HEAD URL + url, err := p.getHeadURL(ctx, dgst) + if err != nil { + return distribution.Descriptor{}, distribution.ErrBlobUnknown + } + + // Make HEAD request to presigned URL req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil) if err != nil { return distribution.Descriptor{}, distribution.ErrBlobUnknown @@ -194,32 +199,15 @@ func (p *ProxyBlobStore) Delete(ctx context.Context, dgst digest.Digest) error { // ServeBlob serves a blob via HTTP redirect func (p *ProxyBlobStore) ServeBlob(ctx context.Context, w http.ResponseWriter, r *http.Request, dgst digest.Digest) error { - // For HEAD requests, handle directly without redirect - // S3 presigned URLs are method-specific, and Docker sends HEAD to verify blobs exist + // For HEAD requests, redirect to presigned HEAD URL if r.Method == http.MethodHead { - // Check if blob exists via hold service HEAD request - url := fmt.Sprintf("%s/blobs/%s?did=%s", p.storageEndpoint, dgst.String(), p.did) - req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil) + url, err := p.getHeadURL(ctx, dgst) if err != nil { - return distribution.ErrBlobUnknown + return err } - resp, err := p.httpClient.Do(req) - if err != nil { - return distribution.ErrBlobUnknown - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return distribution.ErrBlobUnknown - } - - // Copy headers from hold service response - if contentLength := resp.Header.Get("Content-Length"); contentLength != "" { - w.Header().Set("Content-Length", contentLength) - } - w.Header().Set("Content-Type", 
"application/octet-stream") - w.WriteHeader(http.StatusOK) + // Redirect to presigned HEAD URL + http.Redirect(w, r, url, http.StatusTemporaryRedirect) return nil } @@ -335,6 +323,45 @@ func (p *ProxyBlobStore) getDownloadURL(ctx context.Context, dgst digest.Digest) return result.URL, nil } +// getHeadURL requests a presigned HEAD URL from the storage service +func (p *ProxyBlobStore) getHeadURL(ctx context.Context, dgst digest.Digest) (string, error) { + reqBody := map[string]any{ + "did": p.did, + "digest": dgst.String(), + } + + body, err := json.Marshal(reqBody) + if err != nil { + return "", err + } + + url := fmt.Sprintf("%s/head-presigned-url", p.storageEndpoint) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) + if err != nil { + return "", err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := p.httpClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("failed to get HEAD URL: status %d", resp.StatusCode) + } + + var result struct { + URL string `json:"url"` + } + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return "", err + } + + return result.URL, nil +} + // getUploadURL requests a presigned upload URL from the storage service func (p *ProxyBlobStore) getUploadURL(ctx context.Context, dgst digest.Digest, size int64) (string, error) { fmt.Printf("DEBUG [proxy_blob_store/getUploadURL]: storageEndpoint=%s, digest=%s\n", p.storageEndpoint, dgst) From 153ef6e9bb17a507d48a3f5cc33253f85b6b807c Mon Sep 17 00:00:00 2001 From: Evan Jarrett Date: Sat, 11 Oct 2025 20:07:37 -0500 Subject: [PATCH 05/10] code cleanup --- cmd/appview/main.go | 16 ----------- pkg/storage/s3_blob_store.go | 54 ------------------------------------ 2 files changed, 70 deletions(-) delete mode 100644 pkg/storage/s3_blob_store.go diff --git a/cmd/appview/main.go b/cmd/appview/main.go index ac9603e..9f98904 100644 --- 
a/cmd/appview/main.go +++ b/cmd/appview/main.go @@ -1,22 +1,14 @@ package main import ( - "fmt" "os" - "time" "github.com/distribution/distribution/v3/registry" _ "github.com/distribution/distribution/v3/registry/auth/token" - _ "github.com/distribution/distribution/v3/registry/storage/driver/filesystem" _ "github.com/distribution/distribution/v3/registry/storage/driver/inmemory" - _ "github.com/distribution/distribution/v3/registry/storage/driver/s3-aws" // Register our custom middleware _ "atcr.io/pkg/middleware" - - "atcr.io/pkg/auth/oauth" - "atcr.io/pkg/auth/token" - "atcr.io/pkg/middleware" ) func main() { @@ -26,11 +18,3 @@ func main() { os.Exit(1) } } - -// Suppress unused import warnings -var _ = fmt.Sprint -var _ = os.Stdout -var _ = time.Now -var _ = oauth.NewRefresher -var _ = token.NewIssuer -var _ = middleware.SetGlobalRefresher diff --git a/pkg/storage/s3_blob_store.go b/pkg/storage/s3_blob_store.go deleted file mode 100644 index 89a3c02..0000000 --- a/pkg/storage/s3_blob_store.go +++ /dev/null @@ -1,54 +0,0 @@ -package storage - -import ( - "context" - - "github.com/distribution/distribution/v3" - "github.com/distribution/distribution/v3/registry/storage" - "github.com/distribution/distribution/v3/registry/storage/driver" - "github.com/distribution/reference" -) - -// S3BlobStore wraps distribution's blob store with S3 backend -type S3BlobStore struct { - distribution.BlobStore -} - -// NewS3BlobStore creates a new S3-backed blob store -func NewS3BlobStore(ctx context.Context, storageDriver driver.StorageDriver, repoName string) (*S3BlobStore, error) { - // Create a registry instance with the S3 driver - reg, err := storage.NewRegistry(ctx, storageDriver) - if err != nil { - return nil, err - } - - // Parse the repository name into a Named reference - named, err := reference.ParseNamed(repoName) - if err != nil { - return nil, err - } - - // Get the repository - repo, err := reg.Repository(ctx, named) - if err != nil { - return nil, err - } - - // 
Get the blob store - blobStore := repo.Blobs(ctx) - - return &S3BlobStore{ - BlobStore: blobStore, - }, nil -} - -// Note: S3BlobStore inherits all methods from distribution.BlobStore -// including: -// - Stat(ctx, dgst) - Check if blob exists -// - Get(ctx, dgst) - Retrieve blob -// - Open(ctx, dgst) - Open blob for reading -// - Put(ctx, mediaType, payload) - Store blob -// - Create(ctx, options...) - Create blob writer -// - Resume(ctx, id) - Resume blob upload -// - ServeBlob(ctx, w, r, dgst) - Serve blob over HTTP -// - Delete(ctx, dgst) - Delete blob From 3761ade947b9970fe36118a030156015263f09bb Mon Sep 17 00:00:00 2001 From: Evan Jarrett Date: Sat, 11 Oct 2025 21:00:39 -0500 Subject: [PATCH 06/10] refactor hold/main.go into pkg files --- cmd/hold/main.go | 1547 +------------------------------------ docs/HOLD_MULTIPART.md | 344 +++++++++ docs/MULTIPART_OLD.md | 448 +++++++++++ docs/PRESIGNED_UPLOADS.md | 1017 ++++++++++++++++++++++++ pkg/hold/authorization.go | 131 ++++ pkg/hold/config.go | 130 ++++ pkg/hold/handlers.go | 574 ++++++++++++++ pkg/hold/multipart.go | 373 +++++++++ pkg/hold/registration.go | 267 +++++++ pkg/hold/s3.go | 195 +++++ pkg/hold/service.go | 42 + pkg/hold/storage.go | 175 +++++ pkg/hold/types.go | 103 +++ 13 files changed, 3802 insertions(+), 1544 deletions(-) create mode 100644 docs/HOLD_MULTIPART.md create mode 100644 docs/MULTIPART_OLD.md create mode 100644 docs/PRESIGNED_UPLOADS.md create mode 100644 pkg/hold/authorization.go create mode 100644 pkg/hold/config.go create mode 100644 pkg/hold/handlers.go create mode 100644 pkg/hold/multipart.go create mode 100644 pkg/hold/registration.go create mode 100644 pkg/hold/s3.go create mode 100644 pkg/hold/service.go create mode 100644 pkg/hold/storage.go create mode 100644 pkg/hold/types.go diff --git a/cmd/hold/main.go b/cmd/hold/main.go index 79fef79..ff28b29 100644 --- a/cmd/hold/main.go +++ b/cmd/hold/main.go @@ -1,1219 +1,30 @@ package main import ( - "context" "encoding/json" "fmt" - 
"io" "log" "net/http" - "net/url" - "os" - "strings" "time" - "github.com/aws/aws-sdk-go/aws" - "github.com/aws/aws-sdk-go/aws/credentials" - "github.com/aws/aws-sdk-go/aws/session" - "github.com/aws/aws-sdk-go/service/s3" - "github.com/distribution/distribution/v3/configuration" - storagedriver "github.com/distribution/distribution/v3/registry/storage/driver" - "github.com/distribution/distribution/v3/registry/storage/driver/factory" - "atcr.io/pkg/atproto" - "atcr.io/pkg/auth/oauth" + "atcr.io/pkg/hold" indigooauth "github.com/bluesky-social/indigo/atproto/auth/oauth" - "github.com/bluesky-social/indigo/atproto/identity" - "github.com/bluesky-social/indigo/atproto/syntax" // Import storage drivers _ "github.com/distribution/distribution/v3/registry/storage/driver/filesystem" _ "github.com/distribution/distribution/v3/registry/storage/driver/s3-aws" ) -// Config represents the hold service configuration -type Config struct { - Version string `yaml:"version"` - Storage StorageConfig `yaml:"storage"` - Server ServerConfig `yaml:"server"` - Registration RegistrationConfig `yaml:"registration"` -} - -// RegistrationConfig defines auto-registration settings -type RegistrationConfig struct { - // OwnerDID is the owner's ATProto DID (from env: HOLD_OWNER) - // If set, auto-registration is enabled - OwnerDID string `yaml:"owner_did"` -} - -// StorageConfig wraps distribution's storage configuration -type StorageConfig struct { - configuration.Storage `yaml:",inline"` -} - -// ServerConfig defines server settings -type ServerConfig struct { - // Addr is the address to listen on (e.g., ":8080") - Addr string `yaml:"addr"` - - // PublicURL is the public URL of this hold service (e.g., "https://hold.example.com") - PublicURL string `yaml:"public_url"` - - // Public controls whether this hold allows public blob reads without auth (from env: HOLD_PUBLIC) - Public bool `yaml:"public"` - - // TestMode uses localhost for OAuth redirects while storing real URL in hold record (from 
env: TEST_MODE) - TestMode bool `yaml:"test_mode"` - - // ReadTimeout for HTTP requests - ReadTimeout time.Duration `yaml:"read_timeout"` - - // WriteTimeout for HTTP requests - WriteTimeout time.Duration `yaml:"write_timeout"` -} - -// HoldService provides presigned URLs for blob storage in a hold -type HoldService struct { - driver storagedriver.StorageDriver - config *Config - s3Client *s3.S3 // S3 client for presigned URLs (nil if not S3 storage) - bucket string // S3 bucket name - s3PathPrefix string // S3 path prefix (if any) -} - -// NewHoldService creates a new hold service -func NewHoldService(cfg *Config) (*HoldService, error) { - // Create storage driver from config - ctx := context.Background() - driver, err := factory.Create(ctx, cfg.Storage.Type(), cfg.Storage.Parameters()) - if err != nil { - return nil, fmt.Errorf("failed to create storage driver: %w", err) - } - - service := &HoldService{ - driver: driver, - config: cfg, - } - - // Initialize S3 client for presigned URLs (if using S3 storage) - if err := service.initS3Client(); err != nil { - log.Printf("WARNING: S3 presigned URLs disabled: %v", err) - } - - return service, nil -} - -// initS3Client initializes the S3 client for presigned URL generation -// Returns nil error if S3 client is successfully initialized -// Returns error if storage is not S3 or if initialization fails (service will fall back to proxy mode) -func (s *HoldService) initS3Client() error { - // Check if storage driver is S3 - if s.config.Storage.Type() != "s3" { - log.Printf("Storage driver is %s (not S3), presigned URLs disabled", s.config.Storage.Type()) - return nil // Not an error - just using different driver - } - - // Extract S3 configuration from storage parameters - params := s.config.Storage.Parameters() - - // Extract required S3 configuration - region, _ := params["region"].(string) - if region == "" { - region = "us-east-1" // Default region - } - - accessKey, _ := params["accesskey"].(string) - secretKey, _ := 
params["secretkey"].(string) - bucket, _ := params["bucket"].(string) - - if bucket == "" { - return fmt.Errorf("S3 bucket not configured") - } - - // Build AWS config - awsConfig := &aws.Config{ - Region: aws.String(region), - } - - // Add credentials if provided (allow IAM role auth if not provided) - if accessKey != "" && secretKey != "" { - awsConfig.Credentials = credentials.NewStaticCredentials(accessKey, secretKey, "") - } - - // Add custom endpoint for S3-compatible services (Storj, MinIO, R2, etc.) - if endpoint, ok := params["regionendpoint"].(string); ok && endpoint != "" { - awsConfig.Endpoint = aws.String(endpoint) - awsConfig.S3ForcePathStyle = aws.Bool(true) // Required for MinIO, Storj - } - - // Create AWS session - sess, err := session.NewSession(awsConfig) - if err != nil { - return fmt.Errorf("failed to create AWS session: %w", err) - } - - // Create S3 client - s.s3Client = s3.New(sess) - s.bucket = bucket - - // Extract path prefix if configured (rootdirectory in S3 params) - if rootDir, ok := params["rootdirectory"].(string); ok && rootDir != "" { - s.s3PathPrefix = strings.TrimPrefix(rootDir, "/") - } - - log.Printf("✅ S3 presigned URLs enabled") - - return nil -} - -// GetPresignedURLRequest represents a request for a presigned download URL -type GetPresignedURLRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` -} - -// GetPresignedURLResponse contains the presigned URL -type GetPresignedURLResponse struct { - URL string `json:"url"` - ExpiresAt time.Time `json:"expires_at"` -} - -// HeadPresignedURLRequest represents a request for a presigned HEAD URL -type HeadPresignedURLRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` -} - -// HeadPresignedURLResponse contains the presigned HEAD URL -type HeadPresignedURLResponse struct { - URL string `json:"url"` - ExpiresAt time.Time `json:"expires_at"` -} - -// PutPresignedURLRequest represents a request for a presigned upload URL -type 
PutPresignedURLRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` - Size int64 `json:"size"` -} - -// PutPresignedURLResponse contains the presigned upload URL -type PutPresignedURLResponse struct { - URL string `json:"url"` - ExpiresAt time.Time `json:"expires_at"` -} - -// StartMultipartUploadRequest initiates a multipart upload -type StartMultipartUploadRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` -} - -// StartMultipartUploadResponse contains the multipart upload ID -type StartMultipartUploadResponse struct { - UploadID string `json:"upload_id"` - ExpiresAt time.Time `json:"expires_at"` -} - -// GetPartURLRequest requests a presigned URL for a specific part -type GetPartURLRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` - UploadID string `json:"upload_id"` - PartNumber int `json:"part_number"` -} - -// GetPartURLResponse contains the presigned URL for a part -type GetPartURLResponse struct { - URL string `json:"url"` - ExpiresAt time.Time `json:"expires_at"` -} - -// CompleteMultipartRequest completes a multipart upload -type CompleteMultipartRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` - UploadID string `json:"upload_id"` - Parts []CompletedPart `json:"parts"` -} - -// CompletedPart represents an uploaded part with its ETag -type CompletedPart struct { - PartNumber int `json:"part_number"` - ETag string `json:"etag"` -} - -// AbortMultipartRequest aborts an in-progress upload -type AbortMultipartRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` - UploadID string `json:"upload_id"` -} - -// HandleGetPresignedURL handles requests for download URLs -func (s *HoldService) HandleGetPresignedURL(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req GetPresignedURLRequest - if err := json.NewDecoder(r.Body).Decode(&req); 
err != nil { - http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - log.Printf("📨 [HandleGetPresignedURL] Request received:") - log.Printf(" DID: %s", req.DID) - log.Printf(" Digest: %s", req.Digest) - log.Printf(" Remote: %s", r.RemoteAddr) - log.Printf(" s3Client nil? %v", s.s3Client == nil) - - // Validate DID authorization for READ - if !s.isAuthorizedRead(req.DID) { - log.Printf("❌ [HandleGetPresignedURL] Authorization FAILED") - if req.DID == "" { - // Anonymous request to private hold - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - // Authenticated but not authorized - http.Error(w, "forbidden: access denied", http.StatusForbidden) - } - return - } - - // Generate presigned URL (15 minute expiry) - ctx := context.Background() - expiry := time.Now().Add(15 * time.Minute) - - // For now, construct direct URL to blob - // In production, this would use driver-specific presigned URLs - url, err := s.getDownloadURL(ctx, req.Digest, req.DID) - if err != nil { - log.Printf("❌ [HandleGetPresignedURL] getDownloadURL failed: %v", err) - http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) - return - } - - log.Printf("✅ [HandleGetPresignedURL] Returning URL to client") - - resp := GetPresignedURLResponse{ - URL: url, - ExpiresAt: expiry, - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resp) -} - -// HandleHeadPresignedURL handles requests for HEAD URLs -func (s *HoldService) HandleHeadPresignedURL(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req HeadPresignedURLRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - log.Printf("📨 [HandleHeadPresignedURL] Request received:") - 
log.Printf(" DID: %s", req.DID) - log.Printf(" Digest: %s", req.Digest) - log.Printf(" Remote: %s", r.RemoteAddr) - - // Validate DID authorization for READ - if !s.isAuthorizedRead(req.DID) { - log.Printf("❌ [HandleHeadPresignedURL] Authorization FAILED") - if req.DID == "" { - // Anonymous request to private hold - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - // Authenticated but not authorized - http.Error(w, "forbidden: access denied", http.StatusForbidden) - } - return - } - - // Generate presigned HEAD URL (15 minute expiry) - ctx := context.Background() - expiry := time.Now().Add(15 * time.Minute) - - url, err := s.getHeadURL(ctx, req.Digest, req.DID) - if err != nil { - log.Printf("❌ [HandleHeadPresignedURL] getHeadURL failed: %v", err) - http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) - return - } - - log.Printf("✅ [HandleHeadPresignedURL] Returning URL to client") - - resp := HeadPresignedURLResponse{ - URL: url, - ExpiresAt: expiry, - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resp) -} - -// HandlePutPresignedURL handles requests for upload URLs -func (s *HoldService) HandlePutPresignedURL(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req PutPresignedURLRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - // Validate DID authorization for WRITE - if !s.isAuthorizedWrite(req.DID) { - if req.DID == "" { - // Anonymous write attempt - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - // Authenticated but not crew/owner - http.Error(w, "forbidden: write access denied", http.StatusForbidden) - } - return - } - - // Generate presigned upload URL (15 
minute expiry) - ctx := context.Background() - expiry := time.Now().Add(15 * time.Minute) - - url, err := s.getUploadURL(ctx, req.Digest, req.Size, req.DID) - if err != nil { - http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) - return - } - - resp := PutPresignedURLResponse{ - URL: url, - ExpiresAt: expiry, - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resp) -} - -// HandleProxyGet proxies a blob download through the service -func (s *HoldService) HandleProxyGet(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodGet && r.Method != http.MethodHead { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - // Extract digest from path (e.g., /blobs/sha256:abc123) - digest := r.URL.Path[len("/blobs/"):] - if digest == "" { - http.Error(w, "missing digest", http.StatusBadRequest) - return - } - - log.Printf("📥 [HandleProxyGet] Blob download request:") - log.Printf(" Method: %s", r.Method) - log.Printf(" Digest: %s", digest) - log.Printf(" Remote: %s", r.RemoteAddr) - - // Get DID from query param or header - did := r.URL.Query().Get("did") - if did == "" { - did = r.Header.Get("X-ATCR-DID") - } - log.Printf(" DID: %s", did) - - // Authorize READ access - if !s.isAuthorizedRead(did) { - log.Printf("❌ [HandleProxyGet] Authorization FAILED") - if did == "" { - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - http.Error(w, "forbidden: access denied", http.StatusForbidden) - } - return - } - log.Printf("✅ [HandleProxyGet] Authorization SUCCESS") - - ctx := r.Context() - path := blobPath(digest) - - // For HEAD requests, just check if blob exists - if r.Method == http.MethodHead { - stat, err := s.driver.Stat(ctx, path) - if err != nil { - http.Error(w, "blob not found", http.StatusNotFound) - return - } - w.Header().Set("Content-Type", "application/octet-stream") - w.Header().Set("Content-Length", 
fmt.Sprintf("%d", stat.Size())) - w.WriteHeader(http.StatusOK) - return - } - - // For GET requests, read and return the blob - content, err := s.driver.GetContent(ctx, path) - if err != nil { - http.Error(w, "blob not found", http.StatusNotFound) - return - } - - w.Header().Set("Content-Type", "application/octet-stream") - w.Write(content) -} - -// HandleMove moves a blob from one path to another -// POST /move?from={path}&to={digest}&did={did} -func (s *HoldService) HandleMove(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - fromPath := r.URL.Query().Get("from") - toDigest := r.URL.Query().Get("to") - did := r.URL.Query().Get("did") - - if fromPath == "" || toDigest == "" { - http.Error(w, "missing from or to parameter", http.StatusBadRequest) - return - } - - // Authorize WRITE access - if !s.isAuthorizedWrite(did) { - if did == "" { - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - http.Error(w, "forbidden: write access denied", http.StatusForbidden) - } - return - } - - ctx := r.Context() - sourcePath := blobPath(fromPath) - destPath := blobPath(toDigest) - - // Try to move using driver's Move operation - if err := s.driver.Move(ctx, sourcePath, destPath); err != nil { - log.Printf("HandleMove: failed to move blob: %v", err) - http.Error(w, fmt.Sprintf("failed to move blob: %v", err), http.StatusInternalServerError) - return - } - - log.Printf("HandleMove: successfully moved blob from=%s to=%s", fromPath, toDigest) - w.WriteHeader(http.StatusOK) -} - -// HandleProxyPut proxies a blob upload through the service -func (s *HoldService) HandleProxyPut(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPut { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - digest := r.URL.Path[len("/blobs/"):] - if digest == "" { - http.Error(w, "missing digest", 
http.StatusBadRequest) - return - } - - did := r.URL.Query().Get("did") - if did == "" { - did = r.Header.Get("X-ATCR-DID") - } - - log.Printf("🔐 [HandleProxyPut] Authorization check:") - log.Printf(" Path: %s", digest) - log.Printf(" DID: %s", did) - log.Printf(" Owner DID: %s", s.config.Registration.OwnerDID) - - // Authorize WRITE access - if !s.isAuthorizedWrite(did) { - log.Printf("❌ [HandleProxyPut] Authorization FAILED") - if did == "" { - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - http.Error(w, "forbidden: write access denied", http.StatusForbidden) - } - return - } - - log.Printf("✅ [HandleProxyPut] Authorization SUCCESS") - - // Stream blob to storage (no buffering) - ctx := r.Context() - path := blobPath(digest) - - // Create writer for streaming - writer, err := s.driver.Writer(ctx, path, false) - if err != nil { - log.Printf("HandleProxyPut: failed to create writer: %v", err) - http.Error(w, "failed to create writer", http.StatusInternalServerError) - return - } - - // Stream directly from request body to storage - written, err := io.Copy(writer, r.Body) - if err != nil { - writer.Cancel(ctx) - log.Printf("HandleProxyPut: failed to write blob: %v", err) - http.Error(w, "failed to write blob", http.StatusInternalServerError) - return - } - - // Commit the write - if err := writer.Commit(ctx); err != nil { - log.Printf("HandleProxyPut: failed to commit blob: %v", err) - http.Error(w, "failed to commit blob", http.StatusInternalServerError) - return - } - - log.Printf("HandleProxyPut: successfully stored blob path=%s, size=%d", digest, written) - w.WriteHeader(http.StatusCreated) -} - -// isAuthorizedRead checks if a DID can read from this hold -// Authorization: -// - Public hold: allow anonymous (empty DID) or any authenticated user -// - Private hold: require authentication (any user with sailor.profile) -func (s *HoldService) isAuthorizedRead(did string) bool { - // Check hold public flag - isPublic, 
err := s.isHoldPublic() - if err != nil { - log.Printf("ERROR: Failed to check hold public flag: %v", err) - // Fail secure - deny access on error - return false - } - - if isPublic { - // Public hold - allow anyone (even anonymous) - return true - } - - // Private hold - require authentication - // Any authenticated user with sailor.profile can read - if did == "" { - // Anonymous user trying to access private hold - return false - } - - // For MVP: assume DID presence means they have sailor.profile - // Future: could query PDS to verify sailor.profile exists - return true -} - -// isAuthorizedWrite checks if a DID can write to this hold -// Authorization: must be hold owner OR crew member -func (s *HoldService) isAuthorizedWrite(did string) bool { - if did == "" { - // Anonymous writes not allowed - return false - } - - // Check if DID is the hold owner - ownerDID := s.config.Registration.OwnerDID - if ownerDID == "" { - log.Printf("ERROR: Hold owner DID not configured") - return false - } - - if did == ownerDID { - // Owner always has write access - return true - } - - // Check if DID is a crew member - isCrew, err := s.isCrewMember(did) - if err != nil { - log.Printf("ERROR: Failed to check crew membership: %v", err) - return false - } - - return isCrew -} - -// isHoldPublic checks if this hold allows public (anonymous) reads -func (s *HoldService) isHoldPublic() (bool, error) { - // Use cached config value for now - // Future: could query PDS for hold record to get live value - return s.config.Server.Public, nil -} - -// isCrewMember checks if a DID is a crew member of this hold -func (s *HoldService) isCrewMember(did string) (bool, error) { - ownerDID := s.config.Registration.OwnerDID - if ownerDID == "" { - return false, fmt.Errorf("hold owner DID not configured") - } - - ctx := context.Background() - - // Resolve owner's PDS endpoint using indigo - directory := identity.DefaultDirectory() - ownerDIDParsed, err := syntax.ParseDID(ownerDID) - if err != nil { 
- return false, fmt.Errorf("invalid owner DID: %w", err) - } - - ident, err := directory.LookupDID(ctx, ownerDIDParsed) - if err != nil { - return false, fmt.Errorf("failed to resolve owner PDS: %w", err) - } - - pdsEndpoint := ident.PDSEndpoint() - if pdsEndpoint == "" { - return false, fmt.Errorf("no PDS endpoint found for owner") - } - - // Create unauthenticated client to read public records - client := atproto.NewClient(pdsEndpoint, ownerDID, "") - - // List crew records for this hold - // Crew records are public, so we can read them without auth - records, err := client.ListRecords(ctx, atproto.HoldCrewCollection, 100) - if err != nil { - return false, fmt.Errorf("failed to list crew records: %w", err) - } - - // Check if DID is in crew list - for _, record := range records { - var crewRecord atproto.HoldCrewRecord - if err := json.Unmarshal(record.Value, &crewRecord); err != nil { - continue - } - - if crewRecord.Member == did { - // Found crew membership - return true, nil - } - } - - return false, nil -} - -// getDownloadURL generates a download URL for a blob -func (s *HoldService) getDownloadURL(ctx context.Context, digest string, did string) (string, error) { - // Check if blob exists - path := blobPath(digest) - _, err := s.driver.Stat(ctx, path) - if err != nil { - return "", fmt.Errorf("blob not found: %w", err) - } - - // If S3 client available, generate presigned URL - if s.s3Client != nil { - // Build S3 key from blob path - // blobPath returns paths like: /docker/registry/v2/blobs/sha256/ab/abc123.../data - s3Key := strings.TrimPrefix(path, "/") - if s.s3PathPrefix != "" { - s3Key = s.s3PathPrefix + "/" + s3Key - } - - // Generate presigned GET URL - // Note: Don't use ResponseContentType - not supported by all S3-compatible services - req, _ := s.s3Client.GetObjectRequest(&s3.GetObjectInput{ - Bucket: aws.String(s.bucket), - Key: aws.String(s3Key), - }) - - log.Printf("🔍 [getDownloadURL] Before Presign:") - log.Printf(" Digest: %s", digest) - 
log.Printf(" S3 Key: %s", s3Key) - log.Printf(" Bucket: %s", s.bucket) - log.Printf(" Request Operation: %s", req.Operation.Name) - log.Printf(" Request HTTPMethod: %s", req.Operation.HTTPMethod) - - url, err := req.Presign(15 * time.Minute) - if err != nil { - log.Printf("❌ [getDownloadURL] Presign FAILED: %v", err) - log.Printf(" Falling back to proxy URL") - return s.getProxyDownloadURL(digest, did), nil - } - - log.Printf("✅ [getDownloadURL] Presigned URL generated successfully") - log.Printf(" URL: %s", url) - log.Printf(" URL Length: %d chars", len(url)) - log.Printf(" Expires: 15min") - - return url, nil - } - - // Fallback: return proxy URL through this service - return s.getProxyDownloadURL(digest, did), nil -} - -// getHeadURL generates a HEAD URL for a blob -func (s *HoldService) getHeadURL(ctx context.Context, digest string, did string) (string, error) { - // Check if blob exists - path := blobPath(digest) - _, err := s.driver.Stat(ctx, path) - if err != nil { - return "", fmt.Errorf("blob not found: %w", err) - } - - // If S3 client available, generate presigned HEAD URL - if s.s3Client != nil { - // Build S3 key from blob path - s3Key := strings.TrimPrefix(path, "/") - if s.s3PathPrefix != "" { - s3Key = s.s3PathPrefix + "/" + s3Key - } - - // Generate presigned HEAD URL - req, _ := s.s3Client.HeadObjectRequest(&s3.HeadObjectInput{ - Bucket: aws.String(s.bucket), - Key: aws.String(s3Key), - }) - - url, err := req.Presign(15 * time.Minute) - if err != nil { - log.Printf("❌ [getHeadURL] Presign FAILED: %v", err) - log.Printf(" Falling back to proxy URL") - return s.getProxyDownloadURL(digest, did), nil - } - - log.Printf("✅ [getHeadURL] Presigned HEAD URL generated: digest=%s", digest) - return url, nil - } - - // Fallback: return proxy URL through this service - return s.getProxyDownloadURL(digest, did), nil -} - -// getProxyDownloadURL returns a proxy URL for blob download (fallback when presigned URLs unavailable) -func (s *HoldService) 
getProxyDownloadURL(digest, did string) string { - return fmt.Sprintf("%s/blobs/%s?did=%s", s.config.Server.PublicURL, digest, did) -} - -// getUploadURL generates an upload URL for a blob -// Note: This is called from HandlePutPresignedURL which has the DID in the request -func (s *HoldService) getUploadURL(ctx context.Context, digest string, size int64, did string) (string, error) { - // If S3 client available, generate presigned URL - if s.s3Client != nil { - // Build S3 key from blob path - path := blobPath(digest) - s3Key := strings.TrimPrefix(path, "/") - if s.s3PathPrefix != "" { - s3Key = s.s3PathPrefix + "/" + s3Key - } - - // Generate presigned PUT URL with Content-Type in signature - req, _ := s.s3Client.PutObjectRequest(&s3.PutObjectInput{ - Bucket: aws.String(s.bucket), - Key: aws.String(s3Key), - ContentType: aws.String("application/octet-stream"), - }) - - url, err := req.Presign(15 * time.Minute) - if err != nil { - log.Printf("WARN: Presigned URL generation failed for %s, falling back to proxy: %v", digest, err) - return s.getProxyUploadURL(digest, did), nil - } - - log.Printf("🔑 Generated presigned upload URL for %s (expires in 15min)", digest) - log.Printf(" S3 Key: %s", s3Key) - log.Printf(" Bucket: %s", s.bucket) - log.Printf(" Size: %d bytes", size) - return url, nil - } - - // Fallback: return proxy URL through this service - return s.getProxyUploadURL(digest, did), nil -} - -// getProxyUploadURL returns a proxy URL for blob upload (fallback when presigned URLs unavailable) -func (s *HoldService) getProxyUploadURL(digest, did string) string { - return fmt.Sprintf("%s/blobs/%s?did=%s", s.config.Server.PublicURL, digest, did) -} - -// startMultipartUpload initiates a multipart upload and returns upload ID -func (s *HoldService) startMultipartUpload(ctx context.Context, digest string) (string, error) { - if s.s3Client == nil { - return "", fmt.Errorf("S3 not configured") - } - - path := blobPath(digest) - s3Key := strings.TrimPrefix(path, "/") - 
if s.s3PathPrefix != "" { - s3Key = s.s3PathPrefix + "/" + s3Key - } - - result, err := s.s3Client.CreateMultipartUploadWithContext(ctx, &s3.CreateMultipartUploadInput{ - Bucket: aws.String(s.bucket), - Key: aws.String(s3Key), - }) - if err != nil { - return "", err - } - - log.Printf("Started multipart upload: digest=%s, uploadID=%s", digest, *result.UploadId) - return *result.UploadId, nil -} - -// getPartPresignedURL generates presigned URL for a specific part -func (s *HoldService) getPartPresignedURL(ctx context.Context, digest, uploadID string, partNumber int) (string, error) { - if s.s3Client == nil { - return "", fmt.Errorf("S3 not configured") - } - - path := blobPath(digest) - s3Key := strings.TrimPrefix(path, "/") - if s.s3PathPrefix != "" { - s3Key = s.s3PathPrefix + "/" + s3Key - } - - req, _ := s.s3Client.UploadPartRequest(&s3.UploadPartInput{ - Bucket: aws.String(s.bucket), - Key: aws.String(s3Key), - UploadId: aws.String(uploadID), - PartNumber: aws.Int64(int64(partNumber)), - }) - - url, err := req.Presign(15 * time.Minute) - if err != nil { - return "", err - } - - log.Printf("Generated part presigned URL: digest=%s, uploadID=%s, part=%d", digest, uploadID, partNumber) - return url, nil -} - -// completeMultipartUpload finalizes the multipart upload -func (s *HoldService) completeMultipartUpload(ctx context.Context, digest, uploadID string, parts []CompletedPart) error { - if s.s3Client == nil { - return fmt.Errorf("S3 not configured") - } - - path := blobPath(digest) - s3Key := strings.TrimPrefix(path, "/") - if s.s3PathPrefix != "" { - s3Key = s.s3PathPrefix + "/" + s3Key - } - - // Convert to S3 CompletedPart format - s3Parts := make([]*s3.CompletedPart, len(parts)) - for i, p := range parts { - s3Parts[i] = &s3.CompletedPart{ - PartNumber: aws.Int64(int64(p.PartNumber)), - ETag: aws.String(p.ETag), - } - } - - _, err := s.s3Client.CompleteMultipartUploadWithContext(ctx, &s3.CompleteMultipartUploadInput{ - Bucket: aws.String(s.bucket), - Key: 
aws.String(s3Key), - UploadId: aws.String(uploadID), - MultipartUpload: &s3.CompletedMultipartUpload{ - Parts: s3Parts, - }, - }) - - if err != nil { - log.Printf("Failed to complete multipart upload: digest=%s, uploadID=%s, err=%v", digest, uploadID, err) - return err - } - - log.Printf("Completed multipart upload: digest=%s, uploadID=%s, parts=%d", digest, uploadID, len(parts)) - return nil -} - -// abortMultipartUpload aborts an in-progress multipart upload -func (s *HoldService) abortMultipartUpload(ctx context.Context, digest, uploadID string) error { - if s.s3Client == nil { - return fmt.Errorf("S3 not configured") - } - - path := blobPath(digest) - s3Key := strings.TrimPrefix(path, "/") - if s.s3PathPrefix != "" { - s3Key = s.s3PathPrefix + "/" + s3Key - } - - _, err := s.s3Client.AbortMultipartUploadWithContext(ctx, &s3.AbortMultipartUploadInput{ - Bucket: aws.String(s.bucket), - Key: aws.String(s3Key), - UploadId: aws.String(uploadID), - }) - - if err != nil { - log.Printf("Failed to abort multipart upload: digest=%s, uploadID=%s, err=%v", digest, uploadID, err) - return err - } - - log.Printf("Aborted multipart upload: digest=%s, uploadID=%s", digest, uploadID) - return nil -} - -// HandleStartMultipart initiates a multipart upload -func (s *HoldService) HandleStartMultipart(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req StartMultipartUploadRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - // Validate DID authorization for WRITE - if !s.isAuthorizedWrite(req.DID) { - if req.DID == "" { - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - http.Error(w, "forbidden: write access denied", http.StatusForbidden) - } - return - } - - // Start multipart upload - ctx := r.Context() - 
uploadID, err := s.startMultipartUpload(ctx, req.Digest) - if err != nil { - http.Error(w, fmt.Sprintf("failed to start multipart upload: %v", err), http.StatusInternalServerError) - return - } - - expiry := time.Now().Add(24 * time.Hour) // Multipart uploads can take longer - - resp := StartMultipartUploadResponse{ - UploadID: uploadID, - ExpiresAt: expiry, - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resp) -} - -// HandleGetPartURL generates a presigned URL for uploading a specific part -func (s *HoldService) HandleGetPartURL(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req GetPartURLRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - // Validate DID authorization for WRITE - if !s.isAuthorizedWrite(req.DID) { - if req.DID == "" { - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - http.Error(w, "forbidden: write access denied", http.StatusForbidden) - } - return - } - - // Get presigned URL for this part - ctx := r.Context() - url, err := s.getPartPresignedURL(ctx, req.Digest, req.UploadID, req.PartNumber) - if err != nil { - http.Error(w, fmt.Sprintf("failed to generate part URL: %v", err), http.StatusInternalServerError) - return - } - - expiry := time.Now().Add(15 * time.Minute) - - resp := GetPartURLResponse{ - URL: url, - ExpiresAt: expiry, - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resp) -} - -// HandleCompleteMultipart completes a multipart upload -func (s *HoldService) HandleCompleteMultipart(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req CompleteMultipartRequest - if err := 
json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - // Validate DID authorization for WRITE - if !s.isAuthorizedWrite(req.DID) { - if req.DID == "" { - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - http.Error(w, "forbidden: write access denied", http.StatusForbidden) - } - return - } - - // Complete multipart upload - ctx := r.Context() - if err := s.completeMultipartUpload(ctx, req.Digest, req.UploadID, req.Parts); err != nil { - http.Error(w, fmt.Sprintf("failed to complete multipart upload: %v", err), http.StatusInternalServerError) - return - } - - w.WriteHeader(http.StatusOK) - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]string{ - "status": "completed", - }) -} - -// HandleAbortMultipart aborts an in-progress multipart upload -func (s *HoldService) HandleAbortMultipart(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req AbortMultipartRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - // Validate DID authorization for WRITE - if !s.isAuthorizedWrite(req.DID) { - if req.DID == "" { - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - http.Error(w, "forbidden: write access denied", http.StatusForbidden) - } - return - } - - // Abort multipart upload - ctx := r.Context() - if err := s.abortMultipartUpload(ctx, req.Digest, req.UploadID); err != nil { - http.Error(w, fmt.Sprintf("failed to abort multipart upload: %v", err), http.StatusInternalServerError) - return - } - - w.WriteHeader(http.StatusOK) - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]string{ - "status": 
"aborted", - }) -} - -// RegisterRequest represents a request to register this hold in a user's PDS -type RegisterRequest struct { - DID string `json:"did"` - AccessToken string `json:"access_token"` - PDSEndpoint string `json:"pds_endpoint"` -} - -// RegisterResponse contains the registration result -type RegisterResponse struct { - HoldURI string `json:"hold_uri"` - CrewURI string `json:"crew_uri"` - Message string `json:"message"` -} - -// HandleRegister registers this hold service in a user's PDS (manual endpoint) -func (s *HoldService) HandleRegister(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req RegisterRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - // Validate required fields - if req.DID == "" || req.AccessToken == "" || req.PDSEndpoint == "" { - http.Error(w, "missing required fields: did, access_token, pds_endpoint", http.StatusBadRequest) - return - } - - // Get public URL from config - publicURL := s.config.Server.PublicURL - if publicURL == "" { - // Fallback to constructing URL from request - scheme := "http" - if r.TLS != nil { - scheme = "https" - } - publicURL = fmt.Sprintf("%s://%s", scheme, r.Host) - } - - // Derive hold name from URL - holdName, err := extractHostname(publicURL) - if err != nil { - http.Error(w, fmt.Sprintf("failed to extract hostname: %v", err), http.StatusBadRequest) - return - } - - ctx := r.Context() - - // Create ATProto client with user's credentials - client := atproto.NewClient(req.PDSEndpoint, req.DID, req.AccessToken) - - // Create HoldRecord - holdRecord := atproto.NewHoldRecord(publicURL, req.DID, s.config.Server.Public) - - holdResult, err := client.PutRecord(ctx, atproto.HoldCollection, holdName, holdRecord) - if err != nil { - http.Error(w, fmt.Sprintf("failed to create hold 
record: %v", err), http.StatusInternalServerError) - return - } - - log.Printf("Created hold record: %s", holdResult.URI) - - // Create HoldCrewRecord for the owner - crewRecord := atproto.NewHoldCrewRecord(holdResult.URI, req.DID, "owner") - - crewRKey := fmt.Sprintf("%s-%s", holdName, req.DID) - crewResult, err := client.PutRecord(ctx, atproto.HoldCrewCollection, crewRKey, crewRecord) - if err != nil { - http.Error(w, fmt.Sprintf("failed to create crew record: %v", err), http.StatusInternalServerError) - return - } - - log.Printf("Created crew record: %s", crewResult.URI) - - resp := RegisterResponse{ - HoldURI: holdResult.URI, - CrewURI: crewResult.URI, - Message: fmt.Sprintf("Successfully registered hold service. Storage endpoint: %s", publicURL), - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resp) -} - -// HealthHandler handles health check requests -func (s *HoldService) HealthHandler(w http.ResponseWriter, r *http.Request) { - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(map[string]string{ - "status": "ok", - }) -} - func main() { // Load configuration from environment variables - cfg, err := loadConfigFromEnv() + cfg, err := hold.LoadConfigFromEnv() if err != nil { log.Fatalf("Failed to load config: %v", err) } // Create hold service - service, err := NewHoldService(cfg) + service, err := hold.NewHoldService(cfg) if err != nil { log.Fatalf("Failed to create hold service: %v", err) } @@ -1316,355 +127,3 @@ func main() { log.Fatalf("Server failed: %v", err) } } - -// loadConfigFromEnv loads all configuration from environment variables -func loadConfigFromEnv() (*Config, error) { - cfg := &Config{ - Version: "0.1", - } - - // Server configuration - cfg.Server.Addr = getEnvOrDefault("HOLD_SERVER_ADDR", ":8080") - cfg.Server.PublicURL = os.Getenv("HOLD_PUBLIC_URL") - if cfg.Server.PublicURL == "" { - return nil, fmt.Errorf("HOLD_PUBLIC_URL is required") - } - cfg.Server.Public = 
os.Getenv("HOLD_PUBLIC") == "true" - cfg.Server.TestMode = os.Getenv("TEST_MODE") == "true" - cfg.Server.ReadTimeout = 5 * time.Minute // Increased for large blob uploads - cfg.Server.WriteTimeout = 5 * time.Minute // Increased for large blob uploads - - // Registration configuration (optional) - cfg.Registration.OwnerDID = os.Getenv("HOLD_OWNER") - - // Storage configuration - build from env vars based on storage type - storageType := getEnvOrDefault("STORAGE_DRIVER", "s3") - var err error - cfg.Storage, err = buildStorageConfig(storageType) - if err != nil { - return nil, fmt.Errorf("failed to build storage config: %w", err) - } - - return cfg, nil -} - -// buildStorageConfig creates storage configuration based on driver type -func buildStorageConfig(driver string) (StorageConfig, error) { - params := make(map[string]any) - - switch driver { - case "s3": - // S3/Storj/Minio configuration from standard AWS env vars - accessKey := os.Getenv("AWS_ACCESS_KEY_ID") - secretKey := os.Getenv("AWS_SECRET_ACCESS_KEY") - region := getEnvOrDefault("AWS_REGION", "us-east-1") - bucket := os.Getenv("S3_BUCKET") - endpoint := os.Getenv("S3_ENDPOINT") // For Storj/Minio - - if bucket == "" { - return StorageConfig{}, fmt.Errorf("S3_BUCKET is required for S3 storage") - } - - params["accesskey"] = accessKey - params["secretkey"] = secretKey - params["region"] = region - params["bucket"] = bucket - if endpoint != "" { - params["regionendpoint"] = endpoint - } - - case "filesystem": - // Filesystem configuration - rootDir := getEnvOrDefault("STORAGE_ROOT_DIR", "/var/lib/atcr/hold") - params["rootdirectory"] = rootDir - - default: - return StorageConfig{}, fmt.Errorf("unsupported storage driver: %s", driver) - } - - // Build distribution Storage config - storageCfg := configuration.Storage{} - storageCfg[driver] = configuration.Parameters(params) - - return StorageConfig{Storage: storageCfg}, nil -} - -// getEnvOrDefault gets an environment variable or returns a default value -func 
getEnvOrDefault(key, defaultValue string) string { - if val := os.Getenv(key); val != "" { - return val - } - return defaultValue -} - -// blobPath converts a digest (e.g., "sha256:abc123...") or temp path to a storage path -// Distribution stores blobs as: /docker/registry/v2/blobs/{algorithm}/{xx}/{hash}/data -// where xx is the first 2 characters of the hash for directory sharding -// NOTE: Path must start with / for filesystem driver -func blobPath(digest string) string { - // Handle temp paths (start with uploads/temp-) - if strings.HasPrefix(digest, "uploads/temp-") { - return fmt.Sprintf("/docker/registry/v2/%s/data", digest) - } - - // Split digest into algorithm and hash - parts := strings.SplitN(digest, ":", 2) - if len(parts) != 2 { - // Fallback for malformed digest - return fmt.Sprintf("/docker/registry/v2/blobs/%s/data", digest) - } - - algorithm := parts[0] - hash := parts[1] - - // Use first 2 characters for sharding - if len(hash) < 2 { - return fmt.Sprintf("/docker/registry/v2/blobs/%s/%s/data", algorithm, hash) - } - - return fmt.Sprintf("/docker/registry/v2/blobs/%s/%s/%s/data", algorithm, hash[:2], hash) -} - -// isHoldRegistered checks if a hold with the given public URL is already registered in the PDS -func (s *HoldService) isHoldRegistered(ctx context.Context, did, pdsEndpoint, publicURL string) (bool, error) { - // We need to query the PDS without authentication to check public records - // ATProto records are publicly readable, so we can use an unauthenticated client - client := atproto.NewClient(pdsEndpoint, did, "") - - // List all hold records for this DID - records, err := client.ListRecords(ctx, atproto.HoldCollection, 100) - if err != nil { - return false, fmt.Errorf("failed to list hold records: %w", err) - } - - // Check if any hold record matches our public URL - for _, record := range records { - var holdRecord atproto.HoldRecord - if err := json.Unmarshal(record.Value, &holdRecord); err != nil { - continue - } - - if 
holdRecord.Endpoint == publicURL { - return true, nil - } - } - - return false, nil -} - -// AutoRegister registers this hold service in the owner's PDS -// Checks if already registered first, then does OAuth if needed -func (s *HoldService) AutoRegister(callbackHandler *http.HandlerFunc) error { - reg := &s.config.Registration - publicURL := s.config.Server.PublicURL - - if publicURL == "" { - return fmt.Errorf("HOLD_PUBLIC_URL not set") - } - - if reg.OwnerDID == "" { - return fmt.Errorf("HOLD_OWNER not set - required for registration") - } - - ctx := context.Background() - - log.Printf("Checking registration status for DID: %s", reg.OwnerDID) - - // Resolve DID to PDS endpoint using indigo - directory := identity.DefaultDirectory() - didParsed, err := syntax.ParseDID(reg.OwnerDID) - if err != nil { - return fmt.Errorf("invalid owner DID: %w", err) - } - - ident, err := directory.LookupDID(ctx, didParsed) - if err != nil { - return fmt.Errorf("failed to resolve PDS for DID: %w", err) - } - - pdsEndpoint := ident.PDSEndpoint() - if pdsEndpoint == "" { - return fmt.Errorf("no PDS endpoint found for DID") - } - - log.Printf("PDS endpoint: %s", pdsEndpoint) - - // Check if hold is already registered - isRegistered, err := s.isHoldRegistered(ctx, reg.OwnerDID, pdsEndpoint, publicURL) - if err != nil { - log.Printf("Warning: failed to check registration status: %v", err) - log.Printf("Proceeding with OAuth registration...") - } else if isRegistered { - log.Printf("✓ Hold service already registered in PDS") - log.Printf("Public URL: %s", publicURL) - return nil - } - - // Not registered, need to do OAuth - log.Printf("Hold not registered, starting OAuth flow...") - - // Get handle from DID document (already resolved above) - handle := ident.Handle.String() - if handle == "" || handle == "handle.invalid" { - return fmt.Errorf("no valid handle found for DID") - } - - log.Printf("Resolved handle: %s", handle) - log.Printf("Starting OAuth registration for hold service") - 
log.Printf("Public URL: %s", publicURL) - - return s.registerWithOAuth(publicURL, handle, reg.OwnerDID, pdsEndpoint, callbackHandler) -} - -// registerWithOAuth performs OAuth flow and registers the hold -func (s *HoldService) registerWithOAuth(publicURL, handle, did, pdsEndpoint string, callbackHandler *http.HandlerFunc) error { - // Define the scopes we need for hold registration - holdScopes := []string{ - "atproto", - fmt.Sprintf("repo:%s?action=create", atproto.HoldCollection), - fmt.Sprintf("repo:%s?action=update", atproto.HoldCollection), - fmt.Sprintf("repo:%s?action=create", atproto.HoldCrewCollection), - fmt.Sprintf("repo:%s?action=update", atproto.HoldCrewCollection), - fmt.Sprintf("repo:%s?action=create", atproto.SailorProfileCollection), - fmt.Sprintf("repo:%s?action=update", atproto.SailorProfileCollection), - } - - // Determine base URL based on mode - // Callback path standardized to /auth/oauth/callback across ATCR - var baseURL string - - if s.config.Server.TestMode { - // Test mode: Use localhost for OAuth (browser accessible) but store real URL in hold record - // Extract port from publicURL (e.g., "http://172.28.0.3:8080" -> ":8080") - parsedURL, err := url.Parse(publicURL) - if err != nil { - return fmt.Errorf("failed to parse public URL: %w", err) - } - port := parsedURL.Port() - if port == "" { - port = "8080" // default - } - baseURL = fmt.Sprintf("http://127.0.0.1:%s", port) - } else { - baseURL = publicURL - } - - // Run interactive OAuth flow with persistent server - ctx := context.Background() - - result, err := oauth.InteractiveFlowWithCallback( - ctx, - baseURL, - handle, - holdScopes, // Pass hold-specific scopes - func(handler http.HandlerFunc) error { - // Populate the pre-registered callback handler - *callbackHandler = handler - return nil - }, - func(authURL string) error { - // Display OAuth URL for user to visit - log.Print("\n" + strings.Repeat("=", 80)) - log.Printf("OAUTH AUTHORIZATION REQUIRED") - 
log.Print(strings.Repeat("=", 80)) - log.Printf("\nPlease visit this URL to authorize the hold service:\n") - log.Printf(" %s\n", authURL) - log.Printf("Waiting for authorization...") - log.Print(strings.Repeat("=", 80) + "\n") - return nil - }, - ) - if err != nil { - return err - } - - log.Printf("Authorization received!") - log.Printf("OAuth session obtained successfully") - log.Printf("DID: %s", did) - log.Printf("PDS: %s", pdsEndpoint) - - // Create ATProto client with indigo's API client (handles DPoP automatically) - apiClient := result.Session.APIClient() - client := atproto.NewClientWithIndigoClient(pdsEndpoint, did, apiClient) - - return s.registerWithClient(publicURL, did, client) -} - -// registerWithClient registers the hold using an authenticated ATProto client -func (s *HoldService) registerWithClient(publicURL, did string, client *atproto.Client) error { - // Derive hold name from URL (hostname) - holdName, err := extractHostname(publicURL) - if err != nil { - return fmt.Errorf("failed to extract hostname from URL: %w", err) - } - - log.Printf("Registering hold service: url=%s, name=%s, owner=%s", publicURL, holdName, did) - - ctx := context.Background() - - // Create HoldRecord - holdRecord := atproto.NewHoldRecord(publicURL, did, s.config.Server.Public) - - // Use hostname as record key - holdResult, err := client.PutRecord(ctx, atproto.HoldCollection, holdName, holdRecord) - if err != nil { - return fmt.Errorf("failed to create hold record: %w", err) - } - - log.Printf("✓ Created hold record: %s", holdResult.URI) - - // Create HoldCrewRecord for the owner - crewRecord := atproto.NewHoldCrewRecord(holdResult.URI, did, "owner") - - crewRKey := fmt.Sprintf("%s-%s", holdName, did) - crewResult, err := client.PutRecord(ctx, atproto.HoldCrewCollection, crewRKey, crewRecord) - if err != nil { - return fmt.Errorf("failed to create crew record: %w", err) - } - - log.Printf("✓ Created crew record: %s", crewResult.URI) - - // Update sailor profile to set 
this as the default hold - profile, err := atproto.GetProfile(ctx, client) - if err != nil { - log.Printf("Warning: failed to get sailor profile: %v", err) - } else { - if profile == nil { - // Create new profile with this hold as default - profile = atproto.NewSailorProfileRecord(publicURL) - } else { - // Update existing profile with new defaultHold - profile.DefaultHold = publicURL - profile.UpdatedAt = time.Now() - } - - err = atproto.UpdateProfile(ctx, client, profile) - if err != nil { - log.Printf("Warning: failed to update sailor profile: %v", err) - } else { - log.Printf("✓ Updated sailor profile defaultHold: %s", publicURL) - } - } - - log.Print("\n" + strings.Repeat("=", 80)) - log.Printf("REGISTRATION COMPLETE") - log.Print(strings.Repeat("=", 80)) - log.Printf("Hold service is now registered and ready to use!") - log.Print(strings.Repeat("=", 80) + "\n") - - return nil -} - -// extractHostname extracts the hostname from a URL to use as the hold name -func extractHostname(urlStr string) (string, error) { - u, err := url.Parse(urlStr) - if err != nil { - return "", err - } - // Remove port if present - hostname := u.Hostname() - if hostname == "" { - return "", fmt.Errorf("no hostname in URL") - } - return hostname, nil -} diff --git a/docs/HOLD_MULTIPART.md b/docs/HOLD_MULTIPART.md new file mode 100644 index 0000000..e09d81e --- /dev/null +++ b/docs/HOLD_MULTIPART.md @@ -0,0 +1,344 @@ +# Hold Service Multipart Upload Architecture + +## Overview + +The hold service supports multipart uploads through two modes: +1. **S3Native** - Uses S3's native multipart API with presigned URLs (optimal) +2. **Buffered** - Buffers parts in hold service memory, assembles on completion (fallback) + +This dual-mode approach enables the hold service to work with: +- S3-compatible storage with presigned URL support (S3, Storj, MinIO, etc.) 
+- S3-compatible storage WITHOUT presigned URL support +- Filesystem storage +- Any storage driver supported by distribution + +## Current State + +### What Works +- **S3 with presigned URLs**: Primary mode, working +- **AppView multipart client**: Implements chunked uploads via multipart API + +### What's Broken +- **Filesystem storage**: multipart endpoints return "S3 not configured" error +- **S3 fallback mode**: No fallback when presigned URL generation fails +- **Non-S3 drivers**: Azure, GCS, etc. not supported for multipart + +## Architecture + +### Three Modes of Operation + +#### Mode 1: S3 Native Multipart (Currently Working) +``` +Docker → AppView → Hold → S3 (presigned URLs) + ↓ + Returns presigned URL + ↓ +Docker ──────────→ S3 (direct upload) +``` + +**Flow:** +1. AppView: `POST /start-multipart` → Hold starts S3 multipart, returns uploadID +2. AppView: `POST /part-presigned-url` → Hold returns S3 presigned URL +3. Docker → S3: Direct upload via presigned URL +4. AppView: `POST /complete-multipart` → Hold calls S3 CompleteMultipartUpload + +**Advantages:** +- No data flows through hold service +- Minimal bandwidth usage +- Fast uploads + +#### Mode 2: S3 Proxy Mode (Not Yet Implemented) +``` +Docker → AppView → Hold → S3 (via driver) + ↓ + Buffers & proxies + ↓ + S3 +``` + +**Flow:** +1. AppView: `POST /start-multipart` → Hold creates buffered session +2. AppView: `POST /part-presigned-url` → Hold returns proxy URL +3. Docker → Hold: `PUT /multipart-parts/{uploadID}/{part}` → Hold buffers +4. AppView: `POST /complete-multipart` → Hold uploads to S3 via driver + +**Use Cases:** +- S3 provider doesn't support presigned URLs +- S3 API fails to generate presigned URL +- Fallback from Mode 1 + +#### Mode 3: Filesystem Mode (Not Yet Implemented) +``` +Docker → AppView → Hold (filesystem driver) + ↓ + Buffers & writes + ↓ + Local filesystem +``` + +**Flow:** +Same as Mode 2, but writes to filesystem driver instead of S3 driver. 
+ +**Use Cases:** +- Development/testing with local filesystem +- Small deployments without S3 +- Air-gapped environments + +## Implementation: pkg/hold/multipart.go + +### Core Components + +#### MultipartManager +```go +type MultipartManager struct { + sessions map[string]*MultipartSession + mu sync.RWMutex +} +``` + +**Responsibilities:** +- Track active multipart sessions +- Clean up abandoned uploads (>24h inactive) +- Thread-safe session access + +#### MultipartSession +```go +type MultipartSession struct { + UploadID string // Unique ID for this upload + Digest string // Target blob digest + Mode MultipartMode // S3Native or Buffered + S3UploadID string // S3 upload ID (S3Native only) + Parts map[int]*MultipartPart // Buffered parts (Buffered only) + CreatedAt time.Time + LastActivity time.Time +} +``` + +**State Tracking:** +- S3Native: Tracks S3 upload ID and part ETags +- Buffered: Stores part data in memory + +#### MultipartPart +```go +type MultipartPart struct { + PartNumber int // Part number (1-indexed) + Data []byte // Part data (Buffered mode only) + ETag string // S3 ETag or computed hash + Size int64 +} +``` + +### Key Methods + +#### StartMultipartUploadWithManager +```go +func (s *HoldService) StartMultipartUploadWithManager( + ctx context.Context, + digest string, + manager *MultipartManager, +) (string, MultipartMode, error) +``` + +**Logic:** +1. Try S3 native multipart via `s.startMultipartUpload()` +2. If successful → Create S3Native session +3. If fails or no S3 client → Create Buffered session +4. 
Return uploadID and mode + +#### GetPartUploadURL +```go +func (s *HoldService) GetPartUploadURL( + ctx context.Context, + session *MultipartSession, + partNumber int, + did string, +) (string, error) +``` + +**Logic:** +- S3Native mode: Generate S3 presigned URL via `s.getPartPresignedURL()` +- Buffered mode: Return proxy endpoint `/multipart-parts/{uploadID}/{part}` + +#### CompleteMultipartUploadWithManager +```go +func (s *HoldService) CompleteMultipartUploadWithManager( + ctx context.Context, + session *MultipartSession, + manager *MultipartManager, +) error +``` + +**Logic:** +- S3Native: Call `s.completeMultipartUpload()` with S3 API +- Buffered: Assemble parts in order, write via storage driver + +#### HandleMultipartPartUpload (New Endpoint) +```go +func (s *HoldService) HandleMultipartPartUpload( + w http.ResponseWriter, + r *http.Request, + uploadID string, + partNumber int, + did string, + manager *MultipartManager, +) +``` + +**New HTTP endpoint:** `PUT /multipart-parts/{uploadID}/{partNumber}` + +**Purpose:** Receive part uploads in Buffered mode + +**Logic:** +1. Validate session exists and is in Buffered mode +2. Authorize write access +3. Read part data from request body +4. Store in session with computed ETag (SHA256) +5. 
Return ETag in response header + +## Integration Plan + +### Phase 1: Migrate to pkg/hold (In Progress) +- [x] Extract code from cmd/hold/main.go to pkg/hold/ +- [x] Create isolated multipart.go implementation +- [ ] Update cmd/hold/main.go to import pkg/hold +- [ ] Test existing S3 native multipart still works + +### Phase 2: Add Buffered Mode Support +- [ ] Add MultipartManager to HoldService +- [ ] Update handlers to use `*WithManager` methods +- [ ] Add `/multipart-parts/{uploadID}/{partNumber}` route +- [ ] Test filesystem storage with buffered multipart + +### Phase 3: Update AppView +- [ ] Detect hold capabilities (presigned vs proxy) +- [ ] Fall back to buffered mode when presigned URL generation fails +- [ ] Handle `/multipart-parts/` proxy URLs + +### Phase 4: Capability Discovery +- [ ] Add capability endpoint: `GET /capabilities` +- [ ] Return: `{"multipart": "native|buffered|both", "storage": "s3|filesystem"}` +- [ ] AppView uses capabilities to choose upload strategy + +## Testing Strategy + +### Unit Tests +- [ ] MultipartManager session lifecycle +- [ ] Part buffering and assembly +- [ ] Concurrent part uploads (thread safety) +- [ ] Session cleanup (expired uploads) + +### Integration Tests + +**S3 Native Mode:** +- [ ] Start multipart → get presigned URLs → upload parts → complete +- [ ] Verify no data flows through hold service +- [ ] Test abort cleanup + +**Buffered Mode (Filesystem):** +- [ ] Start multipart → get proxy URLs → upload parts → complete +- [ ] Verify parts are assembled correctly +- [ ] Test missing part detection +- [ ] Test abort cleanup + +**Fallback:** +- [ ] Simulate presigned URL failure → should fall back to buffered mode +- [ ] Verify seamless transition + +### Load Tests +- [ ] Concurrent multipart uploads (multiple sessions) +- [ ] Large blobs (100MB+, many parts) +- [ ] Memory usage with many buffered parts + +## Performance Considerations + +### Memory Usage (Buffered Mode) +- Parts are stored in memory until completion +- Docker typically uses 5MB
chunks (the S3 minimum part size) +- 100MB image = ~20 parts = ~100MB RAM during upload +- Multiple concurrent uploads multiply memory usage + +**Mitigation:** +- Session cleanup (24h timeout) +- Consider disk-backed buffering for large parts (future optimization) +- Monitor memory usage and set limits + +### Network Bandwidth +- S3Native: Minimal (only API calls) +- Buffered: Full blob data flows through hold service +- Filesystem: Always buffered (no presigned URL option) + +## Configuration + +### Environment Variables + +**Current (S3 only):** +```bash +STORAGE_DRIVER=s3 +S3_BUCKET=my-bucket +S3_ENDPOINT=https://s3.amazonaws.com +AWS_ACCESS_KEY_ID=... +AWS_SECRET_ACCESS_KEY=... +``` + +**Filesystem:** +```bash +STORAGE_DRIVER=filesystem +STORAGE_ROOT_DIR=/var/lib/atcr/hold +``` + +### Automatic Mode Selection +No configuration is needed - the hold service automatically: +1. Tries S3 native multipart if an S3 client exists +2. Falls back to buffered mode if S3 is unavailable or fails +3. Always uses buffered mode for the filesystem driver + +## Security Considerations + +### Authorization +- All multipart operations require write authorization +- Buffered mode: Check auth on every part upload +- S3Native: Auth only on start/complete (presigned URLs have embedded auth) + +### Resource Limits +- Max upload size: Controlled by storage backend +- Max concurrent uploads: Limited by memory +- Session timeout: 24 hours (configurable) + +### Attack Vectors +- **Memory exhaustion**: Attacker uploads many large parts + - Mitigation: Session limits, cleanup, auth +- **Incomplete uploads**: Attacker starts but never completes + - Mitigation: 24h timeout, cleanup goroutine +- **Part flooding**: Upload many tiny parts + - Mitigation: S3 enforces a 10,000-part limit; the same cap could be added to buffered mode + +## Future Enhancements + +### Disk-Backed Buffering +Instead of memory, buffer parts to a temporary disk location: +- Reduces memory pressure +- Supports larger uploads +- Requires cleanup on completion/abort + +###
Parallel Part Assembly +For large uploads, assemble parts in parallel: +- Stream parts to writer as they arrive +- Reduce memory footprint +- Faster completion + +### Chunked Completion +For very large assembled blobs: +- Stream to storage driver in chunks +- Avoid loading entire blob in memory +- Use `io.Copy()` with buffer + +### Multi-Backend Support +- Azure Blob Storage multipart +- Google Cloud Storage resumable uploads +- Backblaze B2 large file API + +## References + +- S3 Multipart Upload API: https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html +- Distribution Storage Driver Interface: https://github.com/distribution/distribution/blob/main/registry/storage/driver/storagedriver.go +- OCI Distribution Spec (Blob Upload): https://github.com/opencontainers/distribution-spec/blob/main/spec.md#pushing-a-blob-in-chunks diff --git a/docs/MULTIPART_OLD.md b/docs/MULTIPART_OLD.md new file mode 100644 index 0000000..6791048 --- /dev/null +++ b/docs/MULTIPART_OLD.md @@ -0,0 +1,448 @@ +S3 Multipart Upload Implementation Plan + Problem Summary + Current implementation uses a single presigned URL with a pipe for chunked uploads (PATCH). 
This causes: + - Docker PATCH requests block waiting for pipe writes + - S3 upload happens in background via a single presigned URL + - Docker times out → "client disconnected during blob PATCH" + - Root cause: Single presigned URLs don't support OCI's chunked upload protocol + Solution: S3 Multipart Upload API + Implement proper S3 multipart upload to support Docker's chunked PATCH operations: + - Each PATCH → separate S3 part upload with its own presigned URL + - On Commit → complete multipart upload + - No whole-blob buffering (only the current part, ~5MB, is held in memory), no pipes, no blocking + --- + Architecture Changes + Current (Broken) Flow + POST /blobs/uploads/ → Create() → Single presigned URL to temp location + PATCH → Write to pipe → [blocks] → Background goroutine uploads via single URL + PATCH → [blocks on pipe] → Docker timeout → disconnect ❌ + New (Multipart) Flow + POST /blobs/uploads/ → Create() → Initiate multipart upload, get upload ID + PATCH #1 → Get presigned URL for part 1 → Upload part 1 to S3 → Store ETag + PATCH #2 → Get presigned URL for part 2 → Upload part 2 to S3 → Store ETag + PUT (commit) → Complete multipart upload with ETags → Done ✅ + --- + Implementation Details + 1.
Hold Service: Add Multipart Upload Endpoints + File: cmd/hold/main.go + New Request/Response Types + // StartMultipartUploadRequest initiates a multipart upload + type StartMultipartUploadRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` + } + type StartMultipartUploadResponse struct { + UploadID string `json:"upload_id"` + ExpiresAt time.Time `json:"expires_at"` + } + // GetPartURLRequest requests a presigned URL for a specific part + type GetPartURLRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` + UploadID string `json:"upload_id"` + PartNumber int `json:"part_number"` + } + type GetPartURLResponse struct { + URL string `json:"url"` + ExpiresAt time.Time `json:"expires_at"` + } + // CompleteMultipartRequest completes a multipart upload + type CompleteMultipartRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` + UploadID string `json:"upload_id"` + Parts []CompletedPart `json:"parts"` + } + type CompletedPart struct { + PartNumber int `json:"part_number"` + ETag string `json:"etag"` + } + // AbortMultipartRequest aborts an in-progress upload + type AbortMultipartRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` + UploadID string `json:"upload_id"` + } + New Endpoints + POST /start-multipart + func (s *HoldService) HandleStartMultipart(w http.ResponseWriter, r *http.Request) { + // Validate DID authorization for WRITE + // Build S3 key from digest + // Call s3.CreateMultipartUploadRequest() + // Generate presigned URL if needed, or return upload ID + // Return upload ID to client + } + POST /part-presigned-url + func (s *HoldService) HandleGetPartURL(w http.ResponseWriter, r *http.Request) { + // Validate DID authorization for WRITE + // Build S3 key from digest + // Call s3.UploadPartRequest() with part number and upload ID + // Generate presigned URL + // Return presigned URL for this specific part + } + POST /complete-multipart + func (s *HoldService) 
HandleCompleteMultipart(w http.ResponseWriter, r *http.Request) { + // Validate DID authorization for WRITE + // Build S3 key from digest + // Prepare CompletedPart array with part numbers and ETags + // Call s3.CompleteMultipartUpload() + // Return success + } + POST /abort-multipart (for cleanup) + func (s *HoldService) HandleAbortMultipart(w http.ResponseWriter, r *http.Request) { + // Validate DID authorization for WRITE + // Call s3.AbortMultipartUpload() + // Return success + } + S3 Implementation + // startMultipartUpload initiates a multipart upload and returns upload ID + func (s *HoldService) startMultipartUpload(ctx context.Context, digest string) (string, error) { + if s.s3Client == nil { + return "", fmt.Errorf("S3 not configured") + } + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + result, err := s.s3Client.CreateMultipartUploadWithContext(ctx, &s3.CreateMultipartUploadInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + }) + if err != nil { + return "", err + } + return *result.UploadId, nil + } + // getPartPresignedURL generates presigned URL for a specific part + func (s *HoldService) getPartPresignedURL(ctx context.Context, digest, uploadID string, partNumber int) (string, error) { + if s.s3Client == nil { + return "", fmt.Errorf("S3 not configured") + } + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + req, _ := s.s3Client.UploadPartRequest(&s3.UploadPartInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + UploadId: aws.String(uploadID), + PartNumber: aws.Int64(int64(partNumber)), + }) + return req.Presign(15 * time.Minute) + } + // completeMultipartUpload finalizes the multipart upload + func (s *HoldService) completeMultipartUpload(ctx context.Context, digest, uploadID string, parts []CompletedPart) error { + if s.s3Client == nil { + 
return fmt.Errorf("S3 not configured") + } + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + // Convert to S3 CompletedPart format + s3Parts := make([]*s3.CompletedPart, len(parts)) + for i, p := range parts { + s3Parts[i] = &s3.CompletedPart{ + PartNumber: aws.Int64(int64(p.PartNumber)), + ETag: aws.String(p.ETag), + } + } + _, err := s.s3Client.CompleteMultipartUploadWithContext(ctx, &s3.CompleteMultipartUploadInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + UploadId: aws.String(uploadID), + MultipartUpload: &s3.CompletedMultipartUpload{ + Parts: s3Parts, + }, + }) + return err + } + --- + 2. AppView: Rewrite ProxyBlobStore for Multipart + File: pkg/storage/proxy_blob_store.go + Remove Current Implementation + - Remove pipe-based streaming + - Remove background goroutine with single presigned URL + - Remove global upload tracking map + New ProxyBlobWriter Structure + type ProxyBlobWriter struct { + store *ProxyBlobStore + options distribution.CreateOptions + uploadID string // S3 multipart upload ID + parts []CompletedPart // Track uploaded parts with ETags + partNumber int // Current part number (starts at 1) + buffer *bytes.Buffer // Buffer for current part + size int64 // Total bytes written + closed bool + id string // Distribution's upload ID (for state) + startedAt time.Time + finalDigest string // Set on Commit + } + type CompletedPart struct { + PartNumber int + ETag string + } + New Create() - Initiate Multipart Upload + func (p *ProxyBlobStore) Create(ctx context.Context, options ...distribution.BlobCreateOption) (distribution.BlobWriter, error) { + var opts distribution.CreateOptions + for _, option := range options { + if err := option.Apply(&opts); err != nil { + return nil, err + } + } + // Use temp digest for upload location + writerID := fmt.Sprintf("upload-%d", time.Now().UnixNano()) + tempDigest := 
digest.Digest(fmt.Sprintf("uploads/temp-%s", writerID)) + // Start multipart upload via hold service + uploadID, err := p.startMultipartUpload(ctx, tempDigest) + if err != nil { + return nil, fmt.Errorf("failed to start multipart upload: %w", err) + } + writer := &ProxyBlobWriter{ + store: p, + options: opts, + uploadID: uploadID, + parts: make([]CompletedPart, 0), + partNumber: 1, + buffer: bytes.NewBuffer(make([]byte, 0, 5*1024*1024)), // 5MB buffer + id: writerID, + startedAt: time.Now(), + } + // Store in global map for Resume() + globalUploadsMu.Lock() + globalUploads[writer.id] = writer + globalUploadsMu.Unlock() + return writer, nil + } + New Write() - Buffer and Flush Parts + func (w *ProxyBlobWriter) Write(p []byte) (int, error) { + if w.closed { + return 0, fmt.Errorf("writer closed") + } + n, err := w.buffer.Write(p) + w.size += int64(n) + // Flush if buffer reaches 5MB (S3 minimum part size) + if w.buffer.Len() >= 5*1024*1024 { + if err := w.flushPart(); err != nil { + return n, err + } + } + return n, err + } + func (w *ProxyBlobWriter) flushPart() error { + if w.buffer.Len() == 0 { + return nil + } + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + // Get presigned URL for this part + tempDigest := digest.Digest(fmt.Sprintf("uploads/temp-%s", w.id)) + url, err := w.store.getPartPresignedURL(ctx, tempDigest, w.uploadID, w.partNumber) + if err != nil { + return fmt.Errorf("failed to get part presigned URL: %w", err) + } + // Upload part to S3 + req, err := http.NewRequestWithContext(ctx, "PUT", url, bytes.NewReader(w.buffer.Bytes())) + if err != nil { + return err + } + resp, err := w.store.httpClient.Do(req) + if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { + return fmt.Errorf("part upload failed: status %d", resp.StatusCode) + } + // Store ETag for completion + etag := resp.Header.Get("ETag") + if etag == "" { + return 
fmt.Errorf("no ETag in response") + } + w.parts = append(w.parts, CompletedPart{ + PartNumber: w.partNumber, + ETag: etag, + }) + // Reset buffer and increment part number + w.buffer.Reset() + w.partNumber++ + return nil + } + New Commit() - Complete Multipart and Move + func (w *ProxyBlobWriter) Commit(ctx context.Context, desc distribution.Descriptor) (distribution.Descriptor, error) { + if w.closed { + return distribution.Descriptor{}, fmt.Errorf("writer closed") + } + w.closed = true + // Flush any remaining buffered data + if w.buffer.Len() > 0 { + if err := w.flushPart(); err != nil { + // Try to abort multipart on error + w.store.abortMultipartUpload(ctx, w.uploadID) + return distribution.Descriptor{}, err + } + } + // Complete multipart upload at temp location + tempDigest := digest.Digest(fmt.Sprintf("uploads/temp-%s", w.id)) + if err := w.store.completeMultipartUpload(ctx, tempDigest, w.uploadID, w.parts); err != nil { + return distribution.Descriptor{}, err + } + // Move from temp → final location (server-side S3 copy) + tempPath := fmt.Sprintf("uploads/temp-%s", w.id) + finalPath := desc.Digest.String() + moveURL := fmt.Sprintf("%s/move?from=%s&to=%s&did=%s", + w.store.storageEndpoint, tempPath, finalPath, w.store.did) + req, err := http.NewRequestWithContext(ctx, "POST", moveURL, nil) + if err != nil { + return distribution.Descriptor{}, err + } + resp, err := w.store.httpClient.Do(req) + if err != nil { + return distribution.Descriptor{}, err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { + bodyBytes, _ := io.ReadAll(resp.Body) + return distribution.Descriptor{}, fmt.Errorf("move failed: %d, %s", resp.StatusCode, bodyBytes) + } + // Remove from global map + globalUploadsMu.Lock() + delete(globalUploads, w.id) + globalUploadsMu.Unlock() + return distribution.Descriptor{ + Digest: desc.Digest, + Size: w.size, + MediaType: desc.MediaType, + }, nil + } + Add Hold Service Client Methods + func 
(p *ProxyBlobStore) startMultipartUpload(ctx context.Context, dgst digest.Digest) (string, error) { + reqBody := map[string]any{ + "did": p.did, + "digest": dgst.String(), + } + body, _ := json.Marshal(reqBody) + url := fmt.Sprintf("%s/start-multipart", p.storageEndpoint) + req, _ := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + resp, err := p.httpClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + var result struct { + UploadID string `json:"upload_id"` + } + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return "", err + } + return result.UploadID, nil + } + func (p *ProxyBlobStore) getPartPresignedURL(ctx context.Context, dgst digest.Digest, uploadID string, partNumber int) (string, error) { + reqBody := map[string]any{ + "did": p.did, + "digest": dgst.String(), + "upload_id": uploadID, + "part_number": partNumber, + } + body, _ := json.Marshal(reqBody) + url := fmt.Sprintf("%s/part-presigned-url", p.storageEndpoint) + req, _ := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + resp, err := p.httpClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + var result struct { + URL string `json:"url"` + } + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return "", err + } + return result.URL, nil + } + func (p *ProxyBlobStore) completeMultipartUpload(ctx context.Context, dgst digest.Digest, uploadID string, parts []CompletedPart) error { + reqBody := map[string]any{ + "did": p.did, + "digest": dgst.String(), + "upload_id": uploadID, + "parts": parts, + } + body, _ := json.Marshal(reqBody) + url := fmt.Sprintf("%s/complete-multipart", p.storageEndpoint) + req, _ := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) + req.Header.Set("Content-Type", "application/json") + resp, err := p.httpClient.Do(req) + 
if err != nil { + return err + } + defer resp.Body.Close() + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("complete multipart failed: status %d", resp.StatusCode) + } + return nil + } + --- + Testing Plan + 1. Unit Tests + - Test multipart upload initiation + - Test part upload with presigned URLs + - Test completion with ETags + - Test abort on errors + 2. Integration Tests + - Push small images (< 5MB, single part) + - Push medium images (10MB, 2 parts) + - Push large images (100MB, 20 parts) + - Test with Upcloud S3 + - Test with Storj S3 + 3. Validation + - Monitor logs for "client disconnected" errors (should be gone) + - Check Docker push success rate + - Verify blobs stored correctly in S3 + - Check bandwidth usage on hold service (should be minimal) + --- + Migration & Deployment + Backward Compatibility + - Keep /put-presigned-url endpoint for fallback + - Keep /move endpoint (still needed) + - New multipart endpoints are additive + Deployment Steps + 1. Update hold service with new endpoints + 2. Update AppView ProxyBlobStore + 3. Deploy hold service first + 4. Deploy AppView + 5. Test with sample push + 6. 
Monitor logs + Rollback Plan + - Revert AppView to previous version (uses old presigned URL method) + - Hold service keeps both old and new endpoints + --- + Documentation Updates + Update docs/PRESIGNED_URLS.md + - Add section "Multipart Upload for Chunked Data" + - Explain why single presigned URLs don't work with PATCH + - Document new endpoints and flow + - Add S3 part size recommendations (5MB-64MB for Storj) + Add Troubleshooting Section + - "Client disconnected during PATCH" → resolved by multipart + - Storj-specific considerations (64MB parts recommended) + - Upcloud compatibility notes + --- + Performance Impact + Before (Broken) + - Docker PATCH → blocks on pipe → timeout → retry → fail + - Unable to push large images reliably + After (Multipart) + - Each PATCH → independent part upload → immediate response + - No blocking, no timeouts + - Parallel part uploads possible (future optimization) + - Reliable pushes for any image size + Bandwidth + - Hold service: Only API calls (~1KB per part) + - Direct S3 uploads: Full blob data + - S3 copy for move: Server-side (no hold bandwidth) + Estimated savings: 99.98% hold service bandwidth reduction (same as before, but now actually works!) \ No newline at end of file diff --git a/docs/PRESIGNED_UPLOADS.md b/docs/PRESIGNED_UPLOADS.md new file mode 100644 index 0000000..2989b99 --- /dev/null +++ b/docs/PRESIGNED_UPLOADS.md @@ -0,0 +1,1017 @@ +# Presigned Upload URLs Implementation Guide + +## Current Architecture (Proxy Mode) + +### Upload Flow Today +1. **AppView** receives blob upload request from Docker +2. **ProxyBlobStore.Create()** creates streaming upload via pipe +3. Data streams to **Hold Service** temp location: `uploads/temp-{id}` +4. Hold service uploads to S3 via storage driver +5. **ProxyBlobWriter.Commit()** moves blob: temp → final digest-based path +6. 
Hold service performs S3 Move operation + +### Why Uploads Don't Use Presigned URLs Today +- `Create()` doesn't know the blob digest upfront +- Presigned S3 URLs require the full object key (which includes digest) +- Current approach streams to temp location, calculates digest, then moves + +### Bandwidth Flow (Current) +``` +Docker → AppView → Hold Service → S3/Storj + (proxy) (proxy) +``` + +All upload bandwidth flows through Hold Service. + +--- + +## Proposed Architecture (Presigned Uploads) + +### New Upload Flow +1. **AppView** receives blob upload request from Docker +2. **ProxyBlobStore.Create()** creates buffered upload writer +3. Data buffered in memory during `Write()` calls +4. **ProxyBlobWriter.Commit()** calculates digest from buffer +5. Request presigned PUT URL from Hold Service with digest +6. Upload buffered data directly to S3 via presigned URL +7. No move operation needed (uploaded to final path) + +### Bandwidth Flow (Presigned) +``` +Docker → AppView → S3/Storj (direct via presigned URL) + (buffer) + +Hold Service only issues presigned URLs (minimal bandwidth) +``` + +--- + +## Detailed Implementation + +### Phase 1: Add Buffering to ProxyBlobWriter + +**File:** `pkg/storage/proxy_blob_store.go` + +#### Changes to ProxyBlobWriter struct + +```go +type ProxyBlobWriter struct { + store *ProxyBlobStore + options distribution.CreateOptions + + // Remove pipe-based streaming + // pipeWriter *io.PipeWriter + // pipeReader *io.PipeReader + // digestChan chan string + // uploadErr chan error + + // Add buffering + buffer *bytes.Buffer // In-memory buffer for blob data + hasher digest.Digester // Calculate digest while writing + + finalDigest string + size int64 + closed bool + id string + startedAt time.Time +} +``` + +**Rationale:** +- Remove pipe mechanism (no longer streaming to temp) +- Add buffer to store blob data in memory +- Add hasher to calculate digest incrementally + +#### Modify Create() method + +**Before (lines 208-312):** +```go +func 
(p *ProxyBlobStore) Create(ctx context.Context, options ...distribution.BlobCreateOption) (distribution.BlobWriter, error) { + // Creates pipe and starts background goroutine for streaming + pipeReader, pipeWriter := io.Pipe() + // ... streams to temp location +} +``` + +**After:** +```go +func (p *ProxyBlobStore) Create(ctx context.Context, options ...distribution.BlobCreateOption) (distribution.BlobWriter, error) { + fmt.Printf("🔧 [proxy_blob_store/Create] Starting buffered upload for presigned URL\n") + + // Parse options + var opts distribution.CreateOptions + for _, option := range options { + if err := option.Apply(&opts); err != nil { + return nil, err + } + } + + // Create buffered writer + writer := &ProxyBlobWriter{ + store: p, + options: opts, + buffer: new(bytes.Buffer), + hasher: digest.Canonical.Digester(), // Usually SHA256 + id: fmt.Sprintf("upload-%d", time.Now().UnixNano()), + startedAt: time.Now(), + } + + // Store in global uploads map for resume support + globalUploadsMu.Lock() + globalUploads[writer.id] = writer + globalUploadsMu.Unlock() + + fmt.Printf(" Upload ID: %s\n", writer.id) + fmt.Printf(" Repository: %s\n", p.repository) + + return writer, nil +} +``` + +**Key Changes:** +- No more pipe creation +- No background goroutine +- Initialize buffer and hasher +- Everything else stays synchronous + +#### Modify Write() method + +**Before (lines 440-455):** +```go +func (w *ProxyBlobWriter) Write(p []byte) (int, error) { + // Writes to pipe, streams to hold service + n, err := w.pipeWriter.Write(p) + w.size += int64(n) + return n, nil +} +``` + +**After:** +```go +func (w *ProxyBlobWriter) Write(p []byte) (int, error) { + if w.closed { + return 0, fmt.Errorf("writer closed") + } + + // Write to buffer + n, err := w.buffer.Write(p) + if err != nil { + return n, fmt.Errorf("failed to buffer data: %w", err) + } + + // Update hasher for digest calculation + w.hasher.Hash().Write(p) + + w.size += int64(n) + + // Memory pressure check (optional 
safety) + if w.buffer.Len() > 500*1024*1024 { // 500MB limit + return n, fmt.Errorf("blob too large for buffered upload: %d bytes", w.buffer.Len()) + } + + return n, nil +} +``` + +**Key Changes:** +- Write to in-memory buffer instead of pipe +- Update hasher incrementally (efficient) +- Add safety check for excessive memory usage +- No streaming to hold service yet + +#### Modify Commit() method + +**Before (lines 493-548):** +```go +func (w *ProxyBlobWriter) Commit(ctx context.Context, desc distribution.Descriptor) (distribution.Descriptor, error) { + // Close pipe, send digest to goroutine + // Wait for temp upload + // Move temp → final +} +``` + +**After:** +```go +func (w *ProxyBlobWriter) Commit(ctx context.Context, desc distribution.Descriptor) (distribution.Descriptor, error) { + if w.closed { + return distribution.Descriptor{}, fmt.Errorf("writer closed") + } + w.closed = true + + // Remove from global uploads map + globalUploadsMu.Lock() + delete(globalUploads, w.id) + globalUploadsMu.Unlock() + + // Calculate digest from buffered data + calculatedDigest := w.hasher.Digest() + + // Verify digest matches if provided + if desc.Digest != "" && desc.Digest != calculatedDigest { + return distribution.Descriptor{}, fmt.Errorf( + "digest mismatch: expected %s, got %s", + desc.Digest, calculatedDigest, + ) + } + + finalDigest := calculatedDigest + if desc.Digest != "" { + finalDigest = desc.Digest + } + + fmt.Printf("📤 [ProxyBlobWriter.Commit] Uploading via presigned URL\n") + fmt.Printf(" Digest: %s\n", finalDigest) + fmt.Printf(" Size: %d bytes\n", w.size) + fmt.Printf(" Buffered: %d bytes\n", w.buffer.Len()) + + // Get presigned upload URL from hold service + url, err := w.store.getUploadURL(ctx, finalDigest, w.size) + if err != nil { + return distribution.Descriptor{}, fmt.Errorf("failed to get presigned upload URL: %w", err) + } + + fmt.Printf(" Presigned URL: %s\n", url) + + // Upload directly to S3 via presigned URL + req, err := 
http.NewRequestWithContext(ctx, "PUT", url, bytes.NewReader(w.buffer.Bytes())) + if err != nil { + return distribution.Descriptor{}, fmt.Errorf("failed to create upload request: %w", err) + } + req.Header.Set("Content-Type", "application/octet-stream") + req.ContentLength = w.size + + resp, err := w.store.httpClient.Do(req) + if err != nil { + return distribution.Descriptor{}, fmt.Errorf("presigned upload failed: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { + bodyBytes, _ := io.ReadAll(resp.Body) + return distribution.Descriptor{}, fmt.Errorf( + "presigned upload failed: status %d, body: %s", + resp.StatusCode, string(bodyBytes), + ) + } + + fmt.Printf("✅ [ProxyBlobWriter.Commit] Upload successful\n") + + // Clear buffer to free memory + w.buffer = nil + + return distribution.Descriptor{ + Digest: finalDigest, + Size: w.size, + MediaType: desc.MediaType, + }, nil +} +``` + +**Key Changes:** +- Calculate digest from hasher (already computed incrementally) +- Verify digest if provided by client +- Get presigned upload URL with final digest +- Upload buffer contents directly to S3 +- No temp location, no move operation +- Clear buffer to free memory immediately + +#### Modify Cancel() method + +**Before (lines 551-572):** +```go +func (w *ProxyBlobWriter) Cancel(ctx context.Context) error { + // Close pipe, cancel temp upload +} +``` + +**After:** +```go +func (w *ProxyBlobWriter) Cancel(ctx context.Context) error { + w.closed = true + + // Remove from global uploads map + globalUploadsMu.Lock() + delete(globalUploads, w.id) + globalUploadsMu.Unlock() + + // Clear buffer to free memory + w.buffer = nil + + fmt.Printf("❌ [ProxyBlobWriter.Cancel] Upload cancelled: id=%s\n", w.id) + return nil +} +``` + +**Key Changes:** +- Simply clear buffer +- No pipe cleanup needed +- No temp cleanup needed (nothing uploaded yet) + +--- + +### Phase 2: Update Hold Service (Optional Enhancement) + +The 
current `getUploadURL()` implementation in `cmd/hold/main.go` (lines 528-587) already supports presigned uploads correctly. No changes needed unless you want to add additional logging. + +**Optional logging enhancement at line 547:** + +```go +url, err := req.Presign(15 * time.Minute) +if err != nil { + log.Printf("❌ Failed to generate presigned upload URL: %v", err) + return s.getProxyUploadURL(digest, did), nil +} + +log.Printf("🔑 Generated presigned upload URL:") +log.Printf(" Digest: %s", digest) +log.Printf(" S3 Key: %s", s3Key) +log.Printf(" Size: %d bytes", size) +log.Printf(" URL length: %d chars", len(url)) +log.Printf(" Expires: 15min") + +return url, nil +``` + +--- + +### Phase 3: Memory Management Considerations + +#### Add Configuration for Max Buffer Size + +**File:** `pkg/storage/proxy_blob_store.go` + +Add constants at top of file: + +```go +const ( + maxChunkSize = 5 * 1024 * 1024 // 5MB (existing) + + // Maximum blob size for in-memory buffering + // Blobs larger than this will fail (alternative: fallback to proxy mode) + maxBufferedBlobSize = 500 * 1024 * 1024 // 500MB +) +``` + +#### Alternative: Disk-Based Buffering + +For very large blobs, consider disk-based buffering: + +```go +type ProxyBlobWriter struct { + // ... existing fields ... 
+ + // Choose one: + buffer *bytes.Buffer // Memory buffer (current) + // OR + tempFile *os.File // Disk buffer (for large blobs) + bufferSize int64 +} +``` + +**Memory buffer (simple, fast):** +- Pro: Fast, no disk I/O +- Con: Limited by available RAM +- Use for: Blobs < 500MB + +**Disk buffer (scalable):** +- Pro: No memory limit +- Con: Slower, disk I/O overhead +- Use for: Blobs > 500MB + +#### Hybrid Approach (Recommended) + +```go +const ( + memoryBufferThreshold = 50 * 1024 * 1024 // 50MB +) + +func (w *ProxyBlobWriter) Write(p []byte) (int, error) { + // If buffer exceeds threshold, switch to disk + if w.buffer != nil && w.buffer.Len() > memoryBufferThreshold { + return 0, fmt.Errorf("blob exceeds memory buffer threshold, disk buffering not implemented") + // TODO: Implement disk buffering or fallback to proxy mode + } + + // Otherwise use memory buffer + // ... existing Write() logic ... +} +``` + +--- + +## Optional Enhancement: Presigned HEAD URLs + +### Motivation + +Currently HEAD requests (blob verification) are proxied through the Hold Service. This is fine because HEAD bandwidth is negligible (~300 bytes per request), but we can eliminate this round-trip by using presigned HEAD URLs. + +### Implementation + +#### Step 1: Add getHeadURL() to Hold Service + +**File:** `cmd/hold/main.go` + +Add new function after `getDownloadURL()`: + +```go +// getHeadURL generates a presigned HEAD URL for blob verification +func (s *HoldService) getHeadURL(ctx context.Context, digest string) (string, error) { + // Check if blob exists first + path := blobPath(digest) + _, err := s.driver.Stat(ctx, path) + if err != nil { + return "", fmt.Errorf("blob not found: %w", err) + } + + // If S3 client available, generate presigned HEAD URL + if s.s3Client != nil { + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + // Generate presigned HEAD URL (method-specific!) 
+ req, _ := s.s3Client.HeadObjectRequest(&s3.HeadObjectInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + }) + + log.Printf("🔍 [getHeadURL] Generating presigned HEAD URL:") + log.Printf(" Digest: %s", digest) + log.Printf(" S3 Key: %s", s3Key) + + url, err := req.Presign(15 * time.Minute) + if err != nil { + log.Printf("❌ [getHeadURL] Presign failed: %v", err) + // Fallback to proxy URL + return s.getProxyHeadURL(digest), nil + } + + log.Printf("✅ [getHeadURL] Presigned HEAD URL generated") + return url, nil + } + + // Fallback: return proxy URL + return s.getProxyHeadURL(digest), nil +} + +// getProxyHeadURL returns a proxy URL for HEAD requests +func (s *HoldService) getProxyHeadURL(digest string) string { + // HEAD requests don't need DID in query string (read-only check) + return fmt.Sprintf("%s/blobs/%s", s.config.Server.PublicURL, digest) +} +``` + +#### Step 2: Add HTTP endpoint for presigned HEAD URLs + +**File:** `cmd/hold/main.go` + +Add handler similar to `HandleGetPresignedURL()`: + +```go +// HeadPresignedURLRequest represents a request for a presigned HEAD URL +type HeadPresignedURLRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` +} + +// HeadPresignedURLResponse contains the presigned HEAD URL +type HeadPresignedURLResponse struct { + URL string `json:"url"` + ExpiresAt time.Time `json:"expires_at"` +} + +// HandleHeadPresignedURL handles requests for HEAD URLs +func (s *HoldService) HandleHeadPresignedURL(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req HeadPresignedURLRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + // Validate DID authorization for READ + if !s.isAuthorizedRead(req.DID) { + if req.DID == "" { + http.Error(w, "unauthorized: authentication required", 
http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: access denied", http.StatusForbidden) + } + return + } + + // Generate presigned HEAD URL + ctx := context.Background() + expiry := time.Now().Add(15 * time.Minute) + + url, err := s.getHeadURL(ctx, req.Digest) + if err != nil { + http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) + return + } + + resp := HeadPresignedURLResponse{ + URL: url, + ExpiresAt: expiry, + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} +``` + +#### Step 3: Register endpoint in main() + +**File:** `cmd/hold/main.go` + +In `main()` function, add route: + +```go +mux.HandleFunc("/head-presigned-url", service.HandleHeadPresignedURL) +``` + +#### Step 4: Update ProxyBlobStore.ServeBlob() + +**File:** `pkg/storage/proxy_blob_store.go` + +Modify HEAD handling (currently lines 197-224): + +**Before:** +```go +if r.Method == http.MethodHead { + // Check if blob exists via hold service HEAD request + url := fmt.Sprintf("%s/blobs/%s?did=%s", p.storageEndpoint, dgst.String(), p.did) + req, err := http.NewRequestWithContext(ctx, "HEAD", url, nil) + // ... proxy through hold service ... 
+} +``` + +**After:** +```go +if r.Method == http.MethodHead { + // Get presigned HEAD URL from hold service + headURL, err := p.getHeadURL(ctx, dgst) + if err != nil { + return distribution.ErrBlobUnknown + } + + // Redirect to presigned HEAD URL + http.Redirect(w, r, headURL, http.StatusTemporaryRedirect) + return nil +} +``` + +#### Step 5: Add getHeadURL() to ProxyBlobStore + +**File:** `pkg/storage/proxy_blob_store.go` + +Add after `getDownloadURL()`: + +```go +// getHeadURL requests a presigned HEAD URL from the storage service +func (p *ProxyBlobStore) getHeadURL(ctx context.Context, dgst digest.Digest) (string, error) { + reqBody := map[string]any{ + "did": p.did, + "digest": dgst.String(), + } + + body, err := json.Marshal(reqBody) + if err != nil { + return "", err + } + + url := fmt.Sprintf("%s/head-presigned-url", p.storageEndpoint) + req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) + if err != nil { + return "", err + } + req.Header.Set("Content-Type", "application/json") + + resp, err := p.httpClient.Do(req) + if err != nil { + return "", err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("failed to get HEAD URL: status %d", resp.StatusCode) + } + + var result struct { + URL string `json:"url"` + } + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return "", err + } + + return result.URL, nil +} +``` + +### Presigned HEAD URLs: Trade-offs + +**Benefits:** +- Offloads HEAD requests from Hold Service +- Docker verifies blobs directly against S3 +- Slightly lower latency (one fewer hop) + +**Costs:** +- Requires round-trip to get presigned HEAD URL +- More complex code +- Two HTTP requests instead of one proxy request + +**Bandwidth Analysis:** +- Current: 1 HEAD request to Hold Service (~300 bytes) +- Presigned: 1 POST to get URL (~200 bytes) + 1 HEAD to S3 (~300 bytes) +- **Net difference: Adds ~200 bytes per verification** + +**Recommendation:** 
Optional enhancement. The current proxied HEAD approach is simpler and bandwidth difference is negligible. Only implement if: +- Hold Service is becoming a bottleneck +- You want to minimize Hold Service load completely +- Latency of HEAD requests becomes noticeable + +--- + +## Testing & Validation + +### Test Plan for Presigned Uploads + +#### 1. Small Blob Upload (< 1MB) +```bash +# Build test image with small layers +echo "FROM scratch" > Dockerfile +echo "COPY small-file /" >> Dockerfile +dd if=/dev/urandom of=small-file bs=1024 count=512 # 512KB + +docker build -t atcr.io/youruser/test:small . +docker push atcr.io/youruser/test:small +``` + +**Expected behavior:** +- Blob buffered in memory +- Presigned upload URL requested with correct digest +- Direct upload to S3 via presigned URL +- No temp location, no move operation + +**Verify in logs:** +``` +📤 [ProxyBlobWriter.Commit] Uploading via presigned URL + Digest: sha256:... + Size: 524288 bytes + Presigned URL: https://gateway.storjshare.io/... +✅ [ProxyBlobWriter.Commit] Upload successful +``` + +#### 2. Medium Blob Upload (10-50MB) +```bash +dd if=/dev/urandom of=medium-file bs=1048576 count=25 # 25MB + +docker build -t atcr.io/youruser/test:medium . +docker push atcr.io/youruser/test:medium +``` + +**Monitor memory usage:** +```bash +# While push is running +docker stats atcr-appview +``` + +Should see ~25MB spike during buffer + upload. + +#### 3. Large Blob Upload (100-500MB) +```bash +dd if=/dev/urandom of=large-file bs=1048576 count=200 # 200MB + +docker build -t atcr.io/youruser/test:large . +docker push atcr.io/youruser/test:large +``` + +**Monitor:** +- Memory usage (should see ~200MB spike) +- Upload completes successfully +- S3 shows blob in correct location + +#### 4. 
Concurrent Uploads +```bash +# Push multiple images in parallel +docker push atcr.io/youruser/test1:tag & +docker push atcr.io/youruser/test2:tag & +docker push atcr.io/youruser/test3:tag & +wait +``` + +**Verify:** +- All uploads complete successfully +- Memory usage peaks but doesn't OOM +- No data corruption (digests match) + +#### 5. Error Handling Tests + +**Test presigned URL failure:** +- Temporarily break S3 credentials +- Verify graceful error message +- Check for memory leaks (buffer cleared on error) + +**Test digest mismatch:** +- This shouldn't happen in practice, but verify error handling +- Buffer should be cleared even on error + +**Test network interruption:** +- Kill network during upload +- Verify proper error propagation +- Check for hanging goroutines + +### Test Plan for Presigned HEAD URLs (Optional) + +#### 1. HEAD Request Redirect +```bash +# Pull image (triggers HEAD verification) +docker pull atcr.io/youruser/test:tag +``` + +**Expected behavior:** +- AppView redirects HEAD to presigned HEAD URL +- Docker follows redirect to S3 +- S3 responds to HEAD request successfully + +**Verify in logs:** +``` +🔍 [getHeadURL] Generating presigned HEAD URL: + Digest: sha256:... +✅ [getHeadURL] Presigned HEAD URL generated +``` + +#### 2. Method Verification +```bash +# Manually verify presigned HEAD URL works +curl -I "presigned-head-url-here" +``` + +Should return 200 OK with Content-Length header. + +```bash +# Verify it ONLY works with HEAD (not GET) +curl "presigned-head-url-here" +``` + +Should return 403 Forbidden (method mismatch). 
+ +--- + +## Performance Comparison + +### Current Architecture (Proxy Mode) + +**Upload:** +``` +Client → AppView (stream) → Hold Service (stream) → S3 + ~0ms delay ~0ms delay ~100ms +``` +- Total latency: ~100ms + upload time +- Bandwidth: All through Hold Service + +**Download:** +``` +Client → AppView (redirect) → S3 (presigned GET) + ~5ms ~50ms +``` +- Total latency: ~55ms + download time +- Bandwidth: Direct from S3 ✅ + +**Verification (HEAD):** +``` +Client → AppView (redirect) → Hold Service (proxy HEAD) → S3 + ~5ms ~10ms ~50ms +``` +- Total latency: ~65ms +- Bandwidth: ~300 bytes through Hold Service + +### Presigned Upload Architecture + +**Upload:** +``` +Client → AppView (buffer) → S3 (presigned PUT) + ~0ms ~100ms +``` +- Total latency: ~100ms + buffering time + upload time (the S3 PUT starts in `Commit()`, only after the whole blob has been buffered, so receiving and uploading no longer overlap as they do when streaming) +- Bandwidth: Direct to S3 ✅ +- Memory: +blob_size during buffer + +**Download:** (unchanged) +``` +Client → AppView (redirect) → S3 (presigned GET) +``` + +**Verification (HEAD):** (if presigned HEAD enabled) +``` +Client → AppView (redirect) → S3 (presigned HEAD) + ~5ms ~50ms +``` +- Total latency: ~55ms (10ms faster) +- Bandwidth: Direct to S3 ✅ + +--- + +## Trade-offs Summary + +### Presigned Uploads + +| Aspect | Proxy Mode (Current) | Presigned URLs | +|--------|---------------------|----------------| +| **Upload Bandwidth** | Through Hold Service | Direct to S3 ✅ | +| **Hold Service Load** | High (all upload traffic) | Low (only URL generation) ✅ | +| **Memory Usage** | Low (streaming) | High (buffering) ⚠️ | +| **Disk Usage** | None | Optional temp files for large blobs | +| **Code Complexity** | Simple ✅ | Moderate | +| **Max Blob Size** | Unlimited ✅ | Limited by memory (~500MB) ⚠️ | +| **Latency** | Baseline (streamed end-to-end) | Higher for large blobs (upload starts after buffering) | +| **Error Recovery** | Simple (cancel stream) | More complex (clear buffer) | + +### Presigned HEAD URLs + +| Aspect | Proxy Mode (Current) | Presigned HEAD | +|--------|---------------------|----------------| +| **Bandwidth** | 300 bytes (negligible) | 500 bytes (still 
negligible) | +| **Hold Service Load** | Low (HEAD is tiny) | Lower (but minimal gain) | +| **Latency** | 65ms | 55ms (10ms faster) | +| **Code Complexity** | Simple ✅ | More complex | +| **Reliability** | High (fewer moving parts) ✅ | Moderate (more failure modes) | + +--- + +## Recommendations + +### Presigned Uploads + +**Implement if:** +- ✅ Hold Service bandwidth is a concern +- ✅ You want to minimize Hold Service load +- ✅ Most blobs are < 100MB (typical Docker layers) +- ✅ AppView has sufficient memory (2-4GB+ RAM) + +**Skip if:** +- ⚠️ Memory is constrained +- ⚠️ You regularly push very large layers (> 500MB) +- ⚠️ Current proxy mode is working fine +- ⚠️ Simplicity is priority + +### Presigned HEAD URLs + +**Implement if:** +- ✅ You want complete S3 offloading +- ✅ You're already implementing presigned uploads +- ✅ Hold Service is CPU/bandwidth constrained + +**Skip if:** +- ⚠️ Current HEAD proxying works fine (it does) +- ⚠️ You want to minimize code complexity +- ⚠️ 10ms latency difference doesn't matter + +### Suggested Approach + +**Phase 1:** Implement presigned uploads first +- Bigger performance win (offloads upload bandwidth) +- More valuable for write-heavy workflows +- Test thoroughly with various blob sizes + +**Phase 2:** Monitor and evaluate +- Check Hold Service load after presigned uploads +- Measure HEAD request impact +- Assess if presigned HEAD is worth the complexity + +**Phase 3:** Optionally add presigned HEAD +- Only if Hold Service is still bottlenecked +- Or if you want feature completeness + +--- + +## Migration Path + +### Step 1: Feature Flag +Add configuration option to enable/disable presigned uploads: + +```go +// In AppView config +type Config struct { + // ... existing fields ... + + UsePresignedUploads bool `yaml:"use_presigned_uploads"` // Default: false +} +``` + +### Step 2: Gradual Rollout +1. Deploy with `use_presigned_uploads: false` (current behavior) +2. Test in staging with `use_presigned_uploads: true` +3. 
Roll out to production incrementally +4. Monitor memory usage and error rates + +### Step 3: Fallback Mechanism +If presigned upload fails, fall back to proxy mode: + +```go +func (w *ProxyBlobWriter) Commit(...) { + // Try presigned upload + url, err := w.store.getUploadURL(ctx, finalDigest, w.size) + if err != nil { + // Fallback: use proxy mode + log.Printf("⚠️ Presigned upload unavailable, falling back to proxy") + return w.proxyUpload(ctx, desc) + } + // ... presigned upload ... +} +``` + +--- + +## Appendix: Memory Profiling + +To monitor memory usage during development: + +```bash +# Enable Go memory profiling +go tool pprof http://localhost:5000/debug/pprof/heap +``` + +Or read runtime metrics from Go code: + +```go +import "runtime" + +var m runtime.MemStats +runtime.ReadMemStats(&m) +fmt.Printf("Alloc = %v MB", m.Alloc / 1024 / 1024) +``` + +Monitor these metrics: +- `Alloc`: Current memory allocation +- `TotalAlloc`: Cumulative bytes allocated (only ever increases, even after memory is freed) +- `Sys`: Total memory from OS +- `NumGC`: Garbage collection count + +Expected behavior with presigned uploads: +- Memory spikes during `Write()` calls +- Memory drops after `Commit()` completes +- No memory leaks (`Alloc` should return to its baseline after GC once uploads finish; `TotalAlloc` keeps growing by design) + +--- + +## Questions for Decision + +Before implementing, answer: + +1. **What's the typical size of your Docker layers?** + - < 50MB: Presigned uploads perfect fit + - 50-200MB: Acceptable with memory monitoring + - > 200MB: Consider disk buffering or stick with proxy + +2. **What's your AppView's available memory?** + - 1GB: Skip presigned uploads + - 2-4GB: Fine for typical workloads + - 8GB+: No concerns + +3. **Is Hold Service bandwidth currently a problem?** + - No: Current proxy mode is fine + - Yes: Presigned uploads will help significantly + +4. **How important is code simplicity?** + - Very: Stick with proxy mode + - Moderate: Implement presigned uploads only + - Low: Implement both presigned uploads and HEAD + +5. 
**What's your deployment model?** + - Single Hold Service: Bandwidth matters more + - Multiple Hold Services: Less critical + +--- + +## Implementation Checklist + +### Presigned Uploads +- [ ] Modify `ProxyBlobWriter` struct (remove pipe, add buffer/hasher) +- [ ] Update `Create()` to initialize buffer +- [ ] Update `Write()` to buffer + hash data +- [ ] Update `Commit()` to upload via presigned URL +- [ ] Update `Cancel()` to clear buffer +- [ ] Add memory usage monitoring +- [ ] Add configuration flag +- [ ] Test with small blobs (< 1MB) +- [ ] Test with medium blobs (10-50MB) +- [ ] Test with large blobs (100-500MB) +- [ ] Test concurrent uploads +- [ ] Test error scenarios +- [ ] Update documentation +- [ ] Deploy to staging +- [ ] Monitor production rollout + +### Presigned HEAD URLs (Optional) +- [ ] Add `getHeadURL()` to Hold Service +- [ ] Add `HandleHeadPresignedURL()` endpoint +- [ ] Register `/head-presigned-url` route +- [ ] Add `getHeadURL()` to ProxyBlobStore +- [ ] Update `ServeBlob()` to redirect HEAD requests +- [ ] Test HEAD redirects +- [ ] Verify method-specific signatures +- [ ] Test with Docker pull operations +- [ ] Deploy to staging +- [ ] Monitor production rollout diff --git a/pkg/hold/authorization.go b/pkg/hold/authorization.go new file mode 100644 index 0000000..65d2c37 --- /dev/null +++ b/pkg/hold/authorization.go @@ -0,0 +1,131 @@ +package hold + +import ( + "context" + "encoding/json" + "fmt" + "log" + + "atcr.io/pkg/atproto" + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/atproto/syntax" +) + +// isAuthorizedRead checks if a DID can read from this hold +// Authorization: +// - Public hold: allow anonymous (empty DID) or any authenticated user +// - Private hold: require authentication (any user with sailor.profile) +func (s *HoldService) isAuthorizedRead(did string) bool { + // Check hold public flag + isPublic, err := s.isHoldPublic() + if err != nil { + log.Printf("ERROR: Failed to check 
hold public flag: %v", err) + // Fail secure - deny access on error + return false + } + + if isPublic { + // Public hold - allow anyone (even anonymous) + return true + } + + // Private hold - require authentication + // Any authenticated user with sailor.profile can read + if did == "" { + // Anonymous user trying to access private hold + return false + } + + // For MVP: assume DID presence means they have sailor.profile + // Future: could query PDS to verify sailor.profile exists + return true +} + +// isAuthorizedWrite checks if a DID can write to this hold +// Authorization: must be hold owner OR crew member +func (s *HoldService) isAuthorizedWrite(did string) bool { + if did == "" { + // Anonymous writes not allowed + return false + } + + // Check if DID is the hold owner + ownerDID := s.config.Registration.OwnerDID + if ownerDID == "" { + log.Printf("ERROR: Hold owner DID not configured") + return false + } + + if did == ownerDID { + // Owner always has write access + return true + } + + // Check if DID is a crew member + isCrew, err := s.isCrewMember(did) + if err != nil { + log.Printf("ERROR: Failed to check crew membership: %v", err) + return false + } + + return isCrew +} + +// isHoldPublic checks if this hold allows public (anonymous) reads +func (s *HoldService) isHoldPublic() (bool, error) { + // Use cached config value for now + // Future: could query PDS for hold record to get live value + return s.config.Server.Public, nil +} + +// isCrewMember checks if a DID is a crew member of this hold +func (s *HoldService) isCrewMember(did string) (bool, error) { + ownerDID := s.config.Registration.OwnerDID + if ownerDID == "" { + return false, fmt.Errorf("hold owner DID not configured") + } + + ctx := context.Background() + + // Resolve owner's PDS endpoint using indigo + directory := identity.DefaultDirectory() + ownerDIDParsed, err := syntax.ParseDID(ownerDID) + if err != nil { + return false, fmt.Errorf("invalid owner DID: %w", err) + } + + ident, err := 
directory.LookupDID(ctx, ownerDIDParsed) + if err != nil { + return false, fmt.Errorf("failed to resolve owner PDS: %w", err) + } + + pdsEndpoint := ident.PDSEndpoint() + if pdsEndpoint == "" { + return false, fmt.Errorf("no PDS endpoint found for owner") + } + + // Create unauthenticated client to read public records + client := atproto.NewClient(pdsEndpoint, ownerDID, "") + + // List crew records for this hold + // Crew records are public, so we can read them without auth + records, err := client.ListRecords(ctx, atproto.HoldCrewCollection, 100) + if err != nil { + return false, fmt.Errorf("failed to list crew records: %w", err) + } + + // Check if DID is in crew list + for _, record := range records { + var crewRecord atproto.HoldCrewRecord + if err := json.Unmarshal(record.Value, &crewRecord); err != nil { + continue + } + + if crewRecord.Member == did { + // Found crew membership + return true, nil + } + } + + return false, nil +} diff --git a/pkg/hold/config.go b/pkg/hold/config.go new file mode 100644 index 0000000..1f51bbd --- /dev/null +++ b/pkg/hold/config.go @@ -0,0 +1,130 @@ +package hold + +import ( + "fmt" + "os" + "time" + + "github.com/distribution/distribution/v3/configuration" +) + +// Config represents the hold service configuration +type Config struct { + Version string `yaml:"version"` + Storage StorageConfig `yaml:"storage"` + Server ServerConfig `yaml:"server"` + Registration RegistrationConfig `yaml:"registration"` +} + +// RegistrationConfig defines auto-registration settings +type RegistrationConfig struct { + // OwnerDID is the owner's ATProto DID (from env: HOLD_OWNER) + // If set, auto-registration is enabled + OwnerDID string `yaml:"owner_did"` +} + +// StorageConfig wraps distribution's storage configuration +type StorageConfig struct { + configuration.Storage `yaml:",inline"` +} + +// ServerConfig defines server settings +type ServerConfig struct { + // Addr is the address to listen on (e.g., ":8080") + Addr string `yaml:"addr"` + + 
// PublicURL is the public URL of this hold service (e.g., "https://hold.example.com") + PublicURL string `yaml:"public_url"` + + // Public controls whether this hold allows public blob reads without auth (from env: HOLD_PUBLIC) + Public bool `yaml:"public"` + + // TestMode uses localhost for OAuth redirects while storing real URL in hold record (from env: TEST_MODE) + TestMode bool `yaml:"test_mode"` + + // ReadTimeout for HTTP requests + ReadTimeout time.Duration `yaml:"read_timeout"` + + // WriteTimeout for HTTP requests + WriteTimeout time.Duration `yaml:"write_timeout"` +} + +// LoadConfigFromEnv loads all configuration from environment variables +func LoadConfigFromEnv() (*Config, error) { + cfg := &Config{ + Version: "0.1", + } + + // Server configuration + cfg.Server.Addr = getEnvOrDefault("HOLD_SERVER_ADDR", ":8080") + cfg.Server.PublicURL = os.Getenv("HOLD_PUBLIC_URL") + if cfg.Server.PublicURL == "" { + return nil, fmt.Errorf("HOLD_PUBLIC_URL is required") + } + cfg.Server.Public = os.Getenv("HOLD_PUBLIC") == "true" + cfg.Server.TestMode = os.Getenv("TEST_MODE") == "true" + cfg.Server.ReadTimeout = 5 * time.Minute // Increased for large blob uploads + cfg.Server.WriteTimeout = 5 * time.Minute // Increased for large blob uploads + + // Registration configuration (optional) + cfg.Registration.OwnerDID = os.Getenv("HOLD_OWNER") + + // Storage configuration - build from env vars based on storage type + storageType := getEnvOrDefault("STORAGE_DRIVER", "s3") + var err error + cfg.Storage, err = buildStorageConfig(storageType) + if err != nil { + return nil, fmt.Errorf("failed to build storage config: %w", err) + } + + return cfg, nil +} + +// buildStorageConfig creates storage configuration based on driver type +func buildStorageConfig(driver string) (StorageConfig, error) { + params := make(map[string]any) + + switch driver { + case "s3": + // S3/Storj/Minio configuration from standard AWS env vars + accessKey := os.Getenv("AWS_ACCESS_KEY_ID") + secretKey := 
os.Getenv("AWS_SECRET_ACCESS_KEY") + region := getEnvOrDefault("AWS_REGION", "us-east-1") + bucket := os.Getenv("S3_BUCKET") + endpoint := os.Getenv("S3_ENDPOINT") // For Storj/Minio + + if bucket == "" { + return StorageConfig{}, fmt.Errorf("S3_BUCKET is required for S3 storage") + } + + params["accesskey"] = accessKey + params["secretkey"] = secretKey + params["region"] = region + params["bucket"] = bucket + if endpoint != "" { + params["regionendpoint"] = endpoint + } + + case "filesystem": + // Filesystem configuration + rootDir := getEnvOrDefault("STORAGE_ROOT_DIR", "/var/lib/atcr/hold") + params["rootdirectory"] = rootDir + + default: + return StorageConfig{}, fmt.Errorf("unsupported storage driver: %s", driver) + } + + // Build distribution Storage config + storageCfg := configuration.Storage{} + storageCfg[driver] = configuration.Parameters(params) + + return StorageConfig{Storage: storageCfg}, nil +} + +// getEnvOrDefault gets an environment variable or returns a default value +func getEnvOrDefault(key, defaultValue string) string { + if val := os.Getenv(key); val != "" { + return val + } + return defaultValue +} diff --git a/pkg/hold/handlers.go b/pkg/hold/handlers.go new file mode 100644 index 0000000..d5ee1f4 --- /dev/null +++ b/pkg/hold/handlers.go @@ -0,0 +1,574 @@ +package hold + +import ( + "context" + "encoding/json" + "fmt" + "io" + "log" + "net/http" + "time" + + "atcr.io/pkg/atproto" +) + +// HandleGetPresignedURL handles requests for download URLs +func (s *HoldService) HandleGetPresignedURL(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req GetPresignedURLRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + log.Printf("📨 [HandleGetPresignedURL] Request received:") + log.Printf(" DID: %s", req.DID) + log.Printf(" 
Digest: %s", req.Digest) + log.Printf(" Remote: %s", r.RemoteAddr) + log.Printf(" s3Client nil? %v", s.s3Client == nil) + + // Validate DID authorization for READ + if !s.isAuthorizedRead(req.DID) { + log.Printf("❌ [HandleGetPresignedURL] Authorization FAILED") + if req.DID == "" { + // Anonymous request to private hold + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + // Authenticated but not authorized + http.Error(w, "forbidden: access denied", http.StatusForbidden) + } + return + } + + // Generate presigned URL (15 minute expiry) + ctx := context.Background() + expiry := time.Now().Add(15 * time.Minute) + + // For now, construct direct URL to blob + // In production, this would use driver-specific presigned URLs + url, err := s.getDownloadURL(ctx, req.Digest, req.DID) + if err != nil { + log.Printf("❌ [HandleGetPresignedURL] getDownloadURL failed: %v", err) + http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) + return + } + + log.Printf("✅ [HandleGetPresignedURL] Returning URL to client") + + resp := GetPresignedURLResponse{ + URL: url, + ExpiresAt: expiry, + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} + +// HandleHeadPresignedURL handles requests for HEAD URLs +func (s *HoldService) HandleHeadPresignedURL(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req HeadPresignedURLRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + log.Printf("📨 [HandleHeadPresignedURL] Request received:") + log.Printf(" DID: %s", req.DID) + log.Printf(" Digest: %s", req.Digest) + log.Printf(" Remote: %s", r.RemoteAddr) + + // Validate DID authorization for READ + if !s.isAuthorizedRead(req.DID) { + log.Printf("❌ 
[HandleHeadPresignedURL] Authorization FAILED") + if req.DID == "" { + // Anonymous request to private hold + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + // Authenticated but not authorized + http.Error(w, "forbidden: access denied", http.StatusForbidden) + } + return + } + + // Generate presigned HEAD URL (15 minute expiry) + ctx := context.Background() + expiry := time.Now().Add(15 * time.Minute) + + url, err := s.getHeadURL(ctx, req.Digest, req.DID) + if err != nil { + log.Printf("❌ [HandleHeadPresignedURL] getHeadURL failed: %v", err) + http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) + return + } + + log.Printf("✅ [HandleHeadPresignedURL] Returning URL to client") + + resp := HeadPresignedURLResponse{ + URL: url, + ExpiresAt: expiry, + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} + +// HandlePutPresignedURL handles requests for upload URLs +func (s *HoldService) HandlePutPresignedURL(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req PutPresignedURLRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + // Validate DID authorization for WRITE + if !s.isAuthorizedWrite(req.DID) { + if req.DID == "" { + // Anonymous write attempt + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + // Authenticated but not crew/owner + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + // Generate presigned upload URL (15 minute expiry) + ctx := context.Background() + expiry := time.Now().Add(15 * time.Minute) + + url, err := s.getUploadURL(ctx, req.Digest, req.Size, req.DID) + if err != nil { + http.Error(w, fmt.Sprintf("failed to 
generate URL: %v", err), http.StatusInternalServerError) + return + } + + resp := PutPresignedURLResponse{ + URL: url, + ExpiresAt: expiry, + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} + +// HandleProxyGet proxies a blob download through the service +func (s *HoldService) HandleProxyGet(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodGet && r.Method != http.MethodHead { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + // Extract digest from path (e.g., /blobs/sha256:abc123) + digest := r.URL.Path[len("/blobs/"):] + if digest == "" { + http.Error(w, "missing digest", http.StatusBadRequest) + return + } + + log.Printf("📥 [HandleProxyGet] Blob download request:") + log.Printf(" Method: %s", r.Method) + log.Printf(" Digest: %s", digest) + log.Printf(" Remote: %s", r.RemoteAddr) + + // Get DID from query param or header + did := r.URL.Query().Get("did") + if did == "" { + did = r.Header.Get("X-ATCR-DID") + } + log.Printf(" DID: %s", did) + + // Authorize READ access + if !s.isAuthorizedRead(did) { + log.Printf("❌ [HandleProxyGet] Authorization FAILED") + if did == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: access denied", http.StatusForbidden) + } + return + } + log.Printf("✅ [HandleProxyGet] Authorization SUCCESS") + + ctx := r.Context() + path := blobPath(digest) + + // For HEAD requests, just check if blob exists + if r.Method == http.MethodHead { + stat, err := s.driver.Stat(ctx, path) + if err != nil { + http.Error(w, "blob not found", http.StatusNotFound) + return + } + w.Header().Set("Content-Type", "application/octet-stream") + w.Header().Set("Content-Length", fmt.Sprintf("%d", stat.Size())) + w.WriteHeader(http.StatusOK) + return + } + + // For GET requests, read and return the blob + content, err := s.driver.GetContent(ctx, path) + if err != nil { + http.Error(w, "blob not 
found", http.StatusNotFound) + return + } + + w.Header().Set("Content-Type", "application/octet-stream") + w.Write(content) +} + +// HandleMove moves a blob from one path to another +// POST /move?from={path}&to={digest}&did={did} +func (s *HoldService) HandleMove(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + fromPath := r.URL.Query().Get("from") + toDigest := r.URL.Query().Get("to") + did := r.URL.Query().Get("did") + + if fromPath == "" || toDigest == "" { + http.Error(w, "missing from or to parameter", http.StatusBadRequest) + return + } + + // Authorize WRITE access + if !s.isAuthorizedWrite(did) { + if did == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + ctx := r.Context() + sourcePath := blobPath(fromPath) + destPath := blobPath(toDigest) + + // Try to move using driver's Move operation + if err := s.driver.Move(ctx, sourcePath, destPath); err != nil { + log.Printf("HandleMove: failed to move blob: %v", err) + http.Error(w, fmt.Sprintf("failed to move blob: %v", err), http.StatusInternalServerError) + return + } + + log.Printf("HandleMove: successfully moved blob from=%s to=%s", fromPath, toDigest) + w.WriteHeader(http.StatusOK) +} + +// HandleProxyPut proxies a blob upload through the service +func (s *HoldService) HandleProxyPut(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPut { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + digest := r.URL.Path[len("/blobs/"):] + if digest == "" { + http.Error(w, "missing digest", http.StatusBadRequest) + return + } + + did := r.URL.Query().Get("did") + if did == "" { + did = r.Header.Get("X-ATCR-DID") + } + + log.Printf("🔐 [HandleProxyPut] Authorization check:") + log.Printf(" Path: %s", digest) + 
log.Printf(" DID: %s", did) + log.Printf(" Owner DID: %s", s.config.Registration.OwnerDID) + + // Authorize WRITE access + if !s.isAuthorizedWrite(did) { + log.Printf("❌ [HandleProxyPut] Authorization FAILED") + if did == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + log.Printf("✅ [HandleProxyPut] Authorization SUCCESS") + + // Stream blob to storage (no buffering) + ctx := r.Context() + path := blobPath(digest) + + // Create writer for streaming + writer, err := s.driver.Writer(ctx, path, false) + if err != nil { + log.Printf("HandleProxyPut: failed to create writer: %v", err) + http.Error(w, "failed to create writer", http.StatusInternalServerError) + return + } + + // Stream directly from request body to storage + written, err := io.Copy(writer, r.Body) + if err != nil { + writer.Cancel(ctx) + log.Printf("HandleProxyPut: failed to write blob: %v", err) + http.Error(w, "failed to write blob", http.StatusInternalServerError) + return + } + + // Commit the write + if err := writer.Commit(ctx); err != nil { + log.Printf("HandleProxyPut: failed to commit blob: %v", err) + http.Error(w, "failed to commit blob", http.StatusInternalServerError) + return + } + + log.Printf("HandleProxyPut: successfully stored blob path=%s, size=%d", digest, written) + w.WriteHeader(http.StatusCreated) +} + +// HandleStartMultipart initiates a multipart upload +func (s *HoldService) HandleStartMultipart(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req StartMultipartUploadRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + // Validate DID authorization for WRITE + if !s.isAuthorizedWrite(req.DID) { + if req.DID == 
"" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + // Start multipart upload + ctx := r.Context() + uploadID, err := s.startMultipartUpload(ctx, req.Digest) + if err != nil { + http.Error(w, fmt.Sprintf("failed to start multipart upload: %v", err), http.StatusInternalServerError) + return + } + + expiry := time.Now().Add(24 * time.Hour) // Multipart uploads can take longer + + resp := StartMultipartUploadResponse{ + UploadID: uploadID, + ExpiresAt: expiry, + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} + +// HandleGetPartURL generates a presigned URL for uploading a specific part +func (s *HoldService) HandleGetPartURL(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req GetPartURLRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + // Validate DID authorization for WRITE + if !s.isAuthorizedWrite(req.DID) { + if req.DID == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + // Get presigned URL for this part + ctx := r.Context() + url, err := s.getPartPresignedURL(ctx, req.Digest, req.UploadID, req.PartNumber) + if err != nil { + http.Error(w, fmt.Sprintf("failed to generate part URL: %v", err), http.StatusInternalServerError) + return + } + + expiry := time.Now().Add(15 * time.Minute) + + resp := GetPartURLResponse{ + URL: url, + ExpiresAt: expiry, + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} + +// HandleCompleteMultipart completes a multipart upload +func (s 
*HoldService) HandleCompleteMultipart(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req CompleteMultipartRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + // Validate DID authorization for WRITE + if !s.isAuthorizedWrite(req.DID) { + if req.DID == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + // Complete multipart upload + ctx := r.Context() + if err := s.completeMultipartUpload(ctx, req.Digest, req.UploadID, req.Parts); err != nil { + http.Error(w, fmt.Sprintf("failed to complete multipart upload: %v", err), http.StatusInternalServerError) + return + } + + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]string{ + "status": "completed", + }) +} + +// HandleAbortMultipart aborts an in-progress multipart upload +func (s *HoldService) HandleAbortMultipart(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req AbortMultipartRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + // Validate DID authorization for WRITE + if !s.isAuthorizedWrite(req.DID) { + if req.DID == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + // Abort multipart upload + ctx := r.Context() + if err := s.abortMultipartUpload(ctx, req.Digest, req.UploadID); err != nil { + http.Error(w, 
fmt.Sprintf("failed to abort multipart upload: %v", err), http.StatusInternalServerError) + return + } + + w.WriteHeader(http.StatusOK) + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]string{ + "status": "aborted", + }) +} + +// HandleRegister registers this hold service in a user's PDS (manual endpoint) +func (s *HoldService) HandleRegister(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + var req RegisterRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + // Validate required fields + if req.DID == "" || req.AccessToken == "" || req.PDSEndpoint == "" { + http.Error(w, "missing required fields: did, access_token, pds_endpoint", http.StatusBadRequest) + return + } + + // Get public URL from config + publicURL := s.config.Server.PublicURL + if publicURL == "" { + // Fallback to constructing URL from request + scheme := "http" + if r.TLS != nil { + scheme = "https" + } + publicURL = fmt.Sprintf("%s://%s", scheme, r.Host) + } + + // Derive hold name from URL + holdName, err := extractHostname(publicURL) + if err != nil { + http.Error(w, fmt.Sprintf("failed to extract hostname: %v", err), http.StatusBadRequest) + return + } + + ctx := r.Context() + + // Create ATProto client with user's credentials + client := atproto.NewClient(req.PDSEndpoint, req.DID, req.AccessToken) + + // Create HoldRecord + holdRecord := atproto.NewHoldRecord(publicURL, req.DID, s.config.Server.Public) + + holdResult, err := client.PutRecord(ctx, atproto.HoldCollection, holdName, holdRecord) + if err != nil { + http.Error(w, fmt.Sprintf("failed to create hold record: %v", err), http.StatusInternalServerError) + return + } + + log.Printf("Created hold record: %s", holdResult.URI) + + // Create HoldCrewRecord for the owner + 
crewRecord := atproto.NewHoldCrewRecord(holdResult.URI, req.DID, "owner") + + crewRKey := fmt.Sprintf("%s-%s", holdName, req.DID) + crewResult, err := client.PutRecord(ctx, atproto.HoldCrewCollection, crewRKey, crewRecord) + if err != nil { + http.Error(w, fmt.Sprintf("failed to create crew record: %v", err), http.StatusInternalServerError) + return + } + + log.Printf("Created crew record: %s", crewResult.URI) + + resp := RegisterResponse{ + HoldURI: holdResult.URI, + CrewURI: crewResult.URI, + Message: fmt.Sprintf("Successfully registered hold service. Storage endpoint: %s", publicURL), + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) +} diff --git a/pkg/hold/multipart.go b/pkg/hold/multipart.go new file mode 100644 index 0000000..607f80c --- /dev/null +++ b/pkg/hold/multipart.go @@ -0,0 +1,373 @@ +package hold + +import ( + "context" + "crypto/sha256" + "encoding/hex" + "fmt" + "io" + "log" + "net/http" + "sync" + "time" + + "github.com/google/uuid" +) + +// MultipartMode indicates how multipart uploads are handled +type MultipartMode int + +const ( + // S3Native uses S3's native multipart API with presigned URLs + S3Native MultipartMode = iota + // Buffered buffers parts in memory and assembles them in the hold service + Buffered +) + +// MultipartSession tracks an in-progress multipart upload +type MultipartSession struct { + UploadID string // Unique upload ID + Digest string // Target digest path + Mode MultipartMode // Upload mode (S3Native or Buffered) + S3UploadID string // S3 upload ID (for S3Native mode) + Parts map[int]*MultipartPart // Buffered parts (for Buffered mode) + CreatedAt time.Time // When upload started + LastActivity time.Time // Last part upload + mu sync.RWMutex // Protects Parts map +} + +// MultipartPart represents a single part in a multipart upload +type MultipartPart struct { + PartNumber int // Part number (1-indexed) + Data []byte // Part data (for Buffered mode) + ETag string // ETag 
from S3 or computed hash + Size int64 // Part size in bytes + UploadedAt time.Time // When part was uploaded +} + +// MultipartManager manages multipart upload sessions +type MultipartManager struct { + sessions map[string]*MultipartSession // uploadID -> session + mu sync.RWMutex // Protects sessions map +} + +// NewMultipartManager creates a new multipart manager +func NewMultipartManager() *MultipartManager { + mgr := &MultipartManager{ + sessions: make(map[string]*MultipartSession), + } + + // Start cleanup goroutine for abandoned uploads + go mgr.cleanupLoop() + + return mgr +} + +// cleanupLoop periodically cleans up expired sessions +func (m *MultipartManager) cleanupLoop() { + ticker := time.NewTicker(15 * time.Minute) + defer ticker.Stop() + + for range ticker.C { + m.cleanupExpiredSessions() + } +} + +// cleanupExpiredSessions removes sessions inactive for >24 hours +func (m *MultipartManager) cleanupExpiredSessions() { + m.mu.Lock() + defer m.mu.Unlock() + + now := time.Now() + for uploadID, session := range m.sessions { + if now.Sub(session.LastActivity) > 24*time.Hour { + log.Printf("Cleaning up expired multipart session: uploadID=%s, age=%v", uploadID, now.Sub(session.CreatedAt)) + delete(m.sessions, uploadID) + } + } +} + +// CreateSession creates a new multipart upload session +func (m *MultipartManager) CreateSession(digest string, mode MultipartMode, s3UploadID string) *MultipartSession { + uploadID := uuid.New().String() + + session := &MultipartSession{ + UploadID: uploadID, + Digest: digest, + Mode: mode, + S3UploadID: s3UploadID, + Parts: make(map[int]*MultipartPart), + CreatedAt: time.Now(), + LastActivity: time.Now(), + } + + m.mu.Lock() + m.sessions[uploadID] = session + m.mu.Unlock() + + log.Printf("Created multipart session: uploadID=%s, digest=%s, mode=%v", uploadID, digest, mode) + return session +} + +// GetSession retrieves a multipart session by upload ID +func (m *MultipartManager) GetSession(uploadID string) (*MultipartSession, 
error) { + m.mu.RLock() + defer m.mu.RUnlock() + + session, ok := m.sessions[uploadID] + if !ok { + return nil, fmt.Errorf("multipart session not found: %s", uploadID) + } + + return session, nil +} + +// DeleteSession removes a multipart session +func (m *MultipartManager) DeleteSession(uploadID string) { + m.mu.Lock() + defer m.mu.Unlock() + + delete(m.sessions, uploadID) + log.Printf("Deleted multipart session: uploadID=%s", uploadID) +} + +// StorePart stores a part in the session (for Buffered mode) +func (s *MultipartSession) StorePart(partNumber int, data []byte) string { + s.mu.Lock() + defer s.mu.Unlock() + + // Compute ETag as SHA256 hash of part data + hash := sha256.Sum256(data) + etag := hex.EncodeToString(hash[:]) + + part := &MultipartPart{ + PartNumber: partNumber, + Data: data, + ETag: etag, + Size: int64(len(data)), + UploadedAt: time.Now(), + } + + s.Parts[partNumber] = part + s.LastActivity = time.Now() + + log.Printf("Stored part: uploadID=%s, part=%d, size=%d bytes, etag=%s", s.UploadID, partNumber, len(data), etag) + return etag +} + +// RecordS3Part records a part uploaded to S3 (for S3Native mode) +func (s *MultipartSession) RecordS3Part(partNumber int, etag string, size int64) { + s.mu.Lock() + defer s.mu.Unlock() + + part := &MultipartPart{ + PartNumber: partNumber, + ETag: etag, + Size: size, + UploadedAt: time.Now(), + } + + s.Parts[partNumber] = part + s.LastActivity = time.Now() + + log.Printf("Recorded S3 part: uploadID=%s, part=%d, size=%d bytes, etag=%s", s.UploadID, partNumber, size, etag) +} + +// AssembleBufferedParts assembles all buffered parts into a single blob +// Returns the complete data and total size +func (s *MultipartSession) AssembleBufferedParts() ([]byte, int64, error) { + s.mu.RLock() + defer s.mu.RUnlock() + + if s.Mode != Buffered { + return nil, 0, fmt.Errorf("session is not in buffered mode") + } + + // Calculate total size + var totalSize int64 + maxPart := 0 + for partNum, part := range s.Parts { + totalSize 
+= part.Size + if partNum > maxPart { + maxPart = partNum + } + } + + // Check for missing parts + for i := 1; i <= maxPart; i++ { + if _, ok := s.Parts[i]; !ok { + return nil, 0, fmt.Errorf("missing part %d", i) + } + } + + // Assemble parts in order + assembled := make([]byte, 0, totalSize) + for i := 1; i <= maxPart; i++ { + part := s.Parts[i] + assembled = append(assembled, part.Data...) + } + + log.Printf("Assembled buffered parts: uploadID=%s, parts=%d, totalSize=%d bytes", s.UploadID, maxPart, totalSize) + return assembled, totalSize, nil +} + +// GetCompletedParts returns the list of completed parts for S3 multipart completion +func (s *MultipartSession) GetCompletedParts() []CompletedPart { + s.mu.RLock() + defer s.mu.RUnlock() + + parts := make([]CompletedPart, 0, len(s.Parts)) + for _, part := range s.Parts { + parts = append(parts, CompletedPart{ + PartNumber: part.PartNumber, + ETag: part.ETag, + }) + } + + return parts +} + +// StartMultipartUploadWithManager initiates a multipart upload using the manager +// Returns uploadID and mode +func (s *HoldService) StartMultipartUploadWithManager(ctx context.Context, digest string, manager *MultipartManager) (string, MultipartMode, error) { + // Try S3 native multipart first + if s.s3Client != nil { + s3UploadID, err := s.startMultipartUpload(ctx, digest) + if err == nil { + // S3 native multipart succeeded + session := manager.CreateSession(digest, S3Native, s3UploadID) + log.Printf("Started S3 native multipart: uploadID=%s, s3UploadID=%s", session.UploadID, s3UploadID) + return session.UploadID, S3Native, nil + } + log.Printf("S3 native multipart failed, falling back to buffered mode: %v", err) + } + + // Fallback to buffered mode + session := manager.CreateSession(digest, Buffered, "") + log.Printf("Started buffered multipart: uploadID=%s", session.UploadID) + return session.UploadID, Buffered, nil +} + +// GetPartUploadURL generates a URL for uploading a part +// For S3Native: returns presigned URL +// 
For Buffered: returns proxy endpoint +func (s *HoldService) GetPartUploadURL(ctx context.Context, session *MultipartSession, partNumber int, did string) (string, error) { + if session.Mode == S3Native { + // Generate S3 presigned URL for this part + url, err := s.getPartPresignedURL(ctx, session.Digest, session.S3UploadID, partNumber) + if err != nil { + return "", fmt.Errorf("failed to generate S3 part URL: %w", err) + } + return url, nil + } + + // Buffered mode: return proxy endpoint + url := fmt.Sprintf("%s/multipart-parts/%s/%d?did=%s", + s.config.Server.PublicURL, session.UploadID, partNumber, did) + return url, nil +} + +// CompleteMultipartUploadWithManager completes a multipart upload +func (s *HoldService) CompleteMultipartUploadWithManager(ctx context.Context, session *MultipartSession, manager *MultipartManager) error { + defer manager.DeleteSession(session.UploadID) + + if session.Mode == S3Native { + // Complete S3 multipart upload + parts := session.GetCompletedParts() + if err := s.completeMultipartUpload(ctx, session.Digest, session.S3UploadID, parts); err != nil { + return fmt.Errorf("failed to complete S3 multipart: %w", err) + } + log.Printf("Completed S3 native multipart: uploadID=%s, parts=%d", session.UploadID, len(parts)) + return nil + } + + // Buffered mode: assemble parts and write via driver + data, size, err := session.AssembleBufferedParts() + if err != nil { + return fmt.Errorf("failed to assemble parts: %w", err) + } + + // Write assembled blob to storage + path := blobPath(session.Digest) + writer, err := s.driver.Writer(ctx, path, false) + if err != nil { + return fmt.Errorf("failed to create writer: %w", err) + } + + written, err := writer.Write(data) + if err != nil { + writer.Cancel(ctx) + return fmt.Errorf("failed to write blob: %w", err) + } + + if err := writer.Commit(ctx); err != nil { + return fmt.Errorf("failed to commit blob: %w", err) + } + + log.Printf("Completed buffered multipart: uploadID=%s, size=%d bytes, 
written=%d", session.UploadID, size, written) + return nil +} + +// AbortMultipartUploadWithManager aborts a multipart upload +func (s *HoldService) AbortMultipartUploadWithManager(ctx context.Context, session *MultipartSession, manager *MultipartManager) error { + defer manager.DeleteSession(session.UploadID) + + if session.Mode == S3Native { + // Abort S3 multipart upload + if err := s.abortMultipartUpload(ctx, session.Digest, session.S3UploadID); err != nil { + return fmt.Errorf("failed to abort S3 multipart: %w", err) + } + log.Printf("Aborted S3 native multipart: uploadID=%s", session.UploadID) + return nil + } + + // Buffered mode: just delete the session (parts are in memory) + log.Printf("Aborted buffered multipart: uploadID=%s", session.UploadID) + return nil +} + +// HandleMultipartPartUpload handles uploading a part in buffered mode +// This is a new endpoint: PUT /multipart-parts/{uploadID}/{partNumber} +func (s *HoldService) HandleMultipartPartUpload(w http.ResponseWriter, r *http.Request, uploadID string, partNumber int, did string, manager *MultipartManager) { + if r.Method != http.MethodPut { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + // Get session + session, err := manager.GetSession(uploadID) + if err != nil { + http.Error(w, fmt.Sprintf("session not found: %v", err), http.StatusNotFound) + return + } + + // Verify authorization + if !s.isAuthorizedWrite(did) { + if did == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: write access denied", http.StatusForbidden) + } + return + } + + // Verify session is in buffered mode + if session.Mode != Buffered { + http.Error(w, "session is not in buffered mode", http.StatusBadRequest) + return + } + + // Read part data + data, err := io.ReadAll(r.Body) + if err != nil { + http.Error(w, fmt.Sprintf("failed to read part data: %v", err), http.StatusInternalServerError) + return + } + + // 
Store part and get ETag + etag := session.StorePart(partNumber, data) + + // Return ETag in response + w.Header().Set("ETag", etag) + w.WriteHeader(http.StatusOK) +} diff --git a/pkg/hold/registration.go b/pkg/hold/registration.go new file mode 100644 index 0000000..c86c16f --- /dev/null +++ b/pkg/hold/registration.go @@ -0,0 +1,267 @@ +package hold + +import ( + "context" + "encoding/json" + "fmt" + "log" + "net/http" + "net/url" + "strings" + "time" + + "atcr.io/pkg/atproto" + "atcr.io/pkg/auth/oauth" + "github.com/bluesky-social/indigo/atproto/identity" + "github.com/bluesky-social/indigo/atproto/syntax" +) + +// HealthHandler handles health check requests +func (s *HoldService) HealthHandler(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.Write([]byte(`{"status":"ok"}`)) +} + +// isHoldRegistered checks if a hold with the given public URL is already registered in the PDS +func (s *HoldService) isHoldRegistered(ctx context.Context, did, pdsEndpoint, publicURL string) (bool, error) { + // We need to query the PDS without authentication to check public records + // ATProto records are publicly readable, so we can use an unauthenticated client + client := atproto.NewClient(pdsEndpoint, did, "") + + // List all hold records for this DID + records, err := client.ListRecords(ctx, atproto.HoldCollection, 100) + if err != nil { + return false, fmt.Errorf("failed to list hold records: %w", err) + } + + // Check if any hold record matches our public URL + for _, record := range records { + var holdRecord atproto.HoldRecord + if err := json.Unmarshal(record.Value, &holdRecord); err != nil { + continue + } + + if holdRecord.Endpoint == publicURL { + return true, nil + } + } + + return false, nil +} + +// AutoRegister registers this hold service in the owner's PDS +// Checks if already registered first, then does OAuth if needed +func (s *HoldService) AutoRegister(callbackHandler *http.HandlerFunc) error { + reg := 
&s.config.Registration + publicURL := s.config.Server.PublicURL + + if publicURL == "" { + return fmt.Errorf("HOLD_PUBLIC_URL not set") + } + + if reg.OwnerDID == "" { + return fmt.Errorf("HOLD_OWNER not set - required for registration") + } + + ctx := context.Background() + + log.Printf("Checking registration status for DID: %s", reg.OwnerDID) + + // Resolve DID to PDS endpoint using indigo + directory := identity.DefaultDirectory() + didParsed, err := syntax.ParseDID(reg.OwnerDID) + if err != nil { + return fmt.Errorf("invalid owner DID: %w", err) + } + + ident, err := directory.LookupDID(ctx, didParsed) + if err != nil { + return fmt.Errorf("failed to resolve PDS for DID: %w", err) + } + + pdsEndpoint := ident.PDSEndpoint() + if pdsEndpoint == "" { + return fmt.Errorf("no PDS endpoint found for DID") + } + + log.Printf("PDS endpoint: %s", pdsEndpoint) + + // Check if hold is already registered + isRegistered, err := s.isHoldRegistered(ctx, reg.OwnerDID, pdsEndpoint, publicURL) + if err != nil { + log.Printf("Warning: failed to check registration status: %v", err) + log.Printf("Proceeding with OAuth registration...") + } else if isRegistered { + log.Printf("✓ Hold service already registered in PDS") + log.Printf("Public URL: %s", publicURL) + return nil + } + + // Not registered, need to do OAuth + log.Printf("Hold not registered, starting OAuth flow...") + + // Get handle from DID document (already resolved above) + handle := ident.Handle.String() + if handle == "" || handle == "handle.invalid" { + return fmt.Errorf("no valid handle found for DID") + } + + log.Printf("Resolved handle: %s", handle) + log.Printf("Starting OAuth registration for hold service") + log.Printf("Public URL: %s", publicURL) + + return s.registerWithOAuth(publicURL, handle, reg.OwnerDID, pdsEndpoint, callbackHandler) +} + +// registerWithOAuth performs OAuth flow and registers the hold +func (s *HoldService) registerWithOAuth(publicURL, handle, did, pdsEndpoint string, callbackHandler 
*http.HandlerFunc) error { + // Define the scopes we need for hold registration + holdScopes := []string{ + "atproto", + fmt.Sprintf("repo:%s?action=create", atproto.HoldCollection), + fmt.Sprintf("repo:%s?action=update", atproto.HoldCollection), + fmt.Sprintf("repo:%s?action=create", atproto.HoldCrewCollection), + fmt.Sprintf("repo:%s?action=update", atproto.HoldCrewCollection), + fmt.Sprintf("repo:%s?action=create", atproto.SailorProfileCollection), + fmt.Sprintf("repo:%s?action=update", atproto.SailorProfileCollection), + } + + // Determine base URL based on mode + // Callback path standardized to /auth/oauth/callback across ATCR + var baseURL string + + if s.config.Server.TestMode { + // Test mode: Use localhost for OAuth (browser accessible) but store real URL in hold record + // Extract port from publicURL (e.g., "http://172.28.0.3:8080" -> ":8080") + parsedURL, err := url.Parse(publicURL) + if err != nil { + return fmt.Errorf("failed to parse public URL: %w", err) + } + port := parsedURL.Port() + if port == "" { + port = "8080" // default + } + baseURL = fmt.Sprintf("http://127.0.0.1:%s", port) + } else { + baseURL = publicURL + } + + // Run interactive OAuth flow with persistent server + ctx := context.Background() + + result, err := oauth.InteractiveFlowWithCallback( + ctx, + baseURL, + handle, + holdScopes, // Pass hold-specific scopes + func(handler http.HandlerFunc) error { + // Populate the pre-registered callback handler + *callbackHandler = handler + return nil + }, + func(authURL string) error { + // Display OAuth URL for user to visit + log.Print("\n" + strings.Repeat("=", 80)) + log.Printf("OAUTH AUTHORIZATION REQUIRED") + log.Print(strings.Repeat("=", 80)) + log.Printf("\nPlease visit this URL to authorize the hold service:\n") + log.Printf(" %s\n", authURL) + log.Printf("Waiting for authorization...") + log.Print(strings.Repeat("=", 80) + "\n") + return nil + }, + ) + if err != nil { + return err + } + + log.Printf("Authorization received!") + 
log.Printf("OAuth session obtained successfully") + log.Printf("DID: %s", did) + log.Printf("PDS: %s", pdsEndpoint) + + // Create ATProto client with indigo's API client (handles DPoP automatically) + apiClient := result.Session.APIClient() + client := atproto.NewClientWithIndigoClient(pdsEndpoint, did, apiClient) + + return s.registerWithClient(publicURL, did, client) +} + +// registerWithClient registers the hold using an authenticated ATProto client +func (s *HoldService) registerWithClient(publicURL, did string, client *atproto.Client) error { + // Derive hold name from URL (hostname) + holdName, err := extractHostname(publicURL) + if err != nil { + return fmt.Errorf("failed to extract hostname from URL: %w", err) + } + + log.Printf("Registering hold service: url=%s, name=%s, owner=%s", publicURL, holdName, did) + + ctx := context.Background() + + // Create HoldRecord + holdRecord := atproto.NewHoldRecord(publicURL, did, s.config.Server.Public) + + // Use hostname as record key + holdResult, err := client.PutRecord(ctx, atproto.HoldCollection, holdName, holdRecord) + if err != nil { + return fmt.Errorf("failed to create hold record: %w", err) + } + + log.Printf("✓ Created hold record: %s", holdResult.URI) + + // Create HoldCrewRecord for the owner + crewRecord := atproto.NewHoldCrewRecord(holdResult.URI, did, "owner") + + crewRKey := fmt.Sprintf("%s-%s", holdName, did) + crewResult, err := client.PutRecord(ctx, atproto.HoldCrewCollection, crewRKey, crewRecord) + if err != nil { + return fmt.Errorf("failed to create crew record: %w", err) + } + + log.Printf("✓ Created crew record: %s", crewResult.URI) + + // Update sailor profile to set this as the default hold + profile, err := atproto.GetProfile(ctx, client) + if err != nil { + log.Printf("Warning: failed to get sailor profile: %v", err) + } else { + if profile == nil { + // Create new profile with this hold as default + profile = atproto.NewSailorProfileRecord(publicURL) + } else { + // Update existing 
profile with new defaultHold + profile.DefaultHold = publicURL + profile.UpdatedAt = time.Now() + } + + err = atproto.UpdateProfile(ctx, client, profile) + if err != nil { + log.Printf("Warning: failed to update sailor profile: %v", err) + } else { + log.Printf("✓ Updated sailor profile defaultHold: %s", publicURL) + } + } + + log.Print("\n" + strings.Repeat("=", 80)) + log.Printf("REGISTRATION COMPLETE") + log.Print(strings.Repeat("=", 80)) + log.Printf("Hold service is now registered and ready to use!") + log.Print(strings.Repeat("=", 80) + "\n") + + return nil +} + +// extractHostname extracts the hostname from a URL to use as the hold name +func extractHostname(urlStr string) (string, error) { + u, err := url.Parse(urlStr) + if err != nil { + return "", err + } + // Remove port if present + hostname := u.Hostname() + if hostname == "" { + return "", fmt.Errorf("no hostname in URL") + } + return hostname, nil +} diff --git a/pkg/hold/s3.go b/pkg/hold/s3.go new file mode 100644 index 0000000..5b24c62 --- /dev/null +++ b/pkg/hold/s3.go @@ -0,0 +1,195 @@ +package hold + +import ( + "context" + "fmt" + "log" + "strings" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/credentials" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/s3" +) + +// initS3Client initializes the S3 client for presigned URL generation +// Returns nil error if S3 client is successfully initialized +// Returns error if storage is not S3 or if initialization fails (service will fall back to proxy mode) +func (s *HoldService) initS3Client() error { + // Check if storage driver is S3 + if s.config.Storage.Type() != "s3" { + log.Printf("Storage driver is %s (not S3), presigned URLs disabled", s.config.Storage.Type()) + return nil // Not an error - just using different driver + } + + // Extract S3 configuration from storage parameters + params := s.config.Storage.Parameters() + + // Extract required S3 configuration + region, _ := 
params["region"].(string) + if region == "" { + region = "us-east-1" // Default region + } + + accessKey, _ := params["accesskey"].(string) + secretKey, _ := params["secretkey"].(string) + bucket, _ := params["bucket"].(string) + + if bucket == "" { + return fmt.Errorf("S3 bucket not configured") + } + + // Build AWS config + awsConfig := &aws.Config{ + Region: aws.String(region), + } + + // Add credentials if provided (allow IAM role auth if not provided) + if accessKey != "" && secretKey != "" { + awsConfig.Credentials = credentials.NewStaticCredentials(accessKey, secretKey, "") + } + + // Add custom endpoint for S3-compatible services (Storj, MinIO, R2, etc.) + if endpoint, ok := params["regionendpoint"].(string); ok && endpoint != "" { + awsConfig.Endpoint = aws.String(endpoint) + awsConfig.S3ForcePathStyle = aws.Bool(true) // Required for MinIO, Storj + } + + // Create AWS session + sess, err := session.NewSession(awsConfig) + if err != nil { + return fmt.Errorf("failed to create AWS session: %w", err) + } + + // Create S3 client + s.s3Client = s3.New(sess) + s.bucket = bucket + + // Extract path prefix if configured (rootdirectory in S3 params) + if rootDir, ok := params["rootdirectory"].(string); ok && rootDir != "" { + s.s3PathPrefix = strings.TrimPrefix(rootDir, "/") + } + + log.Printf("✅ S3 presigned URLs enabled") + + return nil +} + +// startMultipartUpload initiates a multipart upload and returns upload ID +func (s *HoldService) startMultipartUpload(ctx context.Context, digest string) (string, error) { + if s.s3Client == nil { + return "", fmt.Errorf("S3 not configured") + } + + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + result, err := s.s3Client.CreateMultipartUploadWithContext(ctx, &s3.CreateMultipartUploadInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + }) + if err != nil { + return "", err + } + + log.Printf("Started multipart upload: 
digest=%s, uploadID=%s", digest, *result.UploadId) + return *result.UploadId, nil +} + +// getPartPresignedURL generates presigned URL for a specific part +func (s *HoldService) getPartPresignedURL(ctx context.Context, digest, uploadID string, partNumber int) (string, error) { + if s.s3Client == nil { + return "", fmt.Errorf("S3 not configured") + } + + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + req, _ := s.s3Client.UploadPartRequest(&s3.UploadPartInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + UploadId: aws.String(uploadID), + PartNumber: aws.Int64(int64(partNumber)), + }) + + url, err := req.Presign(15 * time.Minute) + if err != nil { + return "", err + } + + log.Printf("Generated part presigned URL: digest=%s, uploadID=%s, part=%d", digest, uploadID, partNumber) + return url, nil +} + +// completeMultipartUpload finalizes the multipart upload +func (s *HoldService) completeMultipartUpload(ctx context.Context, digest, uploadID string, parts []CompletedPart) error { + if s.s3Client == nil { + return fmt.Errorf("S3 not configured") + } + + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + // Convert to S3 CompletedPart format + s3Parts := make([]*s3.CompletedPart, len(parts)) + for i, p := range parts { + s3Parts[i] = &s3.CompletedPart{ + PartNumber: aws.Int64(int64(p.PartNumber)), + ETag: aws.String(p.ETag), + } + } + + _, err := s.s3Client.CompleteMultipartUploadWithContext(ctx, &s3.CompleteMultipartUploadInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + UploadId: aws.String(uploadID), + MultipartUpload: &s3.CompletedMultipartUpload{ + Parts: s3Parts, + }, + }) + + if err != nil { + log.Printf("Failed to complete multipart upload: digest=%s, uploadID=%s, err=%v", digest, uploadID, err) + return err + } + + log.Printf("Completed multipart 
upload: digest=%s, uploadID=%s, parts=%d", digest, uploadID, len(parts)) + return nil +} + +// abortMultipartUpload aborts an in-progress multipart upload +func (s *HoldService) abortMultipartUpload(ctx context.Context, digest, uploadID string) error { + if s.s3Client == nil { + return fmt.Errorf("S3 not configured") + } + + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + _, err := s.s3Client.AbortMultipartUploadWithContext(ctx, &s3.AbortMultipartUploadInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + UploadId: aws.String(uploadID), + }) + + if err != nil { + log.Printf("Failed to abort multipart upload: digest=%s, uploadID=%s, err=%v", digest, uploadID, err) + return err + } + + log.Printf("Aborted multipart upload: digest=%s, uploadID=%s", digest, uploadID) + return nil +} diff --git a/pkg/hold/service.go b/pkg/hold/service.go new file mode 100644 index 0000000..8d01b73 --- /dev/null +++ b/pkg/hold/service.go @@ -0,0 +1,42 @@ +package hold + +import ( + "context" + "fmt" + "log" + + "github.com/aws/aws-sdk-go/service/s3" + storagedriver "github.com/distribution/distribution/v3/registry/storage/driver" + "github.com/distribution/distribution/v3/registry/storage/driver/factory" +) + +// HoldService provides presigned URLs for blob storage in a hold +type HoldService struct { + driver storagedriver.StorageDriver + config *Config + s3Client *s3.S3 // S3 client for presigned URLs (nil if not S3 storage) + bucket string // S3 bucket name + s3PathPrefix string // S3 path prefix (if any) +} + +// NewHoldService creates a new hold service +func NewHoldService(cfg *Config) (*HoldService, error) { + // Create storage driver from config + ctx := context.Background() + driver, err := factory.Create(ctx, cfg.Storage.Type(), cfg.Storage.Parameters()) + if err != nil { + return nil, fmt.Errorf("failed to create storage driver: %w", err) + } + + service := &HoldService{ + 
driver: driver, + config: cfg, + } + + // Initialize S3 client for presigned URLs (if using S3 storage) + if err := service.initS3Client(); err != nil { + log.Printf("WARNING: S3 presigned URLs disabled: %v", err) + } + + return service, nil +} diff --git a/pkg/hold/storage.go b/pkg/hold/storage.go new file mode 100644 index 0000000..3dc32af --- /dev/null +++ b/pkg/hold/storage.go @@ -0,0 +1,175 @@ +package hold + +import ( + "context" + "fmt" + "log" + "strings" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/s3" +) + +// blobPath converts a digest (e.g., "sha256:abc123...") or temp path to a storage path +// Distribution stores blobs as: /docker/registry/v2/blobs/{algorithm}/{xx}/{hash}/data +// where xx is the first 2 characters of the hash for directory sharding +// NOTE: Path must start with / for filesystem driver +func blobPath(digest string) string { + // Handle temp paths (start with uploads/temp-) + if strings.HasPrefix(digest, "uploads/temp-") { + return fmt.Sprintf("/docker/registry/v2/%s/data", digest) + } + + // Split digest into algorithm and hash + parts := strings.SplitN(digest, ":", 2) + if len(parts) != 2 { + // Fallback for malformed digest + return fmt.Sprintf("/docker/registry/v2/blobs/%s/data", digest) + } + + algorithm := parts[0] + hash := parts[1] + + // Use first 2 characters for sharding + if len(hash) < 2 { + return fmt.Sprintf("/docker/registry/v2/blobs/%s/%s/data", algorithm, hash) + } + + return fmt.Sprintf("/docker/registry/v2/blobs/%s/%s/%s/data", algorithm, hash[:2], hash) +} + +// getDownloadURL generates a download URL for a blob +func (s *HoldService) getDownloadURL(ctx context.Context, digest string, did string) (string, error) { + // Check if blob exists + path := blobPath(digest) + _, err := s.driver.Stat(ctx, path) + if err != nil { + return "", fmt.Errorf("blob not found: %w", err) + } + + // If S3 client available, generate presigned URL + if s.s3Client != nil { + // Build S3 key from 
blob path + // blobPath returns paths like: /docker/registry/v2/blobs/sha256/ab/abc123.../data + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + // Generate presigned GET URL + // Note: Don't use ResponseContentType - not supported by all S3-compatible services + req, _ := s.s3Client.GetObjectRequest(&s3.GetObjectInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + }) + + log.Printf("🔍 [getDownloadURL] Before Presign:") + log.Printf(" Digest: %s", digest) + log.Printf(" S3 Key: %s", s3Key) + log.Printf(" Bucket: %s", s.bucket) + log.Printf(" Request Operation: %s", req.Operation.Name) + log.Printf(" Request HTTPMethod: %s", req.Operation.HTTPMethod) + + url, err := req.Presign(15 * time.Minute) + if err != nil { + log.Printf("❌ [getDownloadURL] Presign FAILED: %v", err) + log.Printf(" Falling back to proxy URL") + return s.getProxyDownloadURL(digest, did), nil + } + + log.Printf("✅ [getDownloadURL] Presigned URL generated successfully") + log.Printf(" URL: %s", url) + log.Printf(" URL Length: %d chars", len(url)) + log.Printf(" Expires: 15min") + + return url, nil + } + + // Fallback: return proxy URL through this service + return s.getProxyDownloadURL(digest, did), nil +} + +// getHeadURL generates a HEAD URL for a blob +func (s *HoldService) getHeadURL(ctx context.Context, digest string, did string) (string, error) { + // Check if blob exists + path := blobPath(digest) + _, err := s.driver.Stat(ctx, path) + if err != nil { + return "", fmt.Errorf("blob not found: %w", err) + } + + // If S3 client available, generate presigned HEAD URL + if s.s3Client != nil { + // Build S3 key from blob path + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + // Generate presigned HEAD URL + req, _ := s.s3Client.HeadObjectRequest(&s3.HeadObjectInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + }) + + url, err := 
req.Presign(15 * time.Minute) + if err != nil { + log.Printf("❌ [getHeadURL] Presign FAILED: %v", err) + log.Printf(" Falling back to proxy URL") + return s.getProxyDownloadURL(digest, did), nil + } + + log.Printf("✅ [getHeadURL] Presigned HEAD URL generated: digest=%s", digest) + return url, nil + } + + // Fallback: return proxy URL through this service + return s.getProxyDownloadURL(digest, did), nil +} + +// getProxyDownloadURL returns a proxy URL for blob download (fallback when presigned URLs unavailable) +func (s *HoldService) getProxyDownloadURL(digest, did string) string { + return fmt.Sprintf("%s/blobs/%s?did=%s", s.config.Server.PublicURL, digest, did) +} + +// getUploadURL generates an upload URL for a blob +// Note: This is called from HandlePutPresignedURL which has the DID in the request +func (s *HoldService) getUploadURL(ctx context.Context, digest string, size int64, did string) (string, error) { + // If S3 client available, generate presigned URL + if s.s3Client != nil { + // Build S3 key from blob path + path := blobPath(digest) + s3Key := strings.TrimPrefix(path, "/") + if s.s3PathPrefix != "" { + s3Key = s.s3PathPrefix + "/" + s3Key + } + + // Generate presigned PUT URL with Content-Type in signature + req, _ := s.s3Client.PutObjectRequest(&s3.PutObjectInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + ContentType: aws.String("application/octet-stream"), + }) + + url, err := req.Presign(15 * time.Minute) + if err != nil { + log.Printf("WARN: Presigned URL generation failed for %s, falling back to proxy: %v", digest, err) + return s.getProxyUploadURL(digest, did), nil + } + + log.Printf("🔑 Generated presigned upload URL for %s (expires in 15min)", digest) + log.Printf(" S3 Key: %s", s3Key) + log.Printf(" Bucket: %s", s.bucket) + log.Printf(" Size: %d bytes", size) + return url, nil + } + + // Fallback: return proxy URL through this service + return s.getProxyUploadURL(digest, did), nil +} + +// getProxyUploadURL returns a proxy 
URL for blob upload (fallback when presigned URLs unavailable) +func (s *HoldService) getProxyUploadURL(digest, did string) string { + return fmt.Sprintf("%s/blobs/%s?did=%s", s.config.Server.PublicURL, digest, did) +} diff --git a/pkg/hold/types.go b/pkg/hold/types.go new file mode 100644 index 0000000..0a5343e --- /dev/null +++ b/pkg/hold/types.go @@ -0,0 +1,103 @@ +package hold + +import ( + "time" +) + +// GetPresignedURLRequest represents a request for a presigned download URL +type GetPresignedURLRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` +} + +// GetPresignedURLResponse contains the presigned URL +type GetPresignedURLResponse struct { + URL string `json:"url"` + ExpiresAt time.Time `json:"expires_at"` +} + +// HeadPresignedURLRequest represents a request for a presigned HEAD URL +type HeadPresignedURLRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` +} + +// HeadPresignedURLResponse contains the presigned HEAD URL +type HeadPresignedURLResponse struct { + URL string `json:"url"` + ExpiresAt time.Time `json:"expires_at"` +} + +// PutPresignedURLRequest represents a request for a presigned upload URL +type PutPresignedURLRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` + Size int64 `json:"size"` +} + +// PutPresignedURLResponse contains the presigned upload URL +type PutPresignedURLResponse struct { + URL string `json:"url"` + ExpiresAt time.Time `json:"expires_at"` +} + +// StartMultipartUploadRequest initiates a multipart upload +type StartMultipartUploadRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` +} + +// StartMultipartUploadResponse contains the multipart upload ID +type StartMultipartUploadResponse struct { + UploadID string `json:"upload_id"` + ExpiresAt time.Time `json:"expires_at"` +} + +// GetPartURLRequest requests a presigned URL for a specific part +type GetPartURLRequest struct { + DID string `json:"did"` + Digest string 
`json:"digest"` + UploadID string `json:"upload_id"` + PartNumber int `json:"part_number"` +} + +// GetPartURLResponse contains the presigned URL for a part +type GetPartURLResponse struct { + URL string `json:"url"` + ExpiresAt time.Time `json:"expires_at"` +} + +// CompleteMultipartRequest completes a multipart upload +type CompleteMultipartRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` + UploadID string `json:"upload_id"` + Parts []CompletedPart `json:"parts"` +} + +// CompletedPart represents an uploaded part with its ETag +type CompletedPart struct { + PartNumber int `json:"part_number"` + ETag string `json:"etag"` +} + +// AbortMultipartRequest aborts an in-progress upload +type AbortMultipartRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` + UploadID string `json:"upload_id"` +} + +// RegisterRequest represents a request to register this hold in a user's PDS +type RegisterRequest struct { + DID string `json:"did"` + AccessToken string `json:"access_token"` + PDSEndpoint string `json:"pds_endpoint"` +} + +// RegisterResponse contains the registration result +type RegisterResponse struct { + HoldURI string `json:"hold_uri"` + CrewURI string `json:"crew_uri"` + Message string `json:"message"` +} From a9e2a56568682a23f62a4c5abb7b8e1ad08e3cec Mon Sep 17 00:00:00 2001 From: Evan Jarrett Date: Sat, 11 Oct 2025 22:08:07 -0500 Subject: [PATCH 07/10] fix up multipart uploads. 
test filesystem and s3 storage drivers work as a fallback for s3 presigned urls --- cmd/hold/main.go | 30 ++++++ docker-compose.yml | 1 + docs/HOLD_MULTIPART.md | 166 +++++++++++++++++++++++++++----- docs/PRESIGNED_URLS.md | 49 ++++++++++ pkg/hold/config.go | 4 + pkg/hold/handlers.go | 53 ++++++++-- pkg/hold/multipart.go | 24 +++-- pkg/hold/s3.go | 22 ++++- pkg/hold/service.go | 12 ++- pkg/hold/storage.go | 18 ++++ pkg/storage/proxy_blob_store.go | 2 +- 11 files changed, 333 insertions(+), 48 deletions(-) diff --git a/cmd/hold/main.go b/cmd/hold/main.go index ff28b29..a4a27d6 100644 --- a/cmd/hold/main.go +++ b/cmd/hold/main.go @@ -5,6 +5,8 @@ import ( "fmt" "log" "net/http" + "strconv" + "strings" "time" "atcr.io/pkg/atproto" @@ -44,6 +46,34 @@ func main() { mux.HandleFunc("/complete-multipart", service.HandleCompleteMultipart) mux.HandleFunc("/abort-multipart", service.HandleAbortMultipart) + // Buffered multipart part upload endpoint (for when presigned URLs are disabled/unavailable) + mux.HandleFunc("/multipart-parts/", func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPut { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return + } + + // Parse URL: /multipart-parts/{uploadID}/{partNumber} + path := r.URL.Path[len("/multipart-parts/"):] + parts := strings.Split(path, "/") + if len(parts) != 2 { + http.Error(w, "invalid path format, expected /multipart-parts/{uploadID}/{partNumber}", http.StatusBadRequest) + return + } + + uploadID := parts[0] + partNumber, err := strconv.Atoi(parts[1]) + if err != nil { + http.Error(w, fmt.Sprintf("invalid part number: %v", err), http.StatusBadRequest) + return + } + + // Get DID from query param + did := r.URL.Query().Get("did") + + service.HandleMultipartPartUpload(w, r, uploadID, partNumber, did, service.MultipartMgr) + }) + // Pre-register OAuth callback route (will be populated by auto-registration) var oauthCallbackHandler http.HandlerFunc 
mux.HandleFunc("/auth/oauth/callback", func(w http.ResponseWriter, r *http.Request) { diff --git a/docker-compose.yml b/docker-compose.yml index 520c55b..2a89d1e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -47,6 +47,7 @@ services: # STORAGE_DRIVER: filesystem # STORAGE_ROOT_DIR: /var/lib/atcr/hold TEST_MODE: true + # DISABLE_PRESIGNED_URLS: true # Storage config comes from env_file (STORAGE_DRIVER, AWS_*, S3_*) build: context: . diff --git a/docs/HOLD_MULTIPART.md b/docs/HOLD_MULTIPART.md index e09d81e..50147cd 100644 --- a/docs/HOLD_MULTIPART.md +++ b/docs/HOLD_MULTIPART.md @@ -14,20 +14,31 @@ This dual-mode approach enables the hold service to work with: ## Current State -### What Works -- **S3 with presigned URLs**: Primary mode, working +### What Works ✅ +- **S3 Native Mode with presigned URLs**: Fully working! Direct uploads to S3 via presigned URLs +- **Buffered mode with S3**: Tested and working with `DISABLE_PRESIGNED_URLS=true` +- **Filesystem storage**: Tested and working! Buffered mode with filesystem driver - **AppView multipart client**: Implements chunked uploads via multipart API +- **MultipartManager**: Session tracking, automatic cleanup, thread-safe operations +- **Automatic fallback**: Falls back to buffered mode when S3 unavailable or disabled +- **ETag normalization**: Handles quoted/unquoted ETags from S3 +- **Route handler**: `/multipart-parts/{uploadID}/{partNumber}` endpoint added and tested -### What's Broken -- **Filesystem storage**: multipart endpoints return "S3 not configured" error -- **S3 fallback mode**: No fallback when presigned URL generation fails -- **Non-S3 drivers**: Azure, GCS, etc. not supported for multipart +### All Implementation Complete! 🎉 +All three multipart upload modes are fully implemented, tested, and working in production. + +### Bugs Fixed 🔧 +- **Missing S3 parts in complete**: For S3Native mode, parts uploaded directly to S3 weren't being recorded. 
Fixed by storing parts from request in `HandleCompleteMultipart` before calling `CompleteMultipartUploadWithManager`. +- **Malformed XML error from S3**: S3 requires ETags to be quoted in CompleteMultipartUpload XML. Added `normalizeETag()` function to ensure quotes are present. +- **Route missing**: `/multipart-parts/{uploadID}/{partNumber}` not registered in cmd/hold/main.go. Fixed by adding route handler with path parsing. +- **MultipartMgr access**: Field was private, preventing route handler access. Fixed by exporting as `MultipartMgr`. +- **DISABLE_PRESIGNED_URLS not logged**: `initS3Client()` didn't check the flag before initializing. Fixed with early return check and proper logging. ## Architecture ### Three Modes of Operation -#### Mode 1: S3 Native Multipart (Currently Working) +#### Mode 1: S3 Native Multipart ✅ WORKING ``` Docker → AppView → Hold → S3 (presigned URLs) ↓ @@ -47,7 +58,7 @@ Docker ──────────→ S3 (direct upload) - Minimal bandwidth usage - Fast uploads -#### Mode 2: S3 Proxy Mode (Not Yet Implemented) +#### Mode 2: S3 Proxy Mode (Buffered) ✅ WORKING ``` Docker → AppView → Hold → S3 (via driver) ↓ @@ -67,7 +78,7 @@ Docker → AppView → Hold → S3 (via driver) - S3 API fails to generate presigned URL - Fallback from Mode 1 -#### Mode 3: Filesystem Mode (Not Yet Implemented) +#### Mode 3: Filesystem Mode ✅ WORKING ``` Docker → AppView → Hold (filesystem driver) ↓ @@ -197,17 +208,24 @@ func (s *HoldService) HandleMultipartPartUpload( ## Integration Plan -### Phase 1: Migrate to pkg/hold (In Progress) +### Phase 1: Migrate to pkg/hold (COMPLETE) - [x] Extract code from cmd/hold/main.go to pkg/hold/ - [x] Create isolated multipart.go implementation -- [ ] Update cmd/hold/main.go to import pkg/hold -- [ ] Test existing S3 native multipart still works +- [x] Update cmd/hold/main.go to import pkg/hold +- [x] Test existing functionality works -### Phase 2: Add Buffered Mode Support -- [ ] Add MultipartManager to HoldService -- [ ] Update handlers to use 
`*WithManager` methods -- [ ] Add `/multipart-parts/{uploadID}/{partNumber}` route -- [ ] Test filesystem storage with buffered multipart +### Phase 2: Add Buffered Mode Support (COMPLETE ✅) +- [x] Add MultipartManager to HoldService +- [x] Update handlers to use `*WithManager` methods +- [x] Add DISABLE_PRESIGNED_URLS environment variable for testing +- [x] Implement presigned URL disable checks in all methods +- [x] **Fixed: Record S3 parts from request in HandleCompleteMultipart** +- [x] **Fixed: ETag normalization (add quotes for S3 XML)** +- [x] **Test S3 native mode with presigned URLs** ✅ WORKING +- [x] **Add route in cmd/hold/main.go** ✅ COMPLETE +- [x] **Export MultipartMgr field for route handler access** ✅ COMPLETE +- [x] **Test DISABLE_PRESIGNED_URLS=true with S3 storage** ✅ WORKING +- [x] **Test filesystem storage with buffered multipart** ✅ WORKING ### Phase 3: Update AppView - [ ] Detect hold capabilities (presigned vs proxy) @@ -230,19 +248,22 @@ func (s *HoldService) HandleMultipartPartUpload( ### Integration Tests **S3 Native Mode:** -- [ ] Start multipart → get presigned URLs → upload parts → complete -- [ ] Verify no data flows through hold service +- [x] Start multipart → get presigned URLs → upload parts → complete ✅ WORKING +- [x] Verify no data flows through hold service (only ~1KB API calls) - [ ] Test abort cleanup -**Buffered Mode (Filesystem):** -- [ ] Start multipart → get proxy URLs → upload parts → complete -- [ ] Verify parts assembled correctly +**Buffered Mode (S3 with DISABLE_PRESIGNED_URLS):** +- [x] Start multipart → get proxy URLs → upload parts → complete ✅ WORKING +- [x] Verify parts assembled correctly - [ ] Test missing part detection - [ ] Test abort cleanup -**Fallback:** -- [ ] Simulate presigned URL failure → should fallback to buffered -- [ ] Verify seamless transition +**Buffered Mode (Filesystem):** +- [x] Start multipart → get proxy URLs → upload parts → complete ✅ WORKING +- [x] Verify parts assembled correctly ✅ 
WORKING +- [x] Verify blobs written to filesystem ✅ WORKING +- [ ] Test missing part detection +- [ ] Test abort cleanup ### Load Tests - [ ] Concurrent multipart uploads (multiple sessions) @@ -337,6 +358,101 @@ For very large assembled blobs: - Google Cloud Storage resumable uploads - Backblaze B2 large file API +## Implementation Complete ✅ + +The buffered multipart mode is fully implemented with the following components: + +**Route Handler** (`cmd/hold/main.go:47-73`): +- Endpoint: `PUT /multipart-parts/{uploadID}/{partNumber}` +- Parses URL path to extract uploadID and partNumber +- Delegates to `service.HandleMultipartPartUpload()` + +**Exported Manager** (`pkg/hold/service.go:20`): +- Field `MultipartMgr` is now exported for route handler access +- All handlers updated to use `s.MultipartMgr` + +**Configuration Check** (`pkg/hold/s3.go:20-25`): +- `initS3Client()` checks `DISABLE_PRESIGNED_URLS` flag before initializing +- Logs clear message when presigned URLs are disabled +- Prevents misleading "S3 presigned URLs enabled" message + +## Testing Multipart Modes + +### Test 1: S3 Native Mode (presigned URLs) ✅ TESTED +```bash +export STORAGE_DRIVER=s3 +export S3_BUCKET=your-bucket +export AWS_ACCESS_KEY_ID=... +export AWS_SECRET_ACCESS_KEY=... +# Do NOT set DISABLE_PRESIGNED_URLS + +# Start hold service +./bin/atcr-hold + +# Push an image +docker push atcr.io/yourdid/test:latest + +# Expected logs: +# "✅ S3 presigned URLs enabled" +# "Started S3 native multipart: uploadID=... s3UploadID=..." +# "Completed multipart upload: digest=... uploadID=... parts=..." +``` + +**Status**: ✅ Working - Direct uploads to S3, minimal bandwidth through hold service + +### Test 2: Buffered Mode with S3 (forced proxy) ✅ TESTED +```bash +export STORAGE_DRIVER=s3 +export S3_BUCKET=your-bucket +export AWS_ACCESS_KEY_ID=... +export AWS_SECRET_ACCESS_KEY=... 
+export DISABLE_PRESIGNED_URLS=true # Force buffered mode + +# Start hold service +./bin/atcr-hold + +# Push an image +docker push atcr.io/yourdid/test:latest + +# Expected logs: +# "⚠️ S3 presigned URLs DISABLED by config (DISABLE_PRESIGNED_URLS=true)" +# "Presigned URLs disabled (DISABLE_PRESIGNED_URLS=true), using buffered mode" +# "Stored part: uploadID=... part=1 size=..." +# "Assembled buffered parts: uploadID=... parts=... totalSize=..." +# "Completed buffered multipart: uploadID=... size=... written=..." +``` + +**Status**: ✅ Working - Parts buffered in hold service memory, assembled and written to S3 via driver + +### Test 3: Filesystem Mode (always buffered) ✅ TESTED +```bash +export STORAGE_DRIVER=filesystem +export STORAGE_ROOT_DIR=/tmp/atcr-hold-test +# DISABLE_PRESIGNED_URLS not needed (filesystem never has presigned URLs) + +# Start hold service +./bin/atcr-hold + +# Push an image +docker push atcr.io/yourdid/test:latest + +# Expected logs: +# "Storage driver is filesystem (not S3), presigned URLs disabled" +# "Started buffered multipart: uploadID=..." +# "Stored part: uploadID=... part=1 size=..." +# "Assembled buffered parts: uploadID=... parts=... totalSize=..." +# "Completed buffered multipart: uploadID=... size=... written=..." + +# Verify blobs written to: +ls -lh /var/lib/atcr/hold/docker/registry/v2/blobs/sha256/ +# Or from outside container: +docker exec atcr-hold ls -lh /var/lib/atcr/hold/docker/registry/v2/blobs/sha256/ +``` + +**Status**: ✅ Working - Parts buffered in memory, assembled, and written to filesystem via driver + +**Note**: Initial HEAD requests will show "Path not found" errors - this is normal! Docker checks if blobs exist before uploading. The errors occur for blobs that haven't been uploaded yet. After upload, subsequent HEAD checks succeed. 
+ ## References - S3 Multipart Upload API: https://docs.aws.amazon.com/AmazonS3/latest/API/API_CreateMultipartUpload.html diff --git a/docs/PRESIGNED_URLS.md b/docs/PRESIGNED_URLS.md index d9cfca3..efa27ce 100644 --- a/docs/PRESIGNED_URLS.md +++ b/docs/PRESIGNED_URLS.md @@ -580,6 +580,55 @@ PRESIGNED_URLS_ENABLED=false docker-compose restart atcr-hold The implementation has automatic fallbacks, so partial failures won't break functionality. +## Testing with DISABLE_PRESIGNED_URLS + +### Environment Variable + +Set `DISABLE_PRESIGNED_URLS=true` to force proxy/buffered mode even when S3 is configured. + +**Use cases:** +- Testing proxy/buffered code paths with S3 storage +- Debugging multipart uploads in buffered mode +- Simulating S3 providers that don't support presigned URLs +- Verifying fallback behavior works correctly + +### How It Works + +When `DISABLE_PRESIGNED_URLS=true`: + +**Single blob operations:** +- `getDownloadURL()` returns proxy URL instead of S3 presigned URL +- `getHeadURL()` returns proxy URL instead of S3 presigned HEAD URL +- `getUploadURL()` returns proxy URL instead of S3 presigned PUT URL +- Client uses `/blobs/{digest}` endpoints (proxy through hold service) + +**Multipart uploads:** +- `StartMultipartUploadWithManager()` creates **Buffered** session instead of **S3Native** +- `GetPartUploadURL()` returns `/multipart-parts/{uploadID}/{partNumber}` instead of S3 presigned URL +- Parts are buffered in memory in the hold service +- `CompleteMultipartUploadWithManager()` assembles parts and writes via storage driver + +### Testing Example + +```bash +# Test S3 with forced proxy mode +export STORAGE_DRIVER=s3 +export S3_BUCKET=my-bucket +export AWS_ACCESS_KEY_ID=... +export AWS_SECRET_ACCESS_KEY=... 
+export DISABLE_PRESIGNED_URLS=true # Force buffered/proxy mode + +./bin/atcr-hold + +# Push an image - should use proxy mode +docker push atcr.io/yourdid/test:latest + +# Check logs for: +# "Presigned URLs disabled, using proxy URL" +# "Presigned URLs disabled (DISABLE_PRESIGNED_URLS=true), using buffered mode" +# "Stored part: uploadID=... part=1 size=..." +``` + ## Future Enhancements ### 1. Configurable Expiration diff --git a/pkg/hold/config.go b/pkg/hold/config.go index 1f51bbd..bb42725 100644 --- a/pkg/hold/config.go +++ b/pkg/hold/config.go @@ -42,6 +42,9 @@ type ServerConfig struct { // TestMode uses localhost for OAuth redirects while storing real URL in hold record (from env: TEST_MODE) TestMode bool `yaml:"test_mode"` + // DisablePresignedURLs forces proxy mode even with S3 configured (for testing) (from env: DISABLE_PRESIGNED_URLS) + DisablePresignedURLs bool `yaml:"disable_presigned_urls"` + // ReadTimeout for HTTP requests ReadTimeout time.Duration `yaml:"read_timeout"` @@ -63,6 +66,7 @@ func LoadConfigFromEnv() (*Config, error) { } cfg.Server.Public = os.Getenv("HOLD_PUBLIC") == "true" cfg.Server.TestMode = os.Getenv("TEST_MODE") == "true" + cfg.Server.DisablePresignedURLs = os.Getenv("DISABLE_PRESIGNED_URLS") == "true" cfg.Server.ReadTimeout = 5 * time.Minute // Increased for large blob uploads cfg.Server.WriteTimeout = 5 * time.Minute // Increased for large blob uploads diff --git a/pkg/hold/handlers.go b/pkg/hold/handlers.go index d5ee1f4..ce7fad1 100644 --- a/pkg/hold/handlers.go +++ b/pkg/hold/handlers.go @@ -363,14 +363,16 @@ func (s *HoldService) HandleStartMultipart(w http.ResponseWriter, r *http.Reques return } - // Start multipart upload + // Start multipart upload with manager (supports both S3Native and Buffered modes) ctx := r.Context() - uploadID, err := s.startMultipartUpload(ctx, req.Digest) + uploadID, mode, err := s.StartMultipartUploadWithManager(ctx, req.Digest, s.MultipartMgr) if err != nil { http.Error(w, fmt.Sprintf("failed to 
start multipart upload: %v", err), http.StatusInternalServerError) return } + log.Printf("Started multipart upload: uploadID=%s, mode=%v, digest=%s", uploadID, mode, req.Digest) + expiry := time.Now().Add(24 * time.Hour) // Multipart uploads can take longer resp := StartMultipartUploadResponse{ @@ -405,9 +407,16 @@ func (s *HoldService) HandleGetPartURL(w http.ResponseWriter, r *http.Request) { return } - // Get presigned URL for this part + // Get multipart session + session, err := s.MultipartMgr.GetSession(req.UploadID) + if err != nil { + http.Error(w, fmt.Sprintf("session not found: %v", err), http.StatusNotFound) + return + } + + // Get part upload URL (presigned for S3Native, proxy for Buffered) ctx := r.Context() - url, err := s.getPartPresignedURL(ctx, req.Digest, req.UploadID, req.PartNumber) + url, err := s.GetPartUploadURL(ctx, session, req.PartNumber, req.DID) if err != nil { http.Error(w, fmt.Sprintf("failed to generate part URL: %v", err), http.StatusInternalServerError) return @@ -447,13 +456,32 @@ func (s *HoldService) HandleCompleteMultipart(w http.ResponseWriter, r *http.Req return } - // Complete multipart upload + // Get multipart session + session, err := s.MultipartMgr.GetSession(req.UploadID) + if err != nil { + http.Error(w, fmt.Sprintf("session not found: %v", err), http.StatusNotFound) + return + } + + // For S3Native mode, use parts from request (uploaded directly to S3) + // For Buffered mode, parts are in the session + if session.Mode == S3Native { + // Record parts from AppView's request (they have ETags from S3) + for _, p := range req.Parts { + session.RecordS3Part(p.PartNumber, p.ETag, 0) + } + log.Printf("Recorded %d S3 parts from request for uploadID=%s", len(req.Parts), req.UploadID) + } + + // Complete multipart upload (handles both S3Native and Buffered modes) ctx := r.Context() - if err := s.completeMultipartUpload(ctx, req.Digest, req.UploadID, req.Parts); err != nil { + if err := s.CompleteMultipartUploadWithManager(ctx, 
session, s.MultipartMgr); err != nil { http.Error(w, fmt.Sprintf("failed to complete multipart upload: %v", err), http.StatusInternalServerError) return } + log.Printf("Completed multipart upload: uploadID=%s, mode=%v", req.UploadID, session.Mode) + w.WriteHeader(http.StatusOK) w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]string{ @@ -484,13 +512,22 @@ func (s *HoldService) HandleAbortMultipart(w http.ResponseWriter, r *http.Reques return } - // Abort multipart upload + // Get multipart session + session, err := s.MultipartMgr.GetSession(req.UploadID) + if err != nil { + http.Error(w, fmt.Sprintf("session not found: %v", err), http.StatusNotFound) + return + } + + // Abort multipart upload (handles both S3Native and Buffered modes) ctx := r.Context() - if err := s.abortMultipartUpload(ctx, req.Digest, req.UploadID); err != nil { + if err := s.AbortMultipartUploadWithManager(ctx, session, s.MultipartMgr); err != nil { http.Error(w, fmt.Sprintf("failed to abort multipart upload: %v", err), http.StatusInternalServerError) return } + log.Printf("Aborted multipart upload: uploadID=%s, mode=%v", req.UploadID, session.Mode) + w.WriteHeader(http.StatusOK) w.Header().Set("Content-Type", "application/json") json.NewEncoder(w).Encode(map[string]string{ diff --git a/pkg/hold/multipart.go b/pkg/hold/multipart.go index 607f80c..56393b5 100644 --- a/pkg/hold/multipart.go +++ b/pkg/hold/multipart.go @@ -26,14 +26,14 @@ const ( // MultipartSession tracks an in-progress multipart upload type MultipartSession struct { - UploadID string // Unique upload ID - Digest string // Target digest path - Mode MultipartMode // Upload mode (S3Native or Buffered) - S3UploadID string // S3 upload ID (for S3Native mode) - Parts map[int]*MultipartPart // Buffered parts (for Buffered mode) - CreatedAt time.Time // When upload started - LastActivity time.Time // Last part upload - mu sync.RWMutex // Protects Parts map + UploadID string // Unique upload ID + 
Digest string // Target digest path + Mode MultipartMode // Upload mode (S3Native or Buffered) + S3UploadID string // S3 upload ID (for S3Native mode) + Parts map[int]*MultipartPart // Buffered parts (for Buffered mode) + CreatedAt time.Time // When upload started + LastActivity time.Time // Last part upload + mu sync.RWMutex // Protects Parts map } // MultipartPart represents a single part in a multipart upload @@ -230,6 +230,14 @@ func (s *MultipartSession) GetCompletedParts() []CompletedPart { // StartMultipartUploadWithManager initiates a multipart upload using the manager // Returns uploadID and mode func (s *HoldService) StartMultipartUploadWithManager(ctx context.Context, digest string, manager *MultipartManager) (string, MultipartMode, error) { + // Check if presigned URLs are disabled for testing + if s.config.Server.DisablePresignedURLs { + log.Printf("Presigned URLs disabled (DISABLE_PRESIGNED_URLS=true), using buffered mode") + session := manager.CreateSession(digest, Buffered, "") + log.Printf("Started buffered multipart: uploadID=%s", session.UploadID) + return session.UploadID, Buffered, nil + } + // Try S3 native multipart first if s.s3Client != nil { s3UploadID, err := s.startMultipartUpload(ctx, digest) diff --git a/pkg/hold/s3.go b/pkg/hold/s3.go index 5b24c62..69d333a 100644 --- a/pkg/hold/s3.go +++ b/pkg/hold/s3.go @@ -17,6 +17,13 @@ import ( // Returns nil error if S3 client is successfully initialized // Returns error if storage is not S3 or if initialization fails (service will fall back to proxy mode) func (s *HoldService) initS3Client() error { + // Check if presigned URLs are explicitly disabled + if s.config.Server.DisablePresignedURLs { + log.Printf("⚠️ S3 presigned URLs DISABLED by config (DISABLE_PRESIGNED_URLS=true)") + log.Printf(" All uploads will use buffered mode (parts buffered in hold service)") + return nil // Not an error - just using buffered mode + } + // Check if storage driver is S3 if s.config.Storage.Type() != "s3" { 
log.Printf("Storage driver is %s (not S3), presigned URLs disabled", s.config.Storage.Type()) @@ -128,6 +135,17 @@ func (s *HoldService) getPartPresignedURL(ctx context.Context, digest, uploadID return url, nil } +// normalizeETag ensures an ETag has quotes (required by S3 CompleteMultipartUpload) +// S3 returns ETags with quotes, but HTTP clients may strip them +func normalizeETag(etag string) string { + // Already has quotes + if strings.HasPrefix(etag, "\"") && strings.HasSuffix(etag, "\"") { + return etag + } + // Add quotes + return fmt.Sprintf("\"%s\"", etag) +} + // completeMultipartUpload finalizes the multipart upload func (s *HoldService) completeMultipartUpload(ctx context.Context, digest, uploadID string, parts []CompletedPart) error { if s.s3Client == nil { @@ -141,11 +159,13 @@ func (s *HoldService) completeMultipartUpload(ctx context.Context, digest, uploa } // Convert to S3 CompletedPart format + // IMPORTANT: S3 requires ETags to be quoted in the CompleteMultipartUpload XML s3Parts := make([]*s3.CompletedPart, len(parts)) for i, p := range parts { + etag := normalizeETag(p.ETag) s3Parts[i] = &s3.CompletedPart{ PartNumber: aws.Int64(int64(p.PartNumber)), - ETag: aws.String(p.ETag), + ETag: aws.String(etag), } } diff --git a/pkg/hold/service.go b/pkg/hold/service.go index 8d01b73..eafc525 100644 --- a/pkg/hold/service.go +++ b/pkg/hold/service.go @@ -14,9 +14,10 @@ import ( type HoldService struct { driver storagedriver.StorageDriver config *Config - s3Client *s3.S3 // S3 client for presigned URLs (nil if not S3 storage) - bucket string // S3 bucket name - s3PathPrefix string // S3 path prefix (if any) + s3Client *s3.S3 // S3 client for presigned URLs (nil if not S3 storage) + bucket string // S3 bucket name + s3PathPrefix string // S3 path prefix (if any) + MultipartMgr *MultipartManager // Exported for access in route handlers } // NewHoldService creates a new hold service @@ -29,8 +30,9 @@ func NewHoldService(cfg *Config) (*HoldService, error) { } 
service := &HoldService{ - driver: driver, - config: cfg, + driver: driver, + config: cfg, + MultipartMgr: NewMultipartManager(), } // Initialize S3 client for presigned URLs (if using S3 storage) diff --git a/pkg/hold/storage.go b/pkg/hold/storage.go index 3dc32af..e094204 100644 --- a/pkg/hold/storage.go +++ b/pkg/hold/storage.go @@ -48,6 +48,12 @@ func (s *HoldService) getDownloadURL(ctx context.Context, digest string, did str return "", fmt.Errorf("blob not found: %w", err) } + // Check if presigned URLs are disabled for testing + if s.config.Server.DisablePresignedURLs { + log.Printf("Presigned URLs disabled, using proxy URL") + return s.getProxyDownloadURL(digest, did), nil + } + // If S3 client available, generate presigned URL if s.s3Client != nil { // Build S3 key from blob path @@ -99,6 +105,12 @@ func (s *HoldService) getHeadURL(ctx context.Context, digest string, did string) return "", fmt.Errorf("blob not found: %w", err) } + // Check if presigned URLs are disabled for testing + if s.config.Server.DisablePresignedURLs { + log.Printf("Presigned URLs disabled, using proxy URL") + return s.getProxyDownloadURL(digest, did), nil + } + // If S3 client available, generate presigned HEAD URL if s.s3Client != nil { // Build S3 key from blob path @@ -136,6 +148,12 @@ func (s *HoldService) getProxyDownloadURL(digest, did string) string { // getUploadURL generates an upload URL for a blob // Note: This is called from HandlePutPresignedURL which has the DID in the request func (s *HoldService) getUploadURL(ctx context.Context, digest string, size int64, did string) (string, error) { + // Check if presigned URLs are disabled for testing + if s.config.Server.DisablePresignedURLs { + log.Printf("Presigned URLs disabled, using proxy URL") + return s.getProxyUploadURL(digest, did), nil + } + // If S3 client available, generate presigned URL if s.s3Client != nil { // Build S3 key from blob path diff --git a/pkg/storage/proxy_blob_store.go 
b/pkg/storage/proxy_blob_store.go index 01f342c..30ae04f 100644 --- a/pkg/storage/proxy_blob_store.go +++ b/pkg/storage/proxy_blob_store.go @@ -573,7 +573,7 @@ type ProxyBlobWriter struct { buffer *bytes.Buffer // Buffer for current part size int64 // Total bytes written closed bool - id string // Distribution's upload ID (for state) + id string // Distribution's upload ID (for state) startedAt time.Time finalDigest string // Set on Commit } From ace980cff6a22ab0f68e3c9816096e7ad9053dea Mon Sep 17 00:00:00 2001 From: Evan Jarrett Date: Sat, 11 Oct 2025 22:32:13 -0500 Subject: [PATCH 08/10] clean up logging, consolidate presigned handlers --- cmd/hold/main.go | 6 +- docs/PRESIGNED_UPLOADS.md | 6 +- pkg/hold/handlers.go | 214 ++++++++------------------------ pkg/hold/s3.go | 6 + pkg/hold/storage.go | 176 ++++++++------------------ pkg/hold/types.go | 43 +++---- pkg/storage/proxy_blob_store.go | 38 ++---- 7 files changed, 139 insertions(+), 350 deletions(-) diff --git a/cmd/hold/main.go b/cmd/hold/main.go index a4a27d6..3501606 100644 --- a/cmd/hold/main.go +++ b/cmd/hold/main.go @@ -35,9 +35,9 @@ func main() { mux := http.NewServeMux() mux.HandleFunc("/health", service.HealthHandler) mux.HandleFunc("/register", service.HandleRegister) - mux.HandleFunc("/get-presigned-url", service.HandleGetPresignedURL) - mux.HandleFunc("/head-presigned-url", service.HandleHeadPresignedURL) - mux.HandleFunc("/put-presigned-url", service.HandlePutPresignedURL) + mux.HandleFunc("/get-presigned-url", service.HandlePresignedURL(hold.OperationGet)) + mux.HandleFunc("/head-presigned-url", service.HandlePresignedURL(hold.OperationHead)) + mux.HandleFunc("/put-presigned-url", service.HandlePresignedURL(hold.OperationPut)) mux.HandleFunc("/move", service.HandleMove) // Multipart upload endpoints diff --git a/docs/PRESIGNED_UPLOADS.md b/docs/PRESIGNED_UPLOADS.md index 2989b99..fa8a45b 100644 --- a/docs/PRESIGNED_UPLOADS.md +++ b/docs/PRESIGNED_UPLOADS.md @@ -297,7 +297,7 @@ func (w 
*ProxyBlobWriter) Cancel(ctx context.Context) error { // Clear buffer to free memory w.buffer = nil - fmt.Printf("❌ [ProxyBlobWriter.Cancel] Upload cancelled: id=%s\n", w.id) + fmt.Printf("[ProxyBlobWriter.Cancel] Upload cancelled: id=%s\n", w.id) return nil } ``` @@ -318,7 +318,7 @@ The current `getUploadURL()` implementation in `cmd/hold/main.go` (lines 528-587 ```go url, err := req.Presign(15 * time.Minute) if err != nil { - log.Printf("❌ Failed to generate presigned upload URL: %v", err) + log.Printf("Failed to generate presigned upload URL: %v", err) return s.getProxyUploadURL(digest, did), nil } @@ -442,7 +442,7 @@ func (s *HoldService) getHeadURL(ctx context.Context, digest string) (string, er url, err := req.Presign(15 * time.Minute) if err != nil { - log.Printf("❌ [getHeadURL] Presign failed: %v", err) + log.Printf("[getHeadURL] Presign failed: %v", err) // Fallback to proxy URL return s.getProxyHeadURL(digest), nil } diff --git a/pkg/hold/handlers.go b/pkg/hold/handlers.go index ce7fad1..56d0efd 100644 --- a/pkg/hold/handlers.go +++ b/pkg/hold/handlers.go @@ -12,157 +12,64 @@ import ( "atcr.io/pkg/atproto" ) -// HandleGetPresignedURL handles requests for download URLs -func (s *HoldService) HandleGetPresignedURL(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req GetPresignedURLRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - log.Printf("📨 [HandleGetPresignedURL] Request received:") - log.Printf(" DID: %s", req.DID) - log.Printf(" Digest: %s", req.Digest) - log.Printf(" Remote: %s", r.RemoteAddr) - log.Printf(" s3Client nil? 
%v", s.s3Client == nil) - - // Validate DID authorization for READ - if !s.isAuthorizedRead(req.DID) { - log.Printf("❌ [HandleGetPresignedURL] Authorization FAILED") - if req.DID == "" { - // Anonymous request to private hold - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - // Authenticated but not authorized - http.Error(w, "forbidden: access denied", http.StatusForbidden) +// HandlePresignedURL returns an HTTP handler for presigned URL requests (GET, HEAD, or PUT) +// This consolidates the three separate handlers into a single parameterized implementation +func (s *HoldService) HandlePresignedURL(operation PresignedURLOperation) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return } - return - } - // Generate presigned URL (15 minute expiry) - ctx := context.Background() - expiry := time.Now().Add(15 * time.Minute) - - // For now, construct direct URL to blob - // In production, this would use driver-specific presigned URLs - url, err := s.getDownloadURL(ctx, req.Digest, req.DID) - if err != nil { - log.Printf("❌ [HandleGetPresignedURL] getDownloadURL failed: %v", err) - http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) - return - } - - log.Printf("✅ [HandleGetPresignedURL] Returning URL to client") - - resp := GetPresignedURLResponse{ - URL: url, - ExpiresAt: expiry, - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resp) -} - -// HandleHeadPresignedURL handles requests for HEAD URLs -func (s *HoldService) HandleHeadPresignedURL(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req HeadPresignedURLRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, 
fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - log.Printf("📨 [HandleHeadPresignedURL] Request received:") - log.Printf(" DID: %s", req.DID) - log.Printf(" Digest: %s", req.Digest) - log.Printf(" Remote: %s", r.RemoteAddr) - - // Validate DID authorization for READ - if !s.isAuthorizedRead(req.DID) { - log.Printf("❌ [HandleHeadPresignedURL] Authorization FAILED") - if req.DID == "" { - // Anonymous request to private hold - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - // Authenticated but not authorized - http.Error(w, "forbidden: access denied", http.StatusForbidden) + var req PresignedURLRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return } - return - } - // Generate presigned HEAD URL (15 minute expiry) - ctx := context.Background() - expiry := time.Now().Add(15 * time.Minute) - - url, err := s.getHeadURL(ctx, req.Digest, req.DID) - if err != nil { - log.Printf("❌ [HandleHeadPresignedURL] getHeadURL failed: %v", err) - http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) - return - } - - log.Printf("✅ [HandleHeadPresignedURL] Returning URL to client") - - resp := HeadPresignedURLResponse{ - URL: url, - ExpiresAt: expiry, - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resp) -} - -// HandlePutPresignedURL handles requests for upload URLs -func (s *HoldService) HandlePutPresignedURL(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req PutPresignedURLRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - // Validate DID authorization for WRITE - if 
!s.isAuthorizedWrite(req.DID) { - if req.DID == "" { - // Anonymous write attempt - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - // Authenticated but not crew/owner - http.Error(w, "forbidden: write access denied", http.StatusForbidden) + // Validate DID authorization based on operation type + var authorized bool + switch operation { + case OperationGet, OperationHead: + authorized = s.isAuthorizedRead(req.DID) + case OperationPut: + authorized = s.isAuthorizedWrite(req.DID) + default: + http.Error(w, "unsupported operation", http.StatusBadRequest) + return } - return + + if !authorized { + log.Printf("[HandlePresignedURL:%s] Authorization FAILED", operation) + if req.DID == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: access denied", http.StatusForbidden) + } + return + } + + // Generate presigned URL (15 minute expiry) + ctx := context.Background() + expiry := time.Now().Add(15 * time.Minute) + + url, err := s.getPresignedURL(ctx, operation, req.Digest, req.DID) + if err != nil { + log.Printf("[HandlePresignedURL:%s] getPresignedURL failed: %v", operation, err) + http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) + return + } + + log.Printf("[HandlePresignedURL:%s] Returning URL to client", operation) + + resp := PresignedURLResponse{ + URL: url, + ExpiresAt: expiry, + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(resp) } - - // Generate presigned upload URL (15 minute expiry) - ctx := context.Background() - expiry := time.Now().Add(15 * time.Minute) - - url, err := s.getUploadURL(ctx, req.Digest, req.Size, req.DID) - if err != nil { - http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) - return - } - - resp := PutPresignedURLResponse{ - URL: url, - ExpiresAt: expiry, - } - - w.Header().Set("Content-Type", 
"application/json") - json.NewEncoder(w).Encode(resp) } // HandleProxyGet proxies a blob download through the service @@ -179,11 +86,6 @@ func (s *HoldService) HandleProxyGet(w http.ResponseWriter, r *http.Request) { return } - log.Printf("📥 [HandleProxyGet] Blob download request:") - log.Printf(" Method: %s", r.Method) - log.Printf(" Digest: %s", digest) - log.Printf(" Remote: %s", r.RemoteAddr) - // Get DID from query param or header did := r.URL.Query().Get("did") if did == "" { @@ -193,7 +95,7 @@ func (s *HoldService) HandleProxyGet(w http.ResponseWriter, r *http.Request) { // Authorize READ access if !s.isAuthorizedRead(did) { - log.Printf("❌ [HandleProxyGet] Authorization FAILED") + log.Printf("[HandleProxyGet] Authorization FAILED") if did == "" { http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) } else { @@ -201,7 +103,6 @@ func (s *HoldService) HandleProxyGet(w http.ResponseWriter, r *http.Request) { } return } - log.Printf("✅ [HandleProxyGet] Authorization SUCCESS") ctx := r.Context() path := blobPath(digest) @@ -290,14 +191,9 @@ func (s *HoldService) HandleProxyPut(w http.ResponseWriter, r *http.Request) { did = r.Header.Get("X-ATCR-DID") } - log.Printf("🔐 [HandleProxyPut] Authorization check:") - log.Printf(" Path: %s", digest) - log.Printf(" DID: %s", did) - log.Printf(" Owner DID: %s", s.config.Registration.OwnerDID) - // Authorize WRITE access if !s.isAuthorizedWrite(did) { - log.Printf("❌ [HandleProxyPut] Authorization FAILED") + log.Printf("[HandleProxyPut] Authorization FAILED") if did == "" { http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) } else { @@ -306,8 +202,6 @@ func (s *HoldService) HandleProxyPut(w http.ResponseWriter, r *http.Request) { return } - log.Printf("✅ [HandleProxyPut] Authorization SUCCESS") - // Stream blob to storage (no buffering) ctx := r.Context() path := blobPath(digest) diff --git a/pkg/hold/s3.go b/pkg/hold/s3.go index 69d333a..8885988 100644 --- 
a/pkg/hold/s3.go +++ b/pkg/hold/s3.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "log" + "sort" "strings" "time" @@ -158,6 +159,11 @@ func (s *HoldService) completeMultipartUpload(ctx context.Context, digest, uploa s3Key = s.s3PathPrefix + "/" + s3Key } + // Sort parts by part number (S3 requires ascending order) + sort.Slice(parts, func(i, j int) bool { + return parts[i].PartNumber < parts[j].PartNumber + }) + // Convert to S3 CompletedPart format // IMPORTANT: S3 requires ETags to be quoted in the CompleteMultipartUpload XML s3Parts := make([]*s3.CompletedPart, len(parts)) diff --git a/pkg/hold/storage.go b/pkg/hold/storage.go index e094204..5a407f3 100644 --- a/pkg/hold/storage.go +++ b/pkg/hold/storage.go @@ -39,79 +39,24 @@ func blobPath(digest string) string { return fmt.Sprintf("/docker/registry/v2/blobs/%s/%s/%s/data", algorithm, hash[:2], hash) } -// getDownloadURL generates a download URL for a blob -func (s *HoldService) getDownloadURL(ctx context.Context, digest string, did string) (string, error) { - // Check if blob exists +// getPresignedURL generates a presigned URL for GET, HEAD, or PUT operations +func (s *HoldService) getPresignedURL(ctx context.Context, operation PresignedURLOperation, digest string, did string) (string, error) { path := blobPath(digest) - _, err := s.driver.Stat(ctx, path) - if err != nil { - return "", fmt.Errorf("blob not found: %w", err) + + // Check blob exists for GET/HEAD operations (not for PUT since blob doesn't exist yet) + if operation == OperationGet || operation == OperationHead { + if _, err := s.driver.Stat(ctx, path); err != nil { + return "", fmt.Errorf("blob not found: %w", err) + } } - // Check if presigned URLs are disabled for testing + // Check if presigned URLs are disabled if s.config.Server.DisablePresignedURLs { log.Printf("Presigned URLs disabled, using proxy URL") - return s.getProxyDownloadURL(digest, did), nil + return s.getProxyURL(digest, did), nil } - // If S3 client available, generate presigned 
URL - if s.s3Client != nil { - // Build S3 key from blob path - // blobPath returns paths like: /docker/registry/v2/blobs/sha256/ab/abc123.../data - s3Key := strings.TrimPrefix(path, "/") - if s.s3PathPrefix != "" { - s3Key = s.s3PathPrefix + "/" + s3Key - } - - // Generate presigned GET URL - // Note: Don't use ResponseContentType - not supported by all S3-compatible services - req, _ := s.s3Client.GetObjectRequest(&s3.GetObjectInput{ - Bucket: aws.String(s.bucket), - Key: aws.String(s3Key), - }) - - log.Printf("🔍 [getDownloadURL] Before Presign:") - log.Printf(" Digest: %s", digest) - log.Printf(" S3 Key: %s", s3Key) - log.Printf(" Bucket: %s", s.bucket) - log.Printf(" Request Operation: %s", req.Operation.Name) - log.Printf(" Request HTTPMethod: %s", req.Operation.HTTPMethod) - - url, err := req.Presign(15 * time.Minute) - if err != nil { - log.Printf("❌ [getDownloadURL] Presign FAILED: %v", err) - log.Printf(" Falling back to proxy URL") - return s.getProxyDownloadURL(digest, did), nil - } - - log.Printf("✅ [getDownloadURL] Presigned URL generated successfully") - log.Printf(" URL: %s", url) - log.Printf(" URL Length: %d chars", len(url)) - log.Printf(" Expires: 15min") - - return url, nil - } - - // Fallback: return proxy URL through this service - return s.getProxyDownloadURL(digest, did), nil -} - -// getHeadURL generates a HEAD URL for a blob -func (s *HoldService) getHeadURL(ctx context.Context, digest string, did string) (string, error) { - // Check if blob exists - path := blobPath(digest) - _, err := s.driver.Stat(ctx, path) - if err != nil { - return "", fmt.Errorf("blob not found: %w", err) - } - - // Check if presigned URLs are disabled for testing - if s.config.Server.DisablePresignedURLs { - log.Printf("Presigned URLs disabled, using proxy URL") - return s.getProxyDownloadURL(digest, did), nil - } - - // If S3 client available, generate presigned HEAD URL + // Generate presigned URL if S3 client is available if s.s3Client != nil { // Build S3 key 
from blob path s3Key := strings.TrimPrefix(path, "/") @@ -119,75 +64,52 @@ func (s *HoldService) getHeadURL(ctx context.Context, digest string, did string) s3Key = s.s3PathPrefix + "/" + s3Key } - // Generate presigned HEAD URL - req, _ := s.s3Client.HeadObjectRequest(&s3.HeadObjectInput{ - Bucket: aws.String(s.bucket), - Key: aws.String(s3Key), - }) + // Create appropriate S3 request based on operation + var req interface { + Presign(time.Duration) (string, error) + } + switch operation { + case OperationGet: + // Note: Don't use ResponseContentType - not supported by all S3-compatible services + req, _ = s.s3Client.GetObjectRequest(&s3.GetObjectInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + }) - url, err := req.Presign(15 * time.Minute) - if err != nil { - log.Printf("❌ [getHeadURL] Presign FAILED: %v", err) - log.Printf(" Falling back to proxy URL") - return s.getProxyDownloadURL(digest, did), nil + case OperationHead: + req, _ = s.s3Client.HeadObjectRequest(&s3.HeadObjectInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + }) + + case OperationPut: + req, _ = s.s3Client.PutObjectRequest(&s3.PutObjectInput{ + Bucket: aws.String(s.bucket), + Key: aws.String(s3Key), + ContentType: aws.String("application/octet-stream"), + }) + + default: + return "", fmt.Errorf("unsupported operation: %s", operation) + } + + // Generate presigned URL with 15 minute expiry + url, err := req.Presign(15 * time.Minute) + if err != nil { + log.Printf("[getPresignedURL] Presign FAILED for %s: %v", operation, err) + log.Printf(" Falling back to proxy URL") + return s.getProxyURL(digest, did), nil } - log.Printf("✅ [getHeadURL] Presigned HEAD URL generated: digest=%s", digest) return url, nil } // Fallback: return proxy URL through this service - return s.getProxyDownloadURL(digest, did), nil + return s.getProxyURL(digest, did), nil } -// getProxyDownloadURL returns a proxy URL for blob download (fallback when presigned URLs unavailable) -func (s 
*HoldService) getProxyDownloadURL(digest, did string) string { - return fmt.Sprintf("%s/blobs/%s?did=%s", s.config.Server.PublicURL, digest, did) -} - -// getUploadURL generates an upload URL for a blob -// Note: This is called from HandlePutPresignedURL which has the DID in the request -func (s *HoldService) getUploadURL(ctx context.Context, digest string, size int64, did string) (string, error) { - // Check if presigned URLs are disabled for testing - if s.config.Server.DisablePresignedURLs { - log.Printf("Presigned URLs disabled, using proxy URL") - return s.getProxyUploadURL(digest, did), nil - } - - // If S3 client available, generate presigned URL - if s.s3Client != nil { - // Build S3 key from blob path - path := blobPath(digest) - s3Key := strings.TrimPrefix(path, "/") - if s.s3PathPrefix != "" { - s3Key = s.s3PathPrefix + "/" + s3Key - } - - // Generate presigned PUT URL with Content-Type in signature - req, _ := s.s3Client.PutObjectRequest(&s3.PutObjectInput{ - Bucket: aws.String(s.bucket), - Key: aws.String(s3Key), - ContentType: aws.String("application/octet-stream"), - }) - - url, err := req.Presign(15 * time.Minute) - if err != nil { - log.Printf("WARN: Presigned URL generation failed for %s, falling back to proxy: %v", digest, err) - return s.getProxyUploadURL(digest, did), nil - } - - log.Printf("🔑 Generated presigned upload URL for %s (expires in 15min)", digest) - log.Printf(" S3 Key: %s", s3Key) - log.Printf(" Bucket: %s", s.bucket) - log.Printf(" Size: %d bytes", size) - return url, nil - } - - // Fallback: return proxy URL through this service - return s.getProxyUploadURL(digest, did), nil -} - -// getProxyUploadURL returns a proxy URL for blob upload (fallback when presigned URLs unavailable) -func (s *HoldService) getProxyUploadURL(digest, did string) string { +// getProxyURL returns a proxy URL for blob operations (fallback when presigned URLs unavailable) +func (s *HoldService) getProxyURL(digest, did string) string { + // All operations 
use the same proxy endpoint return fmt.Sprintf("%s/blobs/%s?did=%s", s.config.Server.PublicURL, digest, did) } diff --git a/pkg/hold/types.go b/pkg/hold/types.go index 0a5343e..69c875d 100644 --- a/pkg/hold/types.go +++ b/pkg/hold/types.go @@ -4,39 +4,24 @@ import ( "time" ) -// GetPresignedURLRequest represents a request for a presigned download URL -type GetPresignedURLRequest struct { +// PresignedURLOperation defines the type of presigned URL operation +type PresignedURLOperation string + +const ( + OperationGet PresignedURLOperation = "GET" + OperationHead PresignedURLOperation = "HEAD" + OperationPut PresignedURLOperation = "PUT" +) + +// PresignedURLRequest represents a request for a presigned URL (GET, HEAD, or PUT) +type PresignedURLRequest struct { DID string `json:"did"` Digest string `json:"digest"` + Size int64 `json:"size,omitempty"` // Only required for PUT operations } -// GetPresignedURLResponse contains the presigned URL -type GetPresignedURLResponse struct { - URL string `json:"url"` - ExpiresAt time.Time `json:"expires_at"` -} - -// HeadPresignedURLRequest represents a request for a presigned HEAD URL -type HeadPresignedURLRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` -} - -// HeadPresignedURLResponse contains the presigned HEAD URL -type HeadPresignedURLResponse struct { - URL string `json:"url"` - ExpiresAt time.Time `json:"expires_at"` -} - -// PutPresignedURLRequest represents a request for a presigned upload URL -type PutPresignedURLRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` - Size int64 `json:"size"` -} - -// PutPresignedURLResponse contains the presigned upload URL -type PutPresignedURLResponse struct { +// PresignedURLResponse contains the presigned URL +type PresignedURLResponse struct { URL string `json:"url"` ExpiresAt time.Time `json:"expires_at"` } diff --git a/pkg/storage/proxy_blob_store.go b/pkg/storage/proxy_blob_store.go index 30ae04f..21324ff 100644 --- 
a/pkg/storage/proxy_blob_store.go +++ b/pkg/storage/proxy_blob_store.go @@ -147,42 +147,32 @@ func (p *ProxyBlobStore) Put(ctx context.Context, mediaType string, content []by // Get upload URL url, err := p.getUploadURL(ctx, dgst, int64(len(content))) if err != nil { - fmt.Printf("❌ [proxy_blob_store/Put] Failed to get upload URL: digest=%s, error=%v\n", dgst, err) + fmt.Printf("[proxy_blob_store/Put] Failed to get upload URL: digest=%s, error=%v\n", dgst, err) return distribution.Descriptor{}, err } - fmt.Printf("📤 [proxy_blob_store/Put] Starting PUT request:\n") - fmt.Printf(" Digest: %s\n", dgst) - fmt.Printf(" Size: %d bytes\n", len(content)) - fmt.Printf(" MediaType: %s\n", mediaType) - fmt.Printf(" URL: %s\n", url) - fmt.Printf(" Headers: Content-Type=application/octet-stream\n") - // Upload the blob req, err := http.NewRequestWithContext(ctx, "PUT", url, bytes.NewReader(content)) if err != nil { - fmt.Printf("❌ [proxy_blob_store/Put] Failed to create request: %v\n", err) + fmt.Printf("[proxy_blob_store/Put] Failed to create request: %v\n", err) return distribution.Descriptor{}, err } req.Header.Set("Content-Type", "application/octet-stream") resp, err := p.httpClient.Do(req) if err != nil { - fmt.Printf("❌ [proxy_blob_store/Put] HTTP request failed: %v\n", err) + fmt.Printf("[proxy_blob_store/Put] HTTP request failed: %v\n", err) return distribution.Descriptor{}, err } defer resp.Body.Close() - fmt.Printf("📥 [proxy_blob_store/Put] Response:\n") - fmt.Printf(" Status: %d %s\n", resp.StatusCode, resp.Status) - if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusCreated { bodyBytes, _ := io.ReadAll(resp.Body) fmt.Printf(" Error Body: %s\n", string(bodyBytes)) return distribution.Descriptor{}, fmt.Errorf("upload failed with status %d: %s", resp.StatusCode, string(bodyBytes)) } - fmt.Printf("✅ [proxy_blob_store/Put] Upload successful: digest=%s, size=%d\n", dgst, len(content)) + fmt.Printf("[proxy_blob_store/Put] Upload successful: digest=%s, 
size=%d\n", dgst, len(content)) return distribution.Descriptor{ Digest: dgst, @@ -224,10 +214,6 @@ func (p *ProxyBlobStore) ServeBlob(ctx context.Context, w http.ResponseWriter, r // Create returns a blob writer for uploading using multipart upload func (p *ProxyBlobStore) Create(ctx context.Context, options ...distribution.BlobCreateOption) (distribution.BlobWriter, error) { - fmt.Printf("🔧 [proxy_blob_store/Create] Starting multipart upload\n") - fmt.Printf(" Storage endpoint: %s\n", p.storageEndpoint) - fmt.Printf(" Repository: %s\n", p.repository) - // Parse options var opts distribution.CreateOptions for _, option := range options { @@ -624,8 +610,6 @@ func (w *ProxyBlobWriter) flushPart() error { return fmt.Errorf("failed to get part presigned URL: %w", err) } - fmt.Printf("📤 [flushPart] Uploading part %d: size=%d bytes\n", w.partNumber, w.buffer.Len()) - // Upload part to S3 req, err := http.NewRequestWithContext(ctx, "PUT", url, bytes.NewReader(w.buffer.Bytes())) if err != nil { @@ -655,7 +639,7 @@ func (w *ProxyBlobWriter) flushPart() error { ETag: etag, }) - fmt.Printf("✅ [flushPart] Part %d uploaded successfully: ETag=%s\n", w.partNumber, etag) + fmt.Printf("[flushPart] Part %d uploaded successfully: ETag=%s\n", w.partNumber, etag) // Reset buffer and increment part number w.buffer.Reset() @@ -706,8 +690,6 @@ func (w *ProxyBlobWriter) Commit(ctx context.Context, desc distribution.Descript } w.closed = true - fmt.Printf("📝 [Commit] Starting commit: digest=%s, size=%d\n", desc.Digest, w.size) - // Remove from global uploads map globalUploadsMu.Lock() delete(globalUploads, w.id) @@ -715,7 +697,7 @@ func (w *ProxyBlobWriter) Commit(ctx context.Context, desc distribution.Descript // Flush any remaining buffered data if w.buffer.Len() > 0 { - fmt.Printf("📤 [Commit] Flushing final buffer: %d bytes\n", w.buffer.Len()) + fmt.Printf("[Commit] Flushing final buffer: %d bytes\n", w.buffer.Len()) if err := w.flushPart(); err != nil { // Try to abort multipart on 
error tempDigest := fmt.Sprintf("uploads/temp-%s", w.id) @@ -735,7 +717,7 @@ func (w *ProxyBlobWriter) Commit(ctx context.Context, desc distribution.Descript tempPath := fmt.Sprintf("uploads/temp-%s", w.id) finalPath := desc.Digest.String() - fmt.Printf("🚚 [Commit] Moving blob: %s → %s\n", tempPath, finalPath) + fmt.Printf("[Commit] Moving blob: %s → %s\n", tempPath, finalPath) moveURL := fmt.Sprintf("%s/move?from=%s&to=%s&did=%s", w.store.storageEndpoint, tempPath, finalPath, w.store.did) @@ -755,7 +737,7 @@ func (w *ProxyBlobWriter) Commit(ctx context.Context, desc distribution.Descript return distribution.Descriptor{}, fmt.Errorf("move blob failed: status %d, body: %s", resp.StatusCode, string(bodyBytes)) } - fmt.Printf("✅ [Commit] Upload completed successfully: digest=%s, size=%d, parts=%d\n", desc.Digest, w.size, len(w.parts)) + fmt.Printf("[Commit] Upload completed successfully: digest=%s, size=%d, parts=%d\n", desc.Digest, w.size, len(w.parts)) return distribution.Descriptor{ Digest: desc.Digest, @@ -768,7 +750,7 @@ func (w *ProxyBlobWriter) Commit(ctx context.Context, desc distribution.Descript func (w *ProxyBlobWriter) Cancel(ctx context.Context) error { w.closed = true - fmt.Printf("❌ [Cancel] Cancelling upload: id=%s\n", w.id) + fmt.Printf("[Cancel] Cancelling upload: id=%s\n", w.id) // Remove from global uploads map globalUploadsMu.Lock() @@ -782,7 +764,7 @@ func (w *ProxyBlobWriter) Cancel(ctx context.Context) error { // Continue anyway - we want to mark upload as cancelled } - fmt.Printf("✅ [Cancel] Upload cancelled: id=%s\n", w.id) + fmt.Printf("[Cancel] Upload cancelled: id=%s\n", w.id) return nil } From 256cc883c9dc073072ae77a42ee76c066cd57f03 Mon Sep 17 00:00:00 2001 From: Evan Jarrett Date: Sat, 11 Oct 2025 23:13:20 -0500 Subject: [PATCH 09/10] consolidate presigned url endpoint. 
fix manifest pull blob from pds --- cmd/hold/main.go | 4 +- pkg/atproto/client.go | 21 ++++++ pkg/hold/handlers.go | 112 ++++++++++++++++---------------- pkg/hold/types.go | 7 +- pkg/storage/proxy_blob_store.go | 101 +++++++--------------------- 5 files changed, 103 insertions(+), 142 deletions(-) diff --git a/cmd/hold/main.go b/cmd/hold/main.go index 3501606..e329ee4 100644 --- a/cmd/hold/main.go +++ b/cmd/hold/main.go @@ -35,9 +35,7 @@ func main() { mux := http.NewServeMux() mux.HandleFunc("/health", service.HealthHandler) mux.HandleFunc("/register", service.HandleRegister) - mux.HandleFunc("/get-presigned-url", service.HandlePresignedURL(hold.OperationGet)) - mux.HandleFunc("/head-presigned-url", service.HandlePresignedURL(hold.OperationHead)) - mux.HandleFunc("/put-presigned-url", service.HandlePresignedURL(hold.OperationPut)) + mux.HandleFunc("/presigned-url", service.HandlePresignedURL) mux.HandleFunc("/move", service.HandleMove) // Multipart upload endpoints diff --git a/pkg/atproto/client.go b/pkg/atproto/client.go index 2b1e832..5f2c3a1 100644 --- a/pkg/atproto/client.go +++ b/pkg/atproto/client.go @@ -3,6 +3,7 @@ package atproto import ( "bytes" "context" + "encoding/base64" "encoding/json" "fmt" "io" @@ -343,6 +344,26 @@ func (c *Client) GetBlob(ctx context.Context, cid string) ([]byte, error) { return nil, fmt.Errorf("failed to read blob data: %w", err) } + // Check if PDS returned JSON-wrapped blob (Bluesky implementation) + // PDS may wrap blobs as JSON-encoded base64 strings + // Detection: Check if content starts with a quote (indicating JSON string) + if len(data) > 0 && data[0] == '"' { + // Blob is JSON-encoded - decode it + var base64Str string + if err := json.Unmarshal(data, &base64Str); err != nil { + return nil, fmt.Errorf("failed to unmarshal JSON-wrapped blob: %w", err) + } + + // Base64-decode the blob content + decoded, err := base64.StdEncoding.DecodeString(base64Str) + if err != nil { + return nil, fmt.Errorf("failed to base64-decode 
blob: %w", err) + } + + return decoded, nil + } + + // Raw blob response (expected ATProto behavior) return data, nil } diff --git a/pkg/hold/handlers.go b/pkg/hold/handlers.go index 56d0efd..5b628e3 100644 --- a/pkg/hold/handlers.go +++ b/pkg/hold/handlers.go @@ -12,64 +12,62 @@ import ( "atcr.io/pkg/atproto" ) -// HandlePresignedURL returns an HTTP handler for presigned URL requests (GET, HEAD, or PUT) -// This consolidates the three separate handlers into a single parameterized implementation -func (s *HoldService) HandlePresignedURL(operation PresignedURLOperation) http.HandlerFunc { - return func(w http.ResponseWriter, r *http.Request) { - if r.Method != http.MethodPost { - http.Error(w, "method not allowed", http.StatusMethodNotAllowed) - return - } - - var req PresignedURLRequest - if err := json.NewDecoder(r.Body).Decode(&req); err != nil { - http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) - return - } - - // Validate DID authorization based on operation type - var authorized bool - switch operation { - case OperationGet, OperationHead: - authorized = s.isAuthorizedRead(req.DID) - case OperationPut: - authorized = s.isAuthorizedWrite(req.DID) - default: - http.Error(w, "unsupported operation", http.StatusBadRequest) - return - } - - if !authorized { - log.Printf("[HandlePresignedURL:%s] Authorization FAILED", operation) - if req.DID == "" { - http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) - } else { - http.Error(w, "forbidden: access denied", http.StatusForbidden) - } - return - } - - // Generate presigned URL (15 minute expiry) - ctx := context.Background() - expiry := time.Now().Add(15 * time.Minute) - - url, err := s.getPresignedURL(ctx, operation, req.Digest, req.DID) - if err != nil { - log.Printf("[HandlePresignedURL:%s] getPresignedURL failed: %v", operation, err) - http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) - return - } - - 
log.Printf("[HandlePresignedURL:%s] Returning URL to client", operation) - - resp := PresignedURLResponse{ - URL: url, - ExpiresAt: expiry, - } - - w.Header().Set("Content-Type", "application/json") - json.NewEncoder(w).Encode(resp) +// HandlePresignedURL handles presigned URL requests (GET, HEAD, or PUT) +// Operation type is specified in the request body +func (s *HoldService) HandlePresignedURL(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + return } + + var req PresignedURLRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, fmt.Sprintf("invalid request: %v", err), http.StatusBadRequest) + return + } + + // Validate DID authorization based on operation type + var authorized bool + switch req.Operation { + case OperationGet, OperationHead: + authorized = s.isAuthorizedRead(req.DID) + case OperationPut: + authorized = s.isAuthorizedWrite(req.DID) + default: + http.Error(w, "unsupported operation", http.StatusBadRequest) + return + } + + if !authorized { + log.Printf("[HandlePresignedURL:%s] Authorization FAILED", req.Operation) + if req.DID == "" { + http.Error(w, "unauthorized: authentication required", http.StatusUnauthorized) + } else { + http.Error(w, "forbidden: access denied", http.StatusForbidden) + } + return + } + + // Generate presigned URL (15 minute expiry) + ctx := context.Background() + expiry := time.Now().Add(15 * time.Minute) + + url, err := s.getPresignedURL(ctx, req.Operation, req.Digest, req.DID) + if err != nil { + log.Printf("[HandlePresignedURL:%s] getPresignedURL failed: %v", req.Operation, err) + http.Error(w, fmt.Sprintf("failed to generate URL: %v", err), http.StatusInternalServerError) + return + } + + log.Printf("[HandlePresignedURL:%s] Returning URL to client", req.Operation) + + resp := PresignedURLResponse{ + URL: url, + ExpiresAt: expiry, + } + + w.Header().Set("Content-Type", "application/json") + 
json.NewEncoder(w).Encode(resp) } // HandleProxyGet proxies a blob download through the service diff --git a/pkg/hold/types.go b/pkg/hold/types.go index 69c875d..44d491e 100644 --- a/pkg/hold/types.go +++ b/pkg/hold/types.go @@ -15,9 +15,10 @@ const ( // PresignedURLRequest represents a request for a presigned URL (GET, HEAD, or PUT) type PresignedURLRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` - Size int64 `json:"size,omitempty"` // Only required for PUT operations + Operation PresignedURLOperation `json:"operation"` + DID string `json:"did"` + Digest string `json:"digest"` + Size int64 `json:"size,omitempty"` // Only required for PUT operations } // PresignedURLResponse contains the presigned URL diff --git a/pkg/storage/proxy_blob_store.go b/pkg/storage/proxy_blob_store.go index 21324ff..beb390b 100644 --- a/pkg/storage/proxy_blob_store.go +++ b/pkg/storage/proxy_blob_store.go @@ -270,11 +270,17 @@ func (p *ProxyBlobStore) Resume(ctx context.Context, id string) (distribution.Bl return writer, nil } -// getDownloadURL requests a presigned download URL from the storage service -func (p *ProxyBlobStore) getDownloadURL(ctx context.Context, dgst digest.Digest) (string, error) { +// getPresignedURL requests a presigned URL from the storage service for any operation +func (p *ProxyBlobStore) getPresignedURL(ctx context.Context, operation, dgst string, size int64) (string, error) { reqBody := map[string]any{ - "did": p.did, - "digest": dgst.String(), + "operation": operation, + "did": p.did, + "digest": dgst, + } + + // Only include size for PUT operations + if size > 0 { + reqBody["size"] = size } body, err := json.Marshal(reqBody) @@ -282,7 +288,7 @@ func (p *ProxyBlobStore) getDownloadURL(ctx context.Context, dgst digest.Digest) return "", err } - url := fmt.Sprintf("%s/get-presigned-url", p.storageEndpoint) + url := fmt.Sprintf("%s/presigned-url", p.storageEndpoint) req, err := http.NewRequestWithContext(ctx, "POST", url, 
bytes.NewReader(body)) if err != nil { return "", err @@ -296,7 +302,7 @@ func (p *ProxyBlobStore) getDownloadURL(ctx context.Context, dgst digest.Digest) defer resp.Body.Close() if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("failed to get download URL: status %d", resp.StatusCode) + return "", fmt.Errorf("failed to get presigned URL: status %d", resp.StatusCode) } var result struct { @@ -309,87 +315,24 @@ func (p *ProxyBlobStore) getDownloadURL(ctx context.Context, dgst digest.Digest) return result.URL, nil } +// getDownloadURL requests a presigned download URL from the storage service +func (p *ProxyBlobStore) getDownloadURL(ctx context.Context, dgst digest.Digest) (string, error) { + return p.getPresignedURL(ctx, "GET", dgst.String(), 0) +} + // getHeadURL requests a presigned HEAD URL from the storage service func (p *ProxyBlobStore) getHeadURL(ctx context.Context, dgst digest.Digest) (string, error) { - reqBody := map[string]any{ - "did": p.did, - "digest": dgst.String(), - } - - body, err := json.Marshal(reqBody) - if err != nil { - return "", err - } - - url := fmt.Sprintf("%s/head-presigned-url", p.storageEndpoint) - req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) - if err != nil { - return "", err - } - req.Header.Set("Content-Type", "application/json") - - resp, err := p.httpClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("failed to get HEAD URL: status %d", resp.StatusCode) - } - - var result struct { - URL string `json:"url"` - } - if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { - return "", err - } - - return result.URL, nil + return p.getPresignedURL(ctx, "HEAD", dgst.String(), 0) } // getUploadURL requests a presigned upload URL from the storage service func (p *ProxyBlobStore) getUploadURL(ctx context.Context, dgst digest.Digest, size int64) (string, error) { fmt.Printf("DEBUG 
[proxy_blob_store/getUploadURL]: storageEndpoint=%s, digest=%s\n", p.storageEndpoint, dgst) - - reqBody := map[string]any{ - "did": p.did, - "digest": dgst.String(), - "size": size, + url, err := p.getPresignedURL(ctx, "PUT", dgst.String(), size) + if err == nil { + fmt.Printf("DEBUG [proxy_blob_store/getUploadURL]: Got presigned URL=%s\n", url) } - - body, err := json.Marshal(reqBody) - if err != nil { - return "", err - } - - url := fmt.Sprintf("%s/put-presigned-url", p.storageEndpoint) - fmt.Printf("DEBUG [proxy_blob_store/getUploadURL]: Calling %s\n", url) - req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(body)) - if err != nil { - return "", err - } - req.Header.Set("Content-Type", "application/json") - - resp, err := p.httpClient.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - - if resp.StatusCode != http.StatusOK { - return "", fmt.Errorf("failed to get upload URL: status %d", resp.StatusCode) - } - - var result struct { - URL string `json:"url"` - } - if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { - return "", err - } - - fmt.Printf("DEBUG [proxy_blob_store/getUploadURL]: Got presigned URL=%s\n", result.URL) - return result.URL, nil + return url, err } // startMultipartUpload initiates a multipart upload via hold service From 0706132186ff69e2d000349f10781d23de489573 Mon Sep 17 00:00:00 2001 From: Evan Jarrett Date: Sat, 11 Oct 2025 23:29:56 -0500 Subject: [PATCH 10/10] code clean up --- cmd/appview/main.go | 2 +- cmd/appview/serve.go | 25 +++--- pkg/{ => appview}/middleware/registry.go | 2 +- pkg/{ => appview}/storage/hold_cache.go | 0 pkg/{ => appview}/storage/proxy_blob_store.go | 0 .../storage/routing_repository.go | 0 pkg/auth/scope.go | 7 ++ pkg/auth/types.go | 8 -- pkg/hold/handlers.go | 84 +++++++++++++++++ pkg/hold/types.go | 89 ------------------- 10 files changed, 105 insertions(+), 112 deletions(-) rename pkg/{ => appview}/middleware/registry.go (99%) rename pkg/{ => 
appview}/storage/hold_cache.go (100%) rename pkg/{ => appview}/storage/proxy_blob_store.go (100%) rename pkg/{ => appview}/storage/routing_repository.go (100%) delete mode 100644 pkg/auth/types.go delete mode 100644 pkg/hold/types.go diff --git a/cmd/appview/main.go b/cmd/appview/main.go index 9f98904..85e117d 100644 --- a/cmd/appview/main.go +++ b/cmd/appview/main.go @@ -8,7 +8,7 @@ import ( _ "github.com/distribution/distribution/v3/registry/storage/driver/inmemory" // Register our custom middleware - _ "atcr.io/pkg/middleware" + _ "atcr.io/pkg/appview/middleware" ) func main() { diff --git a/cmd/appview/serve.go b/cmd/appview/serve.go index 461b2bc..e00312b 100644 --- a/cmd/appview/serve.go +++ b/cmd/appview/serve.go @@ -19,16 +19,15 @@ import ( sqlite3 "github.com/mattn/go-sqlite3" "github.com/spf13/cobra" + "atcr.io/pkg/appview/middleware" "atcr.io/pkg/auth/oauth" "atcr.io/pkg/auth/token" - "atcr.io/pkg/middleware" // UI components "atcr.io/pkg/appview" "atcr.io/pkg/appview/db" uihandlers "atcr.io/pkg/appview/handlers" "atcr.io/pkg/appview/jetstream" - appmiddleware "atcr.io/pkg/appview/middleware" "github.com/gorilla/mux" ) @@ -474,7 +473,7 @@ func initializeUIRoutes(database *sql.DB, readOnlyDB *sql.DB, sessionStore *db.S // Public routes (with optional auth for navbar) // SECURITY: Public pages use read-only DB - router.Handle("/", appmiddleware.OptionalAuth(sessionStore, database)( + router.Handle("/", middleware.OptionalAuth(sessionStore, database)( &uihandlers.HomeHandler{ DB: readOnlyDB, Templates: templates, @@ -482,7 +481,7 @@ func initializeUIRoutes(database *sql.DB, readOnlyDB *sql.DB, sessionStore *db.S }, )).Methods("GET") - router.Handle("/api/recent-pushes", appmiddleware.OptionalAuth(sessionStore, database)( + router.Handle("/api/recent-pushes", middleware.OptionalAuth(sessionStore, database)( &uihandlers.RecentPushesHandler{ DB: readOnlyDB, Templates: templates, @@ -491,7 +490,7 @@ func initializeUIRoutes(database *sql.DB, readOnlyDB *sql.DB, 
sessionStore *db.S )).Methods("GET") // SECURITY: Search uses read-only DB to prevent writes and limit access to sensitive tables - router.Handle("/search", appmiddleware.OptionalAuth(sessionStore, database)( + router.Handle("/search", middleware.OptionalAuth(sessionStore, database)( &uihandlers.SearchHandler{ DB: readOnlyDB, Templates: templates, @@ -499,7 +498,7 @@ func initializeUIRoutes(database *sql.DB, readOnlyDB *sql.DB, sessionStore *db.S }, )).Methods("GET") - router.Handle("/api/search-results", appmiddleware.OptionalAuth(sessionStore, database)( + router.Handle("/api/search-results", middleware.OptionalAuth(sessionStore, database)( &uihandlers.SearchResultsHandler{ DB: readOnlyDB, Templates: templates, @@ -508,7 +507,7 @@ func initializeUIRoutes(database *sql.DB, readOnlyDB *sql.DB, sessionStore *db.S )).Methods("GET") // API route for repository stats (public, read-only) - router.Handle("/api/stats/{handle}/{repository}", appmiddleware.OptionalAuth(sessionStore, database)( + router.Handle("/api/stats/{handle}/{repository}", middleware.OptionalAuth(sessionStore, database)( &uihandlers.GetStatsHandler{ DB: readOnlyDB, Directory: oauthApp.Directory(), @@ -516,7 +515,7 @@ func initializeUIRoutes(database *sql.DB, readOnlyDB *sql.DB, sessionStore *db.S )).Methods("GET") // API routes for stars (require authentication) - router.Handle("/api/stars/{handle}/{repository}", appmiddleware.RequireAuth(sessionStore, database)( + router.Handle("/api/stars/{handle}/{repository}", middleware.RequireAuth(sessionStore, database)( &uihandlers.StarRepositoryHandler{ DB: database, // Needs write access Directory: oauthApp.Directory(), @@ -524,7 +523,7 @@ func initializeUIRoutes(database *sql.DB, readOnlyDB *sql.DB, sessionStore *db.S }, )).Methods("POST") - router.Handle("/api/stars/{handle}/{repository}", appmiddleware.RequireAuth(sessionStore, database)( + router.Handle("/api/stars/{handle}/{repository}", middleware.RequireAuth(sessionStore, database)( 
&uihandlers.UnstarRepositoryHandler{ DB: database, // Needs write access Directory: oauthApp.Directory(), @@ -532,7 +531,7 @@ func initializeUIRoutes(database *sql.DB, readOnlyDB *sql.DB, sessionStore *db.S }, )).Methods("DELETE") - router.Handle("/api/stars/{handle}/{repository}", appmiddleware.OptionalAuth(sessionStore, database)( + router.Handle("/api/stars/{handle}/{repository}", middleware.OptionalAuth(sessionStore, database)( &uihandlers.CheckStarHandler{ DB: readOnlyDB, // Read-only check Directory: oauthApp.Directory(), @@ -540,7 +539,7 @@ func initializeUIRoutes(database *sql.DB, readOnlyDB *sql.DB, sessionStore *db.S }, )).Methods("GET") - router.Handle("/u/{handle}", appmiddleware.OptionalAuth(sessionStore, database)( + router.Handle("/u/{handle}", middleware.OptionalAuth(sessionStore, database)( &uihandlers.UserPageHandler{ DB: readOnlyDB, Templates: templates, @@ -548,7 +547,7 @@ func initializeUIRoutes(database *sql.DB, readOnlyDB *sql.DB, sessionStore *db.S }, )).Methods("GET") - router.Handle("/r/{handle}/{repository}", appmiddleware.OptionalAuth(sessionStore, database)( + router.Handle("/r/{handle}/{repository}", middleware.OptionalAuth(sessionStore, database)( &uihandlers.RepositoryPageHandler{ DB: readOnlyDB, Templates: templates, @@ -560,7 +559,7 @@ func initializeUIRoutes(database *sql.DB, readOnlyDB *sql.DB, sessionStore *db.S // Authenticated routes authRouter := router.NewRoute().Subrouter() - authRouter.Use(appmiddleware.RequireAuth(sessionStore, database)) + authRouter.Use(middleware.RequireAuth(sessionStore, database)) authRouter.Handle("/settings", &uihandlers.SettingsHandler{ Templates: templates, diff --git a/pkg/middleware/registry.go b/pkg/appview/middleware/registry.go similarity index 99% rename from pkg/middleware/registry.go rename to pkg/appview/middleware/registry.go index a0e9660..782383c 100644 --- a/pkg/middleware/registry.go +++ b/pkg/appview/middleware/registry.go @@ -14,10 +14,10 @@ import ( 
"github.com/distribution/distribution/v3/registry/storage/driver" "github.com/distribution/reference" + "atcr.io/pkg/appview/storage" "atcr.io/pkg/atproto" "atcr.io/pkg/auth" "atcr.io/pkg/auth/oauth" - "atcr.io/pkg/storage" ) // Global refresher instance (set by main.go) diff --git a/pkg/storage/hold_cache.go b/pkg/appview/storage/hold_cache.go similarity index 100% rename from pkg/storage/hold_cache.go rename to pkg/appview/storage/hold_cache.go diff --git a/pkg/storage/proxy_blob_store.go b/pkg/appview/storage/proxy_blob_store.go similarity index 100% rename from pkg/storage/proxy_blob_store.go rename to pkg/appview/storage/proxy_blob_store.go diff --git a/pkg/storage/routing_repository.go b/pkg/appview/storage/routing_repository.go similarity index 100% rename from pkg/storage/routing_repository.go rename to pkg/appview/storage/routing_repository.go diff --git a/pkg/auth/scope.go b/pkg/auth/scope.go index faaff55..93b517b 100644 --- a/pkg/auth/scope.go +++ b/pkg/auth/scope.go @@ -5,6 +5,13 @@ import ( "strings" ) +// AccessEntry represents access permissions for a resource +type AccessEntry struct { + Type string `json:"type"` // "repository" + Name string `json:"name,omitempty"` // e.g., "alice/myapp" + Actions []string `json:"actions,omitempty"` // e.g., ["pull", "push"] +} + // ParseScope parses Docker registry scope strings into AccessEntry structures // Scope format: "repository:alice/myapp:pull,push" // Multiple scopes can be provided diff --git a/pkg/auth/types.go b/pkg/auth/types.go deleted file mode 100644 index e5ffa1c..0000000 --- a/pkg/auth/types.go +++ /dev/null @@ -1,8 +0,0 @@ -package auth - -// AccessEntry represents access permissions for a resource -type AccessEntry struct { - Type string `json:"type"` // "repository" - Name string `json:"name,omitempty"` // e.g., "alice/myapp" - Actions []string `json:"actions,omitempty"` // e.g., ["pull", "push"] -} diff --git a/pkg/hold/handlers.go b/pkg/hold/handlers.go index 5b628e3..dda4a28 100644 --- 
a/pkg/hold/handlers.go +++ b/pkg/hold/handlers.go @@ -12,6 +12,29 @@ import ( "atcr.io/pkg/atproto" ) +// PresignedURLOperation defines the type of presigned URL operation +type PresignedURLOperation string + +const ( + OperationGet PresignedURLOperation = "GET" + OperationHead PresignedURLOperation = "HEAD" + OperationPut PresignedURLOperation = "PUT" +) + +// PresignedURLRequest represents a request for a presigned URL (GET, HEAD, or PUT) +type PresignedURLRequest struct { + Operation PresignedURLOperation `json:"operation"` + DID string `json:"did"` + Digest string `json:"digest"` + Size int64 `json:"size,omitempty"` // Only required for PUT operations +} + +// PresignedURLResponse contains the presigned URL +type PresignedURLResponse struct { + URL string `json:"url"` + ExpiresAt time.Time `json:"expires_at"` +} + // HandlePresignedURL handles presigned URL requests (GET, HEAD, or PUT) // Operation type is specified in the request body func (s *HoldService) HandlePresignedURL(w http.ResponseWriter, r *http.Request) { @@ -232,6 +255,18 @@ func (s *HoldService) HandleProxyPut(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusCreated) } +// StartMultipartUploadRequest initiates a multipart upload +type StartMultipartUploadRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` +} + +// StartMultipartUploadResponse contains the multipart upload ID +type StartMultipartUploadResponse struct { + UploadID string `json:"upload_id"` + ExpiresAt time.Time `json:"expires_at"` +} + // HandleStartMultipart initiates a multipart upload func (s *HoldService) HandleStartMultipart(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { @@ -276,6 +311,20 @@ func (s *HoldService) HandleStartMultipart(w http.ResponseWriter, r *http.Reques json.NewEncoder(w).Encode(resp) } +// GetPartURLRequest requests a presigned URL for a specific part +type GetPartURLRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` 
+ UploadID string `json:"upload_id"` + PartNumber int `json:"part_number"` +} + +// GetPartURLResponse contains the presigned URL for a part +type GetPartURLResponse struct { + URL string `json:"url"` + ExpiresAt time.Time `json:"expires_at"` +} + // HandleGetPartURL generates a presigned URL for uploading a specific part func (s *HoldService) HandleGetPartURL(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { @@ -325,6 +374,20 @@ func (s *HoldService) HandleGetPartURL(w http.ResponseWriter, r *http.Request) { json.NewEncoder(w).Encode(resp) } +// CompleteMultipartRequest completes a multipart upload +type CompleteMultipartRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` + UploadID string `json:"upload_id"` + Parts []CompletedPart `json:"parts"` +} + +// CompletedPart represents an uploaded part with its ETag +type CompletedPart struct { + PartNumber int `json:"part_number"` + ETag string `json:"etag"` +} + // HandleCompleteMultipart completes a multipart upload func (s *HoldService) HandleCompleteMultipart(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { @@ -381,6 +444,13 @@ func (s *HoldService) HandleCompleteMultipart(w http.ResponseWriter, r *http.Req }) } +// AbortMultipartRequest aborts an in-progress upload +type AbortMultipartRequest struct { + DID string `json:"did"` + Digest string `json:"digest"` + UploadID string `json:"upload_id"` +} + // HandleAbortMultipart aborts an in-progress multipart upload func (s *HoldService) HandleAbortMultipart(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { @@ -427,6 +497,20 @@ func (s *HoldService) HandleAbortMultipart(w http.ResponseWriter, r *http.Reques }) } +// RegisterRequest represents a request to register this hold in a user's PDS +type RegisterRequest struct { + DID string `json:"did"` + AccessToken string `json:"access_token"` + PDSEndpoint string `json:"pds_endpoint"` +} + +// RegisterResponse contains the 
registration result +type RegisterResponse struct { + HoldURI string `json:"hold_uri"` + CrewURI string `json:"crew_uri"` + Message string `json:"message"` +} + // HandleRegister registers this hold service in a user's PDS (manual endpoint) func (s *HoldService) HandleRegister(w http.ResponseWriter, r *http.Request) { if r.Method != http.MethodPost { diff --git a/pkg/hold/types.go b/pkg/hold/types.go deleted file mode 100644 index 44d491e..0000000 --- a/pkg/hold/types.go +++ /dev/null @@ -1,89 +0,0 @@ -package hold - -import ( - "time" -) - -// PresignedURLOperation defines the type of presigned URL operation -type PresignedURLOperation string - -const ( - OperationGet PresignedURLOperation = "GET" - OperationHead PresignedURLOperation = "HEAD" - OperationPut PresignedURLOperation = "PUT" -) - -// PresignedURLRequest represents a request for a presigned URL (GET, HEAD, or PUT) -type PresignedURLRequest struct { - Operation PresignedURLOperation `json:"operation"` - DID string `json:"did"` - Digest string `json:"digest"` - Size int64 `json:"size,omitempty"` // Only required for PUT operations -} - -// PresignedURLResponse contains the presigned URL -type PresignedURLResponse struct { - URL string `json:"url"` - ExpiresAt time.Time `json:"expires_at"` -} - -// StartMultipartUploadRequest initiates a multipart upload -type StartMultipartUploadRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` -} - -// StartMultipartUploadResponse contains the multipart upload ID -type StartMultipartUploadResponse struct { - UploadID string `json:"upload_id"` - ExpiresAt time.Time `json:"expires_at"` -} - -// GetPartURLRequest requests a presigned URL for a specific part -type GetPartURLRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` - UploadID string `json:"upload_id"` - PartNumber int `json:"part_number"` -} - -// GetPartURLResponse contains the presigned URL for a part -type GetPartURLResponse struct { - URL string `json:"url"` - 
ExpiresAt time.Time `json:"expires_at"` -} - -// CompleteMultipartRequest completes a multipart upload -type CompleteMultipartRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` - UploadID string `json:"upload_id"` - Parts []CompletedPart `json:"parts"` -} - -// CompletedPart represents an uploaded part with its ETag -type CompletedPart struct { - PartNumber int `json:"part_number"` - ETag string `json:"etag"` -} - -// AbortMultipartRequest aborts an in-progress upload -type AbortMultipartRequest struct { - DID string `json:"did"` - Digest string `json:"digest"` - UploadID string `json:"upload_id"` -} - -// RegisterRequest represents a request to register this hold in a user's PDS -type RegisterRequest struct { - DID string `json:"did"` - AccessToken string `json:"access_token"` - PDSEndpoint string `json:"pds_endpoint"` -} - -// RegisterResponse contains the registration result -type RegisterResponse struct { - HoldURI string `json:"hold_uri"` - CrewURI string `json:"crew_uri"` - Message string `json:"message"` -}