fix(s3): keep server-side copy data in the bucket collection (#9607)

* fix(s3): keep server-side copy data in the bucket collection

UploadPartCopy and SSE-C CopyObject assigned destination volumes against
r.URL.Path, the S3 request URI. The filer derives a bucket's collection
only when the assign path sits under its buckets folder, so an S3 URI
routed copied bytes to the default collection instead of the destination
bucket's. Assign against the destination's real filer path.

* refactor(s3): centralize copy-part path and thread dstPath into SSE-C copy

Extract copyPartLocation so the fast path and writeEmptyCopyPart share one
definition of the .uploads/<id>/<n>_copy.part location. Pass the destination
filer path into copyChunksWithSSEC instead of re-deriving it from the request,
and thread it through key rotation so re-encrypt copies also assign in the
destination bucket's collection.
This commit is contained in:
Chris Lu
2026-05-21 09:35:42 -07:00
committed by GitHub
parent eae8f33db5
commit 83b7ea5e7b
4 changed files with 80 additions and 16 deletions

View File

@@ -627,6 +627,17 @@ type CopyPartResult struct {
ETag string `xml:"ETag"`
}
// copyPartLocation returns the destination directory and filename for a
// server-side copy part under the destination bucket's .uploads folder. Copy
// parts carry a fixed "copy" suffix (rather than the random suffix
// genPartUploadPath mints for client uploads) so re-copying a part replaces
// its predecessor in place.
func (s3a *S3ApiServer) copyPartLocation(dstBucket, uploadID string, partID int) (uploadDir, partName string) {
uploadDir = s3a.genUploadsFolder(dstBucket) + "/" + uploadID
partName = fmt.Sprintf("%04d_%s.part", partID, "copy")
return
}
func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Request) {
t := time.Now().UTC().Truncate(time.Millisecond)
// https://docs.aws.amazon.com/AmazonS3/latest/dev/CopyingObjctsUsingRESTMPUapi.html
@@ -845,6 +856,15 @@ func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Req
Extended: make(map[string][]byte),
}
// The copied part lives under the destination bucket's .uploads folder.
// Assign destination volumes against that real filer path so they land in
// the destination bucket's collection. r.URL.Path is the S3 request URI
// (e.g. /bucket/key), not a filer path, so passing it would skip the
// filer's bucket-to-collection mapping and route the copied bytes to the
// default collection.
uploadDir, partName := s3a.copyPartLocation(dstBucket, uploadID, partID)
dstPartPath := uploadDir + "/" + partName
// Handle zero-size files or empty ranges
if entry.Attributes.FileSize == 0 || endOffset < startOffset {
// For zero-size files or invalid ranges, create an empty part with size 0
@@ -852,7 +872,7 @@ func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Req
dstEntry.Chunks = nil
} else {
// Copy chunks that overlap with the range
dstChunks, err := s3a.copyChunksForRange(entry, startOffset, endOffset, r.URL.Path)
dstChunks, err := s3a.copyChunksForRange(entry, startOffset, endOffset, dstPartPath)
if err != nil {
glog.Errorf("CopyObjectPartHandler copy chunks error: %v", err)
s3err.WriteErrorResponse(w, r, s3err.ErrInternalError)
@@ -862,9 +882,6 @@ func (s3a *S3ApiServer) CopyObjectPartHandler(w http.ResponseWriter, r *http.Req
}
// Save the part entry to the multipart uploads folder
uploadDir := s3a.genUploadsFolder(dstBucket) + "/" + uploadID
partName := fmt.Sprintf("%04d_%s.part", partID, "copy")
// Check if part exists and remove it first (allow re-copying same part)
if exists, _ := s3a.exists(uploadDir, partName, false); exists {
if err := s3a.rm(uploadDir, partName, false, false); err != nil {
@@ -2276,7 +2293,10 @@ func (s3a *S3ApiServer) copyCrossEncryptionChunk(chunk *filer_pb.FileChunk, sour
// copyChunksWithSSEC handles SSE-C aware copying with smart fast/slow path selection
// Returns chunks and destination metadata that should be applied to the destination entry
func (s3a *S3ApiServer) copyChunksWithSSEC(entry *filer_pb.Entry, r *http.Request) ([]*filer_pb.FileChunk, map[string][]byte, error) {
// dstPath is the destination object's filer path, so volume assignment targets
// the destination bucket's collection. The caller must not pass r.URL.Path (the
// S3 request URI), which would route copied chunks to the default collection.
func (s3a *S3ApiServer) copyChunksWithSSEC(entry *filer_pb.Entry, r *http.Request, dstPath string) ([]*filer_pb.FileChunk, map[string][]byte, error) {
// Parse SSE-C headers
copySourceKey, err := ParseSSECCopySourceHeaders(r)
@@ -2304,7 +2324,7 @@ func (s3a *S3ApiServer) copyChunksWithSSEC(entry *filer_pb.Entry, r *http.Reques
if isMultipartSSEC {
glog.V(2).Infof("Detected multipart SSE-C object with %d encrypted chunks for copy", sseCChunks)
return s3a.copyMultipartSSECChunks(entry, copySourceKey, destKey, r.URL.Path)
return s3a.copyMultipartSSECChunks(entry, copySourceKey, destKey, dstPath)
}
// Single-part SSE-C object: use original logic
@@ -2320,13 +2340,13 @@ func (s3a *S3ApiServer) copyChunksWithSSEC(entry *filer_pb.Entry, r *http.Reques
case SSECCopyStrategyDirect:
// FAST PATH: Direct chunk copy
glog.V(2).Infof("Using fast path: direct chunk copy for %s", r.URL.Path)
chunks, err := s3a.copyChunks(entry, r.URL.Path)
chunks, err := s3a.copyChunks(entry, dstPath)
return chunks, nil, err
case SSECCopyStrategyDecryptEncrypt:
// SLOW PATH: Decrypt and re-encrypt
glog.V(2).Infof("Using slow path: decrypt/re-encrypt for %s", r.URL.Path)
chunks, destIV, err := s3a.copyChunksWithReencryption(entry, copySourceKey, destKey, r.URL.Path)
chunks, destIV, err := s3a.copyChunksWithReencryption(entry, copySourceKey, destKey, dstPath)
if err != nil {
return nil, nil, err
}

View File

@@ -0,0 +1,45 @@
package s3api
import (
"fmt"
"testing"
"github.com/seaweedfs/seaweedfs/weed/filer"
"github.com/seaweedfs/seaweedfs/weed/util"
)
// TestCopyDestinationPathResolvesBucketCollection guards against server-side
// copies routing data into the default collection.
//
// The filer maps a write to a bucket's collection only when the AssignVolume
// Path sits under its buckets folder (/buckets/<bucket>/...). UploadPartCopy
// and SSE-C CopyObject used to assign destination volumes against r.URL.Path,
// the S3 request URI (e.g. /bucket/key), which never has that prefix; the
// copied bytes therefore landed in the default collection instead of the
// destination bucket's. The handlers must assign against the real filer path
// of the destination.
func TestCopyDestinationPathResolvesBucketCollection(t *testing.T) {
const bucket = "docker-registry"
s3a := &S3ApiServer{option: &S3ApiServerOption{BucketsPath: "/buckets"}}
f := &filer.Filer{DirBucketsPath: "/buckets"}
// The S3 request URI is not a filer path: the filer cannot derive the
// destination bucket from it. This is the shape that caused the leak.
if got := f.DetectBucket(util.FullPath("/" + bucket + "/blobs/data")); got != "" {
t.Fatalf("S3 request URI unexpectedly mapped to collection %q; the test no longer reproduces the bug", got)
}
// UploadPartCopy assigns against the destination part path under .uploads.
uploadDir, partName := s3a.copyPartLocation(bucket, "uploadid", 1)
partPath := uploadDir + "/" + partName
if got := f.DetectBucket(util.FullPath(partPath)); got != bucket {
t.Fatalf("UploadPartCopy dst path %q resolved to collection %q, want %q", partPath, got, bucket)
}
// CopyObject (including the SSE-C paths) assigns against the destination
// object path.
objPath := fmt.Sprintf("%s/%s", s3a.bucketDir(bucket), "blobs/data")
if got := f.DetectBucket(util.FullPath(objPath)); got != bucket {
t.Fatalf("CopyObject dst path %q resolved to collection %q, want %q", objPath, got, bucket)
}
}

View File

@@ -407,8 +407,7 @@ func (s3a *S3ApiServer) copyObjectPartViaReencryption(
// writeEmptyCopyPart writes a 0-byte part entry for an empty UploadPartCopy
// range, mirroring the legacy fast path's handling of endOffset < startOffset.
func (s3a *S3ApiServer) writeEmptyCopyPart(dstBucket, uploadID string, partID int) (string, s3err.ErrorCode) {
uploadDir := s3a.genUploadsFolder(dstBucket) + "/" + uploadID
partName := fmt.Sprintf("%04d_%s.part", partID, "copy")
uploadDir, partName := s3a.copyPartLocation(dstBucket, uploadID, partID)
if exists, _ := s3a.exists(uploadDir, partName, false); exists {
if err := s3a.rm(uploadDir, partName, false, false); err != nil {
return "", s3err.ErrInternalError

View File

@@ -45,7 +45,7 @@ func (s3a *S3ApiServer) executeUnifiedCopyStrategy(entry *filer_pb.Entry, r *htt
return chunks, nil, err
case CopyStrategyKeyRotation:
return s3a.executeKeyRotation(entry, r, state)
return s3a.executeKeyRotation(entry, r, state, dstBucket, dstPath)
case CopyStrategyEncrypt:
return s3a.executeEncryptCopy(entry, r, state, dstBucket, dstPath)
@@ -90,13 +90,13 @@ func (s3a *S3ApiServer) mapCopyErrorToS3Error(err error) s3err.ErrorCode {
}
// executeKeyRotation handles key rotation for same-object copies
func (s3a *S3ApiServer) executeKeyRotation(entry *filer_pb.Entry, r *http.Request, state *EncryptionState) ([]*filer_pb.FileChunk, map[string][]byte, error) {
func (s3a *S3ApiServer) executeKeyRotation(entry *filer_pb.Entry, r *http.Request, state *EncryptionState, dstBucket, dstPath string) ([]*filer_pb.FileChunk, map[string][]byte, error) {
// For key rotation, we only need to update metadata, not re-copy chunks
// This is a significant optimization for same-object key changes
if state.SrcSSEC && state.DstSSEC {
// SSE-C key rotation - need to handle new key/IV, use reencrypt logic
return s3a.executeReencryptCopy(entry, r, state, "", "")
return s3a.executeReencryptCopy(entry, r, state, dstBucket, dstPath)
}
if state.SrcSSEKMS && state.DstSSEKMS {
@@ -105,14 +105,14 @@ func (s3a *S3ApiServer) executeKeyRotation(entry *filer_pb.Entry, r *http.Reques
}
// Fallback to reencrypt if we can't do metadata-only rotation
return s3a.executeReencryptCopy(entry, r, state, "", "")
return s3a.executeReencryptCopy(entry, r, state, dstBucket, dstPath)
}
// executeEncryptCopy handles plain → encrypted copies
func (s3a *S3ApiServer) executeEncryptCopy(entry *filer_pb.Entry, r *http.Request, state *EncryptionState, dstBucket, dstPath string) ([]*filer_pb.FileChunk, map[string][]byte, error) {
if state.DstSSEC {
// Use existing SSE-C copy logic
return s3a.copyChunksWithSSEC(entry, r)
return s3a.copyChunksWithSSEC(entry, r, dstPath)
}
if state.DstSSEKMS {
@@ -145,7 +145,7 @@ func (s3a *S3ApiServer) executeDecryptCopy(entry *filer_pb.Entry, r *http.Reques
func (s3a *S3ApiServer) executeReencryptCopy(entry *filer_pb.Entry, r *http.Request, state *EncryptionState, dstBucket, dstPath string) ([]*filer_pb.FileChunk, map[string][]byte, error) {
// Use chunk-by-chunk approach for all cross-encryption scenarios (consistent behavior)
if state.SrcSSEC && state.DstSSEC {
return s3a.copyChunksWithSSEC(entry, r)
return s3a.copyChunksWithSSEC(entry, r, dstPath)
}
if state.SrcSSEKMS && state.DstSSEKMS {