From 5b1098e2ada6aedf6f9723c78f0616195eab8bcb Mon Sep 17 00:00:00 2001 From: qzhello <951012707@qq.com> Date: Fri, 29 May 2026 03:55:08 +0800 Subject: [PATCH] fix(s3): honor MetadataDirective=REPLACE for system metadata on CopyObject (#9721) * fix(s3): honor MetadataDirective=REPLACE for system metadata on CopyObject * fix(s3): match copy metadata keys case-insensitively for legacy data Legacy / non-S3 write paths (FUSE mount, direct filer HTTP API, older versions) may persist Cache-Control etc. in lowercase form. Make isManagedCopyMetadataKey case-insensitive so mergeCopyMetadata still clears stale source values under REPLACE, and let the COPY branch of processMetadataBytes fall back to a lowercase key on the source so legacy values survive into the destination (re-emitted as canonical). Mirrors the existing x-amz-meta-* backward-compat path. * fix(s3): keep legacy non-canonical tag and system metadata across COPY The previous case-insensitive isManagedCopyMetadataKey caused mergeCopyMetadata to delete legacy lowercase x-amz-tagging-* and mixed-case system headers, but the COPY branch in processMetadataBytes only matched canonical or strict-lowercase keys when re-populating them, so any non-canonical key was permanently dropped on COPY. - COPY now scans existing in a single pass and uses strings.EqualFold against the system header whitelist, re-emitting under the canonical header name. Handles any case folding (CACHE-CONTROL, Cache-control, etc.), not just strings.ToLower. - COPY tagging branch now uses hasPrefixFold(k, AmzObjectTagging) and re-emits the canonical X-Amz-Tagging-, mirroring the existing X-Amz-Meta-* migration path. - Tests cover lowercase/uppercase/mixed-case system headers and tags surviving COPY. * fix(s3): make COPY of system metadata and tags deterministic across case variants Single-pass EqualFold matching let Go's randomized map iteration pick either the canonical or a legacy-cased value when both lived on the source, so the COPY result varied between calls. Both COPY branches now use two passes: a canonical-exact lookup first, then a case-insensitive fallback that only writes when the canonical slot is still empty. Mirrors the collision-check pattern used by the X-Amz-Meta-* migration path. Tests run the canonical-vs-legacy collision 32 times each to exercise varied map orders. * fix(s3): apply REPLACE Content-Type on in-place copy The metadata-only self-copy path never set Attributes.Mime, so a same-key CopyObject with REPLACE and a new Content-Type silently kept the old type. Route in place only when the Mime is unchanged; otherwise take the locked clone path (still metadata-only, reuses source chunks) and set the new Mime there. Also covers the versioned self-copy path. * perf(s3): drop per-key ToLower in isManagedCopyMetadataKey Use the allocation-free hasPrefixFold helper instead of lowercasing the key and both constant prefixes on every metadata-key check. --------- Co-authored-by: Chris Lu --- weed/s3api/s3api_object_handlers_copy.go | 120 +++++++- .../s3api_object_handlers_copy_mime_test.go | 281 ++++++++++++++++++ weed/s3api/s3err/s3api_errors.go | 15 + 3 files changed, 409 insertions(+), 7 deletions(-) create mode 100644 weed/s3api/s3api_object_handlers_copy_mime_test.go diff --git a/weed/s3api/s3api_object_handlers_copy.go b/weed/s3api/s3api_object_handlers_copy.go index 72942c34c..512bfa95b 100644 --- a/weed/s3api/s3api_object_handlers_copy.go +++ b/weed/s3api/s3api_object_handlers_copy.go @@ -32,6 +32,41 @@ const ( DirectiveReplace = "REPLACE" ) +// AWS default when REPLACE is requested without a Content-Type. +const defaultCopyContentType = "binary/octet-stream" + +// System-metadata headers that REPLACE must rewrite on the destination. +// Content-Type lives on Attributes.Mime, tagging/storage-class have their +// own handling, so they're not in this list. +var copyReplaceSystemHeaders = []string{ + "Cache-Control", + "Content-Encoding", + "Content-Disposition", + "Content-Language", + "Expires", +} + +func resolveDestinationMime(reqHeader http.Header, sourceMime string, replaceMeta bool) string { + if replaceMeta { + if ct := reqHeader.Get("Content-Type"); ct != "" { + return ct + } + return defaultCopyContentType + } + return sourceMime +} + +// Empty means default (COPY). Anything else outside {COPY, REPLACE} must be +// rejected, not silently downgraded. +func isValidDirective(value string) bool { + return value == "" || value == DirectiveCopy || value == DirectiveReplace +} + +// hasPrefixFold reports whether s starts with prefix, ignoring case. +func hasPrefixFold(s, prefix string) bool { + return len(s) >= len(prefix) && strings.EqualFold(s[:len(prefix)], prefix) +} + func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request) { t := time.Now().UTC().Truncate(time.Millisecond) @@ -80,6 +115,15 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request return } + if !isValidDirective(r.Header.Get(s3_constants.AmzUserMetaDirective)) { + s3err.WriteErrorResponse(w, r, s3err.ErrInvalidMetadataDirective) + return + } + if !isValidDirective(r.Header.Get(s3_constants.AmzObjectTaggingDirective)) { + s3err.WriteErrorResponse(w, r, s3err.ErrInvalidTagDirective) + return + } + replaceMeta, replaceTagging := replaceDirective(r.Header) // If source object is empty or bucket is empty, reply back invalid copy source. @@ -161,8 +205,15 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request // A non-versioned in-place metadata replace routes to the owner as a // serialized PATCH (off the distributed lock); versioned/suspended (which // create a new version) and the no-owner bootstrap keep the lock. + // + // REPLACE can also change Content-Type, which lives on Attributes.Mime, + // not Extended. The routed PATCH only carries Extended keys, so when the + // Mime actually changes keep the lock and take the clone path below: it is + // still metadata-only (reuses the source chunks) but can set the Mime. owner := s3a.objectWriteOwner(dstBucket, dstObject) - routeInPlace := owner != "" && dstVersioningState == "" + sourceMime := entry.GetAttributes().GetMime() + mimeChanged := resolveDestinationMime(r.Header, sourceMime, replaceMeta) != sourceMime + routeInPlace := owner != "" && dstVersioningState == "" && !mimeChanged selfCopyBody := func() s3err.ErrorCode { currentEntry, currentErr := s3a.resolveCopySourceEntry(srcBucket, srcObject, srcVersionId, srcVersioningState) if currentErr != nil || currentEntry.IsDirectory { @@ -188,6 +239,7 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request if updatedEntry.Attributes == nil { updatedEntry.Attributes = &filer_pb.FuseAttributes{} } + updatedEntry.Attributes.Mime = resolveDestinationMime(r.Header, currentEntry.GetAttributes().GetMime(), replaceMeta) updatedEntry.Attributes.Mtime = t.Unix() var finErr error dstVersionId, etag, finErr = s3a.finalizeCopyDestination(dstBucket, dstObject, dstVersioningState, updatedEntry) @@ -242,7 +294,7 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request FileSize: entry.Attributes.FileSize, Mtime: t.Unix(), Crtime: entry.Attributes.Crtime, - Mime: entry.Attributes.Mime, + Mime: resolveDestinationMime(r.Header, entry.Attributes.Mime, replaceMeta), }, Extended: make(map[string][]byte), } @@ -288,10 +340,10 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request return } - // Apply processed metadata to destination entry - for k, v := range processedMetadata { - dstEntry.Extended[k] = v - } + // mergeCopyMetadata drops stale managed keys before applying the new set, + // so REPLACE doesn't leak source values through the merge. Mirrors the + // self-copy path's routedMetadataReplace. + dstEntry.Extended = mergeCopyMetadata(dstEntry.Extended, processedMetadata) // For zero-size files or files without chunks, handle inline content // This includes encrypted inline files that need decryption/re-encryption @@ -565,7 +617,16 @@ func isManagedCopyMetadataKey(key string) bool { s3_constants.AmzTagCount: return true } - return strings.HasPrefix(key, s3_constants.AmzUserMetaPrefix) || strings.HasPrefix(key, s3_constants.AmzObjectTagging) + for _, h := range copyReplaceSystemHeaders { + if strings.EqualFold(key, h) { + return true + } + } + // Match X-Amz-Meta-* / X-Amz-Tagging case-insensitively so legacy + // non-canonical keys (written by non-S3 paths or older versions) are + // still recognized as managed. + return hasPrefixFold(key, s3_constants.AmzUserMetaPrefix) || + hasPrefixFold(key, s3_constants.AmzObjectTagging) } func (s3a *S3ApiServer) resolveSuspendedCopySourceEntry(bucket, normalizedObject, operation string) (*filer_pb.Entry, error) { @@ -1024,7 +1085,34 @@ func processMetadataBytes(reqHeader http.Header, existing map[string][]byte, rep } } } + for _, h := range copyReplaceSystemHeaders { + if v := reqHeader.Get(h); v != "" { + metadata[h] = []byte(v) + } + } } else { + // Two-pass copy keeps the result deterministic when both the + // canonical and a legacy-cased variant of the same header live on + // the source: canonical always wins, legacy only fills holes. + for _, h := range copyReplaceSystemHeaders { + if v, ok := existing[h]; ok { + metadata[h] = v + } + } + for k, v := range existing { + for _, h := range copyReplaceSystemHeaders { + if k == h { + continue + } + if !strings.EqualFold(k, h) { + continue + } + if _, present := metadata[h]; present { + continue + } + metadata[h] = v + } + } // Copy existing metadata as-is // Note: Metadata should already be normalized during storage (X-Amz-Meta-*), // but we handle legacy non-canonical formats for backward compatibility @@ -1066,11 +1154,29 @@ func processMetadataBytes(reqHeader http.Header, existing map[string][]byte, rep } } } else { + // Two passes: canonical exact-prefix wins; legacy variants only + // fill in keys that no canonical entry already provided. Keeps + // the result deterministic when both variants coexist on the + // source. + prefixLen := len(s3_constants.AmzObjectTagging) for k, v := range existing { if strings.HasPrefix(k, s3_constants.AmzObjectTagging) { metadata[k] = v } } + for k, v := range existing { + if strings.HasPrefix(k, s3_constants.AmzObjectTagging) { + continue + } + if !hasPrefixFold(k, s3_constants.AmzObjectTagging) { + continue + } + canonicalKey := s3_constants.AmzObjectTagging + k[prefixLen:] + if _, present := metadata[canonicalKey]; present { + continue + } + metadata[canonicalKey] = v + } delete(metadata, s3_constants.AmzTagCount) } diff --git a/weed/s3api/s3api_object_handlers_copy_mime_test.go b/weed/s3api/s3api_object_handlers_copy_mime_test.go new file mode 100644 index 000000000..38ffc2643 --- /dev/null +++ b/weed/s3api/s3api_object_handlers_copy_mime_test.go @@ -0,0 +1,281 @@ +package s3api + +import ( + "net/http" + "testing" +) + +func TestResolveDestinationMime(t *testing.T) { + cases := []struct { + name string + reqCT string + srcMime string + replaceMeta bool + want string + }{ + {"COPY keeps source mime", "text/plain", "image/png", false, "image/png"}, + {"COPY without request CT", "", "image/png", false, "image/png"}, + {"COPY with empty source mime", "text/plain", "", false, ""}, + {"REPLACE with request CT wins", "text/plain", "image/png", true, "text/plain"}, + {"REPLACE without request CT uses default", "", "image/png", true, defaultCopyContentType}, + {"REPLACE with request CT and empty source", "application/json", "", true, "application/json"}, + {"REPLACE without request CT and empty source", "", "", true, defaultCopyContentType}, + } + for _, c := range cases { + t.Run(c.name, func(t *testing.T) { + h := http.Header{} + if c.reqCT != "" { + h.Set("Content-Type", c.reqCT) + } + got := resolveDestinationMime(h, c.srcMime, c.replaceMeta) + if got != c.want { + t.Errorf("got %q want %q", got, c.want) + } + }) + } +} + +func TestIsValidDirective(t *testing.T) { + cases := []struct { + value string + want bool + }{ + {"", true}, + {DirectiveCopy, true}, + {DirectiveReplace, true}, + {"copy", false}, + {"replace", false}, + {"FOO", false}, + {"REPLACE ", false}, + } + for _, c := range cases { + t.Run(c.value, func(t *testing.T) { + if got := isValidDirective(c.value); got != c.want { + t.Errorf("isValidDirective(%q) = %v, want %v", c.value, got, c.want) + } + }) + } +} + +func TestProcessMetadataBytes_ReplaceSystemHeaders(t *testing.T) { + existing := map[string][]byte{ + "Cache-Control": []byte("max-age=60"), + "Content-Disposition": []byte(`attachment; filename="old.bin"`), + } + req := http.Header{} + req.Set("Cache-Control", "no-cache") + req.Set("Content-Encoding", "gzip") + + out, err := processMetadataBytes(req, existing, true /* replaceMeta */, false /* replaceTagging */) + if err != nil { + t.Fatalf("processMetadataBytes returned error: %v", err) + } + if got := string(out["Cache-Control"]); got != "no-cache" { + t.Errorf("Cache-Control = %q, want %q", got, "no-cache") + } + if got := string(out["Content-Encoding"]); got != "gzip" { + t.Errorf("Content-Encoding = %q, want %q", got, "gzip") + } + if _, present := out["Content-Disposition"]; present { + t.Errorf("Content-Disposition should be dropped under REPLACE when not in request, got %q", string(out["Content-Disposition"])) + } +} + +func TestIsManagedCopyMetadataKey_CoversSystemHeaders(t *testing.T) { + for _, h := range copyReplaceSystemHeaders { + if !isManagedCopyMetadataKey(h) { + t.Errorf("isManagedCopyMetadataKey(%q) = false, want true so mergeCopyMetadata drops stale source values", h) + } + } +} + +func TestIsManagedCopyMetadataKey_CaseInsensitive(t *testing.T) { + cases := []string{ + "cache-control", + "CACHE-CONTROL", + "content-encoding", + "x-amz-meta-owner", + "X-AMZ-META-OWNER", + "x-amz-tagging-env", + } + for _, k := range cases { + t.Run(k, func(t *testing.T) { + if !isManagedCopyMetadataKey(k) { + t.Errorf("isManagedCopyMetadataKey(%q) = false, want true; legacy non-canonical keys must still be recognized so mergeCopyMetadata clears them", k) + } + }) + } +} + +func TestProcessMetadataBytes_CopyAcceptsLowercaseSourceKeys(t *testing.T) { + existing := map[string][]byte{ + "cache-control": []byte("max-age=60"), + "content-disposition": []byte(`attachment; filename="legacy.bin"`), + "CONTENT-ENCODING": []byte("gzip"), + "Content-language": []byte("en"), + } + req := http.Header{} + + out, err := processMetadataBytes(req, existing, false, false) + if err != nil { + t.Fatalf("processMetadataBytes error: %v", err) + } + if got := string(out["Cache-Control"]); got != "max-age=60" { + t.Errorf("Cache-Control = %q, want %q (lowercase source must be promoted to canonical)", got, "max-age=60") + } + if got := string(out["Content-Disposition"]); got != `attachment; filename="legacy.bin"` { + t.Errorf("Content-Disposition = %q, want legacy source value promoted to canonical", got) + } + if got := string(out["Content-Encoding"]); got != "gzip" { + t.Errorf("Content-Encoding = %q, want %q (uppercase source must be promoted to canonical)", got, "gzip") + } + if got := string(out["Content-Language"]); got != "en" { + t.Errorf("Content-Language = %q, want %q (mixed-case source must be promoted to canonical)", got, "en") + } +} + +func TestProcessMetadataBytes_CopyCanonicalSystemHeaderWinsOverLegacy(t *testing.T) { + // When both canonical and legacy-cased variants live on the source, + // COPY must deterministically prefer the canonical value. Run several + // times to exercise different Go map iteration orders. + for i := 0; i < 32; i++ { + existing := map[string][]byte{ + "Cache-Control": []byte("canonical-wins"), + "cache-control": []byte("legacy-loses"), + } + out, err := processMetadataBytes(http.Header{}, existing, false, false) + if err != nil { + t.Fatalf("processMetadataBytes error: %v", err) + } + if got := string(out["Cache-Control"]); got != "canonical-wins" { + t.Fatalf("iter %d: Cache-Control = %q, want canonical-wins (canonical must beat legacy on COPY)", i, got) + } + } +} + +func TestProcessMetadataBytes_CopyCanonicalTagWinsOverLegacy(t *testing.T) { + for i := 0; i < 32; i++ { + existing := map[string][]byte{ + "X-Amz-Tagging-env": []byte("canonical-wins"), + "x-amz-tagging-env": []byte("legacy-loses"), + } + out, err := processMetadataBytes(http.Header{}, existing, false, false) + if err != nil { + t.Fatalf("processMetadataBytes error: %v", err) + } + if got := string(out["X-Amz-Tagging-env"]); got != "canonical-wins" { + t.Fatalf("iter %d: X-Amz-Tagging-env = %q, want canonical-wins (canonical tag must beat legacy on COPY)", i, got) + } + } +} + +func TestProcessMetadataBytes_CopyAcceptsLowercaseTagKeys(t *testing.T) { + existing := map[string][]byte{ + "x-amz-tagging-env": []byte("prod"), + "X-AMZ-TAGGING-team": []byte("infra"), + } + req := http.Header{} + + out, err := processMetadataBytes(req, existing, false /* replaceMeta */, false /* replaceTagging */) + if err != nil { + t.Fatalf("processMetadataBytes error: %v", err) + } + if got := string(out["X-Amz-Tagging-env"]); got != "prod" { + t.Errorf("X-Amz-Tagging-env = %q, want %q (legacy lowercase tag must be promoted to canonical)", got, "prod") + } + if got := string(out["X-Amz-Tagging-team"]); got != "infra" { + t.Errorf("X-Amz-Tagging-team = %q, want %q (uppercase tag must be promoted to canonical)", got, "infra") + } +} + +func TestMergeCopyMetadata_ReplaceDropsStaleSystemHeader(t *testing.T) { + // End-to-end caller flow: source-populated Extended + REPLACE request + // must drop stale managed values, keep non-managed ones. + existing := map[string][]byte{ + "Content-Disposition": []byte(`attachment; filename="old.bin"`), + "Cache-Control": []byte("max-age=60"), + "X-Amz-Meta-Owner": []byte("old"), + "X-Amz-Meta-OnlyOnSource": []byte("should-drop"), + "X-Custom-Non-Managed": []byte("keep-me"), + "X-Amz-Server-Side-Encryption": []byte("aws:kms"), + } + req := http.Header{} + req.Set("Cache-Control", "no-cache") + req.Set("X-Amz-Meta-Owner", "new") + + processed, err := processMetadataBytes(req, existing, true, false) + if err != nil { + t.Fatalf("processMetadataBytes error: %v", err) + } + + merged := mergeCopyMetadata(existing, processed) + + if _, leak := merged["Content-Disposition"]; leak { + t.Errorf("Content-Disposition leaked through REPLACE merge: %q", string(merged["Content-Disposition"])) + } + if got := string(merged["Cache-Control"]); got != "no-cache" { + t.Errorf("Cache-Control = %q, want %q", got, "no-cache") + } + if got := string(merged["X-Amz-Meta-Owner"]); got != "new" { + t.Errorf("X-Amz-Meta-Owner = %q, want %q", got, "new") + } + if _, leak := merged["X-Amz-Meta-OnlyOnSource"]; leak { + t.Errorf("stale X-Amz-Meta-OnlyOnSource must be dropped under REPLACE, still present: %q", string(merged["X-Amz-Meta-OnlyOnSource"])) + } + if got := string(merged["X-Custom-Non-Managed"]); got != "keep-me" { + t.Errorf("non-managed key %q must survive REPLACE merge, got %q", "X-Custom-Non-Managed", got) + } +} + +func TestMergeCopyMetadata_ReplaceTaggingDropsStaleTags(t *testing.T) { + // REPLACE tagging directive must drop old object tags that the new + // request doesn't redeclare. + existing := map[string][]byte{ + "X-Amz-Tagging-old": []byte("v1"), + "X-Amz-Tagging-env": []byte("dev"), + "X-Amz-Meta-Author": []byte("alice"), + } + req := http.Header{} + req.Set("X-Amz-Tagging", "env=prod") + + processed, err := processMetadataBytes(req, existing, false /* replaceMeta */, true /* replaceTagging */) + if err != nil { + t.Fatalf("processMetadataBytes error: %v", err) + } + + merged := mergeCopyMetadata(existing, processed) + + if _, leak := merged["X-Amz-Tagging-old"]; leak { + t.Errorf("stale tag X-Amz-Tagging-old must be dropped, still present: %q", string(merged["X-Amz-Tagging-old"])) + } + if got := string(merged["X-Amz-Tagging-env"]); got != "prod" { + t.Errorf("X-Amz-Tagging-env = %q, want %q", got, "prod") + } + if got := string(merged["X-Amz-Meta-Author"]); got != "alice" { + t.Errorf("COPY-mode user metadata must survive tag-only REPLACE: got %q", got) + } +} + +func TestProcessMetadataBytes_CopyInheritsSystemHeaders(t *testing.T) { + existing := map[string][]byte{ + "Cache-Control": []byte("max-age=60"), + "Content-Disposition": []byte(`attachment; filename="src.bin"`), + "Content-Encoding": []byte("gzip"), + } + req := http.Header{} + req.Set("Cache-Control", "no-cache") + + out, err := processMetadataBytes(req, existing, false /* replaceMeta */, false /* replaceTagging */) + if err != nil { + t.Fatalf("processMetadataBytes returned error: %v", err) + } + if got := string(out["Cache-Control"]); got != "max-age=60" { + t.Errorf("Cache-Control = %q, want %q (source value, request ignored under COPY)", got, "max-age=60") + } + if got := string(out["Content-Disposition"]); got != `attachment; filename="src.bin"` { + t.Errorf("Content-Disposition = %q, want source value", got) + } + if got := string(out["Content-Encoding"]); got != "gzip" { + t.Errorf("Content-Encoding = %q, want %q", got, "gzip") + } +} diff --git a/weed/s3api/s3err/s3api_errors.go b/weed/s3api/s3err/s3api_errors.go index bb5eded76..dcc8a2b84 100644 --- a/weed/s3api/s3err/s3api_errors.go +++ b/weed/s3api/s3err/s3api_errors.go @@ -145,6 +145,9 @@ const ( ErrNoSuchBucketEncryptionConfiguration ErrInvalidStorageClass + ErrInvalidMetadataDirective + ErrInvalidTagDirective + ErrInvalidAttributeName // Object key length errors @@ -614,6 +617,18 @@ var errorCodeResponse = map[ErrorCode]APIError{ HTTPStatusCode: http.StatusBadRequest, }, + ErrInvalidMetadataDirective: { + Code: "InvalidArgument", + Description: "Unknown metadata directive.", + HTTPStatusCode: http.StatusBadRequest, + }, + + ErrInvalidTagDirective: { + Code: "InvalidArgument", + Description: "Unknown tag directive.", + HTTPStatusCode: http.StatusBadRequest, + }, + ErrInvalidAttributeName: { Code: "InvalidArgument", Description: "Invalid attribute name specified",