fix(s3): honor MetadataDirective=REPLACE for system metadata on CopyObject (#9721)

* fix(s3): honor MetadataDirective=REPLACE for system metadata on CopyObject

* fix(s3): match copy metadata keys case-insensitively for legacy data

Legacy / non-S3 write paths (FUSE mount, direct filer HTTP API, older
versions) may persist Cache-Control etc. in lowercase form. Make
isManagedCopyMetadataKey case-insensitive so mergeCopyMetadata still
clears stale source values under REPLACE, and let the COPY branch of
processMetadataBytes fall back to a lowercase key on the source so
legacy values survive into the destination (re-emitted as canonical).

Mirrors the existing x-amz-meta-* backward-compat path.

* fix(s3): keep legacy non-canonical tag and system metadata across COPY

The previous case-insensitive isManagedCopyMetadataKey caused
mergeCopyMetadata to delete legacy lowercase x-amz-tagging-* and
mixed-case system headers, but the COPY branch in processMetadataBytes
only matched canonical or strict-lowercase keys when re-populating
them, so any non-canonical key was permanently dropped on COPY.

- COPY now scans existing in a single pass and uses strings.EqualFold
  against the system header whitelist, re-emitting under the canonical
  header name. Handles any case folding (CACHE-CONTROL, Cache-control,
  etc.), not just strings.ToLower.
- COPY tagging branch now uses hasPrefixFold(k, AmzObjectTagging) and
  re-emits the canonical X-Amz-Tagging-<suffix>, mirroring the existing
  X-Amz-Meta-* migration path.
- Tests cover lowercase/uppercase/mixed-case system headers and tags
  surviving COPY.

* fix(s3): make COPY of system metadata and tags deterministic across case variants

Single-pass EqualFold matching let Go's randomized map iteration pick
either the canonical or a legacy-cased value when both lived on the
source, so the COPY result varied between calls.

Both COPY branches now use two passes: a canonical-exact lookup first,
then a case-insensitive fallback that only writes when the canonical
slot is still empty. Mirrors the collision-check pattern used by the
X-Amz-Meta-* migration path.

Tests run the canonical-vs-legacy collision 32 times each to exercise
varied map orders.

* fix(s3): apply REPLACE Content-Type on in-place copy

The metadata-only self-copy path never set Attributes.Mime, so a same-key
CopyObject with REPLACE and a new Content-Type silently kept the old type.
Route in place only when the Mime is unchanged; otherwise take the locked
clone path (still metadata-only, reuses source chunks) and set the new Mime
there. Also covers the versioned self-copy path.

* perf(s3): drop per-key ToLower in isManagedCopyMetadataKey

Use the allocation-free hasPrefixFold helper instead of lowercasing the key
and both constant prefixes on every metadata-key check.

---------

Co-authored-by: Chris Lu <chris.lu@gmail.com>
This commit is contained in:
qzhello
2026-05-29 03:55:08 +08:00
committed by GitHub
parent 21ab68aa94
commit 5b1098e2ad
3 changed files with 409 additions and 7 deletions

View File

@@ -32,6 +32,41 @@ const (
DirectiveReplace = "REPLACE"
)
// AWS default when REPLACE is requested without a Content-Type.
const defaultCopyContentType = "binary/octet-stream"
// System-metadata headers that REPLACE must rewrite on the destination.
// Content-Type lives on Attributes.Mime, tagging/storage-class have their
// own handling, so they're not in this list.
var copyReplaceSystemHeaders = []string{
"Cache-Control",
"Content-Encoding",
"Content-Disposition",
"Content-Language",
"Expires",
}
func resolveDestinationMime(reqHeader http.Header, sourceMime string, replaceMeta bool) string {
if replaceMeta {
if ct := reqHeader.Get("Content-Type"); ct != "" {
return ct
}
return defaultCopyContentType
}
return sourceMime
}
// Empty means default (COPY). Anything else outside {COPY, REPLACE} must be
// rejected, not silently downgraded.
func isValidDirective(value string) bool {
return value == "" || value == DirectiveCopy || value == DirectiveReplace
}
// hasPrefixFold reports whether s starts with prefix, ignoring case.
func hasPrefixFold(s, prefix string) bool {
return len(s) >= len(prefix) && strings.EqualFold(s[:len(prefix)], prefix)
}
func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request) {
t := time.Now().UTC().Truncate(time.Millisecond)
@@ -80,6 +115,15 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request
return
}
if !isValidDirective(r.Header.Get(s3_constants.AmzUserMetaDirective)) {
s3err.WriteErrorResponse(w, r, s3err.ErrInvalidMetadataDirective)
return
}
if !isValidDirective(r.Header.Get(s3_constants.AmzObjectTaggingDirective)) {
s3err.WriteErrorResponse(w, r, s3err.ErrInvalidTagDirective)
return
}
replaceMeta, replaceTagging := replaceDirective(r.Header)
// If source object is empty or bucket is empty, reply back invalid copy source.
@@ -161,8 +205,15 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request
// A non-versioned in-place metadata replace routes to the owner as a
// serialized PATCH (off the distributed lock); versioned/suspended (which
// create a new version) and the no-owner bootstrap keep the lock.
//
// REPLACE can also change Content-Type, which lives on Attributes.Mime,
// not Extended. The routed PATCH only carries Extended keys, so when the
// Mime actually changes keep the lock and take the clone path below: it is
// still metadata-only (reuses the source chunks) but can set the Mime.
owner := s3a.objectWriteOwner(dstBucket, dstObject)
routeInPlace := owner != "" && dstVersioningState == ""
sourceMime := entry.GetAttributes().GetMime()
mimeChanged := resolveDestinationMime(r.Header, sourceMime, replaceMeta) != sourceMime
routeInPlace := owner != "" && dstVersioningState == "" && !mimeChanged
selfCopyBody := func() s3err.ErrorCode {
currentEntry, currentErr := s3a.resolveCopySourceEntry(srcBucket, srcObject, srcVersionId, srcVersioningState)
if currentErr != nil || currentEntry.IsDirectory {
@@ -188,6 +239,7 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request
if updatedEntry.Attributes == nil {
updatedEntry.Attributes = &filer_pb.FuseAttributes{}
}
updatedEntry.Attributes.Mime = resolveDestinationMime(r.Header, currentEntry.GetAttributes().GetMime(), replaceMeta)
updatedEntry.Attributes.Mtime = t.Unix()
var finErr error
dstVersionId, etag, finErr = s3a.finalizeCopyDestination(dstBucket, dstObject, dstVersioningState, updatedEntry)
@@ -242,7 +294,7 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request
FileSize: entry.Attributes.FileSize,
Mtime: t.Unix(),
Crtime: entry.Attributes.Crtime,
Mime: entry.Attributes.Mime,
Mime: resolveDestinationMime(r.Header, entry.Attributes.Mime, replaceMeta),
},
Extended: make(map[string][]byte),
}
@@ -288,10 +340,10 @@ func (s3a *S3ApiServer) CopyObjectHandler(w http.ResponseWriter, r *http.Request
return
}
// Apply processed metadata to destination entry
for k, v := range processedMetadata {
dstEntry.Extended[k] = v
}
// mergeCopyMetadata drops stale managed keys before applying the new set,
// so REPLACE doesn't leak source values through the merge. Mirrors the
// self-copy path's routedMetadataReplace.
dstEntry.Extended = mergeCopyMetadata(dstEntry.Extended, processedMetadata)
// For zero-size files or files without chunks, handle inline content
// This includes encrypted inline files that need decryption/re-encryption
@@ -565,7 +617,16 @@ func isManagedCopyMetadataKey(key string) bool {
s3_constants.AmzTagCount:
return true
}
return strings.HasPrefix(key, s3_constants.AmzUserMetaPrefix) || strings.HasPrefix(key, s3_constants.AmzObjectTagging)
for _, h := range copyReplaceSystemHeaders {
if strings.EqualFold(key, h) {
return true
}
}
// Match X-Amz-Meta-* / X-Amz-Tagging case-insensitively so legacy
// non-canonical keys (written by non-S3 paths or older versions) are
// still recognized as managed.
return hasPrefixFold(key, s3_constants.AmzUserMetaPrefix) ||
hasPrefixFold(key, s3_constants.AmzObjectTagging)
}
func (s3a *S3ApiServer) resolveSuspendedCopySourceEntry(bucket, normalizedObject, operation string) (*filer_pb.Entry, error) {
@@ -1024,7 +1085,34 @@ func processMetadataBytes(reqHeader http.Header, existing map[string][]byte, rep
}
}
}
for _, h := range copyReplaceSystemHeaders {
if v := reqHeader.Get(h); v != "" {
metadata[h] = []byte(v)
}
}
} else {
// Two-pass copy keeps the result deterministic when both the
// canonical and a legacy-cased variant of the same header live on
// the source: canonical always wins, legacy only fills holes.
for _, h := range copyReplaceSystemHeaders {
if v, ok := existing[h]; ok {
metadata[h] = v
}
}
for k, v := range existing {
for _, h := range copyReplaceSystemHeaders {
if k == h {
continue
}
if !strings.EqualFold(k, h) {
continue
}
if _, present := metadata[h]; present {
continue
}
metadata[h] = v
}
}
// Copy existing metadata as-is
// Note: Metadata should already be normalized during storage (X-Amz-Meta-*),
// but we handle legacy non-canonical formats for backward compatibility
@@ -1066,11 +1154,29 @@ func processMetadataBytes(reqHeader http.Header, existing map[string][]byte, rep
}
}
} else {
// Two passes: canonical exact-prefix wins; legacy variants only
// fill in keys that no canonical entry already provided. Keeps
// the result deterministic when both variants coexist on the
// source.
prefixLen := len(s3_constants.AmzObjectTagging)
for k, v := range existing {
if strings.HasPrefix(k, s3_constants.AmzObjectTagging) {
metadata[k] = v
}
}
for k, v := range existing {
if strings.HasPrefix(k, s3_constants.AmzObjectTagging) {
continue
}
if !hasPrefixFold(k, s3_constants.AmzObjectTagging) {
continue
}
canonicalKey := s3_constants.AmzObjectTagging + k[prefixLen:]
if _, present := metadata[canonicalKey]; present {
continue
}
metadata[canonicalKey] = v
}
delete(metadata, s3_constants.AmzTagCount)
}

View File

@@ -0,0 +1,281 @@
package s3api
import (
"net/http"
"testing"
)
func TestResolveDestinationMime(t *testing.T) {
cases := []struct {
name string
reqCT string
srcMime string
replaceMeta bool
want string
}{
{"COPY keeps source mime", "text/plain", "image/png", false, "image/png"},
{"COPY without request CT", "", "image/png", false, "image/png"},
{"COPY with empty source mime", "text/plain", "", false, ""},
{"REPLACE with request CT wins", "text/plain", "image/png", true, "text/plain"},
{"REPLACE without request CT uses default", "", "image/png", true, defaultCopyContentType},
{"REPLACE with request CT and empty source", "application/json", "", true, "application/json"},
{"REPLACE without request CT and empty source", "", "", true, defaultCopyContentType},
}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
h := http.Header{}
if c.reqCT != "" {
h.Set("Content-Type", c.reqCT)
}
got := resolveDestinationMime(h, c.srcMime, c.replaceMeta)
if got != c.want {
t.Errorf("got %q want %q", got, c.want)
}
})
}
}
func TestIsValidDirective(t *testing.T) {
cases := []struct {
value string
want bool
}{
{"", true},
{DirectiveCopy, true},
{DirectiveReplace, true},
{"copy", false},
{"replace", false},
{"FOO", false},
{"REPLACE ", false},
}
for _, c := range cases {
t.Run(c.value, func(t *testing.T) {
if got := isValidDirective(c.value); got != c.want {
t.Errorf("isValidDirective(%q) = %v, want %v", c.value, got, c.want)
}
})
}
}
func TestProcessMetadataBytes_ReplaceSystemHeaders(t *testing.T) {
existing := map[string][]byte{
"Cache-Control": []byte("max-age=60"),
"Content-Disposition": []byte(`attachment; filename="old.bin"`),
}
req := http.Header{}
req.Set("Cache-Control", "no-cache")
req.Set("Content-Encoding", "gzip")
out, err := processMetadataBytes(req, existing, true /* replaceMeta */, false /* replaceTagging */)
if err != nil {
t.Fatalf("processMetadataBytes returned error: %v", err)
}
if got := string(out["Cache-Control"]); got != "no-cache" {
t.Errorf("Cache-Control = %q, want %q", got, "no-cache")
}
if got := string(out["Content-Encoding"]); got != "gzip" {
t.Errorf("Content-Encoding = %q, want %q", got, "gzip")
}
if _, present := out["Content-Disposition"]; present {
t.Errorf("Content-Disposition should be dropped under REPLACE when not in request, got %q", string(out["Content-Disposition"]))
}
}
func TestIsManagedCopyMetadataKey_CoversSystemHeaders(t *testing.T) {
for _, h := range copyReplaceSystemHeaders {
if !isManagedCopyMetadataKey(h) {
t.Errorf("isManagedCopyMetadataKey(%q) = false, want true so mergeCopyMetadata drops stale source values", h)
}
}
}
func TestIsManagedCopyMetadataKey_CaseInsensitive(t *testing.T) {
cases := []string{
"cache-control",
"CACHE-CONTROL",
"content-encoding",
"x-amz-meta-owner",
"X-AMZ-META-OWNER",
"x-amz-tagging-env",
}
for _, k := range cases {
t.Run(k, func(t *testing.T) {
if !isManagedCopyMetadataKey(k) {
t.Errorf("isManagedCopyMetadataKey(%q) = false, want true; legacy non-canonical keys must still be recognized so mergeCopyMetadata clears them", k)
}
})
}
}
func TestProcessMetadataBytes_CopyAcceptsLowercaseSourceKeys(t *testing.T) {
existing := map[string][]byte{
"cache-control": []byte("max-age=60"),
"content-disposition": []byte(`attachment; filename="legacy.bin"`),
"CONTENT-ENCODING": []byte("gzip"),
"Content-language": []byte("en"),
}
req := http.Header{}
out, err := processMetadataBytes(req, existing, false, false)
if err != nil {
t.Fatalf("processMetadataBytes error: %v", err)
}
if got := string(out["Cache-Control"]); got != "max-age=60" {
t.Errorf("Cache-Control = %q, want %q (lowercase source must be promoted to canonical)", got, "max-age=60")
}
if got := string(out["Content-Disposition"]); got != `attachment; filename="legacy.bin"` {
t.Errorf("Content-Disposition = %q, want legacy source value promoted to canonical", got)
}
if got := string(out["Content-Encoding"]); got != "gzip" {
t.Errorf("Content-Encoding = %q, want %q (uppercase source must be promoted to canonical)", got, "gzip")
}
if got := string(out["Content-Language"]); got != "en" {
t.Errorf("Content-Language = %q, want %q (mixed-case source must be promoted to canonical)", got, "en")
}
}
func TestProcessMetadataBytes_CopyCanonicalSystemHeaderWinsOverLegacy(t *testing.T) {
// When both canonical and legacy-cased variants live on the source,
// COPY must deterministically prefer the canonical value. Run several
// times to exercise different Go map iteration orders.
for i := 0; i < 32; i++ {
existing := map[string][]byte{
"Cache-Control": []byte("canonical-wins"),
"cache-control": []byte("legacy-loses"),
}
out, err := processMetadataBytes(http.Header{}, existing, false, false)
if err != nil {
t.Fatalf("processMetadataBytes error: %v", err)
}
if got := string(out["Cache-Control"]); got != "canonical-wins" {
t.Fatalf("iter %d: Cache-Control = %q, want canonical-wins (canonical must beat legacy on COPY)", i, got)
}
}
}
func TestProcessMetadataBytes_CopyCanonicalTagWinsOverLegacy(t *testing.T) {
for i := 0; i < 32; i++ {
existing := map[string][]byte{
"X-Amz-Tagging-env": []byte("canonical-wins"),
"x-amz-tagging-env": []byte("legacy-loses"),
}
out, err := processMetadataBytes(http.Header{}, existing, false, false)
if err != nil {
t.Fatalf("processMetadataBytes error: %v", err)
}
if got := string(out["X-Amz-Tagging-env"]); got != "canonical-wins" {
t.Fatalf("iter %d: X-Amz-Tagging-env = %q, want canonical-wins (canonical tag must beat legacy on COPY)", i, got)
}
}
}
func TestProcessMetadataBytes_CopyAcceptsLowercaseTagKeys(t *testing.T) {
existing := map[string][]byte{
"x-amz-tagging-env": []byte("prod"),
"X-AMZ-TAGGING-team": []byte("infra"),
}
req := http.Header{}
out, err := processMetadataBytes(req, existing, false /* replaceMeta */, false /* replaceTagging */)
if err != nil {
t.Fatalf("processMetadataBytes error: %v", err)
}
if got := string(out["X-Amz-Tagging-env"]); got != "prod" {
t.Errorf("X-Amz-Tagging-env = %q, want %q (legacy lowercase tag must be promoted to canonical)", got, "prod")
}
if got := string(out["X-Amz-Tagging-team"]); got != "infra" {
t.Errorf("X-Amz-Tagging-team = %q, want %q (uppercase tag must be promoted to canonical)", got, "infra")
}
}
func TestMergeCopyMetadata_ReplaceDropsStaleSystemHeader(t *testing.T) {
// End-to-end caller flow: source-populated Extended + REPLACE request
// must drop stale managed values, keep non-managed ones.
existing := map[string][]byte{
"Content-Disposition": []byte(`attachment; filename="old.bin"`),
"Cache-Control": []byte("max-age=60"),
"X-Amz-Meta-Owner": []byte("old"),
"X-Amz-Meta-OnlyOnSource": []byte("should-drop"),
"X-Custom-Non-Managed": []byte("keep-me"),
"X-Amz-Server-Side-Encryption": []byte("aws:kms"),
}
req := http.Header{}
req.Set("Cache-Control", "no-cache")
req.Set("X-Amz-Meta-Owner", "new")
processed, err := processMetadataBytes(req, existing, true, false)
if err != nil {
t.Fatalf("processMetadataBytes error: %v", err)
}
merged := mergeCopyMetadata(existing, processed)
if _, leak := merged["Content-Disposition"]; leak {
t.Errorf("Content-Disposition leaked through REPLACE merge: %q", string(merged["Content-Disposition"]))
}
if got := string(merged["Cache-Control"]); got != "no-cache" {
t.Errorf("Cache-Control = %q, want %q", got, "no-cache")
}
if got := string(merged["X-Amz-Meta-Owner"]); got != "new" {
t.Errorf("X-Amz-Meta-Owner = %q, want %q", got, "new")
}
if _, leak := merged["X-Amz-Meta-OnlyOnSource"]; leak {
t.Errorf("stale X-Amz-Meta-OnlyOnSource must be dropped under REPLACE, still present: %q", string(merged["X-Amz-Meta-OnlyOnSource"]))
}
if got := string(merged["X-Custom-Non-Managed"]); got != "keep-me" {
t.Errorf("non-managed key %q must survive REPLACE merge, got %q", "X-Custom-Non-Managed", got)
}
}
func TestMergeCopyMetadata_ReplaceTaggingDropsStaleTags(t *testing.T) {
// REPLACE tagging directive must drop old object tags that the new
// request doesn't redeclare.
existing := map[string][]byte{
"X-Amz-Tagging-old": []byte("v1"),
"X-Amz-Tagging-env": []byte("dev"),
"X-Amz-Meta-Author": []byte("alice"),
}
req := http.Header{}
req.Set("X-Amz-Tagging", "env=prod")
processed, err := processMetadataBytes(req, existing, false /* replaceMeta */, true /* replaceTagging */)
if err != nil {
t.Fatalf("processMetadataBytes error: %v", err)
}
merged := mergeCopyMetadata(existing, processed)
if _, leak := merged["X-Amz-Tagging-old"]; leak {
t.Errorf("stale tag X-Amz-Tagging-old must be dropped, still present: %q", string(merged["X-Amz-Tagging-old"]))
}
if got := string(merged["X-Amz-Tagging-env"]); got != "prod" {
t.Errorf("X-Amz-Tagging-env = %q, want %q", got, "prod")
}
if got := string(merged["X-Amz-Meta-Author"]); got != "alice" {
t.Errorf("COPY-mode user metadata must survive tag-only REPLACE: got %q", got)
}
}
func TestProcessMetadataBytes_CopyInheritsSystemHeaders(t *testing.T) {
existing := map[string][]byte{
"Cache-Control": []byte("max-age=60"),
"Content-Disposition": []byte(`attachment; filename="src.bin"`),
"Content-Encoding": []byte("gzip"),
}
req := http.Header{}
req.Set("Cache-Control", "no-cache")
out, err := processMetadataBytes(req, existing, false /* replaceMeta */, false /* replaceTagging */)
if err != nil {
t.Fatalf("processMetadataBytes returned error: %v", err)
}
if got := string(out["Cache-Control"]); got != "max-age=60" {
t.Errorf("Cache-Control = %q, want %q (source value, request ignored under COPY)", got, "max-age=60")
}
if got := string(out["Content-Disposition"]); got != `attachment; filename="src.bin"` {
t.Errorf("Content-Disposition = %q, want source value", got)
}
if got := string(out["Content-Encoding"]); got != "gzip" {
t.Errorf("Content-Encoding = %q, want %q", got, "gzip")
}
}

View File

@@ -145,6 +145,9 @@ const (
ErrNoSuchBucketEncryptionConfiguration
ErrInvalidStorageClass
ErrInvalidMetadataDirective
ErrInvalidTagDirective
ErrInvalidAttributeName
// Object key length errors
@@ -614,6 +617,18 @@ var errorCodeResponse = map[ErrorCode]APIError{
HTTPStatusCode: http.StatusBadRequest,
},
ErrInvalidMetadataDirective: {
Code: "InvalidArgument",
Description: "Unknown metadata directive.",
HTTPStatusCode: http.StatusBadRequest,
},
ErrInvalidTagDirective: {
Code: "InvalidArgument",
Description: "Unknown tag directive.",
HTTPStatusCode: http.StatusBadRequest,
},
ErrInvalidAttributeName: {
Code: "InvalidArgument",
Description: "Invalid attribute name specified",