From fbdcec1cba07b10d488f2813b5889854701c920c Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Thu, 21 May 2026 14:05:16 -0700 Subject: [PATCH] fix(s3): list empty directories as directory markers (#9615) * fix(s3): list empty directories as directory markers A real but empty directory created out of band (mount, mkdir, filer API) carries no MIME, so it was hidden from S3 listings. hadoop-aws getFileStatus probes LIST prefix=dir/ &delimiter=/ and reads an empty result as a missing path, which breaks Spark's eventLog.dir when it points at an empty directory. Surface such directories as directory markers, matching directories created via PutObject with a trailing "/". Emptiness comes from the recursion result, and the marker MIME is set only on the in-memory listing entry, so empty directories stay eligible for empty-folder cleanup. * fix(s3): only surface empty directory markers for explicit dir probes Restrict the empty-directory marker to a trailing-slash prefix probe (prefix=dir/), the pattern hadoop-aws getFileStatus uses. Plain listings are left as before, so an empty directory left behind by deleted objects (e.g. after lifecycle expiration) is no longer shown as a phantom key. --- weed/s3api/s3api_object_handlers_list.go | 31 ++++- ...api_object_handlers_list_directory_test.go | 120 ++++++++++++++++++ 2 files changed, 145 insertions(+), 6 deletions(-) diff --git a/weed/s3api/s3api_object_handlers_list.go b/weed/s3api/s3api_object_handlers_list.go index 7c55a02b7..e6e194979 100644 --- a/weed/s3api/s3api_object_handlers_list.go +++ b/weed/s3api/s3api_object_handlers_list.go @@ -677,22 +677,41 @@ func (s3a *S3ApiServer) doListFilerEntries(client filer_pb.SeaweedFilerClient, d } if delimiter != "/" || cursor.prefixEndsOnDelimiter { + // A trailing-slash prefix (e.g. "logs/") names one directory and asks + // whether it exists, so a real but empty directory must be reported for + // that probe. + explicitDirProbe := cursor.prefixEndsOnDelimiter if cursor.prefixEndsOnDelimiter { cursor.prefixEndsOnDelimiter = false - if entry.IsDirectoryKeyObject() { - eachEntryFn(dir, entry) - } - } else if entry.IsDirectoryKeyObject() { + } + isKeyObject := entry.IsDirectoryKeyObject() + if isKeyObject { // Directory key objects (created via PutObject with trailing "/") // must appear as regular keys in recursive listing mode. eachEntryFn(dir, entry) } - // Recurse into subdirectory to list any children - subNextMarker, subErr := s3a.doListFilerEntries(client, dir+"/"+entry.Name, "", cursor, "", delimiter, false, bucket, eachEntryFn) + // Recurse into subdirectory to list any children, noting whether the + // subtree produced any entries. + childEmitted := false + subNextMarker, subErr := s3a.doListFilerEntries(client, dir+"/"+entry.Name, "", cursor, "", delimiter, false, bucket, func(d string, e *filer_pb.Entry) { + childEmitted = true + eachEntryFn(d, e) + }) if subErr != nil { err = fmt.Errorf("doListFilerEntries2: %w", subErr) return } + // A real but empty directory (created out of band via mount, mkdir or + // the filer API, so it carries no MIME) is otherwise invisible to S3 + // clients that detect directories by listing it under its own "/" + // prefix. Surface it as a directory marker for that explicit probe, + // identical to a directory created via PutObject with a trailing "/", so + // tools like hadoop-aws can find it. Plain listings are left untouched, so + // empty directories left behind by deleted objects are not shown as keys. + if explicitDirProbe && !isKeyObject && !childEmitted && !cursor.isTruncated && entry.Attributes != nil { + entry.Attributes.Mime = s3_constants.FolderMimeType + eachEntryFn(dir, entry) + } // println("doListFilerEntries2 dir", dir+"/"+entry.Name, "subNextMarker", subNextMarker) nextMarker = entry.Name + "/" + subNextMarker if cursor.isTruncated { diff --git a/weed/s3api/s3api_object_handlers_list_directory_test.go b/weed/s3api/s3api_object_handlers_list_directory_test.go index 8b3c26b8b..ee7623a2a 100644 --- a/weed/s3api/s3api_object_handlers_list_directory_test.go +++ b/weed/s3api/s3api_object_handlers_list_directory_test.go @@ -4,6 +4,7 @@ import ( "testing" "github.com/seaweedfs/seaweedfs/weed/pb/filer_pb" + "github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants" "github.com/stretchr/testify/assert" ) @@ -61,3 +62,122 @@ func TestDirectoryListedAsCommonPrefix(t *testing.T) { "Directory should be passed to callback for CommonPrefix processing with delimiter=/") assert.Empty(t, seenFiles, "No files should be seen, only the directory") } + +// TestEmptyDirectorySurfacedAsMarker reproduces the hadoop-aws / Spark case where a +// real but empty directory (created via mount, mkdir or the filer API, so it has no +// MIME) must be visible to S3 clients that detect directories by listing under the +// "/" prefix. Such a directory is surfaced as a directory marker, identical to +// one created via PutObject with a trailing "/". +func TestEmptyDirectorySurfacedAsMarker(t *testing.T) { + s3a := &S3ApiServer{ + option: &S3ApiServerOption{BucketsPath: "/buckets"}, + } + + emptyDir := &filer_pb.Entry{ + Name: "logs", + IsDirectory: true, + Attributes: &filer_pb.FuseAttributes{Mime: ""}, + } + client := &testFilerClient{ + entriesByDir: map[string][]*filer_pb.Entry{ + "/buckets/test": {emptyDir}, + "/buckets/test/logs": {}, // empty directory has no children + }, + } + + // Mirrors the getFileStatus probe: prefix "logs/" with delimiter "/". + cursor := &ListingCursor{maxKeys: 1000, prefixEndsOnDelimiter: true} + var seen []*filer_pb.Entry + _, err := s3a.doListFilerEntries(client, "/buckets/test", "logs", cursor, "", "/", false, "test", + func(dir string, entry *filer_pb.Entry) { + seen = append(seen, entry) + }) + + assert.NoError(t, err) + if assert.Len(t, seen, 1, "empty directory must be surfaced under the logs/ prefix") { + assert.True(t, seen[0].IsDirectoryKeyObject(), + "empty directory must be marked as a directory key object so it lists as logs/") + assert.Equal(t, s3_constants.FolderMimeType, seen[0].Attributes.Mime) + } +} + +// TestNonEmptyDirectoryGetsNoPhantomMarker ensures the empty-directory fix does not add +// a spurious marker for directories that already have children; the children represent +// the directory. +func TestNonEmptyDirectoryGetsNoPhantomMarker(t *testing.T) { + s3a := &S3ApiServer{ + option: &S3ApiServerOption{BucketsPath: "/buckets"}, + } + + dir := &filer_pb.Entry{ + Name: "data", + IsDirectory: true, + Attributes: &filer_pb.FuseAttributes{Mime: ""}, + } + child := &filer_pb.Entry{ + Name: "a.txt", + IsDirectory: false, + Attributes: &filer_pb.FuseAttributes{}, + } + client := &testFilerClient{ + entriesByDir: map[string][]*filer_pb.Entry{ + "/buckets/test": {dir}, + "/buckets/test/data": {child}, + }, + } + + // Recursive listing under the "data/" prefix. + cursor := &ListingCursor{maxKeys: 1000, prefixEndsOnDelimiter: true} + var seen []string + _, err := s3a.doListFilerEntries(client, "/buckets/test", "data", cursor, "", "", false, "test", + func(dir string, entry *filer_pb.Entry) { + seen = append(seen, entry.Name) + }) + + assert.NoError(t, err) + assert.Equal(t, []string{"a.txt"}, seen, "non-empty directory must be represented by its child only") +} + +// TestEmptyDirectoryHiddenInFlatListing ensures the marker is only surfaced for an +// explicit "/" probe, not in a plain (no prefix, no delimiter) listing. An empty +// directory left behind by deleted objects (e.g. after lifecycle expiration) must not +// appear as a phantom key, matching AWS S3. +func TestEmptyDirectoryHiddenInFlatListing(t *testing.T) { + s3a := &S3ApiServer{ + option: &S3ApiServerOption{BucketsPath: "/buckets"}, + } + + emptyDir := &filer_pb.Entry{ + Name: "expire1", + IsDirectory: true, + Attributes: &filer_pb.FuseAttributes{Mime: ""}, + } + keepDir := &filer_pb.Entry{ + Name: "keep2", + IsDirectory: true, + Attributes: &filer_pb.FuseAttributes{Mime: ""}, + } + keepObj := &filer_pb.Entry{ + Name: "foo", + IsDirectory: false, + Attributes: &filer_pb.FuseAttributes{}, + } + client := &testFilerClient{ + entriesByDir: map[string][]*filer_pb.Entry{ + "/buckets/test": {emptyDir, keepDir}, + "/buckets/test/expire1": {}, + "/buckets/test/keep2": {keepObj}, + }, + } + + // Plain flat listing: no prefix, no delimiter. + cursor := &ListingCursor{maxKeys: 1000} + var seen []string + _, err := s3a.doListFilerEntries(client, "/buckets/test", "", cursor, "", "", false, "test", + func(dir string, entry *filer_pb.Entry) { + seen = append(seen, entry.Name) + }) + + assert.NoError(t, err) + assert.Equal(t, []string{"foo"}, seen, "flat listing must hide the empty directory and return only real objects") +}