fix(s3): list empty directories as directory markers (#9615)

* fix(s3): list empty directories as directory markers

A real but empty directory created out of band (mount, mkdir, filer API)
carries no MIME, so it was hidden from S3 listings. hadoop-aws getFileStatus
probes LIST prefix=dir/ &delimiter=/ and reads an empty result as a missing
path, which breaks Spark's eventLog.dir when it points at an empty directory.

Surface such directories as directory markers, matching directories created
via PutObject with a trailing "/". Emptiness comes from the recursion result,
and the marker MIME is set only on the in-memory listing entry, so empty
directories stay eligible for empty-folder cleanup.

* fix(s3): only surface empty directory markers for explicit dir probes

Restrict the empty-directory marker to a trailing-slash prefix probe
(prefix=dir/), the pattern hadoop-aws getFileStatus uses. Plain listings
are left as before, so an empty directory left behind by deleted objects
(e.g. after lifecycle expiration) is no longer shown as a phantom key.
This commit is contained in:
Chris Lu
2026-05-21 14:05:16 -07:00
committed by GitHub
parent 0accff0e4a
commit fbdcec1cba
2 changed files with 145 additions and 6 deletions

View File

@@ -677,22 +677,41 @@ func (s3a *S3ApiServer) doListFilerEntries(client filer_pb.SeaweedFilerClient, d
}
if delimiter != "/" || cursor.prefixEndsOnDelimiter {
// A trailing-slash prefix (e.g. "logs/") names one directory and asks
// whether it exists, so a real but empty directory must be reported for
// that probe.
explicitDirProbe := cursor.prefixEndsOnDelimiter
if cursor.prefixEndsOnDelimiter {
cursor.prefixEndsOnDelimiter = false
if entry.IsDirectoryKeyObject() {
eachEntryFn(dir, entry)
}
} else if entry.IsDirectoryKeyObject() {
}
isKeyObject := entry.IsDirectoryKeyObject()
if isKeyObject {
// Directory key objects (created via PutObject with trailing "/")
// must appear as regular keys in recursive listing mode.
eachEntryFn(dir, entry)
}
// Recurse into subdirectory to list any children
subNextMarker, subErr := s3a.doListFilerEntries(client, dir+"/"+entry.Name, "", cursor, "", delimiter, false, bucket, eachEntryFn)
// Recurse into subdirectory to list any children, noting whether the
// subtree produced any entries.
childEmitted := false
subNextMarker, subErr := s3a.doListFilerEntries(client, dir+"/"+entry.Name, "", cursor, "", delimiter, false, bucket, func(d string, e *filer_pb.Entry) {
childEmitted = true
eachEntryFn(d, e)
})
if subErr != nil {
err = fmt.Errorf("doListFilerEntries2: %w", subErr)
return
}
// A real but empty directory (created out of band via mount, mkdir or
// the filer API, so it carries no MIME) is otherwise invisible to S3
// clients that detect directories by listing it under its own "<dir>/"
// prefix. Surface it as a directory marker for that explicit probe,
// identical to a directory created via PutObject with a trailing "/", so
// tools like hadoop-aws can find it. Plain listings are left untouched, so
// empty directories left behind by deleted objects are not shown as keys.
if explicitDirProbe && !isKeyObject && !childEmitted && !cursor.isTruncated && entry.Attributes != nil {
entry.Attributes.Mime = s3_constants.FolderMimeType
eachEntryFn(dir, entry)
}
// println("doListFilerEntries2 dir", dir+"/"+entry.Name, "subNextMarker", subNextMarker)
nextMarker = entry.Name + "/" + subNextMarker
if cursor.isTruncated {

View File

@@ -4,6 +4,7 @@ import (
"testing"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
"github.com/stretchr/testify/assert"
)
@@ -61,3 +62,122 @@ func TestDirectoryListedAsCommonPrefix(t *testing.T) {
"Directory should be passed to callback for CommonPrefix processing with delimiter=/")
assert.Empty(t, seenFiles, "No files should be seen, only the directory")
}
// TestEmptyDirectorySurfacedAsMarker reproduces the hadoop-aws / Spark case where a
// real but empty directory (created via mount, mkdir or the filer API, so it has no
// MIME) must be visible to S3 clients that detect directories by listing under the
// "<dir>/" prefix. Such a directory is surfaced as a directory marker, identical to
// one created via PutObject with a trailing "/".
func TestEmptyDirectorySurfacedAsMarker(t *testing.T) {
s3a := &S3ApiServer{
option: &S3ApiServerOption{BucketsPath: "/buckets"},
}
emptyDir := &filer_pb.Entry{
Name: "logs",
IsDirectory: true,
Attributes: &filer_pb.FuseAttributes{Mime: ""},
}
client := &testFilerClient{
entriesByDir: map[string][]*filer_pb.Entry{
"/buckets/test": {emptyDir},
"/buckets/test/logs": {}, // empty directory has no children
},
}
// Mirrors the getFileStatus probe: prefix "logs/" with delimiter "/".
cursor := &ListingCursor{maxKeys: 1000, prefixEndsOnDelimiter: true}
var seen []*filer_pb.Entry
_, err := s3a.doListFilerEntries(client, "/buckets/test", "logs", cursor, "", "/", false, "test",
func(dir string, entry *filer_pb.Entry) {
seen = append(seen, entry)
})
assert.NoError(t, err)
if assert.Len(t, seen, 1, "empty directory must be surfaced under the logs/ prefix") {
assert.True(t, seen[0].IsDirectoryKeyObject(),
"empty directory must be marked as a directory key object so it lists as logs/")
assert.Equal(t, s3_constants.FolderMimeType, seen[0].Attributes.Mime)
}
}
// TestNonEmptyDirectoryGetsNoPhantomMarker ensures the empty-directory fix does not add
// a spurious marker for directories that already have children; the children represent
// the directory.
func TestNonEmptyDirectoryGetsNoPhantomMarker(t *testing.T) {
s3a := &S3ApiServer{
option: &S3ApiServerOption{BucketsPath: "/buckets"},
}
dir := &filer_pb.Entry{
Name: "data",
IsDirectory: true,
Attributes: &filer_pb.FuseAttributes{Mime: ""},
}
child := &filer_pb.Entry{
Name: "a.txt",
IsDirectory: false,
Attributes: &filer_pb.FuseAttributes{},
}
client := &testFilerClient{
entriesByDir: map[string][]*filer_pb.Entry{
"/buckets/test": {dir},
"/buckets/test/data": {child},
},
}
// Recursive listing under the "data/" prefix.
cursor := &ListingCursor{maxKeys: 1000, prefixEndsOnDelimiter: true}
var seen []string
_, err := s3a.doListFilerEntries(client, "/buckets/test", "data", cursor, "", "", false, "test",
func(dir string, entry *filer_pb.Entry) {
seen = append(seen, entry.Name)
})
assert.NoError(t, err)
assert.Equal(t, []string{"a.txt"}, seen, "non-empty directory must be represented by its child only")
}
// TestEmptyDirectoryHiddenInFlatListing ensures the marker is only surfaced for an
// explicit "<dir>/" probe, not in a plain (no prefix, no delimiter) listing. An empty
// directory left behind by deleted objects (e.g. after lifecycle expiration) must not
// appear as a phantom key, matching AWS S3.
func TestEmptyDirectoryHiddenInFlatListing(t *testing.T) {
s3a := &S3ApiServer{
option: &S3ApiServerOption{BucketsPath: "/buckets"},
}
emptyDir := &filer_pb.Entry{
Name: "expire1",
IsDirectory: true,
Attributes: &filer_pb.FuseAttributes{Mime: ""},
}
keepDir := &filer_pb.Entry{
Name: "keep2",
IsDirectory: true,
Attributes: &filer_pb.FuseAttributes{Mime: ""},
}
keepObj := &filer_pb.Entry{
Name: "foo",
IsDirectory: false,
Attributes: &filer_pb.FuseAttributes{},
}
client := &testFilerClient{
entriesByDir: map[string][]*filer_pb.Entry{
"/buckets/test": {emptyDir, keepDir},
"/buckets/test/expire1": {},
"/buckets/test/keep2": {keepObj},
},
}
// Plain flat listing: no prefix, no delimiter.
cursor := &ListingCursor{maxKeys: 1000}
var seen []string
_, err := s3a.doListFilerEntries(client, "/buckets/test", "", cursor, "", "", false, "test",
func(dir string, entry *filer_pb.Entry) {
seen = append(seen, entry.Name)
})
assert.NoError(t, err)
assert.Equal(t, []string{"foo"}, seen, "flat listing must hide the empty directory and return only real objects")
}