mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-06-03 15:36:22 +00:00
fix(s3): list empty directories as directory markers (#9615)
* fix(s3): list empty directories as directory markers A real but empty directory created out of band (mount, mkdir, filer API) carries no MIME, so it was hidden from S3 listings. hadoop-aws getFileStatus probes LIST prefix=dir/ &delimiter=/ and reads an empty result as a missing path, which breaks Spark's eventLog.dir when it points at an empty directory. Surface such directories as directory markers, matching directories created via PutObject with a trailing "/". Emptiness comes from the recursion result, and the marker MIME is set only on the in-memory listing entry, so empty directories stay eligible for empty-folder cleanup. * fix(s3): only surface empty directory markers for explicit dir probes Restrict the empty-directory marker to a trailing-slash prefix probe (prefix=dir/), the pattern hadoop-aws getFileStatus uses. Plain listings are left as before, so an empty directory left behind by deleted objects (e.g. after lifecycle expiration) is no longer shown as a phantom key.
This commit is contained in:
@@ -677,22 +677,41 @@ func (s3a *S3ApiServer) doListFilerEntries(client filer_pb.SeaweedFilerClient, d
|
||||
}
|
||||
|
||||
if delimiter != "/" || cursor.prefixEndsOnDelimiter {
|
||||
// A trailing-slash prefix (e.g. "logs/") names one directory and asks
|
||||
// whether it exists, so a real but empty directory must be reported for
|
||||
// that probe.
|
||||
explicitDirProbe := cursor.prefixEndsOnDelimiter
|
||||
if cursor.prefixEndsOnDelimiter {
|
||||
cursor.prefixEndsOnDelimiter = false
|
||||
if entry.IsDirectoryKeyObject() {
|
||||
eachEntryFn(dir, entry)
|
||||
}
|
||||
} else if entry.IsDirectoryKeyObject() {
|
||||
}
|
||||
isKeyObject := entry.IsDirectoryKeyObject()
|
||||
if isKeyObject {
|
||||
// Directory key objects (created via PutObject with trailing "/")
|
||||
// must appear as regular keys in recursive listing mode.
|
||||
eachEntryFn(dir, entry)
|
||||
}
|
||||
// Recurse into subdirectory to list any children
|
||||
subNextMarker, subErr := s3a.doListFilerEntries(client, dir+"/"+entry.Name, "", cursor, "", delimiter, false, bucket, eachEntryFn)
|
||||
// Recurse into subdirectory to list any children, noting whether the
|
||||
// subtree produced any entries.
|
||||
childEmitted := false
|
||||
subNextMarker, subErr := s3a.doListFilerEntries(client, dir+"/"+entry.Name, "", cursor, "", delimiter, false, bucket, func(d string, e *filer_pb.Entry) {
|
||||
childEmitted = true
|
||||
eachEntryFn(d, e)
|
||||
})
|
||||
if subErr != nil {
|
||||
err = fmt.Errorf("doListFilerEntries2: %w", subErr)
|
||||
return
|
||||
}
|
||||
// A real but empty directory (created out of band via mount, mkdir or
|
||||
// the filer API, so it carries no MIME) is otherwise invisible to S3
|
||||
// clients that detect directories by listing it under its own "<dir>/"
|
||||
// prefix. Surface it as a directory marker for that explicit probe,
|
||||
// identical to a directory created via PutObject with a trailing "/", so
|
||||
// tools like hadoop-aws can find it. Plain listings are left untouched, so
|
||||
// empty directories left behind by deleted objects are not shown as keys.
|
||||
if explicitDirProbe && !isKeyObject && !childEmitted && !cursor.isTruncated && entry.Attributes != nil {
|
||||
entry.Attributes.Mime = s3_constants.FolderMimeType
|
||||
eachEntryFn(dir, entry)
|
||||
}
|
||||
// println("doListFilerEntries2 dir", dir+"/"+entry.Name, "subNextMarker", subNextMarker)
|
||||
nextMarker = entry.Name + "/" + subNextMarker
|
||||
if cursor.isTruncated {
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"testing"
|
||||
|
||||
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
||||
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
@@ -61,3 +62,122 @@ func TestDirectoryListedAsCommonPrefix(t *testing.T) {
|
||||
"Directory should be passed to callback for CommonPrefix processing with delimiter=/")
|
||||
assert.Empty(t, seenFiles, "No files should be seen, only the directory")
|
||||
}
|
||||
|
||||
// TestEmptyDirectorySurfacedAsMarker reproduces the hadoop-aws / Spark case where a
|
||||
// real but empty directory (created via mount, mkdir or the filer API, so it has no
|
||||
// MIME) must be visible to S3 clients that detect directories by listing under the
|
||||
// "<dir>/" prefix. Such a directory is surfaced as a directory marker, identical to
|
||||
// one created via PutObject with a trailing "/".
|
||||
func TestEmptyDirectorySurfacedAsMarker(t *testing.T) {
|
||||
s3a := &S3ApiServer{
|
||||
option: &S3ApiServerOption{BucketsPath: "/buckets"},
|
||||
}
|
||||
|
||||
emptyDir := &filer_pb.Entry{
|
||||
Name: "logs",
|
||||
IsDirectory: true,
|
||||
Attributes: &filer_pb.FuseAttributes{Mime: ""},
|
||||
}
|
||||
client := &testFilerClient{
|
||||
entriesByDir: map[string][]*filer_pb.Entry{
|
||||
"/buckets/test": {emptyDir},
|
||||
"/buckets/test/logs": {}, // empty directory has no children
|
||||
},
|
||||
}
|
||||
|
||||
// Mirrors the getFileStatus probe: prefix "logs/" with delimiter "/".
|
||||
cursor := &ListingCursor{maxKeys: 1000, prefixEndsOnDelimiter: true}
|
||||
var seen []*filer_pb.Entry
|
||||
_, err := s3a.doListFilerEntries(client, "/buckets/test", "logs", cursor, "", "/", false, "test",
|
||||
func(dir string, entry *filer_pb.Entry) {
|
||||
seen = append(seen, entry)
|
||||
})
|
||||
|
||||
assert.NoError(t, err)
|
||||
if assert.Len(t, seen, 1, "empty directory must be surfaced under the logs/ prefix") {
|
||||
assert.True(t, seen[0].IsDirectoryKeyObject(),
|
||||
"empty directory must be marked as a directory key object so it lists as logs/")
|
||||
assert.Equal(t, s3_constants.FolderMimeType, seen[0].Attributes.Mime)
|
||||
}
|
||||
}
|
||||
|
||||
// TestNonEmptyDirectoryGetsNoPhantomMarker ensures the empty-directory fix does not add
|
||||
// a spurious marker for directories that already have children; the children represent
|
||||
// the directory.
|
||||
func TestNonEmptyDirectoryGetsNoPhantomMarker(t *testing.T) {
|
||||
s3a := &S3ApiServer{
|
||||
option: &S3ApiServerOption{BucketsPath: "/buckets"},
|
||||
}
|
||||
|
||||
dir := &filer_pb.Entry{
|
||||
Name: "data",
|
||||
IsDirectory: true,
|
||||
Attributes: &filer_pb.FuseAttributes{Mime: ""},
|
||||
}
|
||||
child := &filer_pb.Entry{
|
||||
Name: "a.txt",
|
||||
IsDirectory: false,
|
||||
Attributes: &filer_pb.FuseAttributes{},
|
||||
}
|
||||
client := &testFilerClient{
|
||||
entriesByDir: map[string][]*filer_pb.Entry{
|
||||
"/buckets/test": {dir},
|
||||
"/buckets/test/data": {child},
|
||||
},
|
||||
}
|
||||
|
||||
// Recursive listing under the "data/" prefix.
|
||||
cursor := &ListingCursor{maxKeys: 1000, prefixEndsOnDelimiter: true}
|
||||
var seen []string
|
||||
_, err := s3a.doListFilerEntries(client, "/buckets/test", "data", cursor, "", "", false, "test",
|
||||
func(dir string, entry *filer_pb.Entry) {
|
||||
seen = append(seen, entry.Name)
|
||||
})
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, []string{"a.txt"}, seen, "non-empty directory must be represented by its child only")
|
||||
}
|
||||
|
||||
// TestEmptyDirectoryHiddenInFlatListing ensures the marker is only surfaced for an
|
||||
// explicit "<dir>/" probe, not in a plain (no prefix, no delimiter) listing. An empty
|
||||
// directory left behind by deleted objects (e.g. after lifecycle expiration) must not
|
||||
// appear as a phantom key, matching AWS S3.
|
||||
func TestEmptyDirectoryHiddenInFlatListing(t *testing.T) {
|
||||
s3a := &S3ApiServer{
|
||||
option: &S3ApiServerOption{BucketsPath: "/buckets"},
|
||||
}
|
||||
|
||||
emptyDir := &filer_pb.Entry{
|
||||
Name: "expire1",
|
||||
IsDirectory: true,
|
||||
Attributes: &filer_pb.FuseAttributes{Mime: ""},
|
||||
}
|
||||
keepDir := &filer_pb.Entry{
|
||||
Name: "keep2",
|
||||
IsDirectory: true,
|
||||
Attributes: &filer_pb.FuseAttributes{Mime: ""},
|
||||
}
|
||||
keepObj := &filer_pb.Entry{
|
||||
Name: "foo",
|
||||
IsDirectory: false,
|
||||
Attributes: &filer_pb.FuseAttributes{},
|
||||
}
|
||||
client := &testFilerClient{
|
||||
entriesByDir: map[string][]*filer_pb.Entry{
|
||||
"/buckets/test": {emptyDir, keepDir},
|
||||
"/buckets/test/expire1": {},
|
||||
"/buckets/test/keep2": {keepObj},
|
||||
},
|
||||
}
|
||||
|
||||
// Plain flat listing: no prefix, no delimiter.
|
||||
cursor := &ListingCursor{maxKeys: 1000}
|
||||
var seen []string
|
||||
_, err := s3a.doListFilerEntries(client, "/buckets/test", "", cursor, "", "", false, "test",
|
||||
func(dir string, entry *filer_pb.Entry) {
|
||||
seen = append(seen, entry.Name)
|
||||
})
|
||||
|
||||
assert.NoError(t, err)
|
||||
assert.Equal(t, []string{"foo"}, seen, "flat listing must hide the empty directory and return only real objects")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user