diff --git a/backend/posix/posix.go b/backend/posix/posix.go
index b5bd9a7..828b4a6 100644
--- a/backend/posix/posix.go
+++ b/backend/posix/posix.go
@@ -6,7 +6,6 @@ import (
 	"encoding/hex"
 	"fmt"
 	"io"
-	"io/fs"
 	"os"
 	"path/filepath"
 	"sort"
@@ -964,20 +963,21 @@ func (p *Posix) ListObjects(bucket, prefix, marker, delim string, maxkeys int) (
 		return nil, fmt.Errorf("stat bucket: %w", err)
 	}
 
-	results, err := walk(bucket, prefix, delim, marker, maxkeys)
+	fileSystem := os.DirFS(bucket)
+	results, err := backend.Walk(fileSystem, prefix, delim, marker, maxkeys)
 	if err != nil {
 		return nil, fmt.Errorf("walk %v: %w", bucket, err)
 	}
 
 	return &s3.ListObjectsOutput{
-		CommonPrefixes: results.commonPrefixes,
-		Contents:       results.objects,
+		CommonPrefixes: results.CommonPrefixes,
+		Contents:       results.Objects,
 		Delimiter:      &delim,
-		IsTruncated:    results.truncated,
+		IsTruncated:    results.Truncated,
 		Marker:         &marker,
 		MaxKeys:        int32(maxkeys),
 		Name:           &bucket,
-		NextMarker:     &results.nextMarker,
+		NextMarker:     &results.NextMarker,
 		Prefix:         &prefix,
 	}, nil
 }
@@ -990,177 +990,21 @@ func (p *Posix) ListObjectsV2(bucket, prefix, marker, delim string, maxkeys int) (
 		return nil, fmt.Errorf("stat bucket: %w", err)
 	}
 
-	results, err := walk(bucket, prefix, delim, marker, maxkeys)
+	fileSystem := os.DirFS(bucket)
+	results, err := backend.Walk(fileSystem, prefix, delim, marker, maxkeys)
 	if err != nil {
 		return nil, fmt.Errorf("walk %v: %w", bucket, err)
 	}
 
 	return &s3.ListObjectsV2Output{
-		CommonPrefixes:        results.commonPrefixes,
-		Contents:              results.objects,
+		CommonPrefixes:        results.CommonPrefixes,
+		Contents:              results.Objects,
 		Delimiter:             &delim,
-		IsTruncated:           results.truncated,
+		IsTruncated:           results.Truncated,
 		ContinuationToken:     &marker,
 		MaxKeys:               int32(maxkeys),
 		Name:                  &bucket,
-		NextContinuationToken: &results.nextMarker,
+		NextContinuationToken: &results.NextMarker,
 		Prefix:                &prefix,
 	}, nil
 }
-
-type walkResults struct {
-	commonPrefixes []types.CommonPrefix
-	objects        []types.Object
-	truncated      bool
-	nextMarker     string
-}
-
-func walk(root, prefix, delimiter, marker string, max int) (walkResults, error) {
-	fileSystem := os.DirFS(root)
-	cpmap := make(map[string]struct{})
-	var objects []types.Object
-
-	var pastMarker bool
-	if marker == "" {
-		pastMarker = true
-	}
-
-	var pastMax bool
-	var newMarker string
-	var truncated bool
-
-	err := fs.WalkDir(fileSystem, ".", func(path string, d fs.DirEntry, err error) error {
-		if err != nil {
-			return err
-		}
-
-		if pastMax {
-			newMarker = path
-			truncated = true
-			return fs.SkipAll
-		}
-
-		if d.IsDir() {
-			// Ignore the root directory
-			if path == "." {
-				return nil
-			}
-
-			// If prefix is defined and the directory does not match prefix,
-			// do not descend into the directory because nothing will
-			// match this prefix. Make sure to append the / at the end of
-			// directories since this is implied as a directory path name.
-			if prefix != "" && !strings.HasPrefix(path+string(os.PathSeparator), prefix) {
-				return fs.SkipDir
-			}
-
-			// TODO: special case handling if directory is empty
-			// and was "PUT" explicitly
-			return nil
-		}
-
-		if !pastMarker {
-			if path != marker {
-				return nil
-			}
-			pastMarker = true
-		}
-
-		// If object doesnt have prefix, dont include in results.
-		if prefix != "" && !strings.HasPrefix(path, prefix) {
-			return nil
-		}
-
-		if delimiter == "" {
-			// If no delimeter specified, then all files with matching
-			// prefix are included in results
-			fi, err := d.Info()
-			if err != nil {
-				return fmt.Errorf("get info for %v: %w", path, err)
-			}
-
-			objects = append(objects, types.Object{
-				ETag:         new(string),
-				Key:          &path,
-				LastModified: backend.GetTimePtr(fi.ModTime()),
-				Size:         fi.Size(),
-			})
-			if (len(objects) + len(cpmap)) == max {
-				pastMax = true
-			}
-			return nil
-		}
-
-		// Since delimiter is specified, we only want results that
-		// do not contain the delimiter beyond the prefix. If the
-		// delimiter exists past the prefix, then the substring
-		// between the prefix and delimiter is part of common prefixes.
-		//
-		// For example:
-		// prefix = A/
-		// delimeter = /
-		// and objects:
-		// A/file
-		// A/B/file
-		// B/C
-		// would return:
-		// objects: A/file
-		// common prefix: A/B/
-		//
-		// Note: No obects are included past the common prefix since
-		// these are all rolled up into the common prefix.
-		// Note: The delimeter can be anything, so we have to operate on
-		// the full path without any assumptions on posix directory heirarchy
-		// here. Usually the delimeter with be "/", but thats not required.
-		suffix := strings.TrimPrefix(path, prefix)
-		before, _, found := strings.Cut(suffix, delimiter)
-		if !found {
-			fi, err := d.Info()
-			if err != nil {
-				return fmt.Errorf("get info for %v: %w", path, err)
-			}
-			objects = append(objects, types.Object{
-				ETag:         new(string),
-				Key:          &path,
-				LastModified: backend.GetTimePtr(fi.ModTime()),
-				Size:         fi.Size(),
-			})
-			if (len(objects) + len(cpmap)) == max {
-				pastMax = true
-			}
-			return nil
-		}
-
-		// Common prefixes are a set, so should not have duplicates.
-		// These are abstractly a "directory", so need to include the
-		// delimeter at the end.
-		cpmap[prefix+before+delimiter] = struct{}{}
-		if (len(objects) + len(cpmap)) == max {
-			pastMax = true
-		}
-
-		return nil
-	})
-	if err != nil {
-		return walkResults{}, err
-	}
-
-	commonPrefixStrings := make([]string, 0, len(cpmap))
-	for k := range cpmap {
-		commonPrefixStrings = append(commonPrefixStrings, k)
-	}
-	sort.Strings(commonPrefixStrings)
-	commonPrefixes := make([]types.CommonPrefix, 0, len(commonPrefixStrings))
-	for _, cp := range commonPrefixStrings {
-		commonPrefixes = append(commonPrefixes, types.CommonPrefix{
-			Prefix: &cp,
-		})
-	}
-
-	return walkResults{
-		commonPrefixes: commonPrefixes,
-		objects:        objects,
-		truncated:      truncated,
-		nextMarker:     newMarker,
-	}, nil
-}
diff --git a/backend/walk.go b/backend/walk.go
new file mode 100644
index 0000000..de78033
--- /dev/null
+++ b/backend/walk.go
@@ -0,0 +1,186 @@
+package backend
+
+import (
+	"fmt"
+	"io/fs"
+	"sort"
+	"strings"
+
+	"github.com/aws/aws-sdk-go-v2/service/s3/types"
+)
+
+// WalkResults holds what Walk collected: the common prefixes and objects
+// found, whether the walk stopped early, and the marker to resume from.
+type WalkResults struct {
+	CommonPrefixes []types.CommonPrefix
+	Objects        []types.Object
+	Truncated      bool
+	NextMarker     string
+}
+
+// Walk walks the supplied fs.FS and returns results compatible with list
+// objects responses. Only files whose path starts with prefix are reported.
+// With a non-empty delimiter, files whose path contains the delimiter past
+// the prefix are rolled up into common prefixes. With a non-empty marker,
+// files are skipped until the file named by marker is reached. Once max
+// results are collected and a further matching file is found, the walk stops
+// with Truncated set and NextMarker naming that file.
+func Walk(fileSystem fs.FS, prefix, delimiter, marker string, max int) (WalkResults, error) {
+	cpmap := make(map[string]struct{})
+	var objects []types.Object
+
+	var pastMarker bool
+	if marker == "" {
+		pastMarker = true
+	}
+
+	var pastMax bool
+	var newMarker string
+	var truncated bool
+
+	err := fs.WalkDir(fileSystem, ".", func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+
+		if d.IsDir() {
+			// Ignore the root directory
+			if path == "." {
+				return nil
+			}
+
+			// Skip a directory only when nothing inside it can match
+			// the prefix. fs.FS paths are always slash separated, so
+			// "/" is appended to treat the name as a directory path.
+			// A directory that is an ancestor of the prefix (such as
+			// "a" for prefix "a/b/") must still be entered, otherwise
+			// nothing below a nested prefix is ever found.
+			dir := path + "/"
+			if prefix != "" && !strings.HasPrefix(dir, prefix) && !strings.HasPrefix(prefix, dir) {
+				return fs.SkipDir
+			}
+
+			// TODO: special case handling if directory is empty
+			// and was "PUT" explicitly
+			return nil
+		}
+
+		if !pastMarker {
+			if path != marker {
+				return nil
+			}
+			pastMarker = true
+		}
+
+		// If object doesn't have prefix, don't include in results.
+		if prefix != "" && !strings.HasPrefix(path, prefix) {
+			return nil
+		}
+
+		// The limit was reached on an earlier entry, so this file is
+		// the first one left out and becomes the next marker. This is
+		// checked only for files that match the prefix because the
+		// marker is compared against file paths alone; a directory
+		// marker would never be matched by the following request.
+		if pastMax {
+			newMarker = path
+			truncated = true
+			return fs.SkipAll
+		}
+
+		if delimiter == "" {
+			// If no delimiter specified, then all files with matching
+			// prefix are included in results
+			fi, err := d.Info()
+			if err != nil {
+				return fmt.Errorf("get info for %v: %w", path, err)
+			}
+
+			objects = append(objects, types.Object{
+				ETag:         new(string),
+				Key:          &path,
+				LastModified: GetTimePtr(fi.ModTime()),
+				Size:         fi.Size(),
+			})
+			if (len(objects) + len(cpmap)) == max {
+				pastMax = true
+			}
+			return nil
+		}
+
+		// Since delimiter is specified, we only want results that
+		// do not contain the delimiter beyond the prefix. If the
+		// delimiter exists past the prefix, then the substring
+		// between the prefix and delimiter is part of common prefixes.
+		//
+		// For example:
+		//   prefix = A/
+		//   delimiter = /
+		// and objects:
+		//   A/file
+		//   A/B/file
+		//   B/C
+		// would return:
+		//   objects: A/file
+		//   common prefix: A/B/
+		//
+		// Note: No objects are included past the common prefix since
+		// these are all rolled up into the common prefix.
+		// Note: The delimiter can be anything, so we have to operate on
+		// the full path without any assumptions on posix directory hierarchy
+		// here. Usually the delimiter will be "/", but that's not required.
+		suffix := strings.TrimPrefix(path, prefix)
+		before, _, found := strings.Cut(suffix, delimiter)
+		if !found {
+			fi, err := d.Info()
+			if err != nil {
+				return fmt.Errorf("get info for %v: %w", path, err)
+			}
+			objects = append(objects, types.Object{
+				ETag:         new(string),
+				Key:          &path,
+				LastModified: GetTimePtr(fi.ModTime()),
+				Size:         fi.Size(),
+			})
+			if (len(objects) + len(cpmap)) == max {
+				pastMax = true
+			}
+			return nil
+		}
+
+		// Common prefixes are a set, so should not have duplicates.
+		// These are abstractly a "directory", so need to include the
+		// delimiter at the end.
+		cpmap[prefix+before+delimiter] = struct{}{}
+		if (len(objects) + len(cpmap)) == max {
+			pastMax = true
+		}
+
+		return nil
+	})
+	if err != nil {
+		return WalkResults{}, err
+	}
+
+	commonPrefixStrings := make([]string, 0, len(cpmap))
+	for k := range cpmap {
+		commonPrefixStrings = append(commonPrefixStrings, k)
+	}
+	sort.Strings(commonPrefixStrings)
+	commonPrefixes := make([]types.CommonPrefix, 0, len(commonPrefixStrings))
+	for i := range commonPrefixStrings {
+		// Take the address of the slice element, not of a loop variable:
+		// before Go 1.22 the loop variable is shared by all iterations,
+		// so every entry would end up pointing at the last prefix.
+		commonPrefixes = append(commonPrefixes, types.CommonPrefix{
+			Prefix: &commonPrefixStrings[i],
+		})
+	}
+
+	return WalkResults{
+		CommonPrefixes: commonPrefixes,
+		Objects:        objects,
+		Truncated:      truncated,
+		NextMarker:     newMarker,
+	}, nil
+}
diff --git a/backend/walk_test.go b/backend/walk_test.go
new file mode 100644
index 0000000..075b73c
--- /dev/null
+++ b/backend/walk_test.go
@@ -0,0 +1,163 @@
+package backend_test
+
+import (
+	"io/fs"
+	"testing"
+	"testing/fstest"
+
+	"github.com/aws/aws-sdk-go-v2/service/s3/types"
+	"github.com/versity/scoutgw/backend"
+)
+
+// walkTest pairs a file system to walk with the results expected from it.
+type walkTest struct {
+	fsys     fs.FS
+	expected backend.WalkResults
+}
+
+// TestWalk lists each test file system with an empty prefix and marker and
+// "/" as the delimiter, then compares the outcome with the expected results.
+func TestWalk(t *testing.T) {
+	tests := []walkTest{{
+		// test case from
+		// https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-prefixes.html
+		fsys: fstest.MapFS{
+			"sample.jpg":                       {},
+			"photos/2006/January/sample.jpg":   {},
+			"photos/2006/February/sample2.jpg": {},
+			"photos/2006/February/sample3.jpg": {},
+			"photos/2006/February/sample4.jpg": {},
+		},
+		expected: backend.WalkResults{
+			CommonPrefixes: []types.CommonPrefix{{
+				Prefix: backend.GetStringPtr("photos/"),
+			}},
+			Objects: []types.Object{{
+				Key: backend.GetStringPtr("sample.jpg"),
+			}},
+		},
+	}}
+
+	for _, tt := range tests {
+		res, err := backend.Walk(tt.fsys, "", "/", "", 1000)
+		if err != nil {
+			t.Fatalf("walk: %v", err)
+		}
+
+		compareResults(res, tt.expected, t)
+	}
+}
+
+// TestWalkNestedPrefix checks that a prefix more than one directory deep is
+// honored, which requires descending into the ancestors of the prefix.
+func TestWalkNestedPrefix(t *testing.T) {
+	fsys := fstest.MapFS{
+		"sample.jpg":                     {},
+		"photos/2006/January/sample.jpg": {},
+	}
+
+	res, err := backend.Walk(fsys, "photos/2006/", "/", "", 1000)
+	if err != nil {
+		t.Fatalf("walk: %v", err)
+	}
+
+	compareResults(res, backend.WalkResults{
+		CommonPrefixes: []types.CommonPrefix{{
+			Prefix: backend.GetStringPtr("photos/2006/January/"),
+		}},
+	}, t)
+}
+
+// compareResults reports a test error for each part of got (common prefixes,
+// objects) that does not match wanted.
+func compareResults(got, wanted backend.WalkResults, t *testing.T) {
+	if !compareCommonPrefix(got.CommonPrefixes, wanted.CommonPrefixes) {
+		t.Errorf("unexpected common prefix, got %v wanted %v",
+			printCommonPrefixes(got.CommonPrefixes),
+			printCommonPrefixes(wanted.CommonPrefixes))
+	}
+
+	if !compareObjects(got.Objects, wanted.Objects) {
+		t.Errorf("unexpected object, got %v wanted %v",
+			printObjects(got.Objects),
+			printObjects(wanted.Objects))
+	}
+}
+
+// compareCommonPrefix reports whether a and b have the same length and every
+// prefix in a is also present in b, regardless of order.
+func compareCommonPrefix(a, b []types.CommonPrefix) bool {
+	if len(a) != len(b) {
+		return false
+	}
+
+	for _, cp := range a {
+		if !containsCommonPrefix(cp, b) {
+			return false
+		}
+	}
+	return true
+}
+
+// containsCommonPrefix reports whether list holds a prefix equal to c's.
+func containsCommonPrefix(c types.CommonPrefix, list []types.CommonPrefix) bool {
+	for _, cp := range list {
+		if *c.Prefix == *cp.Prefix {
+			return true
+		}
+	}
+	return false
+}
+
+// printCommonPrefixes formats the prefixes as a bracketed, comma separated
+// list for test failure messages.
+func printCommonPrefixes(list []types.CommonPrefix) string {
+	res := "["
+	for _, cp := range list {
+		if res == "[" {
+			res = res + *cp.Prefix
+		} else {
+			res = res + ", " + *cp.Prefix
+		}
+	}
+	return res + "]"
+}
+
+// compareObjects reports whether a and b have the same length and every
+// object key in a is also present in b, regardless of order.
+func compareObjects(a, b []types.Object) bool {
+	if len(a) != len(b) {
+		return false
+	}
+
+	for _, obj := range a {
+		if !containsObject(obj, b) {
+			return false
+		}
+	}
+	return true
+}
+
+// containsObject reports whether list holds an object with the same key as c.
+func containsObject(c types.Object, list []types.Object) bool {
+	for _, obj := range list {
+		if *c.Key == *obj.Key {
+			return true
+		}
+	}
+	return false
+}
+
+// printObjects formats the object keys as a bracketed, comma separated list
+// for test failure messages.
+func printObjects(list []types.Object) string {
+	res := "["
+	for _, obj := range list {
+		if res == "[" {
+			res = res + *obj.Key
+		} else {
+			res = res + ", " + *obj.Key
+		}
+	}
+	return res + "]"
+}