Merge pull request #18 from versity/ben/posix

backend: move posix list objects walk to common utility
This commit is contained in:
Ben McClelland
2023-05-23 15:43:32 -07:00
committed by GitHub
3 changed files with 309 additions and 168 deletions

View File

@@ -6,7 +6,6 @@ import (
"encoding/hex"
"fmt"
"io"
"io/fs"
"os"
"path/filepath"
"sort"
@@ -964,20 +963,21 @@ func (p *Posix) ListObjects(bucket, prefix, marker, delim string, maxkeys int) (
return nil, fmt.Errorf("stat bucket: %w", err)
}
results, err := walk(bucket, prefix, delim, marker, maxkeys)
fileSystem := os.DirFS(bucket)
results, err := backend.Walk(fileSystem, prefix, delim, marker, maxkeys)
if err != nil {
return nil, fmt.Errorf("walk %v: %w", bucket, err)
}
return &s3.ListObjectsOutput{
CommonPrefixes: results.commonPrefixes,
Contents: results.objects,
CommonPrefixes: results.CommonPrefixes,
Contents: results.Objects,
Delimiter: &delim,
IsTruncated: results.truncated,
IsTruncated: results.Truncated,
Marker: &marker,
MaxKeys: int32(maxkeys),
Name: &bucket,
NextMarker: &results.nextMarker,
NextMarker: &results.NextMarker,
Prefix: &prefix,
}, nil
}
@@ -990,177 +990,21 @@ func (p *Posix) ListObjectsV2(bucket, prefix, marker, delim string, maxkeys int)
return nil, fmt.Errorf("stat bucket: %w", err)
}
results, err := walk(bucket, prefix, delim, marker, maxkeys)
fileSystem := os.DirFS(bucket)
results, err := backend.Walk(fileSystem, prefix, delim, marker, maxkeys)
if err != nil {
return nil, fmt.Errorf("walk %v: %w", bucket, err)
}
return &s3.ListObjectsV2Output{
CommonPrefixes: results.commonPrefixes,
Contents: results.objects,
CommonPrefixes: results.CommonPrefixes,
Contents: results.Objects,
Delimiter: &delim,
IsTruncated: results.truncated,
IsTruncated: results.Truncated,
ContinuationToken: &marker,
MaxKeys: int32(maxkeys),
Name: &bucket,
NextContinuationToken: &results.nextMarker,
NextContinuationToken: &results.NextMarker,
Prefix: &prefix,
}, nil
}
// walkResults holds what walk collected for a list objects response.
type walkResults struct {
	// commonPrefixes are the rolled-up prefixes found when a delimiter is
	// given, sorted by prefix string.
	commonPrefixes []types.CommonPrefix
	// objects are the files reported individually, in walk order.
	objects []types.Object
	// truncated is set when the walk stopped because max results had been
	// collected and another entry was still visited afterwards.
	truncated bool
	// nextMarker is the path of that first entry visited after max results
	// had been collected; it is empty when the listing is not truncated.
	nextMarker string
}
// walk lists the files below root in the shape needed by the list objects
// responses. It opens root with os.DirFS and visits every entry with
// fs.WalkDir.
//
// prefix, when non-empty, limits results to paths that start with prefix.
// delimiter, when non-empty, rolls paths that contain it after the prefix up
// into common prefixes. marker, when non-empty, suppresses every file until
// the file whose path equals marker is reached; that file is then processed
// like any other. Once the number of objects plus common prefixes equals
// max, the next visited entry is recorded as the next marker and the walk
// stops.
//
// An error is returned when the walk reports one or when the info of a file
// cannot be read.
func walk(root, prefix, delimiter, marker string, max int) (walkResults, error) {
	fileSystem := os.DirFS(root)

	// Common prefixes are kept as map keys so that duplicates collapse.
	cpmap := make(map[string]struct{})
	var objects []types.Object

	// With an empty marker there is nothing to skip.
	var pastMarker bool
	if marker == "" {
		pastMarker = true
	}

	var pastMax bool
	var newMarker string
	var truncated bool

	err := fs.WalkDir(fileSystem, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		// The previous entry filled the result set; remember where to
		// resume and stop walking.
		// NOTE(review): path may be a directory here, while the marker
		// comparison below only runs for files - confirm that a follow-up
		// call can resume from such a marker.
		if pastMax {
			newMarker = path
			truncated = true
			return fs.SkipAll
		}
		if d.IsDir() {
			// Ignore the root directory
			if path == "." {
				return nil
			}
			// If prefix is defined and the directory does not match prefix,
			// do not descend into the directory because nothing will
			// match this prefix. Make sure to append the / at the end of
			// directories since this is implied as a directory path name.
			// NOTE(review): a directory that is only a leading part of
			// prefix (for example "a" with prefix "a/b/") also fails this
			// test and is skipped - confirm that this is intended.
			if prefix != "" && !strings.HasPrefix(path+string(os.PathSeparator), prefix) {
				return fs.SkipDir
			}
			// TODO: special case handling if directory is empty
			// and was "PUT" explicitly
			return nil
		}
		// Skip files until the one named by marker has been reached.
		if !pastMarker {
			if path != marker {
				return nil
			}
			pastMarker = true
		}
		// If the object doesn't have the prefix, don't include it in results.
		if prefix != "" && !strings.HasPrefix(path, prefix) {
			return nil
		}
		if delimiter == "" {
			// If no delimiter is specified, then all files with matching
			// prefix are included in results
			fi, err := d.Info()
			if err != nil {
				return fmt.Errorf("get info for %v: %w", path, err)
			}
			objects = append(objects, types.Object{
				ETag: new(string),
				Key: &path,
				LastModified: backend.GetTimePtr(fi.ModTime()),
				Size: fi.Size(),
			})
			if (len(objects) + len(cpmap)) == max {
				pastMax = true
			}
			return nil
		}
		// Since delimiter is specified, we only want results that
		// do not contain the delimiter beyond the prefix. If the
		// delimiter exists past the prefix, then the substring
		// between the prefix and delimiter is part of common prefixes.
		//
		// For example:
		// prefix = A/
		// delimiter = /
		// and objects:
		// A/file
		// A/B/file
		// B/C
		// would return:
		// objects: A/file
		// common prefix: A/B/
		//
		// Note: No objects are included past the common prefix since
		// these are all rolled up into the common prefix.
		// Note: The delimiter can be anything, so we have to operate on
		// the full path without any assumptions on posix directory hierarchy
		// here. Usually the delimiter will be "/", but that's not required.
		suffix := strings.TrimPrefix(path, prefix)
		before, _, found := strings.Cut(suffix, delimiter)
		if !found {
			fi, err := d.Info()
			if err != nil {
				return fmt.Errorf("get info for %v: %w", path, err)
			}
			objects = append(objects, types.Object{
				ETag: new(string),
				Key: &path,
				LastModified: backend.GetTimePtr(fi.ModTime()),
				Size: fi.Size(),
			})
			if (len(objects) + len(cpmap)) == max {
				pastMax = true
			}
			return nil
		}
		// Common prefixes are a set, so should not have duplicates.
		// These are abstractly a "directory", so need to include the
		// delimiter at the end.
		cpmap[prefix+before+delimiter] = struct{}{}
		if (len(objects) + len(cpmap)) == max {
			pastMax = true
		}
		return nil
	})
	if err != nil {
		return walkResults{}, err
	}

	// Map iteration order is random, so sort the prefixes before returning.
	commonPrefixStrings := make([]string, 0, len(cpmap))
	for k := range cpmap {
		commonPrefixStrings = append(commonPrefixStrings, k)
	}
	sort.Strings(commonPrefixStrings)
	commonPrefixes := make([]types.CommonPrefix, 0, len(commonPrefixStrings))
	// NOTE(review): &cp is the address of the range variable; with Go
	// versions before 1.22 every element shares that one variable - verify
	// against the Go version this module builds with.
	for _, cp := range commonPrefixStrings {
		commonPrefixes = append(commonPrefixes, types.CommonPrefix{
			Prefix: &cp,
		})
	}
	return walkResults{
		commonPrefixes: commonPrefixes,
		objects: objects,
		truncated: truncated,
		nextMarker: newMarker,
	}, nil
}

169
backend/walk.go Normal file
View File

@@ -0,0 +1,169 @@
package backend
import (
"fmt"
"io/fs"
"os"
"sort"
"strings"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
)
// WalkResults holds what Walk collected for a list objects response.
type WalkResults struct {
	// CommonPrefixes are the rolled-up prefixes found when a delimiter is
	// given, sorted by prefix string.
	CommonPrefixes []types.CommonPrefix
	// Objects are the files reported individually, in walk order.
	Objects []types.Object
	// Truncated is set when the walk stopped because max results had been
	// collected and another entry was still visited afterwards.
	Truncated bool
	// NextMarker is the path of that first entry visited after max results
	// had been collected; it is empty when the listing is not truncated.
	NextMarker string
}
// Walk walks the supplied fs.FS and returns results compatible with list
// objects responses.
//
// Every non-directory entry is a candidate object whose key is its path
// relative to the root of fileSystem.
//
//   - prefix: when non-empty, only keys that start with prefix are reported.
//   - delimiter: when non-empty, a key that contains delimiter after the
//     prefix is not listed itself; instead the part up to and including the
//     delimiter is reported once as a common prefix.
//   - marker: when non-empty, files are ignored until the file whose path
//     equals marker is reached; that file is the first candidate.
//   - max: once the number of objects plus common prefixes equals max, the
//     next file that passes the marker and prefix checks becomes NextMarker,
//     Truncated is set and the walk stops.
//
// Common prefixes are returned sorted. An error is returned when the walk
// itself fails or when the info of a listed file cannot be read; the
// returned WalkResults is empty in that case.
func Walk(fileSystem fs.FS, prefix, delimiter, marker string, max int) (WalkResults, error) {
	// Common prefixes are kept as map keys so that duplicates collapse.
	cpmap := make(map[string]struct{})
	var objects []types.Object

	// With an empty marker there is nothing to skip.
	pastMarker := marker == ""

	var pastMax bool
	var newMarker string
	var truncated bool

	err := fs.WalkDir(fileSystem, ".", func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		if d.IsDir() {
			// Ignore the root directory
			if path == "." {
				return nil
			}

			// Directories are compared with a trailing separator since
			// they are implied directory path names.
			// NOTE(review): io/fs paths are always slash separated, so
			// this is only right where os.PathSeparator is '/'.
			dirPath := path + string(os.PathSeparator)

			// Only skip a directory when it can not contain a match:
			// it neither starts with prefix nor is a leading part of
			// prefix. The second test keeps "a" for prefix "a/b/", which
			// still has to be descended to reach "a/b/".
			if prefix != "" &&
				!strings.HasPrefix(dirPath, prefix) &&
				!strings.HasPrefix(prefix, dirPath) {
				return fs.SkipDir
			}

			// TODO: special case handling if directory is empty
			// and was "PUT" explicitly
			return nil
		}

		// Skip files until the one named by marker has been reached.
		if !pastMarker {
			if path != marker {
				return nil
			}
			pastMarker = true
		}

		// If the object doesn't have the prefix, don't include it in results.
		if prefix != "" && !strings.HasPrefix(path, prefix) {
			return nil
		}

		// The result set is already full. Only a file that passed the
		// checks above is used as next marker, because the marker
		// comparison of a follow-up call is made against such files only;
		// a directory or a non matching path could never be resumed from.
		if pastMax {
			newMarker = path
			truncated = true
			return fs.SkipAll
		}

		if delimiter != "" {
			// Since delimiter is specified, we only want results that
			// do not contain the delimiter beyond the prefix. If the
			// delimiter exists past the prefix, then the substring
			// between the prefix and delimiter is part of common prefixes.
			//
			// For example:
			// prefix = A/
			// delimiter = /
			// and objects:
			// A/file
			// A/B/file
			// B/C
			// would return:
			// objects: A/file
			// common prefix: A/B/
			//
			// Note: No objects are included past the common prefix since
			// these are all rolled up into the common prefix.
			// Note: The delimiter can be anything, so we have to operate
			// on the full path without any assumptions on posix directory
			// hierarchy here. Usually the delimiter will be "/", but
			// that's not required.
			suffix := strings.TrimPrefix(path, prefix)
			if before, _, found := strings.Cut(suffix, delimiter); found {
				// Common prefixes are a set, so should not have
				// duplicates. These are abstractly a "directory", so
				// need to include the delimiter at the end.
				cpmap[prefix+before+delimiter] = struct{}{}
				if len(objects)+len(cpmap) == max {
					pastMax = true
				}
				return nil
			}
		}

		// Either no delimiter was given or it does not occur after the
		// prefix: the file is listed as an object of its own.
		fi, err := d.Info()
		if err != nil {
			return fmt.Errorf("get info for %v: %w", path, err)
		}
		objects = append(objects, types.Object{
			ETag:         new(string),
			Key:          &path,
			LastModified: GetTimePtr(fi.ModTime()),
			Size:         fi.Size(),
		})
		if len(objects)+len(cpmap) == max {
			pastMax = true
		}
		return nil
	})
	if err != nil {
		return WalkResults{}, err
	}

	// Map iteration order is random, so sort the prefixes before returning.
	commonPrefixStrings := make([]string, 0, len(cpmap))
	for k := range cpmap {
		commonPrefixStrings = append(commonPrefixStrings, k)
	}
	sort.Strings(commonPrefixStrings)

	// Point at the slice elements rather than at a range variable, which
	// is shared by all iterations before Go 1.22 and would make every
	// common prefix read as the last one.
	commonPrefixes := make([]types.CommonPrefix, 0, len(commonPrefixStrings))
	for i := range commonPrefixStrings {
		commonPrefixes = append(commonPrefixes, types.CommonPrefix{
			Prefix: &commonPrefixStrings[i],
		})
	}

	return WalkResults{
		CommonPrefixes: commonPrefixes,
		Objects:        objects,
		Truncated:      truncated,
		NextMarker:     newMarker,
	}, nil
}

128
backend/walk_test.go Normal file
View File

@@ -0,0 +1,128 @@
package backend_test
import (
"io/fs"
"testing"
"testing/fstest"
"github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/versity/scoutgw/backend"
)
// walkTest is one TestWalk case: a file system to walk and the results the
// walk is expected to produce for it.
type walkTest struct {
	// fsys is the file system handed to backend.Walk.
	fsys fs.FS
	// expected holds the results that the walk output is compared against.
	expected backend.WalkResults
}
// TestWalk runs backend.Walk over each case with no prefix, "/" as the
// delimiter, no marker and a limit of 1000, and checks the outcome against
// the expected results of the case.
func TestWalk(t *testing.T) {
	// test case from
	// https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-prefixes.html
	s3DocExample := walkTest{
		fsys: fstest.MapFS{
			"sample.jpg":                       {},
			"photos/2006/January/sample.jpg":   {},
			"photos/2006/February/sample2.jpg": {},
			"photos/2006/February/sample3.jpg": {},
			"photos/2006/February/sample4.jpg": {},
		},
		expected: backend.WalkResults{
			CommonPrefixes: []types.CommonPrefix{
				{Prefix: backend.GetStringPtr("photos/")},
			},
			Objects: []types.Object{
				{Key: backend.GetStringPtr("sample.jpg")},
			},
		},
	}

	cases := []walkTest{s3DocExample}
	for i := range cases {
		tc := cases[i]

		got, walkErr := backend.Walk(tc.fsys, "", "/", "", 1000)
		if walkErr != nil {
			t.Fatalf("walk: %v", walkErr)
		}

		compareResults(got, tc.expected, t)
	}
}
// compareResults reports an error on t for each part of got that differs
// from wanted. Only the common prefixes and the object keys are compared.
func compareResults(got, wanted backend.WalkResults, t *testing.T) {
	// Attribute failures to the calling test line instead of this helper.
	t.Helper()

	if !compareCommonPrefix(got.CommonPrefixes, wanted.CommonPrefixes) {
		t.Errorf("unexpected common prefix, got %v wanted %v",
			printCommonPrefixes(got.CommonPrefixes),
			printCommonPrefixes(wanted.CommonPrefixes))
	}

	if !compareObjects(got.Objects, wanted.Objects) {
		// This message used to repeat "common prefix", which pointed at
		// the wrong part of the results.
		t.Errorf("unexpected objects, got %v wanted %v",
			printObjects(got.Objects),
			printObjects(wanted.Objects))
	}
}
// compareCommonPrefix reports whether a and b have the same length and every
// common prefix of a also occurs in b. Order is not significant. Two empty
// lists compare equal.
func compareCommonPrefix(a, b []types.CommonPrefix) bool {
	if len(a) != len(b) {
		return false
	}

	// Every element has to be found; a single hit is not enough to call
	// the lists equal.
	for _, cp := range a {
		if !containsCommonPrefix(cp, b) {
			return false
		}
	}
	return true
}
// containsCommonPrefix reports whether list holds an entry with the same
// prefix string as c. The Prefix pointers are dereferenced without a nil
// check.
func containsCommonPrefix(c types.CommonPrefix, list []types.CommonPrefix) bool {
	for i := range list {
		if *c.Prefix != *list[i].Prefix {
			continue
		}
		return true
	}
	return false
}
// printCommonPrefixes renders the prefix strings of list between square
// brackets for use in test failure messages. ", " is put in front of an
// entry whenever something has already been written after the opening
// bracket.
func printCommonPrefixes(list []types.CommonPrefix) string {
	out := "["
	for i := range list {
		sep := ", "
		if out == "[" {
			// Nothing written yet, so no separator is needed.
			sep = ""
		}
		out += sep + *list[i].Prefix
	}
	return out + "]"
}
// compareObjects reports whether a and b have the same length and the key of
// every object of a also occurs in b. Order is not significant. Two empty
// lists compare equal.
func compareObjects(a, b []types.Object) bool {
	if len(a) != len(b) {
		return false
	}

	// Every element has to be found; a single hit is not enough to call
	// the lists equal.
	for _, obj := range a {
		if !containsObject(obj, b) {
			return false
		}
	}
	return true
}
// containsObject reports whether list holds an object with the same key as
// c. The Key pointers are dereferenced without a nil check.
func containsObject(c types.Object, list []types.Object) bool {
	for i := range list {
		if *c.Key != *list[i].Key {
			continue
		}
		return true
	}
	return false
}
// printObjects renders the keys of list between square brackets for use in
// test failure messages. ", " is put in front of a key whenever something
// has already been written after the opening bracket.
func printObjects(list []types.Object) string {
	out := "["
	for i := range list {
		sep := ", "
		if out == "[" {
			// Nothing written yet, so no separator is needed.
			sep = ""
		}
		out += sep + *list[i].Key
	}
	return out + "]"
}