fix: expose posix tmpfile fd to enable copy_file_range

The complete multipart upload can be optimized in some cases
to not need to copy the full data from parts to the final
object file. If the filesystem supports it, there can be
optimizations to just clone exent references and not have to
actually copy the data.

For io.Copy() to make use of file_copy_range, we have to pass it
the *os.File file handles from the filesystem for the source and
destination. We were previously adding a layer of indirection
that was causing io.Copy() to fallback to full data copy. This
fixes the copy by exposing the underlying fd.

This also skips the falloc for the final object in complete
mutlipart upload, because some filesystems will be able to use
file_copy_range to optimize the copy and may not even need
new data allocations in the final object file.

Note that this only affects posix mode as scoutfs has special
handling for this case that is similar to but not compatible
with copy_file_range using a special ioctl.
This commit is contained in:
Ben McClelland
2024-05-21 16:02:04 -07:00
parent 99a84abdba
commit 190dd8853c
3 changed files with 20 additions and 9 deletions

View File

@@ -79,6 +79,9 @@ const (
bucketLockKey = "bucket-lock"
objectRetentionKey = "object-retention"
objectLegalHoldKey = "object-legal-hold"
doFalloc = true
skipFalloc = false
)
type PosixOpts struct {
@@ -461,7 +464,7 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM
}
f, err := p.openTmpFile(filepath.Join(bucket, metaTmpDir), bucket, object,
totalsize, acct)
totalsize, acct, skipFalloc)
if err != nil {
if errors.Is(err, syscall.EDQUOT) {
return nil, s3err.GetAPIError(s3err.ErrQuotaExceeded)
@@ -477,7 +480,7 @@ func (p *Posix) CompleteMultipartUpload(ctx context.Context, input *s3.CompleteM
if err != nil {
return nil, fmt.Errorf("open part %v: %v", *part.PartNumber, err)
}
_, err = io.Copy(f, pf)
_, err = io.Copy(f.File(), pf)
pf.Close()
if err != nil {
if errors.Is(err, syscall.EDQUOT) {
@@ -970,7 +973,7 @@ func (p *Posix) UploadPart(ctx context.Context, input *s3.UploadPartInput) (stri
partPath := filepath.Join(objdir, uploadID, fmt.Sprintf("%v", *part))
f, err := p.openTmpFile(filepath.Join(bucket, objdir),
bucket, partPath, length, acct)
bucket, partPath, length, acct, doFalloc)
if err != nil {
if errors.Is(err, syscall.EDQUOT) {
return "", s3err.GetAPIError(s3err.ErrQuotaExceeded)
@@ -1078,7 +1081,7 @@ func (p *Posix) UploadPartCopy(ctx context.Context, upi *s3.UploadPartCopyInput)
}
f, err := p.openTmpFile(filepath.Join(*upi.Bucket, objdir),
*upi.Bucket, partPath, length, acct)
*upi.Bucket, partPath, length, acct, doFalloc)
if err != nil {
if errors.Is(err, syscall.EDQUOT) {
return s3response.CopyObjectResult{}, s3err.GetAPIError(s3err.ErrQuotaExceeded)
@@ -1217,7 +1220,7 @@ func (p *Posix) PutObject(ctx context.Context, po *s3.PutObjectInput) (string, e
}
f, err := p.openTmpFile(filepath.Join(*po.Bucket, metaTmpDir),
*po.Bucket, *po.Key, contentLength, acct)
*po.Bucket, *po.Key, contentLength, acct, doFalloc)
if err != nil {
if errors.Is(err, syscall.EDQUOT) {
return "", s3err.GetAPIError(s3err.ErrQuotaExceeded)

View File

@@ -50,7 +50,7 @@ var (
defaultFilePerm uint32 = 0644
)
func (p *Posix) openTmpFile(dir, bucket, obj string, size int64, acct auth.Account) (*tmpfile, error) {
func (p *Posix) openTmpFile(dir, bucket, obj string, size int64, acct auth.Account, dofalloc bool) (*tmpfile, error) {
uid, gid, doChown := p.getChownIDs(acct)
// O_TMPFILE allows for a file handle to an unnamed file in the filesystem.
@@ -81,7 +81,7 @@ func (p *Posix) openTmpFile(dir, bucket, obj string, size int64, acct auth.Accou
gid: gid,
}
// falloc is best effort, its fine if this fails
if size > 0 {
if size > 0 && dofalloc {
tmp.falloc()
}
@@ -111,7 +111,7 @@ func (p *Posix) openTmpFile(dir, bucket, obj string, size int64, acct auth.Accou
}
// falloc is best effort, its fine if this fails
if size > 0 {
if size > 0 && dofalloc {
tmp.falloc()
}
@@ -221,3 +221,7 @@ func (tmp *tmpfile) Write(b []byte) (int, error) {
func (tmp *tmpfile) cleanup() {
tmp.f.Close()
}
func (tmp *tmpfile) File() *os.File {
return tmp.f
}

View File

@@ -36,7 +36,7 @@ type tmpfile struct {
size int64
}
func (p *Posix) openTmpFile(dir, bucket, obj string, size int64, acct auth.Account) (*tmpfile, error) {
func (p *Posix) openTmpFile(dir, bucket, obj string, size int64, acct auth.Account, _ bool) (*tmpfile, error) {
uid, gid, doChown := p.getChownIDs(acct)
// Create a temp file for upload while in progress (see link comments below).
@@ -112,3 +112,7 @@ func (tmp *tmpfile) Write(b []byte) (int, error) {
func (tmp *tmpfile) cleanup() {
tmp.f.Close()
}
func (tmp *tmpfile) File() *os.File {
return tmp.f
}