From c201d8bda903bf951ca4734d502b3f365fa8ba04 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Fri, 15 Mar 2024 12:27:59 -0700 Subject: [PATCH] write anything beyond 4k to be written in 4k pages (#19269) Previously we were skipping O_DIRECT 4k page writes prematurely, even where we could have made them: most buffers are a multiple of 4k up to some length, followed by some remainder. We only need to write the remainder without O_DIRECT. --- cmd/xl-storage.go | 6 ++--- internal/ioutil/ioutil.go | 49 +++++++++++++++++++++++---------------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index f5b0b86e1..bd9af41dc 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -54,8 +54,6 @@ import ( const ( nullVersionID = "null" - // Largest streams threshold per shard. - largestFileThreshold = 64 * humanize.MiByte // Optimized for HDDs // Small file threshold below which data accompanies metadata from storage layer. smallFileThreshold = 128 * humanize.KiByte // Optimized for NVMe/SSDs @@ -2113,11 +2111,11 @@ func (s *xlStorage) writeAllDirect(ctx context.Context, filePath string, fileSiz var bufp *[]byte switch { - case fileSize > 0 && fileSize >= largestFileThreshold: + case fileSize > 0 && fileSize >= xioutil.BlockSizeReallyLarge: // use a larger 4MiB buffer for a really large streams. bufp = xioutil.ODirectPoolXLarge.Get().(*[]byte) defer xioutil.ODirectPoolXLarge.Put(bufp) - case fileSize <= smallFileThreshold: + case fileSize <= xioutil.BlockSizeSmall: bufp = xioutil.ODirectPoolSmall.Get().(*[]byte) defer xioutil.ODirectPoolSmall.Put(bufp) default: diff --git a/internal/ioutil/ioutil.go b/internal/ioutil/ioutil.go index 09712f4b1..20a496afd 100644 --- a/internal/ioutil/ioutil.go +++ b/internal/ioutil/ioutil.go @@ -20,7 +20,6 @@ package ioutil import ( - "bytes" "context" "errors" "io" @@ -36,8 +35,8 @@ import ( // Block sizes constant. 
const ( BlockSizeSmall = 32 * humanize.KiByte // Default r/w block size for smaller objects. - BlockSizeLarge = 2 * humanize.MiByte // Default r/w block size for larger objects. - BlockSizeReallyLarge = 4 * humanize.MiByte // Default write block size for objects per shard >= 64MiB + BlockSizeLarge = 1 * humanize.MiByte // Default r/w block size for normal objects. + BlockSizeReallyLarge = 4 * humanize.MiByte // Default r/w block size for very large objects. ) // aligned sync.Pool's @@ -341,19 +340,6 @@ func CopyAligned(w io.Writer, r io.Reader, alignedBuf []byte, totalSize int64, f return 0, nil } - // Writes remaining bytes in the buffer. - writeUnaligned := func(w io.Writer, buf []byte) (remainingWritten int64, err error) { - // Disable O_DIRECT on fd's on unaligned buffer - // perform an amortized Fdatasync(fd) on the fd at - // the end, this is performed by the caller before - // closing 'w'. - if err = disk.DisableDirectIO(file); err != nil { - return remainingWritten, err - } - // Since w is *os.File io.Copy shall use ReadFrom() call. - return io.Copy(w, bytes.NewReader(buf)) - } - var written int64 for { buf := alignedBuf @@ -371,15 +357,38 @@ func CopyAligned(w io.Writer, r io.Reader, alignedBuf []byte, totalSize int64, f } buf = buf[:nr] - var nw int64 - if len(buf)%DirectioAlignSize == 0 { - var n int + var ( + n int + un int + nw int64 + ) + + remain := len(buf) % DirectioAlignSize + if remain == 0 { // buf is aligned for directio write() n, err = w.Write(buf) nw = int64(n) } else { + if remain < len(buf) { + n, err = w.Write(buf[:len(buf)-remain]) + if err != nil { + return written, err + } + nw = int64(n) + } + + // Disable O_DIRECT on fd's on unaligned buffer + // perform an amortized Fdatasync(fd) on the fd at + // the end, this is performed by the caller before + // closing 'w'. 
+ if err = disk.DisableDirectIO(file); err != nil { + return written, err + } + // buf is not aligned, hence use writeUnaligned() - nw, err = writeUnaligned(w, buf) + // for the remainder + un, err = w.Write(buf[len(buf)-remain:]) + nw += int64(un) } if nw > 0 {