From cb06aee5ac3f7dcb872e527b8d84079e1622fdaf Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 15 Apr 2024 03:02:39 -0700 Subject: [PATCH] convert multipart-cleanup from a blocking unlink() to a rename to trash (#19495) Calling unlink() at two different locations on a disk when there is a lot to purge can lead to huge IO waits; instead, rely on rename() to .trash to avoid running multiple unlink() calls in parallel. --- cmd/erasure-multipart.go | 39 ++++++++++++++++++++++++--------------- cmd/erasure.go | 32 ++++++++++++++++---------------- cmd/globals.go | 7 +++++-- 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/cmd/erasure-multipart.go b/cmd/erasure-multipart.go index 96c0f3edb..069b39476 100644 --- a/cmd/erasure-multipart.go +++ b/cmd/erasure-multipart.go @@ -194,11 +194,10 @@ func (er erasureObjects) deleteAll(ctx context.Context, bucket, prefix string) { // Remove the old multipart uploads on the given disk. func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk StorageAPI, expiry time.Duration) { - now := time.Now() - diskPath := disk.Endpoint().Path + drivePath := disk.Endpoint().Path - readDirFn(pathJoin(diskPath, minioMetaMultipartBucket), func(shaDir string, typ os.FileMode) error { - readDirFn(pathJoin(diskPath, minioMetaMultipartBucket, shaDir), func(uploadIDDir string, typ os.FileMode) error { + readDirFn(pathJoin(drivePath, minioMetaMultipartBucket), func(shaDir string, typ os.FileMode) error { + readDirFn(pathJoin(drivePath, minioMetaMultipartBucket, shaDir), func(uploadIDDir string, typ os.FileMode) error { uploadIDPath := pathJoin(shaDir, uploadIDDir) fi, err := disk.ReadVersion(ctx, "", minioMetaMultipartBucket, uploadIDPath, "", ReadOptions{}) if err != nil { @@ -206,9 +205,12 @@ func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk Sto } w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout()) return w.Run(func() error { - wait := deletedCleanupSleeper.Timer(ctx) - if 
now.Sub(fi.ModTime) > expiry { - removeAll(pathJoin(diskPath, minioMetaMultipartBucket, uploadIDPath)) + wait := deleteMultipartCleanupSleeper.Timer(ctx) + if time.Since(fi.ModTime) > expiry { + pathUUID := mustGetUUID() + targetPath := pathJoin(drivePath, minioMetaTmpDeletedBucket, pathUUID) + + renameAll(pathJoin(drivePath, minioMetaMultipartBucket, uploadIDPath), targetPath, pathJoin(drivePath, minioMetaBucket)) } wait() return nil @@ -220,19 +222,23 @@ func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk Sto } w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout()) return w.Run(func() error { - wait := deletedCleanupSleeper.Timer(ctx) - if now.Sub(vi.Created) > expiry { + wait := deleteMultipartCleanupSleeper.Timer(ctx) + if time.Since(vi.Created) > expiry { + pathUUID := mustGetUUID() + targetPath := pathJoin(drivePath, minioMetaTmpDeletedBucket, pathUUID) + // We are not deleting shaDir recursively here, if shaDir is empty // and its older then we can happily delete it. 
- Remove(pathJoin(diskPath, minioMetaMultipartBucket, shaDir)) + Rename(pathJoin(drivePath, minioMetaMultipartBucket, shaDir), targetPath) } wait() return nil }) }) - readDirFn(pathJoin(diskPath, minioMetaTmpBucket), func(tmpDir string, typ os.FileMode) error { - if tmpDir == ".trash/" { // do not remove .trash/ here, it has its own routines + readDirFn(pathJoin(drivePath, minioMetaTmpBucket), func(tmpDir string, typ os.FileMode) error { + if strings.HasPrefix(tmpDir, ".trash") { + // do not remove .trash/ here, it has its own routines return nil } vi, err := disk.StatVol(ctx, pathJoin(minioMetaTmpBucket, tmpDir)) @@ -241,9 +247,12 @@ func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk Sto } w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout()) return w.Run(func() error { - wait := deletedCleanupSleeper.Timer(ctx) - if now.Sub(vi.Created) > expiry { - removeAll(pathJoin(diskPath, minioMetaTmpBucket, tmpDir)) + wait := deleteMultipartCleanupSleeper.Timer(ctx) + if time.Since(vi.Created) > expiry { + pathUUID := mustGetUUID() + targetPath := pathJoin(drivePath, minioMetaTmpDeletedBucket, pathUUID) + + renameAll(pathJoin(drivePath, minioMetaTmpBucket, tmpDir), targetPath, pathJoin(drivePath, minioMetaBucket)) } wait() return nil diff --git a/cmd/erasure.go b/cmd/erasure.go index 6d265b470..34971ab42 100644 --- a/cmd/erasure.go +++ b/cmd/erasure.go @@ -350,25 +350,25 @@ func (er erasureObjects) getOnlineDisksWithHealing(inclHealing bool) (newDisks [ // Clean-up previously deleted objects. from .minio.sys/tmp/.trash/ func (er erasureObjects) cleanupDeletedObjects(ctx context.Context) { - // run multiple cleanup's local to this server. 
var wg sync.WaitGroup for _, disk := range er.getLocalDisks() { - if disk != nil { - wg.Add(1) - go func(disk StorageAPI) { - defer wg.Done() - diskPath := disk.Endpoint().Path - readDirFn(pathJoin(diskPath, minioMetaTmpDeletedBucket), func(ddir string, typ os.FileMode) error { - w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout()) - return w.Run(func() error { - wait := deletedCleanupSleeper.Timer(ctx) - removeAll(pathJoin(diskPath, minioMetaTmpDeletedBucket, ddir)) - wait() - return nil - }) - }) - }(disk) + if disk == nil { + continue } + wg.Add(1) + go func(disk StorageAPI) { + defer wg.Done() + drivePath := disk.Endpoint().Path + readDirFn(pathJoin(drivePath, minioMetaTmpDeletedBucket), func(ddir string, typ os.FileMode) error { + w := xioutil.NewDeadlineWorker(globalDriveConfig.GetMaxTimeout()) + return w.Run(func() error { + wait := deleteCleanupSleeper.Timer(ctx) + removeAll(pathJoin(drivePath, minioMetaTmpDeletedBucket, ddir)) + wait() + return nil + }) + }) + }(disk) } wg.Wait() } diff --git a/cmd/globals.go b/cmd/globals.go index 10f0d5563..51ddd2417 100644 --- a/cmd/globals.go +++ b/cmd/globals.go @@ -453,8 +453,11 @@ var ( globalConnReadDeadline time.Duration globalConnWriteDeadline time.Duration - // Controller for deleted file sweeper. - deletedCleanupSleeper = newDynamicSleeper(5, 25*time.Millisecond, false) + // dynamic sleeper to avoid thundering herd for trash folder expunge routine + deleteCleanupSleeper = newDynamicSleeper(5, 25*time.Millisecond, false) + + // dynamic sleeper for multipart expiration routine + deleteMultipartCleanupSleeper = newDynamicSleeper(5, 25*time.Millisecond, false) // Is _MINIO_DISABLE_API_FREEZE_ON_BOOT set? globalDisableFreezeOnBoot bool