diff --git a/cmd/background-newdisks-heal-ops.go b/cmd/background-newdisks-heal-ops.go index 56a95e588..c24b41622 100644 --- a/cmd/background-newdisks-heal-ops.go +++ b/cmd/background-newdisks-heal-ops.go @@ -148,6 +148,26 @@ func initHealingTracker(disk StorageAPI, healID string) *healingTracker { return h } +func (h *healingTracker) resetHealing() { + h.mu.Lock() + defer h.mu.Unlock() + + h.ItemsHealed = 0 + h.ItemsFailed = 0 + h.BytesDone = 0 + h.BytesFailed = 0 + h.ResumeItemsHealed = 0 + h.ResumeItemsFailed = 0 + h.ResumeBytesDone = 0 + h.ResumeBytesFailed = 0 + h.ItemsSkipped = 0 + h.BytesSkipped = 0 + + h.HealedBuckets = nil + h.Object = "" + h.Bucket = "" +} + func (h *healingTracker) getLastUpdate() time.Time { h.mu.RLock() defer h.mu.RUnlock() @@ -349,6 +369,7 @@ func (h *healingTracker) toHealingDisk() madmin.HealingDisk { Object: h.Object, QueuedBuckets: h.QueuedBuckets, HealedBuckets: h.HealedBuckets, + RetryAttempts: h.RetryAttempts, ObjectsHealed: h.ItemsHealed, // Deprecated July 2021 ObjectsFailed: h.ItemsFailed, // Deprecated July 2021 @@ -482,16 +503,19 @@ func healFreshDisk(ctx context.Context, z *erasureServerPools, endpoint Endpoint // if objects have failed healing, we attempt a retry to heal the drive upto 3 times before giving up. 
if tracker.ItemsFailed > 0 && tracker.RetryAttempts < 4 { tracker.RetryAttempts++ - bugLogIf(ctx, tracker.update(ctx)) healingLogEvent(ctx, "Healing of drive '%s' is incomplete, retrying %s time (healed: %d, skipped: %d, failed: %d).", disk, humanize.Ordinal(int(tracker.RetryAttempts)), tracker.ItemsHealed, tracker.ItemsSkipped, tracker.ItemsFailed) + + tracker.resetHealing() + bugLogIf(ctx, tracker.update(ctx)) + return errRetryHealing } if tracker.ItemsFailed > 0 { healingLogEvent(ctx, "Healing of drive '%s' is incomplete, retried %d times (healed: %d, skipped: %d, failed: %d).", disk, - tracker.RetryAttempts-1, tracker.ItemsHealed, tracker.ItemsSkipped, tracker.ItemsFailed) + tracker.RetryAttempts, tracker.ItemsHealed, tracker.ItemsSkipped, tracker.ItemsFailed) } else { if tracker.RetryAttempts > 0 { healingLogEvent(ctx, "Healing of drive '%s' is complete, retried %d times (healed: %d, skipped: %d).", disk, diff --git a/cmd/global-heal.go b/cmd/global-heal.go index 49449e190..9ae58a323 100644 --- a/cmd/global-heal.go +++ b/cmd/global-heal.go @@ -167,6 +167,19 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, return errServerNotInitialized } + started := tracker.Started + if started.IsZero() || started.Equal(timeSentinel) { + healingLogIf(ctx, fmt.Errorf("unexpected tracker healing start time found: %v", started)) + started = time.Time{} + } + + // Final tracker update before quitting + defer func() { + tracker.setObject("") + tracker.setBucket("") + healingLogIf(ctx, tracker.update(ctx)) + }() + for _, bucket := range healBuckets { if err := bgSeq.healBucket(objAPI, bucket, true); err != nil { // Log bucket healing error if any, we shall retry again. @@ -435,13 +448,10 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, var versionNotFound int for _, version := range fivs.Versions { - // Ignore a version with a modtime newer than healing start time. 
- if version.ModTime.After(tracker.Started) { - continue - } - - // Apply lifecycle rules on the objects that are expired. - if filterLifecycle(bucket, version.Name, version) { + // Skip healing a version if either: + // - it was uploaded after healing of the drive started (checked only when a valid start time is known), or + // - it is already expired by an ILM rule. + if !started.IsZero() && version.ModTime.After(started) || filterLifecycle(bucket, version.Name, version) { versionNotFound++ if !send(healEntrySkipped(uint64(version.Size))) { return @@ -556,10 +566,6 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, healingLogIf(ctx, tracker.update(ctx)) } } - - tracker.setObject("") - tracker.setBucket("") - if retErr != nil { return retErr } diff --git a/go.mod b/go.mod index 5461bda23..9018318b3 100644 --- a/go.mod +++ b/go.mod @@ -51,7 +51,7 @@ require ( github.com/minio/highwayhash v1.0.3 github.com/minio/kms-go/kes v0.3.0 github.com/minio/kms-go/kms v0.4.0 - github.com/minio/madmin-go/v3 v3.0.63 + github.com/minio/madmin-go/v3 v3.0.64-0.20240822003756-fe52a32e526d github.com/minio/minio-go/v7 v7.0.75 github.com/minio/mux v1.9.0 github.com/minio/pkg/v3 v3.0.11 diff --git a/go.sum b/go.sum index c72aa291c..edb1a365c 100644 --- a/go.sum +++ b/go.sum @@ -426,8 +426,8 @@ github.com/minio/kms-go/kes v0.3.0 h1:SU8VGVM/Hk9w1OiSby3OatkcojooUqIdDHl6dtM6Nk github.com/minio/kms-go/kes v0.3.0/go.mod h1:w6DeVT878qEOU3nUrYVy1WOT5H1Ig9hbDIh698NYJKY= github.com/minio/kms-go/kms v0.4.0 h1:cLPZceEp+05xHotVBaeFJrgL7JcXM4lBy6PU0idkE7I= github.com/minio/kms-go/kms v0.4.0/go.mod h1:q12CehiIy2qgBnDKq6Q7wmPi2PHSyRVug5DKp0HAVeE= -github.com/minio/madmin-go/v3 v3.0.63 h1:ERJRxEI/FFRh8MDi4Z+3DKe4sONkQ0g+OkNzRpk7qxk= -github.com/minio/madmin-go/v3 v3.0.63/go.mod h1:IFAwr0XMrdsLovxAdCcuq/eoL4nRuMVQQv0iubJANQw= +github.com/minio/madmin-go/v3 v3.0.64-0.20240822003756-fe52a32e526d h1:ma9PAmbEs+TP9BdsbQLO3gUa2nHSzeuQobOCT8BWUpg= +github.com/minio/madmin-go/v3 v3.0.64-0.20240822003756-fe52a32e526d/go.mod 
h1:IFAwr0XMrdsLovxAdCcuq/eoL4nRuMVQQv0iubJANQw= github.com/minio/mc v0.0.0-20240815155011-479171e7be9c h1:0tzuJ1nV6oZstqKQ/CwK1dzxNJ/cE38ym4SPi2HsWoY= github.com/minio/mc v0.0.0-20240815155011-479171e7be9c/go.mod h1:Cr4x7eiMJfOTWwg40Rk3EaOI7i+DUyOAtqLO7x+heiA= github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=