From a3f7d575e06162528c1a50083bff73dfdd20d78e Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 26 Jul 2021 11:48:09 -0700 Subject: [PATCH] improve delete-marker healing (#12794) delete-markers missing on drives were not healed due to few things disksWithAllParts() does not know-how to deal with delete markers, add support for that. fixes #12787 --- cmd/erasure-healing-common.go | 59 +++++++++++++++++++---------------- cmd/erasure-healing.go | 9 +++--- 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/cmd/erasure-healing-common.go b/cmd/erasure-healing-common.go index c949d054d..b12ec67a2 100644 --- a/cmd/erasure-healing-common.go +++ b/cmd/erasure-healing-common.go @@ -225,8 +225,9 @@ func fileInfoConsistent(ctx context.Context, partsMetadata []FileInfo, errs []er // // - slice of errors about the state of data files on disk - can have // a not-found error or a hash-mismatch error. -func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo, errs []error, bucket, - object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) { +func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetadata []FileInfo, + errs []error, bucket, object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) { + // List of disks having latest version of the object er.meta (by modtime) _, modTime, dataDir := listOnlineDisks(onlineDisks, partsMetadata, errs) @@ -239,15 +240,17 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad // consider the offline disks as consistent. continue } - if len(meta.Erasure.Distribution) != len(onlineDisks) { - // Erasure distribution seems to have lesser - // number of items than number of online disks. - inconsistent++ - continue - } - if meta.Erasure.Distribution[i] != meta.Erasure.Index { - // Mismatch indexes with distribution order - inconsistent++ + if !meta.Deleted { + if len(meta.Erasure.Distribution) != len(onlineDisks) { + // Erasure distribution seems to have lesser + // number of items than number of online disks. + inconsistent++ + continue + } + if meta.Erasure.Distribution[i] != meta.Erasure.Index { + // Mismatch indexes with distribution order + inconsistent++ + } } } @@ -267,8 +270,8 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad dataErrs[i] = errDiskNotFound continue } - meta := partsMetadata[i] + meta := partsMetadata[i] if !meta.ModTime.Equal(modTime) || meta.DataDir != dataDir { dataErrs[i] = errFileCorrupt partsMetadata[i] = FileInfo{} @@ -280,20 +283,22 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad continue } - if len(meta.Erasure.Distribution) != len(onlineDisks) { - // Erasure distribution is not the same as onlineDisks - // attempt a fix if possible, assuming other entries - // might have the right erasure distribution. - partsMetadata[i] = FileInfo{} - dataErrs[i] = errFileCorrupt - continue - } + if !meta.Deleted { + if len(meta.Erasure.Distribution) != len(onlineDisks) { + // Erasure distribution is not the same as onlineDisks + // attempt a fix if possible, assuming other entries + // might have the right erasure distribution. + partsMetadata[i] = FileInfo{} + dataErrs[i] = errFileCorrupt + continue + } - // Since erasure.Distribution is trustable we can fix the mismatching erasure.Index - if meta.Erasure.Distribution[i] != meta.Erasure.Index { - partsMetadata[i] = FileInfo{} - dataErrs[i] = errFileCorrupt - continue + // Since erasure.Distribution is trustable we can fix the mismatching erasure.Index + if meta.Erasure.Distribution[i] != meta.Erasure.Index { + partsMetadata[i] = FileInfo{} + dataErrs[i] = errFileCorrupt + continue + } } } @@ -320,11 +325,11 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad // disk has a valid xl.meta but may not have all the // parts. This is considered an outdated disk, since // it needs healing too. - if !partsMetadata[i].IsRemote() { + if !partsMetadata[i].Deleted && !partsMetadata[i].IsRemote() { dataErrs[i] = onlineDisk.VerifyFile(ctx, bucket, object, partsMetadata[i]) } case madmin.HealNormalScan: - if !partsMetadata[i].IsRemote() { + if !partsMetadata[i].Deleted && !partsMetadata[i].IsRemote() { dataErrs[i] = onlineDisk.CheckParts(ctx, bucket, object, partsMetadata[i]) } } diff --git a/cmd/erasure-healing.go b/cmd/erasure-healing.go index ad2b0bc36..ec11c692d 100644 --- a/cmd/erasure-healing.go +++ b/cmd/erasure-healing.go @@ -212,7 +212,7 @@ func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, quorumModTime t return true } if erErr == nil { - if !meta.IsRemote() { + if !meta.Deleted && !meta.IsRemote() { // If xl.meta was read fine but there may be problem with the part.N files. if IsErr(dataErr, []error{ errFileNotFound, @@ -267,10 +267,10 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s // Re-read when we have lock... partsMetadata, errs := readAllFileInfo(ctx, storageDisks, bucket, object, versionID, true) - _, err = getLatestFileInfo(ctx, partsMetadata, errs) - if err != nil { + if _, err = getLatestFileInfo(ctx, partsMetadata, errs); err != nil { return er.purgeObjectDangling(ctx, bucket, object, versionID, partsMetadata, errs, []error{}, opts) } + // List of disks having latest version of the object er.meta // (by modtime). _, modTime, dataDir := listOnlineDisks(storageDisks, partsMetadata, errs) @@ -292,7 +292,8 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s // used here for reconstruction. This is done to ensure that // we do not skip drives that have inconsistent metadata to be // skipped from purging when they are stale. - availableDisks, dataErrs := disksWithAllParts(ctx, storageDisks, partsMetadata, errs, bucket, object, scanMode) + availableDisks, dataErrs := disksWithAllParts(ctx, storageDisks, partsMetadata, + errs, bucket, object, scanMode) // Loop to find number of disks with valid data, per-drive // data state and a list of outdated disks on which data needs