diff --git a/cmd/erasure-healing-common.go b/cmd/erasure-healing-common.go index 9a9008d45..b83122876 100644 --- a/cmd/erasure-healing-common.go +++ b/cmd/erasure-healing-common.go @@ -25,6 +25,38 @@ import ( "github.com/minio/madmin-go/v2" ) +func commonETags(etags []string) (etag string, maxima int) { + etagOccurrenceMap := make(map[string]int, len(etags)) + + // Ignore the uuid sentinel and count the rest. + for _, etag := range etags { + if etag == "" { + continue + } + etagOccurrenceMap[etag]++ + } + + maxima = 0 // Counter for remembering max occurrence of elements. + latest := "" + + // Find the common cardinality from previously collected + // occurrences of elements. + for etag, count := range etagOccurrenceMap { + if count < maxima { + continue + } + + // We are at or above maxima + if count > maxima { + maxima = count + latest = etag + } + } + + // Return the collected common max time, with maxima + return latest, maxima +} + // commonTime returns a maximally occurring time from a list of time. func commonTimeAndOccurence(times []time.Time, group time.Duration) (maxTime time.Time, maxima int) { timeOccurenceMap := make(map[int64]int, len(times)) @@ -86,6 +118,13 @@ func commonTime(modTimes []time.Time, quorum int) time.Time { return timeSentinel } +func commonETag(etags []string, quorum int) string { + if etag, count := commonETags(etags); count >= quorum { + return etag + } + return "" +} + // Beginning of unix time is treated as sentinel value here. var ( timeSentinel = time.Unix(0, 0).UTC() @@ -102,6 +141,33 @@ func bootModtimes(diskCount int) []time.Time { return modTimes } +func listObjectETags(partsMetadata []FileInfo, errs []error, quorum int) (etags []string) { + etags = make([]string, len(partsMetadata)) + vidMap := map[string]int{} + for index, metadata := range partsMetadata { + if errs[index] != nil { + continue + } + vid := metadata.VersionID + if metadata.VersionID == "" { + vid = nullVersionID + } + vidMap[vid]++ + etags[index] = metadata.Metadata["etag"] + } + + for _, count := range vidMap { + // do we have enough common versions + // that have enough quorum to satisfy + // the etag. + if count >= quorum { + return etags + } + } + + return make([]string, len(partsMetadata)) +} + // Extracts list of times from FileInfo slice and returns, skips // slice elements which have errors. func listObjectModtimes(partsMetadata []FileInfo, errs []error) (modTimes []time.Time) { @@ -162,7 +228,7 @@ func listObjectDiskMtimes(partsMetadata []FileInfo) (diskMTimes []time.Time) { // - a slice of disks where disk having 'older' xl.meta (or nothing) // are set to nil. // - latest (in time) of the maximally occurring modTime(s), which has at least quorum occurrences. -func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error, quorum int) (onlineDisks []StorageAPI, modTime time.Time) { +func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error, quorum int) (onlineDisks []StorageAPI, modTime time.Time, etag string) { onlineDisks = make([]StorageAPI, len(disks)) // List all the file commit ids from parts metadata. @@ -171,6 +237,23 @@ func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error, // Reduce list of UUIDs to a single common value. modTime = commonTime(modTimes, quorum) + if modTime.IsZero() || modTime.Equal(timeSentinel) { + etags := listObjectETags(partsMetadata, errs, quorum) + + etag = commonETag(etags, quorum) + + if etag != "" { // allow this fallback only if a non-empty etag is found. + for index, e := range etags { + if partsMetadata[index].IsValid() && e == etag { + onlineDisks[index] = disks[index] + } else { + onlineDisks[index] = nil + } + } + return onlineDisks, modTime, etag + } + } + // Create a new online disks slice, which have common uuid. for index, t := range modTimes { if partsMetadata[index].IsValid() && t.Equal(modTime) { @@ -180,7 +263,7 @@ func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error, } } - return onlineDisks, modTime + return onlineDisks, modTime, "" } // disksWithAllParts - This function needs to be called with diff --git a/cmd/erasure-healing-common_test.go b/cmd/erasure-healing-common_test.go index f93653725..d0d6398c0 100644 --- a/cmd/erasure-healing-common_test.go +++ b/cmd/erasure-healing-common_test.go @@ -299,7 +299,7 @@ func TestListOnlineDisks(t *testing.T) { } rQuorum := len(errs) - z.serverPools[0].sets[0].defaultParityCount - onlineDisks, modTime := listOnlineDisks(erasureDisks, partsMetadata, test.errs, rQuorum) + onlineDisks, modTime, _ := listOnlineDisks(erasureDisks, partsMetadata, test.errs, rQuorum) if !modTime.Equal(test.expectedTime) { t.Fatalf("Expected modTime to be equal to %v but was found to be %v", test.expectedTime, modTime) @@ -481,7 +481,7 @@ func TestListOnlineDisksSmallObjects(t *testing.T) { } rQuorum := len(errs) - z.serverPools[0].sets[0].defaultParityCount - onlineDisks, modTime := listOnlineDisks(erasureDisks, partsMetadata, test.errs, rQuorum) + onlineDisks, modTime, _ := listOnlineDisks(erasureDisks, partsMetadata, test.errs, rQuorum) if !modTime.Equal(test.expectedTime) { t.Fatalf("Expected modTime to be equal to %v but was found to be %v", test.expectedTime, modTime) @@ -548,7 +548,7 @@ func TestDisksWithAllParts(t *testing.T) { t.Fatalf("Failed to get quorum consistent fileInfo %v", err) } - erasureDisks, _ = listOnlineDisks(erasureDisks, partsMetadata, errs, readQuorum) + erasureDisks, _, _ = listOnlineDisks(erasureDisks, partsMetadata, errs, readQuorum) filteredDisks, errs, _ := disksWithAllParts(ctx, erasureDisks, partsMetadata, errs, fi, bucket, object, madmin.HealDeepScan) diff --git a/cmd/erasure-healing.go b/cmd/erasure-healing.go index a79d7a804..ab129f6b1 100644 --- a/cmd/erasure-healing.go +++ b/cmd/erasure-healing.go @@ -386,11 +386,11 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object // List of disks having latest version of the object xl.meta // (by modtime). - onlineDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs, readQuorum) + onlineDisks, modTime, etag := listOnlineDisks(storageDisks, partsMetadata, errs, readQuorum) // Latest FileInfo for reference. If a valid metadata is not // present, it is as good as object not found. - latestMeta, err := pickValidFileInfo(ctx, partsMetadata, modTime, readQuorum) + latestMeta, err := pickValidFileInfo(ctx, partsMetadata, modTime, etag, readQuorum) if err != nil { return result, err } diff --git a/cmd/erasure-metadata.go b/cmd/erasure-metadata.go index 158c9364e..6f6a7758e 100644 --- a/cmd/erasure-metadata.go +++ b/cmd/erasure-metadata.go @@ -328,7 +328,7 @@ func (fi FileInfo) ObjectToPartOffset(ctx context.Context, offset int64) (partIn return 0, 0, InvalidRange{} } -func findFileInfoInQuorum(ctx context.Context, metaArr []FileInfo, modTime time.Time, quorum int) (FileInfo, error) { +func findFileInfoInQuorum(ctx context.Context, metaArr []FileInfo, modTime time.Time, etag string, quorum int) (FileInfo, error) { // with less quorum return error. if quorum < 1 { return FileInfo{}, errErasureReadQuorum @@ -336,9 +336,17 @@ func findFileInfoInQuorum(ctx context.Context, metaArr []FileInfo, modTime time. metaHashes := make([]string, len(metaArr)) h := sha256.New() for i, meta := range metaArr { - if meta.IsValid() && meta.ModTime.Equal(modTime) { + if !meta.IsValid() { + continue + } + etagOnly := modTime.Equal(timeSentinel) && (etag != "" && etag == meta.Metadata["etag"]) + mtimeValid := meta.ModTime.Equal(modTime) + if mtimeValid || etagOnly { fmt.Fprintf(h, "%v", meta.XLV1) - fmt.Fprintf(h, "%v", meta.GetDataDir()) + if !etagOnly { + // Verify dataDir is same only when mtime is valid and etag is not considered. + fmt.Fprintf(h, "%v", meta.GetDataDir()) + } for _, part := range meta.Parts { fmt.Fprintf(h, "part.%d", part.Number) } @@ -408,8 +416,8 @@ func pickValidDiskTimeWithQuorum(metaArr []FileInfo, quorum int) time.Time { // pickValidFileInfo - picks one valid FileInfo content and returns from a // slice of FileInfo. -func pickValidFileInfo(ctx context.Context, metaArr []FileInfo, modTime time.Time, quorum int) (FileInfo, error) { - return findFileInfoInQuorum(ctx, metaArr, modTime, quorum) +func pickValidFileInfo(ctx context.Context, metaArr []FileInfo, modTime time.Time, etag string, quorum int) (FileInfo, error) { + return findFileInfoInQuorum(ctx, metaArr, modTime, etag, quorum) } // writeUniqueFileInfo - writes unique `xl.meta` content for each disk concurrently. diff --git a/cmd/erasure-metadata_test.go b/cmd/erasure-metadata_test.go index 15d54f906..b5cf54461 100644 --- a/cmd/erasure-metadata_test.go +++ b/cmd/erasure-metadata_test.go @@ -204,7 +204,7 @@ func TestFindFileInfoInQuorum(t *testing.T) { for _, test := range tests { test := test t.Run("", func(t *testing.T) { - _, err := findFileInfoInQuorum(context.Background(), test.fis, test.modTime, test.expectedQuorum) + _, err := findFileInfoInQuorum(context.Background(), test.fis, test.modTime, "", test.expectedQuorum) if err != test.expectedErr { t.Errorf("Expected %s, got %s", test.expectedErr, err) } diff --git a/cmd/erasure-multipart.go b/cmd/erasure-multipart.go index 75a7c8cbc..d2cc798c6 100644 --- a/cmd/erasure-multipart.go +++ b/cmd/erasure-multipart.go @@ -83,8 +83,9 @@ func (er erasureObjects) checkUploadIDExists(ctx context.Context, bucket, object if write { quorum = writeQuorum } + // List all online disks. - _, modTime := listOnlineDisks(storageDisks, partsMetadata, errs, quorum) + _, modTime, etag := listOnlineDisks(storageDisks, partsMetadata, errs, quorum) if write { reducedErr := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) @@ -98,7 +99,7 @@ func (er erasureObjects) checkUploadIDExists(ctx context.Context, bucket, object } // Pick one from the first valid metadata. - fi, err = pickValidFileInfo(ctx, partsMetadata, modTime, quorum) + fi, err = pickValidFileInfo(ctx, partsMetadata, modTime, etag, quorum) return fi, partsMetadata, err } diff --git a/cmd/erasure-object.go b/cmd/erasure-object.go index 64755a01f..2dd8022f7 100644 --- a/cmd/erasure-object.go +++ b/cmd/erasure-object.go @@ -108,10 +108,10 @@ func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, d } // List all online disks. - onlineDisks, modTime := listOnlineDisks(storageDisks, metaArr, errs, readQuorum) + onlineDisks, modTime, etag := listOnlineDisks(storageDisks, metaArr, errs, readQuorum) // Pick latest valid metadata. - fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum) + fi, err := pickValidFileInfo(ctx, metaArr, modTime, etag, readQuorum) if err != nil { return oi, toObjectErr(err, srcBucket, srcObject) } @@ -347,11 +347,6 @@ func (er erasureObjects) getObjectWithFileInfo(ctx context.Context, bucket, obje var healOnce sync.Once - // once we have obtained a common FileInfo i.e latest, we should stick - // to single dataDir to read the content to avoid reading from some other - // dataDir that has stale FileInfo{} to ensure that we fail appropriately - // during reads and expect the same dataDir everywhere. - dataDir := fi.DataDir for ; partIndex <= lastPartIndex; partIndex++ { if length == totalBytesRead { break @@ -380,7 +375,7 @@ func (er erasureObjects) getObjectWithFileInfo(ctx context.Context, bucket, obje continue } checksumInfo := metaArr[index].Erasure.GetChecksumInfo(partNumber) - partPath := pathJoin(object, dataDir, fmt.Sprintf("part.%d", partNumber)) + partPath := pathJoin(object, metaArr[index].DataDir, fmt.Sprintf("part.%d", partNumber)) readers[index] = newBitrotReader(disk, metaArr[index].Data, bucket, partPath, tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize()) @@ -664,10 +659,10 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s } // List all online disks. - onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs, readQuorum) + onlineDisks, modTime, etag := listOnlineDisks(disks, metaArr, errs, readQuorum) // Pick latest valid metadata. - fi, err = pickValidFileInfo(ctx, metaArr, modTime, readQuorum) + fi, err = pickValidFileInfo(ctx, metaArr, modTime, etag, readQuorum) if err != nil { return fi, nil, nil, err } @@ -693,7 +688,7 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s missingBlocks++ continue } - if metaArr[i].IsValid() && metaArr[i].ModTime.Equal(fi.ModTime) { + if metaArr[i].IsValid() && metaArr[i].ModTime.Equal(modTime) { continue } metaArr[i] = FileInfo{} @@ -1794,10 +1789,10 @@ func (er erasureObjects) PutObjectMetadata(ctx context.Context, bucket, object s } // List all online disks. - onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs, readQuorum) + onlineDisks, modTime, etag := listOnlineDisks(disks, metaArr, errs, readQuorum) // Pick latest valid metadata. - fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum) + fi, err := pickValidFileInfo(ctx, metaArr, modTime, etag, readQuorum) if err != nil { return ObjectInfo{}, toObjectErr(err, bucket, object) } @@ -1867,10 +1862,10 @@ func (er erasureObjects) PutObjectTags(ctx context.Context, bucket, object strin } // List all online disks. - onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs, readQuorum) + onlineDisks, modTime, etag := listOnlineDisks(disks, metaArr, errs, readQuorum) // Pick latest valid metadata. - fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum) + fi, err := pickValidFileInfo(ctx, metaArr, modTime, etag, readQuorum) if err != nil { return ObjectInfo{}, toObjectErr(err, bucket, object) }