From 88d719689c1de14446098f99b3f891430eb28f41 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Wed, 25 Aug 2021 17:25:26 +0200 Subject: [PATCH] Synchronize bucket cycle numbers (#13058) Synchronize bucket cycles so it is much more likely that the same prefixes will be picked up for scanning. Use the global bloom filter cycle for that. Bump bloom filter versions to clear those. --- cmd/data-scanner.go | 11 ++++------- cmd/data-update-tracker.go | 4 ++-- cmd/data-usage_test.go | 2 ++ cmd/erasure-server-pool.go | 4 ++-- cmd/erasure.go | 7 ++++--- cmd/fs-v1.go | 2 +- cmd/gateway-unsupported.go | 2 +- cmd/object-api-interface.go | 2 +- cmd/xl-storage.go | 1 + 9 files changed, 18 insertions(+), 17 deletions(-) diff --git a/cmd/data-scanner.go b/cmd/data-scanner.go index 79eb81439..d851a7d94 100644 --- a/cmd/data-scanner.go +++ b/cmd/data-scanner.go @@ -147,7 +147,7 @@ func runDataScanner(pctx context.Context, objAPI ObjectLayer) { go storeDataUsageInBackend(ctx, objAPI, results) bf, err := globalNotificationSys.updateBloomFilter(ctx, nextBloomCycle) logger.LogIf(ctx, err) - err = objAPI.NSScanner(ctx, bf, results) + err = objAPI.NSScanner(ctx, bf, results, uint32(nextBloomCycle)) logger.LogIf(ctx, err) if err == nil { // Store new cycle... @@ -320,7 +320,7 @@ func scanDataFolder(ctx context.Context, basePath string, cache dataUsageCache, console.Debugf(logPrefix+"Finished scanner, %v entries (%+v) %s \n", len(s.newCache.Cache), *s.newCache.sizeRecursive(s.newCache.Info.Name), logSuffix) } s.newCache.Info.LastUpdate = UTCNow() - s.newCache.Info.NextCycle++ + s.newCache.Info.NextCycle = cache.Info.NextCycle return s.newCache, nil } @@ -929,9 +929,6 @@ func (i *scannerItem) applyLifecycle(ctx context.Context, o ObjectLayer, meta ac case lifecycle.DeleteRestoredAction, lifecycle.DeleteRestoredVersionAction: default: // No action. - if i.debug { - console.Debugf(applyActionsLogPrefix+" object not expirable: %q\n", i.objectPath()) - } return false, size } @@ -1063,7 +1060,7 @@ func evalActionFromLifecycle(ctx context.Context, lc lifecycle.Lifecycle, obj Ob return action } -func applyTransitionRule(ctx context.Context, action lifecycle.Action, objLayer ObjectLayer, obj ObjectInfo) bool { +func applyTransitionRule(obj ObjectInfo) bool { if obj.DeleteMarker { return false } @@ -1143,7 +1140,7 @@ func applyLifecycleAction(ctx context.Context, action lifecycle.Action, objLayer case lifecycle.DeleteRestoredAction, lifecycle.DeleteRestoredVersionAction: success = applyExpiryRule(ctx, objLayer, obj, true, action == lifecycle.DeleteRestoredVersionAction) case lifecycle.TransitionAction, lifecycle.TransitionVersionAction: - success = applyTransitionRule(ctx, action, objLayer, obj) + success = applyTransitionRule(obj) } return } diff --git a/cmd/data-update-tracker.go b/cmd/data-update-tracker.go index 976694a94..069e497e4 100644 --- a/cmd/data-update-tracker.go +++ b/cmd/data-update-tracker.go @@ -46,7 +46,7 @@ const ( dataUpdateTrackerQueueSize = 0 dataUpdateTrackerFilename = dataUsageBucket + SlashSeparator + ".tracker.bin" - dataUpdateTrackerVersion = 6 + dataUpdateTrackerVersion = 7 dataUpdateTrackerSaveInterval = 5 * time.Minute ) @@ -397,7 +397,7 @@ func (d *dataUpdateTracker) deserialize(src io.Reader, newerThan time.Time) erro return err } switch tmp[0] { - case 1, 2, 3, 4, 5: + case 1, 2, 3, 4, 5, 6: if intDataUpdateTracker.debug { console.Debugln(color.Green("dataUpdateTracker: ") + "deprecated data version, updating.") } diff --git a/cmd/data-usage_test.go b/cmd/data-usage_test.go index 4bc01714a..32fb487d3 100644 --- a/cmd/data-usage_test.go +++ b/cmd/data-usage_test.go @@ -179,6 +179,7 @@ func TestDataUsageUpdate(t *testing.T) { // Changed dir must be picked up in this many cycles. for i := 0; i < dataUsageUpdateDirCycles; i++ { got, err = scanDataFolder(context.Background(), base, got, getSize) + got.Info.NextCycle++ if err != nil { t.Fatal(err) } @@ -423,6 +424,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) { // Changed dir must be picked up in this many cycles. for i := 0; i < dataUsageUpdateDirCycles; i++ { got, err = scanDataFolder(context.Background(), base, got, getSize) + got.Info.NextCycle++ if err != nil { t.Fatal(err) } diff --git a/cmd/erasure-server-pool.go b/cmd/erasure-server-pool.go index 45cb5bd37..faaa79f1b 100644 --- a/cmd/erasure-server-pool.go +++ b/cmd/erasure-server-pool.go @@ -456,7 +456,7 @@ func (z *erasureServerPools) StorageInfo(ctx context.Context) (StorageInfo, []er return storageInfo, errs } -func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo) error { +func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo, wantCycle uint32) error { // Updates must be closed before we return. defer close(updates) @@ -501,7 +501,7 @@ func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, upd } }() // Start scanner. Blocks until done. - err := erObj.nsScanner(ctx, allBuckets, bf, updates) + err := erObj.nsScanner(ctx, allBuckets, bf, wantCycle, updates) if err != nil { logger.LogIf(ctx, err) mu.Lock() diff --git a/cmd/erasure.go b/cmd/erasure.go index 468518514..645a5f672 100644 --- a/cmd/erasure.go +++ b/cmd/erasure.go @@ -326,7 +326,7 @@ func (er erasureObjects) cleanupDeletedObjects(ctx context.Context) { // nsScanner will start scanning buckets and send updated totals as they are traversed. // Updates are sent on a regular basis and the caller *must* consume them. -func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, updates chan<- dataUsageCache) error { +func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, wantCycle uint32, updates chan<- dataUsageCache) error { if len(buckets) == 0 { return nil } @@ -419,7 +419,7 @@ func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf case v, ok := <-bucketResults: if !ok { // Save final state... - cache.Info.NextCycle++ + cache.Info.NextCycle = wantCycle cache.Info.LastUpdate = time.Now() logger.LogIf(ctx, cache.save(ctx, er, dataUsageCacheName)) updates <- cache @@ -461,12 +461,13 @@ func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf cache.Info.BloomFilter = bloom cache.Info.SkipHealing = healing cache.Disks = allDiskIDs + cache.Info.NextCycle = wantCycle if cache.Info.Name != bucket.Name { logger.LogIf(ctx, fmt.Errorf("cache name mismatch: %s != %s", cache.Info.Name, bucket.Name)) cache.Info = dataUsageCacheInfo{ Name: bucket.Name, LastUpdate: time.Time{}, - NextCycle: 0, + NextCycle: wantCycle, } } // Collect updates. diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go index 64813ded6..19fcede3d 100644 --- a/cmd/fs-v1.go +++ b/cmd/fs-v1.go @@ -240,7 +240,7 @@ func (fs *FSObjects) StorageInfo(ctx context.Context) (StorageInfo, []error) { } // NSScanner returns data usage stats of the current FS deployment -func (fs *FSObjects) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo) error { +func (fs *FSObjects) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo, wantCycle uint32) error { defer close(updates) // Load bucket totals var totalCache dataUsageCache diff --git a/cmd/gateway-unsupported.go b/cmd/gateway-unsupported.go index 0de6822a6..939a4c368 100644 --- a/cmd/gateway-unsupported.go +++ b/cmd/gateway-unsupported.go @@ -48,7 +48,7 @@ func (a GatewayUnsupported) LocalStorageInfo(ctx context.Context) (StorageInfo, } // NSScanner - scanner is not implemented for gateway -func (a GatewayUnsupported) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo) error { +func (a GatewayUnsupported) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo, wantCycle uint32) error { logger.CriticalIf(ctx, errors.New("not implemented")) return NotImplemented{} } diff --git a/cmd/object-api-interface.go b/cmd/object-api-interface.go index dd8b9111d..6d9368471 100644 --- a/cmd/object-api-interface.go +++ b/cmd/object-api-interface.go @@ -103,7 +103,7 @@ type ObjectLayer interface { // Storage operations. Shutdown(context.Context) error - NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo) error + NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- madmin.DataUsageInfo, wantCycle uint32) error BackendInfo() madmin.BackendInfo StorageInfo(ctx context.Context) (StorageInfo, []error) diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 2639dcdc2..6e9fe2492 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -473,6 +473,7 @@ func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates } return sizeSummary{}, errSkipFile } + defer metaDataPoolPut(buf) // Remove filename which is the meta file. item.transformMetaDir()