From ed0cbfb31e00644013e6c2073310a2268c04a381 Mon Sep 17 00:00:00 2001 From: Anis Elleuch Date: Fri, 8 Jul 2022 01:05:23 +0100 Subject: [PATCH] fix: rootdisk detection by not using cached value when GetDiskInfo() errors out (#15249) GetDiskInfo() uses timedValue to cache the disk info for one second. timedValue behavior was recently changed to return an old cached value when calculating a new value returns an error. When a mount point is empty, GetDiskInfo() will return errUnformattedDisk, timedValue will return cached disk info with unexpected IsRootDisk value, e.g. false if the mount point belongs to a root disk. Therefore, the mount point will be considered a valid disk and will be formatted as well. This commit will also add more defensive code when marking root disks: always mark a disk offline for any GetDiskInfo() error except errUnformattedDisk. The server will try anyway to reconnect to those disks every 10 seconds. --- cmd/erasure-sets.go | 6 +++++- cmd/metrics-v2.go | 1 + cmd/utils.go | 21 ++++++++++++++------- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index db4fb52e2..94f9d6eff 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -1165,8 +1165,12 @@ func markRootDisksAsDown(storageDisks []StorageAPI, errs []error) { // Do nothing return } - infos, _ := getHealDiskInfos(storageDisks, errs) + infos, ierrs := getHealDiskInfos(storageDisks, errs) for i := range storageDisks { + if ierrs[i] != nil && ierrs[i] != errUnformattedDisk { + storageDisks[i] = nil + continue + } if storageDisks[i] != nil && infos[i].RootDisk { // We should not heal on root disk. i.e in a situation where the minio-administrator has unmounted a // defective drive we should not heal a path on the root disk. diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go index 13eee4896..3116ea071 100644 --- a/cmd/metrics-v2.go +++ b/cmd/metrics-v2.go @@ -232,6 +232,7 @@ type MetricsGroup struct { // to populate new values upon cache invalidation. func (g *MetricsGroup) RegisterRead(read func(ctx context.Context) []Metric) { g.metricsCache.Once.Do(func() { + g.metricsCache.Relax = true g.metricsCache.TTL = g.cacheInterval g.metricsCache.Update = func() (interface{}, error) { return read(GlobalContext), nil diff --git a/cmd/utils.go b/cmd/utils.go index 56edd684d..3a2a91bcc 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -931,6 +931,10 @@ type timedValue struct { // Should be set before calling Get(). TTL time.Duration + // When set to true, return the last cached value + // even if updating the value errors out + Relax bool + // Once can be used to initialize values for lazy initialization. // Should be set before calling Get(). Once sync.Once @@ -951,13 +955,16 @@ func (t *timedValue) Get() (interface{}, error) { v, err := t.Update() if err != nil { - // if update fails, return current - // cached value along with error. - // - // Let the caller decide if they want - // to use the returned value based - // on error. - v = t.get(0) + if t.Relax { + // if update fails, return current + // cached value along with error. + // + // Let the caller decide if they want + // to use the returned value based + // on error. + v = t.get(0) + return v, err + } return v, err }