From b2c5b75efa279844c3f286a18dcdd74b7826621f Mon Sep 17 00:00:00 2001
From: Aditya Manthramurthy
Date: Sun, 10 Mar 2024 01:15:15 -0800
Subject: [PATCH] feat: Add Metrics V3 API (#19068)

Metrics v3 mainly reorganizes metrics into smaller groups and removes
the internal aggregation of metrics received from peer nodes in a
MinIO cluster.

This change adds the endpoint `/minio/metrics/v3` as the top-level
metrics endpoint. Various sub-endpoints are implemented under it;
these are documented in `docs/metrics/v3.md`.

The handler serves metrics at any path `/minio/metrics/v3/PATH` as
follows:

- when PATH is one of the sub-endpoints listed above, it serves the
  group of metrics under that path;
- when PATH is a (non-empty) parent directory of the sub-endpoints
  listed above, it serves metrics from each child sub-endpoint of
  PATH;
- otherwise, it returns a "no resource found" error.

All available metrics are listed in `docs/metrics/v3.md`. More will
be added subsequently.
---
 cmd/data-usage-utils.go               | 8 +-
 cmd/generic-handlers.go               | 3 +-
 cmd/globals.go                        | 1 +
 cmd/http-stats.go                     | 22 +
 cmd/metrics-resource.go               | 28 +-
 cmd/metrics-router.go                 | 15 +-
 cmd/metrics-v2.go                     | 702 +++++++++++++-------------
 cmd/metrics-v2_gen.go                 | 568 ++++++++++-----------
 cmd/metrics-v2_gen_test.go            | 132 ++---
 cmd/metrics-v3-api.go                 | 220 ++++
 cmd/metrics-v3-cache.go               | 145 ++++++
 cmd/metrics-v3-cluster-erasure-set.go | 89 ++++
 cmd/metrics-v3-cluster-health.go      | 109 ++++
 cmd/metrics-v3-cluster-usage.go       | 189 +++++++
 cmd/metrics-v3-handler.go             | 254 ++++++++++
 cmd/metrics-v3-system-drive.go        | 126 +++++
 cmd/metrics-v3-system-network.go      | 61 +++
 cmd/metrics-v3-types.go               | 487 ++++++++++++++++++
 cmd/metrics-v3.go                     | 272 ++++++++++
 cmd/notification.go                   | 20 +-
 cmd/peer-rest-client.go               | 14 +-
 cmd/peer-rest-server.go               | 26 +-
 cmd/tier.go                           | 6 +-
 docs/metrics/v3.md                    | 178 +++++++
 24 files changed, 2920 insertions(+), 755 deletions(-)
 create mode 100644 cmd/metrics-v3-api.go
 create mode 100644 cmd/metrics-v3-cache.go
 create mode 100644 cmd/metrics-v3-cluster-erasure-set.go
 create mode 100644 cmd/metrics-v3-cluster-health.go
 create mode 100644 cmd/metrics-v3-cluster-usage.go
 create mode 100644 cmd/metrics-v3-handler.go
 create mode 100644 cmd/metrics-v3-system-drive.go
 create mode 100644 cmd/metrics-v3-system-network.go
 create mode 100644 cmd/metrics-v3-types.go
 create mode 100644 cmd/metrics-v3.go
 create mode 100644 docs/metrics/v3.md

diff --git a/cmd/data-usage-utils.go b/cmd/data-usage-utils.go
index 6cc21c4c3..1f6b3f11e 100644
--- a/cmd/data-usage-utils.go
+++ b/cmd/data-usage-utils.go
@@ -140,7 +140,7 @@ func (dui DataUsageInfo) tierStats() []madmin.TierInfo {
 	return infos
 }
 
-func (dui DataUsageInfo) tierMetrics() (metrics []Metric) {
+func (dui DataUsageInfo) tierMetrics() (metrics []MetricV2) {
 	if dui.TierStats == nil {
 		return nil
 	}
@@ -148,17 +148,17 @@ func (dui DataUsageInfo) tierMetrics() (metrics []Metric) {
 	// minio_cluster_ilm_transitioned_objects{tier="S3TIER-1"}=1
 	// minio_cluster_ilm_transitioned_versions{tier="S3TIER-1"}=3
 	for tier, st := range dui.TierStats.Tiers {
-		metrics = append(metrics, Metric{
+		metrics = append(metrics, MetricV2{
 			Description:    getClusterTransitionedBytesMD(),
 			Value:          float64(st.TotalSize),
 			VariableLabels: map[string]string{"tier": tier},
 		})
-		metrics = append(metrics, Metric{
+		metrics = append(metrics, MetricV2{
 			Description:    getClusterTransitionedObjectsMD(),
 			Value:          float64(st.NumObjects),
 			VariableLabels: map[string]string{"tier": tier},
 		})
-		metrics = append(metrics, Metric{
+ metrics = append(metrics, MetricV2{ Description: getClusterTransitionedVersionsMD(), Value: float64(st.NumVersions), VariableLabels: map[string]string{"tier": tier}, diff --git a/cmd/generic-handlers.go b/cmd/generic-handlers.go index fad099a0b..47cd09e2a 100644 --- a/cmd/generic-handlers.go +++ b/cmd/generic-handlers.go @@ -233,7 +233,8 @@ func guessIsMetricsReq(req *http.Request) bool { req.URL.Path == minioReservedBucketPath+prometheusMetricsV2ClusterPath || req.URL.Path == minioReservedBucketPath+prometheusMetricsV2NodePath || req.URL.Path == minioReservedBucketPath+prometheusMetricsV2BucketPath || - req.URL.Path == minioReservedBucketPath+prometheusMetricsV2ResourcePath + req.URL.Path == minioReservedBucketPath+prometheusMetricsV2ResourcePath || + strings.HasPrefix(req.URL.Path, minioReservedBucketPath+metricsV3Path) } // guessIsRPCReq - returns true if the request is for an RPC endpoint. diff --git a/cmd/globals.go b/cmd/globals.go index 1f2f68faa..d3309bcf4 100644 --- a/cmd/globals.go +++ b/cmd/globals.go @@ -470,6 +470,7 @@ var ( // Indicates if server was started as `--address ":0"` globalDynamicAPIPort bool + // Add new variable global values here. ) diff --git a/cmd/http-stats.go b/cmd/http-stats.go index 1393636f6..077a72575 100644 --- a/cmd/http-stats.go +++ b/cmd/http-stats.go @@ -269,6 +269,28 @@ func (s *bucketConnStats) getS3InOutBytes() map[string]inOutBytes { return bucketStats } +// Return S3 total input/output bytes for each +func (s *bucketConnStats) getBucketS3InOutBytes(buckets []string) map[string]inOutBytes { + s.RLock() + defer s.RUnlock() + + if len(s.stats) == 0 || len(buckets) == 0 { + return nil + } + + bucketStats := make(map[string]inOutBytes, len(buckets)) + for _, bucket := range buckets { + if stats, ok := s.stats[bucket]; ok { + bucketStats[bucket] = inOutBytes{ + In: stats.s3InputBytes, + Out: stats.s3OutputBytes, + } + } + } + + return bucketStats +} + // delete metrics once bucket is deleted. func (s *bucketConnStats) delete(bucket string) { s.Lock() diff --git a/cmd/metrics-resource.go b/cmd/metrics-resource.go index 6d052fefb..f2f54a637 100644 --- a/cmd/metrics-resource.go +++ b/cmd/metrics-resource.go @@ -81,7 +81,7 @@ var ( resourceMetricsMapMu sync.RWMutex // resourceMetricsHelpMap maps metric name to its help string resourceMetricsHelpMap map[MetricName]string - resourceMetricsGroups []*MetricsGroup + resourceMetricsGroups []*MetricsGroupV2 // initial values for drives (at the time of server startup) // used for calculating avg values for drive metrics latestDriveStats map[string]madmin.DiskIOStats @@ -164,7 +164,7 @@ func init() { cpuLoad5Perc: "CPU load average 5min (percentage)", cpuLoad15Perc: "CPU load average 15min (percentage)", } - resourceMetricsGroups = []*MetricsGroup{ + resourceMetricsGroups = []*MetricsGroupV2{ getResourceMetrics(), } @@ -405,7 +405,7 @@ func startResourceMetricsCollection() { // minioResourceCollector is the Collector for resource metrics type minioResourceCollector struct { - metricsGroups []*MetricsGroup + metricsGroups []*MetricsGroupV2 desc *prometheus.Desc } @@ -417,7 +417,7 @@ func (c *minioResourceCollector) Describe(ch chan<- *prometheus.Desc) { // Collect is called by the Prometheus registry when collecting metrics. 
func (c *minioResourceCollector) Collect(out chan<- prometheus.Metric) { var wg sync.WaitGroup - publish := func(in <-chan Metric) { + publish := func(in <-chan MetricV2) { defer wg.Done() for metric := range in { labels, values := getOrderedLabelValueArrays(metric.VariableLabels) @@ -436,18 +436,18 @@ func (c *minioResourceCollector) Collect(out chan<- prometheus.Metric) { // and returns reference of minio resource Collector // It creates the Prometheus Description which is used // to define Metric and help string -func newMinioResourceCollector(metricsGroups []*MetricsGroup) *minioResourceCollector { +func newMinioResourceCollector(metricsGroups []*MetricsGroupV2) *minioResourceCollector { return &minioResourceCollector{ metricsGroups: metricsGroups, desc: prometheus.NewDesc("minio_resource_stats", "Resource statistics exposed by MinIO server", nil, nil), } } -func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAvgMax bool) []Metric { +func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAvgMax bool) []MetricV2 { help := resourceMetricsHelpMap[rm.Name] name := rm.Name - metrics := make([]Metric, 0, 3) - metrics = append(metrics, Metric{ + metrics := make([]MetricV2, 0, 3) + metrics = append(metrics, MetricV2{ Description: getResourceMetricDescription(subSys, name, help), Value: rm.Current, VariableLabels: cloneMSS(rm.Labels), @@ -456,7 +456,7 @@ func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAv if requireAvgMax { avgName := MetricName(fmt.Sprintf("%s_avg", name)) avgHelp := fmt.Sprintf("%s (avg)", help) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getResourceMetricDescription(subSys, avgName, avgHelp), Value: math.Round(rm.Avg*100) / 100, VariableLabels: cloneMSS(rm.Labels), @@ -464,7 +464,7 @@ func prepareResourceMetrics(rm ResourceMetric, subSys MetricSubsystem, requireAv maxName := MetricName(fmt.Sprintf("%s_max", name)) maxHelp := fmt.Sprintf("%s (max)", help) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getResourceMetricDescription(subSys, maxName, maxHelp), Value: rm.Max, VariableLabels: cloneMSS(rm.Labels), @@ -484,12 +484,12 @@ func getResourceMetricDescription(subSys MetricSubsystem, name MetricName, help } } -func getResourceMetrics() *MetricsGroup { - mg := &MetricsGroup{ +func getResourceMetrics() *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: resourceMetricsCacheInterval, } - mg.RegisterRead(func(ctx context.Context) []Metric { - metrics := []Metric{} + mg.RegisterRead(func(ctx context.Context) []MetricV2 { + metrics := []MetricV2{} subSystems := []MetricSubsystem{interfaceSubsystem, memSubsystem, driveSubsystem, cpuSubsystem} resourceMetricsMapMu.RLock() diff --git a/cmd/metrics-router.go b/cmd/metrics-router.go index e2cf23ced..b34d93a8a 100644 --- a/cmd/metrics-router.go +++ b/cmd/metrics-router.go @@ -18,6 +18,7 @@ package cmd import ( + "net/http" "strings" "github.com/minio/mux" @@ -30,6 +31,9 @@ const ( prometheusMetricsV2BucketPath = "/v2/metrics/bucket" prometheusMetricsV2NodePath = "/v2/metrics/node" prometheusMetricsV2ResourcePath = "/v2/metrics/resource" + + // Metrics v3 endpoints + metricsV3Path = "/metrics/v3" ) // Standard env prometheus auth type @@ -48,10 +52,10 @@ const ( func registerMetricsRouter(router *mux.Router) { // metrics router metricsRouter := router.NewRoute().PathPrefix(minioReservedBucketPath).Subrouter() - authType := strings.ToLower(env.Get(EnvPrometheusAuthType, 
string(prometheusJWT))) + authType := prometheusAuthType(strings.ToLower(env.Get(EnvPrometheusAuthType, string(prometheusJWT)))) auth := AuthMiddleware - if prometheusAuthType(authType) == prometheusPublic { + if authType == prometheusPublic { auth = NoAuthMiddleware } metricsRouter.Handle(prometheusMetricsPathLegacy, auth(metricsHandler())) @@ -59,4 +63,11 @@ func registerMetricsRouter(router *mux.Router) { metricsRouter.Handle(prometheusMetricsV2BucketPath, auth(metricsBucketHandler())) metricsRouter.Handle(prometheusMetricsV2NodePath, auth(metricsNodeHandler())) metricsRouter.Handle(prometheusMetricsV2ResourcePath, auth(metricsResourceHandler())) + + // Metrics v3! + metricsV3Server := newMetricsV3Server(authType) + + // Register metrics v3 handler. It also accepts an optional query + // parameter `?list` - see handler for details. + metricsRouter.Methods(http.MethodGet).Path(metricsV3Path + "{pathComps:.*}").Handler(metricsV3Server) } diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go index 3ae7cc8ad..53be68915 100644 --- a/cmd/metrics-v2.go +++ b/cmd/metrics-v2.go @@ -49,12 +49,12 @@ var ( nodeCollector *minioNodeCollector clusterCollector *minioClusterCollector bucketCollector *minioBucketCollector - peerMetricsGroups []*MetricsGroup - bucketPeerMetricsGroups []*MetricsGroup + peerMetricsGroups []*MetricsGroupV2 + bucketPeerMetricsGroups []*MetricsGroupV2 ) func init() { - clusterMetricsGroups := []*MetricsGroup{ + clusterMetricsGroups := []*MetricsGroupV2{ getNodeHealthMetrics(MetricsGroupOpts{dependGlobalNotificationSys: true}), getClusterStorageMetrics(MetricsGroupOpts{dependGlobalObjectAPI: true}), getClusterTierMetrics(MetricsGroupOpts{dependGlobalObjectAPI: true}), @@ -66,7 +66,7 @@ func init() { getBatchJobsMetrics(MetricsGroupOpts{dependGlobalObjectAPI: true}), } - peerMetricsGroups = []*MetricsGroup{ + peerMetricsGroups = []*MetricsGroupV2{ getGoMetrics(), getHTTPMetrics(MetricsGroupOpts{}), getNotificationMetrics(MetricsGroupOpts{dependGlobalLambdaTargetList: true}), @@ -83,13 +83,13 @@ func init() { getTierMetrics(), } - allMetricsGroups := func() (allMetrics []*MetricsGroup) { + allMetricsGroups := func() (allMetrics []*MetricsGroupV2) { allMetrics = append(allMetrics, clusterMetricsGroups...) allMetrics = append(allMetrics, peerMetricsGroups...) 
return allMetrics }() - nodeGroups := []*MetricsGroup{ + nodeGroups := []*MetricsGroupV2{ getNodeHealthMetrics(MetricsGroupOpts{dependGlobalNotificationSys: true}), getHTTPMetrics(MetricsGroupOpts{}), getNetworkMetrics(), @@ -103,13 +103,13 @@ func init() { getReplicationNodeMetrics(MetricsGroupOpts{dependGlobalObjectAPI: true, dependBucketTargetSys: true}), } - bucketMetricsGroups := []*MetricsGroup{ + bucketMetricsGroups := []*MetricsGroupV2{ getBucketUsageMetrics(MetricsGroupOpts{dependGlobalObjectAPI: true}), getHTTPMetrics(MetricsGroupOpts{bucketOnly: true}), getBucketTTFBMetric(), } - bucketPeerMetricsGroups = []*MetricsGroup{ + bucketPeerMetricsGroups = []*MetricsGroupV2{ getHTTPMetrics(MetricsGroupOpts{bucketOnly: true}), getBucketTTFBMetric(), } @@ -305,8 +305,8 @@ const ( serverName = "server" ) -// MetricType for the types of metrics supported -type MetricType string +// MetricTypeV2 for the types of metrics supported +type MetricTypeV2 string const ( gaugeMetric = "gaugeMetric" @@ -320,11 +320,11 @@ type MetricDescription struct { Subsystem MetricSubsystem `json:"Subsystem"` Name MetricName `json:"MetricName"` Help string `json:"Help"` - Type MetricType `json:"Type"` + Type MetricTypeV2 `json:"Type"` } -// Metric captures the details for a metric -type Metric struct { +// MetricV2 captures the details for a metric +type MetricV2 struct { Description MetricDescription `json:"Description"` StaticLabels map[string]string `json:"StaticLabels"` Value float64 `json:"Value"` @@ -333,9 +333,9 @@ type Metric struct { Histogram map[string]uint64 `json:"Histogram"` } -// MetricsGroup are a group of metrics that are initialized together. -type MetricsGroup struct { - metricsCache *cachevalue.Cache[[]Metric] `msg:"-"` +// MetricsGroupV2 are a group of metrics that are initialized together. +type MetricsGroupV2 struct { + metricsCache *cachevalue.Cache[[]MetricV2] `msg:"-"` cacheInterval time.Duration metricsGroupOpts MetricsGroupOpts } @@ -358,65 +358,65 @@ type MetricsGroupOpts struct { // RegisterRead register the metrics populator function to be used // to populate new values upon cache invalidation. 
-func (g *MetricsGroup) RegisterRead(read func(context.Context) []Metric) { +func (g *MetricsGroupV2) RegisterRead(read func(context.Context) []MetricV2) { g.metricsCache = cachevalue.NewFromFunc(g.cacheInterval, cachevalue.Opts{ReturnLastGood: true}, - func() ([]Metric, error) { + func() ([]MetricV2, error) { if g.metricsGroupOpts.dependGlobalObjectAPI { objLayer := newObjectLayerFn() // Service not initialized yet if objLayer == nil { - return []Metric{}, nil + return []MetricV2{}, nil } } if g.metricsGroupOpts.dependGlobalAuthNPlugin { if globalAuthNPlugin == nil { - return []Metric{}, nil + return []MetricV2{}, nil } } if g.metricsGroupOpts.dependGlobalSiteReplicationSys { if !globalSiteReplicationSys.isEnabled() { - return []Metric{}, nil + return []MetricV2{}, nil } } if g.metricsGroupOpts.dependGlobalNotificationSys { if globalNotificationSys == nil { - return []Metric{}, nil + return []MetricV2{}, nil } } if g.metricsGroupOpts.dependGlobalKMS { if GlobalKMS == nil { - return []Metric{}, nil + return []MetricV2{}, nil } } if g.metricsGroupOpts.dependGlobalLambdaTargetList { if globalLambdaTargetList == nil { - return []Metric{}, nil + return []MetricV2{}, nil } } if g.metricsGroupOpts.dependGlobalIAMSys { if globalIAMSys == nil { - return []Metric{}, nil + return []MetricV2{}, nil } } if g.metricsGroupOpts.dependGlobalLockServer { if globalLockServer == nil { - return []Metric{}, nil + return []MetricV2{}, nil } } if g.metricsGroupOpts.dependGlobalIsDistErasure { if !globalIsDistErasure { - return []Metric{}, nil + return []MetricV2{}, nil } } if g.metricsGroupOpts.dependGlobalBackgroundHealState { if globalBackgroundHealState == nil { - return []Metric{}, nil + return []MetricV2{}, nil } } if g.metricsGroupOpts.dependBucketTargetSys { if globalBucketTargetSys == nil { - return []Metric{}, nil + return []MetricV2{}, nil } } return read(GlobalContext), nil @@ -424,8 +424,8 @@ func (g *MetricsGroup) RegisterRead(read func(context.Context) []Metric) { ) } -func (m *Metric) clone() Metric { - metric := Metric{ +func (m *MetricV2) clone() MetricV2 { + metric := MetricV2{ Description: m.Description, Value: m.Value, HistogramBucketLabel: m.HistogramBucketLabel, @@ -448,13 +448,13 @@ func (m *Metric) clone() Metric { // Get - returns cached value always upton the configured TTL, // once the TTL expires "read()" registered function is called // to return the new values and updated. 
-func (g *MetricsGroup) Get() (metrics []Metric) { +func (g *MetricsGroupV2) Get() (metrics []MetricV2) { m, _ := g.metricsCache.Get() if len(m) == 0 { - return []Metric{} + return []MetricV2{} } - metrics = make([]Metric, 0, len(m)) + metrics = make([]MetricV2, 0, len(m)) for i := range m { metrics = append(metrics, m[i].clone()) } @@ -1679,11 +1679,11 @@ func getMinIOProcessCPUTime() MetricDescription { } } -func getMinioProcMetrics() *MetricsGroup { - mg := &MetricsGroup{ +func getMinioProcMetrics() *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { + mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { if runtime.GOOS == "windows" { return nil } @@ -1700,11 +1700,11 @@ func getMinioProcMetrics() *MetricsGroup { stat, _ := p.Stat() startTime, _ := stat.StartTime() - metrics = make([]Metric, 0, 20) + metrics = make([]MetricV2, 0, 20) if openFDs > 0 { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinioFDOpenMD(), Value: float64(openFDs), }, @@ -1713,7 +1713,7 @@ func getMinioProcMetrics() *MetricsGroup { if l.OpenFiles > 0 { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinioFDLimitMD(), Value: float64(l.OpenFiles), }) @@ -1721,7 +1721,7 @@ func getMinioProcMetrics() *MetricsGroup { if io.SyscR > 0 { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinIOProcessSysCallRMD(), Value: float64(io.SyscR), }) @@ -1729,7 +1729,7 @@ func getMinioProcMetrics() *MetricsGroup { if io.SyscW > 0 { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinIOProcessSysCallWMD(), Value: float64(io.SyscW), }) @@ -1737,7 +1737,7 @@ func getMinioProcMetrics() *MetricsGroup { if io.ReadBytes > 0 { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinioProcessIOReadBytesMD(), Value: float64(io.ReadBytes), }) @@ -1745,7 +1745,7 @@ func getMinioProcMetrics() *MetricsGroup { if io.WriteBytes > 0 { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinioProcessIOWriteBytesMD(), Value: float64(io.WriteBytes), }) @@ -1753,7 +1753,7 @@ func getMinioProcMetrics() *MetricsGroup { if io.RChar > 0 { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinioProcessIOReadCachedBytesMD(), Value: float64(io.RChar), }) @@ -1761,7 +1761,7 @@ func getMinioProcMetrics() *MetricsGroup { if io.WChar > 0 { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinioProcessIOWriteCachedBytesMD(), Value: float64(io.WChar), }) @@ -1769,7 +1769,7 @@ func getMinioProcMetrics() *MetricsGroup { if startTime > 0 { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinIOProcessStartTimeMD(), Value: startTime, }) @@ -1777,7 +1777,7 @@ func getMinioProcMetrics() *MetricsGroup { if !globalBootTime.IsZero() { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinIOProcessUptimeMD(), Value: time.Since(globalBootTime).Seconds(), }) @@ -1785,7 +1785,7 @@ func getMinioProcMetrics() *MetricsGroup { if stat.ResidentMemory() > 0 { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinIOProcessResidentMemory(), Value: float64(stat.ResidentMemory()), }) @@ -1793,7 +1793,7 @@ func getMinioProcMetrics() *MetricsGroup { if stat.VirtualMemory() > 0 { metrics = append(metrics, - Metric{ + MetricV2{ Description: getMinIOProcessVirtualMemory(), Value: float64(stat.VirtualMemory()), }) @@ -1801,7 +1801,7 @@ func getMinioProcMetrics() *MetricsGroup { if stat.CPUTime() > 0 { metrics = append(metrics, - Metric{ + 
MetricV2{ Description: getMinIOProcessCPUTime(), Value: stat.CPUTime(), }) @@ -1811,12 +1811,12 @@ func getMinioProcMetrics() *MetricsGroup { return mg } -func getGoMetrics() *MetricsGroup { - mg := &MetricsGroup{ +func getGoMetrics() *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { - metrics = append(metrics, Metric{ + mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { + metrics = append(metrics, MetricV2{ Description: getMinIOGORoutineCountMD(), Value: float64(runtime.NumGoroutine()), }) @@ -1830,7 +1830,7 @@ func getGoMetrics() *MetricsGroup { // // The last parameter is added for compatibility - if true it lowercases the // `api` label values. -func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription, toLowerAPILabels bool) []Metric { +func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription, toLowerAPILabels bool) []MetricV2 { ch := make(chan prometheus.Metric) go func() { defer xioutil.SafeClose(ch) @@ -1839,7 +1839,7 @@ func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription, }() // Converts metrics received into internal []Metric type - var metrics []Metric + var metrics []MetricV2 for promMetric := range ch { dtoMetric := &dto.Metric{} err := promMetric.Write(dtoMetric) @@ -1861,7 +1861,7 @@ func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription, } } labels["le"] = fmt.Sprintf("%.3f", *b.UpperBound) - metric := Metric{ + metric := MetricV2{ Description: desc, VariableLabels: labels, Value: float64(b.GetCumulativeCount()), @@ -1874,7 +1874,7 @@ func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription, labels1[*lp.Name] = *lp.Value } labels1["le"] = fmt.Sprintf("%.3f", math.Inf(+1)) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: desc, VariableLabels: labels1, Value: dtoMetric.Counter.GetValue(), @@ -1883,33 +1883,33 @@ func getHistogramMetrics(hist *prometheus.HistogramVec, desc MetricDescription, return metrics } -func getBucketTTFBMetric() *MetricsGroup { - mg := &MetricsGroup{ +func getBucketTTFBMetric() *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, } - mg.RegisterRead(func(ctx context.Context) []Metric { + mg.RegisterRead(func(ctx context.Context) []MetricV2 { return getHistogramMetrics(bucketHTTPRequestsDuration, getBucketTTFBDistributionMD(), true) }) return mg } -func getS3TTFBMetric() *MetricsGroup { - mg := &MetricsGroup{ +func getS3TTFBMetric() *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, } - mg.RegisterRead(func(ctx context.Context) []Metric { + mg.RegisterRead(func(ctx context.Context) []MetricV2 { return getHistogramMetrics(httpRequestsDuration, getS3TTFBDistributionMD(), true) }) return mg } -func getTierMetrics() *MetricsGroup { - mg := &MetricsGroup{ +func getTierMetrics() *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, } - mg.RegisterRead(func(ctx context.Context) []Metric { + mg.RegisterRead(func(ctx context.Context) []MetricV2 { return globalTierMetrics.Report() }) return mg @@ -2005,15 +2005,15 @@ func getBucketS3RequestsCanceledMD() MetricDescription { } } -func getILMNodeMetrics() *MetricsGroup { - mg := &MetricsGroup{ +func getILMNodeMetrics() *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, } - mg.RegisterRead(func(_ context.Context) []Metric { - expPendingTasks := Metric{ + 
mg.RegisterRead(func(_ context.Context) []MetricV2 { + expPendingTasks := MetricV2{ Description: getExpiryPendingTasksMD(), } - expMissedTasks := Metric{ + expMissedTasks := MetricV2{ Description: MetricDescription{ Namespace: nodeMetricNamespace, Subsystem: ilmSubsystem, @@ -2022,7 +2022,7 @@ func getILMNodeMetrics() *MetricsGroup { Type: counterMetric, }, } - expMissedFreeVersions := Metric{ + expMissedFreeVersions := MetricV2{ Description: MetricDescription{ Namespace: nodeMetricNamespace, Subsystem: ilmSubsystem, @@ -2031,7 +2031,7 @@ func getILMNodeMetrics() *MetricsGroup { Type: counterMetric, }, } - expMissedTierJournalTasks := Metric{ + expMissedTierJournalTasks := MetricV2{ Description: MetricDescription{ Namespace: nodeMetricNamespace, Subsystem: ilmSubsystem, @@ -2040,7 +2040,7 @@ func getILMNodeMetrics() *MetricsGroup { Type: counterMetric, }, } - expNumWorkers := Metric{ + expNumWorkers := MetricV2{ Description: MetricDescription{ Namespace: nodeMetricNamespace, Subsystem: ilmSubsystem, @@ -2049,13 +2049,13 @@ func getILMNodeMetrics() *MetricsGroup { Type: gaugeMetric, }, } - trPendingTasks := Metric{ + trPendingTasks := MetricV2{ Description: getTransitionPendingTasksMD(), } - trActiveTasks := Metric{ + trActiveTasks := MetricV2{ Description: getTransitionActiveTasksMD(), } - trMissedTasks := Metric{ + trMissedTasks := MetricV2{ Description: getTransitionMissedTasksMD(), } if globalExpiryState != nil { @@ -2070,7 +2070,7 @@ func getILMNodeMetrics() *MetricsGroup { trActiveTasks.Value = float64(globalTransitionState.ActiveTasks()) trMissedTasks.Value = float64(globalTransitionState.MissedImmediateTasks()) } - return []Metric{ + return []MetricV2{ expPendingTasks, expMissedTasks, expMissedFreeVersions, @@ -2084,12 +2084,12 @@ func getILMNodeMetrics() *MetricsGroup { return mg } -func getScannerNodeMetrics() *MetricsGroup { - mg := &MetricsGroup{ +func getScannerNodeMetrics() *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, } - mg.RegisterRead(func(_ context.Context) []Metric { - metrics := []Metric{ + mg.RegisterRead(func(_ context.Context) []MetricV2 { + metrics := []MetricV2{ { Description: MetricDescription{ Namespace: nodeMetricNamespace, @@ -2157,7 +2157,7 @@ func getScannerNodeMetrics() *MetricsGroup { if v == 0 { continue } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: nodeMetricNamespace, Subsystem: ilmSubsystem, @@ -2173,12 +2173,12 @@ func getScannerNodeMetrics() *MetricsGroup { return mg } -func getIAMNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getIAMNodeMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, metricsGroupOpts: opts, } - mg.RegisterRead(func(_ context.Context) (metrics []Metric) { + mg.RegisterRead(func(_ context.Context) (metrics []MetricV2) { lastSyncTime := atomic.LoadUint64(&globalIAMSys.LastRefreshTimeUnixNano) var sinceLastSyncMillis uint64 if lastSyncTime != 0 { @@ -2186,7 +2186,7 @@ func getIAMNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { } pluginAuthNMetrics := globalAuthNPlugin.Metrics() - metrics = []Metric{ + metrics = []MetricV2{ { Description: MetricDescription{ Namespace: nodeMetricNamespace, @@ -2295,8 +2295,8 @@ func getIAMNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { } // replication metrics for each node - published to the cluster endpoint with nodename as label -func getReplicationNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := 
&MetricsGroup{ +func getReplicationNodeMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 1 * time.Minute, metricsGroupOpts: opts, } @@ -2305,50 +2305,50 @@ func getReplicationNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { Offline = 0 ) - mg.RegisterRead(func(_ context.Context) []Metric { - var ml []Metric + mg.RegisterRead(func(_ context.Context) []MetricV2 { + var ml []MetricV2 // common operational metrics for bucket replication and site replication - published // at cluster level if globalReplicationStats != nil { qs := globalReplicationStats.getNodeQueueStatsSummary() - activeWorkersCount := Metric{ + activeWorkersCount := MetricV2{ Description: getClusterReplActiveWorkersCountMD(), } - avgActiveWorkersCount := Metric{ + avgActiveWorkersCount := MetricV2{ Description: getClusterReplAvgActiveWorkersCountMD(), } - maxActiveWorkersCount := Metric{ + maxActiveWorkersCount := MetricV2{ Description: getClusterReplMaxActiveWorkersCountMD(), } - currInQueueCount := Metric{ + currInQueueCount := MetricV2{ Description: getClusterReplCurrQueuedOperationsMD(), } - currInQueueBytes := Metric{ + currInQueueBytes := MetricV2{ Description: getClusterReplCurrQueuedBytesMD(), } - currTransferRate := Metric{ + currTransferRate := MetricV2{ Description: getClusterReplCurrentTransferRateMD(), } - avgQueueCount := Metric{ + avgQueueCount := MetricV2{ Description: getClusterReplAvgQueuedOperationsMD(), } - avgQueueBytes := Metric{ + avgQueueBytes := MetricV2{ Description: getClusterReplAvgQueuedBytesMD(), } - maxQueueCount := Metric{ + maxQueueCount := MetricV2{ Description: getClusterReplMaxQueuedOperationsMD(), } - maxQueueBytes := Metric{ + maxQueueBytes := MetricV2{ Description: getClusterReplMaxQueuedBytesMD(), } - avgTransferRate := Metric{ + avgTransferRate := MetricV2{ Description: getClusterReplAvgTransferRateMD(), } - maxTransferRate := Metric{ + maxTransferRate := MetricV2{ Description: getClusterReplMaxTransferRateMD(), } - mrfCount := Metric{ + mrfCount := MetricV2{ Description: getClusterReplMRFFailedOperationsMD(), Value: float64(qs.MRFStats.LastFailedCount), } @@ -2372,7 +2372,7 @@ func getReplicationNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { avgTransferRate.Value = tots.Avg maxTransferRate.Value = tots.Peak } - ml = []Metric{ + ml = []MetricV2{ activeWorkersCount, avgActiveWorkersCount, maxActiveWorkersCount, @@ -2390,7 +2390,7 @@ func getReplicationNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { } for ep, health := range globalBucketTargetSys.healthStats() { // link latency current - m := Metric{ + m := MetricV2{ Description: getClusterRepLinkLatencyCurrMD(), VariableLabels: map[string]string{ "endpoint": ep, @@ -2400,7 +2400,7 @@ func getReplicationNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { ml = append(ml, m) // link latency average - m = Metric{ + m = MetricV2{ Description: getClusterRepLinkLatencyAvgMD(), VariableLabels: map[string]string{ "endpoint": ep, @@ -2410,7 +2410,7 @@ func getReplicationNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { ml = append(ml, m) // link latency max - m = Metric{ + m = MetricV2{ Description: getClusterRepLinkLatencyMaxMD(), VariableLabels: map[string]string{ "endpoint": ep, @@ -2419,7 +2419,7 @@ func getReplicationNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { m.Value = float64(health.latency.peak / time.Millisecond) ml = append(ml, m) - linkOnline := Metric{ + linkOnline := MetricV2{ Description: getClusterRepLinkOnlineMD(), VariableLabels: map[string]string{ "endpoint": ep, @@ -2431,7 
+2431,7 @@ func getReplicationNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { } linkOnline.Value = float64(online) ml = append(ml, linkOnline) - offlineDuration := Metric{ + offlineDuration := MetricV2{ Description: getClusterRepLinkCurrOfflineDurationMD(), VariableLabels: map[string]string{ "endpoint": ep, @@ -2444,7 +2444,7 @@ func getReplicationNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { offlineDuration.Value = float64(currDowntime / time.Second) ml = append(ml, offlineDuration) - downtimeDuration := Metric{ + downtimeDuration := MetricV2{ Description: getClusterRepLinkTotalOfflineDurationMD(), VariableLabels: map[string]string{ "endpoint": ep, @@ -2464,13 +2464,13 @@ func getReplicationNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { } // replication metrics for site replication -func getReplicationSiteMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getReplicationSiteMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 1 * time.Minute, metricsGroupOpts: opts, } - mg.RegisterRead(func(_ context.Context) []Metric { - ml := []Metric{} + mg.RegisterRead(func(_ context.Context) []MetricV2 { + ml := []MetricV2{} // metrics pertinent to site replication - overall roll up. if globalSiteReplicationSys.isEnabled() { @@ -2479,103 +2479,103 @@ func getReplicationSiteMetrics(opts MetricsGroupOpts) *MetricsGroup { logger.LogIf(GlobalContext, err) return ml } - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getRepReceivedBytesMD(clusterMetricNamespace), Value: float64(m.ReplicaSize), }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getRepReceivedOperationsMD(clusterMetricNamespace), Value: float64(m.ReplicaCount), }) for _, stat := range m.Metrics { - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getRepFailedBytesLastMinuteMD(clusterMetricNamespace), Value: float64(stat.Failed.LastMinute.Bytes), VariableLabels: map[string]string{"endpoint": stat.Endpoint}, }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getRepFailedOperationsLastMinuteMD(clusterMetricNamespace), Value: stat.Failed.LastMinute.Count, VariableLabels: map[string]string{"endpoint": stat.Endpoint}, }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getRepFailedBytesLastHourMD(clusterMetricNamespace), Value: float64(stat.Failed.LastHour.Bytes), VariableLabels: map[string]string{"endpoint": stat.Endpoint}, }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getRepFailedOperationsLastHourMD(clusterMetricNamespace), Value: stat.Failed.LastHour.Count, VariableLabels: map[string]string{"endpoint": stat.Endpoint}, }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getRepFailedBytesTotalMD(clusterMetricNamespace), Value: float64(stat.Failed.Totals.Bytes), VariableLabels: map[string]string{"endpoint": stat.Endpoint}, }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getRepFailedOperationsTotalMD(clusterMetricNamespace), Value: stat.Failed.Totals.Count, VariableLabels: map[string]string{"endpoint": stat.Endpoint}, }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getRepSentBytesMD(clusterMetricNamespace), Value: float64(stat.ReplicatedSize), VariableLabels: map[string]string{"endpoint": stat.Endpoint}, }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getRepSentOperationsMD(clusterMetricNamespace), Value: float64(stat.ReplicatedCount), VariableLabels: map[string]string{"endpoint": 
stat.Endpoint}, }) if c, ok := stat.Failed.ErrCounts["AccessDenied"]; ok { - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getClusterRepCredentialErrorsMD(clusterMetricNamespace), Value: float64(c), VariableLabels: map[string]string{"endpoint": stat.Endpoint}, }) } } - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getClusterReplProxiedGetOperationsMD(clusterMetricNamespace), Value: float64(m.Proxied.GetTotal), }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getClusterReplProxiedHeadOperationsMD(clusterMetricNamespace), Value: float64(m.Proxied.HeadTotal), }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getClusterReplProxiedPutTaggingOperationsMD(clusterMetricNamespace), Value: float64(m.Proxied.PutTagTotal), }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getClusterReplProxiedGetTaggingOperationsMD(clusterMetricNamespace), Value: float64(m.Proxied.GetTagTotal), }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getClusterReplProxiedRmvTaggingOperationsMD(clusterMetricNamespace), Value: float64(m.Proxied.RmvTagTotal), }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getClusterReplProxiedGetFailedOperationsMD(clusterMetricNamespace), Value: float64(m.Proxied.GetFailedTotal), }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getClusterReplProxiedHeadFailedOperationsMD(clusterMetricNamespace), Value: float64(m.Proxied.HeadFailedTotal), }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getClusterReplProxiedPutTaggingFailedOperationsMD(clusterMetricNamespace), Value: float64(m.Proxied.PutTagFailedTotal), }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getClusterReplProxiedGetTaggingFailedOperationsMD(clusterMetricNamespace), Value: float64(m.Proxied.GetTagFailedTotal), }) - ml = append(ml, Metric{ + ml = append(ml, MetricV2{ Description: getClusterReplProxiedRmvTaggingFailedOperationsMD(clusterMetricNamespace), Value: float64(m.Proxied.RmvTagFailedTotal), }) @@ -2586,16 +2586,16 @@ func getReplicationSiteMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getMinioVersionMetrics() *MetricsGroup { - mg := &MetricsGroup{ +func getMinioVersionMetrics() *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, } - mg.RegisterRead(func(_ context.Context) (metrics []Metric) { - metrics = append(metrics, Metric{ + mg.RegisterRead(func(_ context.Context) (metrics []MetricV2) { + metrics = append(metrics, MetricV2{ Description: getMinIOCommitMD(), VariableLabels: map[string]string{"commit": CommitID}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getMinIOVersionMD(), VariableLabels: map[string]string{"version": Version}, }) @@ -2604,19 +2604,19 @@ func getMinioVersionMetrics() *MetricsGroup { return mg } -func getNodeHealthMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getNodeHealthMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 1 * time.Minute, metricsGroupOpts: opts, } - mg.RegisterRead(func(_ context.Context) (metrics []Metric) { - metrics = make([]Metric, 0, 16) + mg.RegisterRead(func(_ context.Context) (metrics []MetricV2) { + metrics = make([]MetricV2, 0, 16) nodesUp, nodesDown := globalNotificationSys.GetPeerOnlineCount() - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeOnlineTotalMD(), Value: 
float64(nodesUp), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeOfflineTotalMD(), Value: float64(nodesDown), }) @@ -2625,12 +2625,12 @@ func getNodeHealthMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getMinioHealingMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getMinioHealingMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, metricsGroupOpts: opts, } - mg.RegisterRead(func(_ context.Context) (metrics []Metric) { + mg.RegisterRead(func(_ context.Context) (metrics []MetricV2) { bgSeq, exists := globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID) if !exists { return @@ -2640,8 +2640,8 @@ func getMinioHealingMetrics(opts MetricsGroupOpts) *MetricsGroup { return } - metrics = make([]Metric, 0, 5) - metrics = append(metrics, Metric{ + metrics = make([]MetricV2, 0, 5) + metrics = append(metrics, MetricV2{ Description: getHealLastActivityTimeMD(), Value: float64(time.Since(bgSeq.lastHealActivity)), }) @@ -2653,12 +2653,12 @@ func getMinioHealingMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getFailedItems(seq *healSequence) (m []Metric) { +func getFailedItems(seq *healSequence) (m []MetricV2) { items := seq.gethealFailedItemsMap() - m = make([]Metric, 0, len(items)) + m = make([]MetricV2, 0, len(items)) for k, v := range items { s := strings.Split(k, ",") - m = append(m, Metric{ + m = append(m, MetricV2{ Description: getHealObjectsFailTotalMD(), VariableLabels: map[string]string{ "mount_path": s[0], @@ -2670,11 +2670,11 @@ func getFailedItems(seq *healSequence) (m []Metric) { return } -func getHealedItems(seq *healSequence) (m []Metric) { +func getHealedItems(seq *healSequence) (m []MetricV2) { items := seq.getHealedItemsMap() - m = make([]Metric, 0, len(items)) + m = make([]MetricV2, 0, len(items)) for k, v := range items { - m = append(m, Metric{ + m = append(m, MetricV2{ Description: getHealObjectsHealTotalMD(), VariableLabels: map[string]string{"type": string(k)}, Value: float64(v), @@ -2683,11 +2683,11 @@ func getHealedItems(seq *healSequence) (m []Metric) { return } -func getObjectsScanned(seq *healSequence) (m []Metric) { +func getObjectsScanned(seq *healSequence) (m []MetricV2) { items := seq.getScannedItemsMap() - m = make([]Metric, 0, len(items)) + m = make([]MetricV2, 0, len(items)) for k, v := range items { - m = append(m, Metric{ + m = append(m, MetricV2{ Description: getHealObjectsTotalMD(), VariableLabels: map[string]string{"type": string(k)}, Value: float64(v), @@ -2696,20 +2696,20 @@ func getObjectsScanned(seq *healSequence) (m []Metric) { return } -func getDistLockMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getDistLockMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 1 * time.Second, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) []Metric { + mg.RegisterRead(func(ctx context.Context) []MetricV2 { if !globalIsDistErasure { - return []Metric{} + return []MetricV2{} } st := globalLockServer.stats() - metrics := make([]Metric, 0, 3) - metrics = append(metrics, Metric{ + metrics := make([]MetricV2, 0, 3) + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: "locks", @@ -2719,7 +2719,7 @@ func getDistLockMetrics(opts MetricsGroupOpts) *MetricsGroup { }, Value: float64(st.Total), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ 
Description: MetricDescription{ Namespace: minioNamespace, Subsystem: "locks", @@ -2729,7 +2729,7 @@ func getDistLockMetrics(opts MetricsGroupOpts) *MetricsGroup { }, Value: float64(st.Writes), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: "locks", @@ -2744,17 +2744,17 @@ func getDistLockMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) []Metric { - metrics := make([]Metric, 0, 3) + mg.RegisterRead(func(ctx context.Context) []MetricV2 { + metrics := make([]MetricV2, 0, 3) if globalEventNotifier != nil { nstats := globalEventNotifier.targetList.Stats() - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: notifySubsystem, @@ -2764,7 +2764,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { }, Value: float64(nstats.CurrentSendCalls), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: notifySubsystem, @@ -2774,7 +2774,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { }, Value: float64(nstats.EventsSkipped), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: notifySubsystem, @@ -2784,7 +2784,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { }, Value: float64(nstats.EventsErrorsTotal), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: notifySubsystem, @@ -2795,7 +2795,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { Value: float64(nstats.TotalEvents), }) for id, st := range nstats.TargetStats { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: notifySubsystem, @@ -2806,7 +2806,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { VariableLabels: map[string]string{"target_id": id.ID, "target_name": id.Name}, Value: float64(st.TotalEvents), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: notifySubsystem, @@ -2817,7 +2817,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { VariableLabels: map[string]string{"target_id": id.ID, "target_name": id.Name}, Value: float64(st.FailedEvents), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: notifySubsystem, @@ -2828,7 +2828,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { VariableLabels: map[string]string{"target_id": id.ID, "target_name": id.Name}, Value: float64(st.CurrentSendCalls), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: notifySubsystem, @@ -2844,7 +2844,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { lstats := 
globalLambdaTargetList.Stats() for _, st := range lstats.TargetStats { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: lambdaSubsystem, @@ -2854,7 +2854,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { VariableLabels: map[string]string{"target_id": st.ID.ID, "target_name": st.ID.Name}, Value: float64(st.ActiveRequests), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: lambdaSubsystem, @@ -2865,7 +2865,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { VariableLabels: map[string]string{"target_id": st.ID.ID, "target_name": st.ID.Name}, Value: float64(st.TotalRequests), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: lambdaSubsystem, @@ -2881,7 +2881,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { // Audit and system: audit := logger.CurrentStats() for id, st := range audit { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: auditSubsystem, @@ -2892,7 +2892,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { VariableLabels: map[string]string{"target_id": id}, Value: float64(st.QueueLength), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: auditSubsystem, @@ -2903,7 +2903,7 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { VariableLabels: map[string]string{"target_id": id}, Value: float64(st.TotalMessages), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: minioNamespace, Subsystem: auditSubsystem, @@ -2920,82 +2920,82 @@ func getNotificationMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getHTTPMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getHTTPMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { + mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { if !mg.metricsGroupOpts.bucketOnly { httpStats := globalHTTPStats.toServerHTTPStats(true) - metrics = make([]Metric, 0, 3+ + metrics = make([]MetricV2, 0, 3+ len(httpStats.CurrentS3Requests.APIStats)+ len(httpStats.TotalS3Requests.APIStats)+ len(httpStats.TotalS3Errors.APIStats)+ len(httpStats.TotalS35xxErrors.APIStats)+ len(httpStats.TotalS34xxErrors.APIStats)) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3RejectedAuthRequestsTotalMD(), Value: float64(httpStats.TotalS3RejectedAuth), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3RejectedTimestampRequestsTotalMD(), Value: float64(httpStats.TotalS3RejectedTime), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3RejectedHeaderRequestsTotalMD(), Value: float64(httpStats.TotalS3RejectedHeader), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3RejectedInvalidRequestsTotalMD(), Value: float64(httpStats.TotalS3RejectedInvalid), }) - metrics = append(metrics, Metric{ + 
metrics = append(metrics, MetricV2{ Description: getS3RequestsInQueueMD(), Value: float64(httpStats.S3RequestsInQueue), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getIncomingS3RequestsMD(), Value: float64(httpStats.S3RequestsIncoming), }) for api, value := range httpStats.CurrentS3Requests.APIStats { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3RequestsInFlightMD(), Value: float64(value), VariableLabels: map[string]string{"api": api}, }) } for api, value := range httpStats.TotalS3Requests.APIStats { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3RequestsTotalMD(), Value: float64(value), VariableLabels: map[string]string{"api": api}, }) } for api, value := range httpStats.TotalS3Errors.APIStats { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3RequestsErrorsMD(), Value: float64(value), VariableLabels: map[string]string{"api": api}, }) } for api, value := range httpStats.TotalS35xxErrors.APIStats { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3Requests5xxErrorsMD(), Value: float64(value), VariableLabels: map[string]string{"api": api}, }) } for api, value := range httpStats.TotalS34xxErrors.APIStats { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3Requests4xxErrorsMD(), Value: float64(value), VariableLabels: map[string]string{"api": api}, }) } for api, value := range httpStats.TotalS3Canceled.APIStats { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3RequestsCanceledMD(), Value: float64(value), VariableLabels: map[string]string{"api": api}, @@ -3007,7 +3007,7 @@ func getHTTPMetrics(opts MetricsGroupOpts) *MetricsGroup { for bucket, inOut := range globalBucketConnStats.getS3InOutBytes() { recvBytes := inOut.In if recvBytes > 0 { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketTrafficReceivedBytes(), Value: float64(recvBytes), VariableLabels: map[string]string{"bucket": bucket}, @@ -3015,7 +3015,7 @@ func getHTTPMetrics(opts MetricsGroupOpts) *MetricsGroup { } sentBytes := inOut.Out if sentBytes > 0 { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketTrafficSentBytes(), Value: float64(sentBytes), VariableLabels: map[string]string{"bucket": bucket}, @@ -3024,7 +3024,7 @@ func getHTTPMetrics(opts MetricsGroupOpts) *MetricsGroup { httpStats := globalBucketHTTPStats.load(bucket) for k, v := range httpStats.currentS3Requests.Load(true) { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketS3RequestsInFlightMD(), Value: float64(v), VariableLabels: map[string]string{"bucket": bucket, "api": k}, @@ -3032,7 +3032,7 @@ func getHTTPMetrics(opts MetricsGroupOpts) *MetricsGroup { } for k, v := range httpStats.totalS3Requests.Load(true) { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketS3RequestsTotalMD(), Value: float64(v), VariableLabels: map[string]string{"bucket": bucket, "api": k}, @@ -3040,7 +3040,7 @@ func getHTTPMetrics(opts MetricsGroupOpts) *MetricsGroup { } for k, v := range httpStats.totalS3Canceled.Load(true) { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketS3RequestsCanceledMD(), Value: float64(v), VariableLabels: map[string]string{"bucket": bucket, 
"api": k}, @@ -3048,7 +3048,7 @@ func getHTTPMetrics(opts MetricsGroupOpts) *MetricsGroup { } for k, v := range httpStats.totalS34xxErrors.Load(true) { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketS3Requests4xxErrorsMD(), Value: float64(v), VariableLabels: map[string]string{"bucket": bucket, "api": k}, @@ -3056,7 +3056,7 @@ func getHTTPMetrics(opts MetricsGroupOpts) *MetricsGroup { } for k, v := range httpStats.totalS35xxErrors.Load(true) { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketS3Requests5xxErrorsMD(), Value: float64(v), VariableLabels: map[string]string{"bucket": bucket, "api": k}, @@ -3069,41 +3069,41 @@ func getHTTPMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getNetworkMetrics() *MetricsGroup { - mg := &MetricsGroup{ +func getNetworkMetrics() *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { - metrics = make([]Metric, 0, 10) + mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { + metrics = make([]MetricV2, 0, 10) connStats := globalConnStats.toServerConnStats() rpcStats := rest.GetRPCStats() if globalIsDistErasure { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getInternodeFailedRequests(), Value: float64(rpcStats.Errs), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getInternodeTCPDialTimeout(), Value: float64(rpcStats.DialErrs), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getInternodeTCPAvgDuration(), Value: float64(rpcStats.DialAvgDuration), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getInterNodeSentBytesMD(), Value: float64(connStats.internodeOutputBytes), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getInterNodeReceivedBytesMD(), Value: float64(connStats.internodeInputBytes), }) } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3SentBytesMD(), Value: float64(connStats.s3OutputBytes), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getS3ReceivedBytesMD(), Value: float64(connStats.s3InputBytes), }) @@ -3112,18 +3112,18 @@ func getNetworkMetrics() *MetricsGroup { return mg } -func getClusterUsageMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getClusterUsageMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 1 * time.Minute, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { + mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { objLayer := newObjectLayerFn() if objLayer == nil { return } - metrics = make([]Metric, 0, 50) + metrics = make([]MetricV2, 0, 50) dataUsageInfo, err := loadDataUsageFromBackend(ctx, objLayer) if err != nil { logger.LogIf(ctx, err) @@ -3135,7 +3135,7 @@ func getClusterUsageMetrics(opts MetricsGroupOpts) *MetricsGroup { return } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getUsageLastScanActivityMD(), Value: float64(time.Since(dataUsageInfo.LastUpdate)), }) @@ -3176,39 +3176,39 @@ func getClusterUsageMetrics(opts MetricsGroupOpts) *MetricsGroup { } } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterUsageTotalBytesMD(), Value: 
float64(clusterSize), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterUsageObjectsTotalMD(), Value: float64(clusterObjectsCount), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterUsageVersionsTotalMD(), Value: float64(clusterVersionsCount), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterUsageDeleteMarkersTotalMD(), Value: float64(clusterDeleteMarkersCount), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterObjectDistributionMD(), Histogram: clusterObjectSizesHistogram, HistogramBucketLabel: "range", }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterObjectVersionsMD(), Histogram: clusterVersionsHistogram, HistogramBucketLabel: "range", }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterBucketsTotalMD(), Value: float64(clusterBuckets), }) @@ -3218,15 +3218,15 @@ func getClusterUsageMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getBucketUsageMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getBucketUsageMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 1 * time.Minute, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { + mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { objLayer := newObjectLayerFn() - metrics = make([]Metric, 0, 50) + metrics = make([]MetricV2, 0, 50) dataUsageInfo, err := loadDataUsageFromBackend(ctx, objLayer) if err != nil { logger.LogIf(ctx, err) @@ -3238,7 +3238,7 @@ func getBucketUsageMetrics(opts MetricsGroupOpts) *MetricsGroup { return } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getUsageLastScanActivityMD(), Value: float64(time.Since(dataUsageInfo.LastUpdate)), }) @@ -3250,32 +3250,32 @@ func getBucketUsageMetrics(opts MetricsGroupOpts) *MetricsGroup { for bucket, usage := range dataUsageInfo.BucketsUsage { quota, _ := globalBucketQuotaSys.Get(ctx, bucket) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketUsageTotalBytesMD(), Value: float64(usage.Size), VariableLabels: map[string]string{"bucket": bucket}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketUsageObjectsTotalMD(), Value: float64(usage.ObjectsCount), VariableLabels: map[string]string{"bucket": bucket}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketUsageVersionsTotalMD(), Value: float64(usage.VersionsCount), VariableLabels: map[string]string{"bucket": bucket}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketUsageDeleteMarkersTotalMD(), Value: float64(usage.DeleteMarkersCount), VariableLabels: map[string]string{"bucket": bucket}, }) if quota != nil && quota.Quota > 0 { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketUsageQuotaTotalBytesMD(), Value: float64(quota.Quota), VariableLabels: map[string]string{"bucket": bucket}, @@ -3286,112 +3286,112 @@ func getBucketUsageMetrics(opts MetricsGroupOpts) *MetricsGroup { s, ok := bucketReplStats[bucket] if ok { stats = s.ReplicationStats - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: 
getRepReceivedBytesMD(bucketMetricNamespace), Value: float64(stats.ReplicaSize), VariableLabels: map[string]string{"bucket": bucket}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getRepReceivedOperationsMD(bucketMetricNamespace), Value: float64(stats.ReplicaCount), VariableLabels: map[string]string{"bucket": bucket}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterReplProxiedGetOperationsMD(bucketMetricNamespace), Value: float64(s.ProxyStats.GetTotal), VariableLabels: map[string]string{"bucket": bucket}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterReplProxiedHeadOperationsMD(bucketMetricNamespace), Value: float64(s.ProxyStats.HeadTotal), VariableLabels: map[string]string{"bucket": bucket}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterReplProxiedPutTaggingOperationsMD(bucketMetricNamespace), Value: float64(s.ProxyStats.PutTagTotal), VariableLabels: map[string]string{"bucket": bucket}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterReplProxiedGetTaggingOperationsMD(bucketMetricNamespace), Value: float64(s.ProxyStats.GetTagTotal), VariableLabels: map[string]string{"bucket": bucket}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterReplProxiedRmvTaggingOperationsMD(bucketMetricNamespace), Value: float64(s.ProxyStats.RmvTagTotal), VariableLabels: map[string]string{"bucket": bucket}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterReplProxiedGetFailedOperationsMD(bucketMetricNamespace), Value: float64(s.ProxyStats.GetFailedTotal), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterReplProxiedHeadFailedOperationsMD(bucketMetricNamespace), Value: float64(s.ProxyStats.HeadFailedTotal), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterReplProxiedPutTaggingFailedOperationsMD(bucketMetricNamespace), Value: float64(s.ProxyStats.PutTagFailedTotal), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterReplProxiedGetTaggingFailedOperationsMD(bucketMetricNamespace), Value: float64(s.ProxyStats.GetTagFailedTotal), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterReplProxiedRmvTaggingFailedOperationsMD(bucketMetricNamespace), Value: float64(s.ProxyStats.RmvTagFailedTotal), }) } if stats.hasReplicationUsage() { for arn, stat := range stats.Stats { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getRepFailedBytesLastMinuteMD(bucketMetricNamespace), Value: float64(stat.Failed.LastMinute.Bytes), VariableLabels: map[string]string{"bucket": bucket, "targetArn": arn}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getRepFailedOperationsLastMinuteMD(bucketMetricNamespace), Value: stat.Failed.LastMinute.Count, VariableLabels: map[string]string{"bucket": bucket, "targetArn": arn}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getRepFailedBytesLastHourMD(bucketMetricNamespace), Value: float64(stat.Failed.LastHour.Bytes), VariableLabels: map[string]string{"bucket": bucket, "targetArn": arn}, }) - metrics = append(metrics, Metric{ + metrics = 
append(metrics, MetricV2{ Description: getRepFailedOperationsLastHourMD(bucketMetricNamespace), Value: stat.Failed.LastHour.Count, VariableLabels: map[string]string{"bucket": bucket, "targetArn": arn}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getRepFailedBytesTotalMD(bucketMetricNamespace), Value: float64(stat.Failed.Totals.Bytes), VariableLabels: map[string]string{"bucket": bucket, "targetArn": arn}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getRepFailedOperationsTotalMD(bucketMetricNamespace), Value: stat.Failed.Totals.Count, VariableLabels: map[string]string{"bucket": bucket, "targetArn": arn}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getRepSentBytesMD(bucketMetricNamespace), Value: float64(stat.ReplicatedSize), VariableLabels: map[string]string{"bucket": bucket, "targetArn": arn}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getRepSentOperationsMD(bucketMetricNamespace), Value: float64(stat.ReplicatedCount), VariableLabels: map[string]string{"bucket": bucket, "targetArn": arn}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketRepLatencyMD(), HistogramBucketLabel: "range", Histogram: stat.Latency.getUploadLatency(), VariableLabels: map[string]string{"bucket": bucket, "operation": "upload", "targetArn": arn}, }) if c, ok := stat.Failed.ErrCounts["AccessDenied"]; ok { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterRepCredentialErrorsMD(bucketMetricNamespace), Value: float64(c), VariableLabels: map[string]string{"bucket": bucket, "targetArn": arn}, @@ -3400,14 +3400,14 @@ func getBucketUsageMetrics(opts MetricsGroupOpts) *MetricsGroup { } } } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketObjectDistributionMD(), Histogram: usage.ObjectSizesHistogram, HistogramBucketLabel: "range", VariableLabels: map[string]string{"bucket": bucket}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getBucketObjectVersionsMD(), Histogram: usage.ObjectVersionsHistogram, HistogramBucketLabel: "range", @@ -3449,12 +3449,12 @@ func getClusterTransitionedVersionsMD() MetricDescription { } } -func getClusterTierMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getClusterTierMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 1 * time.Minute, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { + mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { objLayer := newObjectLayerFn() if globalTierConfigMgr.Empty() { @@ -3476,65 +3476,65 @@ func getClusterTierMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getLocalStorageMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getLocalStorageMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 1 * time.Minute, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { + mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { objLayer := newObjectLayerFn() - metrics = make([]Metric, 0, 50) + metrics = make([]MetricV2, 0, 50) storageInfo := objLayer.LocalStorageInfo(ctx, true) onlineDrives, offlineDrives := getOnlineOfflineDisksStats(storageInfo.Disks) totalDrives := 
onlineDrives.Merge(offlineDrives) for _, disk := range storageInfo.Disks { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeDriveUsedBytesMD(), Value: float64(disk.UsedSpace), VariableLabels: map[string]string{"drive": disk.DrivePath}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeDriveFreeBytesMD(), Value: float64(disk.AvailableSpace), VariableLabels: map[string]string{"drive": disk.DrivePath}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeDriveTotalBytesMD(), Value: float64(disk.TotalSpace), VariableLabels: map[string]string{"drive": disk.DrivePath}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeDrivesFreeInodesMD(), Value: float64(disk.FreeInodes), VariableLabels: map[string]string{"drive": disk.DrivePath}, }) if disk.Metrics != nil { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeDriveTimeoutErrorsMD(), Value: float64(disk.Metrics.TotalErrorsTimeout), VariableLabels: map[string]string{"drive": disk.DrivePath}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeDriveAvailabilityErrorsMD(), Value: float64(disk.Metrics.TotalErrorsAvailability), VariableLabels: map[string]string{"drive": disk.DrivePath}, }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeDriveWaitingIOMD(), Value: float64(disk.Metrics.TotalWaiting), VariableLabels: map[string]string{"drive": disk.DrivePath}, }) for apiName, latency := range disk.Metrics.LastMinute { - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeDriveAPILatencyMD(), Value: float64(latency.Avg().Microseconds()), VariableLabels: map[string]string{"drive": disk.DrivePath, "api": "storage." 
+ apiName}, @@ -3543,27 +3543,27 @@ func getLocalStorageMetrics(opts MetricsGroupOpts) *MetricsGroup { } } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeDrivesOfflineTotalMD(), Value: float64(offlineDrives.Sum()), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeDrivesOnlineTotalMD(), Value: float64(onlineDrives.Sum()), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeDrivesTotalMD(), Value: float64(totalDrives.Sum()), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeStandardParityMD(), Value: float64(storageInfo.Backend.StandardSCParity), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getNodeRRSParityMD(), Value: float64(storageInfo.Backend.RRSCParity), }) @@ -3643,20 +3643,20 @@ func getClusterErasureSetHealingDrivesMD() MetricDescription { } } -func getClusterHealthMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getClusterHealthMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { + mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { objLayer := newObjectLayerFn() opts := HealthOptions{} result := objLayer.Health(ctx, opts) - metrics = make([]Metric, 0, 2+4*len(result.ESHealth)) + metrics = make([]MetricV2, 0, 2+4*len(result.ESHealth)) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterWriteQuorumMD(), Value: float64(result.WriteQuorum), }) @@ -3666,7 +3666,7 @@ func getClusterHealthMetrics(opts MetricsGroupOpts) *MetricsGroup { health = 0 } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterHealthStatusMD(), Value: float64(health), }) @@ -3676,22 +3676,22 @@ func getClusterHealthMetrics(opts MetricsGroupOpts) *MetricsGroup { "pool": strconv.Itoa(h.PoolID), "set": strconv.Itoa(h.SetID), } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterErasureSetReadQuorumMD(), VariableLabels: labels, Value: float64(h.ReadQuorum), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterErasureSetWriteQuorumMD(), VariableLabels: labels, Value: float64(h.WriteQuorum), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterErasureSetOnlineDrivesMD(), VariableLabels: labels, Value: float64(h.HealthyDrives), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterErasureSetHealingDrivesMD(), VariableLabels: labels, Value: float64(h.HealingDrives), @@ -3702,7 +3702,7 @@ func getClusterHealthMetrics(opts MetricsGroupOpts) *MetricsGroup { health = 0 } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterErasureSetHealthStatusMD(), VariableLabels: labels, Value: float64(health), @@ -3715,13 +3715,13 @@ func getClusterHealthMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getBatchJobsMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getBatchJobsMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { + 
mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { var m madmin.RealtimeMetrics mLocal := collectLocalMetrics(madmin.MetricsBatchJobs, collectMetricsOpts{}) m.Merge(&mLocal) @@ -3752,7 +3752,7 @@ func getBatchJobsMetrics(opts MetricsGroupOpts) *MetricsGroup { bucket = mj.Expired.Bucket } metrics = append(metrics, - Metric{ + MetricV2{ Description: MetricDescription{ Namespace: bucketMetricNamespace, Subsystem: "batch", @@ -3763,7 +3763,7 @@ func getBatchJobsMetrics(opts MetricsGroupOpts) *MetricsGroup { Value: objects, VariableLabels: map[string]string{"bucket": bucket, "jobId": mj.JobID}, }, - Metric{ + MetricV2{ Description: MetricDescription{ Namespace: bucketMetricNamespace, Subsystem: "batch", @@ -3781,51 +3781,51 @@ func getBatchJobsMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getClusterStorageMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getClusterStorageMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 1 * time.Minute, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { + mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { objLayer := newObjectLayerFn() // Fetch disk space info, ignore errors - metrics = make([]Metric, 0, 10) + metrics = make([]MetricV2, 0, 10) storageInfo := objLayer.StorageInfo(ctx, true) onlineDrives, offlineDrives := getOnlineOfflineDisksStats(storageInfo.Disks) totalDrives := onlineDrives.Merge(offlineDrives) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterCapacityTotalBytesMD(), Value: float64(GetTotalCapacity(storageInfo.Disks)), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterCapacityFreeBytesMD(), Value: float64(GetTotalCapacityFree(storageInfo.Disks)), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterCapacityUsageBytesMD(), Value: float64(GetTotalUsableCapacity(storageInfo.Disks, storageInfo)), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterCapacityUsageFreeBytesMD(), Value: float64(GetTotalUsableCapacityFree(storageInfo.Disks, storageInfo)), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterDrivesOfflineTotalMD(), Value: float64(offlineDrives.Sum()), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterDrivesOnlineTotalMD(), Value: float64(onlineDrives.Sum()), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: getClusterDrivesTotalMD(), Value: float64(totalDrives.Sum()), }) @@ -3834,13 +3834,13 @@ func getClusterStorageMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getKMSNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getKMSNodeMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) (metrics []Metric) { + mg.RegisterRead(func(ctx context.Context) (metrics []MetricV2) { const ( Online = 1 Offline = 0 @@ -3854,12 +3854,12 @@ func getKMSNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { } _, err := GlobalKMS.Metrics(ctx) if _, ok := kes.IsConnError(err); ok { - return []Metric{{ + return []MetricV2{{ Description: desc, Value: float64(Offline), }} } - return []Metric{{ + return []MetricV2{{ Description: 
desc, Value: float64(Online), }} @@ -3867,13 +3867,13 @@ func getKMSNodeMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func getWebhookMetrics() *MetricsGroup { - mg := &MetricsGroup{ +func getWebhookMetrics() *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, } - mg.RegisterRead(func(ctx context.Context) []Metric { + mg.RegisterRead(func(ctx context.Context) []MetricV2 { tgts := append(logger.SystemTargets(), logger.AuditTargets()...) - metrics := make([]Metric, 0, len(tgts)*4) + metrics := make([]MetricV2, 0, len(tgts)*4) for _, t := range tgts { isOnline := 0 if t.IsOnline(ctx) { @@ -3883,7 +3883,7 @@ func getWebhookMetrics() *MetricsGroup { "name": t.String(), "endpoint": t.Endpoint(), } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: clusterMetricNamespace, Subsystem: webhookSubsystem, @@ -3894,7 +3894,7 @@ func getWebhookMetrics() *MetricsGroup { VariableLabels: labels, Value: float64(isOnline), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: clusterMetricNamespace, Subsystem: webhookSubsystem, @@ -3905,7 +3905,7 @@ func getWebhookMetrics() *MetricsGroup { VariableLabels: labels, Value: float64(t.Stats().QueueLength), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: clusterMetricNamespace, Subsystem: webhookSubsystem, @@ -3916,7 +3916,7 @@ func getWebhookMetrics() *MetricsGroup { VariableLabels: labels, Value: float64(t.Stats().TotalMessages), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: clusterMetricNamespace, Subsystem: webhookSubsystem, @@ -3934,19 +3934,19 @@ func getWebhookMetrics() *MetricsGroup { return mg } -func getKMSMetrics(opts MetricsGroupOpts) *MetricsGroup { - mg := &MetricsGroup{ +func getKMSMetrics(opts MetricsGroupOpts) *MetricsGroupV2 { + mg := &MetricsGroupV2{ cacheInterval: 10 * time.Second, metricsGroupOpts: opts, } - mg.RegisterRead(func(ctx context.Context) []Metric { - metrics := make([]Metric, 0, 4) + mg.RegisterRead(func(ctx context.Context) []MetricV2 { + metrics := make([]MetricV2, 0, 4) metric, err := GlobalKMS.Metrics(ctx) if err != nil { return metrics } - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: clusterMetricNamespace, Subsystem: kmsSubsystem, @@ -3956,7 +3956,7 @@ func getKMSMetrics(opts MetricsGroupOpts) *MetricsGroup { }, Value: float64(metric.RequestOK), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: clusterMetricNamespace, Subsystem: kmsSubsystem, @@ -3966,7 +3966,7 @@ func getKMSMetrics(opts MetricsGroupOpts) *MetricsGroup { }, Value: float64(metric.RequestErr), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: clusterMetricNamespace, Subsystem: kmsSubsystem, @@ -3976,7 +3976,7 @@ func getKMSMetrics(opts MetricsGroupOpts) *MetricsGroup { }, Value: float64(metric.RequestFail), }) - metrics = append(metrics, Metric{ + metrics = append(metrics, MetricV2{ Description: MetricDescription{ Namespace: clusterMetricNamespace, Subsystem: kmsSubsystem, @@ -3992,7 +3992,7 @@ func getKMSMetrics(opts MetricsGroupOpts) *MetricsGroup { return mg } -func collectMetric(metric Metric, labels []string, values []string, metricName 
string, out chan<- prometheus.Metric) { +func collectMetric(metric MetricV2, labels []string, values []string, metricName string, out chan<- prometheus.Metric) { if metric.Description.Type == histogramMetric { if metric.Histogram == nil { return @@ -4049,11 +4049,11 @@ func collectMetric(metric Metric, labels []string, values []string, metricName s //msgp:ignore minioBucketCollector type minioBucketCollector struct { - metricsGroups []*MetricsGroup + metricsGroups []*MetricsGroupV2 desc *prometheus.Desc } -func newMinioBucketCollector(metricsGroups []*MetricsGroup) *minioBucketCollector { +func newMinioBucketCollector(metricsGroups []*MetricsGroupV2) *minioBucketCollector { return &minioBucketCollector{ metricsGroups: metricsGroups, desc: prometheus.NewDesc("minio_bucket_stats", "Statistics exposed by MinIO server cluster wide per bucket", nil, nil), @@ -4068,7 +4068,7 @@ func (c *minioBucketCollector) Describe(ch chan<- *prometheus.Desc) { // Collect is called by the Prometheus registry when collecting metrics. func (c *minioBucketCollector) Collect(out chan<- prometheus.Metric) { var wg sync.WaitGroup - publish := func(in <-chan Metric) { + publish := func(in <-chan MetricV2) { defer wg.Done() for metric := range in { labels, values := getOrderedLabelValueArrays(metric.VariableLabels) @@ -4085,11 +4085,11 @@ func (c *minioBucketCollector) Collect(out chan<- prometheus.Metric) { //msgp:ignore minioClusterCollector type minioClusterCollector struct { - metricsGroups []*MetricsGroup + metricsGroups []*MetricsGroupV2 desc *prometheus.Desc } -func newMinioClusterCollector(metricsGroups []*MetricsGroup) *minioClusterCollector { +func newMinioClusterCollector(metricsGroups []*MetricsGroupV2) *minioClusterCollector { return &minioClusterCollector{ metricsGroups: metricsGroups, desc: prometheus.NewDesc("minio_stats", "Statistics exposed by MinIO server per cluster", nil, nil), @@ -4104,7 +4104,7 @@ func (c *minioClusterCollector) Describe(ch chan<- *prometheus.Desc) { // Collect is called by the Prometheus registry when collecting metrics. func (c *minioClusterCollector) Collect(out chan<- prometheus.Metric) { var wg sync.WaitGroup - publish := func(in <-chan Metric) { + publish := func(in <-chan MetricV2) { defer wg.Done() for metric := range in { labels, values := getOrderedLabelValueArrays(metric.VariableLabels) @@ -4120,11 +4120,11 @@ func (c *minioClusterCollector) Collect(out chan<- prometheus.Metric) { } // ReportMetrics reports serialized metrics to the channel passed for the metrics generated. -func ReportMetrics(ctx context.Context, metricsGroups []*MetricsGroup) <-chan Metric { - ch := make(chan Metric) +func ReportMetrics(ctx context.Context, metricsGroups []*MetricsGroupV2) <-chan MetricV2 { + ch := make(chan MetricV2) go func() { defer xioutil.SafeClose(ch) - populateAndPublish(metricsGroups, func(m Metric) bool { + populateAndPublish(metricsGroups, func(m MetricV2) bool { if m.VariableLabels == nil { m.VariableLabels = make(map[string]string) } @@ -4146,7 +4146,7 @@ func ReportMetrics(ctx context.Context, metricsGroups []*MetricsGroup) <-chan Me // //msgp:ignore minioNodeCollector type minioNodeCollector struct { - metricsGroups []*MetricsGroup + metricsGroups []*MetricsGroupV2 desc *prometheus.Desc } @@ -4156,7 +4156,7 @@ func (c *minioNodeCollector) Describe(ch chan<- *prometheus.Desc) { } // populateAndPublish populates and then publishes the metrics generated by the generator function. 
-func populateAndPublish(metricsGroups []*MetricsGroup, publish func(m Metric) bool) { +func populateAndPublish(metricsGroups []*MetricsGroupV2, publish func(m MetricV2) bool) { for _, mg := range metricsGroups { if mg == nil { continue @@ -4174,7 +4174,7 @@ func (c *minioNodeCollector) Collect(ch chan<- prometheus.Metric) { // Expose MinIO's version information minioVersionInfo.WithLabelValues(Version, CommitID).Set(1.0) - populateAndPublish(c.metricsGroups, func(metric Metric) bool { + populateAndPublish(c.metricsGroups, func(metric MetricV2) bool { labels, values := getOrderedLabelValueArrays(metric.VariableLabels) values = append(values, globalLocalNodeName) labels = append(labels, serverName) @@ -4236,7 +4236,7 @@ func getOrderedLabelValueArrays(labelsWithValue map[string]string) (labels, valu // and returns reference of minioCollector for version 2 // It creates the Prometheus Description which is used // to define Metric and help string -func newMinioCollectorNode(metricsGroups []*MetricsGroup) *minioNodeCollector { +func newMinioCollectorNode(metricsGroups []*MetricsGroupV2) *minioNodeCollector { return &minioNodeCollector{ metricsGroups: metricsGroups, desc: prometheus.NewDesc("minio_stats", "Statistics exposed by MinIO server per node", nil, nil), diff --git a/cmd/metrics-v2_gen.go b/cmd/metrics-v2_gen.go index 2d9f4abe5..81f9fd9bb 100644 --- a/cmd/metrics-v2_gen.go +++ b/cmd/metrics-v2_gen.go @@ -7,7 +7,223 @@ import ( ) // MarshalMsg implements msgp.Marshaler -func (z *Metric) MarshalMsg(b []byte) (o []byte, err error) { +func (z *MetricDescription) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 5 + // string "Namespace" + o = append(o, 0x85, 0xa9, 0x4e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65) + o = msgp.AppendString(o, string(z.Namespace)) + // string "Subsystem" + o = append(o, 0xa9, 0x53, 0x75, 0x62, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d) + o = msgp.AppendString(o, string(z.Subsystem)) + // string "Name" + o = append(o, 0xa4, 0x4e, 0x61, 0x6d, 0x65) + o = msgp.AppendString(o, string(z.Name)) + // string "Help" + o = append(o, 0xa4, 0x48, 0x65, 0x6c, 0x70) + o = msgp.AppendString(o, z.Help) + // string "Type" + o = append(o, 0xa4, 0x54, 0x79, 0x70, 0x65) + o = msgp.AppendString(o, string(z.Type)) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *MetricDescription) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zb0001 uint32 + zb0001, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "Namespace": + { + var zb0002 string + zb0002, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "Namespace") + return + } + z.Namespace = MetricNamespace(zb0002) + } + case "Subsystem": + { + var zb0003 string + zb0003, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "Subsystem") + return + } + z.Subsystem = MetricSubsystem(zb0003) + } + case "Name": + { + var zb0004 string + zb0004, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "Name") + return + } + z.Name = MetricName(zb0004) + } + case "Help": + z.Help, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "Help") + return + } + case "Type": + { + var zb0005 string + zb0005, 
bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "Type") + return + } + z.Type = MetricTypeV2(zb0005) + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *MetricDescription) Msgsize() (s int) { + s = 1 + 10 + msgp.StringPrefixSize + len(string(z.Namespace)) + 10 + msgp.StringPrefixSize + len(string(z.Subsystem)) + 5 + msgp.StringPrefixSize + len(string(z.Name)) + 5 + msgp.StringPrefixSize + len(z.Help) + 5 + msgp.StringPrefixSize + len(string(z.Type)) + return +} + +// MarshalMsg implements msgp.Marshaler +func (z MetricName) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + o = msgp.AppendString(o, string(z)) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *MetricName) UnmarshalMsg(bts []byte) (o []byte, err error) { + { + var zb0001 string + zb0001, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + (*z) = MetricName(zb0001) + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z MetricName) Msgsize() (s int) { + s = msgp.StringPrefixSize + len(string(z)) + return +} + +// MarshalMsg implements msgp.Marshaler +func (z MetricNamespace) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + o = msgp.AppendString(o, string(z)) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *MetricNamespace) UnmarshalMsg(bts []byte) (o []byte, err error) { + { + var zb0001 string + zb0001, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + (*z) = MetricNamespace(zb0001) + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z MetricNamespace) Msgsize() (s int) { + s = msgp.StringPrefixSize + len(string(z)) + return +} + +// MarshalMsg implements msgp.Marshaler +func (z MetricSubsystem) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + o = msgp.AppendString(o, string(z)) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *MetricSubsystem) UnmarshalMsg(bts []byte) (o []byte, err error) { + { + var zb0001 string + zb0001, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + (*z) = MetricSubsystem(zb0001) + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z MetricSubsystem) Msgsize() (s int) { + s = msgp.StringPrefixSize + len(string(z)) + return +} + +// MarshalMsg implements msgp.Marshaler +func (z MetricTypeV2) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + o = msgp.AppendString(o, string(z)) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *MetricTypeV2) UnmarshalMsg(bts []byte) (o []byte, err error) { + { + var zb0001 string + zb0001, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + (*z) = MetricTypeV2(zb0001) + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z MetricTypeV2) Msgsize() (s int) { + s = msgp.StringPrefixSize + len(string(z)) + return +} + +// MarshalMsg 
implements msgp.Marshaler +func (z *MetricV2) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) // map header, size 6 // string "Description" @@ -48,7 +264,7 @@ func (z *Metric) MarshalMsg(b []byte) (o []byte, err error) { } // UnmarshalMsg implements msgp.Unmarshaler -func (z *Metric) UnmarshalMsg(bts []byte) (o []byte, err error) { +func (z *MetricV2) UnmarshalMsg(bts []byte) (o []byte, err error) { var field []byte _ = field var zb0001 uint32 @@ -186,7 +402,7 @@ func (z *Metric) UnmarshalMsg(bts []byte) (o []byte, err error) { } // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z *Metric) Msgsize() (s int) { +func (z *MetricV2) Msgsize() (s int) { s = 1 + 12 + z.Description.Msgsize() + 13 + msgp.MapHeaderSize if z.StaticLabels != nil { for za0001, za0002 := range z.StaticLabels { @@ -211,287 +427,6 @@ func (z *Metric) Msgsize() (s int) { return } -// MarshalMsg implements msgp.Marshaler -func (z *MetricDescription) MarshalMsg(b []byte) (o []byte, err error) { - o = msgp.Require(b, z.Msgsize()) - // map header, size 5 - // string "Namespace" - o = append(o, 0x85, 0xa9, 0x4e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65) - o = msgp.AppendString(o, string(z.Namespace)) - // string "Subsystem" - o = append(o, 0xa9, 0x53, 0x75, 0x62, 0x73, 0x79, 0x73, 0x74, 0x65, 0x6d) - o = msgp.AppendString(o, string(z.Subsystem)) - // string "Name" - o = append(o, 0xa4, 0x4e, 0x61, 0x6d, 0x65) - o = msgp.AppendString(o, string(z.Name)) - // string "Help" - o = append(o, 0xa4, 0x48, 0x65, 0x6c, 0x70) - o = msgp.AppendString(o, z.Help) - // string "Type" - o = append(o, 0xa4, 0x54, 0x79, 0x70, 0x65) - o = msgp.AppendString(o, string(z.Type)) - return -} - -// UnmarshalMsg implements msgp.Unmarshaler -func (z *MetricDescription) UnmarshalMsg(bts []byte) (o []byte, err error) { - var field []byte - _ = field - var zb0001 uint32 - zb0001, bts, err = msgp.ReadMapHeaderBytes(bts) - if err != nil { - err = msgp.WrapError(err) - return - } - for zb0001 > 0 { - zb0001-- - field, bts, err = msgp.ReadMapKeyZC(bts) - if err != nil { - err = msgp.WrapError(err) - return - } - switch msgp.UnsafeString(field) { - case "Namespace": - { - var zb0002 string - zb0002, bts, err = msgp.ReadStringBytes(bts) - if err != nil { - err = msgp.WrapError(err, "Namespace") - return - } - z.Namespace = MetricNamespace(zb0002) - } - case "Subsystem": - { - var zb0003 string - zb0003, bts, err = msgp.ReadStringBytes(bts) - if err != nil { - err = msgp.WrapError(err, "Subsystem") - return - } - z.Subsystem = MetricSubsystem(zb0003) - } - case "Name": - { - var zb0004 string - zb0004, bts, err = msgp.ReadStringBytes(bts) - if err != nil { - err = msgp.WrapError(err, "Name") - return - } - z.Name = MetricName(zb0004) - } - case "Help": - z.Help, bts, err = msgp.ReadStringBytes(bts) - if err != nil { - err = msgp.WrapError(err, "Help") - return - } - case "Type": - { - var zb0005 string - zb0005, bts, err = msgp.ReadStringBytes(bts) - if err != nil { - err = msgp.WrapError(err, "Type") - return - } - z.Type = MetricType(zb0005) - } - default: - bts, err = msgp.Skip(bts) - if err != nil { - err = msgp.WrapError(err) - return - } - } - } - o = bts - return -} - -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z *MetricDescription) Msgsize() (s int) { - s = 1 + 10 + msgp.StringPrefixSize + len(string(z.Namespace)) + 10 + msgp.StringPrefixSize + len(string(z.Subsystem)) + 5 + msgp.StringPrefixSize + 
len(string(z.Name)) + 5 + msgp.StringPrefixSize + len(z.Help) + 5 + msgp.StringPrefixSize + len(string(z.Type)) - return -} - -// MarshalMsg implements msgp.Marshaler -func (z MetricName) MarshalMsg(b []byte) (o []byte, err error) { - o = msgp.Require(b, z.Msgsize()) - o = msgp.AppendString(o, string(z)) - return -} - -// UnmarshalMsg implements msgp.Unmarshaler -func (z *MetricName) UnmarshalMsg(bts []byte) (o []byte, err error) { - { - var zb0001 string - zb0001, bts, err = msgp.ReadStringBytes(bts) - if err != nil { - err = msgp.WrapError(err) - return - } - (*z) = MetricName(zb0001) - } - o = bts - return -} - -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z MetricName) Msgsize() (s int) { - s = msgp.StringPrefixSize + len(string(z)) - return -} - -// MarshalMsg implements msgp.Marshaler -func (z MetricNamespace) MarshalMsg(b []byte) (o []byte, err error) { - o = msgp.Require(b, z.Msgsize()) - o = msgp.AppendString(o, string(z)) - return -} - -// UnmarshalMsg implements msgp.Unmarshaler -func (z *MetricNamespace) UnmarshalMsg(bts []byte) (o []byte, err error) { - { - var zb0001 string - zb0001, bts, err = msgp.ReadStringBytes(bts) - if err != nil { - err = msgp.WrapError(err) - return - } - (*z) = MetricNamespace(zb0001) - } - o = bts - return -} - -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z MetricNamespace) Msgsize() (s int) { - s = msgp.StringPrefixSize + len(string(z)) - return -} - -// MarshalMsg implements msgp.Marshaler -func (z MetricSubsystem) MarshalMsg(b []byte) (o []byte, err error) { - o = msgp.Require(b, z.Msgsize()) - o = msgp.AppendString(o, string(z)) - return -} - -// UnmarshalMsg implements msgp.Unmarshaler -func (z *MetricSubsystem) UnmarshalMsg(bts []byte) (o []byte, err error) { - { - var zb0001 string - zb0001, bts, err = msgp.ReadStringBytes(bts) - if err != nil { - err = msgp.WrapError(err) - return - } - (*z) = MetricSubsystem(zb0001) - } - o = bts - return -} - -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z MetricSubsystem) Msgsize() (s int) { - s = msgp.StringPrefixSize + len(string(z)) - return -} - -// MarshalMsg implements msgp.Marshaler -func (z MetricType) MarshalMsg(b []byte) (o []byte, err error) { - o = msgp.Require(b, z.Msgsize()) - o = msgp.AppendString(o, string(z)) - return -} - -// UnmarshalMsg implements msgp.Unmarshaler -func (z *MetricType) UnmarshalMsg(bts []byte) (o []byte, err error) { - { - var zb0001 string - zb0001, bts, err = msgp.ReadStringBytes(bts) - if err != nil { - err = msgp.WrapError(err) - return - } - (*z) = MetricType(zb0001) - } - o = bts - return -} - -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z MetricType) Msgsize() (s int) { - s = msgp.StringPrefixSize + len(string(z)) - return -} - -// MarshalMsg implements msgp.Marshaler -func (z *MetricsGroup) MarshalMsg(b []byte) (o []byte, err error) { - o = msgp.Require(b, z.Msgsize()) - // map header, size 2 - // string "cacheInterval" - o = append(o, 0x82, 0xad, 0x63, 0x61, 0x63, 0x68, 0x65, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x76, 0x61, 0x6c) - o = msgp.AppendDuration(o, z.cacheInterval) - // string "metricsGroupOpts" - o = append(o, 0xb0, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x4f, 0x70, 0x74, 0x73) - o, err = z.metricsGroupOpts.MarshalMsg(o) - if err != nil { - err = msgp.WrapError(err, 
"metricsGroupOpts") - return - } - return -} - -// UnmarshalMsg implements msgp.Unmarshaler -func (z *MetricsGroup) UnmarshalMsg(bts []byte) (o []byte, err error) { - var field []byte - _ = field - var zb0001 uint32 - zb0001, bts, err = msgp.ReadMapHeaderBytes(bts) - if err != nil { - err = msgp.WrapError(err) - return - } - for zb0001 > 0 { - zb0001-- - field, bts, err = msgp.ReadMapKeyZC(bts) - if err != nil { - err = msgp.WrapError(err) - return - } - switch msgp.UnsafeString(field) { - case "cacheInterval": - z.cacheInterval, bts, err = msgp.ReadDurationBytes(bts) - if err != nil { - err = msgp.WrapError(err, "cacheInterval") - return - } - case "metricsGroupOpts": - bts, err = z.metricsGroupOpts.UnmarshalMsg(bts) - if err != nil { - err = msgp.WrapError(err, "metricsGroupOpts") - return - } - default: - bts, err = msgp.Skip(bts) - if err != nil { - err = msgp.WrapError(err) - return - } - } - } - o = bts - return -} - -// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message -func (z *MetricsGroup) Msgsize() (s int) { - s = 1 + 14 + msgp.DurationSize + 17 + z.metricsGroupOpts.Msgsize() - return -} - // MarshalMsg implements msgp.Marshaler func (z *MetricsGroupOpts) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) @@ -642,3 +577,68 @@ func (z *MetricsGroupOpts) Msgsize() (s int) { s = 1 + 22 + msgp.BoolSize + 24 + msgp.BoolSize + 31 + msgp.BoolSize + 28 + msgp.BoolSize + 16 + msgp.BoolSize + 11 + msgp.BoolSize + 29 + msgp.BoolSize + 19 + msgp.BoolSize + 23 + msgp.BoolSize + 26 + msgp.BoolSize + 32 + msgp.BoolSize + 22 + msgp.BoolSize return } + +// MarshalMsg implements msgp.Marshaler +func (z *MetricsGroupV2) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 2 + // string "cacheInterval" + o = append(o, 0x82, 0xad, 0x63, 0x61, 0x63, 0x68, 0x65, 0x49, 0x6e, 0x74, 0x65, 0x72, 0x76, 0x61, 0x6c) + o = msgp.AppendDuration(o, z.cacheInterval) + // string "metricsGroupOpts" + o = append(o, 0xb0, 0x6d, 0x65, 0x74, 0x72, 0x69, 0x63, 0x73, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x4f, 0x70, 0x74, 0x73) + o, err = z.metricsGroupOpts.MarshalMsg(o) + if err != nil { + err = msgp.WrapError(err, "metricsGroupOpts") + return + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *MetricsGroupV2) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zb0001 uint32 + zb0001, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "cacheInterval": + z.cacheInterval, bts, err = msgp.ReadDurationBytes(bts) + if err != nil { + err = msgp.WrapError(err, "cacheInterval") + return + } + case "metricsGroupOpts": + bts, err = z.metricsGroupOpts.UnmarshalMsg(bts) + if err != nil { + err = msgp.WrapError(err, "metricsGroupOpts") + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *MetricsGroupV2) Msgsize() (s int) { + s = 1 + 14 + msgp.DurationSize + 17 + z.metricsGroupOpts.Msgsize() + return +} diff --git a/cmd/metrics-v2_gen_test.go b/cmd/metrics-v2_gen_test.go index c55137cc2..f486214f3 100644 --- a/cmd/metrics-v2_gen_test.go +++ 
b/cmd/metrics-v2_gen_test.go @@ -8,64 +8,6 @@ import ( "github.com/tinylib/msgp/msgp" ) -func TestMarshalUnmarshalMetric(t *testing.T) { - v := Metric{} - bts, err := v.MarshalMsg(nil) - if err != nil { - t.Fatal(err) - } - left, err := v.UnmarshalMsg(bts) - if err != nil { - t.Fatal(err) - } - if len(left) > 0 { - t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) - } - - left, err = msgp.Skip(bts) - if err != nil { - t.Fatal(err) - } - if len(left) > 0 { - t.Errorf("%d bytes left over after Skip(): %q", len(left), left) - } -} - -func BenchmarkMarshalMsgMetric(b *testing.B) { - v := Metric{} - b.ReportAllocs() - b.ResetTimer() - for i := 0; i < b.N; i++ { - v.MarshalMsg(nil) - } -} - -func BenchmarkAppendMsgMetric(b *testing.B) { - v := Metric{} - bts := make([]byte, 0, v.Msgsize()) - bts, _ = v.MarshalMsg(bts[0:0]) - b.SetBytes(int64(len(bts))) - b.ReportAllocs() - b.ResetTimer() - for i := 0; i < b.N; i++ { - bts, _ = v.MarshalMsg(bts[0:0]) - } -} - -func BenchmarkUnmarshalMetric(b *testing.B) { - v := Metric{} - bts, _ := v.MarshalMsg(nil) - b.ReportAllocs() - b.SetBytes(int64(len(bts))) - b.ResetTimer() - for i := 0; i < b.N; i++ { - _, err := v.UnmarshalMsg(bts) - if err != nil { - b.Fatal(err) - } - } -} - func TestMarshalUnmarshalMetricDescription(t *testing.T) { v := MetricDescription{} bts, err := v.MarshalMsg(nil) @@ -124,8 +66,8 @@ func BenchmarkUnmarshalMetricDescription(b *testing.B) { } } -func TestMarshalUnmarshalMetricsGroup(t *testing.T) { - v := MetricsGroup{} +func TestMarshalUnmarshalMetricV2(t *testing.T) { + v := MetricV2{} bts, err := v.MarshalMsg(nil) if err != nil { t.Fatal(err) @@ -147,8 +89,8 @@ func TestMarshalUnmarshalMetricsGroup(t *testing.T) { } } -func BenchmarkMarshalMsgMetricsGroup(b *testing.B) { - v := MetricsGroup{} +func BenchmarkMarshalMsgMetricV2(b *testing.B) { + v := MetricV2{} b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { @@ -156,8 +98,8 @@ func BenchmarkMarshalMsgMetricsGroup(b *testing.B) { } } -func BenchmarkAppendMsgMetricsGroup(b *testing.B) { - v := MetricsGroup{} +func BenchmarkAppendMsgMetricV2(b *testing.B) { + v := MetricV2{} bts := make([]byte, 0, v.Msgsize()) bts, _ = v.MarshalMsg(bts[0:0]) b.SetBytes(int64(len(bts))) @@ -168,8 +110,8 @@ func BenchmarkAppendMsgMetricsGroup(b *testing.B) { } } -func BenchmarkUnmarshalMetricsGroup(b *testing.B) { - v := MetricsGroup{} +func BenchmarkUnmarshalMetricV2(b *testing.B) { + v := MetricV2{} bts, _ := v.MarshalMsg(nil) b.ReportAllocs() b.SetBytes(int64(len(bts))) @@ -239,3 +181,61 @@ func BenchmarkUnmarshalMetricsGroupOpts(b *testing.B) { } } } + +func TestMarshalUnmarshalMetricsGroupV2(t *testing.T) { + v := MetricsGroupV2{} + bts, err := v.MarshalMsg(nil) + if err != nil { + t.Fatal(err) + } + left, err := v.UnmarshalMsg(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) + } + + left, err = msgp.Skip(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after Skip(): %q", len(left), left) + } +} + +func BenchmarkMarshalMsgMetricsGroupV2(b *testing.B) { + v := MetricsGroupV2{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.MarshalMsg(nil) + } +} + +func BenchmarkAppendMsgMetricsGroupV2(b *testing.B) { + v := MetricsGroupV2{} + bts := make([]byte, 0, v.Msgsize()) + bts, _ = v.MarshalMsg(bts[0:0]) + b.SetBytes(int64(len(bts))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + bts, _ = 
v.MarshalMsg(bts[0:0]) + } +} + +func BenchmarkUnmarshalMetricsGroupV2(b *testing.B) { + v := MetricsGroupV2{} + bts, _ := v.MarshalMsg(nil) + b.ReportAllocs() + b.SetBytes(int64(len(bts))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := v.UnmarshalMsg(bts) + if err != nil { + b.Fatal(err) + } + } +} diff --git a/cmd/metrics-v3-api.go b/cmd/metrics-v3-api.go new file mode 100644 index 000000000..b12fe3a5f --- /dev/null +++ b/cmd/metrics-v3-api.go @@ -0,0 +1,220 @@ +// Copyright (c) 2015-2024 MinIO, Inc. +// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cmd + +import ( + "context" +) + +const ( + apiRejectedAuthTotal MetricName = "rejected_auth_total" + apiRejectedHeaderTotal MetricName = "rejected_header_total" + apiRejectedTimestampTotal MetricName = "rejected_timestamp_total" + apiRejectedInvalidTotal MetricName = "rejected_invalid_total" + + apiRequestsWaitingTotal MetricName = "waiting_total" + apiRequestsIncomingTotal MetricName = "incoming_total" + + apiRequestsInFlightTotal MetricName = "inflight_total" + apiRequestsTotal MetricName = "total" + apiRequestsErrorsTotal MetricName = "errors_total" + apiRequests5xxErrorsTotal MetricName = "5xx_errors_total" + apiRequests4xxErrorsTotal MetricName = "4xx_errors_total" + apiRequestsCanceledTotal MetricName = "canceled_total" + + apiRequestsTTFBSecondsDistribution MetricName = "ttfb_seconds_distribution" + + apiTrafficSentBytes MetricName = "traffic_sent_bytes" + apiTrafficRecvBytes MetricName = "traffic_received_bytes" +) + +var ( + apiRejectedAuthTotalMD = NewCounterMD(apiRejectedAuthTotal, + "Total number of requests rejected for auth failure", "type") + apiRejectedHeaderTotalMD = NewCounterMD(apiRejectedHeaderTotal, + "Total number of requests rejected for invalid header", "type") + apiRejectedTimestampTotalMD = NewCounterMD(apiRejectedTimestampTotal, + "Total number of requests rejected for invalid timestamp", "type") + apiRejectedInvalidTotalMD = NewCounterMD(apiRejectedInvalidTotal, + "Total number of invalid requests", "type") + + apiRequestsWaitingTotalMD = NewGaugeMD(apiRequestsWaitingTotal, + "Total number of requests in the waiting queue", "type") + apiRequestsIncomingTotalMD = NewGaugeMD(apiRequestsIncomingTotal, + "Total number of incoming requests", "type") + + apiRequestsInFlightTotalMD = NewGaugeMD(apiRequestsInFlightTotal, + "Total number of requests currently in flight", "name", "type") + apiRequestsTotalMD = NewCounterMD(apiRequestsTotal, + "Total number of requests", "name", "type") + apiRequestsErrorsTotalMD = NewCounterMD(apiRequestsErrorsTotal, + "Total number of requests with (4xx and 5xx) errors", "name", "type") + apiRequests5xxErrorsTotalMD = NewCounterMD(apiRequests5xxErrorsTotal, + "Total number of requests with 5xx errors", "name", "type") + apiRequests4xxErrorsTotalMD = NewCounterMD(apiRequests4xxErrorsTotal, + "Total number of requests with 4xx 
errors", "name", "type") + apiRequestsCanceledTotalMD = NewCounterMD(apiRequestsCanceledTotal, + "Total number of requests canceled by the client", "name", "type") + + apiRequestsTTFBSecondsDistributionMD = NewCounterMD(apiRequestsTTFBSecondsDistribution, + "Distribution of time to first byte across API calls", "name", "type", "le") + + apiTrafficSentBytesMD = NewCounterMD(apiTrafficSentBytes, + "Total number of bytes sent", "type") + apiTrafficRecvBytesMD = NewCounterMD(apiTrafficRecvBytes, + "Total number of bytes received", "type") +) + +// loadAPIRequestsHTTPMetrics - reads S3 HTTP metrics. +// +// This is a `MetricsLoaderFn`. +// +// This includes node level S3 HTTP metrics. +// +// This function currently ignores `opts`. +func loadAPIRequestsHTTPMetrics(ctx context.Context, m MetricValues, _ *metricsCache) error { + // Collect node level S3 HTTP metrics. + httpStats := globalHTTPStats.toServerHTTPStats(false) + + // Currently we only collect S3 API related stats, so we set the "type" + // label to "s3". + + m.Set(apiRejectedAuthTotal, float64(httpStats.TotalS3RejectedAuth), "type", "s3") + m.Set(apiRejectedTimestampTotal, float64(httpStats.TotalS3RejectedTime), "type", "s3") + m.Set(apiRejectedHeaderTotal, float64(httpStats.TotalS3RejectedHeader), "type", "s3") + m.Set(apiRejectedInvalidTotal, float64(httpStats.TotalS3RejectedInvalid), "type", "s3") + m.Set(apiRequestsWaitingTotal, float64(httpStats.S3RequestsInQueue), "type", "s3") + m.Set(apiRequestsIncomingTotal, float64(httpStats.S3RequestsIncoming), "type", "s3") + + for name, value := range httpStats.CurrentS3Requests.APIStats { + m.Set(apiRequestsInFlightTotal, float64(value), "name", name, "type", "s3") + } + for name, value := range httpStats.TotalS3Requests.APIStats { + m.Set(apiRequestsTotal, float64(value), "name", name, "type", "s3") + } + for name, value := range httpStats.TotalS3Errors.APIStats { + m.Set(apiRequestsErrorsTotal, float64(value), "name", name, "type", "s3") + } + for name, value := range httpStats.TotalS35xxErrors.APIStats { + m.Set(apiRequests5xxErrorsTotal, float64(value), "name", name, "type", "s3") + } + for name, value := range httpStats.TotalS34xxErrors.APIStats { + m.Set(apiRequests4xxErrorsTotal, float64(value), "name", name, "type", "s3") + } + for name, value := range httpStats.TotalS3Canceled.APIStats { + m.Set(apiRequestsCanceledTotal, float64(value), "name", name, "type", "s3") + } + return nil +} + +// loadAPIRequestsTTFBMetrics - loads S3 TTFB metrics. +// +// This is a `MetricsLoaderFn`. +func loadAPIRequestsTTFBMetrics(ctx context.Context, m MetricValues, _ *metricsCache) error { + renameLabels := map[string]string{"api": "name"} + m.SetHistogram(apiRequestsTTFBSecondsDistribution, httpRequestsDuration, renameLabels, nil, + "type", "s3") + return nil +} + +// loadAPIRequestsNetworkMetrics - loads S3 network metrics. +// +// This is a `MetricsLoaderFn`. +func loadAPIRequestsNetworkMetrics(ctx context.Context, m MetricValues, _ *metricsCache) error { + connStats := globalConnStats.toServerConnStats() + m.Set(apiTrafficSentBytes, float64(connStats.s3OutputBytes), "type", "s3") + m.Set(apiTrafficRecvBytes, float64(connStats.s3InputBytes), "type", "s3") + return nil +} + +// Metric Descriptions for bucket level S3 metrics. 
+var ( + apiBucketTrafficSentBytesMD = NewCounterMD(apiTrafficSentBytes, + "Total number of bytes received for a bucket", "bucket", "type") + apiBucketTrafficRecvBytesMD = NewCounterMD(apiTrafficRecvBytes, + "Total number of bytes sent for a bucket", "bucket", "type") + + apiBucketRequestsInFlightMD = NewGaugeMD(apiRequestsInFlightTotal, + "Total number of requests currently in flight for a bucket", "bucket", "name", "type") + apiBucketRequestsTotalMD = NewCounterMD(apiRequestsTotal, + "Total number of requests for a bucket", "bucket", "name", "type") + apiBucketRequestsCanceledMD = NewCounterMD(apiRequestsCanceledTotal, + "Total number of requests canceled by the client for a bucket", "bucket", "name", "type") + apiBucketRequests4xxErrorsMD = NewCounterMD(apiRequests4xxErrorsTotal, + "Total number of requests with 4xx errors for a bucket", "bucket", "name", "type") + apiBucketRequests5xxErrorsMD = NewCounterMD(apiRequests5xxErrorsTotal, + "Total number of requests with 5xx errors for a bucket", "bucket", "name", "type") + + apiBucketRequestsTTFBSecondsDistributionMD = NewCounterMD(apiRequestsTTFBSecondsDistribution, + "Distribution of time to first byte across API calls for a bucket", + "bucket", "name", "le", "type") +) + +// loadAPIBucketHTTPMetrics - loads bucket level S3 HTTP metrics. +// +// This is a `MetricsLoaderFn`. +// +// This includes bucket level S3 HTTP metrics and S3 network in/out metrics. +func loadAPIBucketHTTPMetrics(ctx context.Context, m MetricValues, _ *metricsCache, buckets []string) error { + if len(buckets) == 0 { + return nil + } + for bucket, inOut := range globalBucketConnStats.getBucketS3InOutBytes(buckets) { + recvBytes := inOut.In + if recvBytes > 0 { + m.Set(apiTrafficSentBytes, float64(recvBytes), "bucket", bucket, "type", "s3") + } + sentBytes := inOut.Out + if sentBytes > 0 { + m.Set(apiTrafficRecvBytes, float64(sentBytes), "bucket", bucket, "type", "s3") + } + + httpStats := globalBucketHTTPStats.load(bucket) + for k, v := range httpStats.currentS3Requests.Load(false) { + m.Set(apiRequestsInFlightTotal, float64(v), "bucket", bucket, "name", k, "type", "s3") + } + + for k, v := range httpStats.totalS3Requests.Load(false) { + m.Set(apiRequestsTotal, float64(v), "bucket", bucket, "name", k, "type", "s3") + } + + for k, v := range httpStats.totalS3Canceled.Load(false) { + m.Set(apiRequestsCanceledTotal, float64(v), "bucket", bucket, "name", k, "type", "s3") + } + + for k, v := range httpStats.totalS34xxErrors.Load(false) { + m.Set(apiRequests4xxErrorsTotal, float64(v), "bucket", bucket, "name", k, "type", "s3") + } + + for k, v := range httpStats.totalS35xxErrors.Load(false) { + m.Set(apiRequests5xxErrorsTotal, float64(v), "bucket", bucket, "name", k, "type", "s3") + } + } + + return nil +} + +// loadAPIBucketTTFBMetrics - loads bucket S3 TTFB metrics. +// +// This is a `MetricsLoaderFn`. +func loadAPIBucketTTFBMetrics(ctx context.Context, m MetricValues, _ *metricsCache, buckets []string) error { + renameLabels := map[string]string{"api": "name"} + m.SetHistogram(apiRequestsTTFBSecondsDistribution, bucketHTTPRequestsDuration, renameLabels, + buckets, "type", "s3") + return nil +} diff --git a/cmd/metrics-v3-cache.go b/cmd/metrics-v3-cache.go new file mode 100644 index 000000000..1fa22396c --- /dev/null +++ b/cmd/metrics-v3-cache.go @@ -0,0 +1,145 @@ +// Copyright (c) 2015-2024 MinIO, Inc. 
+// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cmd + +import ( + "time" + + "github.com/minio/madmin-go/v3" + "github.com/minio/minio/internal/cachevalue" +) + +// metricsCache - cache for metrics. +// +// When serving metrics, this cache is passed to the MetricsLoaderFn. +// +// This cache is used for metrics that would result in network/storage calls. +type metricsCache struct { + dataUsageInfo *cachevalue.Cache[DataUsageInfo] + esetHealthResult *cachevalue.Cache[HealthResult] + driveMetrics *cachevalue.Cache[storageMetrics] + clusterDriveMetrics *cachevalue.Cache[storageMetrics] + nodesUpDown *cachevalue.Cache[nodesOnline] +} + +func newMetricsCache() *metricsCache { + return &metricsCache{ + dataUsageInfo: newDataUsageInfoCache(), + esetHealthResult: newESetHealthResultCache(), + driveMetrics: newDriveMetricsCache(), + clusterDriveMetrics: newClusterStorageInfoCache(), + nodesUpDown: newNodesUpDownCache(), + } +} + +type nodesOnline struct { + Online, Offline int +} + +func newNodesUpDownCache() *cachevalue.Cache[nodesOnline] { + loadNodesUpDown := func() (v nodesOnline, err error) { + v.Online, v.Offline = globalNotificationSys.GetPeerOnlineCount() + return + } + return cachevalue.NewFromFunc(1*time.Minute, + cachevalue.Opts{ReturnLastGood: true}, + loadNodesUpDown) +} + +type storageMetrics struct { + storageInfo madmin.StorageInfo + onlineDrives, offlineDrives, totalDrives int +} + +func newDataUsageInfoCache() *cachevalue.Cache[DataUsageInfo] { + loadDataUsage := func() (u DataUsageInfo, err error) { + objLayer := newObjectLayerFn() + if objLayer == nil { + return + } + + // Collect cluster level object metrics. 
+ u, err = loadDataUsageFromBackend(GlobalContext, objLayer) + return + } + return cachevalue.NewFromFunc(1*time.Minute, + cachevalue.Opts{ReturnLastGood: true}, + loadDataUsage) +} + +func newESetHealthResultCache() *cachevalue.Cache[HealthResult] { + loadHealth := func() (r HealthResult, err error) { + objLayer := newObjectLayerFn() + if objLayer == nil { + return + } + + r = objLayer.Health(GlobalContext, HealthOptions{}) + return + } + return cachevalue.NewFromFunc(1*time.Minute, + cachevalue.Opts{ReturnLastGood: true}, + loadHealth, + ) +} + +func newDriveMetricsCache() *cachevalue.Cache[storageMetrics] { + loadDriveMetrics := func() (v storageMetrics, err error) { + objLayer := newObjectLayerFn() + if objLayer == nil { + return + } + + storageInfo := objLayer.LocalStorageInfo(GlobalContext, true) + onlineDrives, offlineDrives := getOnlineOfflineDisksStats(storageInfo.Disks) + totalDrives := onlineDrives.Merge(offlineDrives) + v = storageMetrics{ + storageInfo: storageInfo, + onlineDrives: onlineDrives.Sum(), + offlineDrives: offlineDrives.Sum(), + totalDrives: totalDrives.Sum(), + } + return + } + return cachevalue.NewFromFunc(1*time.Minute, + cachevalue.Opts{ReturnLastGood: true}, + loadDriveMetrics) +} + +func newClusterStorageInfoCache() *cachevalue.Cache[storageMetrics] { + loadStorageInfo := func() (v storageMetrics, err error) { + objLayer := newObjectLayerFn() + if objLayer == nil { + return storageMetrics{}, nil + } + storageInfo := objLayer.StorageInfo(GlobalContext, true) + onlineDrives, offlineDrives := getOnlineOfflineDisksStats(storageInfo.Disks) + totalDrives := onlineDrives.Merge(offlineDrives) + v = storageMetrics{ + storageInfo: storageInfo, + onlineDrives: onlineDrives.Sum(), + offlineDrives: offlineDrives.Sum(), + totalDrives: totalDrives.Sum(), + } + return + } + return cachevalue.NewFromFunc(1*time.Minute, + cachevalue.Opts{ReturnLastGood: true}, + loadStorageInfo, + ) +} diff --git a/cmd/metrics-v3-cluster-erasure-set.go b/cmd/metrics-v3-cluster-erasure-set.go new file mode 100644 index 000000000..69d7523b3 --- /dev/null +++ b/cmd/metrics-v3-cluster-erasure-set.go @@ -0,0 +1,89 @@ +// Copyright (c) 2015-2024 MinIO, Inc. +// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
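All of the caches above follow the same `cachevalue` pattern: a load function wrapped with a one-minute TTL and `ReturnLastGood: true`, so that a failed refresh keeps serving the previous value. As a rough sketch (not part of this patch; the type and function names are hypothetical), a new cached value would be added the same way and read later with `.Get()`:

	// Hypothetical sketch: cache another derived value, reusing the pattern of
	// the caches in metrics-v3-cache.go.
	type exampleNodeCounts struct {
		online, offline int
	}

	func newExampleNodeCountsCache() *cachevalue.Cache[exampleNodeCounts] {
		load := func() (v exampleNodeCounts, err error) {
			// Same peer call used by newNodesUpDownCache above.
			v.online, v.offline = globalNotificationSys.GetPeerOnlineCount()
			return
		}
		return cachevalue.NewFromFunc(1*time.Minute,
			cachevalue.Opts{ReturnLastGood: true},
			load)
	}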
+ +package cmd + +import ( + "context" + "strconv" +) + +const ( + erasureSetOverallWriteQuorum = "overall_write_quorum" + erasureSetOverallHealth = "overall_health" + erasureSetReadQuorum = "read_quorum" + erasureSetWriteQuorum = "write_quorum" + erasureSetOnlineDrivesCount = "online_drives_count" + erasureSetHealingDrivesCount = "healing_drives_count" + erasureSetHealth = "health" +) + +const ( + poolIDL = "pool_id" + setIDL = "set_id" +) + +var ( + erasureSetOverallWriteQuorumMD = NewGaugeMD(erasureSetOverallWriteQuorum, + "Overall write quorum across pools and sets") + erasureSetOverallHealthMD = NewGaugeMD(erasureSetOverallHealth, + "Overall health across pools and sets (1=healthy, 0=unhealthy)") + erasureSetReadQuorumMD = NewGaugeMD(erasureSetReadQuorum, + "Read quorum for the erasure set in a pool", poolIDL, setIDL) + erasureSetWriteQuorumMD = NewGaugeMD(erasureSetWriteQuorum, + "Write quorum for the erasure set in a pool", poolIDL, setIDL) + erasureSetOnlineDrivesCountMD = NewGaugeMD(erasureSetOnlineDrivesCount, + "Count of online drives in the erasure set in a pool", poolIDL, setIDL) + erasureSetHealingDrivesCountMD = NewGaugeMD(erasureSetHealingDrivesCount, + "Count of healing drives in the erasure set in a pool", poolIDL, setIDL) + erasureSetHealthMD = NewGaugeMD(erasureSetHealth, + "Health of the erasure set in a pool (1=healthy, 0=unhealthy)", + poolIDL, setIDL) +) + +func b2f(v bool) float64 { + if v { + return 1 + } + return 0 +} + +// loadClusterErasureSetMetrics - `MetricsLoaderFn` for cluster storage erasure +// set metrics. +func loadClusterErasureSetMetrics(ctx context.Context, m MetricValues, c *metricsCache) error { + result, _ := c.esetHealthResult.Get() + + m.Set(erasureSetOverallWriteQuorum, float64(result.WriteQuorum)) + m.Set(erasureSetOverallHealth, b2f(result.Healthy)) + + for _, h := range result.ESHealth { + poolLV := strconv.Itoa(h.PoolID) + setLV := strconv.Itoa(h.SetID) + m.Set(erasureSetReadQuorum, float64(h.ReadQuorum), + poolIDL, poolLV, setIDL, setLV) + m.Set(erasureSetWriteQuorum, float64(h.WriteQuorum), + poolIDL, poolLV, setIDL, setLV) + m.Set(erasureSetOnlineDrivesCount, float64(h.HealthyDrives), + poolIDL, poolLV, setIDL, setLV) + m.Set(erasureSetHealingDrivesCount, float64(h.HealingDrives), + poolIDL, poolLV, setIDL, setLV) + m.Set(erasureSetHealth, b2f(h.Healthy), + poolIDL, poolLV, setIDL, setLV) + } + + return nil +} diff --git a/cmd/metrics-v3-cluster-health.go b/cmd/metrics-v3-cluster-health.go new file mode 100644 index 000000000..8636fc96b --- /dev/null +++ b/cmd/metrics-v3-cluster-health.go @@ -0,0 +1,109 @@ +// Copyright (c) 2015-2024 MinIO, Inc. +// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
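For orientation, the erasure-set group above surfaces series whose names are derived from its collector path `/cluster/erasure-set` (see `collectorPath.metricPrefix` later in this patch). Assuming a single pool with one healthy set of four drives, the scraped output would look roughly like the following; the numeric values are illustrative only:

	minio_cluster_erasure_set_overall_write_quorum 3
	minio_cluster_erasure_set_overall_health 1
	minio_cluster_erasure_set_read_quorum{pool_id="0",set_id="0"} 3
	minio_cluster_erasure_set_write_quorum{pool_id="0",set_id="0"} 3
	minio_cluster_erasure_set_online_drives_count{pool_id="0",set_id="0"} 4
	minio_cluster_erasure_set_health{pool_id="0",set_id="0"} 1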
+ +package cmd + +import "context" + +const ( + healthDrivesOfflineCount = "drives_offline_count" + healthDrivesOnlineCount = "drives_online_count" + healthDrivesCount = "drives_count" +) + +var ( + healthDrivesOfflineCountMD = NewGaugeMD(healthDrivesOfflineCount, + "Count of offline drives in the cluster") + healthDrivesOnlineCountMD = NewGaugeMD(healthDrivesOnlineCount, + "Count of online drives in the cluster") + healthDrivesCountMD = NewGaugeMD(healthDrivesCount, + "Count of all drives in the cluster") +) + +// loadClusterHealthDriveMetrics - `MetricsLoaderFn` for cluster storage drive metrics +// such as online, offline and total drives. +func loadClusterHealthDriveMetrics(ctx context.Context, m MetricValues, + c *metricsCache, +) error { + clusterDriveMetrics, _ := c.clusterDriveMetrics.Get() + + m.Set(healthDrivesOfflineCount, float64(clusterDriveMetrics.offlineDrives)) + m.Set(healthDrivesOnlineCount, float64(clusterDriveMetrics.onlineDrives)) + m.Set(healthDrivesCount, float64(clusterDriveMetrics.totalDrives)) + + return nil +} + +const ( + healthNodesOfflineCount = "nodes_offline_count" + healthNodesOnlineCount = "nodes_online_count" +) + +var ( + healthNodesOfflineCountMD = NewGaugeMD(healthNodesOfflineCount, + "Count of offline nodes in the cluster") + healthNodesOnlineCountMD = NewGaugeMD(healthNodesOnlineCount, + "Count of online nodes in the cluster") +) + +// loadClusterHealthNodeMetrics - `MetricsLoaderFn` for cluster health node +// metrics. +func loadClusterHealthNodeMetrics(ctx context.Context, m MetricValues, + c *metricsCache, +) error { + nodesUpDown, _ := c.nodesUpDown.Get() + + m.Set(healthNodesOfflineCount, float64(nodesUpDown.Offline)) + m.Set(healthNodesOnlineCount, float64(nodesUpDown.Online)) + + return nil +} + +const ( + healthCapacityRawTotalBytes = "capacity_raw_total_bytes" + healthCapacityRawFreeBytes = "capacity_raw_free_bytes" + healthCapacityUsableTotalBytes = "capacity_usable_total_bytes" + healthCapacityUsableFreeBytes = "capacity_usable_free_bytes" +) + +var ( + healthCapacityRawTotalBytesMD = NewGaugeMD(healthCapacityRawTotalBytes, + "Total cluster raw storage capacity in bytes") + healthCapacityRawFreeBytesMD = NewGaugeMD(healthCapacityRawFreeBytes, + "Total cluster raw storage free in bytes") + healthCapacityUsableTotalBytesMD = NewGaugeMD(healthCapacityUsableTotalBytes, + "Total cluster usable storage capacity in bytes") + healthCapacityUsableFreeBytesMD = NewGaugeMD(healthCapacityUsableFreeBytes, + "Total cluster usable storage free in bytes") +) + +// loadClusterHealthCapacityMetrics - `MetricsLoaderFn` for cluster storage +// capacity metrics. +func loadClusterHealthCapacityMetrics(ctx context.Context, m MetricValues, + c *metricsCache, +) error { + clusterDriveMetrics, _ := c.clusterDriveMetrics.Get() + + storageInfo := clusterDriveMetrics.storageInfo + + m.Set(healthCapacityRawTotalBytes, float64(GetTotalCapacity(storageInfo.Disks))) + m.Set(healthCapacityRawFreeBytes, float64(GetTotalCapacityFree(storageInfo.Disks))) + m.Set(healthCapacityUsableTotalBytes, float64(GetTotalUsableCapacity(storageInfo.Disks, storageInfo))) + m.Set(healthCapacityUsableFreeBytes, float64(GetTotalUsableCapacityFree(storageInfo.Disks, storageInfo))) + + return nil +} diff --git a/cmd/metrics-v3-cluster-usage.go b/cmd/metrics-v3-cluster-usage.go new file mode 100644 index 000000000..d8844d9f9 --- /dev/null +++ b/cmd/metrics-v3-cluster-usage.go @@ -0,0 +1,189 @@ +// Copyright (c) 2015-2024 MinIO, Inc. 
+// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cmd + +import ( + "context" + "time" + + "github.com/minio/minio/internal/logger" +) + +const ( + usageSinceLastUpdateSeconds = "since_last_update_seconds" + usageTotalBytes = "total_bytes" + usageObjectsCount = "count" + usageVersionsCount = "versions_count" + usageDeleteMarkersCount = "delete_markers_count" + usageBucketsCount = "buckets_count" + usageSizeDistribution = "size_distribution" + usageVersionCountDistribution = "version_count_distribution" +) + +var ( + usageSinceLastUpdateSecondsMD = NewGaugeMD(usageSinceLastUpdateSeconds, + "Time since last update of usage metrics in seconds") + usageTotalBytesMD = NewGaugeMD(usageTotalBytes, + "Total cluster usage in bytes") + usageObjectsCountMD = NewGaugeMD(usageObjectsCount, + "Total cluster objects count") + usageVersionsCountMD = NewGaugeMD(usageVersionsCount, + "Total cluster object versions (including delete markers) count") + usageDeleteMarkersCountMD = NewGaugeMD(usageDeleteMarkersCount, + "Total cluster delete markers count") + usageBucketsCountMD = NewGaugeMD(usageBucketsCount, + "Total cluster buckets count") + usageObjectsDistributionMD = NewGaugeMD(usageSizeDistribution, + "Cluster object size distribution", "range") + usageVersionsDistributionMD = NewGaugeMD(usageVersionCountDistribution, + "Cluster object version count distribution", "range") +) + +// loadClusterUsageObjectMetrics - reads cluster usage metrics. +// +// This is a `MetricsLoaderFn`. +func loadClusterUsageObjectMetrics(ctx context.Context, m MetricValues, c *metricsCache) error { + dataUsageInfo, err := c.dataUsageInfo.Get() + if err != nil { + logger.LogIf(ctx, err) + return nil + } + + // data usage has not captured any data yet. 
+	if dataUsageInfo.LastUpdate.IsZero() {
+		return nil
+	}
+
+	var (
+		clusterSize               uint64
+		clusterBuckets            uint64
+		clusterObjectsCount       uint64
+		clusterVersionsCount      uint64
+		clusterDeleteMarkersCount uint64
+	)
+
+	clusterObjectSizesHistogram := map[string]uint64{}
+	clusterVersionsHistogram := map[string]uint64{}
+	for _, usage := range dataUsageInfo.BucketsUsage {
+		clusterBuckets++
+		clusterSize += usage.Size
+		clusterObjectsCount += usage.ObjectsCount
+		clusterVersionsCount += usage.VersionsCount
+		clusterDeleteMarkersCount += usage.DeleteMarkersCount
+		for k, v := range usage.ObjectSizesHistogram {
+			clusterObjectSizesHistogram[k] += v
+		}
+		for k, v := range usage.ObjectVersionsHistogram {
+			clusterVersionsHistogram[k] += v
+		}
+	}
+
+	m.Set(usageSinceLastUpdateSeconds, time.Since(dataUsageInfo.LastUpdate).Seconds())
+	m.Set(usageTotalBytes, float64(clusterSize))
+	m.Set(usageObjectsCount, float64(clusterObjectsCount))
+	m.Set(usageVersionsCount, float64(clusterVersionsCount))
+	m.Set(usageDeleteMarkersCount, float64(clusterDeleteMarkersCount))
+	m.Set(usageBucketsCount, float64(clusterBuckets))
+	for k, v := range clusterObjectSizesHistogram {
+		m.Set(usageSizeDistribution, float64(v), "range", k)
+	}
+	for k, v := range clusterVersionsHistogram {
+		m.Set(usageVersionCountDistribution, float64(v), "range", k)
+	}
+
+	return nil
+}
+
+const (
+	usageBucketQuotaTotalBytes = "quota_total_bytes"
+
+	usageBucketTotalBytes                     = "total_bytes"
+	usageBucketObjectsCount                   = "objects_count"
+	usageBucketVersionsCount                  = "versions_count"
+	usageBucketDeleteMarkersCount             = "delete_markers_count"
+	usageBucketObjectSizeDistribution         = "object_size_distribution"
+	usageBucketObjectVersionCountDistribution = "object_version_count_distribution"
+)
+
+var (
+	usageBucketTotalBytesMD = NewGaugeMD(usageBucketTotalBytes,
+		"Total bucket size in bytes", "bucket")
+	usageBucketObjectsTotalMD = NewGaugeMD(usageBucketObjectsCount,
+		"Total objects count in bucket", "bucket")
+	usageBucketVersionsCountMD = NewGaugeMD(usageBucketVersionsCount,
+		"Total object versions (including delete markers) count in bucket", "bucket")
+	usageBucketDeleteMarkersCountMD = NewGaugeMD(usageBucketDeleteMarkersCount,
+		"Total delete markers count in bucket", "bucket")
+
+	usageBucketQuotaTotalBytesMD = NewGaugeMD(usageBucketQuotaTotalBytes,
+		"Total bucket quota in bytes", "bucket")
+
+	usageBucketObjectSizeDistributionMD = NewGaugeMD(usageBucketObjectSizeDistribution,
+		"Bucket object size distribution", "range", "bucket")
+	usageBucketObjectVersionCountDistributionMD = NewGaugeMD(
+		usageBucketObjectVersionCountDistribution,
+		"Bucket object version count distribution", "range", "bucket")
+)
+
+// loadClusterUsageBucketMetrics - `BucketMetricsLoaderFn` to load bucket usage metrics.
+func loadClusterUsageBucketMetrics(ctx context.Context, m MetricValues, c *metricsCache, buckets []string) error {
+	dataUsageInfo, err := c.dataUsageInfo.Get()
+	if err != nil {
+		logger.LogIf(ctx, err)
+		return nil
+	}
+
+	// data usage has not been captured yet.
+	if dataUsageInfo.LastUpdate.IsZero() {
+		return nil
+	}
+
+	m.Set(usageSinceLastUpdateSeconds, time.Since(dataUsageInfo.LastUpdate).Seconds())
+
+	for _, bucket := range buckets {
+		usage, ok := dataUsageInfo.BucketsUsage[bucket]
+		if !ok {
+			continue
+		}
+		quota, err := globalBucketQuotaSys.Get(ctx, bucket)
+		if err != nil {
+			// Log and continue if we are unable to retrieve metrics for this
+			// bucket.
+ logger.LogIf(ctx, err) + continue + } + + m.Set(usageBucketTotalBytes, float64(usage.Size), "bucket", bucket) + m.Set(usageBucketObjectsCount, float64(usage.ObjectsCount), "bucket", bucket) + m.Set(usageBucketVersionsCount, float64(usage.VersionsCount), "bucket", bucket) + m.Set(usageBucketDeleteMarkersCount, float64(usage.DeleteMarkersCount), "bucket", bucket) + + if quota != nil && quota.Quota > 0 { + m.Set(usageBucketQuotaTotalBytes, float64(quota.Quota), "bucket", bucket) + } + + for k, v := range usage.ObjectSizesHistogram { + m.Set(usageBucketObjectSizeDistribution, float64(v), "range", k, "bucket", bucket) + } + for k, v := range usage.ObjectVersionsHistogram { + m.Set(usageBucketObjectVersionCountDistribution, float64(v), "range", k, "bucket", bucket) + } + + } + return nil +} diff --git a/cmd/metrics-v3-handler.go b/cmd/metrics-v3-handler.go new file mode 100644 index 000000000..2d31a08b6 --- /dev/null +++ b/cmd/metrics-v3-handler.go @@ -0,0 +1,254 @@ +// Copyright (c) 2015-2024 MinIO, Inc. +// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cmd + +import ( + "encoding/json" + "fmt" + "net/http" + "slices" + "strings" + + "github.com/minio/minio/internal/logger" + "github.com/minio/minio/internal/mcontext" + "github.com/minio/mux" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +type promLogger struct{} + +func (p promLogger) Println(v ...interface{}) { + s := make([]string, 0, len(v)) + for _, val := range v { + s = append(s, fmt.Sprintf("%v", val)) + } + err := fmt.Errorf("metrics handler error: %v", strings.Join(s, " ")) + logger.LogIf(GlobalContext, err) +} + +type metricsV3Server struct { + registry *prometheus.Registry + opts promhttp.HandlerOpts + authFn func(http.Handler) http.Handler + + metricsData *metricsV3Collection +} + +func newMetricsV3Server(authType prometheusAuthType) *metricsV3Server { + registry := prometheus.NewRegistry() + authFn := AuthMiddleware + if authType == prometheusPublic { + authFn = NoAuthMiddleware + } + + metricGroups := newMetricGroups(registry) + + return &metricsV3Server{ + registry: registry, + opts: promhttp.HandlerOpts{ + ErrorLog: promLogger{}, + ErrorHandling: promhttp.HTTPErrorOnError, + Registry: registry, + MaxRequestsInFlight: 2, + }, + authFn: authFn, + + metricsData: metricGroups, + } +} + +// metricDisplay - contains info on a metric for display purposes. 
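+// It is serialized to JSON or rendered as a markdown table row by the metrics
+// listing handler below, depending on the request's Content-Type.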
+type metricDisplay struct { + Name string `json:"name"` + Help string `json:"help"` + Type string `json:"type"` + Labels []string `json:"labels"` +} + +func (md metricDisplay) String() string { + return fmt.Sprintf("Name: %s\nType: %s\nHelp: %s\nLabels: {%s}\n", md.Name, md.Type, md.Help, strings.Join(md.Labels, ",")) +} + +func (md metricDisplay) TableRow() string { + labels := strings.Join(md.Labels, ",") + if labels == "" { + labels = "" + } else { + labels = "`" + labels + "`" + } + return fmt.Sprintf("| `%s` | `%s` | %s | %s |\n", md.Name, md.Type, md.Help, labels) +} + +// listMetrics - returns a handler that lists all the metrics that could be +// returned for the requested path. +// +// FIXME: It currently only lists `minio_` prefixed metrics. +func (h *metricsV3Server) listMetrics(path string) http.Handler { + // First collect all matching MetricsGroup's + matchingMG := make(map[collectorPath]*MetricsGroup) + for _, collPath := range h.metricsData.collectorPaths { + if collPath.isDescendantOf(path) { + if v, ok := h.metricsData.mgMap[collPath]; ok { + matchingMG[collPath] = v + } else { + matchingMG[collPath] = h.metricsData.bucketMGMap[collPath] + } + } + } + + if len(matchingMG) == 0 { + return nil + } + + var metrics []metricDisplay + for _, collectorPath := range h.metricsData.collectorPaths { + if mg, ok := matchingMG[collectorPath]; ok { + var commonLabels []string + for k := range mg.ExtraLabels { + commonLabels = append(commonLabels, k) + } + for _, d := range mg.Descriptors { + labels := slices.Clone(d.VariableLabels) + labels = append(labels, commonLabels...) + metric := metricDisplay{ + Name: mg.MetricFQN(d.Name), + Help: d.Help, + Type: d.Type.String(), + Labels: labels, + } + metrics = append(metrics, metric) + } + } + } + + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + contentType := r.Header.Get("Content-Type") + if contentType == "application/json" { + w.Header().Set("Content-Type", "application/json") + jsonEncoder := json.NewEncoder(w) + jsonEncoder.Encode(metrics) + return + } + + // If not JSON, return plain text. We format it as a markdown table for + // readability. + w.Header().Set("Content-Type", "text/plain") + var b strings.Builder + b.WriteString("| Name | Type | Help | Labels |\n") + b.WriteString("| ---- | ---- | ---- | ------ |\n") + for _, metric := range metrics { + b.WriteString(metric.TableRow()) + } + w.Write([]byte(b.String())) + }) +} + +func (h *metricsV3Server) handle(path string, isListingRequest bool, buckets []string) http.Handler { + var notFoundHandler http.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + http.Error(w, "Metrics Resource Not found", http.StatusNotFound) + }) + + // Require that metrics path has at least component. + if path == "/" { + return notFoundHandler + } + + if isListingRequest { + handler := h.listMetrics(path) + if handler == nil { + return notFoundHandler + } + return handler + } + + // In each of the following cases, we check if the collect path is a + // descendant of `path`, and if so, we add the corresponding gatherer to + // the list of gatherers. This way, /api/a will return all metrics returned + // by /api/a/b and /api/a/c (and any other matching descendant collector + // paths). 
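+	// For example, a request for /cluster/usage gathers both the
+	// /cluster/usage/objects and /cluster/usage/buckets groups registered in
+	// metrics-v3.go.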
+ + var gatherers []prometheus.Gatherer + for _, collectorPath := range h.metricsData.collectorPaths { + if collectorPath.isDescendantOf(path) { + gatherer := h.metricsData.mgGatherers[collectorPath] + + // For Bucket metrics we need to set the buckets argument inside the + // metric group, so that it will affect collection. If no buckets + // are provided, we will not return bucket metrics. + if bmg, ok := h.metricsData.bucketMGMap[collectorPath]; ok { + if len(buckets) == 0 { + continue + } + unLocker := bmg.LockAndSetBuckets(buckets) + defer unLocker() + } + gatherers = append(gatherers, gatherer) + } + } + + if len(gatherers) == 0 { + return notFoundHandler + } + + return promhttp.HandlerFor(prometheus.Gatherers(gatherers), h.opts) +} + +// ServeHTTP - implements http.Handler interface. +// +// When the `list` query parameter is provided (its value is ignored), the +// server lists all metrics that could be returned for the requested path. +// +// The (repeatable) `buckets` query parameter is a list of bucket names (or it +// could be a comma separated value) to return metrics with a bucket label. +// Bucket metrics will be returned only for the provided buckets. If no buckets +// parameter is provided, no bucket metrics are returned. +func (h *metricsV3Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { + pathComponents := mux.Vars(r)["pathComps"] + isListingRequest := r.Form.Has("list") + + // Parse optional buckets query parameter. + bucketsParam := r.Form["buckets"] + buckets := make([]string, 0, len(bucketsParam)) + for _, bp := range bucketsParam { + bp = strings.TrimSpace(bp) + if bp == "" { + continue + } + splits := strings.Split(bp, ",") + for _, split := range splits { + buckets = append(buckets, strings.TrimSpace(split)) + } + } + + innerHandler := h.handle(pathComponents, isListingRequest, buckets) + + // Add tracing to the prom. handler + tracedHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + tc, ok := r.Context().Value(mcontext.ContextTraceKey).(*mcontext.TraceCtxt) + if ok { + tc.FuncName = "handler.MetricsV3" + tc.ResponseRecorder.LogErrBody = true + } + + innerHandler.ServeHTTP(w, r) + }) + + // Add authentication + h.authFn(tracedHandler).ServeHTTP(w, r) +} diff --git a/cmd/metrics-v3-system-drive.go b/cmd/metrics-v3-system-drive.go new file mode 100644 index 000000000..cb0d66a53 --- /dev/null +++ b/cmd/metrics-v3-system-drive.go @@ -0,0 +1,126 @@ +// Copyright (c) 2015-2024 MinIO, Inc. +// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . 
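Taken together, the handler above supports request shapes like the following, where `<base>` stands for the top-level metrics v3 endpoint and the bucket name is only an example:

	GET <base>/cluster/health               serve all metrics under /cluster/health
	GET <base>/cluster/usage                serve both usage sub-groups (descendant match)
	GET <base>/api/bucket?buckets=mybucket  serve bucket metrics for "mybucket" only
	GET <base>/api/bucket?list              list the metrics served under this path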
+ +package cmd + +import ( + "context" + "strconv" + + "github.com/minio/minio/internal/logger" +) + +// label constants +const ( + driveL = "drive" + poolIndexL = "pool_index" + setIndexL = "set_index" + driveIndexL = "drive_index" + + apiL = "api" +) + +var allDriveLabels = []string{driveL, poolIndexL, setIndexL, driveIndexL} + +const ( + driveUsedBytes = "used_bytes" + driveFreeBytes = "free_bytes" + driveTotalBytes = "total_bytes" + driveFreeInodes = "free_inodes" + driveTimeoutErrorsTotal = "timeout_errors_total" + driveAvailabilityErrorsTotal = "availability_errors_total" + driveWaitingIO = "waiting_io" + driveAPILatencyMicros = "api_latency_micros" + + driveOfflineCount = "offline_count" + driveOnlineCount = "online_count" + driveCount = "count" +) + +var ( + driveUsedBytesMD = NewGaugeMD(driveUsedBytes, + "Total storage used on a drive in bytes", allDriveLabels...) + driveFreeBytesMD = NewGaugeMD(driveFreeBytes, + "Total storage free on a drive in bytes", allDriveLabels...) + driveTotalBytesMD = NewGaugeMD(driveTotalBytes, + "Total storage available on a drive in bytes", allDriveLabels...) + driveFreeInodesMD = NewGaugeMD(driveFreeInodes, + "Total free inodes on a drive", allDriveLabels...) + driveTimeoutErrorsMD = NewCounterMD(driveTimeoutErrorsTotal, + "Total timeout errors on a drive", allDriveLabels...) + driveAvailabilityErrorsMD = NewCounterMD(driveAvailabilityErrorsTotal, + "Total availability errors (I/O errors, permission denied and timeouts) on a drive", + allDriveLabels...) + driveWaitingIOMD = NewGaugeMD(driveWaitingIO, + "Total waiting I/O operations on a drive", allDriveLabels...) + driveAPILatencyMD = NewGaugeMD(driveAPILatencyMicros, + "Average last minute latency in µs for drive API storage operations", + append(allDriveLabels, apiL)...) + + driveOfflineCountMD = NewGaugeMD(driveOfflineCount, + "Count of offline drives") + driveOnlineCountMD = NewGaugeMD(driveOnlineCount, + "Count of online drives") + driveCountMD = NewGaugeMD(driveCount, + "Count of all drives") +) + +// loadDriveMetrics - `MetricsLoaderFn` for node drive metrics. +func loadDriveMetrics(ctx context.Context, m MetricValues, c *metricsCache) error { + driveMetrics, err := c.driveMetrics.Get() + if err != nil { + logger.LogIf(ctx, err) + return nil + } + + storageInfo := driveMetrics.storageInfo + + for _, disk := range storageInfo.Disks { + labels := []string{ + driveL, disk.DrivePath, + poolIndexL, strconv.Itoa(disk.PoolIndex), + setIndexL, strconv.Itoa(disk.SetIndex), + driveIndexL, strconv.Itoa(disk.DiskIndex), + } + + m.Set(driveUsedBytes, float64(disk.UsedSpace), labels...) + m.Set(driveFreeBytes, float64(disk.AvailableSpace), labels...) + m.Set(driveTotalBytes, float64(disk.TotalSpace), labels...) + m.Set(driveFreeInodes, float64(disk.FreeInodes), labels...) + + if disk.Metrics != nil { + m.Set(driveTimeoutErrorsTotal, float64(disk.Metrics.TotalErrorsTimeout), labels...) + m.Set(driveAvailabilityErrorsTotal, float64(disk.Metrics.TotalErrorsAvailability), labels...) + m.Set(driveWaitingIO, float64(disk.Metrics.TotalWaiting), labels...) + + // Append the api label for the drive API latencies. + labels = append(labels, "api", "") + lastIdx := len(labels) - 1 + for apiName, latency := range disk.Metrics.LastMinute { + labels[lastIdx] = "storage." + apiName + m.Set(driveAPILatencyMicros, float64(latency.Avg().Microseconds()), + labels...) 
+ } + } + } + + m.Set(driveOfflineCount, float64(driveMetrics.offlineDrives)) + m.Set(driveOnlineCount, float64(driveMetrics.onlineDrives)) + m.Set(driveCount, float64(driveMetrics.totalDrives)) + + return nil +} diff --git a/cmd/metrics-v3-system-network.go b/cmd/metrics-v3-system-network.go new file mode 100644 index 000000000..e0328afc6 --- /dev/null +++ b/cmd/metrics-v3-system-network.go @@ -0,0 +1,61 @@ +// Copyright (c) 2015-2024 MinIO, Inc. +// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cmd + +import ( + "context" + + "github.com/minio/minio/internal/rest" +) + +const ( + internodeErrorsTotal MetricName = "errors_total" + internodeDialErrorsTotal MetricName = "dial_errors_total" + internodeDialAvgTimeNanos MetricName = "dial_avg_time_nanos" + internodeSentBytesTotal MetricName = "sent_bytes_total" + internodeRecvBytesTotal MetricName = "recv_bytes_total" +) + +var ( + internodeErrorsTotalMD = NewCounterMD(internodeErrorsTotal, + "Total number of failed internode calls") + internodeDialedErrorsTotalMD = NewCounterMD(internodeDialErrorsTotal, + "Total number of internode TCP dial timeouts and errors") + internodeDialAvgTimeNanosMD = NewGaugeMD(internodeDialAvgTimeNanos, + "Average dial time of internode TCP calls in nanoseconds") + internodeSentBytesTotalMD = NewCounterMD(internodeSentBytesTotal, + "Total number of bytes sent to other peer nodes") + internodeRecvBytesTotalMD = NewCounterMD(internodeRecvBytesTotal, + "Total number of bytes received from other peer nodes") +) + +// loadNetworkInternodeMetrics - reads internode network metrics. +// +// This is a `MetricsLoaderFn`. +func loadNetworkInternodeMetrics(ctx context.Context, m MetricValues, _ *metricsCache) error { + connStats := globalConnStats.toServerConnStats() + rpcStats := rest.GetRPCStats() + if globalIsDistErasure { + m.Set(internodeErrorsTotal, float64(rpcStats.Errs)) + m.Set(internodeDialErrorsTotal, float64(rpcStats.DialErrs)) + m.Set(internodeDialAvgTimeNanos, float64(rpcStats.DialAvgDuration)) + m.Set(internodeSentBytesTotal, float64(connStats.internodeOutputBytes)) + m.Set(internodeRecvBytesTotal, float64(connStats.internodeInputBytes)) + } + return nil +} diff --git a/cmd/metrics-v3-types.go b/cmd/metrics-v3-types.go new file mode 100644 index 000000000..b7df8eae3 --- /dev/null +++ b/cmd/metrics-v3-types.go @@ -0,0 +1,487 @@ +// Copyright (c) 2015-2024 MinIO, Inc. +// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cmd + +import ( + "context" + "fmt" + "strings" + "sync" + + "github.com/minio/minio/internal/logger" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "golang.org/x/exp/slices" +) + +type collectorPath string + +// metricPrefix converts a collector path to a metric name prefix. The path is +// converted to snake-case (by replaced '/' and '-' with '_') and prefixed with +// `minio_`. +func (cp collectorPath) metricPrefix() string { + s := strings.TrimPrefix(string(cp), "/") + s = strings.ReplaceAll(s, "/", "_") + s = strings.ReplaceAll(s, "-", "_") + return "minio_" + s +} + +// isDescendantOf returns true if it is a descendant of (or the same as) +// `ancestor`. +// +// For example: +// +// /a, /a/b, /a/b/c are all descendants of /a. +// /abc or /abd/a are not descendants of /ab. +func (cp collectorPath) isDescendantOf(arg string) bool { + descendant := string(cp) + if descendant == arg { + return true + } + if len(arg) >= len(descendant) { + return false + } + if !strings.HasSuffix(arg, "/") { + arg += "/" + } + return strings.HasPrefix(descendant, arg) +} + +// MetricType - represents the type of a metric. +type MetricType int + +const ( + // CounterMT - represents a counter metric. + CounterMT MetricType = iota + // GaugeMT - represents a gauge metric. + GaugeMT + // HistogramMT - represents a histogram metric. + HistogramMT +) + +func (mt MetricType) String() string { + switch mt { + case CounterMT: + return "counter" + case GaugeMT: + return "gauge" + case HistogramMT: + return "histogram" + default: + return "*unknown*" + } +} + +func (mt MetricType) toProm() prometheus.ValueType { + switch mt { + case CounterMT: + return prometheus.CounterValue + case GaugeMT: + return prometheus.GaugeValue + case HistogramMT: + return prometheus.CounterValue + default: + panic(fmt.Sprintf("unknown metric type: %d", mt)) + } +} + +// MetricDescriptor - represents a metric descriptor. +type MetricDescriptor struct { + Name MetricName + Type MetricType + Help string + VariableLabels []string + + // managed values follow: + labelSet map[string]struct{} +} + +func (md *MetricDescriptor) getLabelSet() map[string]struct{} { + if md.labelSet != nil { + return md.labelSet + } + md.labelSet = make(map[string]struct{}, len(md.VariableLabels)) + for _, label := range md.VariableLabels { + md.labelSet[label] = struct{}{} + } + return md.labelSet +} + +func (md *MetricDescriptor) toPromName(namePrefix string) string { + return prometheus.BuildFQName(namePrefix, "", string(md.Name)) +} + +func (md *MetricDescriptor) toPromDesc(namePrefix string, extraLabels map[string]string) *prometheus.Desc { + return prometheus.NewDesc( + md.toPromName(namePrefix), + md.Help, + md.VariableLabels, extraLabels, + ) +} + +// NewCounterMD - creates a new counter metric descriptor. +func NewCounterMD(name MetricName, help string, labels ...string) MetricDescriptor { + return MetricDescriptor{ + Name: name, + Type: CounterMT, + Help: help, + VariableLabels: labels, + } +} + +// NewGaugeMD - creates a new gauge metric descriptor. 
+func NewGaugeMD(name MetricName, help string, labels ...string) MetricDescriptor { + return MetricDescriptor{ + Name: name, + Type: GaugeMT, + Help: help, + VariableLabels: labels, + } +} + +type metricValue struct { + Labels map[string]string + Value float64 +} + +// MetricValues - type to set metric values retrieved while loading metrics. A +// value of this type is passed to the `MetricsLoaderFn`. +type MetricValues struct { + values map[MetricName][]metricValue + descriptors map[MetricName]MetricDescriptor +} + +func newMetricValues(d map[MetricName]MetricDescriptor) MetricValues { + return MetricValues{ + values: make(map[MetricName][]metricValue, len(d)), + descriptors: d, + } +} + +// ToPromMetrics - converts the internal metric values to Prometheus +// adding the given name prefix. The extraLabels are added to each metric as +// constant labels. +func (m *MetricValues) ToPromMetrics(namePrefix string, extraLabels map[string]string, +) []prometheus.Metric { + metrics := make([]prometheus.Metric, 0, len(m.values)) + for metricName, mv := range m.values { + desc := m.descriptors[metricName] + promDesc := desc.toPromDesc(namePrefix, extraLabels) + for _, v := range mv { + // labelValues is in the same order as the variable labels in the + // descriptor. + labelValues := make([]string, 0, len(v.Labels)) + for _, k := range desc.VariableLabels { + labelValues = append(labelValues, v.Labels[k]) + } + metrics = append(metrics, + prometheus.MustNewConstMetric(promDesc, desc.Type.toProm(), v.Value, + labelValues...)) + } + } + return metrics +} + +// Set - sets a metric value along with any provided labels. It is used only +// with Gauge and Counter metrics. +// +// If the MetricName given here is not present in the `MetricsGroup`'s +// descriptors, this function panics. +// +// Panics if `labels` is not a list of ordered label name and label value pairs +// or if all labels for the metric are not provided. +func (m *MetricValues) Set(name MetricName, value float64, labels ...string) { + desc, ok := m.descriptors[name] + if !ok { + panic(fmt.Sprintf("metric has no description: %s", name)) + } + + if len(labels)%2 != 0 { + panic("labels must be a list of ordered key-value pairs") + } + + validLabels := desc.getLabelSet() + labelMap := make(map[string]string, len(labels)/2) + for i := 0; i < len(labels); i += 2 { + if _, ok := validLabels[labels[i]]; !ok { + panic(fmt.Sprintf("invalid label: %s (metric: %s)", labels[i], name)) + } + labelMap[labels[i]] = labels[i+1] + } + + if len(labels)/2 != len(validLabels) { + panic(fmt.Sprintf("not all labels were given values")) + } + + v, ok := m.values[name] + if !ok { + v = make([]metricValue, 0, 1) + } + m.values[name] = append(v, metricValue{ + Labels: labelMap, + Value: value, + }) +} + +// SetHistogram - sets values for the given MetricName using the provided +// histogram. +// +// `renameLabels` is a map of label names to rename. The keys are the original +// label names and the values are the new label names. +// +// TODO: bucketFilter doc +// +// `extraLabels` are additional labels to add to each metric. They are ordered +// label name and value pairs. 
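+//
+// `bucketFilter`, when non-empty, restricts the recorded samples to histogram
+// entries whose `bucket` label value appears in the list.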
+func (m *MetricValues) SetHistogram(name MetricName, hist *prometheus.HistogramVec, + renameLabels map[string]string, bucketFilter []string, extraLabels ...string, +) { + if _, ok := m.descriptors[name]; !ok { + panic(fmt.Sprintf("metric has no description: %s", name)) + } + dummyDesc := MetricDescription{} + metricsV2 := getHistogramMetrics(hist, dummyDesc, false) + for _, metric := range metricsV2 { + // If a bucket filter is provided, only add metrics for the given + // buckets. + if len(bucketFilter) > 0 { + if !slices.Contains(bucketFilter, metric.VariableLabels["bucket"]) { + continue + } + } + + labels := make([]string, 0, len(metric.VariableLabels)*2) + for k, v := range metric.VariableLabels { + if newLabel, ok := renameLabels[k]; ok { + labels = append(labels, newLabel, v) + } else { + labels = append(labels, k, v) + } + } + labels = append(labels, extraLabels...) + m.Set(name, metric.Value, labels...) + } +} + +// MetricsLoaderFn - represents a function to load metrics from the +// metricsCache. +// +// Note that returning an error here will cause the Metrics handler to return a +// 500 Internal Server Error. +type MetricsLoaderFn func(context.Context, MetricValues, *metricsCache) error + +// JoinLoaders - joins multiple loaders into a single loader. The returned +// loader will call each of the given loaders in order. If any of the loaders +// return an error, the returned loader will return that error. +func JoinLoaders(loaders ...MetricsLoaderFn) MetricsLoaderFn { + return func(ctx context.Context, m MetricValues, c *metricsCache) error { + for _, loader := range loaders { + if err := loader(ctx, m, c); err != nil { + return err + } + } + return nil + } +} + +// BucketMetricsLoaderFn - represents a function to load metrics from the +// metricsCache and the system for a given list of buckets. +// +// Note that returning an error here will cause the Metrics handler to return a +// 500 Internal Server Error. +type BucketMetricsLoaderFn func(context.Context, MetricValues, *metricsCache, []string) error + +// JoinBucketLoaders - joins multiple bucket loaders into a single loader, +// similar to `JoinLoaders`. +func JoinBucketLoaders(loaders ...BucketMetricsLoaderFn) BucketMetricsLoaderFn { + return func(ctx context.Context, m MetricValues, c *metricsCache, b []string) error { + for _, loader := range loaders { + if err := loader(ctx, m, c, b); err != nil { + return err + } + } + return nil + } +} + +// MetricsGroup - represents a group of metrics. It includes a `MetricsLoaderFn` +// function that provides a way to load the metrics from the system. The metrics +// are cached and refreshed after a given timeout. +// +// For metrics with a `bucket` dimension, a list of buckets argument is required +// to collect the metrics. +// +// It implements the prometheus.Collector interface for metric groups without a +// bucket dimension. For metric groups with a bucket dimension, use the +// `GetBucketCollector` method to get a `BucketCollector` that implements the +// prometheus.Collector interface. +type MetricsGroup struct { + // Path (relative to the Metrics v3 base endpoint) at which this group of + // metrics is served. This value is converted into a metric name prefix + // using `.metricPrefix()` and is added to each metric returned. + CollectorPath collectorPath + // List of all metric descriptors that could be returned by the loader. + Descriptors []MetricDescriptor + // (Optional) Extra (constant) label KV pairs to be added to each metric in + // the group. 
+ ExtraLabels map[string]string + + // Loader functions to load metrics. Only one of these will be set. Metrics + // returned by these functions must be present in the `Descriptors` list. + loader MetricsLoaderFn + bucketLoader BucketMetricsLoaderFn + + // Cache for all metrics groups. Set via `.SetCache` method. + cache *metricsCache + + // managed values follow: + + // map of metric descriptors by metric name. + descriptorMap map[MetricName]MetricDescriptor + + // For bucket metrics, the list of buckets is stored here. It is used in the + // Collect() call. This is protected by the `bucketsLock`. + bucketsLock sync.Mutex + buckets []string +} + +// NewMetricsGroup creates a new MetricsGroup. To create a metrics group for +// metrics with a `bucket` dimension (label), use `NewBucketMetricsGroup`. +// +// The `loader` function loads metrics from the cache and the system. +func NewMetricsGroup(path collectorPath, descriptors []MetricDescriptor, + loader MetricsLoaderFn, +) *MetricsGroup { + mg := &MetricsGroup{ + CollectorPath: path, + Descriptors: descriptors, + loader: loader, + } + mg.validate() + return mg +} + +// NewBucketMetricsGroup creates a new MetricsGroup for metrics with a `bucket` +// dimension (label). +// +// The `loader` function loads metrics from the cache and the system for a given +// list of buckets. +func NewBucketMetricsGroup(path collectorPath, descriptors []MetricDescriptor, + loader BucketMetricsLoaderFn, +) *MetricsGroup { + mg := &MetricsGroup{ + CollectorPath: path, + Descriptors: descriptors, + bucketLoader: loader, + } + mg.validate() + return mg +} + +// AddExtraLabels - adds extra (constant) label KV pairs to the metrics group. +// This is a helper to initialize the `ExtraLabels` field. The argument is a +// list of ordered label name and value pairs. +func (mg *MetricsGroup) AddExtraLabels(labels ...string) { + if len(labels)%2 != 0 { + panic("Labels must be an ordered list of name value pairs") + } + if mg.ExtraLabels == nil { + mg.ExtraLabels = make(map[string]string, len(labels)) + } + for i := 0; i < len(labels); i += 2 { + mg.ExtraLabels[labels[i]] = labels[i+1] + } +} + +// IsBucketMetricsGroup - returns true if the given MetricsGroup is a bucket +// metrics group. +func (mg *MetricsGroup) IsBucketMetricsGroup() bool { + return mg.bucketLoader != nil +} + +// Describe - implements prometheus.Collector interface. +func (mg *MetricsGroup) Describe(ch chan<- *prometheus.Desc) { + for _, desc := range mg.Descriptors { + ch <- desc.toPromDesc(mg.CollectorPath.metricPrefix(), mg.ExtraLabels) + } +} + +// Collect - implements prometheus.Collector interface. +func (mg *MetricsGroup) Collect(ch chan<- prometheus.Metric) { + metricValues := newMetricValues(mg.descriptorMap) + + var err error + if mg.IsBucketMetricsGroup() { + err = mg.bucketLoader(GlobalContext, metricValues, mg.cache, mg.buckets) + } else { + err = mg.loader(GlobalContext, metricValues, mg.cache) + } + + // There is no way to handle errors here, so we panic the current goroutine + // and the Metrics API handler returns a 500 HTTP status code. This should + // normally not happen, and usually indicates a bug. + logger.CriticalIf(GlobalContext, errors.Wrap(err, "failed to get metrics")) + + promMetrics := metricValues.ToPromMetrics(mg.CollectorPath.metricPrefix(), + mg.ExtraLabels) + for _, metric := range promMetrics { + ch <- metric + } +} + +// LockAndSetBuckets - locks the buckets and sets the given buckets. It returns +// a function to unlock the buckets. 
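+//
+// The metrics handler calls this before gathering a bucket metrics group and
+// defers the returned unlock until gathering completes.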
+func (mg *MetricsGroup) LockAndSetBuckets(buckets []string) func() { + mg.bucketsLock.Lock() + mg.buckets = buckets + return func() { + mg.bucketsLock.Unlock() + } +} + +// MetricFQN - returns the fully qualified name for the given metric name. +func (mg *MetricsGroup) MetricFQN(name MetricName) string { + v, ok := mg.descriptorMap[name] + if !ok { + // This should never happen. + return "" + } + return v.toPromName(mg.CollectorPath.metricPrefix()) +} + +func (mg *MetricsGroup) validate() { + if len(mg.Descriptors) == 0 { + panic("Descriptors must be set") + } + + // For bools A and B, A XOR B <=> A != B. + isExactlyOneSet := (mg.loader == nil) != (mg.bucketLoader == nil) + if !isExactlyOneSet { + panic("Exactly one Loader function must be set") + } + + mg.descriptorMap = make(map[MetricName]MetricDescriptor, len(mg.Descriptors)) + for _, desc := range mg.Descriptors { + mg.descriptorMap[desc.Name] = desc + } +} + +// SetCache is a helper to initialize MetricsGroup. It sets the cache object. +func (mg *MetricsGroup) SetCache(c *metricsCache) { + mg.cache = c +} diff --git a/cmd/metrics-v3.go b/cmd/metrics-v3.go new file mode 100644 index 000000000..5814f9c39 --- /dev/null +++ b/cmd/metrics-v3.go @@ -0,0 +1,272 @@ +// Copyright (c) 2015-2024 MinIO, Inc. +// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cmd + +import ( + "slices" + "strings" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" +) + +// Collector paths. +// +// These are paths under the top-level /minio/metrics/v3 metrics endpoint. Each +// of these paths returns a set of V3 metrics. +const ( + apiRequestsCollectorPath collectorPath = "/api/requests" + apiBucketCollectorPath collectorPath = "/api/bucket" + + systemNetworkInternodeCollectorPath collectorPath = "/system/network/internode" + systemDriveCollectorPath collectorPath = "/system/drive" + systemProcessCollectorPath collectorPath = "/system/process" + systemGoCollectorPath collectorPath = "/system/go" + + clusterHealthCollectorPath collectorPath = "/cluster/health" + clusterUsageObjectsCollectorPath collectorPath = "/cluster/usage/objects" + clusterUsageBucketsCollectorPath collectorPath = "/cluster/usage/buckets" + clusterErasureSetCollectorPath collectorPath = "/cluster/erasure-set" +) + +const ( + clusterBasePath = "/cluster" +) + +type metricsV3Collection struct { + mgMap map[collectorPath]*MetricsGroup + bucketMGMap map[collectorPath]*MetricsGroup + + // Gatherers for non-bucket MetricsGroup's + mgGatherers map[collectorPath]prometheus.Gatherer + + collectorPaths []collectorPath +} + +func newMetricGroups(r *prometheus.Registry) *metricsV3Collection { + // Create all metric groups. 
+ apiRequestsMG := NewMetricsGroup(apiRequestsCollectorPath, + []MetricDescriptor{ + apiRejectedAuthTotalMD, + apiRejectedHeaderTotalMD, + apiRejectedTimestampTotalMD, + apiRejectedInvalidTotalMD, + + apiRequestsWaitingTotalMD, + apiRequestsIncomingTotalMD, + apiRequestsInFlightTotalMD, + apiRequestsTotalMD, + apiRequestsErrorsTotalMD, + apiRequests5xxErrorsTotalMD, + apiRequests4xxErrorsTotalMD, + apiRequestsCanceledTotalMD, + + apiRequestsTTFBSecondsDistributionMD, + + apiTrafficSentBytesMD, + apiTrafficRecvBytesMD, + }, + JoinLoaders(loadAPIRequestsHTTPMetrics, loadAPIRequestsTTFBMetrics, + loadAPIRequestsNetworkMetrics), + ) + + apiBucketMG := NewBucketMetricsGroup(apiBucketCollectorPath, + []MetricDescriptor{ + apiBucketTrafficRecvBytesMD, + apiBucketTrafficSentBytesMD, + + apiBucketRequestsInFlightMD, + apiBucketRequestsTotalMD, + apiBucketRequestsCanceledMD, + apiBucketRequests4xxErrorsMD, + apiBucketRequests5xxErrorsMD, + + apiBucketRequestsTTFBSecondsDistributionMD, + }, + JoinBucketLoaders(loadAPIBucketHTTPMetrics, loadAPIBucketTTFBMetrics), + ) + + systemNetworkInternodeMG := NewMetricsGroup(systemNetworkInternodeCollectorPath, + []MetricDescriptor{ + internodeErrorsTotalMD, + internodeDialedErrorsTotalMD, + internodeDialAvgTimeNanosMD, + internodeSentBytesTotalMD, + internodeRecvBytesTotalMD, + }, + loadNetworkInternodeMetrics, + ) + + systemDriveMG := NewMetricsGroup(systemDriveCollectorPath, + []MetricDescriptor{ + driveUsedBytesMD, + driveFreeBytesMD, + driveTotalBytesMD, + driveFreeInodesMD, + driveTimeoutErrorsMD, + driveAvailabilityErrorsMD, + driveWaitingIOMD, + driveAPILatencyMD, + + driveOfflineCountMD, + driveOnlineCountMD, + driveCountMD, + }, + loadDriveMetrics, + ) + + clusterHealthMG := NewMetricsGroup(clusterHealthCollectorPath, + []MetricDescriptor{ + healthDrivesOfflineCountMD, + healthDrivesOnlineCountMD, + healthDrivesCountMD, + + healthNodesOfflineCountMD, + healthNodesOnlineCountMD, + + healthCapacityRawTotalBytesMD, + healthCapacityRawFreeBytesMD, + healthCapacityUsableTotalBytesMD, + healthCapacityUsableFreeBytesMD, + }, + JoinLoaders(loadClusterHealthDriveMetrics, + loadClusterHealthNodeMetrics, + loadClusterHealthCapacityMetrics), + ) + + clusterUsageObjectsMG := NewMetricsGroup(clusterUsageObjectsCollectorPath, + []MetricDescriptor{ + usageSinceLastUpdateSecondsMD, + usageTotalBytesMD, + usageObjectsCountMD, + usageVersionsCountMD, + usageDeleteMarkersCountMD, + usageBucketsCountMD, + usageObjectsDistributionMD, + usageVersionsDistributionMD, + }, + loadClusterUsageObjectMetrics, + ) + + clusterUsageBucketsMG := NewBucketMetricsGroup(clusterUsageBucketsCollectorPath, + []MetricDescriptor{ + usageSinceLastUpdateSecondsMD, + usageBucketTotalBytesMD, + usageBucketObjectsTotalMD, + usageBucketVersionsCountMD, + usageBucketDeleteMarkersCountMD, + usageBucketQuotaTotalBytesMD, + usageBucketObjectSizeDistributionMD, + usageBucketObjectVersionCountDistributionMD, + }, + loadClusterUsageBucketMetrics, + ) + + clusterErasureSetMG := NewMetricsGroup(clusterErasureSetCollectorPath, + []MetricDescriptor{ + erasureSetOverallWriteQuorumMD, + erasureSetOverallHealthMD, + erasureSetReadQuorumMD, + erasureSetWriteQuorumMD, + erasureSetOnlineDrivesCountMD, + erasureSetHealingDrivesCountMD, + erasureSetHealthMD, + }, + loadClusterErasureSetMetrics, + ) + + allMetricGroups := []*MetricsGroup{ + apiRequestsMG, + apiBucketMG, + + systemNetworkInternodeMG, + systemDriveMG, + + clusterHealthMG, + clusterUsageObjectsMG, + clusterUsageBucketsMG, + clusterErasureSetMG, + } + + // 
Bucket metrics are special, they always include the bucket label. These + // metrics required a list of buckets to be passed to the loader, and the list + // of buckets is not known until the request is made. So we keep a separate + // map for bucket metrics and handle them specially. + + // Add the serverName and poolIndex labels to all non-cluster metrics. + // + // Also create metric group maps and set the cache. + metricsCache := newMetricsCache() + mgMap := make(map[collectorPath]*MetricsGroup) + bucketMGMap := make(map[collectorPath]*MetricsGroup) + for _, mg := range allMetricGroups { + if !strings.HasPrefix(string(mg.CollectorPath), clusterBasePath) { + mg.AddExtraLabels( + serverName, globalLocalNodeName, + // poolIndex, strconv.Itoa(globalLocalPoolIdx), + ) + } + mg.SetCache(metricsCache) + if mg.IsBucketMetricsGroup() { + bucketMGMap[mg.CollectorPath] = mg + } else { + mgMap[mg.CollectorPath] = mg + } + } + + // Prepare to register the collectors. Other than `MetricGroup` collectors, + // we also have standard collectors like `ProcessCollector` and `GoCollector`. + + // Create all Non-`MetricGroup` collectors here. + collectors := map[collectorPath]prometheus.Collector{ + systemProcessCollectorPath: collectors.NewProcessCollector(collectors.ProcessCollectorOpts{ + ReportErrors: true, + }), + systemGoCollectorPath: collectors.NewGoCollector(), + } + + // Add all `MetricGroup` collectors to the map. + for _, mg := range allMetricGroups { + collectors[mg.CollectorPath] = mg + } + + // Helper function to register a collector and return a gatherer for it. + mustRegister := func(c ...prometheus.Collector) prometheus.Gatherer { + subRegistry := prometheus.NewRegistry() + for _, col := range c { + subRegistry.MustRegister(col) + } + r.MustRegister(subRegistry) + return subRegistry + } + + // Register all collectors and create gatherers for them. + gatherers := make(map[collectorPath]prometheus.Gatherer, len(collectors)) + collectorPaths := make([]collectorPath, 0, len(collectors)) + for path, collector := range collectors { + gatherers[path] = mustRegister(collector) + collectorPaths = append(collectorPaths, path) + } + slices.Sort(collectorPaths) + return &metricsV3Collection{ + mgMap: mgMap, + bucketMGMap: bucketMGMap, + mgGatherers: gatherers, + collectorPaths: collectorPaths, + } +} diff --git a/cmd/notification.go b/cmd/notification.go index 14da58193..12fea844a 100644 --- a/cmd/notification.go +++ b/cmd/notification.go @@ -870,12 +870,12 @@ func (sys *NotificationSys) GetMetrics(ctx context.Context, t madmin.MetricType, } // GetResourceMetrics - gets the resource metrics from all nodes excluding self. -func (sys *NotificationSys) GetResourceMetrics(ctx context.Context) <-chan Metric { +func (sys *NotificationSys) GetResourceMetrics(ctx context.Context) <-chan MetricV2 { if sys == nil { return nil } g := errgroup.WithNErrs(len(sys.peerClients)) - peerChannels := make([]<-chan Metric, len(sys.peerClients)) + peerChannels := make([]<-chan MetricV2, len(sys.peerClients)) for index := range sys.peerClients { index := index g.Go(func() error { @@ -1214,8 +1214,8 @@ func (sys *NotificationSys) GetBandwidthReports(ctx context.Context, buckets ... 
return consolidatedReport } -func (sys *NotificationSys) collectPeerMetrics(ctx context.Context, peerChannels []<-chan Metric, g *errgroup.Group) <-chan Metric { - ch := make(chan Metric) +func (sys *NotificationSys) collectPeerMetrics(ctx context.Context, peerChannels []<-chan MetricV2, g *errgroup.Group) <-chan MetricV2 { + ch := make(chan MetricV2) var wg sync.WaitGroup for index, err := range g.Wait() { if err != nil { @@ -1229,7 +1229,7 @@ func (sys *NotificationSys) collectPeerMetrics(ctx context.Context, peerChannels continue } wg.Add(1) - go func(ctx context.Context, peerChannel <-chan Metric, wg *sync.WaitGroup) { + go func(ctx context.Context, peerChannel <-chan MetricV2, wg *sync.WaitGroup) { defer wg.Done() for { select { @@ -1248,7 +1248,7 @@ func (sys *NotificationSys) collectPeerMetrics(ctx context.Context, peerChannels } }(ctx, peerChannels[index], &wg) } - go func(wg *sync.WaitGroup, ch chan Metric) { + go func(wg *sync.WaitGroup, ch chan MetricV2) { wg.Wait() xioutil.SafeClose(ch) }(&wg, ch) @@ -1256,12 +1256,12 @@ func (sys *NotificationSys) collectPeerMetrics(ctx context.Context, peerChannels } // GetBucketMetrics - gets the cluster level bucket metrics from all nodes excluding self. -func (sys *NotificationSys) GetBucketMetrics(ctx context.Context) <-chan Metric { +func (sys *NotificationSys) GetBucketMetrics(ctx context.Context) <-chan MetricV2 { if sys == nil { return nil } g := errgroup.WithNErrs(len(sys.peerClients)) - peerChannels := make([]<-chan Metric, len(sys.peerClients)) + peerChannels := make([]<-chan MetricV2, len(sys.peerClients)) for index := range sys.peerClients { index := index g.Go(func() error { @@ -1277,12 +1277,12 @@ func (sys *NotificationSys) GetBucketMetrics(ctx context.Context) <-chan Metric } // GetClusterMetrics - gets the cluster metrics from all nodes excluding self. 
-func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) <-chan Metric {
+func (sys *NotificationSys) GetClusterMetrics(ctx context.Context) <-chan MetricV2 {
 	if sys == nil {
 		return nil
 	}
 	g := errgroup.WithNErrs(len(sys.peerClients))
-	peerChannels := make([]<-chan Metric, len(sys.peerClients))
+	peerChannels := make([]<-chan MetricV2, len(sys.peerClients))
 	for index := range sys.peerClients {
 		index := index
 		g.Go(func() error {
diff --git a/cmd/peer-rest-client.go b/cmd/peer-rest-client.go
index 16c22935a..404991358 100644
--- a/cmd/peer-rest-client.go
+++ b/cmd/peer-rest-client.go
@@ -641,13 +641,13 @@ func (client *peerRESTClient) MonitorBandwidth(ctx context.Context, buckets []st
 	return getBandwidthRPC.Call(ctx, client.gridConn(), values)
 }
 
-func (client *peerRESTClient) GetResourceMetrics(ctx context.Context) (<-chan Metric, error) {
+func (client *peerRESTClient) GetResourceMetrics(ctx context.Context) (<-chan MetricV2, error) {
 	resp, err := getResourceMetricsRPC.Call(ctx, client.gridConn(), grid.NewMSS())
 	if err != nil {
 		return nil, err
 	}
-	ch := make(chan Metric)
-	go func(ch chan<- Metric) {
+	ch := make(chan MetricV2)
+	go func(ch chan<- MetricV2) {
 		defer close(ch)
 		for _, m := range resp.Value() {
 			if m == nil {
@@ -663,12 +663,12 @@ func (client *peerRESTClient) GetResourceMetrics(ctx context.Context) (<-chan Me
 	return ch, nil
 }
 
-func (client *peerRESTClient) GetPeerMetrics(ctx context.Context) (<-chan Metric, error) {
+func (client *peerRESTClient) GetPeerMetrics(ctx context.Context) (<-chan MetricV2, error) {
 	resp, err := getPeerMetricsRPC.Call(ctx, client.gridConn(), grid.NewMSS())
 	if err != nil {
 		return nil, err
 	}
-	ch := make(chan Metric)
+	ch := make(chan MetricV2)
 	go func() {
 		defer close(ch)
 		for _, m := range resp.Value() {
@@ -685,12 +685,12 @@ func (client *peerRESTClient) GetPeerMetrics(ctx context.Context) (<-chan Metric
 	return ch, nil
 }
 
-func (client *peerRESTClient) GetPeerBucketMetrics(ctx context.Context) (<-chan Metric, error) {
+func (client *peerRESTClient) GetPeerBucketMetrics(ctx context.Context) (<-chan MetricV2, error) {
 	resp, err := getPeerBucketMetricsRPC.Call(ctx, client.gridConn(), grid.NewMSS())
 	if err != nil {
 		return nil, err
 	}
-	ch := make(chan Metric)
+	ch := make(chan MetricV2)
 	go func() {
 		defer close(ch)
 		for _, m := range resp.Value() {
diff --git a/cmd/peer-rest-server.go b/cmd/peer-rest-server.go
index 98a3fe683..b7adbccf8 100644
--- a/cmd/peer-rest-server.go
+++ b/cmd/peer-rest-server.go
@@ -54,7 +54,7 @@ type peerRESTServer struct{}
 var (
 	// Types & Wrappers
 	aoBucketInfo = grid.NewArrayOf[*BucketInfo](func() *BucketInfo { return &BucketInfo{} })
-	aoMetricsGroup = grid.NewArrayOf[*Metric](func() *Metric { return &Metric{} })
+	aoMetricsGroup = grid.NewArrayOf[*MetricV2](func() *MetricV2 { return &MetricV2{} })
 	madminBgHealState = grid.NewJSONPool[madmin.BgHealState]()
 	madminCPUs = grid.NewJSONPool[madmin.CPUs]()
 	madminMemInfo = grid.NewJSONPool[madmin.MemInfo]()
@@ -88,9 +88,9 @@ var (
 	getNetInfoRPC = grid.NewSingleHandler[*grid.MSS, *grid.JSON[madmin.NetInfo]](grid.HandlerGetNetInfo, grid.NewMSS, madminNetInfo.NewJSON)
 	getOSInfoRPC = grid.NewSingleHandler[*grid.MSS, *grid.JSON[madmin.OSInfo]](grid.HandlerGetOSInfo, grid.NewMSS, madminOSInfo.NewJSON)
 	getPartitionsRPC = grid.NewSingleHandler[*grid.MSS, *grid.JSON[madmin.Partitions]](grid.HandlerGetPartitions, grid.NewMSS, madminPartitions.NewJSON)
-	getPeerBucketMetricsRPC = grid.NewSingleHandler[*grid.MSS, *grid.Array[*Metric]](grid.HandlerGetPeerBucketMetrics, grid.NewMSS, aoMetricsGroup.New)
-	getPeerMetricsRPC = grid.NewSingleHandler[*grid.MSS, *grid.Array[*Metric]](grid.HandlerGetPeerMetrics, grid.NewMSS, aoMetricsGroup.New)
-	getResourceMetricsRPC = grid.NewSingleHandler[*grid.MSS, *grid.Array[*Metric]](grid.HandlerGetResourceMetrics, grid.NewMSS, aoMetricsGroup.New)
+	getPeerBucketMetricsRPC = grid.NewSingleHandler[*grid.MSS, *grid.Array[*MetricV2]](grid.HandlerGetPeerBucketMetrics, grid.NewMSS, aoMetricsGroup.New)
+	getPeerMetricsRPC = grid.NewSingleHandler[*grid.MSS, *grid.Array[*MetricV2]](grid.HandlerGetPeerMetrics, grid.NewMSS, aoMetricsGroup.New)
+	getResourceMetricsRPC = grid.NewSingleHandler[*grid.MSS, *grid.Array[*MetricV2]](grid.HandlerGetResourceMetrics, grid.NewMSS, aoMetricsGroup.New)
 	getProcInfoRPC = grid.NewSingleHandler[*grid.MSS, *grid.JSON[madmin.ProcInfo]](grid.HandlerGetProcInfo, grid.NewMSS, madminProcInfo.NewJSON)
 	getSRMetricsRPC = grid.NewSingleHandler[*grid.MSS, *SRMetricsSummary](grid.HandlerGetSRMetrics, grid.NewMSS, func() *SRMetricsSummary { return &SRMetricsSummary{} })
 	getSysConfigRPC = grid.NewSingleHandler[*grid.MSS, *grid.JSON[madmin.SysConfig]](grid.HandlerGetSysConfig, grid.NewMSS, madminSysConfig.NewJSON)
@@ -1000,9 +1000,9 @@ func (s *peerRESTServer) GetBandwidth(params *grid.URLValues) (*bandwidth.Bucket
 	return globalBucketMonitor.GetReport(selectBuckets), nil
 }
 
-func (s *peerRESTServer) GetResourceMetrics(_ *grid.MSS) (*grid.Array[*Metric], *grid.RemoteErr) {
-	res := make([]*Metric, 0, len(resourceMetricsGroups))
-	populateAndPublish(resourceMetricsGroups, func(m Metric) bool {
+func (s *peerRESTServer) GetResourceMetrics(_ *grid.MSS) (*grid.Array[*MetricV2], *grid.RemoteErr) {
+	res := make([]*MetricV2, 0, len(resourceMetricsGroups))
+	populateAndPublish(resourceMetricsGroups, func(m MetricV2) bool {
 		if m.VariableLabels == nil {
 			m.VariableLabels = make(map[string]string, 1)
 		}
@@ -1014,9 +1014,9 @@ func (s *peerRESTServer) GetResourceMetrics(_ *grid.MSS) (*grid.Array[*Metric],
 }
 
 // GetPeerMetrics gets the metrics to be federated across peers.
-func (s *peerRESTServer) GetPeerMetrics(_ *grid.MSS) (*grid.Array[*Metric], *grid.RemoteErr) {
-	res := make([]*Metric, 0, len(peerMetricsGroups))
-	populateAndPublish(peerMetricsGroups, func(m Metric) bool {
+func (s *peerRESTServer) GetPeerMetrics(_ *grid.MSS) (*grid.Array[*MetricV2], *grid.RemoteErr) {
+	res := make([]*MetricV2, 0, len(peerMetricsGroups))
+	populateAndPublish(peerMetricsGroups, func(m MetricV2) bool {
 		if m.VariableLabels == nil {
 			m.VariableLabels = make(map[string]string, 1)
 		}
@@ -1028,9 +1028,9 @@ func (s *peerRESTServer) GetPeerMetrics(_ *grid.MSS) (*grid.Array[*Metric], *gri
 }
 
 // GetPeerBucketMetrics gets the metrics to be federated across peers.
-func (s *peerRESTServer) GetPeerBucketMetrics(_ *grid.MSS) (*grid.Array[*Metric], *grid.RemoteErr) {
-	res := make([]*Metric, 0, len(bucketPeerMetricsGroups))
-	populateAndPublish(bucketPeerMetricsGroups, func(m Metric) bool {
+func (s *peerRESTServer) GetPeerBucketMetrics(_ *grid.MSS) (*grid.Array[*MetricV2], *grid.RemoteErr) {
+	res := make([]*MetricV2, 0, len(bucketPeerMetricsGroups))
+	populateAndPublish(bucketPeerMetricsGroups, func(m MetricV2) bool {
 		if m.VariableLabels == nil {
 			m.VariableLabels = make(map[string]string, 1)
 		}
diff --git a/cmd/tier.go b/cmd/tier.go
index f785ce845..4d5229673 100644
--- a/cmd/tier.go
+++ b/cmd/tier.go
@@ -158,17 +158,17 @@ var (
 	}
 )
 
-func (t *tierMetrics) Report() []Metric {
+func (t *tierMetrics) Report() []MetricV2 {
 	metrics := getHistogramMetrics(t.histogram, tierTTLBMD, true)
 	t.RLock()
 	defer t.RUnlock()
 	for tier, stat := range t.requestsCount {
-		metrics = append(metrics, Metric{
+		metrics = append(metrics, MetricV2{
 			Description:    tierRequestsSuccessMD,
 			Value:          float64(stat.success),
 			VariableLabels: map[string]string{"tier": tier},
 		})
-		metrics = append(metrics, Metric{
+		metrics = append(metrics, MetricV2{
 			Description:    tierRequestsFailureMD,
 			Value:          float64(stat.failure),
 			VariableLabels: map[string]string{"tier": tier},
diff --git a/docs/metrics/v3.md b/docs/metrics/v3.md
new file mode 100644
index 000000000..e048cf44f
--- /dev/null
+++ b/docs/metrics/v3.md
@@ -0,0 +1,178 @@
+# Metrics Version 3
+
+In metrics version 3, all metrics are available under the endpoint:
+
+```
+/minio/metrics/v3
+```
+
+however, a specific path under this is required.
+
+Metrics are organized into groups at paths **relative** to the top-level endpoint above.
+
+## Metrics Request Handling
+
+Each endpoint below can be queried at different intervals as needed via a scrape configuration in Prometheus or a compatible metrics collection tool.
+
+For ease of configuration, each (non-empty) parent path serves all metric endpoints at its descendant paths. For example, to query all system metrics one needs to only scrape `/minio/metrics/v3/system/`.
+
+Some metrics are bucket-specific. These will have a `/bucket` component in their path. As the number of buckets can be large, the metrics scrape operation needs to be provided with a specific list of buckets via the `buckets` query parameter. Only metrics for the given buckets will be returned (with the bucket label set). For example, to query API metrics for buckets `test1` and `test2`, make a scrape request to `/minio/metrics/v3/api/bucket?buckets=test1,test2`.
+
+Instead of a metrics scrape, it is also possible to list the metrics that would be returned by a path. This is done by adding a `?list` query parameter. The MinIO server will then list all possible metrics that could be returned. During an actual metrics scrape, only available metrics are returned - not all of them. With the `list` query parameter, the output format can be selected - just set the request `Content-Type` to `application/json` for JSON output, or `text/plain` for a simple markdown formatted table. The latter is the default.
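+
+As an illustration of the request handling described above, the following is a minimal Go sketch (not part of MinIO) that issues one bucket-scoped scrape and one `list` request. The server address, the bucket names and the `fetch` helper are assumptions made for this example; depending on how the deployment is configured, an authorization header may also be required.
+
+```go
+package main
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+)
+
+// fetch issues a GET against a metrics URL and returns the response body.
+// A non-empty contentType selects the output format for `?list` requests.
+func fetch(rawURL, contentType string) (string, error) {
+	req, err := http.NewRequest(http.MethodGet, rawURL, nil)
+	if err != nil {
+		return "", err
+	}
+	if contentType != "" {
+		req.Header.Set("Content-Type", contentType)
+	}
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return "", err
+	}
+	defer resp.Body.Close()
+	b, err := io.ReadAll(resp.Body)
+	return string(b), err
+}
+
+func main() {
+	base := "http://localhost:9000/minio/metrics/v3" // assumed server address
+
+	// Scrape bucket-level API metrics for two hypothetical buckets.
+	out, err := fetch(base+"/api/bucket?buckets=test1,test2", "")
+	if err != nil {
+		panic(err)
+	}
+	fmt.Println(out)
+
+	// List, as JSON, the metrics that /api/requests can return.
+	out, err = fetch(base+"/api/requests?list", "application/json")
+	if err != nil {
+		panic(err)
+	}
+	fmt.Println(out)
+}
+```
+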
+## Request, System and Cluster Metrics
+
+At a high level, metrics are grouped into three categories, listed in the following sub-sections. The path in each of the tables is relative to the top-level endpoint.
+
+### Request metrics
+
+These are metrics about requests served by the (current) node.
+
+| Path            | Description                                       |
+|-----------------|---------------------------------------------------|
+| `/api/requests` | Metrics over all requests                         |
+| `/api/bucket`   | Metrics over all requests split by bucket labels  |
+
+### System metrics
+
+These are metrics about the MinIO process and the node.
+
+| Path                        | Description                                        |
+|-----------------------------|----------------------------------------------------|
+| `/system/drive`             | Metrics about drives on the system                 |
+| `/system/network/internode` | Metrics about internode requests made by the node  |
+| `/system/process`           | Standard process metrics                           |
+| `/system/go`                | Standard Go runtime metrics                        |
+
+### Cluster metrics
+
+These present metrics about the whole MinIO cluster.
+
+| Path                     | Description                 |
+|--------------------------|-----------------------------|
+| `/cluster/health`        | Cluster health metrics      |
+| `/cluster/usage/objects` | Object statistics           |
+| `/cluster/usage/buckets` | Object statistics by bucket |
+| `/cluster/erasure-set`   | Erasure set metrics         |
+
+## Metrics Listing
+
+Each of the following sub-sections lists the metrics returned by the corresponding endpoint.
+
+The standard metrics groups for ProcessCollector and GoCollector are not shown below.
+
+### `/api/requests`
+
+| Name | Type | Help | Labels |
+|------|------|------|--------|
+| `minio_api_requests_rejected_auth_total` | `counter` | Total number of requests rejected for auth failure | `type,pool_index,server` |
+| `minio_api_requests_rejected_header_total` | `counter` | Total number of requests rejected for invalid header | `type,pool_index,server` |
+| `minio_api_requests_rejected_timestamp_total` | `counter` | Total number of requests rejected for invalid timestamp | `type,pool_index,server` |
+| `minio_api_requests_rejected_invalid_total` | `counter` | Total number of invalid requests | `type,pool_index,server` |
+| `minio_api_requests_waiting_total` | `gauge` | Total number of requests in the waiting queue | `type,pool_index,server` |
+| `minio_api_requests_incoming_total` | `gauge` | Total number of incoming requests | `type,pool_index,server` |
+| `minio_api_requests_inflight_total` | `gauge` | Total number of requests currently in flight | `name,type,pool_index,server` |
+| `minio_api_requests_total` | `counter` | Total number of requests | `name,type,pool_index,server` |
+| `minio_api_requests_errors_total` | `counter` | Total number of requests with (4xx and 5xx) errors | `name,type,pool_index,server` |
+| `minio_api_requests_5xx_errors_total` | `counter` | Total number of requests with 5xx errors | `name,type,pool_index,server` |
+| `minio_api_requests_4xx_errors_total` | `counter` | Total number of requests with 4xx errors | `name,type,pool_index,server` |
+| `minio_api_requests_canceled_total` | `counter` | Total number of requests canceled by the client | `name,type,pool_index,server` |
+| `minio_api_requests_ttfb_seconds_distribution` | `counter` | Distribution of time to first byte across API calls | `name,type,le,pool_index,server` |
+| `minio_api_requests_traffic_sent_bytes` | `counter` | Total number of bytes sent | `type,pool_index,server` |
+| `minio_api_requests_traffic_received_bytes` | `counter` | Total number of bytes received | `type,pool_index,server` |
+
+### `/api/bucket`
+
+| Name | Type | Help | Labels |
+|------|------|------|--------|
+| `minio_api_bucket_traffic_received_bytes` | `counter` | Total number of bytes received for a bucket | `bucket,type,server,pool_index` |
+| `minio_api_bucket_traffic_sent_bytes` | `counter` | Total number of bytes sent for a bucket | `bucket,type,server,pool_index` |
+| `minio_api_bucket_inflight_total` | `gauge` | Total number of requests currently in flight for a bucket | `bucket,name,type,server,pool_index` |
+| `minio_api_bucket_total` | `counter` | Total number of requests for a bucket | `bucket,name,type,server,pool_index` |
+| `minio_api_bucket_canceled_total` | `counter` | Total number of requests canceled by the client for a bucket | `bucket,name,type,server,pool_index` |
+| `minio_api_bucket_4xx_errors_total` | `counter` | Total number of requests with 4xx errors for a bucket | `bucket,name,type,server,pool_index` |
+| `minio_api_bucket_5xx_errors_total` | `counter` | Total number of requests with 5xx errors for a bucket | `bucket,name,type,server,pool_index` |
+| `minio_api_bucket_ttfb_seconds_distribution` | `counter` | Distribution of time to first byte across API calls for a bucket | `bucket,name,le,type,server,pool_index` |
+
+### `/system/drive`
+
+| Name | Type | Help | Labels |
+|------|------|------|--------|
+| `minio_system_drive_used_bytes` | `gauge` | Total storage used on a drive in bytes | `drive,set_index,drive_index,pool_index,server` |
+| `minio_system_drive_free_bytes` | `gauge` | Total storage free on a drive in bytes | `drive,set_index,drive_index,pool_index,server` |
+| `minio_system_drive_total_bytes` | `gauge` | Total storage available on a drive in bytes | `drive,set_index,drive_index,pool_index,server` |
+| `minio_system_drive_free_inodes` | `gauge` | Total free inodes on a drive | `drive,set_index,drive_index,pool_index,server` |
+| `minio_system_drive_timeout_errors_total` | `counter` | Total timeout errors on a drive | `drive,set_index,drive_index,pool_index,server` |
+| `minio_system_drive_availability_errors_total` | `counter` | Total availability errors (I/O errors, permission denied and timeouts) on a drive | `drive,set_index,drive_index,pool_index,server` |
+| `minio_system_drive_waiting_io` | `gauge` | Total waiting I/O operations on a drive | `drive,set_index,drive_index,pool_index,server` |
+| `minio_system_drive_api_latency_micros` | `gauge` | Average last minute latency in µs for drive API storage operations | `drive,api,set_index,drive_index,pool_index,server` |
+| `minio_system_drive_offline_count` | `gauge` | Count of offline drives | `pool_index,server` |
+| `minio_system_drive_online_count` | `gauge` | Count of online drives | `pool_index,server` |
+| `minio_system_drive_count` | `gauge` | Count of all drives | `pool_index,server` |
+
+### `/system/network/internode`
+
+| Name | Type | Help | Labels |
+|------|------|------|--------|
+| `minio_system_network_internode_errors_total` | `counter` | Total number of failed internode calls | `server,pool_index` |
+| `minio_system_network_internode_dial_errors_total` | `counter` | Total number of internode TCP dial timeouts and errors | `server,pool_index` |
+| `minio_system_network_internode_dial_avg_time_nanos` | `gauge` | Average dial time of internode TCP calls in nanoseconds | `server,pool_index` |
+| `minio_system_network_internode_sent_bytes_total` | `counter` | Total number of bytes sent to other peer nodes | `server,pool_index` |
+| `minio_system_network_internode_recv_bytes_total` | `counter` | Total number of bytes received from other peer nodes | `server,pool_index` |
+
+### `/cluster/health`
+
+| Name | Type | Help | Labels |
+|------|------|------|--------|
+| `minio_cluster_health_drives_offline_count` | `gauge` | Count of offline drives in the cluster | |
+| `minio_cluster_health_drives_online_count` | `gauge` | Count of online drives in the cluster | |
+| `minio_cluster_health_drives_count` | `gauge` | Count of all drives in the cluster | |
+| `minio_cluster_health_nodes_offline_count` | `gauge` | Count of offline nodes in the cluster | |
+| `minio_cluster_health_nodes_online_count` | `gauge` | Count of online nodes in the cluster | |
+| `minio_cluster_health_capacity_raw_total_bytes` | `gauge` | Total cluster raw storage capacity in bytes | |
+| `minio_cluster_health_capacity_raw_free_bytes` | `gauge` | Total cluster raw storage free in bytes | |
+| `minio_cluster_health_capacity_usable_total_bytes` | `gauge` | Total cluster usable storage capacity in bytes | |
+| `minio_cluster_health_capacity_usable_free_bytes` | `gauge` | Total cluster usable storage free in bytes | |
+
+### `/cluster/usage/objects`
+
+| Name | Type | Help | Labels |
+|------|------|------|--------|
+| `minio_cluster_usage_objects_since_last_update_seconds` | `gauge` | Time since last update of usage metrics in seconds | |
+| `minio_cluster_usage_objects_total_bytes` | `gauge` | Total cluster usage in bytes | |
+| `minio_cluster_usage_objects_count` | `gauge` | Total cluster objects count | |
+| `minio_cluster_usage_objects_versions_count` | `gauge` | Total cluster object versions (including delete markers) count | |
+| `minio_cluster_usage_objects_delete_markers_count` | `gauge` | Total cluster delete markers count | |
+| `minio_cluster_usage_objects_buckets_count` | `gauge` | Total cluster buckets count | |
+| `minio_cluster_usage_objects_size_distribution` | `gauge` | Cluster object size distribution | `range` |
+| `minio_cluster_usage_objects_version_count_distribution` | `gauge` | Cluster object version count distribution | `range` |
+
+### `/cluster/usage/buckets`
+
+| Name | Type | Help | Labels |
+|------|------|------|--------|
+| `minio_cluster_usage_buckets_since_last_update_seconds` | `gauge` | Time since last update of usage metrics in seconds | |
+| `minio_cluster_usage_buckets_total_bytes` | `gauge` | Total bucket size in bytes | `bucket` |
+| `minio_cluster_usage_buckets_objects_count` | `gauge` | Total objects count in bucket | `bucket` |
+| `minio_cluster_usage_buckets_versions_count` | `gauge` | Total object versions (including delete markers) count in bucket | `bucket` |
+| `minio_cluster_usage_buckets_delete_markers_count` | `gauge` | Total delete markers count in bucket | `bucket` |
+| `minio_cluster_usage_buckets_quota_total_bytes` | `gauge` | Total bucket quota in bytes | `bucket` |
+| `minio_cluster_usage_buckets_object_size_distribution` | `gauge` | Bucket object size distribution | `range,bucket` |
+| `minio_cluster_usage_buckets_object_version_count_distribution` | `gauge` | Bucket object version count distribution | `range,bucket` |
+
+### `/cluster/erasure-set`
+
+| Name | Type | Help | Labels |
+|------|------|------|--------|
+| `minio_cluster_erasure_set_overall_write_quorum` | `gauge` | Overall write quorum across pools and sets | |
+| `minio_cluster_erasure_set_overall_health` | `gauge` | Overall health across pools and sets (1=healthy, 0=unhealthy) | |
+| `minio_cluster_erasure_set_read_quorum` | `gauge` | Read quorum for the erasure set in a pool | `pool_id,set_id` |
+| `minio_cluster_erasure_set_write_quorum` | `gauge` | Write quorum for the erasure set in a pool | `pool_id,set_id` |
+| `minio_cluster_erasure_set_online_drives_count` | `gauge` | Count of online drives in the erasure set in a pool | `pool_id,set_id` |
+| `minio_cluster_erasure_set_healing_drives_count` | `gauge` | Count of healing drives in the erasure set in a pool | `pool_id,set_id` |
+| `minio_cluster_erasure_set_health` | `gauge` | Health of the erasure set in a pool (1=healthy, 0=unhealthy) | `pool_id,set_id` |
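+
+The groups above are served in the standard Prometheus text exposition format, so any Prometheus-compatible client can consume them. As a minimal sketch (not part of MinIO, assuming the `github.com/prometheus/common/expfmt` package and an unauthenticated server at `localhost:9000`), the following program scrapes the cluster health group and prints the online-drives count:
+
+```go
+package main
+
+import (
+	"fmt"
+	"net/http"
+
+	"github.com/prometheus/common/expfmt"
+)
+
+func main() {
+	resp, err := http.Get("http://localhost:9000/minio/metrics/v3/cluster/health")
+	if err != nil {
+		panic(err)
+	}
+	defer resp.Body.Close()
+
+	// Parse the Prometheus text exposition format into metric families.
+	var parser expfmt.TextParser
+	families, err := parser.TextToMetricFamilies(resp.Body)
+	if err != nil {
+		panic(err)
+	}
+
+	if mf, ok := families["minio_cluster_health_drives_online_count"]; ok {
+		for _, m := range mf.GetMetric() {
+			fmt.Println("online drives:", m.GetGauge().GetValue())
+		}
+	}
+}
+```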