From 34e7259f9517770ed3bbed0fe317538242f715b1 Mon Sep 17 00:00:00 2001 From: Sidhartha Mani Date: Tue, 29 Jan 2019 23:17:32 -0800 Subject: [PATCH] Add Historic CPU and memory stats (#7136) Collect historic cpu and mem stats. Also, use actual values instead of formatted strings while returning to the client. The string formatting prevents values from being processed by the server or by the client without parsing it. This change will allow the values to be processed (eg. compute rolling-average over the lifetime of the minio server) and offloads the formatting to the client. --- cmd/admin-handlers.go | 14 ++++---- cmd/endpoint.go | 16 ++++++--- pkg/cpu/cpu.go | 70 ++++++++++++++++++++++++++++++++----- pkg/madmin/API.md | 8 ++--- pkg/madmin/info-commands.go | 14 ++++---- pkg/mem/mem.go | 37 +++++++++++++++++--- 6 files changed, 126 insertions(+), 33 deletions(-) diff --git a/cmd/admin-handlers.go b/cmd/admin-handlers.go index fd4ccbcdf..532bd01b4 100644 --- a/cmd/admin-handlers.go +++ b/cmd/admin-handlers.go @@ -313,18 +313,20 @@ type ServerDrivesPerfInfo struct { // of one minio node. It also reports any errors if encountered // while trying to reach this server. type ServerCPULoadInfo struct { - Addr string `json:"addr"` - Error string `json:"error,omitempty"` - Load []cpu.Load `json:"load"` + Addr string `json:"addr"` + Error string `json:"error,omitempty"` + Load []cpu.Load `json:"load"` + HistoricLoad []cpu.Load `json:"historicLoad"` } // ServerMemUsageInfo holds informantion about memory utilization // of one minio node. It also reports any errors if encountered // while trying to reach this server. 
type ServerMemUsageInfo struct { - Addr string `json:"addr"` - Error string `json:"error,omitempty"` - Usage []mem.Usage `json:"usage"` + Addr string `json:"addr"` + Error string `json:"error,omitempty"` + Usage []mem.Usage `json:"usage"` + HistoricUsage []mem.Usage `json:"historicUsage"` } // PerfInfoHandler - GET /minio/admin/v1/performance?perfType={perfType} diff --git a/cmd/endpoint.go b/cmd/endpoint.go index ab17edc99..aaed13b7f 100644 --- a/cmd/endpoint.go +++ b/cmd/endpoint.go @@ -204,6 +204,7 @@ func (endpoints EndpointList) GetString(i int) string { // local endpoints from given list of endpoints func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo { var memUsages []mem.Usage + var historicUsages []mem.Usage var addr string scratchSpace := map[string]bool{} for _, endpoint := range endpoints { @@ -215,12 +216,15 @@ func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo { addr = GetLocalPeer(endpoints) memUsage := mem.GetUsage() memUsages = append(memUsages, memUsage) + historicUsage := mem.GetHistoricUsage() + historicUsages = append(historicUsages, historicUsage) scratchSpace[endpoint.Host] = true } } return ServerMemUsageInfo{ - Addr: addr, - Usage: memUsages, + Addr: addr, + Usage: memUsages, + HistoricUsage: historicUsages, } } @@ -228,6 +232,7 @@ func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo { // local endpoints from given list of endpoints func localEndpointsCPULoad(endpoints EndpointList) ServerCPULoadInfo { var cpuLoads []cpu.Load + var historicLoads []cpu.Load var addr string scratchSpace := map[string]bool{} for _, endpoint := range endpoints { @@ -239,12 +244,15 @@ func localEndpointsCPULoad(endpoints EndpointList) ServerCPULoadInfo { addr = GetLocalPeer(endpoints) cpuLoad := cpu.GetLoad() cpuLoads = append(cpuLoads, cpuLoad) + historicLoad := cpu.GetHistoricLoad() + historicLoads = append(historicLoads, historicLoad) scratchSpace[endpoint.Host] = true } } return ServerCPULoadInfo{ - 
Addr: addr, - Load: cpuLoads, + Addr: addr, + Load: cpuLoads, + HistoricLoad: historicLoads, } } diff --git a/pkg/cpu/cpu.go b/pkg/cpu/cpu.go index 147aab3df..7ed3d4d25 100644 --- a/pkg/cpu/cpu.go +++ b/pkg/cpu/cpu.go @@ -17,11 +17,44 @@ package cpu import ( - "fmt" "sync" "time" ) +// rollingAvg holds the rolling average of the cpu load on the minio +// server over its lifetime +var rollingAvg *Load + +// cpuLoadMeasureInterval is the interval of time between two +// measurements of CPU load +const cpuLoadMeasureInterval = 5 * time.Second + +// triggers the average load computation at server spawn +func init() { + rollingAvg = &Load{ + Min: float64(0), + Max: float64(0), + Avg: float64(0), + } + var rollingSum float64 + var cycles float64 + go func() { + for { + time.Sleep(cpuLoadMeasureInterval) + cycles = cycles + 1 + currLoad := GetLoad() + if rollingAvg.Max < currLoad.Max || rollingAvg.Max == 0 { + rollingAvg.Max = currLoad.Max + } + if rollingAvg.Min > currLoad.Min || rollingAvg.Min == 0 { + rollingAvg.Min = currLoad.Min + } + rollingSum = rollingSum + currLoad.Avg + rollingAvg.Avg = rollingSum / cycles + } + }() +} + const ( // cpuLoadWindow is the interval of time for which the // cpu utilization is measured @@ -37,15 +70,34 @@ const ( // Load holds CPU utilization % measured in three intervals of 200ms each type Load struct { - Avg string `json:"avg"` - Max string `json:"max"` - Min string `json:"min"` - Error string `json:"error,omitempty"` + Avg float64 `json:"avg"` + Max float64 `json:"max"` + Min float64 `json:"min"` + Error string `json:"error,omitempty"` } type counter struct{} -// GetLoad returns the CPU utilization % of the current process +// GetHistoricLoad returns the historic CPU utilization of the current process +func GetHistoricLoad() Load { + return *rollingAvg +} + +// GetLoad returns the CPU utilization of the current process +// This function works by calculating the amount of cpu clock +// cycles the current process used in a given time 
window +// +// This corresponds to the CPU utilization calculation done by +// tools like top. Here, we use clock_gettime with the +// CLOCK_PROCESS_CPUTIME_ID parameter to obtain the total number of +// clock ticks used by the process so far. Then we sleep for +// 200ms and obtain the total number of clock ticks again. The +// difference between the two counts provides us the number of +// clock ticks used by the process in the 200ms interval. +// +// The ratio of clock ticks used (measured in nanoseconds) to number +// of nanoseconds in 200 milliseconds provides us the CPU usage +// for the process currently func GetLoad() Load { vals := make(chan time.Duration, 3) wg := sync.WaitGroup{} @@ -83,9 +135,9 @@ func GetLoad() Load { close(vals) avg := sum / 3 return Load{ - Avg: fmt.Sprintf("%.2f%%", toFixed4(float64(avg)/float64(200*time.Millisecond))*100), - Max: fmt.Sprintf("%.2f%%", toFixed4(float64(max)/float64(200*time.Millisecond))*100), - Min: fmt.Sprintf("%.2f%%", toFixed4(float64(min)/float64(200*time.Millisecond))*100), + Avg: toFixed4(float64(avg)/float64(200*time.Millisecond)) * 100, + Max: toFixed4(float64(max)/float64(200*time.Millisecond)) * 100, + Min: toFixed4(float64(min)/float64(200*time.Millisecond)) * 100, Error: "", } } diff --git a/pkg/madmin/API.md b/pkg/madmin/API.md index f65fea6a5..abb6688d5 100644 --- a/pkg/madmin/API.md +++ b/pkg/madmin/API.md @@ -235,9 +235,9 @@ Fetches CPU utilization for all cluster nodes. Returned value is in Bytes. 
| Param | Type | Description | |-------|------|-------------| -|`cpu.Load.Avg` | _string_ | The average utilization % of the CPU measured in a 200ms interval | -|`cpu.Load.Min` | _string_ | The minimum utilization % of the CPU measured in a 200ms interval | -|`cpu.Load.Max` | _string_ | The maximum utilization % of the CPU measured in a 200ms interval | +|`cpu.Load.Avg` | _float64_ | The average utilization of the CPU measured in a 200ms interval | +|`cpu.Load.Min` | _float64_ | The minimum utilization of the CPU measured in a 200ms interval | +|`cpu.Load.Max` | _float64_ | The maximum utilization of the CPU measured in a 200ms interval | |`cpu.Load.Error` | _string_ | Error (if any) encountered while accesing the CPU info | @@ -253,7 +253,7 @@ Fetches Mem utilization for all cluster nodes. Returned value is in Bytes. | Param | Type | Description | |-------|------|-------------| -|`mem.Usage.Mem` | _string_ | The total number of bytes obtained from the OS | +|`mem.Usage.Mem` | _uint64_ | The total number of bytes obtained from the OS | |`mem.Usage.Error` | _string_ | Error (if any) encountered while accesing the CPU info | ## 6. 
Heal operations diff --git a/pkg/madmin/info-commands.go b/pkg/madmin/info-commands.go index b3968117f..12479c3ff 100644 --- a/pkg/madmin/info-commands.go +++ b/pkg/madmin/info-commands.go @@ -199,9 +199,10 @@ func (adm *AdminClient) ServerDrivesPerfInfo() ([]ServerDrivesPerfInfo, error) { // ServerCPULoadInfo holds information about address and cpu load of // a single server node type ServerCPULoadInfo struct { - Addr string `json:"addr"` - Error string `json:"error,omitempty"` - Load []cpu.Load `json:"load"` + Addr string `json:"addr"` + Error string `json:"error,omitempty"` + Load []cpu.Load `json:"load"` + HistoricLoad []cpu.Load `json:"historicLoad"` } // ServerCPULoadInfo - Returns cpu utilization information @@ -242,9 +243,10 @@ func (adm *AdminClient) ServerCPULoadInfo() ([]ServerCPULoadInfo, error) { // ServerMemUsageInfo holds information about address and memory utilization of // a single server node type ServerMemUsageInfo struct { - Addr string `json:"addr"` - Error string `json:"error,omitempty"` - Usage []mem.Usage `json:"usage"` + Addr string `json:"addr"` + Error string `json:"error,omitempty"` + Usage []mem.Usage `json:"usage"` + HistoricUsage []mem.Usage `json:"historicUsage"` } // ServerMemUsageInfo - Returns mem utilization information diff --git a/pkg/mem/mem.go b/pkg/mem/mem.go index 95357d626..1eaed4fa4 100644 --- a/pkg/mem/mem.go +++ b/pkg/mem/mem.go @@ -18,22 +18,51 @@ package mem import ( "runtime" - - humanize "github.com/dustin/go-humanize" + "time" ) +// historicUsage holds the rolling average of memory used by +// minio server +var historicUsage *Usage + +// memUsageMeasureInterval is the window of time between +// two measurements of memory usage +const memUsageMeasureInterval = 5 * time.Second + +// triggers the collection of historic stats about the memory +// utilized by minio server +func init() { + historicUsage = &Usage{} + var cycles uint64 + go func() { + for { + time.Sleep(memUsageMeasureInterval) + currUsage := GetUsage() + 
currSum := cycles * historicUsage.Mem + cycles = cycles + 1 + historicUsage.Mem = (currSum + currUsage.Mem) / cycles + } + }() +} + // Usage holds memory utilization information in human readable format type Usage struct { - Mem string `json:"mem"` + Mem uint64 `json:"mem"` Error string `json:"error,omitempty"` } +// GetHistoricUsage measures the historic average of memory utilized by +// current process +func GetHistoricUsage() Usage { + return *historicUsage +} + // GetUsage measures the total memory provisioned for the current process // from the OS func GetUsage() Usage { memStats := new(runtime.MemStats) runtime.ReadMemStats(memStats) return Usage{ - Mem: humanize.IBytes(memStats.Sys), + Mem: memStats.Sys, } }