Add Historic CPU and memory stats (#7136)

Collect historic CPU and memory stats. Also, return actual values
instead of formatted strings to the client. String formatting
prevents the values from being processed by the server or
by the client without first parsing them.

This change allows the values to be processed (e.g. to
compute a rolling average over the lifetime of the minio server)
and offloads the formatting to the client.
This commit is contained in:
Sidhartha Mani 2019-01-29 23:17:32 -08:00 committed by Nitish Tiwari
parent d0015b4d66
commit 34e7259f95
6 changed files with 126 additions and 33 deletions

View File

@ -313,18 +313,20 @@ type ServerDrivesPerfInfo struct {
// of one minio node. It also reports any errors if encountered // of one minio node. It also reports any errors if encountered
// while trying to reach this server. // while trying to reach this server.
type ServerCPULoadInfo struct { type ServerCPULoadInfo struct {
Addr string `json:"addr"` Addr string `json:"addr"`
Error string `json:"error,omitempty"` Error string `json:"error,omitempty"`
Load []cpu.Load `json:"load"` Load []cpu.Load `json:"load"`
HistoricLoad []cpu.Load `json:"historicLoad"`
} }
// ServerMemUsageInfo holds information about memory utilization // ServerMemUsageInfo holds information about memory utilization
// of one minio node. It also reports any errors if encountered // of one minio node. It also reports any errors if encountered
// while trying to reach this server. // while trying to reach this server.
type ServerMemUsageInfo struct { type ServerMemUsageInfo struct {
Addr string `json:"addr"` Addr string `json:"addr"`
Error string `json:"error,omitempty"` Error string `json:"error,omitempty"`
Usage []mem.Usage `json:"usage"` Usage []mem.Usage `json:"usage"`
HistoricUsage []mem.Usage `json:"historicUsage"`
} }
// PerfInfoHandler - GET /minio/admin/v1/performance?perfType={perfType} // PerfInfoHandler - GET /minio/admin/v1/performance?perfType={perfType}

View File

@ -204,6 +204,7 @@ func (endpoints EndpointList) GetString(i int) string {
// local endpoints from given list of endpoints // local endpoints from given list of endpoints
func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo { func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo {
var memUsages []mem.Usage var memUsages []mem.Usage
var historicUsages []mem.Usage
var addr string var addr string
scratchSpace := map[string]bool{} scratchSpace := map[string]bool{}
for _, endpoint := range endpoints { for _, endpoint := range endpoints {
@ -215,12 +216,15 @@ func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo {
addr = GetLocalPeer(endpoints) addr = GetLocalPeer(endpoints)
memUsage := mem.GetUsage() memUsage := mem.GetUsage()
memUsages = append(memUsages, memUsage) memUsages = append(memUsages, memUsage)
historicUsage := mem.GetHistoricUsage()
historicUsages = append(historicUsages, historicUsage)
scratchSpace[endpoint.Host] = true scratchSpace[endpoint.Host] = true
} }
} }
return ServerMemUsageInfo{ return ServerMemUsageInfo{
Addr: addr, Addr: addr,
Usage: memUsages, Usage: memUsages,
HistoricUsage: historicUsages,
} }
} }
@ -228,6 +232,7 @@ func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo {
// local endpoints from given list of endpoints // local endpoints from given list of endpoints
func localEndpointsCPULoad(endpoints EndpointList) ServerCPULoadInfo { func localEndpointsCPULoad(endpoints EndpointList) ServerCPULoadInfo {
var cpuLoads []cpu.Load var cpuLoads []cpu.Load
var historicLoads []cpu.Load
var addr string var addr string
scratchSpace := map[string]bool{} scratchSpace := map[string]bool{}
for _, endpoint := range endpoints { for _, endpoint := range endpoints {
@ -239,12 +244,15 @@ func localEndpointsCPULoad(endpoints EndpointList) ServerCPULoadInfo {
addr = GetLocalPeer(endpoints) addr = GetLocalPeer(endpoints)
cpuLoad := cpu.GetLoad() cpuLoad := cpu.GetLoad()
cpuLoads = append(cpuLoads, cpuLoad) cpuLoads = append(cpuLoads, cpuLoad)
historicLoad := cpu.GetHistoricLoad()
historicLoads = append(historicLoads, historicLoad)
scratchSpace[endpoint.Host] = true scratchSpace[endpoint.Host] = true
} }
} }
return ServerCPULoadInfo{ return ServerCPULoadInfo{
Addr: addr, Addr: addr,
Load: cpuLoads, Load: cpuLoads,
HistoricLoad: historicLoads,
} }
} }

View File

@ -17,11 +17,44 @@
package cpu package cpu
import ( import (
"fmt"
"sync" "sync"
"time" "time"
) )
// rollingAvg holds the historic CPU load statistics (min, max and the
// cumulative mean of the sampled averages) of the minio server over its
// lifetime. It is allocated in init, updated by the sampling goroutine
// started there, and read by GetHistoricLoad.
var rollingAvg *Load
// cpuLoadMeasureInterval is the interval of time between two
// measurements of CPU load; the sampling goroutine started in init
// sleeps for this long before each call to GetLoad.
const cpuLoadMeasureInterval = 5 * time.Second
// init seeds rollingAvg with a zero-valued Load and starts a background
// goroutine that samples the CPU load via GetLoad once every
// cpuLoadMeasureInterval, folding each sample into rollingAvg:
//   - Max tracks the largest Max seen so far,
//   - Min tracks the smallest Min seen so far,
//   - Avg is the mean of every sampled Avg since startup.
//
// The goroutine has no stop condition and runs for the lifetime of the
// process.
//
// NOTE(review): rollingAvg is updated here and read by GetHistoricLoad
// without any synchronization visible in this file — confirm whether
// readers may observe a partially updated value.
func init() {
	rollingAvg = &Load{
		Min: 0,
		Max: 0,
		Avg: 0,
	}
	var rollingSum float64
	var cycles float64
	go func() {
		for {
			time.Sleep(cpuLoadMeasureInterval)
			cycles++
			currLoad := GetLoad()
			if cycles == 1 {
				// First sample: seed the extremes directly. Treating a
				// stored zero as "unset" would let a genuine 0% minimum
				// be overwritten by the next, larger sample.
				rollingAvg.Max = currLoad.Max
				rollingAvg.Min = currLoad.Min
			} else {
				if currLoad.Max > rollingAvg.Max {
					rollingAvg.Max = currLoad.Max
				}
				if currLoad.Min < rollingAvg.Min {
					rollingAvg.Min = currLoad.Min
				}
			}
			rollingSum += currLoad.Avg
			rollingAvg.Avg = rollingSum / cycles
		}
	}()
}
const ( const (
// cpuLoadWindow is the interval of time for which the // cpuLoadWindow is the interval of time for which the
// cpu utilization is measured // cpu utilization is measured
@ -37,15 +70,34 @@ const (
// Load holds CPU utilization % measured in three intervals of 200ms each // Load holds CPU utilization % measured in three intervals of 200ms each
type Load struct { type Load struct {
Avg string `json:"avg"` Avg float64 `json:"avg"`
Max string `json:"max"` Max float64 `json:"max"`
Min string `json:"min"` Min float64 `json:"min"`
Error string `json:"error,omitempty"` Error string `json:"error,omitempty"`
} }
type counter struct{} type counter struct{}
// GetLoad returns the CPU utilization % of the current process // GetHistoricLoad returns the historic CPU utilization of the current process
func GetHistoricLoad() Load {
	// Dereference so the caller receives its own copy of the statistics.
	historic := *rollingAvg
	return historic
}
// GetLoad returns the CPU utilization of the current process
// This function works by calculating the amount of cpu clock
// cycles the current process used in a given time window
//
// This corresponds to the CPU utilization calculation done by
// tools like top. Here, we use clock_gettime with the
// CLOCK_PROCESS_CPUTIME_ID parameter to obtain the total number of
// clock ticks used by the process so far. Then we sleep for
// 200ms and obtain the total number of clock ticks again. The
// difference between the two counts provides us the number of
// clock ticks used by the process in the 200ms interval.
//
// The ratio of clock ticks used (measured in nanoseconds) to number
// of nanoseconds in 200 milliseconds provides us the CPU usage
// for the process currently
func GetLoad() Load { func GetLoad() Load {
vals := make(chan time.Duration, 3) vals := make(chan time.Duration, 3)
wg := sync.WaitGroup{} wg := sync.WaitGroup{}
@ -83,9 +135,9 @@ func GetLoad() Load {
close(vals) close(vals)
avg := sum / 3 avg := sum / 3
return Load{ return Load{
Avg: fmt.Sprintf("%.2f%%", toFixed4(float64(avg)/float64(200*time.Millisecond))*100), Avg: toFixed4(float64(avg)/float64(200*time.Millisecond)) * 100,
Max: fmt.Sprintf("%.2f%%", toFixed4(float64(max)/float64(200*time.Millisecond))*100), Max: toFixed4(float64(max)/float64(200*time.Millisecond)) * 100,
Min: fmt.Sprintf("%.2f%%", toFixed4(float64(min)/float64(200*time.Millisecond))*100), Min: toFixed4(float64(min)/float64(200*time.Millisecond)) * 100,
Error: "", Error: "",
} }
} }

View File

@ -235,9 +235,9 @@ Fetches CPU utilization for all cluster nodes. Returned value is in Bytes.
| Param | Type | Description | | Param | Type | Description |
|-------|------|-------------| |-------|------|-------------|
|`cpu.Load.Avg` | _string_ | The average utilization % of the CPU measured in a 200ms interval | |`cpu.Load.Avg` | _float64_ | The average utilization of the CPU measured in a 200ms interval |
|`cpu.Load.Min` | _string_ | The minimum utilization % of the CPU measured in a 200ms interval | |`cpu.Load.Min` | _float64_ | The minimum utilization of the CPU measured in a 200ms interval |
|`cpu.Load.Max` | _string_ | The maximum utilization % of the CPU measured in a 200ms interval | |`cpu.Load.Max` | _float64_ | The maximum utilization of the CPU measured in a 200ms interval |
|`cpu.Load.Error` | _string_ | Error (if any) encountered while accessing the CPU info | |`cpu.Load.Error` | _string_ | Error (if any) encountered while accessing the CPU info |
<a name="ServerMemUsageInfo"></a> <a name="ServerMemUsageInfo"></a>
@ -253,7 +253,7 @@ Fetches Mem utilization for all cluster nodes. Returned value is in Bytes.
| Param | Type | Description | | Param | Type | Description |
|-------|------|-------------| |-------|------|-------------|
|`mem.Usage.Mem` | _string_ | The total number of bytes obtained from the OS | |`mem.Usage.Mem` | _uint64_ | The total number of bytes obtained from the OS |
|`mem.Usage.Error` | _string_ | Error (if any) encountered while accessing the memory info | |`mem.Usage.Error` | _string_ | Error (if any) encountered while accessing the memory info |
## 6. Heal operations ## 6. Heal operations

View File

@ -199,9 +199,10 @@ func (adm *AdminClient) ServerDrivesPerfInfo() ([]ServerDrivesPerfInfo, error) {
// ServerCPULoadInfo holds information about address and cpu load of // ServerCPULoadInfo holds information about address and cpu load of
// a single server node // a single server node
type ServerCPULoadInfo struct { type ServerCPULoadInfo struct {
Addr string `json:"addr"` Addr string `json:"addr"`
Error string `json:"error,omitempty"` Error string `json:"error,omitempty"`
Load []cpu.Load `json:"load"` Load []cpu.Load `json:"load"`
HistoricLoad []cpu.Load `json:"historicLoad"`
} }
// ServerCPULoadInfo - Returns cpu utilization information // ServerCPULoadInfo - Returns cpu utilization information
@ -242,9 +243,10 @@ func (adm *AdminClient) ServerCPULoadInfo() ([]ServerCPULoadInfo, error) {
// ServerMemUsageInfo holds information about address and memory utilization of // ServerMemUsageInfo holds information about address and memory utilization of
// a single server node // a single server node
type ServerMemUsageInfo struct { type ServerMemUsageInfo struct {
Addr string `json:"addr"` Addr string `json:"addr"`
Error string `json:"error,omitempty"` Error string `json:"error,omitempty"`
Usage []mem.Usage `json:"usage"` Usage []mem.Usage `json:"usage"`
HistoricUsage []mem.Usage `json:"historicUsage"`
} }
// ServerMemUsageInfo - Returns mem utilization information // ServerMemUsageInfo - Returns mem utilization information

View File

@ -18,22 +18,51 @@ package mem
import ( import (
"runtime" "runtime"
"time"
humanize "github.com/dustin/go-humanize"
) )
// historicUsage holds the rolling average of memory used by
// minio server. It is allocated in init, refreshed by the sampling
// goroutine started there, and read by GetHistoricUsage.
var historicUsage *Usage
// memUsageMeasureInterval is the window of time between
// two measurements of memory usage; the sampling goroutine started
// in init sleeps for this long before each call to GetUsage.
const memUsageMeasureInterval = 5 * time.Second
// init allocates historicUsage and launches a background goroutine that,
// every memUsageMeasureInterval, samples GetUsage and folds the sample
// into the running mean stored in historicUsage.Mem.
func init() {
	historicUsage = &Usage{}
	go func() {
		var samples uint64
		for {
			time.Sleep(memUsageMeasureInterval)
			latest := GetUsage().Mem
			// Rebuild the previous total from the stored mean, then
			// average it together with the newest sample.
			prevTotal := samples * historicUsage.Mem
			samples++
			historicUsage.Mem = (prevTotal + latest) / samples
		}
	}()
}
// Usage holds memory utilization information in human readable format // Usage holds memory utilization information in human readable format
type Usage struct { type Usage struct {
Mem string `json:"mem"` Mem uint64 `json:"mem"`
Error string `json:"error,omitempty"` Error string `json:"error,omitempty"`
} }
// GetHistoricUsage returns the historic average of the memory utilized
// by the current process, as maintained in historicUsage.
func GetHistoricUsage() Usage {
	snapshot := *historicUsage
	return snapshot
}
// GetUsage measures the total memory provisioned for the current process // GetUsage measures the total memory provisioned for the current process
// from the OS // from the OS
func GetUsage() Usage { func GetUsage() Usage {
memStats := new(runtime.MemStats) memStats := new(runtime.MemStats)
runtime.ReadMemStats(memStats) runtime.ReadMemStats(memStats)
return Usage{ return Usage{
Mem: humanize.IBytes(memStats.Sys), Mem: memStats.Sys,
} }
} }