mirror of
https://github.com/minio/minio.git
synced 2025-01-25 21:53:16 -05:00
Add Historic CPU and memory stats (#7136)
Collect historic CPU and memory stats. Also, return actual values instead of formatted strings to the client. String formatting prevents the values from being processed by the server or by the client without parsing them first. This change allows the values to be processed (e.g. to compute a rolling average over the lifetime of the minio server) and offloads the formatting to the client.
This commit is contained in:
parent
d0015b4d66
commit
34e7259f95
@ -313,18 +313,20 @@ type ServerDrivesPerfInfo struct {
|
||||
// of one minio node. It also reports any errors if encountered
|
||||
// while trying to reach this server.
|
||||
type ServerCPULoadInfo struct {
|
||||
Addr string `json:"addr"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Load []cpu.Load `json:"load"`
|
||||
Addr string `json:"addr"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Load []cpu.Load `json:"load"`
|
||||
HistoricLoad []cpu.Load `json:"historicLoad"`
|
||||
}
|
||||
|
||||
// ServerMemUsageInfo holds informantion about memory utilization
|
||||
// of one minio node. It also reports any errors if encountered
|
||||
// while trying to reach this server.
|
||||
type ServerMemUsageInfo struct {
|
||||
Addr string `json:"addr"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Usage []mem.Usage `json:"usage"`
|
||||
Addr string `json:"addr"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Usage []mem.Usage `json:"usage"`
|
||||
HistoricUsage []mem.Usage `json:"historicUsage"`
|
||||
}
|
||||
|
||||
// PerfInfoHandler - GET /minio/admin/v1/performance?perfType={perfType}
|
||||
|
@ -204,6 +204,7 @@ func (endpoints EndpointList) GetString(i int) string {
|
||||
// local endpoints from given list of endpoints
|
||||
func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo {
|
||||
var memUsages []mem.Usage
|
||||
var historicUsages []mem.Usage
|
||||
var addr string
|
||||
scratchSpace := map[string]bool{}
|
||||
for _, endpoint := range endpoints {
|
||||
@ -215,12 +216,15 @@ func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo {
|
||||
addr = GetLocalPeer(endpoints)
|
||||
memUsage := mem.GetUsage()
|
||||
memUsages = append(memUsages, memUsage)
|
||||
historicUsage := mem.GetHistoricUsage()
|
||||
historicUsages = append(historicUsages, historicUsage)
|
||||
scratchSpace[endpoint.Host] = true
|
||||
}
|
||||
}
|
||||
return ServerMemUsageInfo{
|
||||
Addr: addr,
|
||||
Usage: memUsages,
|
||||
Addr: addr,
|
||||
Usage: memUsages,
|
||||
HistoricUsage: historicUsages,
|
||||
}
|
||||
}
|
||||
|
||||
@ -228,6 +232,7 @@ func localEndpointsMemUsage(endpoints EndpointList) ServerMemUsageInfo {
|
||||
// local endpoints from given list of endpoints
|
||||
func localEndpointsCPULoad(endpoints EndpointList) ServerCPULoadInfo {
|
||||
var cpuLoads []cpu.Load
|
||||
var historicLoads []cpu.Load
|
||||
var addr string
|
||||
scratchSpace := map[string]bool{}
|
||||
for _, endpoint := range endpoints {
|
||||
@ -239,12 +244,15 @@ func localEndpointsCPULoad(endpoints EndpointList) ServerCPULoadInfo {
|
||||
addr = GetLocalPeer(endpoints)
|
||||
cpuLoad := cpu.GetLoad()
|
||||
cpuLoads = append(cpuLoads, cpuLoad)
|
||||
historicLoad := cpu.GetHistoricLoad()
|
||||
historicLoads = append(historicLoads, historicLoad)
|
||||
scratchSpace[endpoint.Host] = true
|
||||
}
|
||||
}
|
||||
return ServerCPULoadInfo{
|
||||
Addr: addr,
|
||||
Load: cpuLoads,
|
||||
Addr: addr,
|
||||
Load: cpuLoads,
|
||||
HistoricLoad: historicLoads,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -17,11 +17,44 @@
|
||||
package cpu
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// rollingAvg holds the rolling average of the cpu load on the minio
|
||||
// server over its lifetime
|
||||
var rollingAvg *Load
|
||||
|
||||
// cpuLoadMeasureInterval is the interval of time between two
|
||||
// measurements of CPU load
|
||||
const cpuLoadMeasureInterval = 5 * time.Second
|
||||
|
||||
// triggers the average load computation at server spawn
|
||||
func init() {
|
||||
rollingAvg = &Load{
|
||||
Min: float64(0),
|
||||
Max: float64(0),
|
||||
Avg: float64(0),
|
||||
}
|
||||
var rollingSum float64
|
||||
var cycles float64
|
||||
go func() {
|
||||
for {
|
||||
time.Sleep(cpuLoadMeasureInterval)
|
||||
cycles = cycles + 1
|
||||
currLoad := GetLoad()
|
||||
if rollingAvg.Max < currLoad.Max || rollingAvg.Max == 0 {
|
||||
rollingAvg.Max = currLoad.Max
|
||||
}
|
||||
if rollingAvg.Min > currLoad.Min || rollingAvg.Min == 0 {
|
||||
rollingAvg.Min = currLoad.Min
|
||||
}
|
||||
rollingSum = rollingSum + currLoad.Avg
|
||||
rollingAvg.Avg = rollingSum / cycles
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
const (
|
||||
// cpuLoadWindow is the interval of time for which the
|
||||
// cpu utilization is measured
|
||||
@ -37,15 +70,34 @@ const (
|
||||
|
||||
// Load holds CPU utilization measured in three intervals of 200ms each.
// Avg, Max and Min are plain numbers expressed as a percentage (GetLoad
// scales the usage ratio by 100); formatting them for display is left to
// the consumer. Error is omitted from the JSON output when empty.
type Load struct {
	Avg   float64 `json:"avg"`
	Max   float64 `json:"max"`
	Min   float64 `json:"min"`
	Error string  `json:"error,omitempty"`
}
|
||||
|
||||
// counter is an empty struct type.
// NOTE(review): no fields or methods are visible in this hunk; presumably it
// is the receiver for the clock-reading helpers GetLoad relies on - confirm.
type counter struct{}
|
||||
|
||||
// GetLoad returns the CPU utilization % of the current process
|
||||
// GetHistoricLoad returns the historic CPU utilization of the current process
|
||||
func GetHistoricLoad() Load {
|
||||
return *rollingAvg
|
||||
}
|
||||
|
||||
// GetLoad returns the CPU utilization of the current process
|
||||
// This function works by calculating the amount of cpu clock
|
||||
// cycles the current process used in a given time window
|
||||
//
|
||||
// This corresponds to the CPU utilization calculation done by
|
||||
// tools like top. Here, we use clock_gettime with the
|
||||
// CLOCK_PROCESS_CPUTIME_ID parameter to obtain the total number of
|
||||
// clock ticks used by the process so far. Then we sleep for
|
||||
// 200ms and obtain the total number of clock ticks again. The
|
||||
// difference between the two counts provides us the number of
|
||||
// clock ticks used by the process in the 200ms interval.
|
||||
//
|
||||
// The ratio of clock ticks used (measured in nanoseconds) to number
|
||||
// of nanoseconds in 200 milliseconds provides us the CPU usage
|
||||
// for the process currently
|
||||
func GetLoad() Load {
|
||||
vals := make(chan time.Duration, 3)
|
||||
wg := sync.WaitGroup{}
|
||||
@ -83,9 +135,9 @@ func GetLoad() Load {
|
||||
close(vals)
|
||||
avg := sum / 3
|
||||
return Load{
|
||||
Avg: fmt.Sprintf("%.2f%%", toFixed4(float64(avg)/float64(200*time.Millisecond))*100),
|
||||
Max: fmt.Sprintf("%.2f%%", toFixed4(float64(max)/float64(200*time.Millisecond))*100),
|
||||
Min: fmt.Sprintf("%.2f%%", toFixed4(float64(min)/float64(200*time.Millisecond))*100),
|
||||
Avg: toFixed4(float64(avg)/float64(200*time.Millisecond)) * 100,
|
||||
Max: toFixed4(float64(max)/float64(200*time.Millisecond)) * 100,
|
||||
Min: toFixed4(float64(min)/float64(200*time.Millisecond)) * 100,
|
||||
Error: "",
|
||||
}
|
||||
}
|
||||
|
@ -235,9 +235,9 @@ Fetches CPU utilization for all cluster nodes. Returned value is in Bytes.
|
||||
|
||||
| Param | Type | Description |
|
||||
|-------|------|-------------|
|
||||
|`cpu.Load.Avg` | _string_ | The average utilization % of the CPU measured in a 200ms interval |
|
||||
|`cpu.Load.Min` | _string_ | The minimum utilization % of the CPU measured in a 200ms interval |
|
||||
|`cpu.Load.Max` | _string_ | The maximum utilization % of the CPU measured in a 200ms interval |
|
||||
|`cpu.Load.Avg` | _float64_ | The average utilization of the CPU measured in a 200ms interval |
|
||||
|`cpu.Load.Min` | _float64_ | The minimum utilization of the CPU measured in a 200ms interval |
|
||||
|`cpu.Load.Max` | _float64_ | The maximum utilization of the CPU measured in a 200ms interval |
|
||||
|`cpu.Load.Error` | _string_ | Error (if any) encountered while accessing the CPU info |
|
||||
|
||||
<a name="ServerMemUsageInfo"></a>
|
||||
@ -253,7 +253,7 @@ Fetches Mem utilization for all cluster nodes. Returned value is in Bytes.
|
||||
|
||||
| Param | Type | Description |
|
||||
|-------|------|-------------|
|
||||
|`mem.Usage.Mem` | _string_ | The total number of bytes obtained from the OS |
|
||||
|`mem.Usage.Mem` | _uint64_ | The total number of bytes obtained from the OS |
|
||||
|`mem.Usage.Error` | _string_ | Error (if any) encountered while accessing the memory info |
|
||||
|
||||
## 6. Heal operations
|
||||
|
@ -199,9 +199,10 @@ func (adm *AdminClient) ServerDrivesPerfInfo() ([]ServerDrivesPerfInfo, error) {
|
||||
// ServerCPULoadInfo holds information about address and cpu load of
|
||||
// a single server node
|
||||
type ServerCPULoadInfo struct {
|
||||
Addr string `json:"addr"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Load []cpu.Load `json:"load"`
|
||||
Addr string `json:"addr"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Load []cpu.Load `json:"load"`
|
||||
HistoricLoad []cpu.Load `json:"historicLoad"`
|
||||
}
|
||||
|
||||
// ServerCPULoadInfo - Returns cpu utilization information
|
||||
@ -242,9 +243,10 @@ func (adm *AdminClient) ServerCPULoadInfo() ([]ServerCPULoadInfo, error) {
|
||||
// ServerMemUsageInfo holds information about address and memory utilization of
|
||||
// a single server node
|
||||
type ServerMemUsageInfo struct {
|
||||
Addr string `json:"addr"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Usage []mem.Usage `json:"usage"`
|
||||
Addr string `json:"addr"`
|
||||
Error string `json:"error,omitempty"`
|
||||
Usage []mem.Usage `json:"usage"`
|
||||
HistoricUsage []mem.Usage `json:"historicUsage"`
|
||||
}
|
||||
|
||||
// ServerMemUsageInfo - Returns mem utilization information
|
||||
|
@ -18,22 +18,51 @@ package mem
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
|
||||
humanize "github.com/dustin/go-humanize"
|
||||
"time"
|
||||
)
|
||||
|
||||
// historicUsage holds the rolling average of memory used by
|
||||
// minio server
|
||||
var historicUsage *Usage
|
||||
|
||||
// memUsageMeasureInterval is the window of time between
|
||||
// two measurements of memory usage
|
||||
const memUsageMeasureInterval = 5 * time.Second
|
||||
|
||||
// triggers the collection of historic stats about the memory
|
||||
// utilized by minio server
|
||||
func init() {
|
||||
historicUsage = &Usage{}
|
||||
var cycles uint64
|
||||
go func() {
|
||||
for {
|
||||
time.Sleep(memUsageMeasureInterval)
|
||||
currUsage := GetUsage()
|
||||
currSum := cycles * historicUsage.Mem
|
||||
cycles = cycles + 1
|
||||
historicUsage.Mem = (currSum + currUsage.Mem) / cycles
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
// Usage holds memory utilization information. Mem is a raw byte count
// (GetUsage fills it from runtime.MemStats.Sys); formatting it for display
// is left to the consumer. Error is omitted from the JSON output when empty.
type Usage struct {
	Mem   uint64 `json:"mem"`
	Error string `json:"error,omitempty"`
}
|
||||
|
||||
// GetHistoricUsage measures the historic average of memory utilized by
|
||||
// current process
|
||||
func GetHistoricUsage() Usage {
|
||||
return *historicUsage
|
||||
}
|
||||
|
||||
// GetUsage measures the total memory provisioned for the current process
|
||||
// from the OS
|
||||
func GetUsage() Usage {
|
||||
memStats := new(runtime.MemStats)
|
||||
runtime.ReadMemStats(memStats)
|
||||
return Usage{
|
||||
Mem: humanize.IBytes(memStats.Sys),
|
||||
Mem: memStats.Sys,
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user