mirror of https://github.com/minio/minio.git
avoid caching metrics for timeout errors per drive (#18584)
Bonus: combine the loop for drive/REST registration.
This commit is contained in:
parent
8fdfcfb562
commit
05bb655efc
|
@ -1348,6 +1348,7 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
|
|||
if !endpoint.IsLocal {
|
||||
continue
|
||||
}
|
||||
|
||||
driveHandlers[pool][set] = &storageRESTServer{}
|
||||
server := driveHandlers[pool][set]
|
||||
|
||||
|
@ -1392,15 +1393,8 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
|
|||
Handle: server.WalkDirHandler,
|
||||
OutCapacity: 1,
|
||||
}), "unable to register handler")
|
||||
}
|
||||
}
|
||||
|
||||
for pool, serverPool := range endpointServerPools {
|
||||
for set, endpoint := range serverPool.Endpoints {
|
||||
if !endpoint.IsLocal {
|
||||
continue
|
||||
}
|
||||
createStorage := func(pool, set int, endpoint Endpoint) bool {
|
||||
createStorage := func(server *storageRESTServer) bool {
|
||||
xl, err := newXLStorage(endpoint, false)
|
||||
if err != nil {
|
||||
// if supported errors don't fail, we proceed to
|
||||
|
@ -1410,21 +1404,22 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
|
|||
}
|
||||
storage := newXLStorageDiskIDCheck(xl, true)
|
||||
storage.SetDiskID(xl.diskID)
|
||||
driveHandlers[pool][set].setStorage(storage)
|
||||
server.setStorage(storage)
|
||||
return true
|
||||
}
|
||||
if createStorage(pool, set, endpoint) {
|
||||
if createStorage(server) {
|
||||
continue
|
||||
}
|
||||
// Start async goroutine to create storage.
|
||||
go func(pool, set int, endpoint Endpoint) {
|
||||
go func(server *storageRESTServer) {
|
||||
for {
|
||||
time.Sleep(time.Minute)
|
||||
if createStorage(pool, set, endpoint) {
|
||||
time.Sleep(5 * time.Second)
|
||||
if createStorage(server) {
|
||||
return
|
||||
}
|
||||
}
|
||||
}(pool, set, endpoint)
|
||||
}(server)
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -78,8 +78,9 @@ const (
|
|||
|
||||
// Detects change in underlying disk.
|
||||
type xlStorageDiskIDCheck struct {
|
||||
totalErrsAvailability uint64 // Captures all data availability errors such as permission denied, faulty disk and timeout errors.
|
||||
totalErrsTimeout uint64 // Captures all timeout only errors
|
||||
totalErrsTimeout atomic.Uint64 // Captures all timeout only errors
|
||||
totalErrsAvailability atomic.Uint64 // Captures all data availability errors such as permission denied, faulty disk and timeout errors.
|
||||
|
||||
// apiCalls should be placed first so alignment is guaranteed for atomic operations.
|
||||
apiCalls [storageMetricLast]uint64
|
||||
apiLatencies [storageMetricLast]*lockedLastMinuteLatency
|
||||
|
@ -102,7 +103,7 @@ type xlStorageDiskIDCheck struct {
|
|||
|
||||
func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics {
|
||||
p.metricsCache.Once.Do(func() {
|
||||
p.metricsCache.TTL = 1 * time.Second
|
||||
p.metricsCache.TTL = 5 * time.Second
|
||||
p.metricsCache.Update = func() (interface{}, error) {
|
||||
diskMetric := DiskMetrics{
|
||||
LastMinute: make(map[string]AccElem, len(p.apiLatencies)),
|
||||
|
@ -114,13 +115,19 @@ func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics {
|
|||
for i := range p.apiCalls {
|
||||
diskMetric.APICalls[storageMetric(i).String()] = atomic.LoadUint64(&p.apiCalls[i])
|
||||
}
|
||||
diskMetric.TotalErrorsAvailability = atomic.LoadUint64(&p.totalErrsAvailability)
|
||||
diskMetric.TotalErrorsTimeout = atomic.LoadUint64(&p.totalErrsTimeout)
|
||||
return diskMetric, nil
|
||||
}
|
||||
})
|
||||
m, _ := p.metricsCache.Get()
|
||||
return m.(DiskMetrics)
|
||||
diskMetric := DiskMetrics{}
|
||||
if m != nil {
|
||||
diskMetric = m.(DiskMetrics)
|
||||
}
|
||||
|
||||
// Do not need this value to be cached.
|
||||
diskMetric.TotalErrorsTimeout = p.totalErrsTimeout.Load()
|
||||
diskMetric.TotalErrorsAvailability = p.totalErrsAvailability.Load()
|
||||
return diskMetric
|
||||
}
|
||||
|
||||
// lockedLastMinuteLatency accumulates totals lockless for each second.
|
||||
|
@ -746,9 +753,9 @@ func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric, paths ...st
|
|||
context.DeadlineExceeded,
|
||||
context.Canceled,
|
||||
}...) {
|
||||
atomic.AddUint64(&p.totalErrsAvailability, 1)
|
||||
p.totalErrsAvailability.Add(1)
|
||||
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
|
||||
atomic.AddUint64(&p.totalErrsTimeout, 1)
|
||||
p.totalErrsTimeout.Add(1)
|
||||
}
|
||||
}
|
||||
p.apiLatencies[s].add(duration)
|
||||
|
|
Loading…
Reference in New Issue