mirror of
https://github.com/minio/minio.git
synced 2025-03-30 09:13:41 -04:00
avoid caching metrics for timeout errors per drive (#18584)
Bonus: combine the loop for drive/REST registration.
This commit is contained in:
parent
8fdfcfb562
commit
05bb655efc
@ -1348,6 +1348,7 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
|
|||||||
if !endpoint.IsLocal {
|
if !endpoint.IsLocal {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
driveHandlers[pool][set] = &storageRESTServer{}
|
driveHandlers[pool][set] = &storageRESTServer{}
|
||||||
server := driveHandlers[pool][set]
|
server := driveHandlers[pool][set]
|
||||||
|
|
||||||
@ -1392,15 +1393,8 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
|
|||||||
Handle: server.WalkDirHandler,
|
Handle: server.WalkDirHandler,
|
||||||
OutCapacity: 1,
|
OutCapacity: 1,
|
||||||
}), "unable to register handler")
|
}), "unable to register handler")
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for pool, serverPool := range endpointServerPools {
|
createStorage := func(server *storageRESTServer) bool {
|
||||||
for set, endpoint := range serverPool.Endpoints {
|
|
||||||
if !endpoint.IsLocal {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
createStorage := func(pool, set int, endpoint Endpoint) bool {
|
|
||||||
xl, err := newXLStorage(endpoint, false)
|
xl, err := newXLStorage(endpoint, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// if supported errors don't fail, we proceed to
|
// if supported errors don't fail, we proceed to
|
||||||
@ -1410,21 +1404,22 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
|
|||||||
}
|
}
|
||||||
storage := newXLStorageDiskIDCheck(xl, true)
|
storage := newXLStorageDiskIDCheck(xl, true)
|
||||||
storage.SetDiskID(xl.diskID)
|
storage.SetDiskID(xl.diskID)
|
||||||
driveHandlers[pool][set].setStorage(storage)
|
server.setStorage(storage)
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
if createStorage(pool, set, endpoint) {
|
if createStorage(server) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
// Start async goroutine to create storage.
|
// Start async goroutine to create storage.
|
||||||
go func(pool, set int, endpoint Endpoint) {
|
go func(server *storageRESTServer) {
|
||||||
for {
|
for {
|
||||||
time.Sleep(time.Minute)
|
time.Sleep(5 * time.Second)
|
||||||
if createStorage(pool, set, endpoint) {
|
if createStorage(server) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}(pool, set, endpoint)
|
}(server)
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -78,8 +78,9 @@ const (
|
|||||||
|
|
||||||
// Detects change in underlying disk.
|
// Detects change in underlying disk.
|
||||||
type xlStorageDiskIDCheck struct {
|
type xlStorageDiskIDCheck struct {
|
||||||
totalErrsAvailability uint64 // Captures all data availability errors such as permission denied, faulty disk and timeout errors.
|
totalErrsTimeout atomic.Uint64 // Captures all timeout only errors
|
||||||
totalErrsTimeout uint64 // Captures all timeout only errors
|
totalErrsAvailability atomic.Uint64 // Captures all data availability errors such as permission denied, faulty disk and timeout errors.
|
||||||
|
|
||||||
// apiCalls is accessed atomically and must stay 64-bit aligned; only 8-byte-sized fields may precede it.
|
// apiCalls is accessed atomically and must stay 64-bit aligned; only 8-byte-sized fields may precede it.
|
||||||
apiCalls [storageMetricLast]uint64
|
apiCalls [storageMetricLast]uint64
|
||||||
apiLatencies [storageMetricLast]*lockedLastMinuteLatency
|
apiLatencies [storageMetricLast]*lockedLastMinuteLatency
|
||||||
@ -102,7 +103,7 @@ type xlStorageDiskIDCheck struct {
|
|||||||
|
|
||||||
func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics {
|
func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics {
|
||||||
p.metricsCache.Once.Do(func() {
|
p.metricsCache.Once.Do(func() {
|
||||||
p.metricsCache.TTL = 1 * time.Second
|
p.metricsCache.TTL = 5 * time.Second
|
||||||
p.metricsCache.Update = func() (interface{}, error) {
|
p.metricsCache.Update = func() (interface{}, error) {
|
||||||
diskMetric := DiskMetrics{
|
diskMetric := DiskMetrics{
|
||||||
LastMinute: make(map[string]AccElem, len(p.apiLatencies)),
|
LastMinute: make(map[string]AccElem, len(p.apiLatencies)),
|
||||||
@ -114,13 +115,19 @@ func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics {
|
|||||||
for i := range p.apiCalls {
|
for i := range p.apiCalls {
|
||||||
diskMetric.APICalls[storageMetric(i).String()] = atomic.LoadUint64(&p.apiCalls[i])
|
diskMetric.APICalls[storageMetric(i).String()] = atomic.LoadUint64(&p.apiCalls[i])
|
||||||
}
|
}
|
||||||
diskMetric.TotalErrorsAvailability = atomic.LoadUint64(&p.totalErrsAvailability)
|
|
||||||
diskMetric.TotalErrorsTimeout = atomic.LoadUint64(&p.totalErrsTimeout)
|
|
||||||
return diskMetric, nil
|
return diskMetric, nil
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
m, _ := p.metricsCache.Get()
|
m, _ := p.metricsCache.Get()
|
||||||
return m.(DiskMetrics)
|
diskMetric := DiskMetrics{}
|
||||||
|
if m != nil {
|
||||||
|
diskMetric = m.(DiskMetrics)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do not need this value to be cached.
|
||||||
|
diskMetric.TotalErrorsTimeout = p.totalErrsTimeout.Load()
|
||||||
|
diskMetric.TotalErrorsAvailability = p.totalErrsAvailability.Load()
|
||||||
|
return diskMetric
|
||||||
}
|
}
|
||||||
|
|
||||||
// lockedLastMinuteLatency accumulates totals lockless for each second.
|
// lockedLastMinuteLatency accumulates totals lockless for each second.
|
||||||
@ -746,9 +753,9 @@ func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric, paths ...st
|
|||||||
context.DeadlineExceeded,
|
context.DeadlineExceeded,
|
||||||
context.Canceled,
|
context.Canceled,
|
||||||
}...) {
|
}...) {
|
||||||
atomic.AddUint64(&p.totalErrsAvailability, 1)
|
p.totalErrsAvailability.Add(1)
|
||||||
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
|
if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
|
||||||
atomic.AddUint64(&p.totalErrsTimeout, 1)
|
p.totalErrsTimeout.Add(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
p.apiLatencies[s].add(duration)
|
p.apiLatencies[s].add(duration)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user