Mirror of https://github.com/minio/minio.git (synced 2025-02-23 19:42:30 -05:00)
capture I/O in waiting and total tokens in diskMetrics (#18819)
This is needed for the subsequent changes in ServerUpdate(), ServerRestart() etc.
Parent: 7ce0d71a96
Commit: 7c0673279b
@@ -73,6 +73,8 @@ type DiskInfo struct {
 type DiskMetrics struct {
     LastMinute map[string]AccElem `json:"apiLatencies,omitempty"`
     APICalls   map[string]uint64  `json:"apiCalls,omitempty"`
+    TotalTokens  uint32 `json:"totalTokens,omitempty"`
+    TotalWaiting uint32 `json:"totalWaiting,omitempty"`
     TotalErrorsAvailability uint64 `json:"totalErrsAvailability"`
     TotalErrorsTimeout      uint64 `json:"totalErrsTimeout"`
     TotalWrites             uint64 `json:"totalWrites"`
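For orientation: the two new fields are gauges that later hunks populate from the drive's concurrency limiter. TotalTokens carries the drive's configured concurrency limit (driveMaxConcurrent below) and TotalWaiting the number of requests currently waiting for a token. A minimal, hypothetical sketch of how a metrics consumer might read them (trimmed struct for illustration, not the real cmd.DiskMetrics):

package main

import "fmt"

// Trimmed, illustrative copy of the two new gauges only.
type driveGauges struct {
    TotalTokens  uint32 // configured concurrency limit for the drive
    TotalWaiting uint32 // requests currently queued for a token
}

// queued reports whether callers are currently waiting behind the drive.
func queued(g driveGauges) bool {
    return g.TotalWaiting > 0
}

func main() {
    fmt.Println(queued(driveGauges{TotalTokens: 512, TotalWaiting: 0}))  // false
    fmt.Println(queued(driveGauges{TotalTokens: 512, TotalWaiting: 42})) // true
}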
@@ -1327,6 +1327,18 @@ func (z *DiskMetrics) DecodeMsg(dc *msgp.Reader) (err error) {
             }
             z.APICalls[za0003] = za0004
         }
+    case "TotalTokens":
+        z.TotalTokens, err = dc.ReadUint32()
+        if err != nil {
+            err = msgp.WrapError(err, "TotalTokens")
+            return
+        }
+    case "TotalWaiting":
+        z.TotalWaiting, err = dc.ReadUint32()
+        if err != nil {
+            err = msgp.WrapError(err, "TotalWaiting")
+            return
+        }
     case "TotalErrorsAvailability":
         z.TotalErrorsAvailability, err = dc.ReadUint64()
         if err != nil {
@@ -1364,9 +1376,9 @@ func (z *DiskMetrics) DecodeMsg(dc *msgp.Reader) (err error) {

 // EncodeMsg implements msgp.Encodable
 func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) {
-    // map header, size 6
+    // map header, size 8
     // write "LastMinute"
-    err = en.Append(0x86, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
+    err = en.Append(0x88, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
     if err != nil {
         return
     }
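The 0x86 versus 0x88 leading byte in the generated encoder is just the MessagePack fixmap header (0x80 | field count), which grows from 6 to 8 fields; the 0xab and 0xac prefixes in the following hunks are fixstr headers (0xa0 | length) for the 11- and 12-byte field names. A small standalone illustration (not MinIO code):

package main

import "fmt"

// fixmap and fixstr header bytes per the MessagePack spec.
func fixmap(size int) byte { return 0x80 | byte(size) }
func fixstr(s string) byte { return 0xa0 | byte(len(s)) }

func main() {
    fmt.Printf("map header, 6 fields: 0x%x\n", fixmap(6)) // 0x86 (old)
    fmt.Printf("map header, 8 fields: 0x%x\n", fixmap(8)) // 0x88 (new)
    fmt.Printf("\"TotalTokens\" prefix:  0x%x\n", fixstr("TotalTokens"))  // 0xab
    fmt.Printf("\"TotalWaiting\" prefix: 0x%x\n", fixstr("TotalWaiting")) // 0xac
}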
@@ -1409,6 +1421,26 @@ func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) {
             return
         }
     }
+    // write "TotalTokens"
+    err = en.Append(0xab, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73)
+    if err != nil {
+        return
+    }
+    err = en.WriteUint32(z.TotalTokens)
+    if err != nil {
+        err = msgp.WrapError(err, "TotalTokens")
+        return
+    }
+    // write "TotalWaiting"
+    err = en.Append(0xac, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x57, 0x61, 0x69, 0x74, 0x69, 0x6e, 0x67)
+    if err != nil {
+        return
+    }
+    err = en.WriteUint32(z.TotalWaiting)
+    if err != nil {
+        err = msgp.WrapError(err, "TotalWaiting")
+        return
+    }
     // write "TotalErrorsAvailability"
     err = en.Append(0xb7, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79)
     if err != nil {
@@ -1455,9 +1487,9 @@ func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) {
 // MarshalMsg implements msgp.Marshaler
 func (z *DiskMetrics) MarshalMsg(b []byte) (o []byte, err error) {
     o = msgp.Require(b, z.Msgsize())
-    // map header, size 6
+    // map header, size 8
     // string "LastMinute"
-    o = append(o, 0x86, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
+    o = append(o, 0x88, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
     o = msgp.AppendMapHeader(o, uint32(len(z.LastMinute)))
     for za0001, za0002 := range z.LastMinute {
         o = msgp.AppendString(o, za0001)
@@ -1474,6 +1506,12 @@ func (z *DiskMetrics) MarshalMsg(b []byte) (o []byte, err error) {
         o = msgp.AppendString(o, za0003)
         o = msgp.AppendUint64(o, za0004)
     }
+    // string "TotalTokens"
+    o = append(o, 0xab, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73)
+    o = msgp.AppendUint32(o, z.TotalTokens)
+    // string "TotalWaiting"
+    o = append(o, 0xac, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x57, 0x61, 0x69, 0x74, 0x69, 0x6e, 0x67)
+    o = msgp.AppendUint32(o, z.TotalWaiting)
     // string "TotalErrorsAvailability"
     o = append(o, 0xb7, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x73, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79)
     o = msgp.AppendUint64(o, z.TotalErrorsAvailability)
@@ -1567,6 +1605,18 @@ func (z *DiskMetrics) UnmarshalMsg(bts []byte) (o []byte, err error) {
             }
             z.APICalls[za0003] = za0004
         }
+    case "TotalTokens":
+        z.TotalTokens, bts, err = msgp.ReadUint32Bytes(bts)
+        if err != nil {
+            err = msgp.WrapError(err, "TotalTokens")
+            return
+        }
+    case "TotalWaiting":
+        z.TotalWaiting, bts, err = msgp.ReadUint32Bytes(bts)
+        if err != nil {
+            err = msgp.WrapError(err, "TotalWaiting")
+            return
+        }
     case "TotalErrorsAvailability":
         z.TotalErrorsAvailability, bts, err = msgp.ReadUint64Bytes(bts)
         if err != nil {
@@ -1619,7 +1669,7 @@ func (z *DiskMetrics) Msgsize() (s int) {
             s += msgp.StringPrefixSize + len(za0003) + msgp.Uint64Size
         }
     }
-    s += 24 + msgp.Uint64Size + 19 + msgp.Uint64Size + 12 + msgp.Uint64Size + 13 + msgp.Uint64Size
+    s += 12 + msgp.Uint32Size + 13 + msgp.Uint32Size + 24 + msgp.Uint64Size + 19 + msgp.Uint64Size + 12 + msgp.Uint64Size + 13 + msgp.Uint64Size
     return
 }
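The regenerated Msgsize bound simply adds one fixstr-prefixed field name plus a uint32 payload per new field: 1 + len("TotalTokens") = 12 and 1 + len("TotalWaiting") = 13. For readers unfamiliar with what the generated methods are doing, here is a hand-rolled sketch of the same encode/decode shape using msgp's helper functions directly; it is illustrative only, since the real methods are produced by the msgp generator named in the go:generate directive in the next hunk:

package main

import (
    "fmt"

    "github.com/tinylib/msgp/msgp"
)

func main() {
    // Encode: a 2-field map of name -> uint32, mirroring the new fields.
    b := msgp.AppendMapHeader(nil, 2)
    b = msgp.AppendString(b, "TotalTokens")
    b = msgp.AppendUint32(b, 512)
    b = msgp.AppendString(b, "TotalWaiting")
    b = msgp.AppendUint32(b, 7)

    // Decode it back, key by key, the way UnmarshalMsg does.
    sz, rest, err := msgp.ReadMapHeaderBytes(b)
    if err != nil {
        panic(err)
    }
    got := map[string]uint32{}
    for i := uint32(0); i < sz; i++ {
        var k string
        var v uint32
        if k, rest, err = msgp.ReadStringBytes(rest); err != nil {
            panic(err)
        }
        if v, rest, err = msgp.ReadUint32Bytes(rest); err != nil {
            panic(err)
        }
        got[k] = v
    }
    fmt.Println(got) // map[TotalTokens:512 TotalWaiting:7]
}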
@@ -20,7 +20,7 @@ package cmd
 //go:generate msgp -file $GOFILE -unexported

 const (
-    storageRESTVersion       = "v53" // Remove deprecated APIs
+    storageRESTVersion       = "v54" // Add more metrics per drive
     storageRESTVersionPrefix = SlashSeparator + storageRESTVersion
     storageRESTPrefix        = minioReservedBucketPath + "/storage"
 )
@@ -1,4 +1,4 @@
-// Copyright (c) 2015-2021 MinIO, Inc.
+// Copyright (c) 2015-2024 MinIO, Inc.
 //
 // This file is part of MinIO Object Storage stack
 //
@@ -90,13 +90,13 @@ type xlStorageDiskIDCheck struct {
     storage *xlStorage
     health  *diskHealthTracker

-    // diskStartChecking is a threshold above which we will start to check
-    // the state of disks, generally this value is less than diskMaxConcurrent
-    diskStartChecking int
+    // driveStartChecking is a threshold above which we will start to check
+    // the state of disks, generally this value is less than driveMaxConcurrent
+    driveStartChecking int

-    // diskMaxConcurrent represents maximum number of running concurrent
+    // driveMaxConcurrent represents maximum number of running concurrent
     // operations for local and (incoming) remote disk operations.
-    diskMaxConcurrent int
+    driveMaxConcurrent int

     metricsCache timedValue
     diskCtx      context.Context
@@ -127,8 +127,11 @@ func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics {
     }

     // Do not need this value to be cached.
+    diskMetric.TotalTokens = uint32(p.driveMaxConcurrent)
+    diskMetric.TotalWaiting = uint32(p.health.waiting.Load())
     diskMetric.TotalErrorsTimeout = p.totalErrsTimeout.Load()
     diskMetric.TotalErrorsAvailability = p.totalErrsAvailability.Load()

     return diskMetric
 }

@@ -189,42 +192,42 @@ func (e *lockedLastMinuteLatency) total() AccElem {
 var maxConcurrentOnce sync.Once

 func newXLStorageDiskIDCheck(storage *xlStorage, healthCheck bool) *xlStorageDiskIDCheck {
-    // diskMaxConcurrent represents maximum number of running concurrent
+    // driveMaxConcurrent represents maximum number of running concurrent
     // operations for local and (incoming) remote disk operations.
     //
     // this value is a placeholder it is overridden via ENV for custom settings
     // or this default value is used to pick the correct value HDDs v/s NVMe's
-    diskMaxConcurrent := -1
+    driveMaxConcurrent := -1
     maxConcurrentOnce.Do(func() {
         s := env.Get("_MINIO_DRIVE_MAX_CONCURRENT", "")
         if s == "" {
             s = env.Get("_MINIO_DISK_MAX_CONCURRENT", "")
         }
         if s != "" {
-            diskMaxConcurrent, _ = strconv.Atoi(s)
+            driveMaxConcurrent, _ = strconv.Atoi(s)
         }
     })

-    if diskMaxConcurrent <= 0 {
-        diskMaxConcurrent = 512
+    if driveMaxConcurrent <= 0 {
+        driveMaxConcurrent = 512
         if storage.rotational {
-            diskMaxConcurrent = int(storage.nrRequests) / 2
-            if diskMaxConcurrent < 32 {
-                diskMaxConcurrent = 32
+            driveMaxConcurrent = int(storage.nrRequests) / 2
+            if driveMaxConcurrent < 32 {
+                driveMaxConcurrent = 32
             }
         }
     }

-    diskStartChecking := 16 + diskMaxConcurrent/8
-    if diskStartChecking > diskMaxConcurrent {
-        diskStartChecking = diskMaxConcurrent
+    driveStartChecking := 16 + driveMaxConcurrent/8
+    if driveStartChecking > driveMaxConcurrent {
+        driveStartChecking = driveMaxConcurrent
     }

     xl := xlStorageDiskIDCheck{
         storage: storage,
-        health:  newDiskHealthTracker(diskMaxConcurrent),
-        diskMaxConcurrent: diskMaxConcurrent,
-        diskStartChecking: diskStartChecking,
+        health:  newDiskHealthTracker(driveMaxConcurrent),
+        driveMaxConcurrent: driveMaxConcurrent,
+        driveStartChecking: driveStartChecking,
     }

     if driveQuorum {
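As a worked example of the defaulting logic above (unchanged by this commit apart from the rename): with no environment override and a rotational drive reporting nrRequests = 128, driveMaxConcurrent becomes 128/2 = 64 and driveStartChecking becomes 16 + 64/8 = 24; for a non-rotational drive the default stays 512, giving driveStartChecking = 16 + 512/8 = 80. In both cases the start-checking threshold stays below the token limit, so the cap in the final if-block does not kick in.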
@@ -347,6 +350,8 @@ func (p *xlStorageDiskIDCheck) DiskInfo(ctx context.Context, metrics bool) (info
             info.Metrics.TotalWrites = p.totalWrites.Load()
             info.Metrics.TotalDeletes = p.totalDeletes.Load()
         }
+        info.Metrics.TotalTokens = uint32(p.driveMaxConcurrent)
+        info.Metrics.TotalWaiting = uint32(p.health.waiting.Load())
         info.Metrics.TotalErrorsTimeout = p.totalErrsTimeout.Load()
         info.Metrics.TotalErrorsAvailability = p.totalErrsAvailability.Load()
     }()
@@ -842,7 +847,7 @@ func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric, paths ...st
 }

 const (
-    diskHealthOK = iota
+    diskHealthOK int32 = iota
     diskHealthFaulty
 )

@@ -867,24 +872,24 @@ type diskHealthTracker struct {
     lastStarted int64

     // Atomic status of disk.
-    status int32
+    status atomic.Int32

-    // Atomic number of requests blocking for a token.
-    blocked int32
+    // Atomic number of requests waiting for a token.
+    waiting atomic.Int32

     // Concurrency tokens.
     tokens chan struct{}
 }

 // newDiskHealthTracker creates a new disk health tracker.
-func newDiskHealthTracker(diskMaxConcurrent int) *diskHealthTracker {
+func newDiskHealthTracker(driveMaxConcurrent int) *diskHealthTracker {
     d := diskHealthTracker{
         lastSuccess: time.Now().UnixNano(),
         lastStarted: time.Now().UnixNano(),
-        status:      diskHealthOK,
-        tokens:      make(chan struct{}, diskMaxConcurrent),
+        tokens:      make(chan struct{}, driveMaxConcurrent),
     }
-    for i := 0; i < diskMaxConcurrent; i++ {
+    d.status.Store(diskHealthOK)
+    for i := 0; i < driveMaxConcurrent; i++ {
         d.tokens <- struct{}{}
     }
     return &d
@@ -896,7 +901,7 @@ func (d *diskHealthTracker) logSuccess() {
 }

 func (d *diskHealthTracker) isFaulty() bool {
-    return atomic.LoadInt32(&d.status) == diskHealthFaulty
+    return d.status.Load() == diskHealthFaulty
 }

 type (
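The move from a plain int32 with atomic.LoadInt32/StoreInt32/CompareAndSwapInt32 to the atomic.Int32 wrapper type (available since Go 1.19) is mechanical, but the wrapper cannot be read or written non-atomically by accident. A short standalone illustration of the methods used throughout this diff:

package main

import (
    "fmt"
    "sync/atomic"
)

func main() {
    var status atomic.Int32 // zero value is 0, i.e. diskHealthOK in this code

    // Flip OK(0) -> Faulty(1) exactly once, as checkHealth and goOffline do.
    if status.CompareAndSwap(0, 1) {
        fmt.Println("took drive offline")
    }
    fmt.Println("status:", status.Load()) // 1
    status.Store(0)                       // bring it back online

    var waiting atomic.Int32
    waiting.Add(1)  // a request starts waiting for a token
    waiting.Add(-1) // ...and stops waiting
    fmt.Println("waiting:", waiting.Load()) // 0
}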
@@ -982,10 +987,9 @@ func (p *xlStorageDiskIDCheck) TrackDiskHealth(ctx context.Context, s storageMet
 // checking the disk status.
 // If nil is returned a token was picked up.
 func (p *xlStorageDiskIDCheck) waitForToken(ctx context.Context) (err error) {
-    atomic.AddInt32(&p.health.blocked, 1)
-    defer func() {
-        atomic.AddInt32(&p.health.blocked, -1)
-    }()
+    p.health.waiting.Add(1)
+    defer p.health.waiting.Add(-1)
+
     // Avoid stampeding herd...
     ticker := time.NewTicker(5*time.Second + time.Duration(rand.Int63n(int64(5*time.Second))))
     defer ticker.Stop()
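Putting the pieces together, the tracker is a token-channel concurrency limiter: a buffered channel pre-filled with driveMaxConcurrent tokens caps in-flight operations, and the renamed waiting counter is held high while a caller blocks on the channel so DiskMetrics can report queue depth. A self-contained sketch of that pattern (names are illustrative, not MinIO's):

package main

import (
    "fmt"
    "sync"
    "sync/atomic"
)

// limiter caps concurrency with a token channel and counts waiters.
type limiter struct {
    waiting atomic.Int32
    tokens  chan struct{}
}

func newLimiter(max int) *limiter {
    l := &limiter{tokens: make(chan struct{}, max)}
    for i := 0; i < max; i++ {
        l.tokens <- struct{}{}
    }
    return l
}

func (l *limiter) do(fn func()) {
    l.waiting.Add(1)
    tok := <-l.tokens // blocks while all tokens are held
    l.waiting.Add(-1)
    defer func() { l.tokens <- tok }()
    fn()
}

func main() {
    l := newLimiter(2)
    var wg sync.WaitGroup
    for i := 0; i < 4; i++ {
        wg.Add(1)
        go l.do(func() { defer wg.Done(); fmt.Println("op, waiting =", l.waiting.Load()) })
    }
    wg.Wait()
}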
@@ -1008,11 +1012,11 @@ func (p *xlStorageDiskIDCheck) waitForToken(ctx context.Context) (err error) {
 // checkHealth should only be called when tokens have run out.
 // This will check if disk should be taken offline.
 func (p *xlStorageDiskIDCheck) checkHealth(ctx context.Context) (err error) {
-    if atomic.LoadInt32(&p.health.status) == diskHealthFaulty {
+    if p.health.status.Load() == diskHealthFaulty {
         return errFaultyDisk
     }
     // Check if there are tokens.
-    if p.diskMaxConcurrent-len(p.health.tokens) < p.diskStartChecking {
+    if p.driveMaxConcurrent-len(p.health.tokens) < p.driveStartChecking {
         return nil
     }

@@ -1030,8 +1034,9 @@ func (p *xlStorageDiskIDCheck) checkHealth(ctx context.Context) (err error) {
     // If also more than 15 seconds since last success, take disk offline.
     t = time.Since(time.Unix(0, atomic.LoadInt64(&p.health.lastSuccess)))
     if t > maxTimeSinceLastSuccess {
-        if atomic.CompareAndSwapInt32(&p.health.status, diskHealthOK, diskHealthFaulty) {
+        if p.health.status.CompareAndSwap(diskHealthOK, diskHealthFaulty) {
             logger.LogAlwaysIf(ctx, fmt.Errorf("node(%s): taking drive %s offline, time since last response %v", globalLocalNodeName, p.storage.String(), t.Round(time.Millisecond)))
+            p.health.waiting.Add(1)
             go p.monitorDiskStatus(0, mustGetUUID())
         }
         return errFaultyDisk
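Note the pairing across hunks: waiting is incremented here (and in goOffline further down) when a drive is taken offline, and decremented in monitorDiskStatus once the health probe succeeds and the drive comes back, so an offline drive effectively shows up as one extra waiter in TotalWaiting until it recovers.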
@@ -1077,12 +1082,9 @@ func (p *xlStorageDiskIDCheck) monitorDiskStatus(spent time.Duration, fn string)
         })

         if err == nil {
-            t := time.Unix(0, atomic.LoadInt64(&p.health.lastSuccess))
-            if spent > 0 {
-                t = t.Add(spent)
-            }
-            logger.Info("node(%s): Read/Write/Delete successful, bringing drive %s online. Drive was offline for %s.", globalLocalNodeName, p.storage.String(), time.Since(t))
-            atomic.StoreInt32(&p.health.status, diskHealthOK)
+            logger.Info("node(%s): Read/Write/Delete successful, bringing drive %s online", globalLocalNodeName, p.storage.String())
+            p.health.status.Store(diskHealthOK)
+            p.health.waiting.Add(-1)
             return
         }
     }
@@ -1128,7 +1130,7 @@ func (p *xlStorageDiskIDCheck) monitorDiskWritable(ctx context.Context) {
         return false
     }

-    if atomic.LoadInt32(&p.health.status) != diskHealthOK {
+    if p.health.status.Load() != diskHealthOK {
         return true
     }

@@ -1138,8 +1140,9 @@ func (p *xlStorageDiskIDCheck) monitorDiskWritable(ctx context.Context) {
     }

     goOffline := func(err error, spent time.Duration) {
-        if atomic.CompareAndSwapInt32(&p.health.status, diskHealthOK, diskHealthFaulty) {
+        if p.health.status.CompareAndSwap(diskHealthOK, diskHealthFaulty) {
             logger.LogAlwaysIf(ctx, fmt.Errorf("node(%s): taking drive %s offline: %v", globalLocalNodeName, p.storage.String(), err))
+            p.health.waiting.Add(1)
             go p.monitorDiskStatus(spent, fn)
         }
     }