mirror of
https://github.com/minio/minio.git
synced 2025-11-07 12:52:58 -05:00
perform healthchecks before initializing everything fully (#19953)
Adds more informative logs that identify which erasure set is losing quorum, among other details.
This commit is contained in:
@@ -2425,6 +2425,7 @@ const (
|
||||
// HealthOptions carries the caller-supplied options for a Health check
// on the object layer.
type HealthOptions struct {
	// NOTE(review): presumably requests a maintenance-style check; this
	// field is not read in the visible code - confirm against Health.
	Maintenance bool
	// NOTE(review): presumably names the kind of deployment being checked;
	// this field is not read in the visible code - confirm against callers.
	DeploymentType string
	// Startup marks a check performed during startup; when set, a missing
	// quorum is logged as "was not established" instead of "may be lost".
	Startup bool
}
|
||||
|
||||
// HealthResult returns the current state of the system, also
|
||||
@@ -2449,6 +2450,24 @@ type HealthResult struct {
|
||||
UsingDefaults bool
|
||||
}
|
||||
|
||||
func (hr HealthResult) String() string {
|
||||
var str strings.Builder
|
||||
for i, es := range hr.ESHealth {
|
||||
str.WriteString("(Pool: ")
|
||||
str.WriteString(strconv.Itoa(es.PoolID))
|
||||
str.WriteString(" Set: ")
|
||||
str.WriteString(strconv.Itoa(es.SetID))
|
||||
str.WriteString(" Healthy: ")
|
||||
str.WriteString(strconv.FormatBool(es.Healthy))
|
||||
if i == 0 {
|
||||
str.WriteString(")")
|
||||
} else {
|
||||
str.WriteString("), ")
|
||||
}
|
||||
}
|
||||
return str.String()
|
||||
}
|
||||
|
||||
// Health - returns current status of the object layer health,
|
||||
// reports whether write access exists across sets; additionally, it
|
||||
// can be used to query scenarios if health may be lost
|
||||
@@ -2567,17 +2586,29 @@ func (z *erasureServerPools) Health(ctx context.Context, opts HealthOptions) Hea
|
||||
|
||||
healthy := erasureSetUpCount[poolIdx][setIdx].online >= poolWriteQuorums[poolIdx]
|
||||
if !healthy {
|
||||
storageLogIf(logger.SetReqInfo(ctx, reqInfo),
|
||||
fmt.Errorf("Write quorum may be lost on pool: %d, set: %d, expected write quorum: %d",
|
||||
poolIdx, setIdx, poolWriteQuorums[poolIdx]), logger.FatalKind)
|
||||
if opts.Startup {
|
||||
storageLogIf(logger.SetReqInfo(ctx, reqInfo),
|
||||
fmt.Errorf("Write quorum was not established on pool: %d, set: %d, expected write quorum: %d",
|
||||
poolIdx, setIdx, poolWriteQuorums[poolIdx]), logger.FatalKind)
|
||||
} else {
|
||||
storageLogIf(logger.SetReqInfo(ctx, reqInfo),
|
||||
fmt.Errorf("Write quorum may be lost on pool: %d, set: %d, expected write quorum: %d",
|
||||
poolIdx, setIdx, poolWriteQuorums[poolIdx]), logger.FatalKind)
|
||||
}
|
||||
}
|
||||
result.Healthy = result.Healthy && healthy
|
||||
|
||||
healthyRead := erasureSetUpCount[poolIdx][setIdx].online >= poolReadQuorums[poolIdx]
|
||||
if !healthyRead {
|
||||
storageLogIf(logger.SetReqInfo(ctx, reqInfo),
|
||||
fmt.Errorf("Read quorum may be lost on pool: %d, set: %d, expected read quorum: %d",
|
||||
poolIdx, setIdx, poolReadQuorums[poolIdx]))
|
||||
if opts.Startup {
|
||||
storageLogIf(logger.SetReqInfo(ctx, reqInfo),
|
||||
fmt.Errorf("Read quorum was not established on pool: %d, set: %d, expected read quorum: %d",
|
||||
poolIdx, setIdx, poolReadQuorums[poolIdx]))
|
||||
} else {
|
||||
storageLogIf(logger.SetReqInfo(ctx, reqInfo),
|
||||
fmt.Errorf("Read quorum may be lost on pool: %d, set: %d, expected read quorum: %d",
|
||||
poolIdx, setIdx, poolReadQuorums[poolIdx]))
|
||||
}
|
||||
}
|
||||
result.HealthyRead = result.HealthyRead && healthyRead
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user