remove local disk metrics from cluster metrics (#18886)

Local disk metrics were polluting cluster metrics.
Remove them, adding relevant ones instead.

- batch job metrics were incorrectly kept at the bucket
  metrics endpoint; move them to cluster metrics.

- add tier metrics to cluster peer metrics from the node.

- fix missing set-level cluster health metrics
This commit is contained in:
Harshavardhana
2024-01-28 12:53:59 -08:00
committed by GitHub
parent 1d3bd02089
commit 944f3c1477
6 changed files with 1490 additions and 2534 deletions

View File

@@ -2286,6 +2286,7 @@ type HealthResult struct {
ESHealth []struct {
Maintenance bool
PoolID, SetID int
Healthy bool
HealthyDrives int
HealingDrives int
ReadQuorum int
@@ -2409,23 +2410,25 @@ func (z *erasureServerPools) Health(ctx context.Context, opts HealthOptions) Hea
result.ESHealth = append(result.ESHealth, struct {
Maintenance bool
PoolID, SetID int
Healthy bool
HealthyDrives, HealingDrives int
ReadQuorum, WriteQuorum int
}{
Maintenance: opts.Maintenance,
SetID: setIdx,
PoolID: poolIdx,
Healthy: erasureSetUpCount[poolIdx][setIdx].online >= poolWriteQuorums[poolIdx],
HealthyDrives: erasureSetUpCount[poolIdx][setIdx].online,
HealingDrives: erasureSetUpCount[poolIdx][setIdx].healing,
ReadQuorum: poolReadQuorums[poolIdx],
WriteQuorum: poolWriteQuorums[poolIdx],
})
if erasureSetUpCount[poolIdx][setIdx].online < poolWriteQuorums[poolIdx] {
result.Healthy = erasureSetUpCount[poolIdx][setIdx].online >= poolWriteQuorums[poolIdx]
if !result.Healthy {
logger.LogIf(logger.SetReqInfo(ctx, reqInfo),
fmt.Errorf("Write quorum may be lost on pool: %d, set: %d, expected write quorum: %d",
poolIdx, setIdx, poolWriteQuorums[poolIdx]))
result.Healthy = false
}
}
}