From eac4e4b2797cb82062abe27c89a084a8937a1459 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 12 Feb 2024 13:00:20 -0800 Subject: [PATCH] honor replaced disk properly by updating globalLocalDrives (#19038) globalLocalDrives seem to be not updated during the HealFormat() leads to a requirement where the server needs to be restarted for the healing to continue. --- cmd/erasure-sets.go | 13 ++++++++++-- cmd/erasure.go | 1 - cmd/globals.go | 2 +- cmd/metrics-v2.go | 16 --------------- cmd/peer-rest-server.go | 11 +++++++---- cmd/storage-datatypes.go | 1 - cmd/storage-datatypes_gen.go | 35 +++++---------------------------- cmd/storage-rest-common.go | 4 +--- docs/metrics/prometheus/list.md | 1 - 9 files changed, 25 insertions(+), 59 deletions(-) diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index 0409531ec..b58bade0c 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -1155,9 +1155,18 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H s.erasureDisks[m][n] = disk - if disk.IsLocal() && globalIsDistErasure { + if disk.IsLocal() { globalLocalDrivesMu.Lock() - globalLocalSetDrives[s.poolIndex][m][n] = disk + if globalIsDistErasure { + globalLocalSetDrives[s.poolIndex][m][n] = disk + } + for i, ldisk := range globalLocalDrives { + _, k, l := ldisk.GetDiskLoc() + if k == m && l == n { + globalLocalDrives[i] = disk + break + } + } globalLocalDrivesMu.Unlock() } } diff --git a/cmd/erasure.go b/cmd/erasure.go index 0a2bd1adc..7100fae59 100644 --- a/cmd/erasure.go +++ b/cmd/erasure.go @@ -203,7 +203,6 @@ func getDisksInfo(disks []StorageAPI, endpoints []Endpoint, metrics bool) (disks APICalls: make(map[string]uint64, len(info.Metrics.APICalls)), TotalErrorsAvailability: info.Metrics.TotalErrorsAvailability, TotalErrorsTimeout: info.Metrics.TotalErrorsTimeout, - TotalTokens: info.Metrics.TotalTokens, TotalWaiting: info.Metrics.TotalWaiting, } for k, v := range info.Metrics.LastMinute { diff --git a/cmd/globals.go b/cmd/globals.go index 659f37e4e..57ff8ee33 100644 --- a/cmd/globals.go +++ b/cmd/globals.go @@ -419,7 +419,7 @@ var ( globalServiceFreezeMu sync.Mutex // Updates. // List of local drives to this node, this is only set during server startup, - // and should never be mutated. Hold globalLocalDrivesMu to access. + // and is only mutated by HealFormat. Hold globalLocalDrivesMu to access. globalLocalDrives []StorageAPI globalLocalDrivesMu sync.RWMutex diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go index 3a1f2f06f..5d7167389 100644 --- a/cmd/metrics-v2.go +++ b/cmd/metrics-v2.go @@ -556,16 +556,6 @@ func getNodeDriveWaitingIOMD() MetricDescription { } } -func getNodeDriveTokensIOMD() MetricDescription { - return MetricDescription{ - Namespace: nodeMetricNamespace, - Subsystem: driveSubsystem, - Name: "io_tokens", - Help: "Total number concurrent I/O operations configured on drive", - Type: counterMetric, - } -} - func getNodeDriveFreeBytesMD() MetricDescription { return MetricDescription{ Namespace: nodeMetricNamespace, @@ -3485,12 +3475,6 @@ func getLocalStorageMetrics(opts MetricsGroupOpts) *MetricsGroup { VariableLabels: map[string]string{"drive": disk.DrivePath}, }) - metrics = append(metrics, Metric{ - Description: getNodeDriveTokensIOMD(), - Value: float64(disk.Metrics.TotalTokens), - VariableLabels: map[string]string{"drive": disk.DrivePath}, - }) - for apiName, latency := range disk.Metrics.LastMinute { metrics = append(metrics, Metric{ Description: getNodeDriveAPILatencyMD(), diff --git a/cmd/peer-rest-server.go b/cmd/peer-rest-server.go index 5f3cb5495..f8c69969a 100644 --- a/cmd/peer-rest-server.go +++ b/cmd/peer-rest-server.go @@ -833,16 +833,19 @@ func (s *peerRESTServer) CommitBinaryHandler(w http.ResponseWriter, r *http.Requ var errUnsupportedSignal = fmt.Errorf("unsupported signal") func waitingDrivesNode() map[string]madmin.DiskMetrics { - errs := make([]error, len(globalLocalDrives)) - infos := make([]DiskInfo, len(globalLocalDrives)) - for i, drive := range globalLocalDrives { + globalLocalDrivesMu.RLock() + localDrives := globalLocalDrives + globalLocalDrivesMu.RUnlock() + + errs := make([]error, len(localDrives)) + infos := make([]DiskInfo, len(localDrives)) + for i, drive := range localDrives { infos[i], errs[i] = drive.DiskInfo(GlobalContext, DiskInfoOptions{}) } infoMaps := make(map[string]madmin.DiskMetrics) for i := range infos { if infos[i].Metrics.TotalWaiting >= 1 && errors.Is(errs[i], errFaultyDisk) { infoMaps[infos[i].Endpoint] = madmin.DiskMetrics{ - TotalTokens: infos[i].Metrics.TotalTokens, TotalWaiting: infos[i].Metrics.TotalWaiting, } } diff --git a/cmd/storage-datatypes.go b/cmd/storage-datatypes.go index 30adad605..a5065e5bb 100644 --- a/cmd/storage-datatypes.go +++ b/cmd/storage-datatypes.go @@ -80,7 +80,6 @@ type DiskInfo struct { type DiskMetrics struct { LastMinute map[string]AccElem `json:"apiLatencies,omitempty"` APICalls map[string]uint64 `json:"apiCalls,omitempty"` - TotalTokens uint32 `json:"totalTokens,omitempty"` TotalWaiting uint32 `json:"totalWaiting,omitempty"` TotalErrorsAvailability uint64 `json:"totalErrsAvailability"` TotalErrorsTimeout uint64 `json:"totalErrsTimeout"` diff --git a/cmd/storage-datatypes_gen.go b/cmd/storage-datatypes_gen.go index 0eb12c43c..d186929b1 100644 --- a/cmd/storage-datatypes_gen.go +++ b/cmd/storage-datatypes_gen.go @@ -1480,12 +1480,6 @@ func (z *DiskMetrics) DecodeMsg(dc *msgp.Reader) (err error) { } z.APICalls[za0003] = za0004 } - case "TotalTokens": - z.TotalTokens, err = dc.ReadUint32() - if err != nil { - err = msgp.WrapError(err, "TotalTokens") - return - } case "TotalWaiting": z.TotalWaiting, err = dc.ReadUint32() if err != nil { @@ -1529,9 +1523,9 @@ func (z *DiskMetrics) DecodeMsg(dc *msgp.Reader) (err error) { // EncodeMsg implements msgp.Encodable func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) { - // map header, size 8 + // map header, size 7 // write "LastMinute" - err = en.Append(0x88, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65) + err = en.Append(0x87, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65) if err != nil { return } @@ -1574,16 +1568,6 @@ func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) { return } } - // write "TotalTokens" - err = en.Append(0xab, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73) - if err != nil { - return - } - err = en.WriteUint32(z.TotalTokens) - if err != nil { - err = msgp.WrapError(err, "TotalTokens") - return - } // write "TotalWaiting" err = en.Append(0xac, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x57, 0x61, 0x69, 0x74, 0x69, 0x6e, 0x67) if err != nil { @@ -1640,9 +1624,9 @@ func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) { // MarshalMsg implements msgp.Marshaler func (z *DiskMetrics) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, z.Msgsize()) - // map header, size 8 + // map header, size 7 // string "LastMinute" - o = append(o, 0x88, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65) + o = append(o, 0x87, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65) o = msgp.AppendMapHeader(o, uint32(len(z.LastMinute))) for za0001, za0002 := range z.LastMinute { o = msgp.AppendString(o, za0001) @@ -1659,9 +1643,6 @@ func (z *DiskMetrics) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.AppendString(o, za0003) o = msgp.AppendUint64(o, za0004) } - // string "TotalTokens" - o = append(o, 0xab, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73) - o = msgp.AppendUint32(o, z.TotalTokens) // string "TotalWaiting" o = append(o, 0xac, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x57, 0x61, 0x69, 0x74, 0x69, 0x6e, 0x67) o = msgp.AppendUint32(o, z.TotalWaiting) @@ -1758,12 +1739,6 @@ func (z *DiskMetrics) UnmarshalMsg(bts []byte) (o []byte, err error) { } z.APICalls[za0003] = za0004 } - case "TotalTokens": - z.TotalTokens, bts, err = msgp.ReadUint32Bytes(bts) - if err != nil { - err = msgp.WrapError(err, "TotalTokens") - return - } case "TotalWaiting": z.TotalWaiting, bts, err = msgp.ReadUint32Bytes(bts) if err != nil { @@ -1822,7 +1797,7 @@ func (z *DiskMetrics) Msgsize() (s int) { s += msgp.StringPrefixSize + len(za0003) + msgp.Uint64Size } } - s += 12 + msgp.Uint32Size + 13 + msgp.Uint32Size + 24 + msgp.Uint64Size + 19 + msgp.Uint64Size + 12 + msgp.Uint64Size + 13 + msgp.Uint64Size + s += 13 + msgp.Uint32Size + 24 + msgp.Uint64Size + 19 + msgp.Uint64Size + 12 + msgp.Uint64Size + 13 + msgp.Uint64Size return } diff --git a/cmd/storage-rest-common.go b/cmd/storage-rest-common.go index 509ff7c49..32fe56c1a 100644 --- a/cmd/storage-rest-common.go +++ b/cmd/storage-rest-common.go @@ -20,9 +20,7 @@ package cmd //go:generate msgp -file $GOFILE -unexported const ( - // Added orig-volume support for CreateFile, WriteMetadata, ReadVersion, ListDir - // this is needed for performance optimization on bucket checks. - storageRESTVersion = "v56" + storageRESTVersion = "v57" // Remove TotalTokens from DiskMetrics storageRESTVersionPrefix = SlashSeparator + storageRESTVersion storageRESTPrefix = minioReservedBucketPath + "/storage" ) diff --git a/docs/metrics/prometheus/list.md b/docs/metrics/prometheus/list.md index 2ceed69b7..985705445 100644 --- a/docs/metrics/prometheus/list.md +++ b/docs/metrics/prometheus/list.md @@ -194,7 +194,6 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc | `minio_node_drive_errors_timeout` | Total number of drive timeout errors since server start | | `minio_node_drive_errors_availability` | Total number of drive I/O errors, permission denied and timeouts since server start | | `minio_node_drive_io_waiting` | Total number I/O operations waiting on drive | -| `minio_node_drive_io_tokens` | Total number concurrent I/O operations configured on drive | ## Identity and Access Management (IAM) Metrics