mirror of
https://github.com/minio/minio.git
synced 2025-01-23 12:43:16 -05:00
honor replaced disk properly by updating globalLocalDrives (#19038)
globalLocalDrives seem to be not updated during the HealFormat() leads to a requirement where the server needs to be restarted for the healing to continue.
This commit is contained in:
parent
6d381f7c0a
commit
eac4e4b279
@ -1155,9 +1155,18 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
|
||||
|
||||
s.erasureDisks[m][n] = disk
|
||||
|
||||
if disk.IsLocal() && globalIsDistErasure {
|
||||
if disk.IsLocal() {
|
||||
globalLocalDrivesMu.Lock()
|
||||
globalLocalSetDrives[s.poolIndex][m][n] = disk
|
||||
if globalIsDistErasure {
|
||||
globalLocalSetDrives[s.poolIndex][m][n] = disk
|
||||
}
|
||||
for i, ldisk := range globalLocalDrives {
|
||||
_, k, l := ldisk.GetDiskLoc()
|
||||
if k == m && l == n {
|
||||
globalLocalDrives[i] = disk
|
||||
break
|
||||
}
|
||||
}
|
||||
globalLocalDrivesMu.Unlock()
|
||||
}
|
||||
}
|
||||
|
@ -203,7 +203,6 @@ func getDisksInfo(disks []StorageAPI, endpoints []Endpoint, metrics bool) (disks
|
||||
APICalls: make(map[string]uint64, len(info.Metrics.APICalls)),
|
||||
TotalErrorsAvailability: info.Metrics.TotalErrorsAvailability,
|
||||
TotalErrorsTimeout: info.Metrics.TotalErrorsTimeout,
|
||||
TotalTokens: info.Metrics.TotalTokens,
|
||||
TotalWaiting: info.Metrics.TotalWaiting,
|
||||
}
|
||||
for k, v := range info.Metrics.LastMinute {
|
||||
|
@ -419,7 +419,7 @@ var (
|
||||
globalServiceFreezeMu sync.Mutex // Updates.
|
||||
|
||||
// List of local drives to this node, this is only set during server startup,
|
||||
// and should never be mutated. Hold globalLocalDrivesMu to access.
|
||||
// and is only mutated by HealFormat. Hold globalLocalDrivesMu to access.
|
||||
globalLocalDrives []StorageAPI
|
||||
globalLocalDrivesMu sync.RWMutex
|
||||
|
||||
|
@ -556,16 +556,6 @@ func getNodeDriveWaitingIOMD() MetricDescription {
|
||||
}
|
||||
}
|
||||
|
||||
func getNodeDriveTokensIOMD() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: nodeMetricNamespace,
|
||||
Subsystem: driveSubsystem,
|
||||
Name: "io_tokens",
|
||||
Help: "Total number concurrent I/O operations configured on drive",
|
||||
Type: counterMetric,
|
||||
}
|
||||
}
|
||||
|
||||
func getNodeDriveFreeBytesMD() MetricDescription {
|
||||
return MetricDescription{
|
||||
Namespace: nodeMetricNamespace,
|
||||
@ -3485,12 +3475,6 @@ func getLocalStorageMetrics(opts MetricsGroupOpts) *MetricsGroup {
|
||||
VariableLabels: map[string]string{"drive": disk.DrivePath},
|
||||
})
|
||||
|
||||
metrics = append(metrics, Metric{
|
||||
Description: getNodeDriveTokensIOMD(),
|
||||
Value: float64(disk.Metrics.TotalTokens),
|
||||
VariableLabels: map[string]string{"drive": disk.DrivePath},
|
||||
})
|
||||
|
||||
for apiName, latency := range disk.Metrics.LastMinute {
|
||||
metrics = append(metrics, Metric{
|
||||
Description: getNodeDriveAPILatencyMD(),
|
||||
|
@ -833,16 +833,19 @@ func (s *peerRESTServer) CommitBinaryHandler(w http.ResponseWriter, r *http.Requ
|
||||
var errUnsupportedSignal = fmt.Errorf("unsupported signal")
|
||||
|
||||
func waitingDrivesNode() map[string]madmin.DiskMetrics {
|
||||
errs := make([]error, len(globalLocalDrives))
|
||||
infos := make([]DiskInfo, len(globalLocalDrives))
|
||||
for i, drive := range globalLocalDrives {
|
||||
globalLocalDrivesMu.RLock()
|
||||
localDrives := globalLocalDrives
|
||||
globalLocalDrivesMu.RUnlock()
|
||||
|
||||
errs := make([]error, len(localDrives))
|
||||
infos := make([]DiskInfo, len(localDrives))
|
||||
for i, drive := range localDrives {
|
||||
infos[i], errs[i] = drive.DiskInfo(GlobalContext, DiskInfoOptions{})
|
||||
}
|
||||
infoMaps := make(map[string]madmin.DiskMetrics)
|
||||
for i := range infos {
|
||||
if infos[i].Metrics.TotalWaiting >= 1 && errors.Is(errs[i], errFaultyDisk) {
|
||||
infoMaps[infos[i].Endpoint] = madmin.DiskMetrics{
|
||||
TotalTokens: infos[i].Metrics.TotalTokens,
|
||||
TotalWaiting: infos[i].Metrics.TotalWaiting,
|
||||
}
|
||||
}
|
||||
|
@ -80,7 +80,6 @@ type DiskInfo struct {
|
||||
type DiskMetrics struct {
|
||||
LastMinute map[string]AccElem `json:"apiLatencies,omitempty"`
|
||||
APICalls map[string]uint64 `json:"apiCalls,omitempty"`
|
||||
TotalTokens uint32 `json:"totalTokens,omitempty"`
|
||||
TotalWaiting uint32 `json:"totalWaiting,omitempty"`
|
||||
TotalErrorsAvailability uint64 `json:"totalErrsAvailability"`
|
||||
TotalErrorsTimeout uint64 `json:"totalErrsTimeout"`
|
||||
|
@ -1480,12 +1480,6 @@ func (z *DiskMetrics) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
}
|
||||
z.APICalls[za0003] = za0004
|
||||
}
|
||||
case "TotalTokens":
|
||||
z.TotalTokens, err = dc.ReadUint32()
|
||||
if err != nil {
|
||||
err = msgp.WrapError(err, "TotalTokens")
|
||||
return
|
||||
}
|
||||
case "TotalWaiting":
|
||||
z.TotalWaiting, err = dc.ReadUint32()
|
||||
if err != nil {
|
||||
@ -1529,9 +1523,9 @@ func (z *DiskMetrics) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 8
|
||||
// map header, size 7
|
||||
// write "LastMinute"
|
||||
err = en.Append(0x88, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
|
||||
err = en.Append(0x87, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@ -1574,16 +1568,6 @@ func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
return
|
||||
}
|
||||
}
|
||||
// write "TotalTokens"
|
||||
err = en.Append(0xab, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = en.WriteUint32(z.TotalTokens)
|
||||
if err != nil {
|
||||
err = msgp.WrapError(err, "TotalTokens")
|
||||
return
|
||||
}
|
||||
// write "TotalWaiting"
|
||||
err = en.Append(0xac, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x57, 0x61, 0x69, 0x74, 0x69, 0x6e, 0x67)
|
||||
if err != nil {
|
||||
@ -1640,9 +1624,9 @@ func (z *DiskMetrics) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
func (z *DiskMetrics) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 8
|
||||
// map header, size 7
|
||||
// string "LastMinute"
|
||||
o = append(o, 0x88, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
|
||||
o = append(o, 0x87, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x4d, 0x69, 0x6e, 0x75, 0x74, 0x65)
|
||||
o = msgp.AppendMapHeader(o, uint32(len(z.LastMinute)))
|
||||
for za0001, za0002 := range z.LastMinute {
|
||||
o = msgp.AppendString(o, za0001)
|
||||
@ -1659,9 +1643,6 @@ func (z *DiskMetrics) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.AppendString(o, za0003)
|
||||
o = msgp.AppendUint64(o, za0004)
|
||||
}
|
||||
// string "TotalTokens"
|
||||
o = append(o, 0xab, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x54, 0x6f, 0x6b, 0x65, 0x6e, 0x73)
|
||||
o = msgp.AppendUint32(o, z.TotalTokens)
|
||||
// string "TotalWaiting"
|
||||
o = append(o, 0xac, 0x54, 0x6f, 0x74, 0x61, 0x6c, 0x57, 0x61, 0x69, 0x74, 0x69, 0x6e, 0x67)
|
||||
o = msgp.AppendUint32(o, z.TotalWaiting)
|
||||
@ -1758,12 +1739,6 @@ func (z *DiskMetrics) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
}
|
||||
z.APICalls[za0003] = za0004
|
||||
}
|
||||
case "TotalTokens":
|
||||
z.TotalTokens, bts, err = msgp.ReadUint32Bytes(bts)
|
||||
if err != nil {
|
||||
err = msgp.WrapError(err, "TotalTokens")
|
||||
return
|
||||
}
|
||||
case "TotalWaiting":
|
||||
z.TotalWaiting, bts, err = msgp.ReadUint32Bytes(bts)
|
||||
if err != nil {
|
||||
@ -1822,7 +1797,7 @@ func (z *DiskMetrics) Msgsize() (s int) {
|
||||
s += msgp.StringPrefixSize + len(za0003) + msgp.Uint64Size
|
||||
}
|
||||
}
|
||||
s += 12 + msgp.Uint32Size + 13 + msgp.Uint32Size + 24 + msgp.Uint64Size + 19 + msgp.Uint64Size + 12 + msgp.Uint64Size + 13 + msgp.Uint64Size
|
||||
s += 13 + msgp.Uint32Size + 24 + msgp.Uint64Size + 19 + msgp.Uint64Size + 12 + msgp.Uint64Size + 13 + msgp.Uint64Size
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -20,9 +20,7 @@ package cmd
|
||||
//go:generate msgp -file $GOFILE -unexported
|
||||
|
||||
const (
|
||||
// Added orig-volume support for CreateFile, WriteMetadata, ReadVersion, ListDir
|
||||
// this is needed for performance optimization on bucket checks.
|
||||
storageRESTVersion = "v56"
|
||||
storageRESTVersion = "v57" // Remove TotalTokens from DiskMetrics
|
||||
storageRESTVersionPrefix = SlashSeparator + storageRESTVersion
|
||||
storageRESTPrefix = minioReservedBucketPath + "/storage"
|
||||
)
|
||||
|
@ -194,7 +194,6 @@ For deployments with [bucket](https://min.io/docs/minio/linux/administration/buc
|
||||
| `minio_node_drive_errors_timeout` | Total number of drive timeout errors since server start |
|
||||
| `minio_node_drive_errors_availability` | Total number of drive I/O errors, permission denied and timeouts since server start |
|
||||
| `minio_node_drive_io_waiting` | Total number I/O operations waiting on drive |
|
||||
| `minio_node_drive_io_tokens` | Total number concurrent I/O operations configured on drive |
|
||||
|
||||
## Identity and Access Management (IAM) Metrics
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user