mirror of https://github.com/minio/minio.git
properly reload a fresh drive when found in a failed state during startup (#20145)
When a single-node, multiple-drive deployment is started while one of its drives is in a failed state, a fresh replacement disk will not be properly healed unless the user restarts the node. Fix this by always adding a fresh disk to globalLocalDrivesMap. Also remove globalLocalDrives to simplify the code; a map is sufficient for storing a node's local drives, since the order of a node's local drives is not defined.
This commit is contained in:
parent 33c101544d
commit b7f319b62a
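
The heart of the change, visible across the hunks below, is swapping the slice globalLocalDrives (updated by appends plus a linear scan on disk location) for a single map keyed by the drive's endpoint string, so re-registering a replaced drive becomes an idempotent upsert. A minimal standalone sketch of that pattern, under stated assumptions: driveRegistry, registerDrive, and snapshotDrives are illustrative names, not MinIO identifiers.

package drives

import "sync"

// StorageAPI stands in for MinIO's storage interface; this sketch
// only needs something that can be stored per endpoint.
type StorageAPI interface{ String() string }

var (
	driveRegistryMu sync.RWMutex
	driveRegistry   = make(map[string]StorageAPI)
)

// registerDrive upserts a drive by its endpoint. A fresh replacement
// drive with the same endpoint overwrites the stale entry, which the
// old append-to-slice path did not guarantee.
func registerDrive(endpoint string, drive StorageAPI) {
	driveRegistryMu.Lock()
	defer driveRegistryMu.Unlock()
	driveRegistry[endpoint] = drive
}

// snapshotDrives copies the map values into a slice under RLock so
// callers can iterate without holding the lock. The order is
// unspecified, matching the commit's note that the order of a node's
// local drives is not defined.
func snapshotDrives() []StorageAPI {
	driveRegistryMu.RLock()
	defer driveRegistryMu.RUnlock()
	drives := make([]StorageAPI, 0, len(driveRegistry))
	for _, d := range driveRegistry {
		drives = append(drives, d)
	}
	return drives
}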
@@ -362,7 +362,7 @@ func initAutoHeal(ctx context.Context, objAPI ObjectLayer) {
 func getLocalDisksToHeal() (disksToHeal Endpoints) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 	for _, disk := range localDrives {
 		_, err := disk.DiskInfo(context.Background(), DiskInfoOptions{})
@@ -3553,7 +3553,7 @@ func (p *ReplicationPool) persistToDrive(ctx context.Context, v MRFReplicateEntr
 	}

 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()

 	for _, localDrive := range localDrives {
@@ -3620,7 +3620,7 @@ func (p *ReplicationPool) loadMRF() (mrfRec MRFReplicateEntries, err error) {
 	}

 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()

 	for _, localDrive := range localDrives {
@@ -168,7 +168,10 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ
 	if !globalIsDistErasure {
 		globalLocalDrivesMu.Lock()
-		globalLocalDrives = localDrives
+		globalLocalDrivesMap = make(map[string]StorageAPI, len(localDrives))
+		for _, drive := range localDrives {
+			globalLocalDrivesMap[drive.Endpoint().String()] = drive
+		}
@@ -262,13 +262,7 @@ func (s *erasureSets) connectDisks(log bool) {
 			if globalIsDistErasure {
 				globalLocalSetDrives[s.poolIndex][setIndex][diskIndex] = disk
 			}
-			for i, ldisk := range globalLocalDrives {
-				_, k, l := ldisk.GetDiskLoc()
-				if k == setIndex && l == diskIndex {
-					globalLocalDrives[i] = disk
-					break
-				}
-			}
+			globalLocalDrivesMap[disk.Endpoint().String()] = disk
 			globalLocalDrivesMu.Unlock()
 		}
 		s.erasureDisksMu.Unlock()
@@ -1135,13 +1129,7 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
 			if globalIsDistErasure {
 				globalLocalSetDrives[s.poolIndex][m][n] = disk
 			}
-			for i, ldisk := range globalLocalDrives {
-				_, k, l := ldisk.GetDiskLoc()
-				if k == m && l == n {
-					globalLocalDrives[i] = disk
-					break
-				}
-			}
+			globalLocalDrivesMap[disk.Endpoint().String()] = disk
 			globalLocalDrivesMu.Unlock()
 		}
 	}
@@ -414,10 +414,9 @@ var (
 	globalServiceFreezeCnt int32
 	globalServiceFreezeMu  sync.Mutex // Updates.

-	// List of local drives to this node, this is only set during server startup,
-	// and is only mutated by HealFormat. Hold globalLocalDrivesMu to access.
-	globalLocalDrives    []StorageAPI
-	globalLocalDrivesMap = make(map[string]StorageAPI)
+	// Map of local drives to this node, this is set during server startup,
+	// disk reconnect and mutated by HealFormat. Hold globalLocalDrivesMu to access.
+	globalLocalDrivesMap map[string]StorageAPI
 	globalLocalDrivesMu  sync.RWMutex

 	globalDriveMonitoring = env.Get("_MINIO_DRIVE_ACTIVE_MONITORING", config.EnableOn) == config.EnableOn
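
A subtlety in the hunk above: globalLocalDrivesMap is now declared without an initializer, so it starts out as a nil map and is allocated later (in newErasureServerPools for the single-node path and in registerStorageRESTHandlers for the distributed path, per the other hunks). In Go, reads and len on a nil map are safe, but any write panics, so the make call must run before the first registration. A quick self-contained illustration:

package main

import "fmt"

func main() {
	var m map[string]int // nil, like the new bare declaration

	fmt.Println(len(m), m["missing"]) // reads are safe: prints "0 0"

	m = make(map[string]int) // must allocate before the first write
	m["drive-1"] = 1         // writing to the nil map would panic
	fmt.Println(m)
}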
@@ -262,7 +262,7 @@ func collectDriveMetrics(m madmin.RealtimeMetrics) {
 	latestDriveStatsMu.Unlock()

 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()

 	for _, d := range localDrives {
@@ -664,7 +664,7 @@ var errUnsupportedSignal = fmt.Errorf("unsupported signal")

 func waitingDrivesNode() map[string]madmin.DiskMetrics {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()

 	errs := make([]error, len(localDrives))
@@ -34,7 +34,7 @@ const (

 func healBucketLocal(ctx context.Context, bucket string, opts madmin.HealOpts) (res madmin.HealResultItem, err error) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()

 	// Initialize sync waitgroup.
@@ -158,7 +158,7 @@ func healBucketLocal(ctx context.Context, bucket string, opts madmin.HealOpts) (

 func listBucketsLocal(ctx context.Context, opts BucketOptions) (buckets []BucketInfo, err error) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()

 	quorum := (len(localDrives) / 2)
@@ -204,15 +204,17 @@ func listBucketsLocal(ctx context.Context, opts BucketOptions) (buckets []Bucket
 	return buckets, nil
 }

-func cloneDrives(drives []StorageAPI) []StorageAPI {
-	newDrives := make([]StorageAPI, len(drives))
-	copy(newDrives, drives)
-	return newDrives
+func cloneDrives(drives map[string]StorageAPI) []StorageAPI {
+	copyDrives := make([]StorageAPI, 0, len(drives))
+	for _, drive := range drives {
+		copyDrives = append(copyDrives, drive)
+	}
+	return copyDrives
 }

 func getBucketInfoLocal(ctx context.Context, bucket string, opts BucketOptions) (BucketInfo, error) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()

 	g := errgroup.WithNErrs(len(localDrives)).WithConcurrency(32)
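
Because the new cloneDrives flattens the map back into a slice, every call site keeps its existing for ... range localDrives loop unchanged; only the element order now varies between calls, since Go deliberately randomizes map iteration order. A small self-contained demo of that behavior, using plain strings instead of StorageAPI and made-up endpoint keys:

package main

import "fmt"

// cloneDrives mirrors the shape of the new helper above, with string
// values so the example runs on its own.
func cloneDrives(drives map[string]string) []string {
	copyDrives := make([]string, 0, len(drives))
	for _, drive := range drives {
		copyDrives = append(copyDrives, drive)
	}
	return copyDrives
}

func main() {
	m := map[string]string{
		"http://node1/disk1": "disk1",
		"http://node1/disk2": "disk2",
		"http://node1/disk3": "disk3",
	}
	// Two consecutive snapshots may order the drives differently;
	// callers must not rely on slice positions.
	fmt.Println(cloneDrives(m))
	fmt.Println(cloneDrives(m))
}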
@@ -261,7 +263,7 @@ func getBucketInfoLocal(ctx context.Context, bucket string, opts BucketOptions)

 func deleteBucketLocal(ctx context.Context, bucket string, opts DeleteBucketOptions) error {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()

 	g := errgroup.WithNErrs(len(localDrives)).WithConcurrency(32)
@@ -299,7 +301,7 @@ func deleteBucketLocal(ctx context.Context, bucket string, opts DeleteBucketOpti

 func makeBucketLocal(ctx context.Context, bucket string, opts MakeBucketOptions) error {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()

 	g := errgroup.WithNErrs(len(localDrives)).WithConcurrency(32)
@@ -1340,6 +1340,7 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
 		return collectInternodeStats(httpTraceHdrs(f))
 	}

+	globalLocalDrivesMap = make(map[string]StorageAPI)
 	globalLocalSetDrives = make([][][]StorageAPI, len(endpointServerPools))
 	for pool := range globalLocalSetDrives {
 		globalLocalSetDrives[pool] = make([][]StorageAPI, endpointServerPools[pool].SetCount)
@@ -1413,7 +1414,6 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
 			globalLocalDrivesMu.Lock()
 			defer globalLocalDrivesMu.Unlock()

-			globalLocalDrives = append(globalLocalDrives, storage)
 			globalLocalDrivesMap[endpoint.String()] = storage
 			globalLocalSetDrives[endpoint.PoolIdx][endpoint.SetIdx][endpoint.DiskIdx] = storage
 			return true