mirror of https://github.com/minio/minio.git
properly reload a fresh drive when found in a failed state during startup (#20145)
When a drive is in a failed state while a single-node, multiple-drive deployment is started, a fresh replacement disk is not properly healed unless the user restarts the node. Fix this by always adding the fresh disk to globalLocalDrivesMap. Also remove the globalLocalDrives slice for simplification; a map of the node's local drives is sufficient, since the order of a node's local drives is not defined.
This commit is contained in:
parent 33c101544d
commit b7f319b62a
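The gist of the change: instead of keeping the node's local drives in a slice and patching entries by index, drives are kept in a map keyed by their endpoint, so registering a replacement drive under the same endpoint simply overwrites the stale entry. Below is a minimal, self-contained Go sketch of that pattern, not the actual MinIO code; the StorageAPI interface, registerDrive, and cloneDrives here are simplified stand-ins for illustration only.

// Sketch: local drives keyed by endpoint so a replacement overwrites the stale entry.
package main

import (
	"fmt"
	"sync"
)

// StorageAPI stands in for MinIO's storage interface; only the endpoint
// matters for this illustration.
type StorageAPI interface {
	Endpoint() string
}

type drive struct{ endpoint, id string }

func (d drive) Endpoint() string { return d.endpoint }

var (
	localDrivesMu sync.RWMutex
	localDrives   = make(map[string]StorageAPI)
)

// registerDrive adds or replaces the drive for its endpoint. With a slice,
// a fresh replacement found at startup could be missed; with a map it is
// always recorded under its endpoint key.
func registerDrive(d StorageAPI) {
	localDrivesMu.Lock()
	defer localDrivesMu.Unlock()
	localDrives[d.Endpoint()] = d
}

// cloneDrives snapshots the map into a slice for iteration; callers must not
// assume any particular order, matching the commit's note that the order of a
// node's local drives is not defined.
func cloneDrives() []StorageAPI {
	localDrivesMu.RLock()
	defer localDrivesMu.RUnlock()
	out := make([]StorageAPI, 0, len(localDrives))
	for _, d := range localDrives {
		out = append(out, d)
	}
	return out
}

func main() {
	registerDrive(drive{endpoint: "/mnt/disk1", id: "old"})
	registerDrive(drive{endpoint: "/mnt/disk1", id: "fresh"}) // replacement overwrites stale entry
	for _, d := range cloneDrives() {
		fmt.Println(d.Endpoint(), d.(drive).id)
	}
}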
@@ -362,7 +362,7 @@ func initAutoHeal(ctx context.Context, objAPI ObjectLayer) {
 
 func getLocalDisksToHeal() (disksToHeal Endpoints) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 	for _, disk := range localDrives {
 		_, err := disk.DiskInfo(context.Background(), DiskInfoOptions{})
@@ -3553,7 +3553,7 @@ func (p *ReplicationPool) persistToDrive(ctx context.Context, v MRFReplicateEntr
 	}
 
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	for _, localDrive := range localDrives {
@@ -3620,7 +3620,7 @@ func (p *ReplicationPool) loadMRF() (mrfRec MRFReplicateEntries, err error) {
 	}
 
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	for _, localDrive := range localDrives {
@@ -168,7 +168,7 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ
 
 	if !globalIsDistErasure {
 		globalLocalDrivesMu.Lock()
-		globalLocalDrives = localDrives
+		globalLocalDrivesMap = make(map[string]StorageAPI, len(localDrives))
 		for _, drive := range localDrives {
 			globalLocalDrivesMap[drive.Endpoint().String()] = drive
 		}
@@ -262,13 +262,7 @@ func (s *erasureSets) connectDisks(log bool) {
 			if globalIsDistErasure {
 				globalLocalSetDrives[s.poolIndex][setIndex][diskIndex] = disk
 			}
-			for i, ldisk := range globalLocalDrives {
-				_, k, l := ldisk.GetDiskLoc()
-				if k == setIndex && l == diskIndex {
-					globalLocalDrives[i] = disk
-					break
-				}
-			}
+			globalLocalDrivesMap[disk.Endpoint().String()] = disk
 			globalLocalDrivesMu.Unlock()
 		}
 		s.erasureDisksMu.Unlock()
@@ -1135,13 +1129,7 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
 			if globalIsDistErasure {
 				globalLocalSetDrives[s.poolIndex][m][n] = disk
 			}
-			for i, ldisk := range globalLocalDrives {
-				_, k, l := ldisk.GetDiskLoc()
-				if k == m && l == n {
-					globalLocalDrives[i] = disk
-					break
-				}
-			}
+			globalLocalDrivesMap[disk.Endpoint().String()] = disk
 			globalLocalDrivesMu.Unlock()
 		}
 	}
@@ -414,10 +414,9 @@ var (
 	globalServiceFreezeCnt int32
 	globalServiceFreezeMu  sync.Mutex // Updates.
 
-	// List of local drives to this node, this is only set during server startup,
-	// and is only mutated by HealFormat. Hold globalLocalDrivesMu to access.
-	globalLocalDrives    []StorageAPI
-	globalLocalDrivesMap = make(map[string]StorageAPI)
+	// Map of local drives to this node, this is set during server startup,
+	// disk reconnect and mutated by HealFormat. Hold globalLocalDrivesMu to access.
+	globalLocalDrivesMap map[string]StorageAPI
 	globalLocalDrivesMu  sync.RWMutex
 
 	globalDriveMonitoring = env.Get("_MINIO_DRIVE_ACTIVE_MONITORING", config.EnableOn) == config.EnableOn
@@ -262,7 +262,7 @@ func collectDriveMetrics(m madmin.RealtimeMetrics) {
 	latestDriveStatsMu.Unlock()
 
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	for _, d := range localDrives {
@@ -664,7 +664,7 @@ var errUnsupportedSignal = fmt.Errorf("unsupported signal")
 
 func waitingDrivesNode() map[string]madmin.DiskMetrics {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	errs := make([]error, len(localDrives))
@@ -34,7 +34,7 @@ const (
 
 func healBucketLocal(ctx context.Context, bucket string, opts madmin.HealOpts) (res madmin.HealResultItem, err error) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	// Initialize sync waitgroup.
@@ -158,7 +158,7 @@ func healBucketLocal(ctx context.Context, bucket string, opts madmin.HealOpts) (
 
 func listBucketsLocal(ctx context.Context, opts BucketOptions) (buckets []BucketInfo, err error) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	quorum := (len(localDrives) / 2)
@@ -204,15 +204,17 @@ func listBucketsLocal(ctx context.Context, opts BucketOptions) (buckets []Bucket
 	return buckets, nil
 }
 
-func cloneDrives(drives []StorageAPI) []StorageAPI {
-	newDrives := make([]StorageAPI, len(drives))
-	copy(newDrives, drives)
-	return newDrives
+func cloneDrives(drives map[string]StorageAPI) []StorageAPI {
+	copyDrives := make([]StorageAPI, 0, len(drives))
+	for _, drive := range drives {
+		copyDrives = append(copyDrives, drive)
+	}
+	return copyDrives
 }
 
 func getBucketInfoLocal(ctx context.Context, bucket string, opts BucketOptions) (BucketInfo, error) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	g := errgroup.WithNErrs(len(localDrives)).WithConcurrency(32)
@@ -261,7 +263,7 @@ func getBucketInfoLocal(ctx context.Context, bucket string, opts BucketOptions)
 
 func deleteBucketLocal(ctx context.Context, bucket string, opts DeleteBucketOptions) error {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	g := errgroup.WithNErrs(len(localDrives)).WithConcurrency(32)
@@ -299,7 +301,7 @@ func deleteBucketLocal(ctx context.Context, bucket string, opts DeleteBucketOpti
 
 func makeBucketLocal(ctx context.Context, bucket string, opts MakeBucketOptions) error {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	g := errgroup.WithNErrs(len(localDrives)).WithConcurrency(32)
@@ -1340,6 +1340,7 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
 		return collectInternodeStats(httpTraceHdrs(f))
 	}
 
+	globalLocalDrivesMap = make(map[string]StorageAPI)
 	globalLocalSetDrives = make([][][]StorageAPI, len(endpointServerPools))
 	for pool := range globalLocalSetDrives {
 		globalLocalSetDrives[pool] = make([][]StorageAPI, endpointServerPools[pool].SetCount)
@@ -1413,7 +1414,6 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
 		globalLocalDrivesMu.Lock()
 		defer globalLocalDrivesMu.Unlock()
 
-		globalLocalDrives = append(globalLocalDrives, storage)
 		globalLocalDrivesMap[endpoint.String()] = storage
 		globalLocalSetDrives[endpoint.PoolIdx][endpoint.SetIdx][endpoint.DiskIdx] = storage
 		return true