Mirror of https://github.com/minio/minio.git (synced 2025-10-29 15:55:00 -04:00)
	properly reload a fresh drive when found in a failed state during startup (#20145)
When a drive is in a failed state while a single-node, multiple-drive deployment is starting up, a fresh replacement disk is not properly healed unless the user restarts the node. Fix this by always adding the fresh disk to globalLocalDrivesMap. Also remove globalLocalDrives to simplify the code; a map can store the local node drives since the order of a node's local drives is not defined.
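The core of the change is keying the local-drive registry by endpoint string, so a reconnected or replaced drive simply overwrites its stale entry instead of being matched against a slice by set/disk index. The following is a minimal, self-contained sketch of that idea; the drive type and the registerDrive/snapshotDrives helpers are illustrative stand-ins (the real code uses StorageAPI and the globalLocalDrivesMap shown in the diff below), not the project's actual API.

package main

import (
	"fmt"
	"sync"
)

// drive is a stand-in for a local storage backend; only the endpoint matters here.
type drive struct {
	endpoint string
	healthy  bool
}

var (
	localDrivesMu sync.RWMutex
	localDrives   = make(map[string]drive) // keyed by endpoint, order is irrelevant
)

// registerDrive adds or replaces the entry for an endpoint. With a map there is
// no need to scan a slice for the matching set/disk index: the key does the work.
func registerDrive(d drive) {
	localDrivesMu.Lock()
	defer localDrivesMu.Unlock()
	localDrives[d.endpoint] = d
}

// snapshotDrives copies the map values into a slice for iteration outside the
// lock, mirroring the role of cloneDrives in the patch.
func snapshotDrives() []drive {
	localDrivesMu.RLock()
	defer localDrivesMu.RUnlock()
	out := make([]drive, 0, len(localDrives))
	for _, d := range localDrives {
		out = append(out, d)
	}
	return out
}

func main() {
	registerDrive(drive{endpoint: "/data1", healthy: false}) // failed at startup
	registerDrive(drive{endpoint: "/data2", healthy: true})

	// The fresh replacement for /data1 simply overwrites the failed entry.
	registerDrive(drive{endpoint: "/data1", healthy: true})

	for _, d := range snapshotDrives() {
		fmt.Printf("%s healthy=%v\n", d.endpoint, d.healthy)
	}
}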
This commit is contained in:
	parent 33c101544d
	commit b7f319b62a
@@ -362,7 +362,7 @@ func initAutoHeal(ctx context.Context, objAPI ObjectLayer) {
 
 func getLocalDisksToHeal() (disksToHeal Endpoints) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 	for _, disk := range localDrives {
 		_, err := disk.DiskInfo(context.Background(), DiskInfoOptions{})

@@ -3553,7 +3553,7 @@ func (p *ReplicationPool) persistToDrive(ctx context.Context, v MRFReplicateEntr
 	}
 
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	for _, localDrive := range localDrives {
@@ -3620,7 +3620,7 @@ func (p *ReplicationPool) loadMRF() (mrfRec MRFReplicateEntries, err error) {
 	}
 
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	for _, localDrive := range localDrives {

@@ -168,7 +168,7 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ
 
 	if !globalIsDistErasure {
 		globalLocalDrivesMu.Lock()
-		globalLocalDrives = localDrives
+		globalLocalDrivesMap = make(map[string]StorageAPI, len(localDrives))
 		for _, drive := range localDrives {
 			globalLocalDrivesMap[drive.Endpoint().String()] = drive
 		}

@@ -262,13 +262,7 @@ func (s *erasureSets) connectDisks(log bool) {
 				if globalIsDistErasure {
 					globalLocalSetDrives[s.poolIndex][setIndex][diskIndex] = disk
 				}
-				for i, ldisk := range globalLocalDrives {
-					_, k, l := ldisk.GetDiskLoc()
-					if k == setIndex && l == diskIndex {
-						globalLocalDrives[i] = disk
-						break
-					}
-				}
+				globalLocalDrivesMap[disk.Endpoint().String()] = disk
 				globalLocalDrivesMu.Unlock()
 			}
 			s.erasureDisksMu.Unlock()
@@ -1135,13 +1129,7 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
 					if globalIsDistErasure {
 						globalLocalSetDrives[s.poolIndex][m][n] = disk
 					}
-					for i, ldisk := range globalLocalDrives {
-						_, k, l := ldisk.GetDiskLoc()
-						if k == m && l == n {
-							globalLocalDrives[i] = disk
-							break
-						}
-					}
+					globalLocalDrivesMap[disk.Endpoint().String()] = disk
 					globalLocalDrivesMu.Unlock()
 				}
 			}

@@ -414,10 +414,9 @@ var (
 	globalServiceFreezeCnt int32
 	globalServiceFreezeMu  sync.Mutex // Updates.
 
-	// List of local drives to this node, this is only set during server startup,
-	// and is only mutated by HealFormat. Hold globalLocalDrivesMu to access.
-	globalLocalDrives    []StorageAPI
-	globalLocalDrivesMap = make(map[string]StorageAPI)
+	// Map of local drives to this node, this is set during server startup,
+	// disk reconnect and mutated by HealFormat. Hold globalLocalDrivesMu to access.
+	globalLocalDrivesMap map[string]StorageAPI
 	globalLocalDrivesMu  sync.RWMutex
 
 	globalDriveMonitoring = env.Get("_MINIO_DRIVE_ACTIVE_MONITORING", config.EnableOn) == config.EnableOn

@@ -262,7 +262,7 @@ func collectDriveMetrics(m madmin.RealtimeMetrics) {
 	latestDriveStatsMu.Unlock()
 
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	for _, d := range localDrives {

@@ -664,7 +664,7 @@ var errUnsupportedSignal = fmt.Errorf("unsupported signal")
 
 func waitingDrivesNode() map[string]madmin.DiskMetrics {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	errs := make([]error, len(localDrives))

@@ -34,7 +34,7 @@ const (
 
 func healBucketLocal(ctx context.Context, bucket string, opts madmin.HealOpts) (res madmin.HealResultItem, err error) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	// Initialize sync waitgroup.
@@ -158,7 +158,7 @@ func healBucketLocal(ctx context.Context, bucket string, opts madmin.HealOpts) (
 
 func listBucketsLocal(ctx context.Context, opts BucketOptions) (buckets []BucketInfo, err error) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	quorum := (len(localDrives) / 2)
@@ -204,15 +204,17 @@ func listBucketsLocal(ctx context.Context, opts BucketOptions) (buckets []Bucket
 	return buckets, nil
 }
 
-func cloneDrives(drives []StorageAPI) []StorageAPI {
-	newDrives := make([]StorageAPI, len(drives))
-	copy(newDrives, drives)
-	return newDrives
+func cloneDrives(drives map[string]StorageAPI) []StorageAPI {
+	copyDrives := make([]StorageAPI, 0, len(drives))
+	for _, drive := range drives {
+		copyDrives = append(copyDrives, drive)
+	}
+	return copyDrives
 }
 
 func getBucketInfoLocal(ctx context.Context, bucket string, opts BucketOptions) (BucketInfo, error) {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	g := errgroup.WithNErrs(len(localDrives)).WithConcurrency(32)
@@ -261,7 +263,7 @@ func getBucketInfoLocal(ctx context.Context, bucket string, opts BucketOptions)
 
 func deleteBucketLocal(ctx context.Context, bucket string, opts DeleteBucketOptions) error {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	g := errgroup.WithNErrs(len(localDrives)).WithConcurrency(32)
@@ -299,7 +301,7 @@ func deleteBucketLocal(ctx context.Context, bucket string, opts DeleteBucketOpti
 
 func makeBucketLocal(ctx context.Context, bucket string, opts MakeBucketOptions) error {
 	globalLocalDrivesMu.RLock()
-	localDrives := cloneDrives(globalLocalDrives)
+	localDrives := cloneDrives(globalLocalDrivesMap)
 	globalLocalDrivesMu.RUnlock()
 
 	g := errgroup.WithNErrs(len(localDrives)).WithConcurrency(32)

@@ -1340,6 +1340,7 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
 		return collectInternodeStats(httpTraceHdrs(f))
 	}
 
+	globalLocalDrivesMap = make(map[string]StorageAPI)
 	globalLocalSetDrives = make([][][]StorageAPI, len(endpointServerPools))
 	for pool := range globalLocalSetDrives {
 		globalLocalSetDrives[pool] = make([][]StorageAPI, endpointServerPools[pool].SetCount)
@@ -1413,7 +1414,6 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
 				globalLocalDrivesMu.Lock()
 				defer globalLocalDrivesMu.Unlock()
 
-				globalLocalDrives = append(globalLocalDrives, storage)
 				globalLocalDrivesMap[endpoint.String()] = storage
 				globalLocalSetDrives[endpoint.PoolIdx][endpoint.SetIdx][endpoint.DiskIdx] = storage
 				return true