From 0cbdc458c5eba05a4c097a9875be3184f36ee08a Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Mon, 21 Feb 2022 15:51:54 -0800 Subject: [PATCH] fix: do not reload disk format.json on a reconnected disk (#14351) An onlineDisk means its a valid disk but it may be a re-connected disk, this PR verifies that based on LastConn() to only trigger MRF. Current code would again re-load the disk 'format.json' which is not necessary and perhaps an unnecessary call. A potential side affect of this is closing perfectly online disks and getting re-replaced by reloading 'format.json'. This PR tries to avoid this situation by making sure MRF is triggered but not reloading 'format.json' because of MRF. --- cmd/erasure-sets.go | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index 9a9aa66c2..331b0f47d 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -97,21 +97,6 @@ type erasureSets struct { lastConnectDisksOpTime time.Time } -// Return false if endpoint is not connected or has been reconnected after last check -func isEndpointConnectionStable(diskMap map[Endpoint]StorageAPI, endpoint Endpoint, lastCheck time.Time) bool { - disk := diskMap[endpoint] - if disk == nil { - return false - } - if !disk.IsOnline() { - return false - } - if !lastCheck.IsZero() && disk.LastConn().After(lastCheck) { - return false - } - return true -} - func (s *erasureSets) getDiskMap() map[Endpoint]StorageAPI { diskMap := make(map[Endpoint]StorageAPI) @@ -209,12 +194,26 @@ func (s *erasureSets) connectDisks() { }() var wg sync.WaitGroup - setsJustConnected := make([]bool, s.setCount) diskMap := s.getDiskMap() + setsJustConnected := make([]bool, s.setCount) for _, endpoint := range s.endpoints.Endpoints { - if isEndpointConnectionStable(diskMap, endpoint, s.lastConnectDisksOpTime) { - continue + cdisk := diskMap[endpoint] + if cdisk != nil && cdisk.IsOnline() { + if s.lastConnectDisksOpTime.IsZero() { + continue + } + + // An online-disk means its a valid disk but it may be a re-connected disk + // we verify that here based on LastConn(), however we make sure to avoid + // putting it back into the s.erasureDisks by re-placing the disk again. + _, setIndex, _ := cdisk.GetDiskLoc() + if setIndex != -1 { + // Recently disconnected disks must go to MRF + setsJustConnected[setIndex] = cdisk.LastConn().After(s.lastConnectDisksOpTime) + continue + } } + wg.Add(1) go func(endpoint Endpoint) { defer wg.Done() @@ -268,7 +267,7 @@ func (s *erasureSets) connectDisks() { s.erasureDisks[setIndex][diskIndex] = disk } disk.SetDiskLoc(s.poolIndex, setIndex, diskIndex) - setsJustConnected[setIndex] = true + setsJustConnected[setIndex] = true // disk just went online we treat it is as MRF event s.erasureDisksMu.Unlock() }(endpoint) }