make sure to preserve UUID from reference format (#10748)

The reference format should be the source of truth:
inconsistent drives which reconnect are added
back to their original position.

Remove the automatic fix for existing offline
disk UUIDs.
This commit is contained in:
Harshavardhana 2020-10-24 13:23:08 -07:00 committed by GitHub
parent 4442382c16
commit 6a8c62f9fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 164 additions and 49 deletions

View File

@ -73,6 +73,8 @@ func initAutoHeal(ctx context.Context, objAPI ObjectLayer) {
} }
} }
go monitorLocalDisksInconsistentAndHeal(ctx, z, bgSeq)
go monitorLocalDisksAndHeal(ctx, z, bgSeq) go monitorLocalDisksAndHeal(ctx, z, bgSeq)
} }
@ -96,6 +98,50 @@ func getLocalDisksToHeal() (disksToHeal Endpoints) {
} }
// getLocalDisksToHealInconsistent walks every entry of globalEndpoints and
// reports the local disks whose on-disk format disagrees with the reference
// format agreed on by a quorum of the disks in that entry.
//
// All three results are indexed by the position k of the entry in
// globalEndpoints:
//   - refFormats[k] is the quorum reference format, or nil when
//     getFormatErasureInQuorum returned an error for that entry.
//   - diskFormats[k][i] is the format loaded from disk i, or nil when the disk
//     was not initialised or its format could not be loaded. When no quorum
//     was found the whole row is left as nil entries.
//   - disksToHeal[k][i] is non-nil only for a local disk whose format failed
//     formatErasureV3Check against the reference with errInconsistentDisk.
func getLocalDisksToHealInconsistent() (refFormats []*formatErasureV3, diskFormats [][]*formatErasureV3, disksToHeal [][]StorageAPI) {
	entryCount := len(globalEndpoints)
	disksToHeal = make([][]StorageAPI, entryCount)
	diskFormats = make([][]*formatErasureV3, entryCount)
	refFormats = make([]*formatErasureV3, entryCount)

	for entryIdx, entry := range globalEndpoints {
		diskCount := len(entry.Endpoints)
		disksToHeal[entryIdx] = make([]StorageAPI, diskCount)
		diskFormats[entryIdx] = make([]*formatErasureV3, diskCount)
		loaded := make([]*formatErasureV3, diskCount)

		// Initialisation errors are intentionally dropped: a disk that
		// failed to initialise is nil here and is skipped below.
		disks, _ := initStorageDisksWithErrors(entry.Endpoints)
		for idx, d := range disks {
			if d == nil {
				continue
			}
			// A format that cannot be loaded is simply left as nil.
			if f, loadErr := loadFormatErasure(d); loadErr == nil {
				loaded[idx] = f
			}
		}

		reference, err := getFormatErasureInQuorum(loaded)
		if err != nil {
			logger.LogIf(GlobalContext, fmt.Errorf("No erasured disks are in quorum or too many disks are offline - please investigate immediately"))
			continue
		}

		// With a reference format in hand, pick out the local disks whose
		// own format is inconsistent with it.
		for idx, f := range loaded {
			if f == nil {
				continue
			}
			if !errors.Is(formatErasureV3Check(reference, f), errInconsistentDisk) {
				continue
			}
			if d := disks[idx]; d != nil && d.IsLocal() {
				disksToHeal[entryIdx][idx] = d
			}
		}

		refFormats[entryIdx] = reference
		diskFormats[entryIdx] = loaded
	}

	return refFormats, diskFormats, disksToHeal
}
func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) { func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) {
// Run the background healer // Run the background healer
globalBackgroundHealRoutine = newHealRoutine() globalBackgroundHealRoutine = newHealRoutine()
@ -104,6 +150,36 @@ func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) {
globalBackgroundHealState.LaunchNewHealSequence(newBgHealSequence()) globalBackgroundHealState.LaunchNewHealSequence(newBgHealSequence())
} }
// monitorLocalDisksInconsistentAndHeal - ensures that inconsistent
// disks are healed appropriately.
//
// The loop runs until ctx is cancelled. Once every
// defaultMonitorNewDiskInterval it calls waitForLowHTTPReq, asks
// getLocalDisksToHealInconsistent for the local disks whose format disagrees
// with the reference format, and for each such disk:
//  1. clones the disk's own format and overwrites its Erasure.Sets with the
//     reference format's Erasure.Sets,
//  2. writes the result back with saveFormatErasure,
//  3. on success hands the disk endpoint to
//     globalBackgroundHealState.pushHealLocalDisks.
//
// A failure to save the format is logged and the disk is skipped for this
// pass. z and bgSeq are accepted but not used by this function.
func monitorLocalDisksInconsistentAndHeal(ctx context.Context, z *erasureServerSets, bgSeq *healSequence) {
	// One reusable timer instead of a fresh time.After per iteration, so
	// the timer is released promptly when ctx is cancelled.
	timer := time.NewTimer(defaultMonitorNewDiskInterval)
	defer timer.Stop()

	// Perform automatic disk healing when a disk is found to be inconsistent.
	for {
		select {
		case <-ctx.Done():
			return
		case <-timer.C:
			waitForLowHTTPReq(int32(globalEndpoints.NEndpoints()), time.Second)
			refFormats, diskFormats, localDisksHeal := getLocalDisksToHealInconsistent()
			for k := range refFormats {
				for j, disk := range localDisksHeal[k] {
					if disk == nil {
						continue
					}
					// Keep the disk's own identity but adopt the
					// reference layout of the sets.
					format := diskFormats[k][j].Clone()
					format.Erasure.Sets = refFormats[k].Erasure.Sets
					if err := saveFormatErasure(disk, format, true); err != nil {
						logger.LogIf(ctx, fmt.Errorf("Unable to fix inconsistent format for drive %s: %w", disk, err))
						continue
					}
					globalBackgroundHealState.pushHealLocalDisks(disk.Endpoint())
				}
			}
			// The timer has fired and its channel has been drained above,
			// so Reset is safe; the next wait starts after this pass, as
			// it did with time.After.
			timer.Reset(defaultMonitorNewDiskInterval)
		}
	}
}
// monitorLocalDisksAndHeal - ensures that detected new disks are healed // monitorLocalDisksAndHeal - ensures that detected new disks are healed
// 1. Only the concerned erasure set will be listed and healed // 1. Only the concerned erasure set will be listed and healed
// 2. Only the node hosting the disk is responsible to perform the heal // 2. Only the node hosting the disk is responsible to perform the heal
@ -149,7 +225,10 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerSets, bgSeq *
} }
// Calculate the set index where the current endpoint belongs // Calculate the set index where the current endpoint belongs
z.serverSets[zoneIdx].erasureDisksMu.RLock()
// Protect reading reference format.
setIndex, _, err := findDiskIndex(z.serverSets[zoneIdx].format, format) setIndex, _, err := findDiskIndex(z.serverSets[zoneIdx].format, format)
z.serverSets[zoneIdx].erasureDisksMu.RUnlock()
if err != nil { if err != nil {
printEndpointError(endpoint, err, false) printEndpointError(endpoint, err, false)
continue continue
@ -173,7 +252,7 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerSets, bgSeq *
logger.Info("Healing disk '%s' on %s zone complete", disk, humanize.Ordinal(i+1)) logger.Info("Healing disk '%s' on %s zone complete", disk, humanize.Ordinal(i+1))
if err := disk.DeleteFile(ctx, pathJoin(minioMetaBucket, bucketMetaPrefix), if err := disk.DeleteFile(ctx, pathJoin(minioMetaBucket, bucketMetaPrefix),
healingTrackerFilename); err != nil { healingTrackerFilename); err != nil && !errors.Is(err, errFileNotFound) {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
continue continue
} }

View File

@ -83,7 +83,8 @@ type erasureSets struct {
setCount, setDriveCount int setCount, setDriveCount int
listTolerancePerSet int listTolerancePerSet int
disksConnectEvent chan diskConnectInfo monitorContextCancel context.CancelFunc
disksConnectEvent chan diskConnectInfo
// Distribution algorithm of choice. // Distribution algorithm of choice.
distributionAlgo string distributionAlgo string
@ -220,23 +221,18 @@ func (s *erasureSets) connectDisks() {
} }
return return
} }
s.erasureDisksMu.RLock()
setIndex, diskIndex, err := findDiskIndex(s.format, format)
s.erasureDisksMu.RUnlock()
if err != nil {
if endpoint.IsLocal {
globalBackgroundHealState.pushHealLocalDisks(endpoint)
logger.Info(fmt.Sprintf("Found inconsistent drive %s with format.json, attempting to heal... (%s)", endpoint, err))
} else {
printEndpointError(endpoint, err, false)
}
return
}
disk.SetDiskID(format.Erasure.This)
if endpoint.IsLocal && disk.Healing() { if endpoint.IsLocal && disk.Healing() {
globalBackgroundHealState.pushHealLocalDisks(disk.Endpoint()) globalBackgroundHealState.pushHealLocalDisks(disk.Endpoint())
logger.Info(fmt.Sprintf("Found the drive %s that needs healing, attempting to heal...", disk)) logger.Info(fmt.Sprintf("Found the drive %s that needs healing, attempting to heal...", disk))
} }
s.erasureDisksMu.RLock()
setIndex, diskIndex, err := findDiskIndex(s.format, format)
s.erasureDisksMu.RUnlock()
if err != nil {
printEndpointError(endpoint, err, false)
return
}
disk.SetDiskID(format.Erasure.This)
s.erasureDisksMu.Lock() s.erasureDisksMu.Lock()
if s.erasureDisks[setIndex][diskIndex] != nil { if s.erasureDisks[setIndex][diskIndex] != nil {
@ -341,7 +337,7 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
listTolerancePerSet := 3 listTolerancePerSet := 3
// By default this is off // By default this is off
if env.Get("MINIO_API_LIST_STRICT_QUORUM", config.EnableOff) == config.EnableOn { if env.Get("MINIO_API_LIST_STRICT_QUORUM", config.EnableOn) == config.EnableOn {
listTolerancePerSet = -1 listTolerancePerSet = -1
} }
@ -412,8 +408,11 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
GlobalStaleUploadsCleanupInterval, GlobalStaleUploadsExpiry) GlobalStaleUploadsCleanupInterval, GlobalStaleUploadsExpiry)
} }
mctx, mctxCancel := context.WithCancel(ctx)
s.monitorContextCancel = mctxCancel
// Start the disk monitoring and connect routine. // Start the disk monitoring and connect routine.
go s.monitorAndConnectEndpoints(ctx, defaultMonitorConnectEndpointInterval) go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval)
go s.maintainMRFList() go s.maintainMRFList()
go s.healMRFRoutine() go s.healMRFRoutine()
@ -1155,6 +1154,8 @@ func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error)
return err return err
} }
s.monitorContextCancel() // turn-off disk monitoring and replace format.
s.erasureDisksMu.Lock() s.erasureDisksMu.Lock()
// Replace with new reference format. // Replace with new reference format.
@ -1186,6 +1187,11 @@ func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error)
s.erasureDisksMu.Unlock() s.erasureDisksMu.Unlock()
mctx, mctxCancel := context.WithCancel(GlobalContext)
s.monitorContextCancel = mctxCancel
go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval)
return nil return nil
} }
@ -1269,14 +1275,6 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
return res, err return res, err
} }
for i, format := range formats {
if format != nil {
if ferr := formatErasureV3Check(refFormat, format); ferr != nil {
sErrs[i] = errUnformattedDisk
}
}
}
// Prepare heal-result // Prepare heal-result
res = madmin.HealResultItem{ res = madmin.HealResultItem{
Type: madmin.HealItemMetadata, Type: madmin.HealItemMetadata,
@ -1297,14 +1295,12 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
} }
if countErrs(sErrs, errUnformattedDisk) == 0 { if countErrs(sErrs, errUnformattedDisk) == 0 {
// No unformatted disks found disks are either offline
// or online, no healing is required.
return res, errNoHealRequired return res, errNoHealRequired
} }
// Mark all UUIDs which might be offline, use list // Mark all UUIDs which might be offline, use list
// of formats to mark them appropriately. // of formats to mark them appropriately.
markUUIDsOffline(refFormat, formats) markUUIDsOffline(refFormat, formats, sErrs)
// Initialize a new set of set formats which will be written to disk. // Initialize a new set of set formats which will be written to disk.
newFormatSets := newHealFormatSets(refFormat, s.setCount, s.setDriveCount, formats, sErrs) newFormatSets := newHealFormatSets(refFormat, s.setCount, s.setDriveCount, formats, sErrs)
@ -1358,6 +1354,8 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
return madmin.HealResultItem{}, err return madmin.HealResultItem{}, err
} }
s.monitorContextCancel() // turn-off disk monitoring and replace format.
s.erasureDisksMu.Lock() s.erasureDisksMu.Lock()
// Replace with new reference format. // Replace with new reference format.
@ -1388,6 +1386,10 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
} }
s.erasureDisksMu.Unlock() s.erasureDisksMu.Unlock()
mctx, mctxCancel := context.WithCancel(GlobalContext)
s.monitorContextCancel = mctxCancel
go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval)
} }
return res, nil return res, nil

View File

@ -131,13 +131,17 @@ func (f *formatErasureV3) Clone() *formatErasureV3 {
} }
// Returns formatErasure.Erasure.Version // Returns formatErasure.Erasure.Version
func newFormatErasureV3(numSets int, setLen int) *formatErasureV3 { func newFormatErasureV3(numSets int, setLen int, distributionAlgo string) *formatErasureV3 {
format := &formatErasureV3{} format := &formatErasureV3{}
format.Version = formatMetaVersionV1 format.Version = formatMetaVersionV1
format.Format = formatBackendErasure format.Format = formatBackendErasure
format.ID = mustGetUUID() format.ID = mustGetUUID()
format.Erasure.Version = formatErasureVersionV3 format.Erasure.Version = formatErasureVersionV3
format.Erasure.DistributionAlgo = formatErasureVersionV3DistributionAlgo if distributionAlgo == "" {
format.Erasure.DistributionAlgo = formatErasureVersionV3DistributionAlgo
} else {
format.Erasure.DistributionAlgo = distributionAlgo
}
format.Erasure.Sets = make([][]string, numSets) format.Erasure.Sets = make([][]string, numSets)
for i := 0; i < numSets; i++ { for i := 0; i < numSets; i++ {
@ -645,8 +649,8 @@ func formatErasureV3Check(reference *formatErasureV3, format *formatErasureV3) e
} }
for j := range reference.Erasure.Sets[i] { for j := range reference.Erasure.Sets[i] {
if reference.Erasure.Sets[i][j] != format.Erasure.Sets[i][j] { if reference.Erasure.Sets[i][j] != format.Erasure.Sets[i][j] {
return fmt.Errorf("UUID on positions %d:%d do not match with, expected %s got %s", return fmt.Errorf("UUID on positions %d:%d do not match with, expected %s got %s: (%w)",
i, j, reference.Erasure.Sets[i][j], format.Erasure.Sets[i][j]) i, j, reference.Erasure.Sets[i][j], format.Erasure.Sets[i][j], errInconsistentDisk)
} }
} }
} }
@ -824,8 +828,8 @@ func fixFormatErasureV3(storageDisks []StorageAPI, endpoints Endpoints, formats
} }
// initFormatErasure - save Erasure format configuration on all disks. // initFormatErasure - save Erasure format configuration on all disks.
func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, setDriveCount int, deploymentID string, sErrs []error) (*formatErasureV3, error) { func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, setDriveCount int, distributionAlgo string, deploymentID string, sErrs []error) (*formatErasureV3, error) {
format := newFormatErasureV3(setCount, setDriveCount) format := newFormatErasureV3(setCount, setDriveCount, distributionAlgo)
formats := make([]*formatErasureV3, len(storageDisks)) formats := make([]*formatErasureV3, len(storageDisks))
wantAtMost := ecDrivesNoConfig(setDriveCount) wantAtMost := ecDrivesNoConfig(setDriveCount)
@ -942,12 +946,15 @@ func getOfflineUUIDs(refFormat *formatErasureV3, formats []*formatErasureV3) (of
} }
// Mark all UUIDs that are offline. // Mark all UUIDs that are offline.
func markUUIDsOffline(refFormat *formatErasureV3, formats []*formatErasureV3) { func markUUIDsOffline(refFormat *formatErasureV3, formats []*formatErasureV3, errs []error) {
offlineUUIDs := getOfflineUUIDs(refFormat, formats) offlineUUIDs := getOfflineUUIDs(refFormat, formats)
for i, set := range refFormat.Erasure.Sets { for i, set := range refFormat.Erasure.Sets {
setDriveCount := len(set)
for j := range set { for j := range set {
for _, offlineUUID := range offlineUUIDs { for _, offlineUUID := range offlineUUIDs {
if refFormat.Erasure.Sets[i][j] == offlineUUID { if refFormat.Erasure.Sets[i][j] == offlineUUID &&
errors.Is(errs[i*setDriveCount+j], errUnformattedDisk) {
// Unformatted drive gets an offline disk UUID
refFormat.Erasure.Sets[i][j] = offlineDiskUUID refFormat.Erasure.Sets[i][j] = offlineDiskUUID
} }
} }

View File

@ -27,7 +27,7 @@ import (
// Test get offline/online uuids. // Test get offline/online uuids.
func TestGetUUIDs(t *testing.T) { func TestGetUUIDs(t *testing.T) {
fmtV2 := newFormatErasureV3(4, 16) fmtV2 := newFormatErasureV3(4, 16, "CRCMOD")
formats := make([]*formatErasureV3, 64) formats := make([]*formatErasureV3, 64)
for i := 0; i < 4; i++ { for i := 0; i < 4; i++ {
@ -61,7 +61,12 @@ func TestGetUUIDs(t *testing.T) {
t.Errorf("Expected offline count '16', got '%d'", gotCount) t.Errorf("Expected offline count '16', got '%d'", gotCount)
} }
markUUIDsOffline(fmtV2, formats) var errs []error
for i := 0; i < 4*16; i++ {
errs = append(errs, errUnformattedDisk)
}
markUUIDsOffline(fmtV2, formats, errs)
gotCount = 0 gotCount = 0
for i := range fmtV2.Erasure.Sets { for i := range fmtV2.Erasure.Sets {
for j := range fmtV2.Erasure.Sets[i] { for j := range fmtV2.Erasure.Sets[i] {
@ -93,7 +98,7 @@ func TestFixFormatV3(t *testing.T) {
} }
} }
format := newFormatErasureV3(1, 8) format := newFormatErasureV3(1, 8, "CRCMOD")
formats := make([]*formatErasureV3, 8) formats := make([]*formatErasureV3, 8)
for j := 0; j < 8; j++ { for j := 0; j < 8; j++ {
@ -127,7 +132,7 @@ func TestFixFormatV3(t *testing.T) {
// tests formatErasureV3ThisEmpty conditions. // tests formatErasureV3ThisEmpty conditions.
func TestFormatErasureEmpty(t *testing.T) { func TestFormatErasureEmpty(t *testing.T) {
format := newFormatErasureV3(1, 16) format := newFormatErasureV3(1, 16, "CRCMOD")
formats := make([]*formatErasureV3, 16) formats := make([]*formatErasureV3, 16)
for j := 0; j < 16; j++ { for j := 0; j < 16; j++ {
@ -326,7 +331,7 @@ func TestGetFormatErasureInQuorumCheck(t *testing.T) {
setCount := 2 setCount := 2
setDriveCount := 16 setDriveCount := 16
format := newFormatErasureV3(setCount, setDriveCount) format := newFormatErasureV3(setCount, setDriveCount, "CRCMOD")
formats := make([]*formatErasureV3, 32) formats := make([]*formatErasureV3, 32)
for i := 0; i < setCount; i++ { for i := 0; i < setCount; i++ {
@ -392,7 +397,7 @@ func TestGetErasureID(t *testing.T) {
setCount := 2 setCount := 2
setDriveCount := 8 setDriveCount := 8
format := newFormatErasureV3(setCount, setDriveCount) format := newFormatErasureV3(setCount, setDriveCount, "CRCMOD")
formats := make([]*formatErasureV3, 16) formats := make([]*formatErasureV3, 16)
for i := 0; i < setCount; i++ { for i := 0; i < setCount; i++ {
@ -447,7 +452,7 @@ func TestNewFormatSets(t *testing.T) {
setCount := 2 setCount := 2
setDriveCount := 16 setDriveCount := 16
format := newFormatErasureV3(setCount, setDriveCount) format := newFormatErasureV3(setCount, setDriveCount, "CRCMOD")
formats := make([]*formatErasureV3, 32) formats := make([]*formatErasureV3, 32)
errs := make([]error, 32) errs := make([]error, 32)

View File

@ -120,6 +120,12 @@ func healErasureSet(ctx context.Context, setIndex int, buckets []BucketInfo, dis
Name: pathJoin(minioMetaBucket, bucketConfigPrefix), Name: pathJoin(minioMetaBucket, bucketConfigPrefix),
}) // add metadata .minio.sys/ bucket prefixes to heal }) // add metadata .minio.sys/ bucket prefixes to heal
// Try to pro-actively heal backend-encrypted file.
bgSeq.sourceCh <- healSource{
bucket: minioMetaBucket,
object: backendEncryptedFile,
}
// Heal all buckets with all objects // Heal all buckets with all objects
for _, bucket := range buckets { for _, bucket := range buckets {
// Heal current bucket // Heal current bucket

View File

@ -18,6 +18,7 @@ package cmd
import ( import (
"context" "context"
"errors"
"sync" "sync"
"strings" "strings"
@ -77,7 +78,11 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string)
if !HasSuffix(entryPath, SlashSeparator) { if !HasSuffix(entryPath, SlashSeparator) {
// Delete the file entry. // Delete the file entry.
err := storage.DeleteFile(ctx, volume, entryPath) err := storage.DeleteFile(ctx, volume, entryPath)
if err != errDiskNotFound && err != errUnformattedDisk { if !IsErrIgnored(err, []error{
errDiskNotFound,
errUnformattedDisk,
errFileNotFound,
}...) {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
} }
return err return err
@ -85,11 +90,15 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string)
// If it's a directory, list and call delFunc() for each entry. // If it's a directory, list and call delFunc() for each entry.
entries, err := storage.ListDir(ctx, volume, entryPath, -1) entries, err := storage.ListDir(ctx, volume, entryPath, -1)
// If entryPath prefix never existed, safe to ignore. // If entryPath prefix never existed, safe to ignore
if err == errFileNotFound { if errors.Is(err, errFileNotFound) {
return nil return nil
} else if err != nil { // For any other errors fail. } else if err != nil { // For any other errors fail.
if err != errDiskNotFound && err != errUnformattedDisk { if !IsErrIgnored(err, []error{
errDiskNotFound,
errUnformattedDisk,
errFileNotFound,
}...) {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
} }
return err return err
@ -98,7 +107,11 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string)
// Entry path is empty, just delete it. // Entry path is empty, just delete it.
if len(entries) == 0 { if len(entries) == 0 {
err = storage.DeleteFile(ctx, volume, entryPath) err = storage.DeleteFile(ctx, volume, entryPath)
if err != errDiskNotFound && err != errUnformattedDisk { if !IsErrIgnored(err, []error{
errDiskNotFound,
errUnformattedDisk,
errFileNotFound,
}...) {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
} }
return err return err

View File

@ -278,7 +278,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
humanize.Ordinal(zoneCount), setCount, setDriveCount) humanize.Ordinal(zoneCount), setCount, setDriveCount)
// Initialize erasure code format on disks // Initialize erasure code format on disks
format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, deploymentID, sErrs) format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, "", deploymentID, sErrs)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
} }

View File

@ -27,6 +27,9 @@ var errCorruptedFormat = StorageErr("corrupted backend format, specified disk mo
// errUnformattedDisk - unformatted disk found. // errUnformattedDisk - unformatted disk found.
var errUnformattedDisk = StorageErr("unformatted disk found") var errUnformattedDisk = StorageErr("unformatted disk found")
// errInconsistentDisk - inconsistent disk found.
var errInconsistentDisk = StorageErr("inconsistent disk found")
// errUnsupporteDisk - when disk does not support O_DIRECT flag. // errUnsupporteDisk - when disk does not support O_DIRECT flag.
var errUnsupportedDisk = StorageErr("disk does not support O_DIRECT") var errUnsupportedDisk = StorageErr("disk does not support O_DIRECT")