make sure to preserve UUID from reference format (#10748)

reference format should be source of truth
for inconsistent drives which reconnect,
add them back to their original position

remove automatic fix for existing offline
disk uuids
This commit is contained in:
Harshavardhana 2020-10-24 13:23:08 -07:00 committed by GitHub
parent 4442382c16
commit 6a8c62f9fd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 164 additions and 49 deletions

View File

@ -73,6 +73,8 @@ func initAutoHeal(ctx context.Context, objAPI ObjectLayer) {
}
}
go monitorLocalDisksInconsistentAndHeal(ctx, z, bgSeq)
go monitorLocalDisksAndHeal(ctx, z, bgSeq)
}
@ -96,6 +98,50 @@ func getLocalDisksToHeal() (disksToHeal Endpoints) {
}
func getLocalDisksToHealInconsistent() (refFormats []*formatErasureV3, diskFormats [][]*formatErasureV3, disksToHeal [][]StorageAPI) {
disksToHeal = make([][]StorageAPI, len(globalEndpoints))
diskFormats = make([][]*formatErasureV3, len(globalEndpoints))
refFormats = make([]*formatErasureV3, len(globalEndpoints))
for k, ep := range globalEndpoints {
disksToHeal[k] = make([]StorageAPI, len(ep.Endpoints))
diskFormats[k] = make([]*formatErasureV3, len(ep.Endpoints))
formats := make([]*formatErasureV3, len(ep.Endpoints))
storageDisks, _ := initStorageDisksWithErrors(ep.Endpoints)
for i, disk := range storageDisks {
if disk != nil {
format, err := loadFormatErasure(disk)
if err != nil {
// any error we don't care proceed.
continue
}
formats[i] = format
}
}
refFormat, err := getFormatErasureInQuorum(formats)
if err != nil {
logger.LogIf(GlobalContext, fmt.Errorf("No erasured disks are in quorum or too many disks are offline - please investigate immediately"))
continue
}
// We have obtained reference format - check if disks are inconsistent
for i, format := range formats {
if format == nil {
continue
}
if err := formatErasureV3Check(refFormat, format); err != nil {
if errors.Is(err, errInconsistentDisk) {
// Found inconsistencies - check which disk it is.
if storageDisks[i] != nil && storageDisks[i].IsLocal() {
disksToHeal[k][i] = storageDisks[i]
}
}
}
}
refFormats[k] = refFormat
diskFormats[k] = formats
}
return refFormats, diskFormats, disksToHeal
}
func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) {
// Run the background healer
globalBackgroundHealRoutine = newHealRoutine()
@ -104,6 +150,36 @@ func initBackgroundHealing(ctx context.Context, objAPI ObjectLayer) {
globalBackgroundHealState.LaunchNewHealSequence(newBgHealSequence())
}
// monitorLocalDisksInconsistentAndHeal - ensures that inconsistent
// disks are healed appropriately.
func monitorLocalDisksInconsistentAndHeal(ctx context.Context, z *erasureServerSets, bgSeq *healSequence) {
// Perform automatic disk healing when a disk is found to be inconsistent.
for {
select {
case <-ctx.Done():
return
case <-time.After(defaultMonitorNewDiskInterval):
waitForLowHTTPReq(int32(globalEndpoints.NEndpoints()), time.Second)
refFormats, diskFormats, localDisksHeal := getLocalDisksToHealInconsistent()
for k := range refFormats {
for j, disk := range localDisksHeal[k] {
if disk == nil {
continue
}
format := diskFormats[k][j].Clone()
format.Erasure.Sets = refFormats[k].Erasure.Sets
if err := saveFormatErasure(disk, format, true); err != nil {
logger.LogIf(ctx, fmt.Errorf("Unable fix inconsistent format for drive %s: %w", disk, err))
continue
}
globalBackgroundHealState.pushHealLocalDisks(disk.Endpoint())
}
}
}
}
}
// monitorLocalDisksAndHeal - ensures that detected new disks are healed
// 1. Only the concerned erasure set will be listed and healed
// 2. Only the node hosting the disk is responsible to perform the heal
@ -149,7 +225,10 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerSets, bgSeq *
}
// Calculate the set index where the current endpoint belongs
z.serverSets[zoneIdx].erasureDisksMu.RLock()
// Protect reading reference format.
setIndex, _, err := findDiskIndex(z.serverSets[zoneIdx].format, format)
z.serverSets[zoneIdx].erasureDisksMu.RUnlock()
if err != nil {
printEndpointError(endpoint, err, false)
continue
@ -173,7 +252,7 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerSets, bgSeq *
logger.Info("Healing disk '%s' on %s zone complete", disk, humanize.Ordinal(i+1))
if err := disk.DeleteFile(ctx, pathJoin(minioMetaBucket, bucketMetaPrefix),
healingTrackerFilename); err != nil {
healingTrackerFilename); err != nil && !errors.Is(err, errFileNotFound) {
logger.LogIf(ctx, err)
continue
}

View File

@ -83,6 +83,7 @@ type erasureSets struct {
setCount, setDriveCount int
listTolerancePerSet int
monitorContextCancel context.CancelFunc
disksConnectEvent chan diskConnectInfo
// Distribution algorithm of choice.
@ -220,23 +221,18 @@ func (s *erasureSets) connectDisks() {
}
return
}
s.erasureDisksMu.RLock()
setIndex, diskIndex, err := findDiskIndex(s.format, format)
s.erasureDisksMu.RUnlock()
if err != nil {
if endpoint.IsLocal {
globalBackgroundHealState.pushHealLocalDisks(endpoint)
logger.Info(fmt.Sprintf("Found inconsistent drive %s with format.json, attempting to heal... (%s)", endpoint, err))
} else {
printEndpointError(endpoint, err, false)
}
return
}
disk.SetDiskID(format.Erasure.This)
if endpoint.IsLocal && disk.Healing() {
globalBackgroundHealState.pushHealLocalDisks(disk.Endpoint())
logger.Info(fmt.Sprintf("Found the drive %s that needs healing, attempting to heal...", disk))
}
s.erasureDisksMu.RLock()
setIndex, diskIndex, err := findDiskIndex(s.format, format)
s.erasureDisksMu.RUnlock()
if err != nil {
printEndpointError(endpoint, err, false)
return
}
disk.SetDiskID(format.Erasure.This)
s.erasureDisksMu.Lock()
if s.erasureDisks[setIndex][diskIndex] != nil {
@ -341,7 +337,7 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
listTolerancePerSet := 3
// By default this is off
if env.Get("MINIO_API_LIST_STRICT_QUORUM", config.EnableOff) == config.EnableOn {
if env.Get("MINIO_API_LIST_STRICT_QUORUM", config.EnableOn) == config.EnableOn {
listTolerancePerSet = -1
}
@ -412,8 +408,11 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
GlobalStaleUploadsCleanupInterval, GlobalStaleUploadsExpiry)
}
mctx, mctxCancel := context.WithCancel(ctx)
s.monitorContextCancel = mctxCancel
// Start the disk monitoring and connect routine.
go s.monitorAndConnectEndpoints(ctx, defaultMonitorConnectEndpointInterval)
go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval)
go s.maintainMRFList()
go s.healMRFRoutine()
@ -1155,6 +1154,8 @@ func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error)
return err
}
s.monitorContextCancel() // turn-off disk monitoring and replace format.
s.erasureDisksMu.Lock()
// Replace with new reference format.
@ -1186,6 +1187,11 @@ func (s *erasureSets) ReloadFormat(ctx context.Context, dryRun bool) (err error)
s.erasureDisksMu.Unlock()
mctx, mctxCancel := context.WithCancel(GlobalContext)
s.monitorContextCancel = mctxCancel
go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval)
return nil
}
@ -1269,14 +1275,6 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
return res, err
}
for i, format := range formats {
if format != nil {
if ferr := formatErasureV3Check(refFormat, format); ferr != nil {
sErrs[i] = errUnformattedDisk
}
}
}
// Prepare heal-result
res = madmin.HealResultItem{
Type: madmin.HealItemMetadata,
@ -1297,14 +1295,12 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
}
if countErrs(sErrs, errUnformattedDisk) == 0 {
// No unformatted disks found disks are either offline
// or online, no healing is required.
return res, errNoHealRequired
}
// Mark all UUIDs which might be offline, use list
// of formats to mark them appropriately.
markUUIDsOffline(refFormat, formats)
markUUIDsOffline(refFormat, formats, sErrs)
// Initialize a new set of set formats which will be written to disk.
newFormatSets := newHealFormatSets(refFormat, s.setCount, s.setDriveCount, formats, sErrs)
@ -1358,6 +1354,8 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
return madmin.HealResultItem{}, err
}
s.monitorContextCancel() // turn-off disk monitoring and replace format.
s.erasureDisksMu.Lock()
// Replace with new reference format.
@ -1388,6 +1386,10 @@ func (s *erasureSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.H
}
s.erasureDisksMu.Unlock()
mctx, mctxCancel := context.WithCancel(GlobalContext)
s.monitorContextCancel = mctxCancel
go s.monitorAndConnectEndpoints(mctx, defaultMonitorConnectEndpointInterval)
}
return res, nil

View File

@ -131,13 +131,17 @@ func (f *formatErasureV3) Clone() *formatErasureV3 {
}
// Returns formatErasure.Erasure.Version
func newFormatErasureV3(numSets int, setLen int) *formatErasureV3 {
func newFormatErasureV3(numSets int, setLen int, distributionAlgo string) *formatErasureV3 {
format := &formatErasureV3{}
format.Version = formatMetaVersionV1
format.Format = formatBackendErasure
format.ID = mustGetUUID()
format.Erasure.Version = formatErasureVersionV3
if distributionAlgo == "" {
format.Erasure.DistributionAlgo = formatErasureVersionV3DistributionAlgo
} else {
format.Erasure.DistributionAlgo = distributionAlgo
}
format.Erasure.Sets = make([][]string, numSets)
for i := 0; i < numSets; i++ {
@ -645,8 +649,8 @@ func formatErasureV3Check(reference *formatErasureV3, format *formatErasureV3) e
}
for j := range reference.Erasure.Sets[i] {
if reference.Erasure.Sets[i][j] != format.Erasure.Sets[i][j] {
return fmt.Errorf("UUID on positions %d:%d do not match with, expected %s got %s",
i, j, reference.Erasure.Sets[i][j], format.Erasure.Sets[i][j])
return fmt.Errorf("UUID on positions %d:%d do not match with, expected %s got %s: (%w)",
i, j, reference.Erasure.Sets[i][j], format.Erasure.Sets[i][j], errInconsistentDisk)
}
}
}
@ -824,8 +828,8 @@ func fixFormatErasureV3(storageDisks []StorageAPI, endpoints Endpoints, formats
}
// initFormatErasure - save Erasure format configuration on all disks.
func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, setDriveCount int, deploymentID string, sErrs []error) (*formatErasureV3, error) {
format := newFormatErasureV3(setCount, setDriveCount)
func initFormatErasure(ctx context.Context, storageDisks []StorageAPI, setCount, setDriveCount int, distributionAlgo string, deploymentID string, sErrs []error) (*formatErasureV3, error) {
format := newFormatErasureV3(setCount, setDriveCount, distributionAlgo)
formats := make([]*formatErasureV3, len(storageDisks))
wantAtMost := ecDrivesNoConfig(setDriveCount)
@ -942,12 +946,15 @@ func getOfflineUUIDs(refFormat *formatErasureV3, formats []*formatErasureV3) (of
}
// Mark all UUIDs that are offline.
func markUUIDsOffline(refFormat *formatErasureV3, formats []*formatErasureV3) {
func markUUIDsOffline(refFormat *formatErasureV3, formats []*formatErasureV3, errs []error) {
offlineUUIDs := getOfflineUUIDs(refFormat, formats)
for i, set := range refFormat.Erasure.Sets {
setDriveCount := len(set)
for j := range set {
for _, offlineUUID := range offlineUUIDs {
if refFormat.Erasure.Sets[i][j] == offlineUUID {
if refFormat.Erasure.Sets[i][j] == offlineUUID &&
errors.Is(errs[i*setDriveCount+j], errUnformattedDisk) {
// Unformatted drive gets an offline disk UUID
refFormat.Erasure.Sets[i][j] = offlineDiskUUID
}
}

View File

@ -27,7 +27,7 @@ import (
// Test get offline/online uuids.
func TestGetUUIDs(t *testing.T) {
fmtV2 := newFormatErasureV3(4, 16)
fmtV2 := newFormatErasureV3(4, 16, "CRCMOD")
formats := make([]*formatErasureV3, 64)
for i := 0; i < 4; i++ {
@ -61,7 +61,12 @@ func TestGetUUIDs(t *testing.T) {
t.Errorf("Expected offline count '16', got '%d'", gotCount)
}
markUUIDsOffline(fmtV2, formats)
var errs []error
for i := 0; i < 4*16; i++ {
errs = append(errs, errUnformattedDisk)
}
markUUIDsOffline(fmtV2, formats, errs)
gotCount = 0
for i := range fmtV2.Erasure.Sets {
for j := range fmtV2.Erasure.Sets[i] {
@ -93,7 +98,7 @@ func TestFixFormatV3(t *testing.T) {
}
}
format := newFormatErasureV3(1, 8)
format := newFormatErasureV3(1, 8, "CRCMOD")
formats := make([]*formatErasureV3, 8)
for j := 0; j < 8; j++ {
@ -127,7 +132,7 @@ func TestFixFormatV3(t *testing.T) {
// tests formatErasureV3ThisEmpty conditions.
func TestFormatErasureEmpty(t *testing.T) {
format := newFormatErasureV3(1, 16)
format := newFormatErasureV3(1, 16, "CRCMOD")
formats := make([]*formatErasureV3, 16)
for j := 0; j < 16; j++ {
@ -326,7 +331,7 @@ func TestGetFormatErasureInQuorumCheck(t *testing.T) {
setCount := 2
setDriveCount := 16
format := newFormatErasureV3(setCount, setDriveCount)
format := newFormatErasureV3(setCount, setDriveCount, "CRCMOD")
formats := make([]*formatErasureV3, 32)
for i := 0; i < setCount; i++ {
@ -392,7 +397,7 @@ func TestGetErasureID(t *testing.T) {
setCount := 2
setDriveCount := 8
format := newFormatErasureV3(setCount, setDriveCount)
format := newFormatErasureV3(setCount, setDriveCount, "CRCMOD")
formats := make([]*formatErasureV3, 16)
for i := 0; i < setCount; i++ {
@ -447,7 +452,7 @@ func TestNewFormatSets(t *testing.T) {
setCount := 2
setDriveCount := 16
format := newFormatErasureV3(setCount, setDriveCount)
format := newFormatErasureV3(setCount, setDriveCount, "CRCMOD")
formats := make([]*formatErasureV3, 32)
errs := make([]error, 32)

View File

@ -120,6 +120,12 @@ func healErasureSet(ctx context.Context, setIndex int, buckets []BucketInfo, dis
Name: pathJoin(minioMetaBucket, bucketConfigPrefix),
}) // add metadata .minio.sys/ bucket prefixes to heal
// Try to pro-actively heal backend-encrypted file.
bgSeq.sourceCh <- healSource{
bucket: minioMetaBucket,
object: backendEncryptedFile,
}
// Heal all buckets with all objects
for _, bucket := range buckets {
// Heal current bucket

View File

@ -18,6 +18,7 @@ package cmd
import (
"context"
"errors"
"sync"
"strings"
@ -77,7 +78,11 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string)
if !HasSuffix(entryPath, SlashSeparator) {
// Delete the file entry.
err := storage.DeleteFile(ctx, volume, entryPath)
if err != errDiskNotFound && err != errUnformattedDisk {
if !IsErrIgnored(err, []error{
errDiskNotFound,
errUnformattedDisk,
errFileNotFound,
}...) {
logger.LogIf(ctx, err)
}
return err
@ -85,11 +90,15 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string)
// If it's a directory, list and call delFunc() for each entry.
entries, err := storage.ListDir(ctx, volume, entryPath, -1)
// If entryPath prefix never existed, safe to ignore.
if err == errFileNotFound {
// If entryPath prefix never existed, safe to ignore
if errors.Is(err, errFileNotFound) {
return nil
} else if err != nil { // For any other errors fail.
if err != errDiskNotFound && err != errUnformattedDisk {
if !IsErrIgnored(err, []error{
errDiskNotFound,
errUnformattedDisk,
errFileNotFound,
}...) {
logger.LogIf(ctx, err)
}
return err
@ -98,7 +107,11 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string)
// Entry path is empty, just delete it.
if len(entries) == 0 {
err = storage.DeleteFile(ctx, volume, entryPath)
if err != errDiskNotFound && err != errUnformattedDisk {
if !IsErrIgnored(err, []error{
errDiskNotFound,
errUnformattedDisk,
errFileNotFound,
}...) {
logger.LogIf(ctx, err)
}
return err

View File

@ -278,7 +278,7 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
humanize.Ordinal(zoneCount), setCount, setDriveCount)
// Initialize erasure code format on disks
format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, deploymentID, sErrs)
format, err = initFormatErasure(GlobalContext, storageDisks, setCount, setDriveCount, "", deploymentID, sErrs)
if err != nil {
return nil, nil, err
}

View File

@ -27,6 +27,9 @@ var errCorruptedFormat = StorageErr("corrupted backend format, specified disk mo
// errUnformattedDisk - unformatted disk found.
var errUnformattedDisk = StorageErr("unformatted disk found")
// errInconsistentDisk - inconsistent disk found.
var errInconsistentDisk = StorageErr("inconsistent disk found")
// errUnsupporteDisk - when disk does not support O_DIRECT flag.
var errUnsupportedDisk = StorageErr("disk does not support O_DIRECT")