From f85c28e960013df229392e4a16908b4cd54f7bad Mon Sep 17 00:00:00 2001 From: Anis Eleuch Date: Sat, 26 Oct 2024 10:58:27 +0100 Subject: [PATCH] heal: large objects fix and avoid .healing.bin corner case premature exit (#20577) xlStorage.Healing() returns nil if there is an error reading .healing.bin or if this latter is empty. healing.bin update() call returns early if .healing.bin is empty; hence, no further update of .healing.bin is possible. A .healing.bin can be empty if os.Open() with O_TRUNC is successful but the next Write returns an error. To avoid this weird situation, avoid making healingTracker.update() to return early if .healing.bin is empty, so write again. This commit also fixes wrong error log printing when an object is healed in another drive in the same erasure set but not in the drive that is actively healing by fresh drive healing code. Currently, it prints instead of a factual error. * heal: Scan .minio.sys metadata only during site-wide heal (#137) mc admin heal always invoke .minio.sys heal, but sometimes, this latter contains a lot of data, many service accounts, STS accounts etc, which makes mc admin heal command very slow. Only invoke .minio.sys healing when no bucket was specified in `mc admin heal` command. --- cmd/admin-heal-ops.go | 19 ++++++++++--------- cmd/background-newdisks-heal-ops.go | 3 --- cmd/global-heal.go | 14 +++++++------- cmd/xl-storage.go | 16 ++++++++-------- 4 files changed, 25 insertions(+), 27 deletions(-) diff --git a/cmd/admin-heal-ops.go b/cmd/admin-heal-ops.go index 4f90f03b8..9b8653923 100644 --- a/cmd/admin-heal-ops.go +++ b/cmd/admin-heal-ops.go @@ -806,18 +806,20 @@ func (h *healSequence) healDiskMeta(objAPI ObjectLayer) error { return h.healMinioSysMeta(objAPI, minioConfigPrefix)() } -func (h *healSequence) healItems(objAPI ObjectLayer, bucketsOnly bool) error { +func (h *healSequence) healItems(objAPI ObjectLayer) error { if h.clientToken == bgHealingUUID { // For background heal do nothing. return nil } - if err := h.healDiskMeta(objAPI); err != nil { - return err + if h.bucket == "" { // heal internal meta only during a site-wide heal + if err := h.healDiskMeta(objAPI); err != nil { + return err + } } // Heal buckets and objects - return h.healBuckets(objAPI, bucketsOnly) + return h.healBuckets(objAPI) } // traverseAndHeal - traverses on-disk data and performs healing @@ -828,8 +830,7 @@ func (h *healSequence) healItems(objAPI ObjectLayer, bucketsOnly bool) error { // has to wait until a safe point is reached, such as between scanning // two objects. func (h *healSequence) traverseAndHeal(objAPI ObjectLayer) { - bucketsOnly := false // Heals buckets and objects also. - h.traverseAndHealDoneCh <- h.healItems(objAPI, bucketsOnly) + h.traverseAndHealDoneCh <- h.healItems(objAPI) xioutil.SafeClose(h.traverseAndHealDoneCh) } @@ -856,14 +857,14 @@ func (h *healSequence) healMinioSysMeta(objAPI ObjectLayer, metaPrefix string) f } // healBuckets - check for all buckets heal or just particular bucket. -func (h *healSequence) healBuckets(objAPI ObjectLayer, bucketsOnly bool) error { +func (h *healSequence) healBuckets(objAPI ObjectLayer) error { if h.isQuitting() { return errHealStopSignalled } // 1. If a bucket was specified, heal only the bucket. if h.bucket != "" { - return h.healBucket(objAPI, h.bucket, bucketsOnly) + return h.healBucket(objAPI, h.bucket, false) } buckets, err := objAPI.ListBuckets(h.ctx, BucketOptions{}) @@ -877,7 +878,7 @@ func (h *healSequence) healBuckets(objAPI ObjectLayer, bucketsOnly bool) error { }) for _, bucket := range buckets { - if err = h.healBucket(objAPI, bucket.Name, bucketsOnly); err != nil { + if err = h.healBucket(objAPI, bucket.Name, false); err != nil { return err } } diff --git a/cmd/background-newdisks-heal-ops.go b/cmd/background-newdisks-heal-ops.go index 894af1cd0..1f1c535f7 100644 --- a/cmd/background-newdisks-heal-ops.go +++ b/cmd/background-newdisks-heal-ops.go @@ -223,9 +223,6 @@ func (h *healingTracker) updateProgress(success, skipped bool, bytes uint64) { // update will update the tracker on the disk. // If the tracker has been deleted an error is returned. func (h *healingTracker) update(ctx context.Context) error { - if h.disk.Healing() == nil { - return fmt.Errorf("healingTracker: drive %q is not marked as healing", h.ID) - } h.mu.Lock() if h.ID == "" || h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 { h.ID, _ = h.disk.GetDiskID() diff --git a/cmd/global-heal.go b/cmd/global-heal.go index 9ae58a323..298550487 100644 --- a/cmd/global-heal.go +++ b/cmd/global-heal.go @@ -459,8 +459,6 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, continue } - var versionHealed bool - res, err := er.HealObject(ctx, bucket, encodedEntryName, version.VersionID, madmin.HealOpts{ ScanMode: scanMode, @@ -475,21 +473,23 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string, } } else { // Look for the healing results - if res.After.Drives[tracker.DiskIndex].State == madmin.DriveStateOk { - versionHealed = true + if res.After.Drives[tracker.DiskIndex].State != madmin.DriveStateOk { + err = fmt.Errorf("unexpected after heal state: %s", res.After.Drives[tracker.DiskIndex].State) } } - if versionHealed { + if err == nil { bgSeq.countHealed(madmin.HealItemObject) result = healEntrySuccess(uint64(version.Size)) } else { bgSeq.countFailed(madmin.HealItemObject) result = healEntryFailure(uint64(version.Size)) if version.VersionID != "" { - healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s-v(%s): %w", bucket, version.Name, version.VersionID, err)) + healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s (version-id=%s): %w", + bucket, version.Name, version.VersionID, err)) } else { - healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", bucket, version.Name, err)) + healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", + bucket, version.Name, err)) } } diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index f655ea5e3..b32a0b444 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -432,15 +432,19 @@ func (s *xlStorage) Healing() *healingTracker { bucketMetaPrefix, healingTrackerFilename) b, err := os.ReadFile(healingFile) if err != nil { + if !errors.Is(err, os.ErrNotExist) { + internalLogIf(GlobalContext, fmt.Errorf("unable to read %s: %w", healingFile, err)) + } return nil } if len(b) == 0 { + internalLogIf(GlobalContext, fmt.Errorf("%s is empty", healingFile)) // 'healing.bin' might be truncated return nil } h := newHealingTracker() _, err = h.UnmarshalMsg(b) - bugLogIf(GlobalContext, err) + internalLogIf(GlobalContext, err) return h } @@ -2246,6 +2250,7 @@ func (s *xlStorage) writeAllMeta(ctx context.Context, volume string, path string return renameAll(tmpFilePath, filePath, volumeDir) } +// Create or truncate an existing file before writing func (s *xlStorage) writeAllInternal(ctx context.Context, filePath string, b []byte, sync bool, skipParent string) (err error) { flags := os.O_CREATE | os.O_WRONLY | os.O_TRUNC @@ -2268,16 +2273,11 @@ func (s *xlStorage) writeAllInternal(ctx context.Context, filePath string, b []b return err } - n, err := w.Write(b) + _, err = w.Write(b) if err != nil { - w.Close() - return err - } - - if n != len(b) { w.Truncate(0) // to indicate that we did partial write. w.Close() - return io.ErrShortWrite + return err } // Dealing with error returns from close() - 'man 2 close'