mirror of https://github.com/minio/minio.git
heal: large objects fix and avoid .healing.bin corner case premature exit (#20577)
xlStorage.Healing() returns nil if there is an error reading .healing.bin or if this latter is empty. healing.bin update() call returns early if .healing.bin is empty; hence, no further update of .healing.bin is possible. A .healing.bin can be empty if os.Open() with O_TRUNC is successful but the next Write returns an error. To avoid this weird situation, avoid making healingTracker.update() to return early if .healing.bin is empty, so write again. This commit also fixes wrong error log printing when an object is healed in another drive in the same erasure set but not in the drive that is actively healing by fresh drive healing code. Currently, it prints <nil> instead of a factual error. * heal: Scan .minio.sys metadata only during site-wide heal (#137) mc admin heal always invoke .minio.sys heal, but sometimes, this latter contains a lot of data, many service accounts, STS accounts etc, which makes mc admin heal command very slow. Only invoke .minio.sys healing when no bucket was specified in `mc admin heal` command.
This commit is contained in:
parent
f7e176d4ca
commit
f85c28e960
|
@ -806,18 +806,20 @@ func (h *healSequence) healDiskMeta(objAPI ObjectLayer) error {
|
|||
return h.healMinioSysMeta(objAPI, minioConfigPrefix)()
|
||||
}
|
||||
|
||||
func (h *healSequence) healItems(objAPI ObjectLayer, bucketsOnly bool) error {
|
||||
func (h *healSequence) healItems(objAPI ObjectLayer) error {
|
||||
if h.clientToken == bgHealingUUID {
|
||||
// For background heal do nothing.
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := h.healDiskMeta(objAPI); err != nil {
|
||||
return err
|
||||
if h.bucket == "" { // heal internal meta only during a site-wide heal
|
||||
if err := h.healDiskMeta(objAPI); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
// Heal buckets and objects
|
||||
return h.healBuckets(objAPI, bucketsOnly)
|
||||
return h.healBuckets(objAPI)
|
||||
}
|
||||
|
||||
// traverseAndHeal - traverses on-disk data and performs healing
|
||||
|
@ -828,8 +830,7 @@ func (h *healSequence) healItems(objAPI ObjectLayer, bucketsOnly bool) error {
|
|||
// has to wait until a safe point is reached, such as between scanning
|
||||
// two objects.
|
||||
func (h *healSequence) traverseAndHeal(objAPI ObjectLayer) {
|
||||
bucketsOnly := false // Heals buckets and objects also.
|
||||
h.traverseAndHealDoneCh <- h.healItems(objAPI, bucketsOnly)
|
||||
h.traverseAndHealDoneCh <- h.healItems(objAPI)
|
||||
xioutil.SafeClose(h.traverseAndHealDoneCh)
|
||||
}
|
||||
|
||||
|
@ -856,14 +857,14 @@ func (h *healSequence) healMinioSysMeta(objAPI ObjectLayer, metaPrefix string) f
|
|||
}
|
||||
|
||||
// healBuckets - check for all buckets heal or just particular bucket.
|
||||
func (h *healSequence) healBuckets(objAPI ObjectLayer, bucketsOnly bool) error {
|
||||
func (h *healSequence) healBuckets(objAPI ObjectLayer) error {
|
||||
if h.isQuitting() {
|
||||
return errHealStopSignalled
|
||||
}
|
||||
|
||||
// 1. If a bucket was specified, heal only the bucket.
|
||||
if h.bucket != "" {
|
||||
return h.healBucket(objAPI, h.bucket, bucketsOnly)
|
||||
return h.healBucket(objAPI, h.bucket, false)
|
||||
}
|
||||
|
||||
buckets, err := objAPI.ListBuckets(h.ctx, BucketOptions{})
|
||||
|
@ -877,7 +878,7 @@ func (h *healSequence) healBuckets(objAPI ObjectLayer, bucketsOnly bool) error {
|
|||
})
|
||||
|
||||
for _, bucket := range buckets {
|
||||
if err = h.healBucket(objAPI, bucket.Name, bucketsOnly); err != nil {
|
||||
if err = h.healBucket(objAPI, bucket.Name, false); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
|
|
@ -223,9 +223,6 @@ func (h *healingTracker) updateProgress(success, skipped bool, bytes uint64) {
|
|||
// update will update the tracker on the disk.
|
||||
// If the tracker has been deleted an error is returned.
|
||||
func (h *healingTracker) update(ctx context.Context) error {
|
||||
if h.disk.Healing() == nil {
|
||||
return fmt.Errorf("healingTracker: drive %q is not marked as healing", h.ID)
|
||||
}
|
||||
h.mu.Lock()
|
||||
if h.ID == "" || h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 {
|
||||
h.ID, _ = h.disk.GetDiskID()
|
||||
|
|
|
@ -459,8 +459,6 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||
continue
|
||||
}
|
||||
|
||||
var versionHealed bool
|
||||
|
||||
res, err := er.HealObject(ctx, bucket, encodedEntryName,
|
||||
version.VersionID, madmin.HealOpts{
|
||||
ScanMode: scanMode,
|
||||
|
@ -475,21 +473,23 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
|||
}
|
||||
} else {
|
||||
// Look for the healing results
|
||||
if res.After.Drives[tracker.DiskIndex].State == madmin.DriveStateOk {
|
||||
versionHealed = true
|
||||
if res.After.Drives[tracker.DiskIndex].State != madmin.DriveStateOk {
|
||||
err = fmt.Errorf("unexpected after heal state: %s", res.After.Drives[tracker.DiskIndex].State)
|
||||
}
|
||||
}
|
||||
|
||||
if versionHealed {
|
||||
if err == nil {
|
||||
bgSeq.countHealed(madmin.HealItemObject)
|
||||
result = healEntrySuccess(uint64(version.Size))
|
||||
} else {
|
||||
bgSeq.countFailed(madmin.HealItemObject)
|
||||
result = healEntryFailure(uint64(version.Size))
|
||||
if version.VersionID != "" {
|
||||
healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s-v(%s): %w", bucket, version.Name, version.VersionID, err))
|
||||
healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s (version-id=%s): %w",
|
||||
bucket, version.Name, version.VersionID, err))
|
||||
} else {
|
||||
healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", bucket, version.Name, err))
|
||||
healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w",
|
||||
bucket, version.Name, err))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -432,15 +432,19 @@ func (s *xlStorage) Healing() *healingTracker {
|
|||
bucketMetaPrefix, healingTrackerFilename)
|
||||
b, err := os.ReadFile(healingFile)
|
||||
if err != nil {
|
||||
if !errors.Is(err, os.ErrNotExist) {
|
||||
internalLogIf(GlobalContext, fmt.Errorf("unable to read %s: %w", healingFile, err))
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if len(b) == 0 {
|
||||
internalLogIf(GlobalContext, fmt.Errorf("%s is empty", healingFile))
|
||||
// 'healing.bin' might be truncated
|
||||
return nil
|
||||
}
|
||||
h := newHealingTracker()
|
||||
_, err = h.UnmarshalMsg(b)
|
||||
bugLogIf(GlobalContext, err)
|
||||
internalLogIf(GlobalContext, err)
|
||||
return h
|
||||
}
|
||||
|
||||
|
@ -2246,6 +2250,7 @@ func (s *xlStorage) writeAllMeta(ctx context.Context, volume string, path string
|
|||
return renameAll(tmpFilePath, filePath, volumeDir)
|
||||
}
|
||||
|
||||
// Create or truncate an existing file before writing
|
||||
func (s *xlStorage) writeAllInternal(ctx context.Context, filePath string, b []byte, sync bool, skipParent string) (err error) {
|
||||
flags := os.O_CREATE | os.O_WRONLY | os.O_TRUNC
|
||||
|
||||
|
@ -2268,16 +2273,11 @@ func (s *xlStorage) writeAllInternal(ctx context.Context, filePath string, b []b
|
|||
return err
|
||||
}
|
||||
|
||||
n, err := w.Write(b)
|
||||
_, err = w.Write(b)
|
||||
if err != nil {
|
||||
w.Close()
|
||||
return err
|
||||
}
|
||||
|
||||
if n != len(b) {
|
||||
w.Truncate(0) // to indicate that we did partial write.
|
||||
w.Close()
|
||||
return io.ErrShortWrite
|
||||
return err
|
||||
}
|
||||
|
||||
// Dealing with error returns from close() - 'man 2 close'
|
||||
|
|
Loading…
Reference in New Issue