mirror of https://github.com/minio/minio.git
heal: large objects fix and avoid .healing.bin corner case premature exit (#20577)
xlStorage.Healing() returns nil if there is an error reading .healing.bin or if the latter is empty. The healingTracker.update() call returns early if .healing.bin is empty; hence, no further update of .healing.bin is possible. A .healing.bin can be empty if os.Open() with O_TRUNC is successful but the next Write returns an error. To avoid this situation, do not make healingTracker.update() return early when .healing.bin is empty, so the file is written again. This commit also fixes incorrect error logging when an object is healed on another drive in the same erasure set, but not on the drive that is actively being healed by the fresh-drive healing code. Currently, it prints <nil> instead of a factual error. * heal: Scan .minio.sys metadata only during site-wide heal (#137) mc admin heal always invokes .minio.sys heal, but sometimes the latter contains a lot of data — many service accounts, STS accounts, etc. — which makes the mc admin heal command very slow. Only invoke .minio.sys healing when no bucket was specified in the `mc admin heal` command.
This commit is contained in:
parent
f7e176d4ca
commit
f85c28e960
|
@ -806,18 +806,20 @@ func (h *healSequence) healDiskMeta(objAPI ObjectLayer) error {
|
||||||
return h.healMinioSysMeta(objAPI, minioConfigPrefix)()
|
return h.healMinioSysMeta(objAPI, minioConfigPrefix)()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (h *healSequence) healItems(objAPI ObjectLayer, bucketsOnly bool) error {
|
func (h *healSequence) healItems(objAPI ObjectLayer) error {
|
||||||
if h.clientToken == bgHealingUUID {
|
if h.clientToken == bgHealingUUID {
|
||||||
// For background heal do nothing.
|
// For background heal do nothing.
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := h.healDiskMeta(objAPI); err != nil {
|
if h.bucket == "" { // heal internal meta only during a site-wide heal
|
||||||
return err
|
if err := h.healDiskMeta(objAPI); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Heal buckets and objects
|
// Heal buckets and objects
|
||||||
return h.healBuckets(objAPI, bucketsOnly)
|
return h.healBuckets(objAPI)
|
||||||
}
|
}
|
||||||
|
|
||||||
// traverseAndHeal - traverses on-disk data and performs healing
|
// traverseAndHeal - traverses on-disk data and performs healing
|
||||||
|
@ -828,8 +830,7 @@ func (h *healSequence) healItems(objAPI ObjectLayer, bucketsOnly bool) error {
|
||||||
// has to wait until a safe point is reached, such as between scanning
|
// has to wait until a safe point is reached, such as between scanning
|
||||||
// two objects.
|
// two objects.
|
||||||
func (h *healSequence) traverseAndHeal(objAPI ObjectLayer) {
|
func (h *healSequence) traverseAndHeal(objAPI ObjectLayer) {
|
||||||
bucketsOnly := false // Heals buckets and objects also.
|
h.traverseAndHealDoneCh <- h.healItems(objAPI)
|
||||||
h.traverseAndHealDoneCh <- h.healItems(objAPI, bucketsOnly)
|
|
||||||
xioutil.SafeClose(h.traverseAndHealDoneCh)
|
xioutil.SafeClose(h.traverseAndHealDoneCh)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -856,14 +857,14 @@ func (h *healSequence) healMinioSysMeta(objAPI ObjectLayer, metaPrefix string) f
|
||||||
}
|
}
|
||||||
|
|
||||||
// healBuckets - check for all buckets heal or just particular bucket.
|
// healBuckets - check for all buckets heal or just particular bucket.
|
||||||
func (h *healSequence) healBuckets(objAPI ObjectLayer, bucketsOnly bool) error {
|
func (h *healSequence) healBuckets(objAPI ObjectLayer) error {
|
||||||
if h.isQuitting() {
|
if h.isQuitting() {
|
||||||
return errHealStopSignalled
|
return errHealStopSignalled
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1. If a bucket was specified, heal only the bucket.
|
// 1. If a bucket was specified, heal only the bucket.
|
||||||
if h.bucket != "" {
|
if h.bucket != "" {
|
||||||
return h.healBucket(objAPI, h.bucket, bucketsOnly)
|
return h.healBucket(objAPI, h.bucket, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
buckets, err := objAPI.ListBuckets(h.ctx, BucketOptions{})
|
buckets, err := objAPI.ListBuckets(h.ctx, BucketOptions{})
|
||||||
|
@ -877,7 +878,7 @@ func (h *healSequence) healBuckets(objAPI ObjectLayer, bucketsOnly bool) error {
|
||||||
})
|
})
|
||||||
|
|
||||||
for _, bucket := range buckets {
|
for _, bucket := range buckets {
|
||||||
if err = h.healBucket(objAPI, bucket.Name, bucketsOnly); err != nil {
|
if err = h.healBucket(objAPI, bucket.Name, false); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -223,9 +223,6 @@ func (h *healingTracker) updateProgress(success, skipped bool, bytes uint64) {
|
||||||
// update will update the tracker on the disk.
|
// update will update the tracker on the disk.
|
||||||
// If the tracker has been deleted an error is returned.
|
// If the tracker has been deleted an error is returned.
|
||||||
func (h *healingTracker) update(ctx context.Context) error {
|
func (h *healingTracker) update(ctx context.Context) error {
|
||||||
if h.disk.Healing() == nil {
|
|
||||||
return fmt.Errorf("healingTracker: drive %q is not marked as healing", h.ID)
|
|
||||||
}
|
|
||||||
h.mu.Lock()
|
h.mu.Lock()
|
||||||
if h.ID == "" || h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 {
|
if h.ID == "" || h.PoolIndex < 0 || h.SetIndex < 0 || h.DiskIndex < 0 {
|
||||||
h.ID, _ = h.disk.GetDiskID()
|
h.ID, _ = h.disk.GetDiskID()
|
||||||
|
|
|
@ -459,8 +459,6 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
var versionHealed bool
|
|
||||||
|
|
||||||
res, err := er.HealObject(ctx, bucket, encodedEntryName,
|
res, err := er.HealObject(ctx, bucket, encodedEntryName,
|
||||||
version.VersionID, madmin.HealOpts{
|
version.VersionID, madmin.HealOpts{
|
||||||
ScanMode: scanMode,
|
ScanMode: scanMode,
|
||||||
|
@ -475,21 +473,23 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Look for the healing results
|
// Look for the healing results
|
||||||
if res.After.Drives[tracker.DiskIndex].State == madmin.DriveStateOk {
|
if res.After.Drives[tracker.DiskIndex].State != madmin.DriveStateOk {
|
||||||
versionHealed = true
|
err = fmt.Errorf("unexpected after heal state: %s", res.After.Drives[tracker.DiskIndex].State)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if versionHealed {
|
if err == nil {
|
||||||
bgSeq.countHealed(madmin.HealItemObject)
|
bgSeq.countHealed(madmin.HealItemObject)
|
||||||
result = healEntrySuccess(uint64(version.Size))
|
result = healEntrySuccess(uint64(version.Size))
|
||||||
} else {
|
} else {
|
||||||
bgSeq.countFailed(madmin.HealItemObject)
|
bgSeq.countFailed(madmin.HealItemObject)
|
||||||
result = healEntryFailure(uint64(version.Size))
|
result = healEntryFailure(uint64(version.Size))
|
||||||
if version.VersionID != "" {
|
if version.VersionID != "" {
|
||||||
healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s-v(%s): %w", bucket, version.Name, version.VersionID, err))
|
healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s (version-id=%s): %w",
|
||||||
|
bucket, version.Name, version.VersionID, err))
|
||||||
} else {
|
} else {
|
||||||
healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w", bucket, version.Name, err))
|
healingLogIf(ctx, fmt.Errorf("unable to heal object %s/%s: %w",
|
||||||
|
bucket, version.Name, err))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -432,15 +432,19 @@ func (s *xlStorage) Healing() *healingTracker {
|
||||||
bucketMetaPrefix, healingTrackerFilename)
|
bucketMetaPrefix, healingTrackerFilename)
|
||||||
b, err := os.ReadFile(healingFile)
|
b, err := os.ReadFile(healingFile)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
if !errors.Is(err, os.ErrNotExist) {
|
||||||
|
internalLogIf(GlobalContext, fmt.Errorf("unable to read %s: %w", healingFile, err))
|
||||||
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
if len(b) == 0 {
|
if len(b) == 0 {
|
||||||
|
internalLogIf(GlobalContext, fmt.Errorf("%s is empty", healingFile))
|
||||||
// 'healing.bin' might be truncated
|
// 'healing.bin' might be truncated
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
h := newHealingTracker()
|
h := newHealingTracker()
|
||||||
_, err = h.UnmarshalMsg(b)
|
_, err = h.UnmarshalMsg(b)
|
||||||
bugLogIf(GlobalContext, err)
|
internalLogIf(GlobalContext, err)
|
||||||
return h
|
return h
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2246,6 +2250,7 @@ func (s *xlStorage) writeAllMeta(ctx context.Context, volume string, path string
|
||||||
return renameAll(tmpFilePath, filePath, volumeDir)
|
return renameAll(tmpFilePath, filePath, volumeDir)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create or truncate an existing file before writing
|
||||||
func (s *xlStorage) writeAllInternal(ctx context.Context, filePath string, b []byte, sync bool, skipParent string) (err error) {
|
func (s *xlStorage) writeAllInternal(ctx context.Context, filePath string, b []byte, sync bool, skipParent string) (err error) {
|
||||||
flags := os.O_CREATE | os.O_WRONLY | os.O_TRUNC
|
flags := os.O_CREATE | os.O_WRONLY | os.O_TRUNC
|
||||||
|
|
||||||
|
@ -2268,16 +2273,11 @@ func (s *xlStorage) writeAllInternal(ctx context.Context, filePath string, b []b
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
n, err := w.Write(b)
|
_, err = w.Write(b)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
w.Close()
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
if n != len(b) {
|
|
||||||
w.Truncate(0) // to indicate that we did partial write.
|
w.Truncate(0) // to indicate that we did partial write.
|
||||||
w.Close()
|
w.Close()
|
||||||
return io.ErrShortWrite
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dealing with error returns from close() - 'man 2 close'
|
// Dealing with error returns from close() - 'man 2 close'
|
||||||
|
|
Loading…
Reference in New Issue