mirror of
https://github.com/minio/minio.git
synced 2025-01-11 15:03:22 -05:00
fix: do not listAndHeal() inline with PutObject() (#17499)
There is a possibility that slow drives add latency to the overall call, leading to a large latency spike. This can happen when other parallel listObjects() calls hit the same drive, causing the calls to effectively serialize behind one another. Moving listAndHeal() out of the request path potentially improves performance and also makes PutObject() non-blocking.
This commit is contained in:
parent
fcbed41cc3
commit
1f8b9b4bd5
@ -23,6 +23,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@ -66,6 +67,68 @@ func (fi FileInfo) DataShardFixed() bool {
|
||||
return fi.Metadata[reservedMetadataPrefixLowerDataShardFix] == "true"
|
||||
}
|
||||
|
||||
func (er erasureObjects) listAndHeal(bucket, prefix string, healEntry func(string, metaCacheEntry) error) error {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
disks, _ := er.getOnlineDisksWithHealing()
|
||||
if len(disks) == 0 {
|
||||
return errors.New("listAndHeal: No non-healing drives found")
|
||||
}
|
||||
|
||||
// How to resolve partial results.
|
||||
resolver := metadataResolutionParams{
|
||||
dirQuorum: 1,
|
||||
objQuorum: 1,
|
||||
bucket: bucket,
|
||||
strict: false, // Allow less strict matching.
|
||||
}
|
||||
|
||||
path := baseDirFromPrefix(prefix)
|
||||
filterPrefix := strings.Trim(strings.TrimPrefix(prefix, path), slashSeparator)
|
||||
if path == prefix {
|
||||
filterPrefix = ""
|
||||
}
|
||||
|
||||
lopts := listPathRawOptions{
|
||||
disks: disks,
|
||||
bucket: bucket,
|
||||
path: path,
|
||||
filterPrefix: filterPrefix,
|
||||
recursive: true,
|
||||
forwardTo: "",
|
||||
minDisks: 1,
|
||||
reportNotFound: false,
|
||||
agreed: func(entry metaCacheEntry) {
|
||||
if err := healEntry(bucket, entry); err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
cancel()
|
||||
}
|
||||
},
|
||||
partial: func(entries metaCacheEntries, _ []error) {
|
||||
entry, ok := entries.resolve(&resolver)
|
||||
if !ok {
|
||||
// check if we can get one entry atleast
|
||||
// proceed to heal nonetheless.
|
||||
entry, _ = entries.firstFound()
|
||||
}
|
||||
|
||||
if err := healEntry(bucket, *entry); err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
},
|
||||
finished: nil,
|
||||
}
|
||||
|
||||
if err := listPathRaw(ctx, lopts); err != nil {
|
||||
return fmt.Errorf("listPathRaw returned %w: opts(%#v)", err, lopts)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// HealBucket heals a bucket if it doesn't exist on one of the disks, and
|
||||
// also heals the missing entries for bucket metadata files
|
||||
// `policy.json, notification.xml, listeners.json`.
|
||||
|
@ -1249,7 +1249,14 @@ func (er erasureObjects) CompleteMultipartUpload(ctx context.Context, bucket str
|
||||
}
|
||||
|
||||
if !opts.Speedtest && versionsDisparity {
|
||||
listAndHeal(ctx, bucket, object, &er, healObjectVersionsDisparity)
|
||||
globalMRFState.addPartialOp(partialOperation{
|
||||
bucket: bucket,
|
||||
object: object,
|
||||
queued: time.Now(),
|
||||
allVersions: true,
|
||||
setIndex: er.setIndex,
|
||||
poolIndex: er.poolIndex,
|
||||
})
|
||||
}
|
||||
|
||||
// Check if there is any offline disk and add it to the MRF list
|
||||
|
@ -1349,7 +1349,14 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
|
||||
}
|
||||
|
||||
if versionsDisparity {
|
||||
listAndHeal(ctx, bucket, object, &er, healObjectVersionsDisparity)
|
||||
globalMRFState.addPartialOp(partialOperation{
|
||||
bucket: bucket,
|
||||
object: object,
|
||||
queued: time.Now(),
|
||||
allVersions: true,
|
||||
setIndex: er.setIndex,
|
||||
poolIndex: er.poolIndex,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1905,68 +1905,6 @@ func (z *erasureServerPools) Walk(ctx context.Context, bucket, prefix string, re
|
||||
// HealObjectFn is the callback type used to heal a single object. It receives
// the bucket, the object name and the version ID, and returns an error.
|
||||
type HealObjectFn func(bucket, object, versionID string) error
|
||||
|
||||
func listAndHeal(ctx context.Context, bucket, prefix string, set *erasureObjects, healEntry func(string, metaCacheEntry) error) error {
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
|
||||
disks, _ := set.getOnlineDisksWithHealing()
|
||||
if len(disks) == 0 {
|
||||
return errors.New("listAndHeal: No non-healing drives found")
|
||||
}
|
||||
|
||||
// How to resolve partial results.
|
||||
resolver := metadataResolutionParams{
|
||||
dirQuorum: 1,
|
||||
objQuorum: 1,
|
||||
bucket: bucket,
|
||||
strict: false, // Allow less strict matching.
|
||||
}
|
||||
|
||||
path := baseDirFromPrefix(prefix)
|
||||
filterPrefix := strings.Trim(strings.TrimPrefix(prefix, path), slashSeparator)
|
||||
if path == prefix {
|
||||
filterPrefix = ""
|
||||
}
|
||||
|
||||
lopts := listPathRawOptions{
|
||||
disks: disks,
|
||||
bucket: bucket,
|
||||
path: path,
|
||||
filterPrefix: filterPrefix,
|
||||
recursive: true,
|
||||
forwardTo: "",
|
||||
minDisks: 1,
|
||||
reportNotFound: false,
|
||||
agreed: func(entry metaCacheEntry) {
|
||||
if err := healEntry(bucket, entry); err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
cancel()
|
||||
}
|
||||
},
|
||||
partial: func(entries metaCacheEntries, _ []error) {
|
||||
entry, ok := entries.resolve(&resolver)
|
||||
if !ok {
|
||||
// check if we can get one entry atleast
|
||||
// proceed to heal nonetheless.
|
||||
entry, _ = entries.firstFound()
|
||||
}
|
||||
|
||||
if err := healEntry(bucket, *entry); err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
cancel()
|
||||
return
|
||||
}
|
||||
},
|
||||
finished: nil,
|
||||
}
|
||||
|
||||
if err := listPathRaw(ctx, lopts); err != nil {
|
||||
return fmt.Errorf("listPathRaw returned %w: opts(%#v)", err, lopts)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix string, opts madmin.HealOpts, healObjectFn HealObjectFn) error {
|
||||
healEntry := func(bucket string, entry metaCacheEntry) error {
|
||||
if entry.isDir() {
|
||||
@ -2024,7 +1962,7 @@ func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix str
|
||||
go func(idx int, set *erasureObjects) {
|
||||
defer wg.Done()
|
||||
|
||||
errs[idx] = listAndHeal(ctx, bucket, prefix, set, healEntry)
|
||||
errs[idx] = set.listAndHeal(bucket, prefix, healEntry)
|
||||
}(idx, set)
|
||||
}
|
||||
wg.Wait()
|
||||
|
27
cmd/mrf.go
27
cmd/mrf.go
@ -32,17 +32,19 @@ const (
|
||||
// partialOperation is a successful upload/delete of an object
// but not written in all disks (having quorum). It is the unit of work
// queued for the background MRF heal routine.
type partialOperation struct {
	// bucket and object name the target to heal; the heal routine heals the
	// bucket itself when object is empty.
	bucket string
	object string

	// versionID selects the single version to heal when allVersions is false.
	versionID string

	// allVersions requests a list-and-heal under the object name on the
	// erasure set identified by poolIndex/setIndex, instead of healing the
	// single versionID.
	allVersions bool

	// setIndex and poolIndex locate the erasure set used when allVersions
	// is true.
	setIndex, poolIndex int

	// queued is the time the operation was enqueued.
	queued time.Time
}
|
||||
|
||||
// mrfState encapsulates all the information
|
||||
// related to the global background MRF.
|
||||
type mrfState struct {
|
||||
ctx context.Context
|
||||
objectAPI ObjectLayer
|
||||
ctx context.Context
|
||||
pools *erasureServerPools
|
||||
|
||||
mu sync.Mutex
|
||||
opCh chan partialOperation
|
||||
@ -55,9 +57,12 @@ func (m *mrfState) init(ctx context.Context, objAPI ObjectLayer) {
|
||||
|
||||
m.ctx = ctx
|
||||
m.opCh = make(chan partialOperation, mrfOpsQueueSize)
|
||||
m.objectAPI = objAPI
|
||||
|
||||
go globalMRFState.healRoutine()
|
||||
var ok bool
|
||||
m.pools, ok = objAPI.(*erasureServerPools)
|
||||
if ok {
|
||||
go m.healRoutine()
|
||||
}
|
||||
}
|
||||
|
||||
// Add a partial S3 operation (put/delete) when one or more disks are offline.
|
||||
@ -101,7 +106,11 @@ func (m *mrfState) healRoutine() {
|
||||
if u.object == "" {
|
||||
healBucket(u.bucket, madmin.HealNormalScan)
|
||||
} else {
|
||||
healObject(u.bucket, u.object, u.versionID, madmin.HealNormalScan)
|
||||
if u.allVersions {
|
||||
m.pools.serverPools[u.poolIndex].sets[u.setIndex].listAndHeal(u.bucket, u.object, healObjectVersionsDisparity)
|
||||
} else {
|
||||
healObject(u.bucket, u.object, u.versionID, madmin.HealNormalScan)
|
||||
}
|
||||
}
|
||||
|
||||
wait()
|
||||
|
Loading…
Reference in New Issue
Block a user