mirror of
https://github.com/minio/minio.git
synced 2025-01-11 15:03:22 -05:00
fix: do not listAndHeal() inline with PutObject() (#17499)
There is a possibility that slow drives can add latency to the overall call, leading to a large spike in latency. This can happen if there are other parallel listObjects() calls to the same drive, which in turn causes the calls to effectively serialize behind one another. This change potentially improves performance and also makes PutObject() non-blocking.
This commit is contained in:
parent
fcbed41cc3
commit
1f8b9b4bd5
@ -23,6 +23,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -66,6 +67,68 @@ func (fi FileInfo) DataShardFixed() bool {
|
|||||||
return fi.Metadata[reservedMetadataPrefixLowerDataShardFix] == "true"
|
return fi.Metadata[reservedMetadataPrefixLowerDataShardFix] == "true"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (er erasureObjects) listAndHeal(bucket, prefix string, healEntry func(string, metaCacheEntry) error) error {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
defer cancel()
|
||||||
|
|
||||||
|
disks, _ := er.getOnlineDisksWithHealing()
|
||||||
|
if len(disks) == 0 {
|
||||||
|
return errors.New("listAndHeal: No non-healing drives found")
|
||||||
|
}
|
||||||
|
|
||||||
|
// How to resolve partial results.
|
||||||
|
resolver := metadataResolutionParams{
|
||||||
|
dirQuorum: 1,
|
||||||
|
objQuorum: 1,
|
||||||
|
bucket: bucket,
|
||||||
|
strict: false, // Allow less strict matching.
|
||||||
|
}
|
||||||
|
|
||||||
|
path := baseDirFromPrefix(prefix)
|
||||||
|
filterPrefix := strings.Trim(strings.TrimPrefix(prefix, path), slashSeparator)
|
||||||
|
if path == prefix {
|
||||||
|
filterPrefix = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
lopts := listPathRawOptions{
|
||||||
|
disks: disks,
|
||||||
|
bucket: bucket,
|
||||||
|
path: path,
|
||||||
|
filterPrefix: filterPrefix,
|
||||||
|
recursive: true,
|
||||||
|
forwardTo: "",
|
||||||
|
minDisks: 1,
|
||||||
|
reportNotFound: false,
|
||||||
|
agreed: func(entry metaCacheEntry) {
|
||||||
|
if err := healEntry(bucket, entry); err != nil {
|
||||||
|
logger.LogIf(ctx, err)
|
||||||
|
cancel()
|
||||||
|
}
|
||||||
|
},
|
||||||
|
partial: func(entries metaCacheEntries, _ []error) {
|
||||||
|
entry, ok := entries.resolve(&resolver)
|
||||||
|
if !ok {
|
||||||
|
// check if we can get one entry atleast
|
||||||
|
// proceed to heal nonetheless.
|
||||||
|
entry, _ = entries.firstFound()
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := healEntry(bucket, *entry); err != nil {
|
||||||
|
logger.LogIf(ctx, err)
|
||||||
|
cancel()
|
||||||
|
return
|
||||||
|
}
|
||||||
|
},
|
||||||
|
finished: nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := listPathRaw(ctx, lopts); err != nil {
|
||||||
|
return fmt.Errorf("listPathRaw returned %w: opts(%#v)", err, lopts)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// HealBucket heals a bucket if it doesn't exist on one of the disks, additionally
|
// HealBucket heals a bucket if it doesn't exist on one of the disks, additionally
|
||||||
// also heals the missing entries for bucket metadata files
|
// also heals the missing entries for bucket metadata files
|
||||||
// `policy.json, notification.xml, listeners.json`.
|
// `policy.json, notification.xml, listeners.json`.
|
||||||
|
@ -1249,7 +1249,14 @@ func (er erasureObjects) CompleteMultipartUpload(ctx context.Context, bucket str
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !opts.Speedtest && versionsDisparity {
|
if !opts.Speedtest && versionsDisparity {
|
||||||
listAndHeal(ctx, bucket, object, &er, healObjectVersionsDisparity)
|
globalMRFState.addPartialOp(partialOperation{
|
||||||
|
bucket: bucket,
|
||||||
|
object: object,
|
||||||
|
queued: time.Now(),
|
||||||
|
allVersions: true,
|
||||||
|
setIndex: er.setIndex,
|
||||||
|
poolIndex: er.poolIndex,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check if there is any offline disk and add it to the MRF list
|
// Check if there is any offline disk and add it to the MRF list
|
||||||
|
@ -1349,7 +1349,14 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
|
|||||||
}
|
}
|
||||||
|
|
||||||
if versionsDisparity {
|
if versionsDisparity {
|
||||||
listAndHeal(ctx, bucket, object, &er, healObjectVersionsDisparity)
|
globalMRFState.addPartialOp(partialOperation{
|
||||||
|
bucket: bucket,
|
||||||
|
object: object,
|
||||||
|
queued: time.Now(),
|
||||||
|
allVersions: true,
|
||||||
|
setIndex: er.setIndex,
|
||||||
|
poolIndex: er.poolIndex,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1905,68 +1905,6 @@ func (z *erasureServerPools) Walk(ctx context.Context, bucket, prefix string, re
|
|||||||
// HealObjectFn closure function heals the object.
|
// HealObjectFn closure function heals the object.
|
||||||
type HealObjectFn func(bucket, object, versionID string) error
|
type HealObjectFn func(bucket, object, versionID string) error
|
||||||
|
|
||||||
func listAndHeal(ctx context.Context, bucket, prefix string, set *erasureObjects, healEntry func(string, metaCacheEntry) error) error {
|
|
||||||
ctx, cancel := context.WithCancel(ctx)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
disks, _ := set.getOnlineDisksWithHealing()
|
|
||||||
if len(disks) == 0 {
|
|
||||||
return errors.New("listAndHeal: No non-healing drives found")
|
|
||||||
}
|
|
||||||
|
|
||||||
// How to resolve partial results.
|
|
||||||
resolver := metadataResolutionParams{
|
|
||||||
dirQuorum: 1,
|
|
||||||
objQuorum: 1,
|
|
||||||
bucket: bucket,
|
|
||||||
strict: false, // Allow less strict matching.
|
|
||||||
}
|
|
||||||
|
|
||||||
path := baseDirFromPrefix(prefix)
|
|
||||||
filterPrefix := strings.Trim(strings.TrimPrefix(prefix, path), slashSeparator)
|
|
||||||
if path == prefix {
|
|
||||||
filterPrefix = ""
|
|
||||||
}
|
|
||||||
|
|
||||||
lopts := listPathRawOptions{
|
|
||||||
disks: disks,
|
|
||||||
bucket: bucket,
|
|
||||||
path: path,
|
|
||||||
filterPrefix: filterPrefix,
|
|
||||||
recursive: true,
|
|
||||||
forwardTo: "",
|
|
||||||
minDisks: 1,
|
|
||||||
reportNotFound: false,
|
|
||||||
agreed: func(entry metaCacheEntry) {
|
|
||||||
if err := healEntry(bucket, entry); err != nil {
|
|
||||||
logger.LogIf(ctx, err)
|
|
||||||
cancel()
|
|
||||||
}
|
|
||||||
},
|
|
||||||
partial: func(entries metaCacheEntries, _ []error) {
|
|
||||||
entry, ok := entries.resolve(&resolver)
|
|
||||||
if !ok {
|
|
||||||
// check if we can get one entry atleast
|
|
||||||
// proceed to heal nonetheless.
|
|
||||||
entry, _ = entries.firstFound()
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := healEntry(bucket, *entry); err != nil {
|
|
||||||
logger.LogIf(ctx, err)
|
|
||||||
cancel()
|
|
||||||
return
|
|
||||||
}
|
|
||||||
},
|
|
||||||
finished: nil,
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := listPathRaw(ctx, lopts); err != nil {
|
|
||||||
return fmt.Errorf("listPathRaw returned %w: opts(%#v)", err, lopts)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix string, opts madmin.HealOpts, healObjectFn HealObjectFn) error {
|
func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix string, opts madmin.HealOpts, healObjectFn HealObjectFn) error {
|
||||||
healEntry := func(bucket string, entry metaCacheEntry) error {
|
healEntry := func(bucket string, entry metaCacheEntry) error {
|
||||||
if entry.isDir() {
|
if entry.isDir() {
|
||||||
@ -2024,7 +1962,7 @@ func (z *erasureServerPools) HealObjects(ctx context.Context, bucket, prefix str
|
|||||||
go func(idx int, set *erasureObjects) {
|
go func(idx int, set *erasureObjects) {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
|
||||||
errs[idx] = listAndHeal(ctx, bucket, prefix, set, healEntry)
|
errs[idx] = set.listAndHeal(bucket, prefix, healEntry)
|
||||||
}(idx, set)
|
}(idx, set)
|
||||||
}
|
}
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
|
27
cmd/mrf.go
27
cmd/mrf.go
@ -32,17 +32,19 @@ const (
|
|||||||
// partialOperation is a successful upload/delete of an object
|
// partialOperation is a successful upload/delete of an object
|
||||||
// but not written in all disks (having quorum)
|
// but not written in all disks (having quorum)
|
||||||
type partialOperation struct {
|
type partialOperation struct {
|
||||||
bucket string
|
bucket string
|
||||||
object string
|
object string
|
||||||
versionID string
|
versionID string
|
||||||
queued time.Time
|
allVersions bool
|
||||||
|
setIndex, poolIndex int
|
||||||
|
queued time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
// mrfState encapsulates all the information
|
// mrfState encapsulates all the information
|
||||||
// related to the global background MRF.
|
// related to the global background MRF.
|
||||||
type mrfState struct {
|
type mrfState struct {
|
||||||
ctx context.Context
|
ctx context.Context
|
||||||
objectAPI ObjectLayer
|
pools *erasureServerPools
|
||||||
|
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
opCh chan partialOperation
|
opCh chan partialOperation
|
||||||
@ -55,9 +57,12 @@ func (m *mrfState) init(ctx context.Context, objAPI ObjectLayer) {
|
|||||||
|
|
||||||
m.ctx = ctx
|
m.ctx = ctx
|
||||||
m.opCh = make(chan partialOperation, mrfOpsQueueSize)
|
m.opCh = make(chan partialOperation, mrfOpsQueueSize)
|
||||||
m.objectAPI = objAPI
|
|
||||||
|
|
||||||
go globalMRFState.healRoutine()
|
var ok bool
|
||||||
|
m.pools, ok = objAPI.(*erasureServerPools)
|
||||||
|
if ok {
|
||||||
|
go m.healRoutine()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add a partial S3 operation (put/delete) when one or more disks are offline.
|
// Add a partial S3 operation (put/delete) when one or more disks are offline.
|
||||||
@ -101,7 +106,11 @@ func (m *mrfState) healRoutine() {
|
|||||||
if u.object == "" {
|
if u.object == "" {
|
||||||
healBucket(u.bucket, madmin.HealNormalScan)
|
healBucket(u.bucket, madmin.HealNormalScan)
|
||||||
} else {
|
} else {
|
||||||
healObject(u.bucket, u.object, u.versionID, madmin.HealNormalScan)
|
if u.allVersions {
|
||||||
|
m.pools.serverPools[u.poolIndex].sets[u.setIndex].listAndHeal(u.bucket, u.object, healObjectVersionsDisparity)
|
||||||
|
} else {
|
||||||
|
healObject(u.bucket, u.object, u.versionID, madmin.HealNormalScan)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
wait()
|
wait()
|
||||||
|
Loading…
Reference in New Issue
Block a user