fix: do not attempt in-memory metacache writer when discarding results (#11163)

Optimizations include

- do not write the metacache block if the size of the
  block is '0' and it is the first block. When listing
  is attempted for a transient prefix, this helps to
  avoid creating lots of empty metacache entries for
  `minioMetaBucket`

- avoid the entire initialization sequence of cacheCh
  and metacacheBlockWriter if we are simply going to
  skip them when discardResult is set to true.

- No need to hold write locks while writing metacache
  blocks - each block is unique per bucket and per
  prefix, and is written by a single node.
This commit is contained in:
Harshavardhana 2020-12-24 15:02:02 -08:00 committed by GitHub
parent 45ea161f8d
commit 027e17468a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 101 additions and 63 deletions

View File

@ -570,8 +570,13 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
bucket: bucket,
object: entry.name,
versionID: "",
opts: &madmin.HealOpts{
Remove: true,
},
}, madmin.HealItemObject)
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
logger.LogIf(ctx, err)
}
foundObjs = foundObjs || err == nil
return
}
@ -583,8 +588,13 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
bucket: bucket,
object: fiv.Name,
versionID: ver.VersionID,
opts: &madmin.HealOpts{
Remove: true,
},
}, madmin.HealItemObject)
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
logger.LogIf(ctx, err)
}
foundObjs = foundObjs || err == nil
}
},

View File

@ -674,11 +674,13 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
return ObjectInfo{}, IncompleteBody{Bucket: bucket, Object: object}
}
if !opts.NoLock {
lk := er.NewNSLock(bucket, object)
if err := lk.GetLock(ctx, globalOperationTimeout); err != nil {
return ObjectInfo{}, err
}
defer lk.Unlock()
}
for i, w := range writers {
if w == nil {

View File

@ -130,8 +130,10 @@ func healErasureSet(ctx context.Context, setIndex int, buckets []BucketInfo, dis
bucket: minioMetaBucket,
object: backendEncryptedFile,
}, madmin.HealItemMetadata); err != nil {
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
logger.LogIf(ctx, err)
}
}
// Heal all buckets with all objects
for _, bucket := range buckets {
@ -139,8 +141,10 @@ func healErasureSet(ctx context.Context, setIndex int, buckets []BucketInfo, dis
if err := bgSeq.queueHealTask(healSource{
bucket: bucket.Name,
}, madmin.HealItemBucket); err != nil {
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
logger.LogIf(ctx, err)
}
}
var entryChs []FileInfoVersionsCh
var mu sync.Mutex
@ -179,11 +183,13 @@ func healErasureSet(ctx context.Context, setIndex int, buckets []BucketInfo, dis
object: version.Name,
versionID: version.VersionID,
}, madmin.HealItemObject); err != nil {
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
logger.LogIf(ctx, err)
}
}
}
}
}
return nil
}

View File

@ -615,13 +615,18 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
}
// Create output for our results.
cacheCh := make(chan metaCacheEntry, metacacheBlockSize)
var cacheCh chan metaCacheEntry
if !o.discardResult {
cacheCh = make(chan metaCacheEntry, metacacheBlockSize)
}
// Create filter for results.
filterCh := make(chan metaCacheEntry, 100)
filteredResults := o.gatherResults(filterCh)
closeChannels := func() {
if !o.discardResult {
close(cacheCh)
}
close(filterCh)
}
@ -657,19 +662,26 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
}()
const retryDelay = 200 * time.Millisecond
const maxTries = 10
const maxTries = 5
// Write results to disk.
bw := newMetacacheBlockWriter(cacheCh, func(b *metacacheBlock) error {
if o.discardResult {
var bw *metacacheBlockWriter
// Don't save single object listings.
if !o.discardResult {
// Write results to disk.
bw = newMetacacheBlockWriter(cacheCh, func(b *metacacheBlock) error {
// If the block is 0 bytes and it is the first block, skip it.
// Only skip this for Transient caches.
if len(b.data) == 0 && b.n == 0 && o.Transient {
return nil
}
o.debugln("listPath: saving block", b.n, "to", o.objectPath(b.n))
r, err := hash.NewReader(bytes.NewBuffer(b.data), int64(len(b.data)), "", "", int64(len(b.data)), false)
r, err := hash.NewReader(bytes.NewReader(b.data), int64(len(b.data)), "", "", int64(len(b.data)), false)
logger.LogIf(ctx, err)
custom := b.headerKV()
_, err = er.putObject(ctx, minioMetaBucket, o.objectPath(b.n), NewPutObjReader(r, nil, nil), ObjectOptions{UserDefined: custom})
_, err = er.putObject(ctx, minioMetaBucket, o.objectPath(b.n), NewPutObjReader(r, nil, nil), ObjectOptions{
UserDefined: custom,
NoLock: true, // No need to hold namespace lock, each prefix caches uniquely.
})
if err != nil {
metaMu.Lock()
if meta.error != "" {
@ -705,6 +717,7 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
}
return nil
})
}
// How to resolve results.
resolver := metadataResolutionParams{
@ -721,14 +734,18 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
filterPrefix: o.FilterPrefix,
minDisks: listingQuorum,
agreed: func(entry metaCacheEntry) {
if !o.discardResult {
cacheCh <- entry
}
filterCh <- entry
},
partial: func(entries metaCacheEntries, nAgreed int, errs []error) {
// Results Disagree :-(
entry, ok := entries.resolve(&resolver)
if ok {
if !o.discardResult {
cacheCh <- *entry
}
filterCh <- *entry
}
},
@ -749,6 +766,7 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
metaMu.Unlock()
closeChannels()
if !o.discardResult {
if err := bw.Close(); err != nil {
metaMu.Lock()
meta.error = err.Error()
@ -756,6 +774,7 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
meta, err = o.updateMetacacheListing(meta, rpc)
metaMu.Unlock()
}
}
}()
return filteredResults()

View File

@ -51,6 +51,7 @@ type ObjectOptions struct {
DeleteMarkerReplicationStatus string // Is only set in DELETE operations
VersionPurgeStatus VersionPurgeStatusType // Is only set in DELETE operations for delete marker version to be permanently deleted.
TransitionStatus string // status of the transition
NoLock bool // indicates to lower layers if the caller is expecting to hold locks.
}
// BucketOptions represents bucket options for ObjectLayer bucket operations