metacache: Check only one disk for updates (#10809)

Check only one disk for updates.

This will reduce IO while waiting for lists to finish.
This commit is contained in:
Klaus Post 2020-11-02 17:20:27 -08:00 committed by GitHub
parent 37749f4623
commit 0a796505c1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 62 additions and 39 deletions

View File

@ -182,19 +182,14 @@ func (o listPathOptions) checkMetacacheState(ctx context.Context, rpc *peerRESTC
if cache.status == scanStateNone || cache.fileNotFound {
return errFileNotFound
}
if cache.status == scanStateSuccess {
if cache.status == scanStateSuccess || cache.status == scanStateStarted {
if time.Since(cache.lastUpdate) > metacacheMaxRunningAge {
return fmt.Errorf("timeout: list %s finished and no update for 1 minute", cache.id)
return fmt.Errorf("timeout: list %s not updated for 1 minute", cache.id)
}
return nil
}
if cache.error != "" {
return fmt.Errorf("async cache listing failed with: %s", cache.error)
}
if cache.status == scanStateStarted {
if time.Since(cache.lastUpdate) > metacacheMaxRunningAge {
return fmt.Errorf("cache id %s listing not updating. Last update %s seconds ago", cache.id, time.Since(cache.lastUpdate).Round(time.Second))
}
}
return nil
}

View File

@ -351,23 +351,42 @@ func (er *erasureObjects) streamMetadataParts(ctx context.Context, o listPathOpt
default:
}
// If many failures, check the cache state.
if retries > 10 {
err := o.checkMetacacheState(ctx, rpc)
if debugPrint {
logger.Info("waiting for first part (%s), err: %v", o.objectPath(0), err)
}
if err != nil {
return entries, err
}
retries = 1
}
const retryDelay = 500 * time.Millisecond
// Load first part metadata...
// All operations are performed without locks, so we must be careful and allow for failures.
// Read metadata associated with the object from a disk.
if retries > 0 {
disks := er.getOnlineDisks()
if len(disks) == 0 {
time.Sleep(retryDelay)
retries++
continue
}
_, err := disks[0].ReadVersion(ctx, minioMetaBucket, o.objectPath(0), "", false)
if err != nil {
time.Sleep(retryDelay)
retries++
continue
}
}
// Read metadata associated with the object from all disks.
fi, metaArr, onlineDisks, err := er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(0), ObjectOptions{})
if err != nil {
if err == errFileNotFound || errors.Is(err, errErasureReadQuorum) || errors.Is(err, InsufficientReadQuorum{}) {
// Not ready yet...
if retries == 10 {
err := o.checkMetacacheState(ctx, rpc)
if debugPrint {
logger.Info("waiting for first part (%s), err: %v", o.objectPath(0), err)
}
if err != nil {
return entries, err
}
retries = -1
}
retries++
time.Sleep(retryDelay)
continue
@ -414,28 +433,41 @@ func (er *erasureObjects) streamMetadataParts(ctx context.Context, o listPathOpt
}
if partN != loadedPart {
if retries > 10 {
err := o.checkMetacacheState(ctx, rpc)
if debugPrint {
logger.Info("waiting for part data (%v), err: %v", o.objectPath(partN), err)
}
if err != nil {
return entries, err
}
retries = 1
}
if retries > 0 {
// Load from one disk only
disks := er.getOnlineDisks()
if len(disks) == 0 {
time.Sleep(retryDelay)
retries++
continue
}
_, err := disks[0].ReadVersion(ctx, minioMetaBucket, o.objectPath(partN), "", false)
if err != nil {
time.Sleep(retryDelay)
retries++
continue
}
}
// Load metadata for the current part (partN)...
fi, metaArr, onlineDisks, err = er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(partN), ObjectOptions{})
switch err {
case errFileNotFound, errErasureReadQuorum, InsufficientReadQuorum{}:
if retries >= 10 {
err := o.checkMetacacheState(ctx, rpc)
if debugPrint {
logger.Info("waiting for part data (%v), err: %v", o.objectPath(partN), err)
}
if err != nil {
return entries, err
}
retries = -1
}
time.Sleep(retryDelay)
retries++
continue
default:
if retries >= 20 {
// We had at least 10 retries without getting a result.
logger.LogIf(ctx, err)
return entries, err
}
time.Sleep(retryDelay)
retries++
continue
@ -457,13 +489,8 @@ func (er *erasureObjects) streamMetadataParts(ctx context.Context, o listPathOpt
err := er.getObjectWithFileInfo(ctx, minioMetaBucket, o.objectPath(partN), 0, fi.Size, &buf, fi, metaArr, onlineDisks)
switch err {
case errFileNotFound, errErasureReadQuorum, InsufficientReadQuorum{}:
if retries >= 20 {
// We had at least 10 retries without getting a result.
logger.LogIf(ctx, err)
return entries, err
}
retries++
time.Sleep(retryDelay)
retries++
continue
default:
logger.LogIf(ctx, err)
@ -495,6 +522,7 @@ func (er *erasureObjects) streamMetadataParts(ctx context.Context, o listPathOpt
return entries, io.EOF
}
partN++
retries = 0
case nil:
// We stopped within the listing, we are done for now...
return entries, nil