optimize startup sequence performance (#19009)

- bucket metadata loading no longer needs to look for
  legacy config files if b.Created is non-zero

- stagger bucket metadata loads across lots of nodes to
  avoid the current thundering herd problem.

- Remove deadlines for RenameData, RenameFile - these
  calls should not ever be timed out and should wait
  until completion or wait for client timeout. Do not
  choose timeouts for applications during the WRITE phase.

- increase R/W buffer size, increase maxMergeMessages to 30
This commit is contained in:
Harshavardhana
2024-02-08 11:21:21 -08:00
committed by GitHub
parent 7ec43bd177
commit 035a3ea4ae
6 changed files with 37 additions and 39 deletions

View File

@@ -22,6 +22,7 @@ import (
"encoding/xml"
"errors"
"fmt"
"math/rand"
"sync"
"time"
@@ -482,11 +483,11 @@ func (sys *BucketMetadataSys) concurrentLoad(ctx context.Context, buckets []Buck
for index := range buckets {
index := index
g.Go(func() error {
_, _ = sys.objAPI.HealBucket(ctx, buckets[index].Name, madmin.HealOpts{
// Ensure heal opts for bucket metadata be deep healed all the time.
ScanMode: madmin.HealDeepScan,
Recreate: true,
})
// Sleep and stagger to avoid blocked CPU and thundering
// herd upon start up sequence.
time.Sleep(25*time.Millisecond + time.Duration(rand.Int63n(int64(100*time.Millisecond))))
_, _ = sys.objAPI.HealBucket(ctx, buckets[index].Name, madmin.HealOpts{Recreate: true})
meta, err := loadBucketMetadata(ctx, sys.objAPI, buckets[index].Name)
if err != nil {
return err

View File

@@ -182,26 +182,34 @@ func loadBucketMetadataParse(ctx context.Context, objectAPI ObjectLayer, bucket
b.defaultTimestamps()
}
configs, err := b.getAllLegacyConfigs(ctx, objectAPI)
if err != nil {
return b, err
// If bucket metadata is missing look for legacy files,
// since we only ever had b.Created as non-zero when
// migration was complete in 2020-May release. So this
// is a check to avoid migrating for buckets that already
// have this field set.
if b.Created.IsZero() {
configs, err := b.getAllLegacyConfigs(ctx, objectAPI)
if err != nil {
return b, err
}
if len(configs) > 0 {
// Old bucket without bucket metadata. Hence we migrate existing settings.
if err = b.convertLegacyConfigs(ctx, objectAPI, configs); err != nil {
return b, err
}
}
}
if len(configs) == 0 {
if parse {
// nothing to update, parse and proceed.
err = b.parseAllConfigs(ctx, objectAPI)
if parse {
// nothing to update, parse and proceed.
if err = b.parseAllConfigs(ctx, objectAPI); err != nil {
return b, err
}
} else {
// Old bucket without bucket metadata. Hence we migrate existing settings.
err = b.convertLegacyConfigs(ctx, objectAPI, configs)
}
if err != nil {
return b, err
}
// migrate unencrypted remote targets
if err := b.migrateTargetConfig(ctx, objectAPI); err != nil {
if err = b.migrateTargetConfig(ctx, objectAPI); err != nil {
return b, err
}
@@ -331,7 +339,7 @@ func (b *BucketMetadata) getAllLegacyConfigs(ctx context.Context, objectAPI Obje
for _, legacyFile := range legacyConfigs {
configFile := path.Join(bucketMetaPrefix, b.Name, legacyFile)
configData, err := readConfig(ctx, objectAPI, configFile)
configData, info, err := readConfigWithMetadata(ctx, objectAPI, configFile, ObjectOptions{})
if err != nil {
if _, ok := err.(ObjectExistsAsDirectory); ok {
// in FS mode it is possible that we have actual
@@ -346,6 +354,7 @@ func (b *BucketMetadata) getAllLegacyConfigs(ctx context.Context, objectAPI Obje
return nil, err
}
configs[legacyFile] = configData
b.Created = info.ModTime
}
return configs, nil

View File

@@ -1958,11 +1958,7 @@ func (z *erasureServerPools) HealFormat(ctx context.Context, dryRun bool) (madmi
}
func (z *erasureServerPools) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) {
// Attempt heal on the bucket metadata, ignore any failures
hopts := opts
hopts.Recreate = false
defer z.HealObject(ctx, minioMetaBucket, pathJoin(bucketMetaPrefix, bucket, bucketMetadataFile), "", hopts)
// .metadata.bin healing is not needed here, it is automatically healed via read() call.
return z.s3Peer.HealBucket(ctx, bucket, opts)
}

View File

@@ -313,7 +313,7 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
infop, err := storageDiskInfoHandler.Call(ctx, client.gridConn, &opts)
if err != nil {
return info, err
return info, toStorageErr(err)
}
info = *infop
if info.Error != "" {
@@ -442,10 +442,6 @@ func (client *storageRESTClient) CheckParts(ctx context.Context, volume string,
// RenameData - rename source path to destination path atomically, metadata and data file.
func (client *storageRESTClient) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (sign uint64, err error) {
// Set a very long timeout for rename data.
ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
defer cancel()
resp, err := storageRenameDataHandler.Call(ctx, client.gridConn, &RenameDataHandlerParams{
DiskID: client.diskID,
SrcVolume: srcVolume,
@@ -704,10 +700,6 @@ func (client *storageRESTClient) DeleteVersions(ctx context.Context, volume stri
// RenameFile - renames a file.
func (client *storageRESTClient) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) (err error) {
// Set a very long timeout for rename file
ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
defer cancel()
_, err = storageRenameFileHandler.Call(ctx, client.gridConn, &RenameFileHandlerParams{
DiskID: client.diskID,
SrcVolume: srcVolume,