From 035a3ea4aef5c1b682087af8969b1c6dc9be85ec Mon Sep 17 00:00:00 2001
From: Harshavardhana
Date: Thu, 8 Feb 2024 11:21:21 -0800
Subject: [PATCH] optimize startup sequence performance (#19009)

- bucket metadata no longer needs to look for legacy configs
  if b.Created is non-zero

- stagger bucket metadata loads across lots of nodes to avoid
  the current thundering-herd problem at startup.

- Remove deadlines for RenameData, RenameFile - these calls
  should never be timed out; they must wait until completion
  or until the client times out. Do not choose timeouts for
  applications during the WRITE phase.

- increase R/W buffer size, increase maxMergeMessages to 30
---
 cmd/bucket-metadata-sys.go  | 11 ++++++-----
 cmd/bucket-metadata.go      | 39 +++++++++++++++++++++++--------------
 cmd/erasure-server-pool.go  |  6 +-----
 cmd/storage-rest-client.go  | 10 +---------
 internal/grid/connection.go |  8 ++++----
 internal/grid/grid.go       |  2 +-
 6 files changed, 37 insertions(+), 39 deletions(-)

diff --git a/cmd/bucket-metadata-sys.go b/cmd/bucket-metadata-sys.go
index 14c86144d..865171773 100644
--- a/cmd/bucket-metadata-sys.go
+++ b/cmd/bucket-metadata-sys.go
@@ -22,6 +22,7 @@ import (
 	"encoding/xml"
 	"errors"
 	"fmt"
+	"math/rand"
 	"sync"
 	"time"
 
@@ -482,11 +483,11 @@ func (sys *BucketMetadataSys) concurrentLoad(ctx context.Context, buckets []Buck
 	for index := range buckets {
 		index := index
 		g.Go(func() error {
-			_, _ = sys.objAPI.HealBucket(ctx, buckets[index].Name, madmin.HealOpts{
-				// Ensure heal opts for bucket metadata be deep healed all the time.
-				ScanMode: madmin.HealDeepScan,
-				Recreate: true,
-			})
+			// Sleep and stagger to avoid blocked CPUs and a thundering
+			// herd during the startup sequence.
+			time.Sleep(25*time.Millisecond + time.Duration(rand.Int63n(int64(100*time.Millisecond))))
+
+			_, _ = sys.objAPI.HealBucket(ctx, buckets[index].Name, madmin.HealOpts{Recreate: true})
 			meta, err := loadBucketMetadata(ctx, sys.objAPI, buckets[index].Name)
 			if err != nil {
 				return err
diff --git a/cmd/bucket-metadata.go b/cmd/bucket-metadata.go
index 797e9ed9f..848962cc6 100644
--- a/cmd/bucket-metadata.go
+++ b/cmd/bucket-metadata.go
@@ -182,26 +182,34 @@ func loadBucketMetadataParse(ctx context.Context, objectAPI ObjectLayer, bucket
 		b.defaultTimestamps()
 	}
 
-	configs, err := b.getAllLegacyConfigs(ctx, objectAPI)
-	if err != nil {
-		return b, err
+	// If bucket metadata is missing, look for legacy files,
+	// since we only ever had b.Created as non-zero once the
+	// migration was complete in the 2020-May release. So this
+	// is a check to avoid migrating buckets that already
+	// have this field set.
+	if b.Created.IsZero() {
+		configs, err := b.getAllLegacyConfigs(ctx, objectAPI)
+		if err != nil {
+			return b, err
+		}
+
+		if len(configs) > 0 {
+			// Old bucket without bucket metadata. Hence we migrate existing settings.
+			if err = b.convertLegacyConfigs(ctx, objectAPI, configs); err != nil {
+				return b, err
+			}
+		}
 	}
 
-	if len(configs) == 0 {
-		if parse {
-			// nothing to update, parse and proceed.
-			err = b.parseAllConfigs(ctx, objectAPI)
+	if parse {
+		// nothing to update, parse and proceed.
+		if err = b.parseAllConfigs(ctx, objectAPI); err != nil {
+			return b, err
 		}
-	} else {
-		// Old bucket without bucket metadata. Hence we migrate existing settings.
-		err = b.convertLegacyConfigs(ctx, objectAPI, configs)
-	}
-	if err != nil {
-		return b, err
-	}
+	}
 
 	// migrate unencrypted remote targets
-	if err := b.migrateTargetConfig(ctx, objectAPI); err != nil {
+	if err = b.migrateTargetConfig(ctx, objectAPI); err != nil {
 		return b, err
 	}
@@ -331,7 +339,7 @@ func (b *BucketMetadata) getAllLegacyConfigs(ctx context.Context, objectAPI Obje
 	for _, legacyFile := range legacyConfigs {
 		configFile := path.Join(bucketMetaPrefix, b.Name, legacyFile)
 
-		configData, err := readConfig(ctx, objectAPI, configFile)
+		configData, info, err := readConfigWithMetadata(ctx, objectAPI, configFile, ObjectOptions{})
 		if err != nil {
 			if _, ok := err.(ObjectExistsAsDirectory); ok {
 				// in FS mode it possible that we have actual
@@ -346,6 +354,7 @@ func (b *BucketMetadata) getAllLegacyConfigs(ctx context.Context, objectAPI Obje
 			return nil, err
 		}
 		configs[legacyFile] = configData
+		b.Created = info.ModTime
 	}
 
 	return configs, nil
diff --git a/cmd/erasure-server-pool.go b/cmd/erasure-server-pool.go
index 65e2fc67c..779b20bda 100644
--- a/cmd/erasure-server-pool.go
+++ b/cmd/erasure-server-pool.go
@@ -1958,11 +1958,7 @@ func (z *erasureServerPools) HealFormat(ctx context.Context, dryRun bool) (madmi
 }
 
 func (z *erasureServerPools) HealBucket(ctx context.Context, bucket string, opts madmin.HealOpts) (madmin.HealResultItem, error) {
-	// Attempt heal on the bucket metadata, ignore any failures
-	hopts := opts
-	hopts.Recreate = false
-	defer z.HealObject(ctx, minioMetaBucket, pathJoin(bucketMetaPrefix, bucket, bucketMetadataFile), "", hopts)
-
+	// .metadata.bin healing is not needed here, it is automatically healed via the read() call.
 	return z.s3Peer.HealBucket(ctx, bucket, opts)
 }
 
diff --git a/cmd/storage-rest-client.go b/cmd/storage-rest-client.go
index 20543d468..ade7be5cb 100644
--- a/cmd/storage-rest-client.go
+++ b/cmd/storage-rest-client.go
@@ -313,7 +313,7 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
 
 	infop, err := storageDiskInfoHandler.Call(ctx, client.gridConn, &opts)
 	if err != nil {
-		return info, err
+		return info, toStorageErr(err)
 	}
 	info = *infop
 	if info.Error != "" {
@@ -442,10 +442,6 @@ func (client *storageRESTClient) CheckParts(ctx context.Context, volume string,
 
 // RenameData - rename source path to destination path atomically, metadata and data file.
 func (client *storageRESTClient) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string, opts RenameOptions) (sign uint64, err error) {
-	// Set a very long timeout for rename data.
-	ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
-	defer cancel()
-
 	resp, err := storageRenameDataHandler.Call(ctx, client.gridConn, &RenameDataHandlerParams{
 		DiskID:    client.diskID,
 		SrcVolume: srcVolume,
@@ -704,10 +700,6 @@ func (client *storageRESTClient) DeleteVersions(ctx context.Context, volume stri
 
 // RenameFile - renames a file.
 func (client *storageRESTClient) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) (err error) {
-	// Set a very long timeout for rename file
-	ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
-	defer cancel()
-
 	_, err = storageRenameFileHandler.Call(ctx, client.gridConn, &RenameFileHandlerParams{
 		DiskID:    client.diskID,
 		SrcVolume: srcVolume,
diff --git a/internal/grid/connection.go b/internal/grid/connection.go
index 188ee04e1..3d312b1ae 100644
--- a/internal/grid/connection.go
+++ b/internal/grid/connection.go
@@ -175,8 +175,8 @@ func (c ContextDialer) DialContext(ctx context.Context, network, address string)
 
 const (
 	defaultOutQueue    = 10000
-	readBufferSize     = 16 << 10
-	writeBufferSize    = 16 << 10
+	readBufferSize     = 32 << 10 // 32 KiB is optimal on Linux
+	writeBufferSize    = 32 << 10 // 32 KiB is optimal on Linux
 	defaultDialTimeout = 2 * time.Second
 	connPingInterval   = 10 * time.Second
 	connWriteTimeout   = 3 * time.Second
@@ -654,7 +654,7 @@ func (c *Connection) connect() {
 			fmt.Printf("%v Connecting to %v: %v. Retrying.\n", c.Local, toDial, err)
 		}
 		sleep := defaultDialTimeout + time.Duration(rng.Int63n(int64(defaultDialTimeout)))
-		next := dialStarted.Add(sleep)
+		next := dialStarted.Add(sleep / 2)
 		sleep = time.Until(next).Round(time.Millisecond)
 		if sleep < 0 {
 			sleep = 0
@@ -950,7 +950,7 @@ func (c *Connection) handleMessages(ctx context.Context, conn net.Conn) {
 			cancel(ErrDisconnected)
 			return
 		}
-		if cap(msg) > readBufferSize*8 {
+		if cap(msg) > readBufferSize*4 {
 			// Don't keep too much memory around.
 			msg = nil
 		}
diff --git a/internal/grid/grid.go b/internal/grid/grid.go
index f54f6dab0..a0813a7f8 100644
--- a/internal/grid/grid.go
+++ b/internal/grid/grid.go
@@ -45,7 +45,7 @@ const (
 	maxBufferSize = 64 << 10
 
 	// If there is a queue, merge up to this many messages.
-	maxMergeMessages = 20
+	maxMergeMessages = 30
 
 	// clientPingInterval will ping the remote handler every 15 seconds.
 	// Clients disconnect when we exceed 2 intervals.
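
Note (illustration, not part of the patch): the jittered sleep added to
concurrentLoad() is the heart of the thundering-herd fix, so here is a
minimal, self-contained Go sketch of that pattern in isolation. bucketNames
and loadOne are hypothetical stand-ins for the real bucket list and the
HealBucket + loadBucketMetadata calls; the 25ms base and 100ms jitter window
are taken from the patch.

package main

import (
	"fmt"
	"math/rand"
	"sync"
	"time"
)

// loadOne stands in for the per-bucket HealBucket + loadBucketMetadata work.
func loadOne(bucket string) {
	fmt.Println("loading metadata for", bucket)
}

func main() {
	bucketNames := []string{"alpha", "beta", "gamma"} // hypothetical bucket list

	var wg sync.WaitGroup
	for _, name := range bucketNames {
		name := name
		wg.Add(1)
		go func() {
			defer wg.Done()
			// 25ms base plus up to 100ms of random jitter, as in the patch:
			// start times spread roughly uniformly over [25ms, 125ms) instead
			// of every goroutine issuing its metadata reads at t=0.
			time.Sleep(25*time.Millisecond + time.Duration(rand.Int63n(int64(100*time.Millisecond))))
			loadOne(name)
		}()
	}
	wg.Wait()
}

The same shape appears in connect() above (defaultDialTimeout plus an
rng.Int63n jitter), so reconnecting nodes likewise avoid dialing in lockstep.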