From 3995355150891aacf19183aaaff1d433a3a59131 Mon Sep 17 00:00:00 2001
From: Harshavardhana
Date: Sat, 2 Sep 2023 07:49:24 -0700
Subject: [PATCH] avoid repeated large allocations for large parts (#17968)

When there are many objects with 10,000 parts, they can cause a large
memory spike which can potentially lead to OOM due to lack of GC.

With the previous PR #17963 already reducing memory usage significantly,
this PR reduces it further by ~80% under repeated calls.

The scanner sub-system has no use for the slice of Parts(), so it is
better left empty.

```
benchmark                          old ns/op     new ns/op     delta
BenchmarkToFileInfo/ToFileInfo-8   295658        188143        -36.36%

benchmark                          old allocs     new allocs     delta
BenchmarkToFileInfo/ToFileInfo-8   61             60             -1.64%

benchmark                          old bytes     new bytes     delta
BenchmarkToFileInfo/ToFileInfo-8   1097210       227255        -79.29%
```
---
 cmd/erasure-object.go               | 14 ++++-----
 cmd/metacache-entries.go            |  6 ++--
 cmd/xl-storage-format-utils.go      | 16 +++++-----
 cmd/xl-storage-format-utils_test.go |  2 +-
 cmd/xl-storage-format-v2.go         | 47 +++++++++++++++--------------
 cmd/xl-storage-format-v2_test.go    | 40 +++++++++++++++++++++++-
 cmd/xl-storage-format_test.go       |  8 ++---
 cmd/xl-storage-free-version_test.go |  4 +--
 cmd/xl-storage.go                   |  6 ++--
 9 files changed, 92 insertions(+), 51 deletions(-)

diff --git a/cmd/erasure-object.go b/cmd/erasure-object.go
index 0e1610775..cefa1d7b1 100644
--- a/cmd/erasure-object.go
+++ b/cmd/erasure-object.go
@@ -93,7 +93,7 @@ func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, d
 	if srcOpts.VersionID != "" {
 		metaArr, errs = readAllFileInfo(ctx, storageDisks, srcBucket, srcObject, srcOpts.VersionID, true)
 	} else {
-		metaArr, errs = readAllXL(ctx, storageDisks, srcBucket, srcObject, true, false)
+		metaArr, errs = readAllXL(ctx, storageDisks, srcBucket, srcObject, true, false, true)
 	}
 
 	readQuorum, writeQuorum, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount)
@@ -533,7 +533,7 @@ func (er erasureObjects) deleteIfDangling(ctx context.Context, bucket, object st
 	return m, err
 }
 
-func readAllXL(ctx context.Context, disks []StorageAPI, bucket, object string, readData, inclFreeVers bool) ([]FileInfo, []error) {
+func readAllXL(ctx context.Context, disks []StorageAPI, bucket, object string, readData, inclFreeVers, allParts bool) ([]FileInfo, []error) {
 	metadataArray := make([]*xlMetaV2, len(disks))
 	metaFileInfos := make([]FileInfo, len(metadataArray))
 	metadataShallowVersions := make([][]xlMetaV2ShallowVersion, len(disks))
@@ -601,7 +601,7 @@ func readAllXL(ctx context.Context, disks []StorageAPI, bucket, object string, r
 
 	readQuorum := (len(disks) + 1) / 2
 	meta := &xlMetaV2{versions: mergeXLV2Versions(readQuorum, false, 1, metadataShallowVersions...)}
-	lfi, err := meta.ToFileInfo(bucket, object, "", inclFreeVers)
+	lfi, err := meta.ToFileInfo(bucket, object, "", inclFreeVers, allParts)
 	if err != nil {
 		for i := range errs {
 			if errs[i] == nil {
@@ -631,7 +631,7 @@ func readAllXL(ctx context.Context, disks []StorageAPI, bucket, object string, r
 		// make sure to preserve this for diskmtime based healing bugfix.
diskMTime := metaFileInfos[index].DiskMTime - metaFileInfos[index], errs[index] = metadataArray[index].ToFileInfo(bucket, object, versionID, inclFreeVers) + metaFileInfos[index], errs[index] = metadataArray[index].ToFileInfo(bucket, object, versionID, inclFreeVers, allParts) if errs[index] != nil { continue } @@ -660,7 +660,7 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s if opts.VersionID != "" { metaArr, errs = readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, readData) } else { - metaArr, errs = readAllXL(ctx, disks, bucket, object, readData, opts.InclFreeVersions) + metaArr, errs = readAllXL(ctx, disks, bucket, object, readData, opts.InclFreeVersions, true) } readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount) @@ -1834,7 +1834,7 @@ func (er erasureObjects) PutObjectMetadata(ctx context.Context, bucket, object s if opts.VersionID != "" { metaArr, errs = readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, false) } else { - metaArr, errs = readAllXL(ctx, disks, bucket, object, false, false) + metaArr, errs = readAllXL(ctx, disks, bucket, object, false, false, true) } readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount) @@ -1907,7 +1907,7 @@ func (er erasureObjects) PutObjectTags(ctx context.Context, bucket, object strin if opts.VersionID != "" { metaArr, errs = readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, false) } else { - metaArr, errs = readAllXL(ctx, disks, bucket, object, false, false) + metaArr, errs = readAllXL(ctx, disks, bucket, object, false, false, true) } readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount) diff --git a/cmd/metacache-entries.go b/cmd/metacache-entries.go index 1277b0370..7298c39ed 100644 --- a/cmd/metacache-entries.go +++ b/cmd/metacache-entries.go @@ -248,9 +248,9 @@ func (e *metaCacheEntry) fileInfo(bucket string) (FileInfo, error) { ModTime: timeSentinel1970, }, nil } - return e.cached.ToFileInfo(bucket, e.name, "", false) + return e.cached.ToFileInfo(bucket, e.name, "", false, false) } - return getFileInfo(e.metadata, bucket, e.name, "", false) + return getFileInfo(e.metadata, bucket, e.name, "", false, false) } // xlmeta returns the decoded metadata. @@ -291,7 +291,7 @@ func (e *metaCacheEntry) fileInfoVersions(bucket string) (FileInfoVersions, erro }, nil } // Too small gains to reuse cache here. - return getFileInfoVersions(e.metadata, bucket, e.name) + return getFileInfoVersions(e.metadata, bucket, e.name, false) } // metaCacheEntries is a slice of metacache entries. 
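The heart of the change is small and easy to lose in the plumbing above. Below is a minimal, self-contained Go sketch of the pattern, using hypothetical stand-in types (partMeta, partInfo, objectInfo, and toObjectInfo are invented names for illustration, not the actual xlMetaV2Object/FileInfo code): every cheap field is filled in as before, but the per-part slice is only allocated when the caller explicitly asks for it.

```go
package sketch

// partMeta is a hypothetical stand-in for the parallel per-part slices
// (numbers, sizes, etags, ...) that the on-disk xl metadata keeps.
type partMeta struct {
	Numbers     []int
	Sizes       []int64
	ActualSizes []int64
	ETags       []string
}

// partInfo and objectInfo loosely mirror ObjectPartInfo and FileInfo;
// objectInfo.Parts is the slice that becomes expensive at 10,000 parts.
type partInfo struct {
	Number     int
	Size       int64
	ActualSize int64
	ETag       string
}

type objectInfo struct {
	Name  string
	Parts []partInfo
}

// toObjectInfo applies the same guard the patch adds to ToFileInfo: the
// Parts slice is built only when allParts is true.
func toObjectInfo(name string, m partMeta, allParts bool) objectInfo {
	oi := objectInfo{Name: name}
	if allParts {
		oi.Parts = make([]partInfo, len(m.Numbers))
		for i := range oi.Parts {
			oi.Parts[i].Number = m.Numbers[i]
			oi.Parts[i].Size = m.Sizes[i]
			oi.Parts[i].ActualSize = m.ActualSizes[i]
			// ETags may be absent, so copy them only when the lengths
			// line up, as the code in the diff does.
			if len(m.ETags) == len(oi.Parts) {
				oi.Parts[i].ETag = m.ETags[i]
			}
		}
	}
	// With allParts == false, oi.Parts stays nil: no large allocation is
	// made and there is nothing extra for the GC to track afterwards.
	return oi
}
```

Object-serving paths in the diff (CopyObject, getObjectFileInfo, ReadVersion) keep passing true, so their behaviour is unchanged; only callers that never look at individual parts, such as the metacache listing path and the scanner, opt out of the allocation.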
diff --git a/cmd/xl-storage-format-utils.go b/cmd/xl-storage-format-utils.go index 11844a1f0..b9b0fab2b 100644 --- a/cmd/xl-storage-format-utils.go +++ b/cmd/xl-storage-format-utils.go @@ -23,8 +23,8 @@ import ( "github.com/zeebo/xxh3" ) -func getFileInfoVersions(xlMetaBuf []byte, volume, path string) (FileInfoVersions, error) { - fivs, err := getAllFileInfoVersions(xlMetaBuf, volume, path) +func getFileInfoVersions(xlMetaBuf []byte, volume, path string, allParts bool) (FileInfoVersions, error) { + fivs, err := getAllFileInfoVersions(xlMetaBuf, volume, path, allParts) if err != nil { return fivs, err } @@ -46,20 +46,20 @@ func getFileInfoVersions(xlMetaBuf []byte, volume, path string) (FileInfoVersion return fivs, nil } -func getAllFileInfoVersions(xlMetaBuf []byte, volume, path string) (FileInfoVersions, error) { +func getAllFileInfoVersions(xlMetaBuf []byte, volume, path string, allParts bool) (FileInfoVersions, error) { var versions []FileInfo var err error if buf, _, e := isIndexedMetaV2(xlMetaBuf); e != nil { return FileInfoVersions{}, e } else if buf != nil { - versions, err = buf.ListVersions(volume, path) + versions, err = buf.ListVersions(volume, path, allParts) } else { var xlMeta xlMetaV2 if err := xlMeta.LoadOrConvert(xlMetaBuf); err != nil { return FileInfoVersions{}, err } - versions, err = xlMeta.ListVersions(volume, path) + versions, err = xlMeta.ListVersions(volume, path, allParts) } if err == nil && len(versions) == 0 { // This special case is needed to handle len(xlMeta.versions) == 0 @@ -85,7 +85,7 @@ func getAllFileInfoVersions(xlMetaBuf []byte, volume, path string) (FileInfoVers }, nil } -func getFileInfo(xlMetaBuf []byte, volume, path, versionID string, data bool) (FileInfo, error) { +func getFileInfo(xlMetaBuf []byte, volume, path, versionID string, data, allParts bool) (FileInfo, error) { var fi FileInfo var err error var inData xlMetaInlineData @@ -93,7 +93,7 @@ func getFileInfo(xlMetaBuf []byte, volume, path, versionID string, data bool) (F return FileInfo{}, e } else if buf != nil { inData = data - fi, err = buf.ToFileInfo(volume, path, versionID) + fi, err = buf.ToFileInfo(volume, path, versionID, allParts) if len(buf) != 0 && errors.Is(err, errFileNotFound) { // This special case is needed to handle len(xlMeta.versions) == 0 return FileInfo{ @@ -122,7 +122,7 @@ func getFileInfo(xlMetaBuf []byte, volume, path, versionID string, data bool) (F }, nil } inData = xlMeta.data - fi, err = xlMeta.ToFileInfo(volume, path, versionID, false) + fi, err = xlMeta.ToFileInfo(volume, path, versionID, false, allParts) } if !data || err != nil { return fi, err diff --git a/cmd/xl-storage-format-utils_test.go b/cmd/xl-storage-format-utils_test.go index 8a45d2732..dfbb43408 100644 --- a/cmd/xl-storage-format-utils_test.go +++ b/cmd/xl-storage-format-utils_test.go @@ -175,7 +175,7 @@ func TestGetFileInfoVersions(t *testing.T) { if err != nil { t.Fatalf("Failed to serialize xlmeta %v", err) } - fivs, err := getFileInfoVersions(buf, basefi.Volume, basefi.Name) + fivs, err := getFileInfoVersions(buf, basefi.Volume, basefi.Name, true) if err != nil { t.Fatalf("getFileInfoVersions failed: %v", err) } diff --git a/cmd/xl-storage-format-v2.go b/cmd/xl-storage-format-v2.go index a6f6232e3..c92cd7745 100644 --- a/cmd/xl-storage-format-v2.go +++ b/cmd/xl-storage-format-v2.go @@ -432,13 +432,13 @@ func (j xlMetaV2Version) getVersionID() [16]byte { } // ToFileInfo returns FileInfo of the underlying type. 
-func (j *xlMetaV2Version) ToFileInfo(volume, path string) (fi FileInfo, err error) { +func (j *xlMetaV2Version) ToFileInfo(volume, path string, allParts bool) (fi FileInfo, err error) { if j == nil { return fi, errFileNotFound } switch j.Type { case ObjectType: - fi, err = j.ObjectV2.ToFileInfo(volume, path) + fi, err = j.ObjectV2.ToFileInfo(volume, path, allParts) case DeleteType: fi, err = j.DeleteMarker.ToFileInfo(volume, path) case LegacyType: @@ -585,7 +585,7 @@ func (j *xlMetaV2Object) Signature() [4]byte { return tmp } -func (j xlMetaV2Object) ToFileInfo(volume, path string) (FileInfo, error) { +func (j xlMetaV2Object) ToFileInfo(volume, path string, allParts bool) (FileInfo, error) { versionID := "" var uv uuid.UUID // check if the version is not "null" @@ -599,18 +599,21 @@ func (j xlMetaV2Object) ToFileInfo(volume, path string) (FileInfo, error) { ModTime: time.Unix(0, j.ModTime).UTC(), VersionID: versionID, } - fi.Parts = make([]ObjectPartInfo, len(j.PartNumbers)) - for i := range fi.Parts { - fi.Parts[i].Number = j.PartNumbers[i] - fi.Parts[i].Size = j.PartSizes[i] - if len(j.PartETags) == len(fi.Parts) { - fi.Parts[i].ETag = j.PartETags[i] - } - fi.Parts[i].ActualSize = j.PartActualSizes[i] - if len(j.PartIndices) == len(fi.Parts) { - fi.Parts[i].Index = j.PartIndices[i] + if allParts { + fi.Parts = make([]ObjectPartInfo, len(j.PartNumbers)) + for i := range fi.Parts { + fi.Parts[i].Number = j.PartNumbers[i] + fi.Parts[i].Size = j.PartSizes[i] + if len(j.PartETags) == len(fi.Parts) { + fi.Parts[i].ETag = j.PartETags[i] + } + fi.Parts[i].ActualSize = j.PartActualSizes[i] + if len(j.PartIndices) == len(fi.Parts) { + fi.Parts[i].Index = j.PartIndices[i] + } } } + // fi.Erasure.Checksums - is left empty since we do not have any // whole checksums for many years now, no need to allocate. @@ -1719,7 +1722,7 @@ func (x *xlMetaV2) AddLegacy(m *xlMetaV1Object) error { // ToFileInfo converts xlMetaV2 into a common FileInfo datastructure // for consumption across callers. -func (x xlMetaV2) ToFileInfo(volume, path, versionID string, inclFreeVers bool) (fi FileInfo, err error) { +func (x xlMetaV2) ToFileInfo(volume, path, versionID string, inclFreeVers, allParts bool) (fi FileInfo, err error) { var uv uuid.UUID if versionID != "" && versionID != nullVersionID { uv, err = uuid.Parse(versionID) @@ -1747,7 +1750,7 @@ func (x xlMetaV2) ToFileInfo(volume, path, versionID string, inclFreeVers bool) if inclFreeVers && !freeFound { // ignore unmarshalling errors, will return errFileNotFound in that case if _, err := freeVersion.unmarshalV(x.metaV, ver.meta); err == nil { - if freeFi, err = freeVersion.ToFileInfo(volume, path); err == nil { + if freeFi, err = freeVersion.ToFileInfo(volume, path, allParts); err == nil { freeFi.IsLatest = true // when this is returned, it would be the latest free version remaining. freeFound = true } @@ -1775,7 +1778,7 @@ func (x xlMetaV2) ToFileInfo(volume, path, versionID string, inclFreeVers bool) if _, err := version.unmarshalV(x.metaV, ver.meta); err != nil { return fi, err } - if fi, err = version.ToFileInfo(volume, path); err != nil { + if fi, err = version.ToFileInfo(volume, path, allParts); err != nil { return fi, err } fi.IsLatest = isLatest @@ -1803,7 +1806,7 @@ func (x xlMetaV2) ToFileInfo(volume, path, versionID string, inclFreeVers bool) // versions returns error for unexpected entries. 
// showPendingDeletes is set to true if ListVersions needs to list objects marked deleted // but waiting to be replicated -func (x xlMetaV2) ListVersions(volume, path string) ([]FileInfo, error) { +func (x xlMetaV2) ListVersions(volume, path string, allParts bool) ([]FileInfo, error) { versions := make([]FileInfo, 0, len(x.versions)) var err error @@ -1813,7 +1816,7 @@ func (x xlMetaV2) ListVersions(volume, path string) ([]FileInfo, error) { if err != nil { return versions, err } - fi, err := dst.ToFileInfo(volume, path) + fi, err := dst.ToFileInfo(volume, path, allParts) if err != nil { return versions, err } @@ -2024,7 +2027,7 @@ type xlMetaBuf []byte // ToFileInfo converts xlMetaV2 into a common FileInfo datastructure // for consumption across callers. -func (x xlMetaBuf) ToFileInfo(volume, path, versionID string) (fi FileInfo, err error) { +func (x xlMetaBuf) ToFileInfo(volume, path, versionID string, allParts bool) (fi FileInfo, err error) { var uv uuid.UUID if versionID != "" && versionID != nullVersionID { uv, err = uuid.Parse(versionID) @@ -2071,7 +2074,7 @@ func (x xlMetaBuf) ToFileInfo(volume, path, versionID string) (fi FileInfo, err if _, err := version.unmarshalV(metaV, meta); err != nil { return err } - if fi, err = version.ToFileInfo(volume, path); err != nil { + if fi, err = version.ToFileInfo(volume, path, allParts); err != nil { return err } fi.IsLatest = isLatest @@ -2095,7 +2098,7 @@ func (x xlMetaBuf) ToFileInfo(volume, path, versionID string) (fi FileInfo, err // versions returns error for unexpected entries. // showPendingDeletes is set to true if ListVersions needs to list objects marked deleted // but waiting to be replicated -func (x xlMetaBuf) ListVersions(volume, path string) ([]FileInfo, error) { +func (x xlMetaBuf) ListVersions(volume, path string, allParts bool) ([]FileInfo, error) { vers, _, metaV, buf, err := decodeXLHeaders(x) if err != nil { return nil, err @@ -2111,7 +2114,7 @@ func (x xlMetaBuf) ListVersions(volume, path string) ([]FileInfo, error) { if !xl.Valid() { return errFileCorrupt } - fi, err := xl.ToFileInfo(volume, path) + fi, err := xl.ToFileInfo(volume, path, allParts) if err != nil { return err } diff --git a/cmd/xl-storage-format-v2_test.go b/cmd/xl-storage-format-v2_test.go index f9b42c5ce..3162d6e55 100644 --- a/cmd/xl-storage-format-v2_test.go +++ b/cmd/xl-storage-format-v2_test.go @@ -1049,7 +1049,7 @@ func TestXMinIOHealingSkip(t *testing.T) { failOnErr(xl.AddVersion(fi)) var err error - fi, err = xl.ToFileInfo(fi.Volume, fi.Name, fi.VersionID, false) + fi, err = xl.ToFileInfo(fi.Volume, fi.Name, fi.VersionID, false, true) if err != nil { t.Fatalf("xl.ToFileInfo failed with %v", err) } @@ -1058,3 +1058,41 @@ func TestXMinIOHealingSkip(t *testing.T) { t.Fatal("Expected fi.Healing to be false") } } + +func benchmarkManyPartsOptionally(b *testing.B, allParts bool) { + f, err := os.Open("testdata/xl-many-parts.meta") + if err != nil { + b.Fatal(err) + } + defer f.Close() + + data, err := io.ReadAll(f) + if err != nil { + b.Fatal(err) + } + + buf, _, _ := isIndexedMetaV2(data) + if buf == nil { + b.Fatal("buf == nil") + } + + b.Run("ToFileInfo", func(b *testing.B) { + b.ResetTimer() + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + _, err = buf.ToFileInfo("volume", "path", "", allParts) + if err != nil { + b.Fatal(err) + } + } + }) +} + +func BenchmarkToFileInfoNoParts(b *testing.B) { + benchmarkManyPartsOptionally(b, false) +} + +func BenchmarkToFileInfoWithParts(b *testing.B) { + benchmarkManyPartsOptionally(b, true) +} diff --git 
a/cmd/xl-storage-format_test.go b/cmd/xl-storage-format_test.go index 05ed2a9f7..3fe8d319c 100644 --- a/cmd/xl-storage-format_test.go +++ b/cmd/xl-storage-format_test.go @@ -485,7 +485,7 @@ func BenchmarkXlMetaV2Shallow(b *testing.B) { b.Fatal(err) } // List... - _, err = xl.ToFileInfo("volume", "path", ids[rng.Intn(size)], false) + _, err = xl.ToFileInfo("volume", "path", ids[rng.Intn(size)], false, true) if err != nil { b.Fatal(err) } @@ -503,7 +503,7 @@ func BenchmarkXlMetaV2Shallow(b *testing.B) { b.Fatal(err) } // List... - _, err = xl.ListVersions("volume", "path") + _, err = xl.ListVersions("volume", "path", true) if err != nil { b.Fatal(err) } @@ -518,7 +518,7 @@ func BenchmarkXlMetaV2Shallow(b *testing.B) { if buf == nil { b.Fatal("buf == nil") } - _, err = buf.ToFileInfo("volume", "path", ids[rng.Intn(size)]) + _, err = buf.ToFileInfo("volume", "path", ids[rng.Intn(size)], true) if err != nil { b.Fatal(err) } @@ -533,7 +533,7 @@ func BenchmarkXlMetaV2Shallow(b *testing.B) { if buf == nil { b.Fatal("buf == nil") } - _, err = buf.ListVersions("volume", "path") + _, err = buf.ListVersions("volume", "path", true) if err != nil { b.Fatal(err) } diff --git a/cmd/xl-storage-free-version_test.go b/cmd/xl-storage-free-version_test.go index 3fb927110..f8e4df941 100644 --- a/cmd/xl-storage-free-version_test.go +++ b/cmd/xl-storage-free-version_test.go @@ -26,7 +26,7 @@ import ( ) func (x xlMetaV2) listFreeVersions(volume, path string) ([]FileInfo, error) { - fivs, err := x.ListVersions(volume, path) + fivs, err := x.ListVersions(volume, path, true) if err != nil { return nil, err } @@ -179,7 +179,7 @@ func TestFreeVersion(t *testing.T) { } for _, ft := range freeVersionsTests { - fi, err := xl.ToFileInfo(ft.vol, ft.name, "", ft.inclFreeVers) + fi, err := xl.ToFileInfo(ft.vol, ft.name, "", ft.inclFreeVers, true) if err != nil && !errors.Is(err, ft.expectedErr) { t.Fatalf("ToFileInfo failed due to %v", err) } diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index f65ef7cdf..6401f55a9 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -528,7 +528,7 @@ func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates // Remove filename which is the meta file. item.transformMetaDir() - fivs, err := getFileInfoVersions(buf, item.bucket, item.objectPath()) + fivs, err := getFileInfoVersions(buf, item.bucket, item.objectPath(), false) metaDataPoolPut(buf) if err != nil { res["err"] = err.Error() @@ -1437,7 +1437,7 @@ func (s *xlStorage) ReadVersion(ctx context.Context, volume, path, versionID str return fi, err } - fi, err = getFileInfo(buf, volume, path, versionID, readData) + fi, err = getFileInfo(buf, volume, path, versionID, readData, true) if err != nil { return fi, err } @@ -2410,7 +2410,7 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f } // Replace the data of null version or any other existing version-id - ofi, err := xlMeta.ToFileInfo(dstVolume, dstPath, reqVID, false) + ofi, err := xlMeta.ToFileInfo(dstVolume, dstPath, reqVID, false, false) if err == nil && !ofi.Deleted { if xlMeta.SharedDataDirCountStr(reqVID, ofi.DataDir) == 0 { // Purge the destination path as we are not preserving anything
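To make the "scanner has no use for Parts" point concrete, here is a hedged sketch of what a scanner-style consumer reads; fileInfo and scanUsage are invented names for illustration, not MinIO APIs. Because only aggregate sizes and live-object counts are consumed, a nil Parts slice changes nothing in the result, which is why NSScanner can call getFileInfoVersions with allParts set to false.

```go
package sketch

// fileInfo is a hypothetical, trimmed-down stand-in for FileInfo; only
// the fields a usage scan actually reads are shown. Parts is left nil on
// the scanner path after this patch.
type fileInfo struct {
	Size    int64
	Deleted bool
	Parts   []struct{ Size int64 }
}

// scanUsage aggregates what a data-usage scan needs: the number of live
// versions and their cumulative logical size. It never touches fi.Parts,
// so skipping that allocation upstream is free.
func scanUsage(versions []fileInfo) (objects int, totalSize int64) {
	for _, fi := range versions {
		if fi.Deleted {
			continue
		}
		objects++
		totalSize += fi.Size
	}
	return objects, totalSize
}
```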
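The new BenchmarkToFileInfoNoParts/BenchmarkToFileInfoWithParts benchmarks above rely on a testdata/xl-many-parts.meta fixture from the repository. For readers who want to see the effect without that fixture, the sketch below measures the same kind of difference with invented types (fakeMeta, fakePart, and convert are hypothetical, and its numbers will not match the ones quoted in the commit message):

```go
package sketch

import "testing"

// fakeMeta mimics the metadata of an object with n parts using parallel
// slices, the way the xl metadata does.
type fakeMeta struct {
	Numbers []int
	Sizes   []int64
}

func makeMeta(n int) fakeMeta {
	m := fakeMeta{Numbers: make([]int, n), Sizes: make([]int64, n)}
	for i := range m.Numbers {
		m.Numbers[i] = i + 1
		m.Sizes[i] = 5 << 20 // 5 MiB parts, an arbitrary choice
	}
	return m
}

type fakePart struct {
	Number int
	Size   int64
}

// convert allocates the parts slice only when allParts is set, mirroring
// the guard added to ToFileInfo in this patch.
func convert(m fakeMeta, allParts bool) []fakePart {
	if !allParts {
		return nil
	}
	parts := make([]fakePart, len(m.Numbers))
	for i := range parts {
		parts[i] = fakePart{Number: m.Numbers[i], Size: m.Sizes[i]}
	}
	return parts
}

func benchmarkConvert(b *testing.B, allParts bool) {
	m := makeMeta(10000) // the pathological part count from the commit message
	b.ReportAllocs()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		_ = convert(m, allParts)
	}
}

func BenchmarkConvertNoParts(b *testing.B)   { benchmarkConvert(b, false) }
func BenchmarkConvertWithParts(b *testing.B) { benchmarkConvert(b, true) }
```

Placed in a _test.go file, this runs with `go test -run '^$' -bench Convert -benchmem`; the with-parts variant reports one extra allocation per iteration, which is the cost the no-parts path avoids on every repeated call.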