From 89c58ce87ddb32cb28416567c1eaf0d495c36a27 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Thu, 8 Aug 2024 08:29:58 -0700 Subject: [PATCH] enhance getActualSize() to return valid values for most situations (#20228) --- cmd/bucket-replication.go | 16 ++++++------- cmd/encryption-v1.go | 5 ++-- cmd/erasure-multipart.go | 4 +++- cmd/erasure-object.go | 23 +++++++++--------- cmd/metacache-entries.go | 4 ++-- cmd/object-api-options.go | 2 +- cmd/object-api-utils.go | 37 +++++++++++++++++++---------- cmd/object-api-utils_test.go | 4 +++- cmd/xl-storage-format-utils.go | 9 ++++--- cmd/xl-storage-format-utils_test.go | 4 ++-- cmd/xl-storage.go | 3 +-- cmd/xl-storage_test.go | 2 +- 12 files changed, 64 insertions(+), 49 deletions(-) diff --git a/cmd/bucket-replication.go b/cmd/bucket-replication.go index 4153b9098..b7c63b044 100644 --- a/cmd/bucket-replication.go +++ b/cmd/bucket-replication.go @@ -1641,7 +1641,6 @@ func replicateObjectWithMultipart(ctx context.Context, c *minio.Core, bucket, ob var ( hr *hash.Reader - pInfo minio.ObjectPart isSSEC = crypto.SSEC.IsEncrypted(objInfo.UserDefined) ) @@ -1668,18 +1667,19 @@ func replicateObjectWithMultipart(ctx context.Context, c *minio.Core, bucket, ob CustomHeader: cHeader, } + var size int64 if isSSEC { - objectSize += partInfo.Size - pInfo, err = c.PutObjectPart(ctx, bucket, object, uploadID, partInfo.Number, hr, partInfo.Size, popts) + size = partInfo.Size } else { - objectSize += partInfo.ActualSize - pInfo, err = c.PutObjectPart(ctx, bucket, object, uploadID, partInfo.Number, hr, partInfo.ActualSize, popts) + size = partInfo.ActualSize } + objectSize += size + pInfo, err := c.PutObjectPart(ctx, bucket, object, uploadID, partInfo.Number, hr, size, popts) if err != nil { return err } - if !isSSEC && pInfo.Size != partInfo.ActualSize { - return fmt.Errorf("Part size mismatch: got %d, want %d", pInfo.Size, partInfo.ActualSize) + if pInfo.Size != size { + return fmt.Errorf("ssec(%t): Part size mismatch: got %d, want %d", isSSEC, pInfo.Size, size) } uploadedParts = append(uploadedParts, minio.CompletePart{ PartNumber: pInfo.PartNumber, @@ -1691,7 +1691,7 @@ func replicateObjectWithMultipart(ctx context.Context, c *minio.Core, bucket, ob }) } userMeta := map[string]string{ - validSSEReplicationHeaders[ReservedMetadataPrefix+"Actual-Object-Size"]: objInfo.UserDefined[ReservedMetadataPrefix+"actual-size"], + xhttp.MinIOReplicationActualObjectSize: objInfo.UserDefined[ReservedMetadataPrefix+"actual-size"], } if isSSEC && objInfo.UserDefined[ReplicationSsecChecksumHeader] != "" { userMeta[ReplicationSsecChecksumHeader] = objInfo.UserDefined[ReplicationSsecChecksumHeader] diff --git a/cmd/encryption-v1.go b/cmd/encryption-v1.go index 6c3ea3e48..d893e260d 100644 --- a/cmd/encryption-v1.go +++ b/cmd/encryption-v1.go @@ -743,8 +743,9 @@ func (d *DecryptBlocksReader) Read(p []byte) (int, error) { // but has an invalid size. 
func (o ObjectInfo) DecryptedSize() (int64, error) { if _, ok := crypto.IsEncrypted(o.UserDefined); !ok { - return 0, errors.New("Cannot compute decrypted size of an unencrypted object") + return -1, errors.New("Cannot compute decrypted size of an unencrypted object") } + if !o.isMultipart() { size, err := sio.DecryptedSize(uint64(o.Size)) if err != nil { @@ -757,7 +758,7 @@ func (o ObjectInfo) DecryptedSize() (int64, error) { for _, part := range o.Parts { partSize, err := sio.DecryptedSize(uint64(part.Size)) if err != nil { - return 0, errObjectTampered + return -1, errObjectTampered } size += int64(partSize) } diff --git a/cmd/erasure-multipart.go b/cmd/erasure-multipart.go index 933e49110..293428c2d 100644 --- a/cmd/erasure-multipart.go +++ b/cmd/erasure-multipart.go @@ -1301,7 +1301,9 @@ func (er erasureObjects) CompleteMultipartUpload(ctx context.Context, bucket str // Save the consolidated actual size. if opts.ReplicationRequest { - fi.Metadata[ReservedMetadataPrefix+"actual-size"] = opts.UserDefined["X-Minio-Internal-Actual-Object-Size"] + if v := opts.UserDefined[ReservedMetadataPrefix+"Actual-Object-Size"]; v != "" { + fi.Metadata[ReservedMetadataPrefix+"actual-size"] = v + } } else { fi.Metadata[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(objectActualSize, 10) } diff --git a/cmd/erasure-object.go b/cmd/erasure-object.go index f3f7953a5..468b2c883 100644 --- a/cmd/erasure-object.go +++ b/cmd/erasure-object.go @@ -97,7 +97,7 @@ func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, d if srcOpts.VersionID != "" { metaArr, errs = readAllFileInfo(ctx, storageDisks, "", srcBucket, srcObject, srcOpts.VersionID, true, false) } else { - metaArr, errs = readAllXL(ctx, storageDisks, srcBucket, srcObject, true, false, true) + metaArr, errs = readAllXL(ctx, storageDisks, srcBucket, srcObject, true, false) } readQuorum, writeQuorum, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount) @@ -575,11 +575,10 @@ func (er erasureObjects) deleteIfDangling(ctx context.Context, bucket, object st return m, nil } -func fileInfoFromRaw(ri RawFileInfo, bucket, object string, readData, inclFreeVers, allParts bool) (FileInfo, error) { +func fileInfoFromRaw(ri RawFileInfo, bucket, object string, readData, inclFreeVers bool) (FileInfo, error) { return getFileInfo(ri.Buf, bucket, object, "", fileInfoOpts{ Data: readData, InclFreeVersions: inclFreeVers, - AllParts: allParts, }) } @@ -604,7 +603,7 @@ func readAllRawFileInfo(ctx context.Context, disks []StorageAPI, bucket, object return rawFileInfos, g.Wait() } -func pickLatestQuorumFilesInfo(ctx context.Context, rawFileInfos []RawFileInfo, errs []error, bucket, object string, readData, inclFreeVers, allParts bool) ([]FileInfo, []error) { +func pickLatestQuorumFilesInfo(ctx context.Context, rawFileInfos []RawFileInfo, errs []error, bucket, object string, readData, inclFreeVers bool) ([]FileInfo, []error) { metadataArray := make([]*xlMetaV2, len(rawFileInfos)) metaFileInfos := make([]FileInfo, len(rawFileInfos)) metadataShallowVersions := make([][]xlMetaV2ShallowVersion, len(rawFileInfos)) @@ -641,7 +640,7 @@ func pickLatestQuorumFilesInfo(ctx context.Context, rawFileInfos []RawFileInfo, readQuorum := (len(rawFileInfos) + 1) / 2 meta := &xlMetaV2{versions: mergeXLV2Versions(readQuorum, false, 1, metadataShallowVersions...)} - lfi, err := meta.ToFileInfo(bucket, object, "", inclFreeVers, allParts) + lfi, err := meta.ToFileInfo(bucket, object, "", inclFreeVers, true) if err != nil { for i := range errs { if 
errs[i] == nil { @@ -670,7 +669,7 @@ func pickLatestQuorumFilesInfo(ctx context.Context, rawFileInfos []RawFileInfo, } // make sure to preserve this for diskmtime based healing bugfix. - metaFileInfos[index], errs[index] = metadataArray[index].ToFileInfo(bucket, object, versionID, inclFreeVers, allParts) + metaFileInfos[index], errs[index] = metadataArray[index].ToFileInfo(bucket, object, versionID, inclFreeVers, true) if errs[index] != nil { continue } @@ -715,9 +714,9 @@ func shouldCheckForDangling(err error, errs []error, bucket string) bool { return false } -func readAllXL(ctx context.Context, disks []StorageAPI, bucket, object string, readData, inclFreeVers, allParts bool) ([]FileInfo, []error) { +func readAllXL(ctx context.Context, disks []StorageAPI, bucket, object string, readData, inclFreeVers bool) ([]FileInfo, []error) { rawFileInfos, errs := readAllRawFileInfo(ctx, disks, bucket, object, readData) - return pickLatestQuorumFilesInfo(ctx, rawFileInfos, errs, bucket, object, readData, inclFreeVers, allParts) + return pickLatestQuorumFilesInfo(ctx, rawFileInfos, errs, bucket, object, readData, inclFreeVers) } func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object string, opts ObjectOptions, readData bool) (FileInfo, []FileInfo, []StorageAPI, error) { @@ -774,7 +773,7 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s // Read the latest version rfi, err = disk.ReadXL(ctx, bucket, object, readData) if err == nil { - fi, err = fileInfoFromRaw(rfi, bucket, object, readData, opts.InclFreeVersions, true) + fi, err = fileInfoFromRaw(rfi, bucket, object, readData, opts.InclFreeVersions) } } @@ -912,7 +911,7 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s // we must use rawFileInfo to resolve versions to figure out the latest version. 
if opts.VersionID == "" && totalResp == er.setDriveCount { fi, onlineMeta, onlineDisks, modTime, etag, err = calcQuorum(pickLatestQuorumFilesInfo(ctx, - rawArr, errs, bucket, object, readData, opts.InclFreeVersions, true)) + rawArr, errs, bucket, object, readData, opts.InclFreeVersions)) } else { fi, onlineMeta, onlineDisks, modTime, etag, err = calcQuorum(metaArr, errs) } @@ -2142,7 +2141,7 @@ func (er erasureObjects) PutObjectMetadata(ctx context.Context, bucket, object s if opts.VersionID != "" { metaArr, errs = readAllFileInfo(ctx, disks, "", bucket, object, opts.VersionID, false, false) } else { - metaArr, errs = readAllXL(ctx, disks, bucket, object, false, false, true) + metaArr, errs = readAllXL(ctx, disks, bucket, object, false, false) } readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount) @@ -2219,7 +2218,7 @@ func (er erasureObjects) PutObjectTags(ctx context.Context, bucket, object strin if opts.VersionID != "" { metaArr, errs = readAllFileInfo(ctx, disks, "", bucket, object, opts.VersionID, false, false) } else { - metaArr, errs = readAllXL(ctx, disks, bucket, object, false, false, true) + metaArr, errs = readAllXL(ctx, disks, bucket, object, false, false) } readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount) diff --git a/cmd/metacache-entries.go b/cmd/metacache-entries.go index f7ea2d43f..69c5e835c 100644 --- a/cmd/metacache-entries.go +++ b/cmd/metacache-entries.go @@ -257,7 +257,7 @@ func (e *metaCacheEntry) fileInfo(bucket string) (FileInfo, error) { ModTime: timeSentinel1970, }, nil } - return e.cached.ToFileInfo(bucket, e.name, "", false, false) + return e.cached.ToFileInfo(bucket, e.name, "", false, true) } return getFileInfo(e.metadata, bucket, e.name, "", fileInfoOpts{}) } @@ -300,7 +300,7 @@ func (e *metaCacheEntry) fileInfoVersions(bucket string) (FileInfoVersions, erro }, nil } // Too small gains to reuse cache here. - return getFileInfoVersions(e.metadata, bucket, e.name, false, true) + return getFileInfoVersions(e.metadata, bucket, e.name, true) } // metaCacheEntries is a slice of metacache entries. diff --git a/cmd/object-api-options.go b/cmd/object-api-options.go index f50bd71ab..232739816 100644 --- a/cmd/object-api-options.go +++ b/cmd/object-api-options.go @@ -482,7 +482,6 @@ func completeMultipartOpts(ctx context.Context, r *http.Request, bucket, object } opts.MTime = mtime opts.UserDefined = make(map[string]string) - opts.UserDefined[ReservedMetadataPrefix+"Actual-Object-Size"] = r.Header.Get(xhttp.MinIOReplicationActualObjectSize) // Transfer SSEC key in opts.EncryptFn if crypto.SSEC.IsRequested(r.Header) { key, err := ParseSSECustomerRequest(r) @@ -495,6 +494,7 @@ func completeMultipartOpts(ctx context.Context, r *http.Request, bucket, object } if _, ok := r.Header[xhttp.MinIOSourceReplicationRequest]; ok { opts.ReplicationRequest = true + opts.UserDefined[ReservedMetadataPrefix+"Actual-Object-Size"] = r.Header.Get(xhttp.MinIOReplicationActualObjectSize) } if r.Header.Get(ReplicationSsecChecksumHeader) != "" { opts.UserDefined[ReplicationSsecChecksumHeader] = r.Header.Get(ReplicationSsecChecksumHeader) diff --git a/cmd/object-api-utils.go b/cmd/object-api-utils.go index d40423f6e..15aa1f00e 100644 --- a/cmd/object-api-utils.go +++ b/cmd/object-api-utils.go @@ -1,4 +1,4 @@ -// Copyright (c) 2015-2021 MinIO, Inc. +// Copyright (c) 2015-2024 MinIO, Inc. 
// // This file is part of MinIO Object Storage stack // @@ -554,28 +554,41 @@ func (o ObjectInfo) GetActualSize() (int64, error) { return *o.ActualSize, nil } if o.IsCompressed() { - sizeStr, ok := o.UserDefined[ReservedMetadataPrefix+"actual-size"] - if !ok { + sizeStr := o.UserDefined[ReservedMetadataPrefix+"actual-size"] + if sizeStr != "" { + size, err := strconv.ParseInt(sizeStr, 10, 64) + if err != nil { + return -1, errInvalidDecompressedSize + } + return size, nil + } + var actualSize int64 + for _, part := range o.Parts { + actualSize += part.ActualSize + } + if (actualSize == 0) && (actualSize != o.Size) { return -1, errInvalidDecompressedSize } - size, err := strconv.ParseInt(sizeStr, 10, 64) - if err != nil { - return -1, errInvalidDecompressedSize - } - return size, nil + return actualSize, nil } if _, ok := crypto.IsEncrypted(o.UserDefined); ok { - sizeStr, ok := o.UserDefined[ReservedMetadataPrefix+"actual-size"] - if ok { + sizeStr := o.UserDefined[ReservedMetadataPrefix+"actual-size"] + if sizeStr != "" { size, err := strconv.ParseInt(sizeStr, 10, 64) if err != nil { return -1, errObjectTampered } return size, nil } - return o.DecryptedSize() + actualSize, err := o.DecryptedSize() + if err != nil { + return -1, err + } + if (actualSize == 0) && (actualSize != o.Size) { + return -1, errObjectTampered + } + return actualSize, nil } - return o.Size, nil } diff --git a/cmd/object-api-utils_test.go b/cmd/object-api-utils_test.go index 7d12d36d7..933a18ccb 100644 --- a/cmd/object-api-utils_test.go +++ b/cmd/object-api-utils_test.go @@ -609,7 +609,6 @@ func TestGetActualSize(t *testing.T) { objInfo: ObjectInfo{ UserDefined: map[string]string{ "X-Minio-Internal-compression": "klauspost/compress/s2", - "X-Minio-Internal-actual-size": "100000001", "content-type": "application/octet-stream", "etag": "b3ff3ef3789147152fbfbc50efba4bfd-2", }, @@ -623,6 +622,7 @@ func TestGetActualSize(t *testing.T) { ActualSize: 32891137, }, }, + Size: 100000001, }, result: 100000001, }, @@ -635,6 +635,7 @@ func TestGetActualSize(t *testing.T) { "etag": "b3ff3ef3789147152fbfbc50efba4bfd-2", }, Parts: []ObjectPartInfo{}, + Size: 841, }, result: 841, }, @@ -646,6 +647,7 @@ func TestGetActualSize(t *testing.T) { "etag": "b3ff3ef3789147152fbfbc50efba4bfd-2", }, Parts: []ObjectPartInfo{}, + Size: 100, }, result: -1, }, diff --git a/cmd/xl-storage-format-utils.go b/cmd/xl-storage-format-utils.go index bd2a7639f..13c6836e6 100644 --- a/cmd/xl-storage-format-utils.go +++ b/cmd/xl-storage-format-utils.go @@ -30,8 +30,8 @@ import ( // if inclFreeVersions is true all the versions are in fivs.Versions, free and non-free versions alike. // // Note: Only the scanner requires fivs.Versions to have exclusively non-free versions. This is used while enforcing NewerNoncurrentVersions lifecycle element. 
-func getFileInfoVersions(xlMetaBuf []byte, volume, path string, allParts, inclFreeVersions bool) (FileInfoVersions, error) { - fivs, err := getAllFileInfoVersions(xlMetaBuf, volume, path, allParts) +func getFileInfoVersions(xlMetaBuf []byte, volume, path string, inclFreeVersions bool) (FileInfoVersions, error) { + fivs, err := getAllFileInfoVersions(xlMetaBuf, volume, path, true) if err != nil { return fivs, err } @@ -106,7 +106,6 @@ func getAllFileInfoVersions(xlMetaBuf []byte, volume, path string, allParts bool type fileInfoOpts struct { InclFreeVersions bool Data bool - AllParts bool } func getFileInfo(xlMetaBuf []byte, volume, path, versionID string, opts fileInfoOpts) (FileInfo, error) { @@ -117,7 +116,7 @@ func getFileInfo(xlMetaBuf []byte, volume, path, versionID string, opts fileInfo return FileInfo{}, e } else if buf != nil { inData = data - fi, err = buf.ToFileInfo(volume, path, versionID, opts.AllParts) + fi, err = buf.ToFileInfo(volume, path, versionID, true) if len(buf) != 0 && errors.Is(err, errFileNotFound) { // This special case is needed to handle len(xlMeta.versions) == 0 return FileInfo{ @@ -146,7 +145,7 @@ func getFileInfo(xlMetaBuf []byte, volume, path, versionID string, opts fileInfo }, nil } inData = xlMeta.data - fi, err = xlMeta.ToFileInfo(volume, path, versionID, opts.InclFreeVersions, opts.AllParts) + fi, err = xlMeta.ToFileInfo(volume, path, versionID, opts.InclFreeVersions, true) } if !opts.Data || err != nil { return fi, err diff --git a/cmd/xl-storage-format-utils_test.go b/cmd/xl-storage-format-utils_test.go index e9462fe51..91a5e4019 100644 --- a/cmd/xl-storage-format-utils_test.go +++ b/cmd/xl-storage-format-utils_test.go @@ -178,7 +178,7 @@ func TestGetFileInfoVersions(t *testing.T) { if err != nil { t.Fatalf("Failed to serialize xlmeta %v", err) } - fivs, err := getFileInfoVersions(buf, basefi.Volume, basefi.Name, true, false) + fivs, err := getFileInfoVersions(buf, basefi.Volume, basefi.Name, false) if err != nil { t.Fatalf("getFileInfoVersions failed: %v", err) } @@ -222,7 +222,7 @@ func TestGetFileInfoVersions(t *testing.T) { // versions are stored in xl-meta sorted in descending order of their ModTime slices.Reverse(allVersionIDs) - fivs, err = getFileInfoVersions(buf, basefi.Volume, basefi.Name, true, true) + fivs, err = getFileInfoVersions(buf, basefi.Volume, basefi.Name, true) if err != nil { t.Fatalf("getFileInfoVersions failed: %v", err) } diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index ac663080f..725615368 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -590,7 +590,7 @@ func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates // Remove filename which is the meta file. 
item.transformMetaDir() - fivs, err := getFileInfoVersions(buf, item.bucket, item.objectPath(), false, false) + fivs, err := getFileInfoVersions(buf, item.bucket, item.objectPath(), false) metaDataPoolPut(buf) if err != nil { res["err"] = err.Error() @@ -1671,7 +1671,6 @@ func (s *xlStorage) ReadVersion(ctx context.Context, origvolume, volume, path, v fi, err = getFileInfo(buf, volume, path, versionID, fileInfoOpts{ Data: opts.ReadData, InclFreeVersions: opts.InclFreeVersions, - AllParts: true, }) if err != nil { return fi, err diff --git a/cmd/xl-storage_test.go b/cmd/xl-storage_test.go index bd03aadf7..189a242f7 100644 --- a/cmd/xl-storage_test.go +++ b/cmd/xl-storage_test.go @@ -271,7 +271,7 @@ func TestXLStorageReadVersion(t *testing.T) { } xlMeta, _ := os.ReadFile("testdata/xl.meta") - fi, _ := getFileInfo(xlMeta, "exists", "as-file", "", fileInfoOpts{Data: false, AllParts: true}) + fi, _ := getFileInfo(xlMeta, "exists", "as-file", "", fileInfoOpts{Data: false}) // Create files for the test cases. if err = xlStorage.MakeVol(context.Background(), "exists"); err != nil {
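The net effect of the headline change is easier to see outside diff form. Below is a condensed, self-contained sketch of the new fallback order GetActualSize() uses for compressed objects after this patch (stored actual-size metadata first, then the sum of per-part actual sizes); the type and field names are simplified stand-ins rather than the real MinIO declarations, and the sample sizes are taken from the updated test table above.

package main

import (
	"errors"
	"fmt"
	"strconv"
)

// part and objInfo are pared-down stand-ins for MinIO's ObjectPartInfo and
// ObjectInfo; only the fields the fallback logic consults are kept.
type part struct{ actualSize int64 }

type objInfo struct {
	size        int64             // stored (compressed) object size
	userDefined map[string]string // carries "X-Minio-Internal-actual-size" when set
	parts       []part
}

var errInvalidDecompressedSize = errors.New("invalid decompressed size")

// actualSize mirrors the fallback order this patch gives compressed objects:
// 1) trust the stored actual-size metadata when present,
// 2) otherwise sum the per-part actual sizes,
// 3) fail only when that sum is zero while the stored size is not.
func actualSize(o objInfo) (int64, error) {
	if s := o.userDefined["X-Minio-Internal-actual-size"]; s != "" {
		n, err := strconv.ParseInt(s, 10, 64)
		if err != nil {
			return -1, errInvalidDecompressedSize
		}
		return n, nil
	}
	var sum int64
	for _, p := range o.parts {
		sum += p.actualSize
	}
	if sum == 0 && sum != o.size {
		return -1, errInvalidDecompressedSize
	}
	return sum, nil
}

func main() {
	// Metadata entry missing, but part sizes are known: the size is now
	// recovered instead of failing with errInvalidDecompressedSize.
	o := objInfo{size: 100000001, parts: []part{{67108864}, {32891137}}}
	fmt.Println(actualSize(o)) // 100000001 <nil>

	// Metadata missing and no parts on a non-empty object: still -1 plus an error.
	fmt.Println(actualSize(objInfo{size: 100}))
}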
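The bucket-replication change similarly collapses the SSE-C and plaintext paths into a single PutObjectPart call whose size is chosen up front and then verified in both cases. The stand-alone illustration below only captures that size selection; the hash.Reader and minio-go wiring from the real loop are elided, the type is a simplified stand-in, and the sample sizes are made up.

package main

import "fmt"

// objectPartInfo is a pared-down stand-in for MinIO's ObjectPartInfo.
type objectPartInfo struct {
	Number     int
	Size       int64 // stored (possibly encrypted) part size
	ActualSize int64 // original client-visible part size
}

// replicatedPartSize is the size handed to PutObjectPart after this patch:
// SSE-C parts replicate at their encrypted on-disk size, everything else at
// its actual size, and the size echoed back by the target must now match
// exactly in both cases (previously the check was skipped for SSE-C).
func replicatedPartSize(p objectPartInfo, isSSEC bool) int64 {
	if isSSEC {
		return p.Size
	}
	return p.ActualSize
}

func main() {
	p := objectPartInfo{Number: 1, Size: 1049088, ActualSize: 1048576}
	fmt.Println(replicatedPartSize(p, true))  // 1049088: ciphertext size for SSE-C
	fmt.Println(replicatedPartSize(p, false)) // 1048576: actual size otherwise
}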