Remove partName, partETag requirement (#9044)

This is a precursor change before versioning,
removes/deprecates the requirement of remembering
partName and partETag which are not useful after
a multipart transaction has finished.

This PR reduces the overall size of the backend
JSON for large file uploads.
This commit is contained in:
Harshavardhana 2020-03-03 03:29:30 +03:00 committed by GitHub
parent 978bd4e2c4
commit e3b44c3829
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 77 additions and 70 deletions

View File

@ -263,6 +263,7 @@ func TestAdminServerInfo(t *testing.T) {
if err != nil {
t.Fatal("Failed to initialize a single node XL backend for admin handler tests.")
}
defer adminTestBed.TearDown()
// Initialize admin peers to make admin RPC calls.

View File

@ -280,15 +280,11 @@ func TestGetDecryptedRange_Issue50(t *testing.T) {
Parts: []ObjectPartInfo{
{
Number: 1,
Name: "part.1",
ETag: "etag1",
Size: 297580380,
ActualSize: 297435132,
},
{
Number: 2,
Name: "part.2",
ETag: "etag2",
Size: 297580380,
ActualSize: 297435132,
},

View File

@ -575,7 +575,6 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
fsMeta.Parts[i] = ObjectPartInfo{
Number: part.PartNumber,
ETag: part.ETag,
Size: fi.Size(),
ActualSize: actualSize,
}

View File

@ -506,7 +506,6 @@ func (l *s3EncObjects) PutObjectPart(ctx context.Context, bucket string, object
Number: partID,
ETag: pi.ETag,
Size: pi.Size,
Name: strconv.Itoa(partID),
}
gwMeta.ETag = data.MD5CurrentHexString() // encrypted ETag
gwMeta.Stat.Size = pi.Size

View File

@ -18,6 +18,7 @@ package cmd
import (
"context"
"fmt"
"time"
"github.com/minio/minio/cmd/logger"
@ -181,8 +182,9 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
// parts. This is considered an outdated disk, since
// it needs healing too.
for _, part := range partsMetadata[i].Parts {
checksumInfo := erasureInfo.GetChecksumInfo(part.Name)
err = onlineDisk.VerifyFile(bucket, pathJoin(object, part.Name), erasure.ShardFileSize(part.Size), checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
checksumInfo := erasureInfo.GetChecksumInfo(part.Number)
partPath := pathJoin(object, fmt.Sprintf("part.%d", part.Number))
err = onlineDisk.VerifyFile(bucket, partPath, erasure.ShardFileSize(part.Size), checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
if err != nil {
if !IsErr(err, []error{
errFileNotFound,
@ -198,7 +200,8 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
}
case madmin.HealNormalScan:
for _, part := range partsMetadata[i].Parts {
_, err := onlineDisk.StatFile(bucket, pathJoin(object, part.Name))
partPath := pathJoin(object, fmt.Sprintf("part.%d", part.Number))
_, err := onlineDisk.StatFile(bucket, partPath)
if err != nil {
dataErrs[i] = err
break

View File

@ -19,6 +19,7 @@ package cmd
import (
"bytes"
"context"
"fmt"
"os"
"path/filepath"
"testing"
@ -317,8 +318,8 @@ func TestDisksWithAllParts(t *testing.T) {
diskFailures[15] = "part.1"
for diskIndex, partName := range diskFailures {
for _, info := range partsMetadata[diskIndex].Erasure.Checksums {
if info.Name == partName {
for i := range partsMetadata[diskIndex].Erasure.Checksums {
if fmt.Sprintf("part.%d", i+1) == partName {
filePath := pathJoin(xlDisks[diskIndex].String(), bucket, object, partName)
f, err := os.OpenFile(filePath, os.O_WRONLY|os.O_SYNC, 0)
if err != nil {

View File

@ -388,26 +388,27 @@ func (xl xlObjects) healObject(ctx context.Context, bucket string, object string
erasureInfo := latestMeta.Erasure
for partIndex := 0; partIndex < len(latestMeta.Parts); partIndex++ {
partName := latestMeta.Parts[partIndex].Name
partSize := latestMeta.Parts[partIndex].Size
partActualSize := latestMeta.Parts[partIndex].ActualSize
partNumber := latestMeta.Parts[partIndex].Number
tillOffset := erasure.ShardFileTillOffset(0, partSize, partSize)
readers := make([]io.ReaderAt, len(latestDisks))
checksumAlgo := erasureInfo.GetChecksumInfo(partName).Algorithm
checksumAlgo := erasureInfo.GetChecksumInfo(partNumber).Algorithm
for i, disk := range latestDisks {
if disk == OfflineDisk {
continue
}
checksumInfo := partsMetadata[i].Erasure.GetChecksumInfo(partName)
readers[i] = newBitrotReader(disk, bucket, pathJoin(object, partName), tillOffset, checksumAlgo, checksumInfo.Hash, erasure.ShardSize())
checksumInfo := partsMetadata[i].Erasure.GetChecksumInfo(partNumber)
partPath := pathJoin(object, fmt.Sprintf("part.%d", partNumber))
readers[i] = newBitrotReader(disk, bucket, partPath, tillOffset, checksumAlgo, checksumInfo.Hash, erasure.ShardSize())
}
writers := make([]io.Writer, len(outDatedDisks))
for i, disk := range outDatedDisks {
if disk == OfflineDisk {
continue
}
writers[i] = newBitrotWriter(disk, minioMetaTmpBucket, pathJoin(tmpID, partName), tillOffset, checksumAlgo, erasure.ShardSize())
partPath := pathJoin(tmpID, fmt.Sprintf("part.%d", partNumber))
writers[i] = newBitrotWriter(disk, minioMetaTmpBucket, partPath, tillOffset, checksumAlgo, erasure.ShardSize())
}
hErr := erasure.Heal(ctx, readers, writers, partSize)
closeBitrotReaders(readers)
@ -428,8 +429,12 @@ func (xl xlObjects) healObject(ctx context.Context, bucket string, object string
disksToHealCount--
continue
}
partsMetadata[i].AddObjectPart(partNumber, partName, "", partSize, partActualSize)
partsMetadata[i].Erasure.AddChecksumInfo(ChecksumInfo{partName, checksumAlgo, bitrotWriterSum(writers[i])})
partsMetadata[i].AddObjectPart(partNumber, "", partSize, partActualSize)
partsMetadata[i].Erasure.AddChecksumInfo(ChecksumInfo{
PartNumber: partNumber,
Algorithm: checksumAlgo,
Hash: bitrotWriterSum(writers[i]),
})
}
// If all disks are having errors, we give up.

View File

@ -60,6 +60,10 @@ func TestUndoMakeBucket(t *testing.T) {
}
func TestHealObjectCorrupted(t *testing.T) {
resetGlobalHealState()
defer resetGlobalHealState()
nDisks := 16
fsDirs, err := getRandomDisks(nDisks)
if err != nil {

View File

@ -21,6 +21,7 @@ import (
"context"
"encoding/hex"
"encoding/json"
"fmt"
"net/http"
"path"
"sort"
@ -38,9 +39,8 @@ const erasureAlgorithmKlauspost = "klauspost/reedsolomon/vandermonde"
// ObjectPartInfo Info of each part kept in the multipart metadata
// file after CompleteMultipartUpload() is called.
type ObjectPartInfo struct {
Number int `json:"number"`
Name string `json:"name"`
ETag string `json:"etag,omitempty"`
Number int `json:"number"`
Size int64 `json:"size"`
ActualSize int64 `json:"actualSize"`
}
@ -54,9 +54,9 @@ func (t byObjectPartNumber) Less(i, j int) bool { return t[i].Number < t[j].Numb
// ChecksumInfo - carries checksums of individual scattered parts per disk.
type ChecksumInfo struct {
Name string
Algorithm BitrotAlgorithm
Hash []byte
PartNumber int
Algorithm BitrotAlgorithm
Hash []byte
}
type checksumInfoJSON struct {
@ -68,7 +68,7 @@ type checksumInfoJSON struct {
// MarshalJSON marshals the ChecksumInfo struct
func (c ChecksumInfo) MarshalJSON() ([]byte, error) {
info := checksumInfoJSON{
Name: c.Name,
Name: fmt.Sprintf("part.%d", c.PartNumber),
Algorithm: c.Algorithm.String(),
Hash: hex.EncodeToString(c.Hash),
}
@ -86,9 +86,11 @@ func (c *ChecksumInfo) UnmarshalJSON(data []byte) error {
if err != nil {
return err
}
c.Name = info.Name
c.Algorithm = BitrotAlgorithmFromString(info.Algorithm)
c.Hash = sum
if _, err = fmt.Sscanf(info.Name, "part.%d", &c.PartNumber); err != nil {
return err
}
if !c.Algorithm.Available() {
logger.LogIf(context.Background(), errBitrotHashAlgoInvalid)
@ -118,7 +120,7 @@ type ErasureInfo struct {
// AddChecksumInfo adds a checksum of a part.
func (e *ErasureInfo) AddChecksumInfo(ckSumInfo ChecksumInfo) {
for i, sum := range e.Checksums {
if sum.Name == ckSumInfo.Name {
if sum.PartNumber == ckSumInfo.PartNumber {
e.Checksums[i] = ckSumInfo
return
}
@ -127,10 +129,10 @@ func (e *ErasureInfo) AddChecksumInfo(ckSumInfo ChecksumInfo) {
}
// GetChecksumInfo - get checksum of a part.
func (e ErasureInfo) GetChecksumInfo(partName string) (ckSum ChecksumInfo) {
// Return the checksum.
func (e ErasureInfo) GetChecksumInfo(partNumber int) (ckSum ChecksumInfo) {
for _, sum := range e.Checksums {
if sum.Name == partName {
if sum.PartNumber == partNumber {
// Return the checksum
return sum
}
}
@ -279,10 +281,9 @@ func objectPartIndex(parts []ObjectPartInfo, partNumber int) int {
}
// AddObjectPart - add a new object part in order.
func (m *xlMetaV1) AddObjectPart(partNumber int, partName string, partETag string, partSize int64, actualSize int64) {
func (m *xlMetaV1) AddObjectPart(partNumber int, partETag string, partSize int64, actualSize int64) {
partInfo := ObjectPartInfo{
Number: partNumber,
Name: partName,
ETag: partETag,
Size: partSize,
ActualSize: actualSize,
@ -330,8 +331,8 @@ func getXLMetaInQuorum(ctx context.Context, metaArr []xlMetaV1, modTime time.Tim
for i, meta := range metaArr {
if meta.IsValid() && meta.Stat.ModTime.Equal(modTime) {
h := sha256.New()
for _, p := range meta.Parts {
h.Write([]byte(p.Name))
for _, part := range meta.Parts {
h.Write([]byte(fmt.Sprintf("part.%d", part.Number)))
}
metaHashes[i] = hex.EncodeToString(h.Sum(nil))
}

View File

@ -21,7 +21,6 @@ import (
"context"
"os"
"path"
"strconv"
"testing"
"time"
@ -218,8 +217,7 @@ func TestAddObjectPart(t *testing.T) {
// Test them.
for _, testCase := range testCases {
if testCase.expectedIndex > -1 {
partNumString := strconv.Itoa(testCase.partNum)
xlMeta.AddObjectPart(testCase.partNum, "part."+partNumString, "etag."+partNumString, int64(testCase.partNum+humanize.MiByte), ActualSize)
xlMeta.AddObjectPart(testCase.partNum, "", int64(testCase.partNum+humanize.MiByte), ActualSize)
}
if index := objectPartIndex(xlMeta.Parts, testCase.partNum); index != testCase.expectedIndex {
@ -250,8 +248,7 @@ func TestObjectPartIndex(t *testing.T) {
// Add some parts for testing.
for _, testCase := range testCases {
partNumString := strconv.Itoa(testCase.partNum)
xlMeta.AddObjectPart(testCase.partNum, "part."+partNumString, "etag."+partNumString, int64(testCase.partNum+humanize.MiByte), ActualSize)
xlMeta.AddObjectPart(testCase.partNum, "", int64(testCase.partNum+humanize.MiByte), ActualSize)
}
// Add failure test case.
@ -279,8 +276,7 @@ func TestObjectToPartOffset(t *testing.T) {
// Add some parts for testing.
// Total size of all parts is 5,242,899 bytes.
for _, partNum := range []int{1, 2, 4, 5, 7} {
partNumString := strconv.Itoa(partNum)
xlMeta.AddObjectPart(partNum, "part."+partNumString, "etag."+partNumString, int64(partNum+humanize.MiByte), ActualSize)
xlMeta.AddObjectPart(partNum, "", int64(partNum+humanize.MiByte), ActualSize)
}
testCases := []struct {

View File

@ -55,8 +55,8 @@ func (xl xlObjects) checkUploadIDExists(ctx context.Context, bucket, object, upl
}
// Removes part given by partName belonging to a mulitpart upload from minioMetaBucket
func (xl xlObjects) removeObjectPart(bucket, object, uploadID, partName string) {
curpartPath := path.Join(bucket, object, uploadID, partName)
func (xl xlObjects) removeObjectPart(bucket, object, uploadID string, partNumber int) {
curpartPath := pathJoin(bucket, object, uploadID, fmt.Sprintf("part.%d", partNumber))
storageDisks := xl.getDisks()
g := errgroup.WithNErrs(len(storageDisks))
@ -391,7 +391,7 @@ func (xl xlObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID
md5hex := r.MD5CurrentHexString()
// Add the current part.
xlMeta.AddObjectPart(partID, partSuffix, md5hex, n, data.ActualSize())
xlMeta.AddObjectPart(partID, md5hex, n, data.ActualSize())
for i, disk := range onlineDisks {
if disk == OfflineDisk {
@ -399,7 +399,11 @@ func (xl xlObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID
}
partsMetadata[i].Stat = xlMeta.Stat
partsMetadata[i].Parts = xlMeta.Parts
partsMetadata[i].Erasure.AddChecksumInfo(ChecksumInfo{partSuffix, DefaultBitrotAlgorithm, bitrotWriterSum(writers[i])})
partsMetadata[i].Erasure.AddChecksumInfo(ChecksumInfo{
PartNumber: partID,
Algorithm: DefaultBitrotAlgorithm,
Hash: bitrotWriterSum(writers[i]),
})
}
// Write all the checksum metadata.
@ -421,8 +425,8 @@ func (xl xlObjects) PutObjectPart(ctx context.Context, bucket, object, uploadID
// Return success.
return PartInfo{
PartNumber: partID,
LastModified: xlMeta.Stat.ModTime,
ETag: md5hex,
LastModified: xlMeta.Stat.ModTime,
Size: xlMeta.Stat.Size,
ActualSize: data.ActualSize(),
}, nil
@ -630,7 +634,6 @@ func (xl xlObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
xlMeta.Parts[i] = ObjectPartInfo{
Number: part.PartNumber,
Size: currentXLMeta.Parts[partIdx].Size,
Name: fmt.Sprintf("part.%d", part.PartNumber),
ActualSize: currentXLMeta.Parts[partIdx].ActualSize,
}
}
@ -701,7 +704,7 @@ func (xl xlObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
// Request 3: PutObjectPart 2
// Request 4: CompleteMultipartUpload --part 2
// N.B. 1st part is not present. This part should be removed from the storage.
xl.removeObjectPart(bucket, object, uploadID, curpart.Name)
xl.removeObjectPart(bucket, object, uploadID, curpart.Number)
}
}

View File

@ -19,6 +19,7 @@ package cmd
import (
"bytes"
"context"
"fmt"
"io"
"net/http"
"path"
@ -264,8 +265,10 @@ func (xl xlObjects) getObject(ctx context.Context, bucket, object string, startO
if length == totalBytesRead {
break
}
partNumber := xlMeta.Parts[partIndex].Number
// Save the current part name and size.
partName := xlMeta.Parts[partIndex].Name
partSize := xlMeta.Parts[partIndex].Size
partLength := partSize - partOffset
@ -281,8 +284,10 @@ func (xl xlObjects) getObject(ctx context.Context, bucket, object string, startO
if disk == OfflineDisk {
continue
}
checksumInfo := metaArr[index].Erasure.GetChecksumInfo(partName)
readers[index] = newBitrotReader(disk, bucket, pathJoin(object, partName), tillOffset, checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
checksumInfo := metaArr[index].Erasure.GetChecksumInfo(partNumber)
partPath := pathJoin(object, fmt.Sprintf("part.%d", partNumber))
readers[index] = newBitrotReader(disk, bucket, partPath, tillOffset,
checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
}
err := erasure.Decode(ctx, writer, readers, partOffset, partLength, partSize)
// Note: we should not be defer'ing the following closeBitrotReaders() call as we are inside a for loop i.e if we use defer, we would accumulate a lot of open files by the time
@ -596,8 +601,12 @@ func (xl xlObjects) putObject(ctx context.Context, bucket string, object string,
onlineDisks[i] = nil
continue
}
partsMetadata[i].AddObjectPart(1, partName, "", n, data.ActualSize())
partsMetadata[i].Erasure.AddChecksumInfo(ChecksumInfo{partName, DefaultBitrotAlgorithm, bitrotWriterSum(w)})
partsMetadata[i].AddObjectPart(1, "", n, data.ActualSize())
partsMetadata[i].Erasure.AddChecksumInfo(ChecksumInfo{
PartNumber: 1,
Algorithm: DefaultBitrotAlgorithm,
Hash: bitrotWriterSum(w),
})
}
// Save additional erasureMetadata.

View File

@ -22,7 +22,6 @@ import (
"encoding/hex"
"encoding/json"
"reflect"
"strconv"
"testing"
humanize "github.com/dustin/go-humanize"
@ -164,25 +163,23 @@ func newTestXLMetaV1() xlMetaV1 {
return xlMeta
}
func (m *xlMetaV1) AddTestObjectCheckSum(checkSumNum int, name string, algorithm BitrotAlgorithm, hash string) {
func (m *xlMetaV1) AddTestObjectCheckSum(partNumber int, algorithm BitrotAlgorithm, hash string) {
checksum, err := hex.DecodeString(hash)
if err != nil {
panic(err)
}
m.Erasure.Checksums[checkSumNum] = ChecksumInfo{name, algorithm, checksum}
m.Erasure.Checksums[partNumber-1] = ChecksumInfo{partNumber, algorithm, checksum}
}
// AddTestObjectPart - add a new object part in order.
func (m *xlMetaV1) AddTestObjectPart(partNumber int, partName string, partETag string, partSize int64) {
func (m *xlMetaV1) AddTestObjectPart(partNumber int, partSize int64) {
partInfo := ObjectPartInfo{
Number: partNumber,
Name: partName,
ETag: partETag,
Size: partSize,
}
// Proceed to include new part info.
m.Parts[partNumber] = partInfo
m.Parts[partNumber-1] = partInfo
}
// Constructs xlMetaV1{} for given number of parts and converts it into bytes.
@ -203,11 +200,10 @@ func getSampleXLMeta(totalParts int) xlMetaV1 {
// total number of parts.
xlMeta.Parts = make([]ObjectPartInfo, totalParts)
for i := 0; i < totalParts; i++ {
partName := "part." + strconv.Itoa(i+1)
// hard coding hash and algo value for the checksum, Since we are benchmarking the parsing of xl.json the magnitude doesn't affect the test,
// The magnitude doesn't make a difference, only the size does.
xlMeta.AddTestObjectCheckSum(i, partName, BLAKE2b512, "a23f5eff248c4372badd9f3b2455a285cd4ca86c3d9a570b091d3fc5cd7ca6d9484bbea3f8c5d8d4f84daae96874419eda578fd736455334afbac2c924b3915a")
xlMeta.AddTestObjectPart(i, partName, "d3fdd79cc3efd5fe5c068d7be397934b", 67108864)
xlMeta.AddTestObjectCheckSum(i+1, BLAKE2b512, "a23f5eff248c4372badd9f3b2455a285cd4ca86c3d9a570b091d3fc5cd7ca6d9484bbea3f8c5d8d4f84daae96874419eda578fd736455334afbac2c924b3915a")
xlMeta.AddTestObjectPart(i+1, 67108864)
}
return xlMeta
}
@ -256,8 +252,8 @@ func compareXLMetaV1(t *testing.T, unMarshalXLMeta, jsoniterXLMeta xlMetaV1) {
t.Errorf("Expected the size of Erasure Checksums to be %d, but got %d.", len(unMarshalXLMeta.Erasure.Checksums), len(jsoniterXLMeta.Erasure.Checksums))
} else {
for i := 0; i < len(unMarshalXLMeta.Erasure.Checksums); i++ {
if unMarshalXLMeta.Erasure.Checksums[i].Name != jsoniterXLMeta.Erasure.Checksums[i].Name {
t.Errorf("Expected the Erasure Checksum Name to be \"%s\", got \"%s\".", unMarshalXLMeta.Erasure.Checksums[i].Name, jsoniterXLMeta.Erasure.Checksums[i].Name)
if unMarshalXLMeta.Erasure.Checksums[i].PartNumber != jsoniterXLMeta.Erasure.Checksums[i].PartNumber {
t.Errorf("Expected the Erasure Checksum PartNumber to be \"%d\", got \"%d\".", unMarshalXLMeta.Erasure.Checksums[i].PartNumber, jsoniterXLMeta.Erasure.Checksums[i].PartNumber)
}
if unMarshalXLMeta.Erasure.Checksums[i].Algorithm != jsoniterXLMeta.Erasure.Checksums[i].Algorithm {
t.Errorf("Expected the Erasure Checksum Algorithm to be \"%s\", got \"%s\".", unMarshalXLMeta.Erasure.Checksums[i].Algorithm, jsoniterXLMeta.Erasure.Checksums[i].Algorithm)
@ -275,12 +271,6 @@ func compareXLMetaV1(t *testing.T, unMarshalXLMeta, jsoniterXLMeta xlMetaV1) {
t.Errorf("Expected info of %d parts to be present, but got %d instead.", len(unMarshalXLMeta.Parts), len(jsoniterXLMeta.Parts))
} else {
for i := 0; i < len(unMarshalXLMeta.Parts); i++ {
if unMarshalXLMeta.Parts[i].Name != jsoniterXLMeta.Parts[i].Name {
t.Errorf("Expected the name of part %d to be \"%s\", got \"%s\".", i+1, unMarshalXLMeta.Parts[i].Name, jsoniterXLMeta.Parts[i].Name)
}
if unMarshalXLMeta.Parts[i].ETag != jsoniterXLMeta.Parts[i].ETag {
t.Errorf("Expected the ETag of part %d to be \"%s\", got \"%s\".", i+1, unMarshalXLMeta.Parts[i].ETag, jsoniterXLMeta.Parts[i].ETag)
}
if unMarshalXLMeta.Parts[i].Number != jsoniterXLMeta.Parts[i].Number {
t.Errorf("Expected the number of part %d to be \"%d\", got \"%d\".", i+1, unMarshalXLMeta.Parts[i].Number, jsoniterXLMeta.Parts[i].Number)
}