azure: Fix upload corruption with PutObject() on certain sizes (#8330)

On objects bigger than 100MiB can have a corrupted object
stored due to partial blockListing attempted right after
each blocks uploaded. Simplify this code to ensure that
all the blocks successfully uploaded are committed right
away.

This PR also updates the azure-sdk-go to latest release.
This commit is contained in:
Harshavardhana 2019-09-30 18:42:18 -07:00 committed by GitHub
parent 65ac7c5671
commit 4ec9b349d0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 43 deletions

View File

@ -761,7 +761,7 @@ func (a *azureObjects) GetObjectInfo(ctx context.Context, bucket, object string,
// uses Azure equivalent CreateBlockBlobFromReader.
func (a *azureObjects) PutObject(ctx context.Context, bucket, object string, r *minio.PutObjReader, opts minio.ObjectOptions) (objInfo minio.ObjectInfo, err error) {
data := r.Reader
if data.Size() < azureBlockSize/10 {
if data.Size() <= azureBlockSize/2 {
blob := a.client.GetContainerReference(bucket).GetBlobReference(object)
blob.Metadata, blob.Properties, err = s3MetaToAzureProperties(ctx, opts.UserDefined)
if err != nil {
@ -773,9 +773,8 @@ func (a *azureObjects) PutObject(ctx context.Context, bucket, object string, r *
return a.GetObjectInfo(ctx, bucket, object, opts)
}
blockIDs := make(map[string]string)
blob := a.client.GetContainerReference(bucket).GetBlobReference(object)
var blocks []storage.Block
subPartSize, subPartNumber := int64(azureBlockSize), 1
for remainingSize := data.Size(); remainingSize >= 0; remainingSize -= subPartSize {
// Allow to create zero sized part.
@ -788,45 +787,18 @@ func (a *azureObjects) PutObject(ctx context.Context, bucket, object string, r *
}
id := base64.StdEncoding.EncodeToString([]byte(minio.MustGetUUID()))
blockIDs[id] = ""
if err = blob.PutBlockWithLength(id, uint64(subPartSize), io.LimitReader(data, subPartSize), nil); err != nil {
err = blob.PutBlockWithLength(id, uint64(subPartSize), io.LimitReader(data, subPartSize), nil)
if err != nil {
return objInfo, azureToObjectError(err, bucket, object)
}
blocks = append(blocks, storage.Block{
ID: id,
Status: storage.BlockStatusUncommitted,
})
subPartNumber++
}
objBlob := a.client.GetContainerReference(bucket).GetBlobReference(object)
resp, err := objBlob.GetBlockList(storage.BlockListTypeUncommitted, nil)
if err != nil {
return objInfo, azureToObjectError(err, bucket, object)
}
getBlocks := func(blocksMap map[string]string) (blocks []storage.Block, size int64, aerr error) {
for _, part := range resp.UncommittedBlocks {
if _, ok := blocksMap[part.Name]; ok {
blocks = append(blocks, storage.Block{
ID: part.Name,
Status: storage.BlockStatusUncommitted,
})
size += part.Size
}
}
if len(blocks) == 0 {
return nil, 0, minio.InvalidPart{}
}
return blocks, size, nil
}
var blocks []storage.Block
blocks, _, err = getBlocks(blockIDs)
if err != nil {
logger.LogIf(ctx, err)
return objInfo, err
}
if err = objBlob.PutBlockList(blocks, nil); err != nil {
if err = blob.PutBlockList(blocks, nil); err != nil {
return objInfo, azureToObjectError(err, bucket, object)
}
@ -836,14 +808,14 @@ func (a *azureObjects) PutObject(ctx context.Context, bucket, object string, r *
// Save md5sum for future processing on the object.
opts.UserDefined["x-amz-meta-md5sum"] = r.MD5CurrentHexString()
objBlob.Metadata, objBlob.Properties, err = s3MetaToAzureProperties(ctx, opts.UserDefined)
blob.Metadata, blob.Properties, err = s3MetaToAzureProperties(ctx, opts.UserDefined)
if err != nil {
return objInfo, azureToObjectError(err, bucket, object)
}
if err = objBlob.SetProperties(nil); err != nil {
if err = blob.SetProperties(nil); err != nil {
return objInfo, azureToObjectError(err, bucket, object)
}
if err = objBlob.SetMetadata(nil); err != nil {
if err = blob.SetMetadata(nil); err != nil {
return objInfo, azureToObjectError(err, bucket, object)
}
@ -995,13 +967,12 @@ func (a *azureObjects) PutObjectPart(ctx context.Context, bucket, object, upload
}
id := base64.StdEncoding.EncodeToString([]byte(minio.MustGetUUID()))
partMetaV1.BlockIDs = append(partMetaV1.BlockIDs, id)
blob := a.client.GetContainerReference(bucket).GetBlobReference(object)
err = blob.PutBlockWithLength(id, uint64(subPartSize), io.LimitReader(data, subPartSize), nil)
if err != nil {
return info, azureToObjectError(err, bucket, object)
}
partMetaV1.BlockIDs = append(partMetaV1.BlockIDs, id)
subPartNumber++
}

2
go.mod
View File

@ -4,7 +4,7 @@ go 1.13
require (
cloud.google.com/go v0.37.2
github.com/Azure/azure-sdk-for-go v27.0.0+incompatible
github.com/Azure/azure-sdk-for-go v33.4.0+incompatible
github.com/Azure/go-autorest v11.7.0+incompatible
github.com/alecthomas/participle v0.2.1
github.com/aliyun/aliyun-oss-go-sdk v0.0.0-20190307165228-86c17b95fcd5

2
go.sum
View File

@ -15,6 +15,8 @@ git.apache.org/thrift.git v0.12.0/go.mod h1:fPE2ZNJGynbRyZ4dJvy6G277gSllfV2HJqbl
github.com/Azure/azure-sdk-for-go v26.4.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
github.com/Azure/azure-sdk-for-go v27.0.0+incompatible h1:JknnG+RYTnwzpi+YuQ04/dAWIssbubSRD8arN78I+Qo=
github.com/Azure/azure-sdk-for-go v27.0.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
github.com/Azure/azure-sdk-for-go v33.4.0+incompatible h1:yzJKzcKTX0WwDdZC8kAqxiGVZz66uqpajhgphstEUN0=
github.com/Azure/azure-sdk-for-go v33.4.0+incompatible/go.mod h1:9XXNKU+eRnpl9moKnB4QOLf1HestfXbmab5FXxiDBjc=
github.com/Azure/go-ansiterm v0.0.0-20170929234023-d6e3b3328b78/go.mod h1:LmzpDX56iTiv29bbRTIsUNlaFfuhWRQBWjQdVyAevI8=
github.com/Azure/go-autorest v11.5.2+incompatible/go.mod h1:r+4oMnoxhatjLLJ6zxSWATqVooLgysK6ZNox3g/xq24=
github.com/Azure/go-autorest v11.7.0+incompatible h1:gzma19dc9ejB75D90E5S+/wXouzpZyA+CV+/MJPSD/k=