From d4b822d697b8c0d9b4f3e35e4e942b309692dcc4 Mon Sep 17 00:00:00 2001 From: Andreas Auernhammer Date: Tue, 23 Feb 2021 21:31:53 +0100 Subject: [PATCH] pkg/etag: add new package for S3 ETag handling (#11577) This commit adds a new package `etag` for dealing with S3 ETags. Even though ETag is often viewed as MD5 checksum of an object, handling S3 ETags correctly is a surprisingly complex task. While it is true that the ETag corresponds to the MD5 for the most basic S3 API operations, there are many exceptions in case of multipart uploads or encryption. In worse, some S3 clients expect very specific behavior when it comes to ETags. For example, some clients expect that the ETag is a double-quoted string and fail otherwise. Non-AWS compliant ETag handling has been a source of many bugs in the past. Therefore, this commit adds a dedicated `etag` package that provides functionality for parsing, generating and converting S3 ETags. Further, this commit removes the ETag computation from the `hash` package. Instead, the `hash` package (i.e. `hash.Reader`) should focus only on computing and verifying the content-sha256. One core feature of this commit is to provide a mechanism to communicate a computed ETag from a low-level `io.Reader` to a high-level `io.Reader`. This problem occurs when an S3 server receives a request and has to compute the ETag of the content. However, the server may also wrap the initial body with several other `io.Reader`, e.g. when encrypting or compressing the content: ``` reader := Encrypt(Compress(ETag(content))) ``` In such a case, the ETag should be accessible by the high-level `io.Reader`. The `etag` provides a mechanism to wrap `io.Reader` implementations such that the `ETag` can be accessed by a type-check. This technique is applied to the PUT, COPY and Upload handlers. --- cmd/auth-handler.go | 3 +- cmd/bucket-handlers.go | 4 +- cmd/bucket-lifecycle.go | 2 +- cmd/config-common.go | 2 +- cmd/data-scanner.go | 2 +- cmd/data-usage-cache.go | 2 +- cmd/data-usage.go | 2 +- cmd/disk-cache.go | 2 +- cmd/gateway/s3/gateway-s3-metadata.go | 2 +- cmd/metacache-bucket.go | 2 +- cmd/metacache-set.go | 2 +- cmd/object-handlers.go | 64 +++--- cmd/test-utils_test.go | 2 +- cmd/web-handlers.go | 18 +- pkg/etag/etag.go | 284 ++++++++++++++++++++++++++ pkg/etag/etag_test.go | 139 +++++++++++++ pkg/etag/reader.go | 151 ++++++++++++++ pkg/hash/reader.go | 270 +++++++++++++----------- pkg/hash/reader_test.go | 102 ++++----- 19 files changed, 821 insertions(+), 234 deletions(-) create mode 100644 pkg/etag/etag.go create mode 100644 pkg/etag/etag_test.go create mode 100644 pkg/etag/reader.go diff --git a/cmd/auth-handler.go b/cmd/auth-handler.go index 570b03a98..260699a84 100644 --- a/cmd/auth-handler.go +++ b/cmd/auth-handler.go @@ -459,8 +459,7 @@ func isReqAuthenticated(ctx context.Context, r *http.Request, region string, sty // Verify 'Content-Md5' and/or 'X-Amz-Content-Sha256' if present. // The verification happens implicit during reading. - reader, err := hash.NewReader(r.Body, -1, hex.EncodeToString(contentMD5), - hex.EncodeToString(contentSHA256), -1, globalCLIContext.StrictS3Compat) + reader, err := hash.NewReader(r.Body, -1, hex.EncodeToString(contentMD5), hex.EncodeToString(contentSHA256), -1) if err != nil { return toAPIErrorCode(ctx, err) } diff --git a/cmd/bucket-handlers.go b/cmd/bucket-handlers.go index 649f67b8f..7ef1948e3 100644 --- a/cmd/bucket-handlers.go +++ b/cmd/bucket-handlers.go @@ -917,7 +917,7 @@ func (api objectAPIHandlers) PostPolicyBucketHandler(w http.ResponseWriter, r *h return } - hashReader, err := hash.NewReader(fileBody, fileSize, "", "", fileSize, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(fileBody, fileSize, "", "", fileSize) if err != nil { logger.LogIf(ctx, err) writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) @@ -964,7 +964,7 @@ func (api objectAPIHandlers) PostPolicyBucketHandler(w http.ResponseWriter, r *h } info := ObjectInfo{Size: fileSize} // do not try to verify encrypted content - hashReader, err = hash.NewReader(reader, info.EncryptedSize(), "", "", fileSize, globalCLIContext.StrictS3Compat) + hashReader, err = hash.NewReader(reader, info.EncryptedSize(), "", "", fileSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return diff --git a/cmd/bucket-lifecycle.go b/cmd/bucket-lifecycle.go index 788fe8235..db74658b0 100644 --- a/cmd/bucket-lifecycle.go +++ b/cmd/bucket-lifecycle.go @@ -692,7 +692,7 @@ func restoreTransitionedObject(ctx context.Context, bucket, object string, objAP return err } defer gr.Close() - hashReader, err := hash.NewReader(gr, objInfo.Size, "", "", objInfo.Size, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(gr, objInfo.Size, "", "", objInfo.Size) if err != nil { return err } diff --git a/cmd/config-common.go b/cmd/config-common.go index e8fa13f1d..b2cf3b624 100644 --- a/cmd/config-common.go +++ b/cmd/config-common.go @@ -64,7 +64,7 @@ func deleteConfig(ctx context.Context, objAPI objectDeleter, configFile string) } func saveConfig(ctx context.Context, objAPI ObjectLayer, configFile string, data []byte) error { - hashReader, err := hash.NewReader(bytes.NewReader(data), int64(len(data)), "", getSHA256Hash(data), int64(len(data)), globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(bytes.NewReader(data), int64(len(data)), "", getSHA256Hash(data), int64(len(data))) if err != nil { return err } diff --git a/cmd/data-scanner.go b/cmd/data-scanner.go index 953d837ba..848ef317b 100644 --- a/cmd/data-scanner.go +++ b/cmd/data-scanner.go @@ -128,7 +128,7 @@ func runDataScanner(ctx context.Context, objAPI ObjectLayer) { nextBloomCycle++ var tmp [8]byte binary.LittleEndian.PutUint64(tmp[:], nextBloomCycle) - r, err := hash.NewReader(bytes.NewReader(tmp[:]), int64(len(tmp)), "", "", int64(len(tmp)), false) + r, err := hash.NewReader(bytes.NewReader(tmp[:]), int64(len(tmp)), "", "", int64(len(tmp))) if err != nil { logger.LogIf(ctx, err) continue diff --git a/cmd/data-usage-cache.go b/cmd/data-usage-cache.go index 664e95f80..b0a854b1a 100644 --- a/cmd/data-usage-cache.go +++ b/cmd/data-usage-cache.go @@ -513,7 +513,7 @@ func (d *dataUsageCache) save(ctx context.Context, store objectIO, name string) }() defer pr.Close() - r, err := hash.NewReader(pr, -1, "", "", -1, false) + r, err := hash.NewReader(pr, -1, "", "", -1) if err != nil { return err } diff --git a/cmd/data-usage.go b/cmd/data-usage.go index d4464829f..b08f191d3 100644 --- a/cmd/data-usage.go +++ b/cmd/data-usage.go @@ -47,7 +47,7 @@ func storeDataUsageInBackend(ctx context.Context, objAPI ObjectLayer, dui <-chan continue } size := int64(len(dataUsageJSON)) - r, err := hash.NewReader(bytes.NewReader(dataUsageJSON), size, "", "", size, false) + r, err := hash.NewReader(bytes.NewReader(dataUsageJSON), size, "", "", size) if err != nil { logger.LogIf(ctx, err) continue diff --git a/cmd/disk-cache.go b/cmd/disk-cache.go index 049de51b1..58681dbfc 100644 --- a/cmd/disk-cache.go +++ b/cmd/disk-cache.go @@ -705,7 +705,7 @@ func (c *cacheObjects) uploadObject(ctx context.Context, oi ObjectInfo) { if st == CommitComplete || st.String() == "" { return } - hashReader, err := hash.NewReader(cReader, oi.Size, "", "", oi.Size, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(cReader, oi.Size, "", "", oi.Size) if err != nil { return } diff --git a/cmd/gateway/s3/gateway-s3-metadata.go b/cmd/gateway/s3/gateway-s3-metadata.go index 61e547a45..a4f0ff4ec 100644 --- a/cmd/gateway/s3/gateway-s3-metadata.go +++ b/cmd/gateway/s3/gateway-s3-metadata.go @@ -168,7 +168,7 @@ func getGWMetadata(ctx context.Context, bucket, prefix string, gwMeta gwMetaV1) logger.LogIf(ctx, err) return nil, err } - hashReader, err := hash.NewReader(bytes.NewReader(metadataBytes), int64(len(metadataBytes)), "", "", int64(len(metadataBytes)), false) + hashReader, err := hash.NewReader(bytes.NewReader(metadataBytes), int64(len(metadataBytes)), "", "", int64(len(metadataBytes))) if err != nil { return nil, err } diff --git a/cmd/metacache-bucket.go b/cmd/metacache-bucket.go index 77fc55b42..44d00ae2a 100644 --- a/cmd/metacache-bucket.go +++ b/cmd/metacache-bucket.go @@ -176,7 +176,7 @@ func (b *bucketMetacache) save(ctx context.Context) error { b.updated = false b.mu.Unlock() - hr, err := hash.NewReader(tmp, int64(tmp.Len()), "", "", int64(tmp.Len()), false) + hr, err := hash.NewReader(tmp, int64(tmp.Len()), "", "", int64(tmp.Len())) if err != nil { return err } diff --git a/cmd/metacache-set.go b/cmd/metacache-set.go index 82bb3220c..4f42e42cd 100644 --- a/cmd/metacache-set.go +++ b/cmd/metacache-set.go @@ -672,7 +672,7 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr return nil } o.debugln(color.Green("listPath:")+" saving block", b.n, "to", o.objectPath(b.n)) - r, err := hash.NewReader(bytes.NewReader(b.data), int64(len(b.data)), "", "", int64(len(b.data)), false) + r, err := hash.NewReader(bytes.NewReader(b.data), int64(len(b.data)), "", "", int64(len(b.data))) logger.LogIf(ctx, err) custom := b.headerKV() _, err = er.putObject(ctx, minioMetaBucket, o.objectPath(b.n), NewPutObjReader(r), ObjectOptions{ diff --git a/cmd/object-handlers.go b/cmd/object-handlers.go index dc5b45118..115c92c9b 100644 --- a/cmd/object-handlers.go +++ b/cmd/object-handlers.go @@ -48,6 +48,7 @@ import ( objectlock "github.com/minio/minio/pkg/bucket/object/lock" "github.com/minio/minio/pkg/bucket/policy" "github.com/minio/minio/pkg/bucket/replication" + "github.com/minio/minio/pkg/etag" "github.com/minio/minio/pkg/event" "github.com/minio/minio/pkg/handlers" "github.com/minio/minio/pkg/hash" @@ -1025,7 +1026,7 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re srcInfo.metadataOnly = false } - var reader io.Reader + var reader io.Reader = gr // Set the actual size to the compressed/decrypted size if encrypted. actualSize, err := srcInfo.GetActualSize() @@ -1057,15 +1058,15 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re // avoid copying them in target object. crypto.RemoveInternalEntries(srcInfo.UserDefined) - s2c := newS2CompressReader(gr, actualSize) + s2c := newS2CompressReader(reader, actualSize) defer s2c.Close() - reader = s2c + reader = etag.Wrap(s2c, reader) length = -1 } else { reader = gr } - srcInfo.Reader, err = hash.NewReader(reader, length, "", "", actualSize, globalCLIContext.StrictS3Compat) + srcInfo.Reader, err = hash.NewReader(reader, length, "", "", actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -1162,11 +1163,13 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re } if isTargetEncrypted { - reader, objEncKey, err = newEncryptReader(srcInfo.Reader, newKey, dstBucket, dstObject, encMetadata, sseS3) + var encReader io.Reader + encReader, objEncKey, err = newEncryptReader(srcInfo.Reader, newKey, dstBucket, dstObject, encMetadata, sseS3) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return } + reader = etag.Wrap(encReader, srcInfo.Reader) } if isSourceEncrypted { @@ -1176,7 +1179,7 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re } // do not try to verify encrypted content - srcInfo.Reader, err = hash.NewReader(reader, targetSize, "", "", actualSize, globalCLIContext.StrictS3Compat) + srcInfo.Reader, err = hash.NewReader(reader, targetSize, "", "", actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -1457,13 +1460,12 @@ func (api objectAPIHandlers) PutObjectHandler(w http.ResponseWriter, r *http.Req } var ( - md5hex = hex.EncodeToString(md5Bytes) - sha256hex = "" - reader io.Reader + md5hex = hex.EncodeToString(md5Bytes) + sha256hex = "" + reader io.Reader = r.Body s3Err APIErrorCode putObject = objectAPI.PutObject ) - reader = r.Body // Check if put is allowed if s3Err = isPutActionAllowed(ctx, rAuthType, bucket, object, r, iampolicy.PutObjectAction); s3Err != ErrNone { @@ -1509,13 +1511,12 @@ func (api objectAPIHandlers) PutObjectHandler(w http.ResponseWriter, r *http.Req } actualSize := size - if objectAPI.IsCompressionSupported() && isCompressible(r.Header, object) && size > 0 { // Storing the compression metadata. metadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV2 metadata[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(size, 10) - actualReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize, globalCLIContext.StrictS3Compat) + actualReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -1524,13 +1525,13 @@ func (api objectAPIHandlers) PutObjectHandler(w http.ResponseWriter, r *http.Req // Set compression metrics. s2c := newS2CompressReader(actualReader, actualSize) defer s2c.Close() - reader = s2c + reader = etag.Wrap(s2c, actualReader) size = -1 // Since compressed size is un-predictable. md5hex = "" // Do not try to verify the content. sha256hex = "" } - hashReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -1601,7 +1602,7 @@ func (api objectAPIHandlers) PutObjectHandler(w http.ResponseWriter, r *http.Req } // do not try to verify encrypted content - hashReader, err = hash.NewReader(reader, wantSize, "", "", actualSize, globalCLIContext.StrictS3Compat) + hashReader, err = hash.NewReader(etag.Wrap(reader, hashReader), wantSize, "", "", actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -1999,8 +2000,7 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt return } - partInfo, err := core.PutObjectPart(ctx, dstBucket, dstObject, uploadID, partID, - gr, length, "", "", dstOpts.ServerSideEncryption) + partInfo, err := core.PutObjectPart(ctx, dstBucket, dstObject, uploadID, partID, gr, length, "", "", dstOpts.ServerSideEncryption) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -2015,7 +2015,7 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt } actualPartSize = length - var reader io.Reader + var reader io.Reader = etag.NewReader(gr, nil) mi, err := objectAPI.GetMultipartInfo(ctx, dstBucket, dstObject, uploadID, dstOpts) if err != nil { @@ -2027,15 +2027,13 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt _, isCompressed := mi.UserDefined[ReservedMetadataPrefix+"compression"] // Compress only if the compression is enabled during initial multipart. if isCompressed { - s2c := newS2CompressReader(gr, actualPartSize) + s2c := newS2CompressReader(reader, actualPartSize) defer s2c.Close() - reader = s2c + reader = etag.Wrap(s2c, reader) length = -1 - } else { - reader = gr } - srcInfo.Reader, err = hash.NewReader(reader, length, "", "", actualPartSize, globalCLIContext.StrictS3Compat) + srcInfo.Reader, err = hash.NewReader(reader, length, "", "", actualPartSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -2077,11 +2075,12 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt copy(objectEncryptionKey[:], key) partEncryptionKey := objectEncryptionKey.DerivePartKey(uint32(partID)) - reader, err = sio.EncryptReader(reader, sio.Config{Key: partEncryptionKey[:]}) + encReader, err := sio.EncryptReader(reader, sio.Config{Key: partEncryptionKey[:]}) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return } + reader = etag.Wrap(encReader, reader) wantSize := int64(-1) if length >= 0 { @@ -2089,7 +2088,7 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt wantSize = info.EncryptedSize() } - srcInfo.Reader, err = hash.NewReader(reader, wantSize, "", "", actualPartSize, globalCLIContext.StrictS3Compat) + srcInfo.Reader, err = hash.NewReader(reader, wantSize, "", "", actualPartSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -2209,12 +2208,11 @@ func (api objectAPIHandlers) PutObjectPartHandler(w http.ResponseWriter, r *http } var ( - md5hex = hex.EncodeToString(md5Bytes) - sha256hex = "" - reader io.Reader + md5hex = hex.EncodeToString(md5Bytes) + sha256hex = "" + reader io.Reader = r.Body s3Error APIErrorCode ) - reader = r.Body if s3Error = isPutActionAllowed(ctx, rAuthType, bucket, object, r, iampolicy.PutObjectAction); s3Error != ErrNone { writeErrorResponse(ctx, w, errorCodes.ToAPIErr(s3Error), r.URL, guessIsBrowserReq(r)) return @@ -2271,7 +2269,7 @@ func (api objectAPIHandlers) PutObjectPartHandler(w http.ResponseWriter, r *http _, isCompressed := mi.UserDefined[ReservedMetadataPrefix+"compression"] if objectAPI.IsCompressionSupported() && isCompressed { - actualReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize, globalCLIContext.StrictS3Compat) + actualReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -2280,13 +2278,13 @@ func (api objectAPIHandlers) PutObjectPartHandler(w http.ResponseWriter, r *http // Set compression metrics. s2c := newS2CompressReader(actualReader, actualSize) defer s2c.Close() - reader = s2c + reader = etag.Wrap(s2c, actualReader) size = -1 // Since compressed size is un-predictable. md5hex = "" // Do not try to verify the content. sha256hex = "" } - hashReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(reader, size, md5hex, sha256hex, actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return @@ -2343,7 +2341,7 @@ func (api objectAPIHandlers) PutObjectPartHandler(w http.ResponseWriter, r *http wantSize = info.EncryptedSize() } // do not try to verify encrypted content - hashReader, err = hash.NewReader(reader, wantSize, "", "", actualSize, globalCLIContext.StrictS3Compat) + hashReader, err = hash.NewReader(etag.Wrap(reader, hashReader), wantSize, "", "", actualSize) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return diff --git a/cmd/test-utils_test.go b/cmd/test-utils_test.go index 623f80956..6e90e5ca0 100644 --- a/cmd/test-utils_test.go +++ b/cmd/test-utils_test.go @@ -160,7 +160,7 @@ func calculateSignedChunkLength(chunkDataSize int64) int64 { } func mustGetPutObjReader(t TestErrHandler, data io.Reader, size int64, md5hex, sha256hex string) *PutObjReader { - hr, err := hash.NewReader(data, size, md5hex, sha256hex, size, globalCLIContext.StrictS3Compat) + hr, err := hash.NewReader(data, size, md5hex, sha256hex, size) if err != nil { t.Fatal(err) } diff --git a/cmd/web-handlers.go b/cmd/web-handlers.go index e7004814c..bab5ce6c4 100644 --- a/cmd/web-handlers.go +++ b/cmd/web-handlers.go @@ -50,6 +50,7 @@ import ( objectlock "github.com/minio/minio/pkg/bucket/object/lock" "github.com/minio/minio/pkg/bucket/policy" "github.com/minio/minio/pkg/bucket/replication" + "github.com/minio/minio/pkg/etag" "github.com/minio/minio/pkg/event" "github.com/minio/minio/pkg/handlers" "github.com/minio/minio/pkg/hash" @@ -1233,7 +1234,7 @@ func (web *webAPIHandlers) Upload(w http.ResponseWriter, r *http.Request) { var reader io.Reader = r.Body actualSize := size - hashReader, err := hash.NewReader(reader, size, "", "", actualSize, globalCLIContext.StrictS3Compat) + hashReader, err := hash.NewReader(reader, size, "", "", actualSize) if err != nil { writeWebErrorResponse(w, err) return @@ -1244,7 +1245,7 @@ func (web *webAPIHandlers) Upload(w http.ResponseWriter, r *http.Request) { metadata[ReservedMetadataPrefix+"compression"] = compressionAlgorithmV2 metadata[ReservedMetadataPrefix+"actual-size"] = strconv.FormatInt(actualSize, 10) - actualReader, err := hash.NewReader(reader, actualSize, "", "", actualSize, globalCLIContext.StrictS3Compat) + actualReader, err := hash.NewReader(reader, actualSize, "", "", actualSize) if err != nil { writeWebErrorResponse(w, err) return @@ -1254,8 +1255,8 @@ func (web *webAPIHandlers) Upload(w http.ResponseWriter, r *http.Request) { size = -1 // Since compressed size is un-predictable. s2c := newS2CompressReader(actualReader, actualSize) defer s2c.Close() - reader = s2c - hashReader, err = hash.NewReader(reader, size, "", "", actualSize, globalCLIContext.StrictS3Compat) + reader = etag.Wrap(s2c, actualReader) + hashReader, err = hash.NewReader(reader, size, "", "", actualSize) if err != nil { writeWebErrorResponse(w, err) return @@ -1276,15 +1277,18 @@ func (web *webAPIHandlers) Upload(w http.ResponseWriter, r *http.Request) { if objectAPI.IsEncryptionSupported() { if _, ok := crypto.IsRequested(r.Header); ok && !HasSuffix(object, SlashSeparator) { // handle SSE requests - var objectEncryptionKey crypto.ObjectKey - reader, objectEncryptionKey, err = EncryptRequest(hashReader, r, bucket, object, metadata) + var ( + objectEncryptionKey crypto.ObjectKey + encReader io.Reader + ) + encReader, objectEncryptionKey, err = EncryptRequest(hashReader, r, bucket, object, metadata) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return } info := ObjectInfo{Size: size} // do not try to verify encrypted content - hashReader, err = hash.NewReader(reader, info.EncryptedSize(), "", "", size, globalCLIContext.StrictS3Compat) + hashReader, err = hash.NewReader(etag.Wrap(encReader, hashReader), info.EncryptedSize(), "", "", size) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) return diff --git a/pkg/etag/etag.go b/pkg/etag/etag.go new file mode 100644 index 000000000..8c8085afe --- /dev/null +++ b/pkg/etag/etag.go @@ -0,0 +1,284 @@ +// MinIO Cloud Storage, (C) 2021 MinIO, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package etag provides an implementation of S3 ETags. +// +// Each S3 object has an associated ETag that can be +// used to e.g. quickly compare objects or check whether +// the content of an object has changed. +// +// In general, an S3 ETag is an MD5 checksum of the object +// content. However, there are many exceptions to this rule. +// +// +// Single-part Upload +// +// In case of a basic single-part PUT operation - without server +// side encryption or object compression - the ETag of an object +// is its content MD5. +// +// +// Multi-part Upload +// +// The ETag of an object does not correspond to its content MD5 +// when the object is uploaded in multiple parts via the S3 +// multipart API. Instead, S3 first computes a MD5 of each part: +// e1 := MD5(part-1) +// e2 := MD5(part-2) +// ... +// eN := MD5(part-N) +// +// Then, the ETag of the object is computed as MD5 of all individual +// part checksums. S3 also encodes the number of parts into the ETag +// by appending a - at the end: +// ETag := MD5(e1 || e2 || e3 ... || eN) || -N +// +// For example: ceb8853ddc5086cc4ab9e149f8f09c88-5 +// +// However, this scheme is only used for multipart objects that are +// not encrypted. +// +// Server-side Encryption +// +// S3 specifies three types of server-side-encryption - SSE-C, SSE-S3 +// and SSE-KMS - with different semantics w.r.t. ETags. +// In case of SSE-S3, the ETag of an object is computed the same as +// for single resp. multipart plaintext objects. In particular, +// the ETag of a singlepart SSE-S3 object is its content MD5. +// +// In case of SSE-C and SSE-KMS, the ETag of an object is computed +// differently. For singlepart uploads the ETag is not the content +// MD5 of the object. For multipart uploads the ETag is also not +// the MD5 of the individual part checksums but it still contains +// the number of parts as suffix. +// +// Instead, the ETag is kind of unpredictable for S3 clients when +// an object is encrypted using SSE-C or SSE-KMS. Maybe AWS S3 +// computes the ETag as MD5 of the encrypted content but there is +// no way to verify this assumption since the encryption happens +// inside AWS S3. +// Therefore, S3 clients must not make any assumption about ETags +// in case of SSE-C or SSE-KMS except that the ETag is well-formed. +// +// To put all of this into a simple rule: +// SSE-S3 : ETag == MD5 +// SSE-C : ETag != MD5 +// SSE-KMS: ETag != MD5 +// +// +// Encrypted ETags +// +// An S3 implementation has to remember the content MD5 of objects +// in case of SSE-S3. However, storing the ETag of an encrypted +// object in plaintext may reveal some information about the object. +// For example, two objects with the same ETag are identical with +// a very high probability. +// +// Therefore, an S3 implementation may encrypt an ETag before storing +// it. In this case, the stored ETag may not be a well-formed S3 ETag. +// For example, it can be larger due to a checksum added by authenticated +// encryption schemes. Such an ETag must be decrypted before sent to an +// S3 client. +// +// +// S3 Clients +// +// There are many different S3 client implementations. Most of them +// access the ETag by looking for the HTTP response header key "Etag". +// However, some of them assume that the header key has to be "ETag" +// (case-sensitive) and will fail otherwise. +// Further, some clients require that the ETag value is a double-quoted +// string. Therefore, this package provides dedicated functions for +// adding and extracing the ETag to/from HTTP headers. +package etag + +import ( + "bytes" + "encoding/hex" + "errors" + "fmt" + "net/http" + "strconv" + "strings" +) + +// ETag is a single S3 ETag. +// +// An S3 ETag sometimes corresponds to the MD5 of +// the S3 object content. However, when an object +// is encrypted, compressed or uploaded using +// the S3 multipart API then its ETag is not +// necessarily the MD5 of the object content. +// +// For a more detailed description of S3 ETags +// take a look at the package documentation. +type ETag []byte + +// String returns the string representation of the ETag. +// +// The returned string is a hex representation of the +// binary ETag with an optional '-' suffix. +func (e ETag) String() string { + if e.IsMultipart() { + return hex.EncodeToString(e[:16]) + string(e[16:]) + } + return hex.EncodeToString(e) +} + +// IsEncrypted reports whether the ETag is encrypted. +func (e ETag) IsEncrypted() bool { + return len(e) > 16 && !bytes.ContainsRune(e, '-') +} + +// IsMultipart reports whether the ETag belongs to an +// object that has been uploaded using the S3 multipart +// API. +// An S3 multipart ETag has a - suffix. +func (e ETag) IsMultipart() bool { + return len(e) > 16 && bytes.ContainsRune(e, '-') +} + +// Parts returns the number of object parts that are +// referenced by this ETag. It returns 1 if the object +// has been uploaded using the S3 singlepart API. +// +// Parts may panic if the ETag is an invalid multipart +// ETag. +func (e ETag) Parts() int { + if !e.IsMultipart() { + return 1 + } + + n := bytes.IndexRune(e, '-') + parts, err := strconv.Atoi(string(e[n+1:])) + if err != nil { + panic(err) // malformed ETag + } + return parts +} + +var _ Tagger = ETag{} // compiler check + +// ETag returns the ETag itself. +// +// By providing this method ETag implements +// the Tagger interface. +func (e ETag) ETag() ETag { return e } + +// Set adds the ETag to the HTTP headers. It overwrites any +// existing ETag entry. +// +// Due to legacy S3 clients, that make incorrect assumptions +// about HTTP headers, Set should be used instead of +// http.Header.Set(...). Otherwise, some S3 clients will not +// able to extract the ETag. +func Set(etag ETag, h http.Header) { + // Some (broken) S3 clients expect the ETag header to + // literally "ETag" - not "Etag". Further, some clients + // expect an ETag in double quotes. Therefore, we set the + // ETag directly as map entry instead of using http.Header.Set + h["ETag"] = []string{`"` + etag.String() + `"`} +} + +// Get extracts and parses an ETag from the given HTTP headers. +// It returns an error when the HTTP headers do not contain +// an ETag entry or when the ETag is malformed. +// +// Get only accepts AWS S3 compatible ETags - i.e. no +// encrypted ETags - and therefore is stricter than Parse. +func Get(h http.Header) (ETag, error) { + const strict = true + if v := h.Get("Etag"); v != "" { + return parse(v, strict) + } + v, ok := h["ETag"] + if !ok || len(v) == 0 { + return nil, errors.New("etag: HTTP header does not contain an ETag") + } + return parse(v[0], strict) +} + +// Equal returns true if and only if the two ETags are +// identical. +func Equal(a, b ETag) bool { return bytes.Equal(a, b) } + +// Parse parses s as an S3 ETag, returning the result. +// The string can be an encrypted, singlepart +// or multipart S3 ETag. It returns an error if s is +// not a valid textual representation of an ETag. +func Parse(s string) (ETag, error) { + const strict = false + return parse(s, strict) +} + +// parse parse s as an S3 ETag, returning the result. +// It operates in one of two modes: +// - strict +// - non-strict +// +// In strict mode, parse only accepts ETags that +// are AWS S3 compatible. In particular, an AWS +// S3 ETag always consists of a 128 bit checksum +// value and an optional - suffix. +// Therefore, s must have the following form in +// strict mode: <32-hex-characters>[-] +// +// In non-strict mode, parse also accepts ETags +// that are not AWS S3 compatible - e.g. encrypted +// ETags. +func parse(s string, strict bool) (ETag, error) { + // An S3 ETag may be a double-quoted string. + // Therefore, we remove double quotes at the + // start and end, if any. + if strings.HasPrefix(s, `"`) && strings.HasSuffix(s, `"`) { + s = s[1 : len(s)-1] + } + + // An S3 ETag may be a multipart ETag that + // contains a '-' followed by a number. + // If the ETag does not a '-' is is either + // a singlepart or encrypted ETag. + n := strings.IndexRune(s, '-') + if n == -1 { + etag, err := hex.DecodeString(s) + if err != nil { + return nil, err + } + if strict && len(etag) != 16 { // AWS S3 ETags are always 128 bit long + return nil, fmt.Errorf("etag: invalid length %d", len(etag)) + } + return ETag(etag), nil + } + + prefix, suffix := s[:n], s[n:] + if len(prefix) != 32 { + return nil, fmt.Errorf("etag: invalid prefix length %d", len(prefix)) + } + if len(suffix) <= 1 { + return nil, errors.New("etag: suffix is not a part number") + } + + etag, err := hex.DecodeString(prefix) + if err != nil { + return nil, err + } + partNumber, err := strconv.Atoi(suffix[1:]) // suffix[0] == '-' Therefore, we start parsing at suffix[1] + if err != nil { + return nil, err + } + if strict && (partNumber == 0 || partNumber > 10000) { + return nil, fmt.Errorf("etag: invalid part number %d", partNumber) + } + return ETag(append(etag, suffix...)), nil +} diff --git a/pkg/etag/etag_test.go b/pkg/etag/etag_test.go new file mode 100644 index 000000000..281584016 --- /dev/null +++ b/pkg/etag/etag_test.go @@ -0,0 +1,139 @@ +// MinIO Cloud Storage, (C) 2021 MinIO, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package etag + +import ( + "io" + "io/ioutil" + "strings" + "testing" +) + +var parseTests = []struct { + String string + ETag ETag + ShouldFail bool +}{ + {String: "3b83ef96387f1465", ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101}}, // 0 + {String: "3b83ef96387f14655fc854ddc3c6bd57", ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101, 95, 200, 84, 221, 195, 198, 189, 87}}, // 1 + {String: `"3b83ef96387f14655fc854ddc3c6bd57"`, ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101, 95, 200, 84, 221, 195, 198, 189, 87}}, // 2 + {String: "ceb8853ddc5086cc4ab9e149f8f09c88-1", ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 49}}, // 3 + {String: `"ceb8853ddc5086cc4ab9e149f8f09c88-2"`, ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 50}}, // 4 + { // 5 + String: "90402c78d2dccddee1e9e86222ce2c6361675f3529d26000ae2e900ff216b3cb59e130e092d8a2981e776f4d0bd60941", + ETag: ETag{144, 64, 44, 120, 210, 220, 205, 222, 225, 233, 232, 98, 34, 206, 44, 99, 97, 103, 95, 53, 41, 210, 96, 0, 174, 46, 144, 15, 242, 22, 179, 203, 89, 225, 48, 224, 146, 216, 162, 152, 30, 119, 111, 77, 11, 214, 9, 65}, + }, + + {String: `"3b83ef96387f14655fc854ddc3c6bd57`, ShouldFail: true}, // 6 + {String: "ceb8853ddc5086cc4ab9e149f8f09c88-", ShouldFail: true}, // 7 + {String: "ceb8853ddc5086cc4ab9e149f8f09c88-2a", ShouldFail: true}, // 8 + {String: "ceb8853ddc5086cc4ab9e149f8f09c88-2-1", ShouldFail: true}, // 9 + {String: "90402c78d2dccddee1e9e86222ce2c-1", ShouldFail: true}, // 10 + {String: "90402c78d2dccddee1e9e86222ce2c6361675f3529d26000ae2e900ff216b3cb59e130e092d8a2981e776f4d0bd60941-1", ShouldFail: true}, // 11 +} + +func TestParse(t *testing.T) { + for i, test := range parseTests { + etag, err := Parse(test.String) + if err == nil && test.ShouldFail { + t.Fatalf("Test %d: parse should have failed but succeeded", i) + } + if err != nil && !test.ShouldFail { + t.Fatalf("Test %d: failed to parse ETag %q: %v", i, test.String, err) + } + if !Equal(etag, test.ETag) { + t.Log([]byte(etag)) + t.Fatalf("Test %d: ETags don't match", i) + } + } +} + +var stringTests = []struct { + ETag ETag + String string +}{ + {ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101}, String: "3b83ef96387f1465"}, // 0 + {ETag: ETag{59, 131, 239, 150, 56, 127, 20, 101, 95, 200, 84, 221, 195, 198, 189, 87}, String: "3b83ef96387f14655fc854ddc3c6bd57"}, // 1 + {ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 49}, String: "ceb8853ddc5086cc4ab9e149f8f09c88-1"}, // 2 + {ETag: ETag{206, 184, 133, 61, 220, 80, 134, 204, 74, 185, 225, 73, 248, 240, 156, 136, 45, 50}, String: "ceb8853ddc5086cc4ab9e149f8f09c88-2"}, // 3 + { // 4 + ETag: ETag{144, 64, 44, 120, 210, 220, 205, 222, 225, 233, 232, 98, 34, 206, 44, 99, 97, 103, 95, 53, 41, 210, 96, 0, 174, 46, 144, 15, 242, 22, 179, 203, 89, 225, 48, 224, 146, 216, 162, 152, 30, 119, 111, 77, 11, 214, 9, 65}, + String: "90402c78d2dccddee1e9e86222ce2c6361675f3529d26000ae2e900ff216b3cb59e130e092d8a2981e776f4d0bd60941", + }, +} + +func TestString(t *testing.T) { + for i, test := range stringTests { + s := test.ETag.String() + if s != test.String { + t.Fatalf("Test %d: got %s - want %s", i, s, test.String) + } + } +} + +var equalTests = []struct { + A string + B string + Equal bool +}{ + {A: "3b83ef96387f14655fc854ddc3c6bd57", B: "3b83ef96387f14655fc854ddc3c6bd57", Equal: true}, // 0 + {A: "3b83ef96387f14655fc854ddc3c6bd57", B: `"3b83ef96387f14655fc854ddc3c6bd57"`, Equal: true}, // 1 + + {A: "3b83ef96387f14655fc854ddc3c6bd57", B: "3b83ef96387f14655fc854ddc3c6bd57-2", Equal: false}, // 2 + {A: "3b83ef96387f14655fc854ddc3c6bd57", B: "ceb8853ddc5086cc4ab9e149f8f09c88", Equal: false}, // 3 +} + +func TestEqual(t *testing.T) { + for i, test := range equalTests { + A, err := Parse(test.A) + if err != nil { + t.Fatalf("Test %d: %v", i, err) + } + B, err := Parse(test.B) + if err != nil { + t.Fatalf("Test %d: %v", i, err) + } + if equal := Equal(A, B); equal != test.Equal { + t.Fatalf("Test %d: got %v - want %v", i, equal, test.Equal) + } + } +} + +var readerTests = []struct { // Reference values computed by: echo | md5sum + Content string + ETag ETag +}{ + { + Content: "", ETag: ETag{212, 29, 140, 217, 143, 0, 178, 4, 233, 128, 9, 152, 236, 248, 66, 126}, + }, + { + Content: " ", ETag: ETag{114, 21, 238, 156, 125, 157, 194, 41, 210, 146, 26, 64, 232, 153, 236, 95}, + }, + { + Content: "Hello World", ETag: ETag{177, 10, 141, 177, 100, 224, 117, 65, 5, 183, 169, 155, 231, 46, 63, 229}, + }, +} + +func TestReader(t *testing.T) { + for i, test := range readerTests { + reader := NewReader(strings.NewReader(test.Content), test.ETag) + if _, err := io.Copy(ioutil.Discard, reader); err != nil { + t.Fatalf("Test %d: read failed: %v", i, err) + } + if ETag := reader.ETag(); !Equal(ETag, test.ETag) { + t.Fatalf("Test %d: ETag mismatch: got %q - want %q", i, ETag, test.ETag) + } + } +} diff --git a/pkg/etag/reader.go b/pkg/etag/reader.go new file mode 100644 index 000000000..9ff80963b --- /dev/null +++ b/pkg/etag/reader.go @@ -0,0 +1,151 @@ +// MinIO Cloud Storage, (C) 2021 MinIO, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package etag + +import ( + "crypto/md5" + "fmt" + "hash" + "io" +) + +// Tagger is the interface that wraps the basic ETag method. +type Tagger interface { + ETag() ETag +} + +type wrapReader struct { + io.Reader + Tagger +} + +// ETag returns the ETag of the underlying Tagger. +func (r *wrapReader) ETag() ETag { + if r.Tagger == nil { + return nil + } + return r.Tagger.ETag() +} + +// Wrap returns an io.Reader that reads from the wrapped +// io.Reader and implements the Tagger interaface. +// +// If content implements Tagger then the returned Reader +// returns ETag of the content. Otherwise, it returns +// nil as ETag. +// +// Wrap provides an adapter for io.Reader implemetations +// that don't implement the Tagger interface. +// It is mainly used to provide a high-level io.Reader +// access to the ETag computed by a low-level io.Reader: +// +// content := etag.NewReader(r.Body, nil) +// +// compressedContent := Compress(content) +// encryptedContent := Encrypt(compressedContent) +// +// // Now, we need an io.Reader that can access +// // the ETag computed over the content. +// reader := etag.Wrap(encryptedContent, content) +// +func Wrap(wrapped, content io.Reader) io.Reader { + if t, ok := content.(Tagger); ok { + return wrapReader{ + Reader: wrapped, + Tagger: t, + } + } + return wrapReader{ + Reader: wrapped, + } +} + +// A Reader wraps an io.Reader and computes the +// MD5 checksum of the read content as ETag. +// +// Optionally, a Reader can also verify that +// the computed ETag matches an expected value. +// Therefore, it compares both ETags once the +// underlying io.Reader returns io.EOF. +// If the computed ETag does not match the +// expected ETag then Read returns a VerifyError. +// +// Reader implements the Tagger interface. +type Reader struct { + src io.Reader + + md5 hash.Hash + checksum ETag + + readN int64 +} + +// NewReader returns a new Reader that computes the +// MD5 checksum of the content read from r as ETag. +// +// If the provided etag is not nil the returned +// Reader compares the etag with the computed +// MD5 sum once the r returns io.EOF. +func NewReader(r io.Reader, etag ETag) *Reader { + if er, ok := r.(*Reader); ok { + if er.readN == 0 && Equal(etag, er.checksum) { + return er + } + } + return &Reader{ + src: r, + md5: md5.New(), + checksum: etag, + } +} + +// Read reads up to len(p) bytes from the underlying +// io.Reader as specified by the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + n, err := r.src.Read(p) + r.readN += int64(n) + r.md5.Write(p[:n]) + + if err == io.EOF && len(r.checksum) != 0 { + if etag := r.ETag(); !Equal(etag, r.checksum) { + return n, VerifyError{ + Expected: r.checksum, + Computed: etag, + } + } + } + return n, err +} + +// ETag returns the ETag of all the content read +// so far. Reading more content changes the MD5 +// checksum. Therefore, calling ETag multiple +// times may return different results. +func (r *Reader) ETag() ETag { + sum := r.md5.Sum(nil) + return ETag(sum) +} + +// VerifyError is an error signaling that a +// computed ETag does not match an expected +// ETag. +type VerifyError struct { + Expected ETag + Computed ETag +} + +func (v VerifyError) Error() string { + return fmt.Sprintf("etag: expected ETag %q does not match computed ETag %q", v.Expected, v.Computed) +} diff --git a/pkg/hash/reader.go b/pkg/hash/reader.go index 8f3e5d7b1..ab9d84fdd 100644 --- a/pkg/hash/reader.go +++ b/pkg/hash/reader.go @@ -18,61 +18,141 @@ package hash import ( "bytes" - "crypto/md5" "encoding/base64" "encoding/hex" "errors" "hash" "io" + "github.com/minio/minio/pkg/etag" sha256 "github.com/minio/sha256-simd" ) -// Reader writes what it reads from an io.Reader to an MD5 and SHA256 hash.Hash. -// Reader verifies that the content of the io.Reader matches the expected checksums. +// A Reader wraps an io.Reader and computes the MD5 checksum +// of the read content as ETag. Optionally, it also computes +// the SHA256 checksum of the content. +// +// If the reference values for the ETag and content SHA26 +// are not empty then it will check whether the computed +// match the reference values. type Reader struct { - src io.Reader + src io.Reader + bytesRead int64 + size int64 actualSize int64 - bytesRead int64 - md5sum, sha256sum []byte // Byte values of md5sum, sha256sum of client sent values. - md5Hash, sha256Hash hash.Hash + checksum etag.ETag + contentSHA256 []byte + + sha256 hash.Hash } -// NewReader returns a new hash Reader which computes the MD5 sum and -// SHA256 sum (if set) of the provided io.Reader at EOF. -func NewReader(src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64, strictCompat bool) (*Reader, error) { +// NewReader returns a new Reader that wraps src and computes +// MD5 checksum of everything it reads as ETag. +// +// It also computes the SHA256 checksum of everything it reads +// if sha256Hex is not the empty string. +// +// If size resp. actualSize is unknown at the time of calling +// NewReader then it should be set to -1. +// +// NewReader may try merge the given size, MD5 and SHA256 values +// into src - if src is a Reader - to avoid computing the same +// checksums multiple times. +func NewReader(src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64) (*Reader, error) { + MD5, err := hex.DecodeString(md5Hex) + if err != nil { + return nil, BadDigest{ // TODO(aead): Return an error that indicates that an invalid ETag has been specified + ExpectedMD5: md5Hex, + CalculatedMD5: "", + } + } + SHA256, err := hex.DecodeString(sha256Hex) + if err != nil { + return nil, SHA256Mismatch{ // TODO(aead): Return an error that indicates that an invalid Content-SHA256 has been specified + ExpectedSHA256: sha256Hex, + CalculatedSHA256: "", + } + } + + // Merge the size, MD5 and SHA256 values if src is a Reader. + // The size may be set to -1 by callers if unknown. if r, ok := src.(*Reader); ok { - // Merge expectations and return parent. - return r.merge(size, md5Hex, sha256Hex, actualSize, strictCompat) + if r.bytesRead > 0 { + return nil, errors.New("hash: already read from hash reader") + } + if len(r.checksum) != 0 && len(MD5) != 0 && !etag.Equal(r.checksum, etag.ETag(MD5)) { + return nil, BadDigest{ + ExpectedMD5: r.checksum.String(), + CalculatedMD5: md5Hex, + } + } + if len(r.contentSHA256) != 0 && len(SHA256) != 0 && !bytes.Equal(r.contentSHA256, SHA256) { + return nil, SHA256Mismatch{ + ExpectedSHA256: hex.EncodeToString(r.contentSHA256), + CalculatedSHA256: sha256Hex, + } + } + if r.size >= 0 && size >= 0 && r.size != size { + return nil, ErrSizeMismatch{Want: r.size, Got: size} + } + + r.checksum = etag.ETag(MD5) + r.contentSHA256 = SHA256 + if r.size < 0 && size >= 0 { + r.src = etag.Wrap(io.LimitReader(r.src, size), r.src) + r.size = size + } + if r.actualSize <= 0 && actualSize >= 0 { + r.actualSize = actualSize + } + return r, nil } - // Create empty reader and merge into that. - r := Reader{src: src, size: -1, actualSize: -1} - return r.merge(size, md5Hex, sha256Hex, actualSize, strictCompat) + var hash hash.Hash + if size >= 0 { + src = io.LimitReader(src, size) + } + if len(SHA256) != 0 { + hash = sha256.New() + } + return &Reader{ + src: etag.NewReader(src, etag.ETag(MD5)), + size: size, + actualSize: actualSize, + checksum: etag.ETag(MD5), + contentSHA256: SHA256, + sha256: hash, + }, nil } -func (r *Reader) Read(p []byte) (n int, err error) { - n, err = r.src.Read(p) - if n > 0 { - if r.md5Hash != nil { - r.md5Hash.Write(p[:n]) - } - if r.sha256Hash != nil { - r.sha256Hash.Write(p[:n]) - } - } +func (r *Reader) Read(p []byte) (int, error) { + n, err := r.src.Read(p) r.bytesRead += int64(n) - - // At io.EOF verify if the checksums are right. - if err == io.EOF { - if cerr := r.verify(); cerr != nil { - return 0, cerr - } + if r.sha256 != nil { + r.sha256.Write(p[:n]) } - return + if err == io.EOF { // Verify content SHA256, if set. + if r.sha256 != nil { + if sum := r.sha256.Sum(nil); !bytes.Equal(r.contentSHA256, sum) { + return n, SHA256Mismatch{ + ExpectedSHA256: hex.EncodeToString(r.contentSHA256), + CalculatedSHA256: hex.EncodeToString(sum), + } + } + } + } + if err != nil && err != io.EOF { + if v, ok := err.(etag.VerifyError); ok { + return n, BadDigest{ + ExpectedMD5: v.Expected.String(), + CalculatedMD5: v.Computed.String(), + } + } + } + return n, err } // Size returns the absolute number of bytes the Reader @@ -84,114 +164,58 @@ func (r *Reader) Size() int64 { return r.size } // DecompressedSize - For compressed objects. func (r *Reader) ActualSize() int64 { return r.actualSize } -// MD5 - returns byte md5 value +// ETag returns the ETag computed by an underlying etag.Tagger. +// If the underlying io.Reader does not implement etag.Tagger +// it returns nil. +func (r *Reader) ETag() etag.ETag { + if t, ok := r.src.(etag.Tagger); ok { + return t.ETag() + } + return nil +} + +// MD5 returns the MD5 checksum set as reference value. +// +// It corresponds to the checksum that is expected and +// not the actual MD5 checksum of the content. +// Therefore, refer to MD5Current. func (r *Reader) MD5() []byte { - return r.md5sum + return r.checksum } -// MD5Current - returns byte md5 value of the current state -// of the md5 hash after reading the incoming content. -// NOTE: Calling this function multiple times might yield -// different results if they are intermixed with Reader. +// MD5Current returns the MD5 checksum of the content +// that has been read so far. +// +// Calling MD5Current again after reading more data may +// result in a different checksum. func (r *Reader) MD5Current() []byte { - if r.md5Hash != nil { - return r.md5Hash.Sum(nil) - } - return nil + return r.ETag()[:] } -// SHA256 - returns byte sha256 value +// SHA256 returns the SHA256 checksum set as reference value. +// +// It corresponds to the checksum that is expected and +// not the actual SHA256 checksum of the content. func (r *Reader) SHA256() []byte { - return r.sha256sum + return r.contentSHA256 } -// MD5HexString returns hex md5 value. +// MD5HexString returns a hex representation of the MD5. func (r *Reader) MD5HexString() string { - return hex.EncodeToString(r.md5sum) + return hex.EncodeToString(r.checksum) } -// MD5Base64String returns base64 encoded MD5sum value. +// MD5Base64String returns a hex representation of the MD5. func (r *Reader) MD5Base64String() string { - return base64.StdEncoding.EncodeToString(r.md5sum) + return base64.StdEncoding.EncodeToString(r.checksum) } -// SHA256HexString returns hex sha256 value. +// SHA256HexString returns a hex representation of the SHA256. func (r *Reader) SHA256HexString() string { - return hex.EncodeToString(r.sha256sum) + return hex.EncodeToString(r.contentSHA256) } -// verify verifies if the computed MD5 sum and SHA256 sum are -// equal to the ones specified when creating the Reader. -func (r *Reader) verify() error { - if r.sha256Hash != nil && len(r.sha256sum) > 0 { - if sum := r.sha256Hash.Sum(nil); !bytes.Equal(r.sha256sum, sum) { - return SHA256Mismatch{hex.EncodeToString(r.sha256sum), hex.EncodeToString(sum)} - } - } - if r.md5Hash != nil && len(r.md5sum) > 0 { - if sum := r.md5Hash.Sum(nil); !bytes.Equal(r.md5sum, sum) { - return BadDigest{hex.EncodeToString(r.md5sum), hex.EncodeToString(sum)} - } - } - return nil -} - -// merge another hash into this one. -// There cannot be conflicting information given. -func (r *Reader) merge(size int64, md5Hex, sha256Hex string, actualSize int64, strictCompat bool) (*Reader, error) { - if r.bytesRead > 0 { - return nil, errors.New("internal error: Already read from hash reader") - } - // Merge sizes. - // If not set before, just add it. - if r.size < 0 && size >= 0 { - r.src = io.LimitReader(r.src, size) - r.size = size - } - // If set before and set now they must match. - if r.size >= 0 && size >= 0 && r.size != size { - return nil, ErrSizeMismatch{Want: r.size, Got: size} - } - - if r.actualSize <= 0 && actualSize >= 0 { - r.actualSize = actualSize - } - - // Merge SHA256. - sha256sum, err := hex.DecodeString(sha256Hex) - if err != nil { - return nil, SHA256Mismatch{} - } - - // If both are set, they must be the same. - if r.sha256Hash != nil && len(sha256sum) > 0 { - if !bytes.Equal(r.sha256sum, sha256sum) { - return nil, SHA256Mismatch{} - } - } else if len(sha256sum) > 0 { - r.sha256Hash = sha256.New() - r.sha256sum = sha256sum - } - - // Merge MD5 Sum. - md5sum, err := hex.DecodeString(md5Hex) - if err != nil { - return nil, BadDigest{} - } - // If both are set, they must expect the same. - if r.md5Hash != nil && len(md5sum) > 0 { - if !bytes.Equal(r.md5sum, md5sum) { - return nil, BadDigest{} - } - } else if len(md5sum) > 0 || (r.md5Hash == nil && strictCompat) { - r.md5Hash = md5.New() - r.md5sum = md5sum - } - return r, nil -} +var _ io.Closer = (*Reader)(nil) // compiler check // Close and release resources. -func (r *Reader) Close() error { - // Support the io.Closer interface. - return nil -} +func (r *Reader) Close() error { return nil } diff --git a/pkg/hash/reader_test.go b/pkg/hash/reader_test.go index 1942b377a..14d3e6c93 100644 --- a/pkg/hash/reader_test.go +++ b/pkg/hash/reader_test.go @@ -27,7 +27,7 @@ import ( // Tests functions like Size(), MD5*(), SHA256*() func TestHashReaderHelperMethods(t *testing.T) { - r, err := NewReader(bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4, false) + r, err := NewReader(bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4) if err != nil { t.Fatal(err) } @@ -109,13 +109,13 @@ func TestHashReaderVerification(t *testing.T) { }, { desc: "Nested hash reader NewReader() should merge.", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, }, { desc: "Incorrect sha256, nested", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, sha256hex: "50d858e0985ecc7f60418aaf0cc5ab587f42c2570a884095a9e8ccacd0f6545c", @@ -126,28 +126,28 @@ func TestHashReaderVerification(t *testing.T) { }, { desc: "Correct sha256, nested", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", }, { desc: "Correct sha256, nested, truncated", - src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", 4), size: 4, actualSize: -1, sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", }, { desc: "Correct sha256, nested, truncated, swapped", - src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", -1, false), + src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", -1), size: 4, actualSize: -1, sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", }, { desc: "Incorrect MD5, nested", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, md5hex: "0773da587b322af3a8718cb418a715ce", @@ -165,7 +165,7 @@ func TestHashReaderVerification(t *testing.T) { }, { desc: "Correct MD5, nested", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, md5hex: "e2fc714c4727ee9395f324cd2e7f331f", @@ -180,7 +180,7 @@ func TestHashReaderVerification(t *testing.T) { }, { desc: "Correct MD5, nested, truncated", - src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), -1, "", "", -1, false), + src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), -1, "", "", -1), size: 4, actualSize: 4, md5hex: "e2fc714c4727ee9395f324cd2e7f331f", @@ -188,7 +188,7 @@ func TestHashReaderVerification(t *testing.T) { } for i, testCase := range testCases { t.Run(fmt.Sprintf("case-%d", i+1), func(t *testing.T) { - r, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize, false) + r, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize) if err != nil { t.Fatalf("Test %q: Initializing reader failed %s", testCase.desc, err) } @@ -202,8 +202,8 @@ func TestHashReaderVerification(t *testing.T) { } } -func mustReader(t *testing.T, src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64, strictCompat bool) *Reader { - r, err := NewReader(src, size, md5Hex, sha256Hex, actualSize, strictCompat) +func mustReader(t *testing.T, src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64) *Reader { + r, err := NewReader(src, size, md5Hex, sha256Hex, actualSize) if err != nil { t.Fatal(err) } @@ -219,63 +219,57 @@ func TestHashReaderInvalidArguments(t *testing.T) { actualSize int64 md5hex, sha256hex string success bool - expectedErr error - strict bool }{ { - desc: "Invalid md5sum NewReader() will fail.", - src: bytes.NewReader([]byte("abcd")), - size: 4, - actualSize: 4, - md5hex: "invalid-md5", - success: false, - expectedErr: BadDigest{}, + desc: "Invalid md5sum NewReader() will fail.", + src: bytes.NewReader([]byte("abcd")), + size: 4, + actualSize: 4, + md5hex: "invalid-md5", + success: false, }, { - desc: "Invalid sha256 NewReader() will fail.", - src: bytes.NewReader([]byte("abcd")), - size: 4, - actualSize: 4, - sha256hex: "invalid-sha256", - success: false, - expectedErr: SHA256Mismatch{}, + desc: "Invalid sha256 NewReader() will fail.", + src: bytes.NewReader([]byte("abcd")), + size: 4, + actualSize: 4, + sha256hex: "invalid-sha256", + success: false, }, { desc: "Nested hash reader NewReader() should merge.", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4), size: 4, actualSize: 4, success: true, }, { - desc: "Mismatching sha256", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4, false), - size: 4, - actualSize: 4, - sha256hex: "50d858e0985ecc7f60418aaf0cc5ab587f42c2570a884095a9e8ccacd0f6545c", - success: false, - expectedErr: SHA256Mismatch{}, + desc: "Mismatching sha256", + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4), + size: 4, + actualSize: 4, + sha256hex: "50d858e0985ecc7f60418aaf0cc5ab587f42c2570a884095a9e8ccacd0f6545c", + success: false, }, { desc: "Correct sha256", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4), size: 4, actualSize: 4, sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", success: true, }, { - desc: "Mismatching MD5", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "", 4, false), - size: 4, - actualSize: 4, - md5hex: "0773da587b322af3a8718cb418a715ce", - success: false, - expectedErr: BadDigest{}, + desc: "Mismatching MD5", + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "", 4), + size: 4, + actualSize: 4, + md5hex: "0773da587b322af3a8718cb418a715ce", + success: false, }, { desc: "Correct MD5", - src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "", 4, false), + src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "", 4), size: 4, actualSize: 4, md5hex: "e2fc714c4727ee9395f324cd2e7f331f", @@ -289,29 +283,23 @@ func TestHashReaderInvalidArguments(t *testing.T) { success: true, }, { - desc: "Nested, size mismatch", - src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), 4, "", "", -1, false), - size: 2, - actualSize: -1, - success: false, - expectedErr: ErrSizeMismatch{Want: 4, Got: 2}, + desc: "Nested, size mismatch", + src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), 4, "", "", -1), + size: 2, + actualSize: -1, + success: false, }, } for i, testCase := range testCases { t.Run(fmt.Sprintf("case-%d", i+1), func(t *testing.T) { - _, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize, testCase.strict) + _, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize) if err != nil && testCase.success { t.Errorf("Test %q: Expected success, but got error %s instead", testCase.desc, err) } if err == nil && !testCase.success { t.Errorf("Test %q: Expected error, but got success", testCase.desc) } - if !testCase.success { - if err != testCase.expectedErr { - t.Errorf("Test %q: Expected error %v, but got %v", testCase.desc, testCase.expectedErr, err) - } - } }) } }