pkg/etag: add new package for S3 ETag handling (#11577)

This commit adds a new package `etag` for dealing
with S3 ETags.

Even though ETag is often viewed as MD5 checksum of
an object, handling S3 ETags correctly is a surprisingly
complex task. While it is true that the ETag corresponds
to the MD5 for the most basic S3 API operations, there are
many exceptions in case of multipart uploads or encryption.

In worse, some S3 clients expect very specific behavior when
it comes to ETags. For example, some clients expect that the
ETag is a double-quoted string and fail otherwise.
Non-AWS compliant ETag handling has been a source of many bugs
in the past.

Therefore, this commit adds a dedicated `etag` package that provides
functionality for parsing, generating and converting S3 ETags.
Further, this commit removes the ETag computation from the `hash`
package. Instead, the `hash` package (i.e. `hash.Reader`) should
focus only on computing and verifying the content-sha256.

One core feature of this commit is to provide a mechanism to
communicate a computed ETag from a low-level `io.Reader` to
a high-level `io.Reader`.

This problem occurs when an S3 server receives a request and
has to compute the ETag of the content. However, the server
may also wrap the initial body with several other `io.Reader`,
e.g. when encrypting or compressing the content:
```
   reader := Encrypt(Compress(ETag(content)))
```
In such a case, the ETag should be accessible by the high-level
`io.Reader`.

The `etag` provides a mechanism to wrap `io.Reader` implementations
such that the `ETag` can be accessed by a type-check.
This technique is applied to the PUT, COPY and Upload handlers.
This commit is contained in:
Andreas Auernhammer
2021-02-23 21:31:53 +01:00
committed by GitHub
parent 1b63291ee2
commit d4b822d697
19 changed files with 821 additions and 234 deletions

View File

@@ -18,61 +18,141 @@ package hash
import (
"bytes"
"crypto/md5"
"encoding/base64"
"encoding/hex"
"errors"
"hash"
"io"
"github.com/minio/minio/pkg/etag"
sha256 "github.com/minio/sha256-simd"
)
// Reader writes what it reads from an io.Reader to an MD5 and SHA256 hash.Hash.
// Reader verifies that the content of the io.Reader matches the expected checksums.
// A Reader wraps an io.Reader and computes the MD5 checksum
// of the read content as ETag. Optionally, it also computes
// the SHA256 checksum of the content.
//
// If the reference values for the ETag and content SHA26
// are not empty then it will check whether the computed
// match the reference values.
type Reader struct {
src io.Reader
src io.Reader
bytesRead int64
size int64
actualSize int64
bytesRead int64
md5sum, sha256sum []byte // Byte values of md5sum, sha256sum of client sent values.
md5Hash, sha256Hash hash.Hash
checksum etag.ETag
contentSHA256 []byte
sha256 hash.Hash
}
// NewReader returns a new hash Reader which computes the MD5 sum and
// SHA256 sum (if set) of the provided io.Reader at EOF.
func NewReader(src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64, strictCompat bool) (*Reader, error) {
// NewReader returns a new Reader that wraps src and computes
// MD5 checksum of everything it reads as ETag.
//
// It also computes the SHA256 checksum of everything it reads
// if sha256Hex is not the empty string.
//
// If size resp. actualSize is unknown at the time of calling
// NewReader then it should be set to -1.
//
// NewReader may try merge the given size, MD5 and SHA256 values
// into src - if src is a Reader - to avoid computing the same
// checksums multiple times.
func NewReader(src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64) (*Reader, error) {
MD5, err := hex.DecodeString(md5Hex)
if err != nil {
return nil, BadDigest{ // TODO(aead): Return an error that indicates that an invalid ETag has been specified
ExpectedMD5: md5Hex,
CalculatedMD5: "",
}
}
SHA256, err := hex.DecodeString(sha256Hex)
if err != nil {
return nil, SHA256Mismatch{ // TODO(aead): Return an error that indicates that an invalid Content-SHA256 has been specified
ExpectedSHA256: sha256Hex,
CalculatedSHA256: "",
}
}
// Merge the size, MD5 and SHA256 values if src is a Reader.
// The size may be set to -1 by callers if unknown.
if r, ok := src.(*Reader); ok {
// Merge expectations and return parent.
return r.merge(size, md5Hex, sha256Hex, actualSize, strictCompat)
if r.bytesRead > 0 {
return nil, errors.New("hash: already read from hash reader")
}
if len(r.checksum) != 0 && len(MD5) != 0 && !etag.Equal(r.checksum, etag.ETag(MD5)) {
return nil, BadDigest{
ExpectedMD5: r.checksum.String(),
CalculatedMD5: md5Hex,
}
}
if len(r.contentSHA256) != 0 && len(SHA256) != 0 && !bytes.Equal(r.contentSHA256, SHA256) {
return nil, SHA256Mismatch{
ExpectedSHA256: hex.EncodeToString(r.contentSHA256),
CalculatedSHA256: sha256Hex,
}
}
if r.size >= 0 && size >= 0 && r.size != size {
return nil, ErrSizeMismatch{Want: r.size, Got: size}
}
r.checksum = etag.ETag(MD5)
r.contentSHA256 = SHA256
if r.size < 0 && size >= 0 {
r.src = etag.Wrap(io.LimitReader(r.src, size), r.src)
r.size = size
}
if r.actualSize <= 0 && actualSize >= 0 {
r.actualSize = actualSize
}
return r, nil
}
// Create empty reader and merge into that.
r := Reader{src: src, size: -1, actualSize: -1}
return r.merge(size, md5Hex, sha256Hex, actualSize, strictCompat)
var hash hash.Hash
if size >= 0 {
src = io.LimitReader(src, size)
}
if len(SHA256) != 0 {
hash = sha256.New()
}
return &Reader{
src: etag.NewReader(src, etag.ETag(MD5)),
size: size,
actualSize: actualSize,
checksum: etag.ETag(MD5),
contentSHA256: SHA256,
sha256: hash,
}, nil
}
func (r *Reader) Read(p []byte) (n int, err error) {
n, err = r.src.Read(p)
if n > 0 {
if r.md5Hash != nil {
r.md5Hash.Write(p[:n])
}
if r.sha256Hash != nil {
r.sha256Hash.Write(p[:n])
}
}
func (r *Reader) Read(p []byte) (int, error) {
n, err := r.src.Read(p)
r.bytesRead += int64(n)
// At io.EOF verify if the checksums are right.
if err == io.EOF {
if cerr := r.verify(); cerr != nil {
return 0, cerr
}
if r.sha256 != nil {
r.sha256.Write(p[:n])
}
return
if err == io.EOF { // Verify content SHA256, if set.
if r.sha256 != nil {
if sum := r.sha256.Sum(nil); !bytes.Equal(r.contentSHA256, sum) {
return n, SHA256Mismatch{
ExpectedSHA256: hex.EncodeToString(r.contentSHA256),
CalculatedSHA256: hex.EncodeToString(sum),
}
}
}
}
if err != nil && err != io.EOF {
if v, ok := err.(etag.VerifyError); ok {
return n, BadDigest{
ExpectedMD5: v.Expected.String(),
CalculatedMD5: v.Computed.String(),
}
}
}
return n, err
}
// Size returns the absolute number of bytes the Reader
@@ -84,114 +164,58 @@ func (r *Reader) Size() int64 { return r.size }
// DecompressedSize - For compressed objects.
func (r *Reader) ActualSize() int64 { return r.actualSize }
// MD5 - returns byte md5 value
// ETag returns the ETag computed by an underlying etag.Tagger.
// If the underlying io.Reader does not implement etag.Tagger
// it returns nil.
func (r *Reader) ETag() etag.ETag {
if t, ok := r.src.(etag.Tagger); ok {
return t.ETag()
}
return nil
}
// MD5 returns the MD5 checksum set as reference value.
//
// It corresponds to the checksum that is expected and
// not the actual MD5 checksum of the content.
// Therefore, refer to MD5Current.
func (r *Reader) MD5() []byte {
return r.md5sum
return r.checksum
}
// MD5Current - returns byte md5 value of the current state
// of the md5 hash after reading the incoming content.
// NOTE: Calling this function multiple times might yield
// different results if they are intermixed with Reader.
// MD5Current returns the MD5 checksum of the content
// that has been read so far.
//
// Calling MD5Current again after reading more data may
// result in a different checksum.
func (r *Reader) MD5Current() []byte {
if r.md5Hash != nil {
return r.md5Hash.Sum(nil)
}
return nil
return r.ETag()[:]
}
// SHA256 - returns byte sha256 value
// SHA256 returns the SHA256 checksum set as reference value.
//
// It corresponds to the checksum that is expected and
// not the actual SHA256 checksum of the content.
func (r *Reader) SHA256() []byte {
return r.sha256sum
return r.contentSHA256
}
// MD5HexString returns hex md5 value.
// MD5HexString returns a hex representation of the MD5.
func (r *Reader) MD5HexString() string {
return hex.EncodeToString(r.md5sum)
return hex.EncodeToString(r.checksum)
}
// MD5Base64String returns base64 encoded MD5sum value.
// MD5Base64String returns a hex representation of the MD5.
func (r *Reader) MD5Base64String() string {
return base64.StdEncoding.EncodeToString(r.md5sum)
return base64.StdEncoding.EncodeToString(r.checksum)
}
// SHA256HexString returns hex sha256 value.
// SHA256HexString returns a hex representation of the SHA256.
func (r *Reader) SHA256HexString() string {
return hex.EncodeToString(r.sha256sum)
return hex.EncodeToString(r.contentSHA256)
}
// verify verifies if the computed MD5 sum and SHA256 sum are
// equal to the ones specified when creating the Reader.
func (r *Reader) verify() error {
if r.sha256Hash != nil && len(r.sha256sum) > 0 {
if sum := r.sha256Hash.Sum(nil); !bytes.Equal(r.sha256sum, sum) {
return SHA256Mismatch{hex.EncodeToString(r.sha256sum), hex.EncodeToString(sum)}
}
}
if r.md5Hash != nil && len(r.md5sum) > 0 {
if sum := r.md5Hash.Sum(nil); !bytes.Equal(r.md5sum, sum) {
return BadDigest{hex.EncodeToString(r.md5sum), hex.EncodeToString(sum)}
}
}
return nil
}
// merge another hash into this one.
// There cannot be conflicting information given.
func (r *Reader) merge(size int64, md5Hex, sha256Hex string, actualSize int64, strictCompat bool) (*Reader, error) {
if r.bytesRead > 0 {
return nil, errors.New("internal error: Already read from hash reader")
}
// Merge sizes.
// If not set before, just add it.
if r.size < 0 && size >= 0 {
r.src = io.LimitReader(r.src, size)
r.size = size
}
// If set before and set now they must match.
if r.size >= 0 && size >= 0 && r.size != size {
return nil, ErrSizeMismatch{Want: r.size, Got: size}
}
if r.actualSize <= 0 && actualSize >= 0 {
r.actualSize = actualSize
}
// Merge SHA256.
sha256sum, err := hex.DecodeString(sha256Hex)
if err != nil {
return nil, SHA256Mismatch{}
}
// If both are set, they must be the same.
if r.sha256Hash != nil && len(sha256sum) > 0 {
if !bytes.Equal(r.sha256sum, sha256sum) {
return nil, SHA256Mismatch{}
}
} else if len(sha256sum) > 0 {
r.sha256Hash = sha256.New()
r.sha256sum = sha256sum
}
// Merge MD5 Sum.
md5sum, err := hex.DecodeString(md5Hex)
if err != nil {
return nil, BadDigest{}
}
// If both are set, they must expect the same.
if r.md5Hash != nil && len(md5sum) > 0 {
if !bytes.Equal(r.md5sum, md5sum) {
return nil, BadDigest{}
}
} else if len(md5sum) > 0 || (r.md5Hash == nil && strictCompat) {
r.md5Hash = md5.New()
r.md5sum = md5sum
}
return r, nil
}
var _ io.Closer = (*Reader)(nil) // compiler check
// Close and release resources.
func (r *Reader) Close() error {
// Support the io.Closer interface.
return nil
}
func (r *Reader) Close() error { return nil }

View File

@@ -27,7 +27,7 @@ import (
// Tests functions like Size(), MD5*(), SHA256*()
func TestHashReaderHelperMethods(t *testing.T) {
r, err := NewReader(bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4, false)
r, err := NewReader(bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4)
if err != nil {
t.Fatal(err)
}
@@ -109,13 +109,13 @@ func TestHashReaderVerification(t *testing.T) {
},
{
desc: "Nested hash reader NewReader() should merge.",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false),
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4),
size: 4,
actualSize: 4,
},
{
desc: "Incorrect sha256, nested",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false),
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4),
size: 4,
actualSize: 4,
sha256hex: "50d858e0985ecc7f60418aaf0cc5ab587f42c2570a884095a9e8ccacd0f6545c",
@@ -126,28 +126,28 @@ func TestHashReaderVerification(t *testing.T) {
},
{
desc: "Correct sha256, nested",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false),
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4),
size: 4,
actualSize: 4,
sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589",
},
{
desc: "Correct sha256, nested, truncated",
src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", 4, false),
src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", 4),
size: 4,
actualSize: -1,
sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589",
},
{
desc: "Correct sha256, nested, truncated, swapped",
src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", -1, false),
src: mustReader(t, bytes.NewReader([]byte("abcd-more-stuff-to-be ignored")), 4, "", "", -1),
size: 4,
actualSize: -1,
sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589",
},
{
desc: "Incorrect MD5, nested",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false),
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4),
size: 4,
actualSize: 4,
md5hex: "0773da587b322af3a8718cb418a715ce",
@@ -165,7 +165,7 @@ func TestHashReaderVerification(t *testing.T) {
},
{
desc: "Correct MD5, nested",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false),
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4),
size: 4,
actualSize: 4,
md5hex: "e2fc714c4727ee9395f324cd2e7f331f",
@@ -180,7 +180,7 @@ func TestHashReaderVerification(t *testing.T) {
},
{
desc: "Correct MD5, nested, truncated",
src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), -1, "", "", -1, false),
src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), -1, "", "", -1),
size: 4,
actualSize: 4,
md5hex: "e2fc714c4727ee9395f324cd2e7f331f",
@@ -188,7 +188,7 @@ func TestHashReaderVerification(t *testing.T) {
}
for i, testCase := range testCases {
t.Run(fmt.Sprintf("case-%d", i+1), func(t *testing.T) {
r, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize, false)
r, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize)
if err != nil {
t.Fatalf("Test %q: Initializing reader failed %s", testCase.desc, err)
}
@@ -202,8 +202,8 @@ func TestHashReaderVerification(t *testing.T) {
}
}
func mustReader(t *testing.T, src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64, strictCompat bool) *Reader {
r, err := NewReader(src, size, md5Hex, sha256Hex, actualSize, strictCompat)
func mustReader(t *testing.T, src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64) *Reader {
r, err := NewReader(src, size, md5Hex, sha256Hex, actualSize)
if err != nil {
t.Fatal(err)
}
@@ -219,63 +219,57 @@ func TestHashReaderInvalidArguments(t *testing.T) {
actualSize int64
md5hex, sha256hex string
success bool
expectedErr error
strict bool
}{
{
desc: "Invalid md5sum NewReader() will fail.",
src: bytes.NewReader([]byte("abcd")),
size: 4,
actualSize: 4,
md5hex: "invalid-md5",
success: false,
expectedErr: BadDigest{},
desc: "Invalid md5sum NewReader() will fail.",
src: bytes.NewReader([]byte("abcd")),
size: 4,
actualSize: 4,
md5hex: "invalid-md5",
success: false,
},
{
desc: "Invalid sha256 NewReader() will fail.",
src: bytes.NewReader([]byte("abcd")),
size: 4,
actualSize: 4,
sha256hex: "invalid-sha256",
success: false,
expectedErr: SHA256Mismatch{},
desc: "Invalid sha256 NewReader() will fail.",
src: bytes.NewReader([]byte("abcd")),
size: 4,
actualSize: 4,
sha256hex: "invalid-sha256",
success: false,
},
{
desc: "Nested hash reader NewReader() should merge.",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4, false),
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "", 4),
size: 4,
actualSize: 4,
success: true,
},
{
desc: "Mismatching sha256",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4, false),
size: 4,
actualSize: 4,
sha256hex: "50d858e0985ecc7f60418aaf0cc5ab587f42c2570a884095a9e8ccacd0f6545c",
success: false,
expectedErr: SHA256Mismatch{},
desc: "Mismatching sha256",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4),
size: 4,
actualSize: 4,
sha256hex: "50d858e0985ecc7f60418aaf0cc5ab587f42c2570a884095a9e8ccacd0f6545c",
success: false,
},
{
desc: "Correct sha256",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4, false),
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "", "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589", 4),
size: 4,
actualSize: 4,
sha256hex: "88d4266fd4e6338d13b845fcf289579d209c897823b9217da3e161936f031589",
success: true,
},
{
desc: "Mismatching MD5",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "", 4, false),
size: 4,
actualSize: 4,
md5hex: "0773da587b322af3a8718cb418a715ce",
success: false,
expectedErr: BadDigest{},
desc: "Mismatching MD5",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "", 4),
size: 4,
actualSize: 4,
md5hex: "0773da587b322af3a8718cb418a715ce",
success: false,
},
{
desc: "Correct MD5",
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "", 4, false),
src: mustReader(t, bytes.NewReader([]byte("abcd")), 4, "e2fc714c4727ee9395f324cd2e7f331f", "", 4),
size: 4,
actualSize: 4,
md5hex: "e2fc714c4727ee9395f324cd2e7f331f",
@@ -289,29 +283,23 @@ func TestHashReaderInvalidArguments(t *testing.T) {
success: true,
},
{
desc: "Nested, size mismatch",
src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), 4, "", "", -1, false),
size: 2,
actualSize: -1,
success: false,
expectedErr: ErrSizeMismatch{Want: 4, Got: 2},
desc: "Nested, size mismatch",
src: mustReader(t, bytes.NewReader([]byte("abcd-morestuff")), 4, "", "", -1),
size: 2,
actualSize: -1,
success: false,
},
}
for i, testCase := range testCases {
t.Run(fmt.Sprintf("case-%d", i+1), func(t *testing.T) {
_, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize, testCase.strict)
_, err := NewReader(testCase.src, testCase.size, testCase.md5hex, testCase.sha256hex, testCase.actualSize)
if err != nil && testCase.success {
t.Errorf("Test %q: Expected success, but got error %s instead", testCase.desc, err)
}
if err == nil && !testCase.success {
t.Errorf("Test %q: Expected error, but got success", testCase.desc)
}
if !testCase.success {
if err != testCase.expectedErr {
t.Errorf("Test %q: Expected error %v, but got %v", testCase.desc, testCase.expectedErr, err)
}
}
})
}
}