mirror of
https://github.com/minio/minio.git
synced 2024-12-25 22:55:54 -05:00
0bc34952eb
md5sum calculation has a high CPU overhead; avoid calculating it repeatedly for similar fanOut calls. This fixes the following CPU profiler result:

```
(pprof) top10
Showing nodes accounting for 678.68s, 84.67% of 801.54s total
Dropped 1072 nodes (cum <= 4.01s)
Showing top 10 nodes out of 156
      flat  flat%   sum%        cum   cum%
   332.54s 41.49% 41.49%    332.54s 41.49%  runtime/internal/syscall.Syscall6
   228.39s 28.49% 69.98%    228.39s 28.49%  crypto/md5.block
    48.07s  6.00% 75.98%     48.07s  6.00%  runtime.memmove
    28.91s  3.61% 79.59%     28.91s  3.61%  github.com/minio/highwayhash.updateAVX2
     8.25s  1.03% 80.61%      8.25s  1.03%  runtime.futex
     8.25s  1.03% 81.64%     10.81s  1.35%  runtime.step
     6.99s  0.87% 82.52%     22.35s  2.79%  runtime.pcvalue
     6.67s  0.83% 83.35%     38.90s  4.85%  runtime.mallocgc
     5.77s  0.72% 84.07%     32.61s  4.07%  runtime.gentraceback
     4.84s   0.6% 84.67%     10.49s  1.31%  runtime.lock2
```
371 lines
11 KiB
Go
371 lines
11 KiB
Go
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
//
|
|
// This file is part of MinIO Object Storage stack
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package hash
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/base64"
|
|
"encoding/hex"
|
|
"errors"
|
|
"hash"
|
|
"io"
|
|
"net/http"
|
|
|
|
"github.com/minio/minio/internal/etag"
|
|
"github.com/minio/minio/internal/hash/sha256"
|
|
"github.com/minio/minio/internal/ioutil"
|
|
)
|
|
|
|
// A Reader wraps an io.Reader and computes the MD5 checksum
|
|
// of the read content as ETag. Optionally, it also computes
|
|
// the SHA256 checksum of the content.
|
|
//
|
|
// If the reference values for the ETag and content SHA26
|
|
// are not empty then it will check whether the computed
|
|
// match the reference values.
|
|
type Reader struct {
|
|
src io.Reader
|
|
bytesRead int64
|
|
expectedMin int64
|
|
expectedMax int64
|
|
|
|
size int64
|
|
actualSize int64
|
|
|
|
checksum etag.ETag
|
|
contentSHA256 []byte
|
|
|
|
// Content checksum
|
|
contentHash Checksum
|
|
contentHasher hash.Hash
|
|
disableMD5 bool
|
|
|
|
trailer http.Header
|
|
|
|
sha256 hash.Hash
|
|
}
|
|
|
|
// Options bundles the arguments accepted by NewReaderWithOpts. Its
// fields mirror the positional parameters of NewReader and add
// DisableMD5, which callers such as the FanOut API use to avoid
// repeating an expensive md5sum calculation under hash.Reader.
type Options struct {
	// MD5Hex and SHA256Hex are the hex-encoded reference checksums of
	// the content. Either may be left empty.
	MD5Hex, SHA256Hex string
	// Size and ActualSize are passed on as the size resp. actualSize
	// arguments of NewReader.
	Size, ActualSize int64
	// DisableMD5 turns off the MD5 computation for a newly wrapped
	// source.
	DisableMD5 bool
}
|
|
|
|
// NewReaderWithOpts is like NewReader but takes `Options` as argument, allowing
|
|
// callers to indicate if they want to disable md5sum checksum.
|
|
func NewReaderWithOpts(src io.Reader, opts Options) (*Reader, error) {
|
|
// return hard limited reader
|
|
return newReader(src, opts.Size, opts.MD5Hex, opts.SHA256Hex, opts.ActualSize, opts.DisableMD5)
|
|
}
|
|
|
|
// NewReader returns a new Reader that wraps src and computes
|
|
// MD5 checksum of everything it reads as ETag.
|
|
//
|
|
// It also computes the SHA256 checksum of everything it reads
|
|
// if sha256Hex is not the empty string.
|
|
//
|
|
// If size resp. actualSize is unknown at the time of calling
|
|
// NewReader then it should be set to -1.
|
|
// When size is >=0 it *must* match the amount of data provided by r.
|
|
//
|
|
// NewReader may try merge the given size, MD5 and SHA256 values
|
|
// into src - if src is a Reader - to avoid computing the same
|
|
// checksums multiple times.
|
|
// NewReader enforces S3 compatibility strictly by ensuring caller
|
|
// does not send more content than specified size.
|
|
func NewReader(src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64) (*Reader, error) {
|
|
return newReader(src, size, md5Hex, sha256Hex, actualSize, false)
|
|
}
|
|
|
|
func newReader(src io.Reader, size int64, md5Hex, sha256Hex string, actualSize int64, disableMD5 bool) (*Reader, error) {
|
|
MD5, err := hex.DecodeString(md5Hex)
|
|
if err != nil {
|
|
return nil, BadDigest{ // TODO(aead): Return an error that indicates that an invalid ETag has been specified
|
|
ExpectedMD5: md5Hex,
|
|
CalculatedMD5: "",
|
|
}
|
|
}
|
|
SHA256, err := hex.DecodeString(sha256Hex)
|
|
if err != nil {
|
|
return nil, SHA256Mismatch{ // TODO(aead): Return an error that indicates that an invalid Content-SHA256 has been specified
|
|
ExpectedSHA256: sha256Hex,
|
|
CalculatedSHA256: "",
|
|
}
|
|
}
|
|
|
|
// Merge the size, MD5 and SHA256 values if src is a Reader.
|
|
// The size may be set to -1 by callers if unknown.
|
|
if r, ok := src.(*Reader); ok {
|
|
if r.bytesRead > 0 {
|
|
return nil, errors.New("hash: already read from hash reader")
|
|
}
|
|
if len(r.checksum) != 0 && len(MD5) != 0 && !etag.Equal(r.checksum, MD5) {
|
|
return nil, BadDigest{
|
|
ExpectedMD5: r.checksum.String(),
|
|
CalculatedMD5: md5Hex,
|
|
}
|
|
}
|
|
if len(r.contentSHA256) != 0 && len(SHA256) != 0 && !bytes.Equal(r.contentSHA256, SHA256) {
|
|
return nil, SHA256Mismatch{
|
|
ExpectedSHA256: hex.EncodeToString(r.contentSHA256),
|
|
CalculatedSHA256: sha256Hex,
|
|
}
|
|
}
|
|
if r.size >= 0 && size >= 0 && r.size != size {
|
|
return nil, SizeMismatch{Want: r.size, Got: size}
|
|
}
|
|
|
|
r.checksum = MD5
|
|
r.contentSHA256 = SHA256
|
|
if r.size < 0 && size >= 0 {
|
|
r.src = etag.Wrap(ioutil.HardLimitReader(r.src, size), r.src)
|
|
r.size = size
|
|
}
|
|
if r.actualSize <= 0 && actualSize >= 0 {
|
|
r.actualSize = actualSize
|
|
}
|
|
return r, nil
|
|
}
|
|
|
|
if size >= 0 {
|
|
r := ioutil.HardLimitReader(src, size)
|
|
if !disableMD5 {
|
|
if _, ok := src.(etag.Tagger); !ok {
|
|
src = etag.NewReader(r, MD5)
|
|
} else {
|
|
src = etag.Wrap(r, src)
|
|
}
|
|
} else {
|
|
src = r
|
|
}
|
|
} else if _, ok := src.(etag.Tagger); !ok {
|
|
if !disableMD5 {
|
|
src = etag.NewReader(src, MD5)
|
|
}
|
|
}
|
|
var h hash.Hash
|
|
if len(SHA256) != 0 {
|
|
h = sha256.New()
|
|
}
|
|
return &Reader{
|
|
src: src,
|
|
size: size,
|
|
actualSize: actualSize,
|
|
checksum: MD5,
|
|
contentSHA256: SHA256,
|
|
sha256: h,
|
|
disableMD5: disableMD5,
|
|
}, nil
|
|
}
|
|
|
|
// ErrInvalidChecksum is the error reported when the checksum provided in
// the request headers is invalid.
var ErrInvalidChecksum = errors.New("invalid checksum")
|
|
|
|
// SetExpectedMin set expected minimum data expected from reader
|
|
func (r *Reader) SetExpectedMin(expectedMin int64) {
|
|
r.expectedMin = expectedMin
|
|
}
|
|
|
|
// SetExpectedMax set expected max data expected from reader
|
|
func (r *Reader) SetExpectedMax(expectedMax int64) {
|
|
r.expectedMax = expectedMax
|
|
}
|
|
|
|
// AddChecksum will add checksum checks as specified in
|
|
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
|
|
// Returns ErrInvalidChecksum if a problem with the checksum is found.
|
|
func (r *Reader) AddChecksum(req *http.Request, ignoreValue bool) error {
|
|
cs, err := GetContentChecksum(req.Header)
|
|
if err != nil {
|
|
return ErrInvalidChecksum
|
|
}
|
|
if cs == nil {
|
|
return nil
|
|
}
|
|
r.contentHash = *cs
|
|
if cs.Type.Trailing() {
|
|
r.trailer = req.Trailer
|
|
}
|
|
return r.AddNonTrailingChecksum(cs, ignoreValue)
|
|
}
|
|
|
|
// AddChecksumNoTrailer will add checksum checks as specified in
|
|
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
|
|
// Returns ErrInvalidChecksum if a problem with the checksum is found.
|
|
func (r *Reader) AddChecksumNoTrailer(headers http.Header, ignoreValue bool) error {
|
|
cs, err := GetContentChecksum(headers)
|
|
if err != nil {
|
|
return ErrInvalidChecksum
|
|
}
|
|
if cs == nil {
|
|
return nil
|
|
}
|
|
r.contentHash = *cs
|
|
return r.AddNonTrailingChecksum(cs, ignoreValue)
|
|
}
|
|
|
|
// AddNonTrailingChecksum will add a checksum to the reader.
|
|
// The checksum cannot be trailing.
|
|
func (r *Reader) AddNonTrailingChecksum(cs *Checksum, ignoreValue bool) error {
|
|
if cs == nil {
|
|
return nil
|
|
}
|
|
r.contentHash = *cs
|
|
if ignoreValue {
|
|
// Do not validate, but allow for transfer
|
|
return nil
|
|
}
|
|
|
|
r.contentHasher = cs.Type.Hasher()
|
|
if r.contentHasher == nil {
|
|
return ErrInvalidChecksum
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r *Reader) Read(p []byte) (int, error) {
|
|
n, err := r.src.Read(p)
|
|
r.bytesRead += int64(n)
|
|
if r.sha256 != nil {
|
|
r.sha256.Write(p[:n])
|
|
}
|
|
if r.contentHasher != nil {
|
|
r.contentHasher.Write(p[:n])
|
|
}
|
|
|
|
if err == io.EOF { // Verify content SHA256, if set.
|
|
if r.expectedMin > 0 {
|
|
if r.bytesRead < r.expectedMin {
|
|
return 0, SizeTooSmall{Want: r.expectedMin, Got: r.bytesRead}
|
|
}
|
|
}
|
|
if r.expectedMax > 0 {
|
|
if r.bytesRead > r.expectedMax {
|
|
return 0, SizeTooLarge{Want: r.expectedMax, Got: r.bytesRead}
|
|
}
|
|
}
|
|
|
|
if r.sha256 != nil {
|
|
if sum := r.sha256.Sum(nil); !bytes.Equal(r.contentSHA256, sum) {
|
|
return n, SHA256Mismatch{
|
|
ExpectedSHA256: hex.EncodeToString(r.contentSHA256),
|
|
CalculatedSHA256: hex.EncodeToString(sum),
|
|
}
|
|
}
|
|
}
|
|
if r.contentHasher != nil {
|
|
if r.contentHash.Type.Trailing() {
|
|
var err error
|
|
r.contentHash.Encoded = r.trailer.Get(r.contentHash.Type.Key())
|
|
r.contentHash.Raw, err = base64.StdEncoding.DecodeString(r.contentHash.Encoded)
|
|
if err != nil || len(r.contentHash.Raw) == 0 {
|
|
return 0, ChecksumMismatch{Got: r.contentHash.Encoded}
|
|
}
|
|
}
|
|
if sum := r.contentHasher.Sum(nil); !bytes.Equal(r.contentHash.Raw, sum) {
|
|
err := ChecksumMismatch{
|
|
Want: r.contentHash.Encoded,
|
|
Got: base64.StdEncoding.EncodeToString(sum),
|
|
}
|
|
return n, err
|
|
}
|
|
}
|
|
}
|
|
if err != nil && err != io.EOF {
|
|
if v, ok := err.(etag.VerifyError); ok {
|
|
return n, BadDigest{
|
|
ExpectedMD5: v.Expected.String(),
|
|
CalculatedMD5: v.Computed.String(),
|
|
}
|
|
}
|
|
}
|
|
return n, err
|
|
}
|
|
|
|
// Size returns the absolute number of bytes the Reader
|
|
// will return during reading. It returns -1 for unlimited
|
|
// data.
|
|
func (r *Reader) Size() int64 { return r.size }
|
|
|
|
// ActualSize returns the pre-modified size of the object.
|
|
// DecompressedSize - For compressed objects.
|
|
func (r *Reader) ActualSize() int64 { return r.actualSize }
|
|
|
|
// ETag returns the ETag computed by an underlying etag.Tagger.
|
|
// If the underlying io.Reader does not implement etag.Tagger
|
|
// it returns nil.
|
|
func (r *Reader) ETag() etag.ETag {
|
|
if t, ok := r.src.(etag.Tagger); ok {
|
|
return t.ETag()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// MD5Current returns the MD5 checksum of the content
|
|
// that has been read so far.
|
|
//
|
|
// Calling MD5Current again after reading more data may
|
|
// result in a different checksum.
|
|
func (r *Reader) MD5Current() []byte {
|
|
if r.disableMD5 {
|
|
return r.checksum
|
|
}
|
|
return r.ETag()[:]
|
|
}
|
|
|
|
// SHA256 returns the SHA256 checksum set as reference value.
|
|
//
|
|
// It corresponds to the checksum that is expected and
|
|
// not the actual SHA256 checksum of the content.
|
|
func (r *Reader) SHA256() []byte {
|
|
return r.contentSHA256
|
|
}
|
|
|
|
// SHA256HexString returns a hex representation of the SHA256.
|
|
func (r *Reader) SHA256HexString() string {
|
|
return hex.EncodeToString(r.contentSHA256)
|
|
}
|
|
|
|
// ContentCRCType returns the content checksum type.
|
|
func (r *Reader) ContentCRCType() ChecksumType {
|
|
return r.contentHash.Type
|
|
}
|
|
|
|
// ContentCRC returns the content crc if set.
|
|
func (r *Reader) ContentCRC() map[string]string {
|
|
if r.contentHash.Type == ChecksumNone || !r.contentHash.Valid() {
|
|
return nil
|
|
}
|
|
if r.contentHash.Type.Trailing() {
|
|
return map[string]string{r.contentHash.Type.String(): r.trailer.Get(r.contentHash.Type.Key())}
|
|
}
|
|
return map[string]string{r.contentHash.Type.String(): r.contentHash.Encoded}
|
|
}
|
|
|
|
var _ io.Closer = (*Reader)(nil) // compiler check
|
|
|
|
// Close and release resources.
|
|
func (r *Reader) Close() error { return nil }
|