Add Full Object Checksums and CRC64-NVME (#20855)

Backport of AIStor PR 247.

Add support for full object checksums as described here:

https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html

New checksum types are fully supported. Mint tests from https://github.com/minio/minio-go/pull/2026 are now passing.

Includes fixes from https://github.com/minio/minio/pull/20743 for mint tests.

Use checksums to validate object content. Fixes #20845 #20849

Fixes checksum replication (downstream PR 250)
This commit is contained in:
Klaus Post
2025-01-20 06:49:07 -08:00
committed by GitHub
parent 779ec8f0d4
commit 827004cd6d
18 changed files with 665 additions and 168 deletions

70
internal/hash/checker.go Normal file
View File

@@ -0,0 +1,70 @@
// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package hash
import (
"bytes"
"errors"
"hash"
"io"
"github.com/minio/minio/internal/ioutil"
)
// Checker wraps a reader and verifies the checksum of the data read through it.
// Data returned by Read is fed to the hash; the digest is compared against
// the wanted sum when the reader reports io.EOF and again on Close.
type Checker struct {
	c    io.Closer // upstream closer, closed by Close
	r    io.Reader // reader the content is pulled from
	h    hash.Hash // running hash of all bytes returned by Read
	want []byte    // expected raw digest to compare h.Sum(nil) against
}
// NewChecker returns a Checker that reads from rc through a hard limit of
// length bytes, hashes everything read with h and compares the result to wantSum.
// Calling Close on the returned Checker closes rc.
func NewChecker(rc io.ReadCloser, h hash.Hash, wantSum []byte, length int64) *Checker {
	limited := ioutil.HardLimitReader(rc, length)
	return &Checker{
		c:    rc,
		r:    limited,
		h:    h,
		want: wantSum,
	}
}
// Read satisfies io.Reader.
// Every byte handed back to the caller is also written to the hash.
// When the underlying reader reports io.EOF the accumulated digest is
// compared to the wanted sum and ErrInvalidChecksum is returned on mismatch.
func (c Checker) Read(p []byte) (n int, err error) {
	if n, err = c.r.Read(p); n > 0 {
		c.h.Write(p[:n])
	}
	// Anything other than EOF (including nil) is passed through untouched.
	if !errors.Is(err, io.EOF) {
		return n, err
	}
	// End of stream: the digest is final, verify it.
	if sum := c.h.Sum(nil); !bytes.Equal(sum, c.want) {
		return n, ErrInvalidChecksum
	}
	return n, err
}
// Close satisfies io.Closer.
// It closes the upstream closer first; if that fails its error is returned.
// Otherwise the digest of everything read so far is compared to the wanted
// sum and ErrInvalidChecksum is returned on mismatch.
func (c Checker) Close() error {
	if err := c.c.Close(); err != nil {
		return err
	}
	if !bytes.Equal(c.h.Sum(nil), c.want) {
		return ErrInvalidChecksum
	}
	return nil
}

View File

@@ -26,6 +26,7 @@ import (
"fmt"
"hash"
"hash/crc32"
"hash/crc64"
"net/http"
"strconv"
"strings"
@@ -42,6 +43,9 @@ func hashLogIf(ctx context.Context, err error) {
// MinIOMultipartChecksum is set as metadata on multipart uploads to indicate the checksum type.
const MinIOMultipartChecksum = "x-minio-multipart-checksum"
// MinIOMultipartChecksumType is set as metadata on multipart uploads to indicate the checksum type.
const MinIOMultipartChecksumType = "x-minio-multipart-checksum-type"
// ChecksumType contains information about the checksum type.
type ChecksumType uint32
@@ -65,11 +69,21 @@ const (
ChecksumMultipart
// ChecksumIncludesMultipart indicates the checksum also contains part checksums.
ChecksumIncludesMultipart
// ChecksumCRC64NVME indicates CRC64 with 0xad93d23594c93659 polynomial.
ChecksumCRC64NVME
// ChecksumFullObject indicates the checksum is of the full object,
// not checksum of checksums. Should only be set on ChecksumMultipart
ChecksumFullObject
// ChecksumNone indicates no checksum.
ChecksumNone ChecksumType = 0
baseTypeMask = ChecksumSHA256 | ChecksumSHA1 | ChecksumCRC32 | ChecksumCRC32C | ChecksumCRC64NVME
)
// BaseChecksumTypes is a list of all the base checksum types.
var BaseChecksumTypes = []ChecksumType{ChecksumSHA256, ChecksumSHA1, ChecksumCRC32, ChecksumCRC64NVME, ChecksumCRC32C}
// Checksum is a type and base 64 encoded value.
type Checksum struct {
Type ChecksumType
@@ -86,6 +100,11 @@ func (c ChecksumType) Is(t ChecksumType) bool {
return c&t == t
}
// Base strips all modifier flags from c and returns only the
// base checksum algorithm bits (those covered by baseTypeMask), if any.
func (c ChecksumType) Base() ChecksumType {
	return baseTypeMask & c
}
// Key returns the header key.
// returns empty string if invalid or none.
func (c ChecksumType) Key() string {
@@ -98,6 +117,8 @@ func (c ChecksumType) Key() string {
return xhttp.AmzChecksumSHA1
case c.Is(ChecksumSHA256):
return xhttp.AmzChecksumSHA256
case c.Is(ChecksumCRC64NVME):
return xhttp.AmzChecksumCRC64NVME
}
return ""
}
@@ -113,32 +134,56 @@ func (c ChecksumType) RawByteLen() int {
return sha1.Size
case c.Is(ChecksumSHA256):
return sha256.Size
case c.Is(ChecksumCRC64NVME):
return crc64.Size
}
return 0
}
// IsSet returns whether the type is valid and known.
func (c ChecksumType) IsSet() bool {
return !c.Is(ChecksumInvalid) && !c.Is(ChecksumNone)
return !c.Is(ChecksumInvalid) && !c.Base().Is(ChecksumNone)
}
// NewChecksumType returns a checksum type based on the algorithm string.
func NewChecksumType(alg string) ChecksumType {
// NewChecksumType returns a checksum type based on the algorithm string and obj type.
func NewChecksumType(alg, objType string) ChecksumType {
full := ChecksumFullObject
if objType != xhttp.AmzChecksumTypeFullObject {
full = 0
}
switch strings.ToUpper(alg) {
case "CRC32":
return ChecksumCRC32
return ChecksumCRC32 | full
case "CRC32C":
return ChecksumCRC32C
return ChecksumCRC32C | full
case "SHA1":
if full != 0 {
return ChecksumInvalid
}
return ChecksumSHA1
case "SHA256":
if full != 0 {
return ChecksumInvalid
}
return ChecksumSHA256
case "CRC64NVME":
// AWS seems to ignore full value, and just assume it.
return ChecksumCRC64NVME
case "":
if full != 0 {
return ChecksumInvalid
}
return ChecksumNone
}
return ChecksumInvalid
}
// NewChecksumHeader returns a checksum type based on the
// checksum algorithm and checksum type headers found in h.
func NewChecksumHeader(h http.Header) ChecksumType {
	alg := h.Get(xhttp.AmzChecksumAlgo)
	objType := h.Get(xhttp.AmzChecksumType)
	return NewChecksumType(alg, objType)
}
// String returns the type as a string.
func (c ChecksumType) String() string {
switch {
@@ -150,12 +195,35 @@ func (c ChecksumType) String() string {
return "SHA1"
case c.Is(ChecksumSHA256):
return "SHA256"
case c.Is(ChecksumCRC64NVME):
return "CRC64NVME"
case c.Is(ChecksumNone):
return ""
}
return "invalid"
}
// FullObjectRequested reports whether the checksum type indicates a full object checksum.
// That is the case when the ChecksumFullObject flag is set, and always for CRC64NVME.
func (c ChecksumType) FullObjectRequested() bool {
	if c&ChecksumFullObject == ChecksumFullObject {
		return true
	}
	return c.Is(ChecksumCRC64NVME)
}
// ObjType returns the value to send as x-amz-checksum-type:
// the full object value when a full object checksum was requested,
// the composite value for any other set checksum, and "" otherwise.
func (c ChecksumType) ObjType() string {
	switch {
	case c.FullObjectRequested():
		return xhttp.AmzChecksumTypeFullObject
	case c.IsSet():
		return xhttp.AmzChecksumTypeComposite
	default:
		return ""
	}
}
// CanMerge reports whether checksums of this type can be merged.
// Only the CRC based types (CRC64NVME, CRC32C and CRC32) qualify.
func (c ChecksumType) CanMerge() bool {
	switch {
	case c.Is(ChecksumCRC64NVME), c.Is(ChecksumCRC32C), c.Is(ChecksumCRC32):
		return true
	}
	return false
}
// Hasher returns a hasher corresponding to the checksum type.
// Returns nil if no checksum.
func (c ChecksumType) Hasher() hash.Hash {
@@ -168,6 +236,8 @@ func (c ChecksumType) Hasher() hash.Hash {
return sha1.New()
case c.Is(ChecksumSHA256):
return sha256.New()
case c.Is(ChecksumCRC64NVME):
return crc64.New(crc64Table)
}
return nil
}
@@ -214,7 +284,11 @@ func ReadCheckSums(b []byte, part int) map[string]string {
if n < 0 {
break
}
cs = fmt.Sprintf("%s-%d", cs, t)
if !typ.FullObjectRequested() {
cs = fmt.Sprintf("%s-%d", cs, t)
} else if part <= 0 {
res[xhttp.AmzChecksumType] = xhttp.AmzChecksumTypeFullObject
}
b = b[n:]
if part > 0 {
cs = ""
@@ -322,7 +396,7 @@ func NewChecksumWithType(alg ChecksumType, value string) *Checksum {
// NewChecksumString returns a new checksum from specified algorithm and base64 encoded value.
func NewChecksumString(alg, value string) *Checksum {
return NewChecksumWithType(NewChecksumType(alg), value)
return NewChecksumWithType(NewChecksumType(alg, ""), value)
}
// AppendTo will append the checksum to b.
@@ -377,8 +451,7 @@ func (c Checksum) Valid() bool {
if len(c.Encoded) == 0 || c.Type.Trailing() {
return c.Type.Is(ChecksumNone) || c.Type.Trailing()
}
raw := c.Raw
return c.Type.RawByteLen() == len(raw)
return c.Type.RawByteLen() == len(c.Raw)
}
// Matches returns whether given content matches c.
@@ -440,6 +513,10 @@ func TransferChecksumHeader(w http.ResponseWriter, r *http.Request) {
// AddChecksumHeader will transfer any checksum value that has been checked.
func AddChecksumHeader(w http.ResponseWriter, c map[string]string) {
for k, v := range c {
if k == xhttp.AmzChecksumType {
w.Header().Set(xhttp.AmzChecksumType, v)
continue
}
cksum := NewChecksumString(k, v)
if cksum == nil {
continue
@@ -458,19 +535,11 @@ func GetContentChecksum(h http.Header) (*Checksum, error) {
var res *Checksum
for _, header := range trailing {
var duplicates bool
switch {
case strings.EqualFold(header, ChecksumCRC32C.Key()):
duplicates = res != nil
res = NewChecksumWithType(ChecksumCRC32C|ChecksumTrailing, "")
case strings.EqualFold(header, ChecksumCRC32.Key()):
duplicates = res != nil
res = NewChecksumWithType(ChecksumCRC32|ChecksumTrailing, "")
case strings.EqualFold(header, ChecksumSHA256.Key()):
duplicates = res != nil
res = NewChecksumWithType(ChecksumSHA256|ChecksumTrailing, "")
case strings.EqualFold(header, ChecksumSHA1.Key()):
duplicates = res != nil
res = NewChecksumWithType(ChecksumSHA1|ChecksumTrailing, "")
for _, t := range BaseChecksumTypes {
if strings.EqualFold(t.Key(), header) {
duplicates = res != nil
res = NewChecksumWithType(t|ChecksumTrailing, "")
}
}
if duplicates {
return nil, ErrInvalidChecksum
@@ -500,7 +569,13 @@ func getContentChecksum(h http.Header) (t ChecksumType, s string) {
t = ChecksumNone
alg := h.Get(xhttp.AmzChecksumAlgo)
if alg != "" {
t |= NewChecksumType(alg)
t |= NewChecksumHeader(h)
if h.Get(xhttp.AmzChecksumType) == xhttp.AmzChecksumTypeFullObject {
if !t.CanMerge() {
return ChecksumInvalid, ""
}
t |= ChecksumFullObject
}
if t.IsSet() {
hdr := t.Key()
if s = h.Get(hdr); s == "" {
@@ -519,12 +594,19 @@ func getContentChecksum(h http.Header) (t ChecksumType, s string) {
t = c
s = got
}
if h.Get(xhttp.AmzChecksumType) == xhttp.AmzChecksumTypeFullObject {
if !t.CanMerge() {
t = ChecksumInvalid
s = ""
return
}
t |= ChecksumFullObject
}
return
}
}
checkType(ChecksumCRC32)
checkType(ChecksumCRC32C)
checkType(ChecksumSHA1)
checkType(ChecksumSHA256)
for _, t := range BaseChecksumTypes {
checkType(t)
}
return t, s
}

219
internal/hash/crc.go Normal file
View File

@@ -0,0 +1,219 @@
// Copyright (c) 2015-2024 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package hash
import (
"encoding/base64"
"encoding/binary"
"fmt"
"hash/crc32"
"hash/crc64"
"math/bits"
)
// AddPart will merge a part checksum into the current,
// as if the content of each was appended.
// The size of the content that produced the second checksum must be provided.
// Not all checksum types can be merged, use the CanMerge method to check.
// Checksum types must match.
//
// A size of 0 leaves c unchanged. If c has no value yet it takes over the
// value of other. An error is returned when the type of other cannot be
// merged, when the base types differ, or when either checksum is invalid.
func (c *Checksum) AddPart(other Checksum, size int64) error {
	if !other.Type.CanMerge() {
		return fmt.Errorf("checksum type cannot be merged")
	}
	if size == 0 {
		return nil
	}
	if !c.Type.Is(other.Type.Base()) {
		return fmt.Errorf("checksum type does not match got %s and %s", c.Type.String(), other.Type.String())
	}
	// If never set, just add first checksum.
	if len(c.Raw) == 0 {
		// Copy the bytes instead of sharing the slice: later merges write the
		// combined value into c.Raw in place, which would otherwise overwrite
		// the raw bytes of the caller's first part checksum.
		c.Raw = append([]byte(nil), other.Raw...)
		c.Encoded = other.Encoded
		return nil
	}
	if !c.Valid() {
		return fmt.Errorf("invalid base checksum")
	}
	if !other.Valid() {
		return fmt.Errorf("invalid part checksum")
	}

	// Raw values are stored big endian; combine and write back in place.
	switch c.Type.Base() {
	case ChecksumCRC32:
		v := crc32Combine(crc32.IEEE, binary.BigEndian.Uint32(c.Raw), binary.BigEndian.Uint32(other.Raw), size)
		binary.BigEndian.PutUint32(c.Raw, v)
	case ChecksumCRC32C:
		v := crc32Combine(crc32.Castagnoli, binary.BigEndian.Uint32(c.Raw), binary.BigEndian.Uint32(other.Raw), size)
		binary.BigEndian.PutUint32(c.Raw, v)
	case ChecksumCRC64NVME:
		v := crc64Combine(bits.Reverse64(crc64NVMEPolynomial), binary.BigEndian.Uint64(c.Raw), binary.BigEndian.Uint64(other.Raw), size)
		binary.BigEndian.PutUint64(c.Raw, v)
	default:
		return fmt.Errorf("unknown checksum type: %s", c.Type.String())
	}
	// Keep the encoded form in sync with the updated raw value.
	c.Encoded = base64.StdEncoding.EncodeToString(c.Raw)
	return nil
}
// crc64NVMEPolynomial is the polynomial used for CRC64-NVME checksums.
const crc64NVMEPolynomial = 0xad93d23594c93659

// crc64Table is the hash/crc64 table used for CRC64-NVME hashing.
// The polynomial is bit-reversed before it is handed to crc64.MakeTable.
var crc64Table = crc64.MakeTable(bits.Reverse64(crc64NVMEPolynomial))
// Following is ported from C to Go in 2016 by Justin Ruggles, with minimal alteration.
// Used uint for unsigned long. Used uint32 for input arguments in order to match
// the Go hash/crc32 package. zlib CRC32 combine (https://github.com/madler/zlib)
// Modified for hash/crc64 by Klaus Post, 2024.

// gf2MatrixTimes multiplies the GF(2) matrix mat with the bit vector vec:
// row i of mat is XORed into the result for every set bit i of vec.
// mat must have a row for every set bit of vec.
func gf2MatrixTimes(mat []uint64, vec uint64) uint64 {
	var sum uint64
	for i := 0; vec != 0; i, vec = i+1, vec>>1 {
		if vec&1 != 0 {
			sum ^= mat[i]
		}
	}
	return sum
}

// gf2MatrixSquare stores mat multiplied by itself in square.
// Both slices must have the same length, otherwise it panics.
func gf2MatrixSquare(square, mat []uint64) {
	if len(square) != len(mat) {
		panic("square matrix size mismatch")
	}
	for n := range mat {
		square[n] = gf2MatrixTimes(mat, mat[n])
	}
}

// gf2CombineCRC is the width independent core shared by crc32Combine and
// crc64Combine. width is the CRC size in bits, poly the generator polynomial
// as passed to the wrappers, and len2 the byte length that crc2 covers.
// It returns crc1 unchanged when len2 is zero or negative.
func gf2CombineCRC(width int, poly, crc1, crc2 uint64, len2 int64) uint64 {
	// degenerate case (also disallow negative lengths)
	if len2 <= 0 {
		return crc1
	}

	// One allocation backs both operator matrices.
	ops := make([]uint64, 2*width)
	even := ops[:width:width] // even-power-of-two zeros operator
	odd := ops[width:]        // odd-power-of-two zeros operator

	// put operator for one zero bit in odd
	odd[0] = poly
	row := uint64(1)
	for n := 1; n < width; n++ {
		odd[n] = row
		row <<= 1
	}

	// put operator for two zero bits in even
	gf2MatrixSquare(even, odd)

	// put operator for four zero bits in odd
	gf2MatrixSquare(odd, even)

	// Apply len2 zeros to crc1. Each round squares the previous operator
	// (the first square yields the operator for one zero byte, eight zero
	// bits) and applies it when the matching bit of len2 is set. The two
	// matrices swap roles every round.
	src, dst := odd, even
	for {
		gf2MatrixSquare(dst, src)
		if len2&1 != 0 {
			crc1 = gf2MatrixTimes(dst, crc1)
		}
		len2 >>= 1
		// if no more bits set, then done
		if len2 == 0 {
			break
		}
		src, dst = dst, src
	}

	// return combined crc
	return crc1 ^ crc2
}

// crc32Combine returns the combined CRC-32 hash value of the two passed CRC-32
// hash values crc1 and crc2. poly represents the generator polynomial
// and len2 specifies the byte length that the crc2 hash covers.
// crc1 is returned unchanged when len2 is zero or negative.
func crc32Combine(poly uint32, crc1, crc2 uint32, len2 int64) uint32 {
	return uint32(gf2CombineCRC(32, uint64(poly), uint64(crc1), uint64(crc2), len2))
}

// crc64Combine returns the combined CRC-64 hash value of the two passed CRC-64
// hash values crc1 and crc2. poly represents the generator polynomial
// and len2 specifies the byte length that the crc2 hash covers.
// crc1 is returned unchanged when len2 is zero or negative.
func crc64Combine(poly uint64, crc1, crc2 uint64, len2 int64) uint64 {
	return gf2CombineCRC(64, poly, crc1, crc2, len2)
}

View File

@@ -257,26 +257,6 @@ func (r *Reader) Read(p []byte) (int, error) {
r.contentHasher.Write(p[:n])
}
// If we have reached our expected size,
// do one more read to ensure we are at EOF
// and that any trailers have been read.
attempts := 0
for err == nil && r.size >= 0 && r.bytesRead >= r.size {
attempts++
if r.bytesRead > r.size {
return 0, SizeTooLarge{Want: r.size, Got: r.bytesRead}
}
var tmp [1]byte
var n2 int
n2, err = r.src.Read(tmp[:])
if n2 > 0 {
return 0, SizeTooLarge{Want: r.size, Got: r.bytesRead}
}
if attempts == 100 {
return 0, io.ErrNoProgress
}
}
if err == io.EOF { // Verify content SHA256, if set.
if r.expectedMin > 0 {
if r.bytesRead < r.expectedMin {

View File

@@ -170,12 +170,16 @@ const (
MinIOServerStatus = "x-minio-server-status"
// Content Checksums
AmzChecksumAlgo = "x-amz-checksum-algorithm"
AmzChecksumCRC32 = "x-amz-checksum-crc32"
AmzChecksumCRC32C = "x-amz-checksum-crc32c"
AmzChecksumSHA1 = "x-amz-checksum-sha1"
AmzChecksumSHA256 = "x-amz-checksum-sha256"
AmzChecksumMode = "x-amz-checksum-mode"
AmzChecksumAlgo = "x-amz-checksum-algorithm"
AmzChecksumCRC32 = "x-amz-checksum-crc32"
AmzChecksumCRC32C = "x-amz-checksum-crc32c"
AmzChecksumSHA1 = "x-amz-checksum-sha1"
AmzChecksumSHA256 = "x-amz-checksum-sha256"
AmzChecksumCRC64NVME = "x-amz-checksum-crc64nvme"
AmzChecksumMode = "x-amz-checksum-mode"
AmzChecksumType = "x-amz-checksum-type"
AmzChecksumTypeFullObject = "FULL_OBJECT"
AmzChecksumTypeComposite = "COMPOSITE"
// Post Policy related
AmzMetaUUID = "X-Amz-Meta-Uuid"