CopyObject must preserve checksums and encrypt them if required (#21399)

This commit is contained in:
Mark Theunissen
2025-06-25 17:08:54 +02:00
committed by GitHub
parent a65292cab1
commit 2718d9a430
12 changed files with 396 additions and 21 deletions

View File

@@ -230,6 +230,11 @@ func (c ChecksumType) FullObjectRequested() bool {
return c&(ChecksumFullObject) == ChecksumFullObject || c.Is(ChecksumCRC64NVME)
}
// IsMultipartComposite reports whether the checksum carries the multipart flag
// while a full object checksum was not requested.
func (c ChecksumType) IsMultipartComposite() bool {
	if !c.Is(ChecksumMultipart) {
		return false
	}
	return !c.FullObjectRequested()
}
// ObjType returns a string to return as x-amz-checksum-type.
func (c ChecksumType) ObjType() string {
if c.FullObjectRequested() {
@@ -269,7 +274,7 @@ func (c ChecksumType) Trailing() bool {
return c.Is(ChecksumTrailing)
}
// NewChecksumFromData returns a new checksum from specified algorithm and base64 encoded value.
// NewChecksumFromData returns a new Checksum, using specified algorithm type on data.
func NewChecksumFromData(t ChecksumType, data []byte) *Checksum {
if !t.IsSet() {
return nil
@@ -311,8 +316,6 @@ func ReadCheckSums(b []byte, part int) (cs map[string]string, isMP bool) {
}
if !typ.FullObjectRequested() {
cs = fmt.Sprintf("%s-%d", cs, t)
} else if part <= 0 {
res[xhttp.AmzChecksumType] = xhttp.AmzChecksumTypeFullObject
}
b = b[n:]
if part > 0 {
@@ -337,6 +340,13 @@ func ReadCheckSums(b []byte, part int) (cs map[string]string, isMP bool) {
}
if cs != "" {
res[typ.String()] = cs
res[xhttp.AmzChecksumType] = typ.ObjType()
if !typ.Is(ChecksumMultipart) {
// Single part PUTs are always FULL_OBJECT checksum
// Refer https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html
// **For PutObject uploads, the checksum type is always FULL_OBJECT.**
res[xhttp.AmzChecksumType] = ChecksumFullObject.ObjType()
}
}
}
if len(res) == 0 {
@@ -468,6 +478,65 @@ func (c *Checksum) AppendTo(b []byte, parts []byte) []byte {
return b
}
// ChecksumFromBytes reconstructs a Checksum struct from the serialized bytes created in AppendTo().
// Returns nil if the bytes are invalid or empty.
// AppendTo() can append a serialized Checksum to another already-serialized Checksum,
// however, in practice, we only use one at a time.
// ChecksumFromBytes only returns the first one and no part checksums.
//
// The layout consumed here is: a uvarint checksum type, followed by
// RawByteLen() raw checksum bytes. When the type has ChecksumMultipart set, a
// uvarint part count follows, and when ChecksumIncludesMultipart is also set
// the remaining bytes must be long enough to hold one raw checksum per part
// (they are length-checked but not decoded).
func ChecksumFromBytes(b []byte) *Checksum {
	if len(b) == 0 {
		return nil
	}

	// Read checksum type
	t, n := binary.Uvarint(b)
	if n <= 0 {
		return nil
	}
	b = b[n:]

	typ := ChecksumType(t)
	length := typ.RawByteLen()
	if length <= 0 || len(b) < length {
		return nil
	}

	// Read raw checksum bytes. Copy so the result does not alias the input.
	raw := make([]byte, length)
	copy(raw, b[:length])
	b = b[length:]

	c := &Checksum{
		Type:    typ,
		Raw:     raw,
		Encoded: base64.StdEncoding.EncodeToString(raw),
	}

	// Handle multipart checksums
	if typ.Is(ChecksumMultipart) {
		parts, n := binary.Uvarint(b)
		if n <= 0 {
			return nil
		}
		b = b[n:]

		// The part count is read from serialized data: reject values that do
		// not round-trip through int rather than storing a truncated or
		// negative count.
		wantParts := int(parts)
		if wantParts < 0 || uint64(wantParts) != parts {
			return nil
		}
		c.WantParts = wantParts

		if typ.Is(ChecksumIncludesMultipart) {
			// Compare in the uint64 domain; parts*length could overflow int
			// and slip past a plain "len(b) < parts*length" check.
			if parts > uint64(len(b)/length) {
				return nil
			}
		}
	}

	if !c.Valid() {
		return nil
	}

	return c
}
// Valid returns whether checksum is valid.
func (c Checksum) Valid() bool {
if c.Type == ChecksumInvalid {
@@ -506,12 +575,26 @@ func (c Checksum) Matches(content []byte, parts int) error {
return nil
}
// AsMap returns the checksum as a map[string]string.
// The map holds the encoded checksum keyed by c.Type.String() and the
// checksum type (c.Type.ObjType()) keyed by xhttp.AmzChecksumType.
// A nil or invalid checksum yields a nil map.
func (c *Checksum) AsMap() map[string]string {
	if c == nil || !c.Valid() {
		return nil
	}
	// A single return: an earlier return of only the encoded value would make
	// the checksum type entry unreachable.
	return map[string]string{
		c.Type.String():       c.Encoded,
		xhttp.AmzChecksumType: c.Type.ObjType(),
	}
}
// Equal reports whether c and s agree on Type, Encoded, Raw and WantParts.
// Two nil checksums are equal; a nil checksum never equals a non-nil one.
func (c *Checksum) Equal(s *Checksum) bool {
	switch {
	case c == nil || s == nil:
		// Only equal when both sides are nil.
		return c == s
	case c.Type != s.Type:
		return false
	case c.Encoded != s.Encoded:
		return false
	case !bytes.Equal(c.Raw, s.Raw):
		return false
	}
	return c.WantParts == s.WantParts
}
// TransferChecksumHeader will transfer any checksum value that has been checked.

View File

@@ -0,0 +1,162 @@
// Copyright (c) 2015-2025 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package hash
import (
"net/http/httptest"
"testing"
)
// TestChecksumAddToHeader tests that adding and retrieving a checksum on a header works.
// For every case a checksum is computed over fixed data, written to a recorded
// response via AddChecksumHeader(chksm.AsMap()), read back with
// GetContentChecksum and compared against the original using Equal.
func TestChecksumAddToHeader(t *testing.T) {
	tests := []struct {
		name     string
		checksum ChecksumType
		fullobj  bool
	}{
		{"CRC32-composite", ChecksumCRC32, false},
		{"CRC32-full-object", ChecksumCRC32, true},
		{"CRC32C-composite", ChecksumCRC32C, false},
		{"CRC32C-full-object", ChecksumCRC32C, true},
		{"CRC64NVME-full-object", ChecksumCRC64NVME, false}, // testing with false, because it always is full object.
		{"ChecksumSHA1-composite", ChecksumSHA1, false},
		{"ChecksumSHA256-composite", ChecksumSHA256, false},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			myData := []byte("this-is-a-checksum-data-test")
			chksm := NewChecksumFromData(tt.checksum, myData)
			// Fail cleanly instead of panicking on the dereferences below.
			if chksm == nil {
				t.Fatal("NewChecksumFromData returned nil")
			}
			if tt.fullobj {
				chksm.Type |= ChecksumFullObject
			}

			w := httptest.NewRecorder()
			AddChecksumHeader(w, chksm.AsMap())
			gotChksm, err := GetContentChecksum(w.Result().Header)
			if err != nil {
				t.Fatalf("GetContentChecksum failed: %v", err)
			}

			// In the CRC64NVME case, it is always full object, so add the flag for easier equality comparison
			if chksm.Type.Base().Is(ChecksumCRC64NVME) {
				chksm.Type |= ChecksumFullObject
			}
			if !chksm.Equal(gotChksm) {
				t.Fatalf("Checksum mismatch: expected %+v, got %+v", chksm, gotChksm)
			}
		})
	}
}
// TestChecksumSerializeDeserialize verifies that a checksum serialized with
// AppendTo is recovered unchanged by ChecksumFromBytes.
func TestChecksumSerializeDeserialize(t *testing.T) {
	payload := []byte("this-is-a-checksum-data-test")

	want := NewChecksumFromData(ChecksumCRC32, payload)
	if want == nil {
		t.Fatal("NewChecksumFromData returned nil")
	}

	// Round trip: checksum -> bytes -> checksum.
	encoded := want.AppendTo(nil, nil)
	if encoded == nil {
		t.Fatal("AppendTo returned nil")
	}
	got := ChecksumFromBytes(encoded)
	if got == nil {
		t.Fatal("ChecksumFromBytes returned nil")
	}

	// The decoded checksum must still validate the original content...
	if err := got.Matches(payload, 0); err != nil {
		t.Fatalf("Checksum mismatch on chksmOut: %v", err)
	}

	// ...and be field-for-field identical to the one we started with.
	if !got.Equal(want) {
		t.Fatalf("Checksum mismatch: expected %+v, got %+v", want, got)
	}
}
// TestChecksumSerializeDeserializeMultiPart checks AppendTo can be reversed by ChecksumFromBytes
// for multipart checksum.
// It builds three part checksums, combines their raw bytes into a final
// multipart checksum, round-trips that through AppendTo/ChecksumFromBytes and
// then confirms ReadPartCheckSums returns the individual part checksums.
func TestChecksumSerializeDeserializeMultiPart(t *testing.T) {
	// Create dummy data that we'll split into 3 parts
	dummyData := []byte("The quick brown fox jumps over the lazy dog. " +
		"Pack my box with five dozen brown eggs. " +
		"Have another go it will all make sense in the end!")

	// Split data into 3 parts
	partSize := len(dummyData) / 3
	part1Data := dummyData[0:partSize]
	part2Data := dummyData[partSize : 2*partSize]
	part3Data := dummyData[2*partSize:]

	// Calculate CRC32C checksum for each part using NewChecksumFromData
	checksumType := ChecksumCRC32C
	part1Checksum := NewChecksumFromData(checksumType, part1Data)
	part2Checksum := NewChecksumFromData(checksumType, part2Data)
	part3Checksum := NewChecksumFromData(checksumType, part3Data)
	// Fail cleanly instead of panicking on a nil dereference below.
	for i, cs := range []*Checksum{part1Checksum, part2Checksum, part3Checksum} {
		if cs == nil {
			t.Fatalf("NewChecksumFromData returned nil for part %d", i+1)
		}
	}

	// Combine the raw checksums (this is what happens in CompleteMultipartUpload)
	var checksumCombined []byte
	checksumCombined = append(checksumCombined, part1Checksum.Raw...)
	checksumCombined = append(checksumCombined, part2Checksum.Raw...)
	checksumCombined = append(checksumCombined, part3Checksum.Raw...)

	// Create the final checksum (checksum of the combined checksums)
	// Add BOTH the multipart flag AND the includes-multipart flag
	finalChecksumType := checksumType | ChecksumMultipart | ChecksumIncludesMultipart
	finalChecksum := NewChecksumFromData(finalChecksumType, checksumCombined)
	if finalChecksum == nil {
		t.Fatal("NewChecksumFromData returned nil for the final checksum")
	}

	// Set WantParts to indicate 3 parts
	finalChecksum.WantParts = 3

	// Test AppendTo serialization
	var serialized []byte
	serialized = finalChecksum.AppendTo(serialized, checksumCombined)

	// Use ChecksumFromBytes to deserialize the final checksum
	chksmOut := ChecksumFromBytes(serialized)
	if chksmOut == nil {
		t.Fatal("ChecksumFromBytes returned nil")
	}

	// Assert they are exactly equal
	if !chksmOut.Equal(finalChecksum) {
		t.Fatalf("Checksum mismatch: expected %+v, got %+v", finalChecksum, chksmOut)
	}

	// Serialize what we got from ChecksumFromBytes
	serializedOut := chksmOut.AppendTo(nil, checksumCombined)

	// Read part checksums from serializedOut
	readParts := ReadPartCheckSums(serializedOut)
	expectedChecksums := []string{
		part1Checksum.Encoded,
		part2Checksum.Encoded,
		part3Checksum.Encoded,
	}
	// Guard the indexing below so a short result is reported as a test
	// failure rather than an index-out-of-range panic.
	if len(readParts) < len(expectedChecksums) {
		t.Fatalf("want at least %d part checksums, got %d", len(expectedChecksums), len(readParts))
	}
	for i, expected := range expectedChecksums {
		if got := readParts[i][ChecksumCRC32C.String()]; got != expected {
			t.Fatalf("want part%dChecksum.Encoded %s, got %s", i+1, expected, got)
		}
	}
}

View File

@@ -51,11 +51,18 @@ type Reader struct {
checksum etag.ETag
contentSHA256 []byte
// Content checksum
// Client-provided content checksum
contentHash Checksum
contentHasher hash.Hash
disableMD5 bool
// Server side computed checksum. In some cases, like CopyObject, a new checksum
// needs to be computed and saved on the destination object, but the client
// does not provide it. Not calculated if client-side contentHash is set.
ServerSideChecksumType ChecksumType
ServerSideHasher hash.Hash
ServerSideChecksumResult *Checksum
trailer http.Header
sha256 hash.Hash
@@ -247,6 +254,16 @@ func (r *Reader) AddNonTrailingChecksum(cs *Checksum, ignoreValue bool) error {
return nil
}
// AddServerSideChecksumHasher installs the hasher returned by t.Hasher() as
// the reader's server-side hasher and records t as the server-side checksum
// type. When t.Hasher() returns nil the reader is left unchanged.
func (r *Reader) AddServerSideChecksumHasher(t ChecksumType) {
	if hasher := t.Hasher(); hasher != nil {
		r.ServerSideChecksumType = t
		r.ServerSideHasher = hasher
	}
}
func (r *Reader) Read(p []byte) (int, error) {
n, err := r.src.Read(p)
r.bytesRead += int64(n)
@@ -255,6 +272,8 @@ func (r *Reader) Read(p []byte) (int, error) {
}
if r.contentHasher != nil {
r.contentHasher.Write(p[:n])
} else if r.ServerSideHasher != nil {
r.ServerSideHasher.Write(p[:n])
}
if err == io.EOF { // Verify content SHA256, if set.
@@ -293,6 +312,9 @@ func (r *Reader) Read(p []byte) (int, error) {
}
return n, err
}
} else if r.ServerSideHasher != nil {
sum := r.ServerSideHasher.Sum(nil)
r.ServerSideChecksumResult = NewChecksumWithType(r.ServerSideChecksumType, base64.StdEncoding.EncodeToString(sum))
}
}
if err != nil && err != io.EOF {