CopyObject must preserve checksums and encrypt them if required (#21399)

This commit is contained in:
Mark Theunissen 2025-06-25 17:08:54 +02:00 committed by GitHub
parent a65292cab1
commit 2718d9a430
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 396 additions and 21 deletions

View File

@ -1074,9 +1074,17 @@ func (o *ObjectInfo) metadataDecrypter(h http.Header) objectMetaDecryptFn {
return input, nil
}
var key []byte
if crypto.SSECopy.IsRequested(h) {
sseCopyKey, err := crypto.SSECopy.ParseHTTP(h)
if err != nil {
return nil, err
}
key = sseCopyKey[:]
} else {
if k, err := crypto.SSEC.ParseHTTP(h); err == nil {
key = k[:]
}
}
key, err := decryptObjectMeta(key, o.Bucket, o.Name, o.UserDefined)
if err != nil {
return nil, err
@ -1087,7 +1095,8 @@ func (o *ObjectInfo) metadataDecrypter(h http.Header) objectMetaDecryptFn {
}
}
// decryptPartsChecksums will attempt to decode checksums and return it/them if set.
// decryptPartsChecksums will attempt to decrypt and decode part checksums, and save
// only the decrypted part checksum values on ObjectInfo directly.
// if part > 0, and we have the checksum for the part that will be returned.
func (o *ObjectInfo) decryptPartsChecksums(h http.Header) {
data := o.Checksum
@ -1112,6 +1121,23 @@ func (o *ObjectInfo) decryptPartsChecksums(h http.Header) {
}
}
// decryptChecksum returns the plaintext form of o.Checksum.
//
// An empty checksum is handed back as-is. When crypto.IsEncrypted reports that
// the object's metadata is not encrypted, the stored bytes are returned
// unchanged. Otherwise the checksum is run through the decrypter built from h,
// and an error is returned only if that decryption fails.
func (o *ObjectInfo) decryptChecksum(h http.Header) ([]byte, error) {
	sum := o.Checksum
	if len(sum) == 0 {
		return sum, nil
	}

	_, isEncrypted := crypto.IsEncrypted(o.UserDefined)
	if !isEncrypted {
		return sum, nil
	}

	plain, err := o.metadataDecrypter(h)("object-checksum", sum)
	if err != nil {
		return nil, err
	}
	return plain, nil
}
// metadataEncryptFn provides an encryption function for metadata.
// Will return nil, nil if unencrypted.
func (o *ObjectInfo) metadataEncryptFn(headers http.Header) (objectMetaEncryptFn, error) {

View File

@ -1470,7 +1470,17 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
actualSize = n
}
}
if fi.Checksum == nil {
// If ServerSideChecksum is wanted for this object, it takes precedence
// over opts.WantChecksum.
if opts.WantServerSideChecksumType.IsSet() {
serverSideChecksum := r.RawServerSideChecksumResult()
if serverSideChecksum != nil {
fi.Checksum = serverSideChecksum.AppendTo(nil, nil)
if opts.EncryptFn != nil {
fi.Checksum = opts.EncryptFn("object-checksum", fi.Checksum)
}
}
} else if fi.Checksum == nil && opts.WantChecksum != nil {
// Trailing headers checksums should now be filled.
fi.Checksum = opts.WantChecksum.AppendTo(nil, nil)
if opts.EncryptFn != nil {

View File

@ -1346,6 +1346,9 @@ func (z *erasureServerPools) CopyObject(ctx context.Context, srcBucket, srcObjec
VersionID: dstOpts.VersionID,
MTime: dstOpts.MTime,
NoLock: true,
EncryptFn: dstOpts.EncryptFn,
WantChecksum: dstOpts.WantChecksum,
WantServerSideChecksumType: dstOpts.WantServerSideChecksumType,
}
return z.serverPools[poolIdx].PutObject(ctx, dstBucket, dstObject, srcInfo.PutObjReader, putOpts)

View File

@ -873,6 +873,9 @@ func (s *erasureSets) CopyObject(ctx context.Context, srcBucket, srcObject, dstB
Versioned: dstOpts.Versioned,
VersionID: dstOpts.VersionID,
MTime: dstOpts.MTime,
EncryptFn: dstOpts.EncryptFn,
WantChecksum: dstOpts.WantChecksum,
WantServerSideChecksumType: dstOpts.WantServerSideChecksumType,
}
return dstSet.putObject(ctx, dstBucket, dstObject, srcInfo.PutObjReader, putOpts)

View File

@ -152,6 +152,10 @@ func encLogIf(ctx context.Context, err error, errKind ...interface{}) {
logger.LogIf(ctx, "encryption", err, errKind...)
}
// encLogOnceIf forwards err to logger.LogOnceIf with the fixed "encryption"
// tag, passing id and errKind through unchanged.
// NOTE(review): id is presumably the key LogOnceIf uses to suppress repeated
// reports of the same error — confirm against the logger package.
func encLogOnceIf(ctx context.Context, err error, id string, errKind ...interface{}) {
logger.LogOnceIf(ctx, "encryption", err, id, errKind...)
}
// storageLogIf forwards err to logger.LogIf with the fixed "storage" tag,
// passing errKind through unchanged.
func storageLogIf(ctx context.Context, err error, errKind ...interface{}) {
logger.LogIf(ctx, "storage", err, errKind...)
}

View File

@ -654,6 +654,7 @@ type objectAttributesChecksum struct {
ChecksumSHA1 string `xml:",omitempty"`
ChecksumSHA256 string `xml:",omitempty"`
ChecksumCRC64NVME string `xml:",omitempty"`
ChecksumType string `xml:",omitempty"`
}
type objectAttributesParts struct {

View File

@ -86,6 +86,8 @@ type ObjectOptions struct {
WantChecksum *hash.Checksum // x-amz-checksum-XXX checksum sent to PutObject/ CompleteMultipartUpload.
WantServerSideChecksumType hash.ChecksumType // if set, we compute a server-side checksum of this type
NoDecryption bool // indicates if the stream must be decrypted.
PreserveETag string // preserves this etag during a PUT call.
NoLock bool // indicates to lower layers if the caller is expecting to hold locks.

View File

@ -1096,6 +1096,16 @@ func NewPutObjReader(rawReader *hash.Reader) *PutObjReader {
return &PutObjReader{Reader: rawReader, rawReader: rawReader}
}
// RawServerSideChecksumResult exposes the ServerSideChecksumResult recorded on
// the underlying rawReader, or nil when no rawReader is attached.
//
// The raw reader is consulted rather than the wrapping reader because the
// PutObjReader may be streaming encrypted data, and a checksum taken over that
// stream would not describe the original content.
func (p *PutObjReader) RawServerSideChecksumResult() *hash.Checksum {
	if p.rawReader == nil {
		return nil
	}
	return p.rawReader.ServerSideChecksumResult
}
func sealETag(encKey crypto.ObjectKey, md5CurrSum []byte) []byte {
var emptyKey [32]byte
if bytes.Equal(encKey[:], emptyKey[:]) {

View File

@ -641,6 +641,7 @@ func (api objectAPIHandlers) getObjectAttributesHandler(ctx context.Context, obj
ChecksumSHA1: strings.Split(chkSums["SHA1"], "-")[0],
ChecksumSHA256: strings.Split(chkSums["SHA256"], "-")[0],
ChecksumCRC64NVME: strings.Split(chkSums["CRC64NVME"], "-")[0],
ChecksumType: chkSums[xhttp.AmzChecksumType],
}
}
}
@ -1465,6 +1466,46 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re
targetSize, _ = srcInfo.DecryptedSize()
}
// Client can request that a different type of checksum is computed server-side for the
// destination object using the x-amz-checksum-algorithm header.
headerChecksumType := hash.NewChecksumHeader(r.Header)
if headerChecksumType.IsSet() {
dstOpts.WantServerSideChecksumType = headerChecksumType.Base()
srcInfo.Reader.AddServerSideChecksumHasher(headerChecksumType)
dstOpts.WantChecksum = nil
} else {
// Check the source object for checksum.
// If Checksum is not encrypted, decryptChecksum will be a no-op and return
// the already unencrypted value.
srcChecksumDecrypted, err := srcInfo.decryptChecksum(r.Header)
if err != nil {
encLogOnceIf(GlobalContext,
fmt.Errorf("Unable to decryptChecksum for object: %s/%s, error: %w", srcBucket, srcObject, err),
"copy-object-decrypt-checksums-"+srcBucket+srcObject)
}
// The source object has a checksum set, we need the destination to have one too.
if srcChecksumDecrypted != nil {
dstOpts.WantChecksum = hash.ChecksumFromBytes(srcChecksumDecrypted)
// When an object is being copied from a source that is multipart, the destination will
// no longer be multipart, and thus the checksum becomes full-object instead. Since
// the CopyObject API does not require that the caller send us this final checksum, we need
// to compute it server-side, with the same type as the source object.
if dstOpts.WantChecksum != nil && dstOpts.WantChecksum.Type.IsMultipartComposite() {
dstOpts.WantServerSideChecksumType = dstOpts.WantChecksum.Type.Base()
srcInfo.Reader.AddServerSideChecksumHasher(dstOpts.WantServerSideChecksumType)
dstOpts.WantChecksum = nil
}
} else {
// S3: All copied objects without checksums and specified destination checksum algorithms
// automatically gain a CRC-64NVME checksum algorithm.
dstOpts.WantServerSideChecksumType = hash.ChecksumCRC64NVME
srcInfo.Reader.AddServerSideChecksumHasher(dstOpts.WantServerSideChecksumType)
dstOpts.WantChecksum = nil
}
}
if isTargetEncrypted {
var encReader io.Reader
kind, _ := crypto.IsRequested(r.Header)
@ -1498,6 +1539,7 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re
if dstOpts.IndexCB != nil {
dstOpts.IndexCB = compressionIndexEncrypter(objEncKey, dstOpts.IndexCB)
}
dstOpts.EncryptFn = metadataEncrypter(objEncKey)
}
}
@ -1633,6 +1675,13 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re
return
}
// After we've checked for an invalid copy (above), if a server-side checksum type
// is requested, we need to read the source to recompute the checksum.
if dstOpts.WantServerSideChecksumType.IsSet() {
srcInfo.metadataOnly = false
}
// Federation only.
remoteCallRequired := isRemoteCopyRequired(ctx, srcBucket, dstBucket, objectAPI)
var objInfo ObjectInfo

View File

@ -230,6 +230,11 @@ func (c ChecksumType) FullObjectRequested() bool {
return c&(ChecksumFullObject) == ChecksumFullObject || c.Is(ChecksumCRC64NVME)
}
// IsMultipartComposite reports whether c carries the ChecksumMultipart flag
// while FullObjectRequested is false, i.e. a multipart checksum for which a
// full-object checksum was not requested.
func (c ChecksumType) IsMultipartComposite() bool {
	if !c.Is(ChecksumMultipart) {
		return false
	}
	return !c.FullObjectRequested()
}
// ObjType returns a string to return as x-amz-checksum-type.
func (c ChecksumType) ObjType() string {
if c.FullObjectRequested() {
@ -269,7 +274,7 @@ func (c ChecksumType) Trailing() bool {
return c.Is(ChecksumTrailing)
}
// NewChecksumFromData returns a new checksum from specified algorithm and base64 encoded value.
// NewChecksumFromData returns a new Checksum, using specified algorithm type on data.
func NewChecksumFromData(t ChecksumType, data []byte) *Checksum {
if !t.IsSet() {
return nil
@ -311,8 +316,6 @@ func ReadCheckSums(b []byte, part int) (cs map[string]string, isMP bool) {
}
if !typ.FullObjectRequested() {
cs = fmt.Sprintf("%s-%d", cs, t)
} else if part <= 0 {
res[xhttp.AmzChecksumType] = xhttp.AmzChecksumTypeFullObject
}
b = b[n:]
if part > 0 {
@ -337,6 +340,13 @@ func ReadCheckSums(b []byte, part int) (cs map[string]string, isMP bool) {
}
if cs != "" {
res[typ.String()] = cs
res[xhttp.AmzChecksumType] = typ.ObjType()
if !typ.Is(ChecksumMultipart) {
// Single part PUTs are always FULL_OBJECT checksum
// Refer https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutObject.html
// **For PutObject uploads, the checksum type is always FULL_OBJECT.**
res[xhttp.AmzChecksumType] = ChecksumFullObject.ObjType()
}
}
}
if len(res) == 0 {
@ -468,6 +478,65 @@ func (c *Checksum) AppendTo(b []byte, parts []byte) []byte {
return b
}
// ChecksumFromBytes reconstructs a Checksum from the serialized form produced
// by AppendTo.
//
// The layout read here is: a uvarint checksum type, the raw checksum bytes
// (typ.RawByteLen() of them) and, for multipart types, a uvarint part count.
// When the type also has ChecksumIncludesMultipart set, the remaining bytes
// must be long enough to hold one raw checksum per part; those part checksums
// are only length-checked, never returned.
//
// AppendTo can append one serialized Checksum after another, but only the
// first one is decoded. ChecksumFromBytes returns nil if b is empty, truncated,
// carries a part count that does not fit in an int, or decodes to a Checksum
// that fails Valid.
func ChecksumFromBytes(b []byte) *Checksum {
	if len(b) == 0 {
		return nil
	}

	// Read checksum type.
	t, n := binary.Uvarint(b)
	if n <= 0 {
		return nil
	}
	b = b[n:]

	typ := ChecksumType(t)
	length := typ.RawByteLen()
	// A non-positive length means there is nothing to read for this type;
	// rejecting it here also keeps make() below from ever seeing a negative size.
	if length <= 0 || len(b) < length {
		return nil
	}

	// Read raw checksum bytes into a private copy so the result does not alias b.
	raw := make([]byte, length)
	copy(raw, b[:length])
	b = b[length:]

	c := &Checksum{
		Type:    typ,
		Raw:     raw,
		Encoded: base64.StdEncoding.EncodeToString(raw),
	}

	// Handle multipart checksums.
	if typ.Is(ChecksumMultipart) {
		parts, n := binary.Uvarint(b)
		if n <= 0 {
			return nil
		}
		b = b[n:]

		// Reject part counts that cannot be represented as an int: converting
		// them would silently wrap to a negative or truncated WantParts.
		const maxInt = int(^uint(0) >> 1)
		if parts > uint64(maxInt) {
			return nil
		}
		c.WantParts = int(parts)

		if typ.Is(ChecksumIncludesMultipart) {
			// Same condition as len(b) < parts*length, written so that the
			// multiplication cannot overflow on a corrupt part count.
			if parts > uint64(len(b)/length) {
				return nil
			}
		}
	}

	if !c.Valid() {
		return nil
	}

	return c
}
// Valid returns whether checksum is valid.
func (c Checksum) Valid() bool {
if c.Type == ChecksumInvalid {
@ -506,12 +575,26 @@ func (c Checksum) Matches(content []byte, parts int) error {
return nil
}
// AsMap returns the
// AsMap returns the checksum as a map[string]string.
func (c *Checksum) AsMap() map[string]string {
if c == nil || !c.Valid() {
return nil
}
return map[string]string{c.Type.String(): c.Encoded}
return map[string]string{
c.Type.String(): c.Encoded,
xhttp.AmzChecksumType: c.Type.ObjType(),
}
}
// Equal reports whether c and s hold the same checksum: identical Type,
// Encoded, Raw and WantParts. Two nil checksums are equal; a nil checksum never
// equals a non-nil one.
func (c *Checksum) Equal(s *Checksum) bool {
	switch {
	case c == nil || s == nil:
		// Only equal when both sides are nil.
		return c == s
	case c.Type != s.Type,
		c.Encoded != s.Encoded,
		c.WantParts != s.WantParts:
		return false
	}
	return bytes.Equal(c.Raw, s.Raw)
}
// TransferChecksumHeader will transfer any checksum value that has been checked.

View File

@ -0,0 +1,162 @@
// Copyright (c) 2015-2025 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package hash
import (
"net/http/httptest"
"testing"
)
// TestChecksumAddToHeader verifies that a checksum written to response headers
// with AddChecksumHeader is read back unchanged by GetContentChecksum.
func TestChecksumAddToHeader(t *testing.T) {
	type testCase struct {
		name       string
		algo       ChecksumType
		fullObject bool
	}

	cases := []testCase{
		{name: "CRC32-composite", algo: ChecksumCRC32, fullObject: false},
		{name: "CRC32-full-object", algo: ChecksumCRC32, fullObject: true},
		{name: "CRC32C-composite", algo: ChecksumCRC32C, fullObject: false},
		{name: "CRC32C-full-object", algo: ChecksumCRC32C, fullObject: true},
		// Deliberately false: CRC64NVME is always treated as full object.
		{name: "CRC64NVME-full-object", algo: ChecksumCRC64NVME, fullObject: false},
		{name: "ChecksumSHA1-composite", algo: ChecksumSHA1, fullObject: false},
		{name: "ChecksumSHA256-composite", algo: ChecksumSHA256, fullObject: false},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			payload := []byte("this-is-a-checksum-data-test")
			want := NewChecksumFromData(tc.algo, payload)
			if tc.fullObject {
				want.Type |= ChecksumFullObject
			}

			// Round-trip the checksum through a recorded HTTP response.
			rec := httptest.NewRecorder()
			AddChecksumHeader(rec, want.AsMap())
			got, err := GetContentChecksum(rec.Result().Header)
			if err != nil {
				t.Fatalf("GetContentChecksum failed: %v", err)
			}

			// CRC64NVME always comes back as full object, so set the flag on
			// the expected value before comparing.
			if want.Type.Base().Is(ChecksumCRC64NVME) {
				want.Type |= ChecksumFullObject
			}
			if !want.Equal(got) {
				t.Fatalf("Checksum mismatch: expected %+v, got %+v", want, got)
			}
		})
	}
}
// TestChecksumSerializeDeserialize verifies that ChecksumFromBytes reverses
// AppendTo for a single-part checksum.
func TestChecksumSerializeDeserialize(t *testing.T) {
	payload := []byte("this-is-a-checksum-data-test")

	original := NewChecksumFromData(ChecksumCRC32, payload)
	if original == nil {
		t.Fatal("NewChecksumFromData returned nil")
	}

	// Serialize the checksum to bytes.
	encoded := original.AppendTo(nil, nil)
	if encoded == nil {
		t.Fatal("AppendTo returned nil")
	}

	// Deserialize it again.
	decoded := ChecksumFromBytes(encoded)
	if decoded == nil {
		t.Fatal("ChecksumFromBytes returned nil")
	}

	// The decoded checksum must still match the content it was computed from...
	if err := decoded.Matches(payload, 0); err != nil {
		t.Fatalf("Checksum mismatch on chksmOut: %v", err)
	}

	// ...and be field-for-field equal to the one that was serialized.
	if !decoded.Equal(original) {
		t.Fatalf("Checksum mismatch: expected %+v, got %+v", original, decoded)
	}
}
// TestChecksumSerializeDeserializeMultiPart verifies that ChecksumFromBytes
// reverses AppendTo for a multipart checksum, and that the per-part checksums
// appended alongside it can still be read back with ReadPartCheckSums.
func TestChecksumSerializeDeserializeMultiPart(t *testing.T) {
	// Dummy data that is split into 3 parts.
	dummyData := []byte("The quick brown fox jumps over the lazy dog. " +
		"Pack my box with five dozen brown eggs. " +
		"Have another go it will all make sense in the end!")

	partSize := len(dummyData) / 3
	partData := [][]byte{
		dummyData[0:partSize],
		dummyData[partSize : 2*partSize],
		dummyData[2*partSize:],
	}

	// Compute a CRC32C checksum per part and concatenate the raw checksums
	// (this is what happens in CompleteMultipartUpload).
	checksumType := ChecksumCRC32C
	var checksumCombined []byte
	expectedChecksums := make([]string, 0, len(partData))
	for i, data := range partData {
		partChecksum := NewChecksumFromData(checksumType, data)
		if partChecksum == nil {
			t.Fatalf("NewChecksumFromData returned nil for part %d", i+1)
		}
		checksumCombined = append(checksumCombined, partChecksum.Raw...)
		expectedChecksums = append(expectedChecksums, partChecksum.Encoded)
	}

	// The final checksum is the checksum of the combined part checksums and
	// carries BOTH the multipart flag AND the includes-multipart flag.
	finalChecksumType := checksumType | ChecksumMultipart | ChecksumIncludesMultipart
	finalChecksum := NewChecksumFromData(finalChecksumType, checksumCombined)
	if finalChecksum == nil {
		t.Fatal("NewChecksumFromData returned nil for the final checksum")
	}
	finalChecksum.WantParts = len(partData)

	// Serialize, then deserialize the final checksum.
	serialized := finalChecksum.AppendTo(nil, checksumCombined)
	chksmOut := ChecksumFromBytes(serialized)
	if chksmOut == nil {
		t.Fatal("ChecksumFromBytes returned nil")
	}

	// Assert they are exactly equal.
	if !chksmOut.Equal(finalChecksum) {
		t.Fatalf("Checksum mismatch: expected %+v, got %+v", finalChecksum, chksmOut)
	}

	// Serialize what we got from ChecksumFromBytes and read the part
	// checksums back out of it.
	serializedOut := chksmOut.AppendTo(nil, checksumCombined)
	readParts := ReadPartCheckSums(serializedOut)

	// Fail with a clear message instead of an index-out-of-range panic when
	// fewer part checksums come back than were written.
	if len(readParts) < len(expectedChecksums) {
		t.Fatalf("want at least %d part checksums, got %d", len(expectedChecksums), len(readParts))
	}
	for i, expected := range expectedChecksums {
		if got := readParts[i][ChecksumCRC32C.String()]; got != expected {
			t.Fatalf("want part%dChecksum.Encoded %s, got %s", i+1, expected, got)
		}
	}
}

View File

@ -51,11 +51,18 @@ type Reader struct {
checksum etag.ETag
contentSHA256 []byte
// Content checksum
// Client-provided content checksum
contentHash Checksum
contentHasher hash.Hash
disableMD5 bool
// Server side computed checksum. In some cases, like CopyObject, a new checksum
// needs to be computed and saved on the destination object, but the client
// does not provide it. Not calculated if client-side contentHash is set.
ServerSideChecksumType ChecksumType
ServerSideHasher hash.Hash
ServerSideChecksumResult *Checksum
trailer http.Header
sha256 hash.Hash
@ -247,6 +254,16 @@ func (r *Reader) AddNonTrailingChecksum(cs *Checksum, ignoreValue bool) error {
return nil
}
// AddServerSideChecksumHasher installs the hasher returned by t.Hasher() as the
// reader's server-side hasher and records t as the server-side checksum type.
// It leaves the reader untouched when t.Hasher() returns nil.
func (r *Reader) AddServerSideChecksumHasher(t ChecksumType) {
	if hasher := t.Hasher(); hasher != nil {
		r.ServerSideHasher = hasher
		r.ServerSideChecksumType = t
	}
}
func (r *Reader) Read(p []byte) (int, error) {
n, err := r.src.Read(p)
r.bytesRead += int64(n)
@ -255,6 +272,8 @@ func (r *Reader) Read(p []byte) (int, error) {
}
if r.contentHasher != nil {
r.contentHasher.Write(p[:n])
} else if r.ServerSideHasher != nil {
r.ServerSideHasher.Write(p[:n])
}
if err == io.EOF { // Verify content SHA256, if set.
@ -293,6 +312,9 @@ func (r *Reader) Read(p []byte) (int, error) {
}
return n, err
}
} else if r.ServerSideHasher != nil {
sum := r.ServerSideHasher.Sum(nil)
r.ServerSideChecksumResult = NewChecksumWithType(r.ServerSideChecksumType, base64.StdEncoding.EncodeToString(sum))
}
}
if err != nil && err != io.EOF {