From 7da9e3a6f8ecbd814ef6b60705c0bf06b940bf9f Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Sat, 16 Jul 2022 19:35:24 -0700 Subject: [PATCH] support encrypted/compressed objects properly during decommission (#15320) fixes #15314 --- Makefile | 2 + cmd/erasure-multipart.go | 3 + cmd/erasure-object.go | 4 + cmd/erasure-server-pool-decom.go | 41 ++++-- cmd/erasure-server-pool.go | 18 +-- cmd/object-api-interface.go | 9 +- cmd/object-api-options.go | 1 + cmd/object-api-utils.go | 5 +- docs/distributed/decom-encrypted-sse-s3.sh | 154 +++++++++++++++++++++ docs/distributed/decom-encrypted.sh | 139 +++++++++++++++++++ docs/distributed/decom.sh | 19 ++- 11 files changed, 367 insertions(+), 28 deletions(-) create mode 100644 docs/distributed/decom-encrypted-sse-s3.sh create mode 100644 docs/distributed/decom-encrypted.sh diff --git a/Makefile b/Makefile index e9864791b..01b3efcc7 100644 --- a/Makefile +++ b/Makefile @@ -44,6 +44,8 @@ test: verifiers build ## builds minio, runs linters, tests test-decom: install @echo "Running minio decom tests" @env bash $(PWD)/docs/distributed/decom.sh + @env bash $(PWD)/docs/distributed/decom-encrypted.sh + @env bash $(PWD)/docs/distributed/decom-encrypted-sse-s3.sh test-upgrade: build @echo "Running minio upgrade tests" diff --git a/cmd/erasure-multipart.go b/cmd/erasure-multipart.go index 3d537fa4d..2d2ef6e31 100644 --- a/cmd/erasure-multipart.go +++ b/cmd/erasure-multipart.go @@ -658,6 +658,9 @@ func (er erasureObjects) PutObjectPart(ctx context.Context, bucket, object, uplo fi.ModTime = UTCNow() md5hex := r.MD5CurrentHexString() + if opts.PreserveETag != "" { + md5hex = opts.PreserveETag + } var index []byte if opts.IndexCB != nil { index = opts.IndexCB() diff --git a/cmd/erasure-object.go b/cmd/erasure-object.go index a84d76d4e..3d427d1af 100644 --- a/cmd/erasure-object.go +++ b/cmd/erasure-object.go @@ -1099,8 +1099,12 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st Hash: bitrotWriterSum(w), }) } + if userDefined["etag"] == "" { userDefined["etag"] = r.MD5CurrentHexString() + if opts.PreserveETag != "" { + userDefined["etag"] = opts.PreserveETag + } } // Guess content-type from the extension if possible. diff --git a/cmd/erasure-server-pool-decom.go b/cmd/erasure-server-pool-decom.go index 3c515e8fb..94558b442 100644 --- a/cmd/erasure-server-pool-decom.go +++ b/cmd/erasure-server-pool-decom.go @@ -581,6 +581,11 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri auditLogDecom(ctx, "DecomCopyData", objInfo.Bucket, objInfo.Name, objInfo.VersionID, err) }() + actualSize, err := objInfo.GetActualSize() + if err != nil { + return err + } + if objInfo.isMultipart() { uploadID, err := z.NewMultipartUpload(ctx, bucket, objInfo.Name, ObjectOptions{ VersionID: objInfo.VersionID, @@ -593,14 +598,19 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri defer z.AbortMultipartUpload(ctx, bucket, objInfo.Name, uploadID, ObjectOptions{}) parts := make([]CompletePart, len(objInfo.Parts)) for i, part := range objInfo.Parts { - hr, err := hash.NewReader(gr, part.Size, "", "", part.Size) + hr, err := hash.NewReader(gr, part.Size, "", "", part.ActualSize) if err != nil { return fmt.Errorf("decommissionObject: hash.NewReader() %w", err) } pi, err := z.PutObjectPart(ctx, bucket, objInfo.Name, uploadID, part.Number, NewPutObjReader(hr), - ObjectOptions{}) + ObjectOptions{ + PreserveETag: part.ETag, // Preserve original ETag to ensure same metadata. + IndexCB: func() []byte { + return part.Index // Preserve part Index to ensure decompression works. + }, + }) if err != nil { return fmt.Errorf("decommissionObject: PutObjectPart() %w", err) } @@ -617,7 +627,8 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri } return err } - hr, err := hash.NewReader(gr, objInfo.Size, "", "", objInfo.Size) + + hr, err := hash.NewReader(gr, objInfo.Size, "", "", actualSize) if err != nil { return fmt.Errorf("decommissionObject: hash.NewReader() %w", err) } @@ -626,9 +637,13 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri objInfo.Name, NewPutObjReader(hr), ObjectOptions{ - VersionID: objInfo.VersionID, - MTime: objInfo.ModTime, - UserDefined: objInfo.UserDefined, + VersionID: objInfo.VersionID, + MTime: objInfo.ModTime, + UserDefined: objInfo.UserDefined, + PreserveETag: objInfo.ETag, // Preserve original ETag to ensure same metadata. + IndexCB: func() []byte { + return objInfo.Parts[0].Index // Preserve part Index to ensure decompression works. + }, }) if err != nil { err = fmt.Errorf("decommissionObject: PutObject() %w", err) @@ -741,11 +756,12 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool bi.Name, version.Name, ObjectOptions{ - Versioned: vc.PrefixEnabled(version.Name), - VersionID: version.VersionID, - MTime: version.ModTime, - DeleteReplication: version.ReplicationState, - DeleteMarker: true, // make sure we create a delete marker + Versioned: vc.PrefixEnabled(version.Name), + VersionID: version.VersionID, + MTime: version.ModTime, + DeleteReplication: version.ReplicationState, + DeleteMarker: true, // make sure we create a delete marker + SkipDecommissioned: true, // make sure we skip the decommissioned pool }) var failure bool if err != nil { @@ -772,7 +788,8 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool http.Header{}, noLock, // all mutations are blocked reads are safe without locks. ObjectOptions{ - VersionID: version.VersionID, + VersionID: version.VersionID, + NoDecryption: true, }) if isErrObjectNotFound(err) { // object deleted by the application, nothing to do here we move on. diff --git a/cmd/erasure-server-pool.go b/cmd/erasure-server-pool.go index e427e8c3b..a38a0703d 100644 --- a/cmd/erasure-server-pool.go +++ b/cmd/erasure-server-pool.go @@ -427,8 +427,9 @@ func (z *erasureServerPools) getPoolIdxExistingWithOpts(ctx context.Context, buc }) for _, pinfo := range poolObjInfos { - // skip all objects from suspended pools for mutating calls. - if z.IsSuspended(pinfo.PoolIndex) && opts.Mutate { + // skip all objects from suspended pools if asked by the + // caller. + if z.IsSuspended(pinfo.PoolIndex) && opts.SkipDecommissioned { continue } @@ -461,8 +462,8 @@ func (z *erasureServerPools) getPoolIdxExistingWithOpts(ctx context.Context, buc // The check is skipped if there is only one pool, and 0, nil is always returned in that case. func (z *erasureServerPools) getPoolIdxExistingNoLock(ctx context.Context, bucket, object string) (idx int, err error) { return z.getPoolIdxExistingWithOpts(ctx, bucket, object, ObjectOptions{ - NoLock: true, - Mutate: true, + NoLock: true, + SkipDecommissioned: true, }) } @@ -486,7 +487,7 @@ func (z *erasureServerPools) getPoolIdxNoLock(ctx context.Context, bucket, objec // if none are found falls back to most available space pool, this function is // designed to be only used by PutObject, CopyObject (newObject creation) and NewMultipartUpload. func (z *erasureServerPools) getPoolIdx(ctx context.Context, bucket, object string, size int64) (idx int, err error) { - idx, err = z.getPoolIdxExistingWithOpts(ctx, bucket, object, ObjectOptions{Mutate: true}) + idx, err = z.getPoolIdxExistingWithOpts(ctx, bucket, object, ObjectOptions{SkipDecommissioned: true}) if err != nil && !isErrObjectNotFound(err) { return idx, err } @@ -2275,7 +2276,6 @@ func (z *erasureServerPools) GetObjectTags(ctx context.Context, bucket, object s return z.serverPools[0].GetObjectTags(ctx, bucket, object, opts) } - opts.Mutate = false idx, err := z.getPoolIdxExistingWithOpts(ctx, bucket, object, opts) if err != nil { return nil, err @@ -2291,7 +2291,8 @@ func (z *erasureServerPools) TransitionObject(ctx context.Context, bucket, objec return z.serverPools[0].TransitionObject(ctx, bucket, object, opts) } - opts.Mutate = true // Avoid transitioning an object from a pool being decommissioned. + // Avoid transitioning an object from a pool being decommissioned. + opts.SkipDecommissioned = true idx, err := z.getPoolIdxExistingWithOpts(ctx, bucket, object, opts) if err != nil { return err @@ -2307,7 +2308,8 @@ func (z *erasureServerPools) RestoreTransitionedObject(ctx context.Context, buck return z.serverPools[0].RestoreTransitionedObject(ctx, bucket, object, opts) } - opts.Mutate = true // Avoid restoring object from a pool being decommissioned. + // Avoid restoring object from a pool being decommissioned. + opts.SkipDecommissioned = true idx, err := z.getPoolIdxExistingWithOpts(ctx, bucket, object, opts) if err != nil { return err diff --git a/cmd/object-api-interface.go b/cmd/object-api-interface.go index 1eff08e48..08abbca2e 100644 --- a/cmd/object-api-interface.go +++ b/cmd/object-api-interface.go @@ -59,6 +59,8 @@ type ObjectOptions struct { Transition TransitionOptions Expiration ExpirationOptions + NoDecryption bool // indicates if the stream must be decrypted. + PreserveETag string // preserves this etag during a PUT call. NoLock bool // indicates to lower layers if the caller is expecting to hold locks. ProxyRequest bool // only set for GET/HEAD in active-active replication scenario ProxyHeaderSet bool // only set for GET/HEAD in active-active replication scenario @@ -73,10 +75,11 @@ type ObjectOptions struct { // Use the maximum parity (N/2), used when saving server configuration files MaxParity bool - // Mutate set to 'true' if the call is namespace mutation call - Mutate bool - WalkAscending bool // return Walk results in ascending order of versions + // SkipDecommissioned set to 'true' if the call requires skipping the pool being decommissioned. + // mainly set for certain WRITE operations. + SkipDecommissioned bool + WalkAscending bool // return Walk results in ascending order of versions PrefixEnabledFn func(prefix string) bool // function which returns true if versioning is enabled on prefix // IndexCB will return any index created but the compression. diff --git a/cmd/object-api-options.go b/cmd/object-api-options.go index 3d9ae61b4..cb75c890d 100644 --- a/cmd/object-api-options.go +++ b/cmd/object-api-options.go @@ -74,6 +74,7 @@ func getDefaultOpts(header http.Header, copySource bool, metadata map[string]str if _, ok := header[xhttp.MinIOSourceReplicationRequest]; ok { opts.ReplicationRequest = true } + opts.Speedtest = header.Get(globalObjectPerfUserMetadata) != "" return } diff --git a/cmd/object-api-utils.go b/cmd/object-api-utils.go index 491f46741..590ad1aaf 100644 --- a/cmd/object-api-utils.go +++ b/cmd/object-api-utils.go @@ -646,8 +646,9 @@ func NewGetObjectReader(rs *HTTPRangeSpec, oi ObjectInfo, opts ObjectOptions) ( return nil, 0, 0, err } - // if object is encrypted and it is a restore request, fetch content without decrypting. - if opts.Transition.RestoreRequest != nil { + // if object is encrypted and it is a restore request or if NoDecryption + // was requested, fetch content without decrypting. + if opts.Transition.RestoreRequest != nil || opts.NoDecryption { isEncrypted = false isCompressed = false } diff --git a/docs/distributed/decom-encrypted-sse-s3.sh b/docs/distributed/decom-encrypted-sse-s3.sh new file mode 100644 index 000000000..b565527b6 --- /dev/null +++ b/docs/distributed/decom-encrypted-sse-s3.sh @@ -0,0 +1,154 @@ +#!/bin/bash + +if [ -n "$TEST_DEBUG" ]; then + set -x +fi + +pkill minio +rm -rf /tmp/xl + +if [ ! -f ./mc ]; then + wget --quiet -O mc https://dl.minio.io/client/mc/release/linux-amd64/mc && \ + chmod +x mc +fi + +export CI=true +export MINIO_KMS_SECRET_KEY=my-minio-key:OSMM+vkKUTCvQs9YL/CVMIMt43HFhkUpqJxTmGl6rYw= +export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9000/" + +(minio server /tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null)& +pid=$! + +sleep 2 + +./mc admin user add myminio/ minio123 minio123 +./mc admin user add myminio/ minio12345 minio12345 + +./mc admin policy add myminio/ rw ./docs/distributed/rw.json +./mc admin policy add myminio/ lake ./docs/distributed/rw.json + +./mc admin policy set myminio/ rw user=minio123 +./mc admin policy set myminio/ lake,rw user=minio12345 + +./mc mb -l myminio/versioned + +./mc encrypt set sse-s3 myminio/versioned + +./mc mirror internal myminio/versioned/ --quiet >/dev/null + +## Soft delete (creates delete markers) +./mc rm -r --force myminio/versioned >/dev/null + +## mirror again to create another set of version on top +./mc mirror internal myminio/versioned/ --quiet >/dev/null + +expected_checksum=$(./mc cat internal/dsync/drwmutex.go | md5sum) + +user_count=$(./mc admin user list myminio/ | wc -l) +policy_count=$(./mc admin policy list myminio/ | wc -l) + +kill $pid + +(minio server /tmp/xl/{1...10}/disk{0...1} /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded.log) & +pid=$! + +sleep 2 + +expanded_user_count=$(./mc admin user list myminio/ | wc -l) +expanded_policy_count=$(./mc admin policy list myminio/ | wc -l) + +if [ $user_count -ne $expanded_user_count ]; then + echo "BUG: original user count differs from expanded setup" + exit 1 +fi + +if [ $policy_count -ne $expanded_policy_count ]; then + echo "BUG: original policy count differs from expanded setup" + exit 1 +fi + +./mc version info myminio/versioned | grep -q "versioning is enabled" +ret=$? +if [ $ret -ne 0 ]; then + echo "expected versioning enabled after expansion" + exit 1 +fi + +./mc encrypt info myminio/versioned | grep -q "Auto encryption 'sse-s3' is enabled" +ret=$? +if [ $ret -ne 0 ]; then + echo "expected encryption enabled after expansion" + exit 1 +fi + +./mc mirror cmd myminio/versioned/ --quiet >/dev/null + +./mc ls -r myminio/versioned/ > expanded_ns.txt +./mc ls -r --versions myminio/versioned/ > expanded_ns_versions.txt + +./mc admin decom start myminio/ /tmp/xl/{1...10}/disk{0...1} + +until $(./mc admin decom status myminio/ | grep -q Complete) +do + echo "waiting for decom to finish..." + sleep 1 +done + +kill $pid + +(minio server /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/removed.log)& +pid=$! + +sleep 2 + +decom_user_count=$(./mc admin user list myminio/ | wc -l) +decom_policy_count=$(./mc admin policy list myminio/ | wc -l) + +if [ $user_count -ne $decom_user_count ]; then + echo "BUG: original user count differs after decommission" + exit 1 +fi + +if [ $policy_count -ne $decom_policy_count ]; then + echo "BUG: original policy count differs after decommission" + exit 1 +fi + +./mc version info myminio/versioned | grep -q "versioning is enabled" +ret=$? +if [ $ret -ne 0 ]; then + echo "BUG: expected versioning enabled after decommission" + exit 1 +fi + +./mc encrypt info myminio/versioned | grep -q "Auto encryption 'sse-s3' is enabled" +ret=$? +if [ $ret -ne 0 ]; then + echo "BUG: expected encryption enabled after expansion" + exit 1 +fi + +got_checksum=$(./mc cat myminio/versioned/dsync/drwmutex.go | md5sum) +if [ "${expected_checksum}" != "${got_checksum}" ]; then + echo "BUG: decommission failed on encrypted objects: expected ${expected_checksum} got ${got_checksum}" + exit 1 +fi + +./mc ls -r myminio/versioned > decommissioned_ns.txt +./mc ls -r --versions myminio/versioned > decommissioned_ns_versions.txt + +out=$(diff -qpruN expanded_ns.txt decommissioned_ns.txt) +ret=$? +if [ $ret -ne 0 ]; then + echo "BUG: expected no missing entries after decommission: $out" + exit 1 +fi + +out=$(diff -qpruN expanded_ns_versions.txt decommissioned_ns_versions.txt) +ret=$? +if [ $ret -ne 0 ]; then + echo "BUG: expected no missing entries after decommission: $out" + exit 1 +fi + +kill $pid diff --git a/docs/distributed/decom-encrypted.sh b/docs/distributed/decom-encrypted.sh new file mode 100644 index 000000000..fd499bc59 --- /dev/null +++ b/docs/distributed/decom-encrypted.sh @@ -0,0 +1,139 @@ +#!/bin/bash + +if [ -n "$TEST_DEBUG" ]; then + set -x +fi + +pkill minio +rm -rf /tmp/xl + +if [ ! -f ./mc ]; then + wget --quiet -O mc https://dl.minio.io/client/mc/release/linux-amd64/mc && \ + chmod +x mc +fi + +export CI=true +export MINIO_KMS_AUTO_ENCRYPTION=on +export MINIO_KMS_SECRET_KEY=my-minio-key:OSMM+vkKUTCvQs9YL/CVMIMt43HFhkUpqJxTmGl6rYw= + +(minio server /tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null)& +pid=$! + +sleep 2 + +export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9000/" + +./mc admin user add myminio/ minio123 minio123 +./mc admin user add myminio/ minio12345 minio12345 + +./mc admin policy add myminio/ rw ./docs/distributed/rw.json +./mc admin policy add myminio/ lake ./docs/distributed/rw.json + +./mc admin policy set myminio/ rw user=minio123 +./mc admin policy set myminio/ lake,rw user=minio12345 + +./mc mb -l myminio/versioned + +./mc mirror internal myminio/versioned/ --quiet >/dev/null + +## Soft delete (creates delete markers) +./mc rm -r --force myminio/versioned >/dev/null + +## mirror again to create another set of version on top +./mc mirror internal myminio/versioned/ --quiet >/dev/null + +expected_checksum=$(./mc cat internal/dsync/drwmutex.go | md5sum) + +user_count=$(./mc admin user list myminio/ | wc -l) +policy_count=$(./mc admin policy list myminio/ | wc -l) + +kill $pid +(minio server /tmp/xl/{1...10}/disk{0...1} /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded.log) & +pid=$! + +sleep 2 + +expanded_user_count=$(./mc admin user list myminio/ | wc -l) +expanded_policy_count=$(./mc admin policy list myminio/ | wc -l) + +if [ $user_count -ne $expanded_user_count ]; then + echo "BUG: original user count differs from expanded setup" + exit 1 +fi + +if [ $policy_count -ne $expanded_policy_count ]; then + echo "BUG: original policy count differs from expanded setup" + exit 1 +fi + +./mc version info myminio/versioned | grep -q "versioning is enabled" +ret=$? +if [ $ret -ne 0 ]; then + echo "expected versioning enabled after expansion" + exit 1 +fi + +./mc mirror cmd myminio/versioned/ --quiet >/dev/null + +./mc ls -r myminio/versioned/ > expanded_ns.txt +./mc ls -r --versions myminio/versioned/ > expanded_ns_versions.txt + +./mc admin decom start myminio/ /tmp/xl/{1...10}/disk{0...1} + +until $(./mc admin decom status myminio/ | grep -q Complete) +do + echo "waiting for decom to finish..." + sleep 1 +done + +kill $pid + +(minio server /tmp/xl/{11...30}/disk{0...3} 2>&1 >/dev/null)& +pid=$! + +sleep 2 + +decom_user_count=$(./mc admin user list myminio/ | wc -l) +decom_policy_count=$(./mc admin policy list myminio/ | wc -l) + +if [ $user_count -ne $decom_user_count ]; then + echo "BUG: original user count differs after decommission" + exit 1 +fi + +if [ $policy_count -ne $decom_policy_count ]; then + echo "BUG: original policy count differs after decommission" + exit 1 +fi + +./mc version info myminio/versioned | grep -q "versioning is enabled" +ret=$? +if [ $ret -ne 0 ]; then + echo "BUG: expected versioning enabled after decommission" + exit 1 +fi + +./mc ls -r myminio/versioned > decommissioned_ns.txt +./mc ls -r --versions myminio/versioned > decommissioned_ns_versions.txt + +out=$(diff -qpruN expanded_ns.txt decommissioned_ns.txt) +ret=$? +if [ $ret -ne 0 ]; then + echo "BUG: expected no missing entries after decommission: $out" + exit 1 +fi + +out=$(diff -qpruN expanded_ns_versions.txt decommissioned_ns_versions.txt) +ret=$? +if [ $ret -ne 0 ]; then + echo "BUG: expected no missing entries after decommission: $out" + exit 1 +fi + +got_checksum=$(./mc cat myminio/versioned/dsync/drwmutex.go | md5sum) +if [ "${expected_checksum}" != "${got_checksum}" ]; then + echo "BUG: decommission failed on encrypted objects: expected ${expected_checksum} got ${got_checksum}" + exit 1 +fi + +kill $pid diff --git a/docs/distributed/decom.sh b/docs/distributed/decom.sh index 97ff00594..75f1ff23a 100755 --- a/docs/distributed/decom.sh +++ b/docs/distributed/decom.sh @@ -1,16 +1,19 @@ #!/bin/bash -if [ -n "$DEBUG" ]; then +if [ -n "$TEST_DEBUG" ]; then set -x fi pkill minio rm -rf /tmp/xl -wget --quiet -O mc https://dl.minio.io/client/mc/release/linux-amd64/mc && \ - chmod +x mc +if [ ! -f ./mc ]; then + wget --quiet -O mc https://dl.minio.io/client/mc/release/linux-amd64/mc && \ + chmod +x mc +fi export CI=true + (minio server /tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null)& pid=$! @@ -37,6 +40,8 @@ export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9000/" ## mirror again to create another set of version on top ./mc mirror internal myminio/versioned/ --quiet >/dev/null +expected_checksum=$(./mc cat internal/dsync/drwmutex.go | md5sum) + user_count=$(./mc admin user list myminio/ | wc -l) policy_count=$(./mc admin policy list myminio/ | wc -l) @@ -113,12 +118,20 @@ out=$(diff -qpruN expanded_ns.txt decommissioned_ns.txt) ret=$? if [ $ret -ne 0 ]; then echo "BUG: expected no missing entries after decommission: $out" + exit 1 fi out=$(diff -qpruN expanded_ns_versions.txt decommissioned_ns_versions.txt) ret=$? if [ $ret -ne 0 ]; then echo "BUG: expected no missing entries after decommission: $out" + exit 1 +fi + +got_checksum=$(./mc cat myminio/versioned/dsync/drwmutex.go | md5sum) +if [ "${expected_checksum}" != "${got_checksum}" ]; then + echo "BUG: decommission failed on encrypted objects: expected ${expected_checksum} got ${got_checksum}" + exit 1 fi kill $pid