support encrypted/compressed objects properly during decommission (#15320)

fixes #15314
This commit is contained in:
Harshavardhana 2022-07-16 19:35:24 -07:00 committed by GitHub
parent 876970baea
commit 7da9e3a6f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 367 additions and 28 deletions

View File

@ -44,6 +44,8 @@ test: verifiers build ## builds minio, runs linters, tests
test-decom: install
@echo "Running minio decom tests"
@env bash $(PWD)/docs/distributed/decom.sh
@env bash $(PWD)/docs/distributed/decom-encrypted.sh
@env bash $(PWD)/docs/distributed/decom-encrypted-sse-s3.sh
test-upgrade: build
@echo "Running minio upgrade tests"

View File

@ -658,6 +658,9 @@ func (er erasureObjects) PutObjectPart(ctx context.Context, bucket, object, uplo
fi.ModTime = UTCNow()
md5hex := r.MD5CurrentHexString()
if opts.PreserveETag != "" {
md5hex = opts.PreserveETag
}
var index []byte
if opts.IndexCB != nil {
index = opts.IndexCB()

View File

@ -1099,8 +1099,12 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
Hash: bitrotWriterSum(w),
})
}
if userDefined["etag"] == "" {
userDefined["etag"] = r.MD5CurrentHexString()
if opts.PreserveETag != "" {
userDefined["etag"] = opts.PreserveETag
}
}
// Guess content-type from the extension if possible.

View File

@ -581,6 +581,11 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri
auditLogDecom(ctx, "DecomCopyData", objInfo.Bucket, objInfo.Name, objInfo.VersionID, err)
}()
actualSize, err := objInfo.GetActualSize()
if err != nil {
return err
}
if objInfo.isMultipart() {
uploadID, err := z.NewMultipartUpload(ctx, bucket, objInfo.Name, ObjectOptions{
VersionID: objInfo.VersionID,
@ -593,14 +598,19 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri
defer z.AbortMultipartUpload(ctx, bucket, objInfo.Name, uploadID, ObjectOptions{})
parts := make([]CompletePart, len(objInfo.Parts))
for i, part := range objInfo.Parts {
hr, err := hash.NewReader(gr, part.Size, "", "", part.Size)
hr, err := hash.NewReader(gr, part.Size, "", "", part.ActualSize)
if err != nil {
return fmt.Errorf("decommissionObject: hash.NewReader() %w", err)
}
pi, err := z.PutObjectPart(ctx, bucket, objInfo.Name, uploadID,
part.Number,
NewPutObjReader(hr),
ObjectOptions{})
ObjectOptions{
PreserveETag: part.ETag, // Preserve original ETag to ensure same metadata.
IndexCB: func() []byte {
return part.Index // Preserve part Index to ensure decompression works.
},
})
if err != nil {
return fmt.Errorf("decommissionObject: PutObjectPart() %w", err)
}
@ -617,7 +627,8 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri
}
return err
}
hr, err := hash.NewReader(gr, objInfo.Size, "", "", objInfo.Size)
hr, err := hash.NewReader(gr, objInfo.Size, "", "", actualSize)
if err != nil {
return fmt.Errorf("decommissionObject: hash.NewReader() %w", err)
}
@ -626,9 +637,13 @@ func (z *erasureServerPools) decommissionObject(ctx context.Context, bucket stri
objInfo.Name,
NewPutObjReader(hr),
ObjectOptions{
VersionID: objInfo.VersionID,
MTime: objInfo.ModTime,
UserDefined: objInfo.UserDefined,
VersionID: objInfo.VersionID,
MTime: objInfo.ModTime,
UserDefined: objInfo.UserDefined,
PreserveETag: objInfo.ETag, // Preserve original ETag to ensure same metadata.
IndexCB: func() []byte {
return objInfo.Parts[0].Index // Preserve part Index to ensure decompression works.
},
})
if err != nil {
err = fmt.Errorf("decommissionObject: PutObject() %w", err)
@ -741,11 +756,12 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
bi.Name,
version.Name,
ObjectOptions{
Versioned: vc.PrefixEnabled(version.Name),
VersionID: version.VersionID,
MTime: version.ModTime,
DeleteReplication: version.ReplicationState,
DeleteMarker: true, // make sure we create a delete marker
Versioned: vc.PrefixEnabled(version.Name),
VersionID: version.VersionID,
MTime: version.ModTime,
DeleteReplication: version.ReplicationState,
DeleteMarker: true, // make sure we create a delete marker
SkipDecommissioned: true, // make sure we skip the decommissioned pool
})
var failure bool
if err != nil {
@ -772,7 +788,8 @@ func (z *erasureServerPools) decommissionPool(ctx context.Context, idx int, pool
http.Header{},
noLock, // all mutations are blocked reads are safe without locks.
ObjectOptions{
VersionID: version.VersionID,
VersionID: version.VersionID,
NoDecryption: true,
})
if isErrObjectNotFound(err) {
// object deleted by the application, nothing to do here we move on.

View File

@ -427,8 +427,9 @@ func (z *erasureServerPools) getPoolIdxExistingWithOpts(ctx context.Context, buc
})
for _, pinfo := range poolObjInfos {
// skip all objects from suspended pools for mutating calls.
if z.IsSuspended(pinfo.PoolIndex) && opts.Mutate {
// skip all objects from suspended pools if asked by the
// caller.
if z.IsSuspended(pinfo.PoolIndex) && opts.SkipDecommissioned {
continue
}
@ -461,8 +462,8 @@ func (z *erasureServerPools) getPoolIdxExistingWithOpts(ctx context.Context, buc
// The check is skipped if there is only one pool, and 0, nil is always returned in that case.
func (z *erasureServerPools) getPoolIdxExistingNoLock(ctx context.Context, bucket, object string) (idx int, err error) {
return z.getPoolIdxExistingWithOpts(ctx, bucket, object, ObjectOptions{
NoLock: true,
Mutate: true,
NoLock: true,
SkipDecommissioned: true,
})
}
@ -486,7 +487,7 @@ func (z *erasureServerPools) getPoolIdxNoLock(ctx context.Context, bucket, objec
// if none are found falls back to most available space pool, this function is
// designed to be only used by PutObject, CopyObject (newObject creation) and NewMultipartUpload.
func (z *erasureServerPools) getPoolIdx(ctx context.Context, bucket, object string, size int64) (idx int, err error) {
idx, err = z.getPoolIdxExistingWithOpts(ctx, bucket, object, ObjectOptions{Mutate: true})
idx, err = z.getPoolIdxExistingWithOpts(ctx, bucket, object, ObjectOptions{SkipDecommissioned: true})
if err != nil && !isErrObjectNotFound(err) {
return idx, err
}
@ -2275,7 +2276,6 @@ func (z *erasureServerPools) GetObjectTags(ctx context.Context, bucket, object s
return z.serverPools[0].GetObjectTags(ctx, bucket, object, opts)
}
opts.Mutate = false
idx, err := z.getPoolIdxExistingWithOpts(ctx, bucket, object, opts)
if err != nil {
return nil, err
@ -2291,7 +2291,8 @@ func (z *erasureServerPools) TransitionObject(ctx context.Context, bucket, objec
return z.serverPools[0].TransitionObject(ctx, bucket, object, opts)
}
opts.Mutate = true // Avoid transitioning an object from a pool being decommissioned.
// Avoid transitioning an object from a pool being decommissioned.
opts.SkipDecommissioned = true
idx, err := z.getPoolIdxExistingWithOpts(ctx, bucket, object, opts)
if err != nil {
return err
@ -2307,7 +2308,8 @@ func (z *erasureServerPools) RestoreTransitionedObject(ctx context.Context, buck
return z.serverPools[0].RestoreTransitionedObject(ctx, bucket, object, opts)
}
opts.Mutate = true // Avoid restoring object from a pool being decommissioned.
// Avoid restoring object from a pool being decommissioned.
opts.SkipDecommissioned = true
idx, err := z.getPoolIdxExistingWithOpts(ctx, bucket, object, opts)
if err != nil {
return err

View File

@ -59,6 +59,8 @@ type ObjectOptions struct {
Transition TransitionOptions
Expiration ExpirationOptions
NoDecryption bool // indicates if the stream must be decrypted.
PreserveETag string // preserves this etag during a PUT call.
NoLock bool // indicates to lower layers if the caller is expecting to hold locks.
ProxyRequest bool // only set for GET/HEAD in active-active replication scenario
ProxyHeaderSet bool // only set for GET/HEAD in active-active replication scenario
@ -73,10 +75,11 @@ type ObjectOptions struct {
// Use the maximum parity (N/2), used when saving server configuration files
MaxParity bool
// Mutate set to 'true' if the call is namespace mutation call
Mutate bool
WalkAscending bool // return Walk results in ascending order of versions
// SkipDecommissioned set to 'true' if the call requires skipping the pool being decommissioned.
// mainly set for certain WRITE operations.
SkipDecommissioned bool
WalkAscending bool // return Walk results in ascending order of versions
PrefixEnabledFn func(prefix string) bool // function which returns true if versioning is enabled on prefix
// IndexCB will return any index created but the compression.

View File

@ -74,6 +74,7 @@ func getDefaultOpts(header http.Header, copySource bool, metadata map[string]str
if _, ok := header[xhttp.MinIOSourceReplicationRequest]; ok {
opts.ReplicationRequest = true
}
opts.Speedtest = header.Get(globalObjectPerfUserMetadata) != ""
return
}

View File

@ -646,8 +646,9 @@ func NewGetObjectReader(rs *HTTPRangeSpec, oi ObjectInfo, opts ObjectOptions) (
return nil, 0, 0, err
}
// if object is encrypted and it is a restore request, fetch content without decrypting.
if opts.Transition.RestoreRequest != nil {
// if object is encrypted and it is a restore request or if NoDecryption
// was requested, fetch content without decrypting.
if opts.Transition.RestoreRequest != nil || opts.NoDecryption {
isEncrypted = false
isCompressed = false
}

View File

@ -0,0 +1,154 @@
#!/bin/bash
if [ -n "$TEST_DEBUG" ]; then
set -x
fi
pkill minio
rm -rf /tmp/xl
if [ ! -f ./mc ]; then
wget --quiet -O mc https://dl.minio.io/client/mc/release/linux-amd64/mc && \
chmod +x mc
fi
export CI=true
export MINIO_KMS_SECRET_KEY=my-minio-key:OSMM+vkKUTCvQs9YL/CVMIMt43HFhkUpqJxTmGl6rYw=
export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9000/"
(minio server /tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null)&
pid=$!
sleep 2
./mc admin user add myminio/ minio123 minio123
./mc admin user add myminio/ minio12345 minio12345
./mc admin policy add myminio/ rw ./docs/distributed/rw.json
./mc admin policy add myminio/ lake ./docs/distributed/rw.json
./mc admin policy set myminio/ rw user=minio123
./mc admin policy set myminio/ lake,rw user=minio12345
./mc mb -l myminio/versioned
./mc encrypt set sse-s3 myminio/versioned
./mc mirror internal myminio/versioned/ --quiet >/dev/null
## Soft delete (creates delete markers)
./mc rm -r --force myminio/versioned >/dev/null
## mirror again to create another set of version on top
./mc mirror internal myminio/versioned/ --quiet >/dev/null
expected_checksum=$(./mc cat internal/dsync/drwmutex.go | md5sum)
user_count=$(./mc admin user list myminio/ | wc -l)
policy_count=$(./mc admin policy list myminio/ | wc -l)
kill $pid
(minio server /tmp/xl/{1...10}/disk{0...1} /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded.log) &
pid=$!
sleep 2
expanded_user_count=$(./mc admin user list myminio/ | wc -l)
expanded_policy_count=$(./mc admin policy list myminio/ | wc -l)
if [ $user_count -ne $expanded_user_count ]; then
echo "BUG: original user count differs from expanded setup"
exit 1
fi
if [ $policy_count -ne $expanded_policy_count ]; then
echo "BUG: original policy count differs from expanded setup"
exit 1
fi
./mc version info myminio/versioned | grep -q "versioning is enabled"
ret=$?
if [ $ret -ne 0 ]; then
echo "expected versioning enabled after expansion"
exit 1
fi
./mc encrypt info myminio/versioned | grep -q "Auto encryption 'sse-s3' is enabled"
ret=$?
if [ $ret -ne 0 ]; then
echo "expected encryption enabled after expansion"
exit 1
fi
./mc mirror cmd myminio/versioned/ --quiet >/dev/null
./mc ls -r myminio/versioned/ > expanded_ns.txt
./mc ls -r --versions myminio/versioned/ > expanded_ns_versions.txt
./mc admin decom start myminio/ /tmp/xl/{1...10}/disk{0...1}
until $(./mc admin decom status myminio/ | grep -q Complete)
do
echo "waiting for decom to finish..."
sleep 1
done
kill $pid
(minio server /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/removed.log)&
pid=$!
sleep 2
decom_user_count=$(./mc admin user list myminio/ | wc -l)
decom_policy_count=$(./mc admin policy list myminio/ | wc -l)
if [ $user_count -ne $decom_user_count ]; then
echo "BUG: original user count differs after decommission"
exit 1
fi
if [ $policy_count -ne $decom_policy_count ]; then
echo "BUG: original policy count differs after decommission"
exit 1
fi
./mc version info myminio/versioned | grep -q "versioning is enabled"
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected versioning enabled after decommission"
exit 1
fi
./mc encrypt info myminio/versioned | grep -q "Auto encryption 'sse-s3' is enabled"
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected encryption enabled after expansion"
exit 1
fi
got_checksum=$(./mc cat myminio/versioned/dsync/drwmutex.go | md5sum)
if [ "${expected_checksum}" != "${got_checksum}" ]; then
echo "BUG: decommission failed on encrypted objects: expected ${expected_checksum} got ${got_checksum}"
exit 1
fi
./mc ls -r myminio/versioned > decommissioned_ns.txt
./mc ls -r --versions myminio/versioned > decommissioned_ns_versions.txt
out=$(diff -qpruN expanded_ns.txt decommissioned_ns.txt)
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected no missing entries after decommission: $out"
exit 1
fi
out=$(diff -qpruN expanded_ns_versions.txt decommissioned_ns_versions.txt)
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected no missing entries after decommission: $out"
exit 1
fi
kill $pid

View File

@ -0,0 +1,139 @@
#!/bin/bash
if [ -n "$TEST_DEBUG" ]; then
set -x
fi
pkill minio
rm -rf /tmp/xl
if [ ! -f ./mc ]; then
wget --quiet -O mc https://dl.minio.io/client/mc/release/linux-amd64/mc && \
chmod +x mc
fi
export CI=true
export MINIO_KMS_AUTO_ENCRYPTION=on
export MINIO_KMS_SECRET_KEY=my-minio-key:OSMM+vkKUTCvQs9YL/CVMIMt43HFhkUpqJxTmGl6rYw=
(minio server /tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null)&
pid=$!
sleep 2
export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9000/"
./mc admin user add myminio/ minio123 minio123
./mc admin user add myminio/ minio12345 minio12345
./mc admin policy add myminio/ rw ./docs/distributed/rw.json
./mc admin policy add myminio/ lake ./docs/distributed/rw.json
./mc admin policy set myminio/ rw user=minio123
./mc admin policy set myminio/ lake,rw user=minio12345
./mc mb -l myminio/versioned
./mc mirror internal myminio/versioned/ --quiet >/dev/null
## Soft delete (creates delete markers)
./mc rm -r --force myminio/versioned >/dev/null
## mirror again to create another set of version on top
./mc mirror internal myminio/versioned/ --quiet >/dev/null
expected_checksum=$(./mc cat internal/dsync/drwmutex.go | md5sum)
user_count=$(./mc admin user list myminio/ | wc -l)
policy_count=$(./mc admin policy list myminio/ | wc -l)
kill $pid
(minio server /tmp/xl/{1...10}/disk{0...1} /tmp/xl/{11...30}/disk{0...3} 2>&1 >/tmp/expanded.log) &
pid=$!
sleep 2
expanded_user_count=$(./mc admin user list myminio/ | wc -l)
expanded_policy_count=$(./mc admin policy list myminio/ | wc -l)
if [ $user_count -ne $expanded_user_count ]; then
echo "BUG: original user count differs from expanded setup"
exit 1
fi
if [ $policy_count -ne $expanded_policy_count ]; then
echo "BUG: original policy count differs from expanded setup"
exit 1
fi
./mc version info myminio/versioned | grep -q "versioning is enabled"
ret=$?
if [ $ret -ne 0 ]; then
echo "expected versioning enabled after expansion"
exit 1
fi
./mc mirror cmd myminio/versioned/ --quiet >/dev/null
./mc ls -r myminio/versioned/ > expanded_ns.txt
./mc ls -r --versions myminio/versioned/ > expanded_ns_versions.txt
./mc admin decom start myminio/ /tmp/xl/{1...10}/disk{0...1}
until $(./mc admin decom status myminio/ | grep -q Complete)
do
echo "waiting for decom to finish..."
sleep 1
done
kill $pid
(minio server /tmp/xl/{11...30}/disk{0...3} 2>&1 >/dev/null)&
pid=$!
sleep 2
decom_user_count=$(./mc admin user list myminio/ | wc -l)
decom_policy_count=$(./mc admin policy list myminio/ | wc -l)
if [ $user_count -ne $decom_user_count ]; then
echo "BUG: original user count differs after decommission"
exit 1
fi
if [ $policy_count -ne $decom_policy_count ]; then
echo "BUG: original policy count differs after decommission"
exit 1
fi
./mc version info myminio/versioned | grep -q "versioning is enabled"
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected versioning enabled after decommission"
exit 1
fi
./mc ls -r myminio/versioned > decommissioned_ns.txt
./mc ls -r --versions myminio/versioned > decommissioned_ns_versions.txt
out=$(diff -qpruN expanded_ns.txt decommissioned_ns.txt)
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected no missing entries after decommission: $out"
exit 1
fi
out=$(diff -qpruN expanded_ns_versions.txt decommissioned_ns_versions.txt)
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected no missing entries after decommission: $out"
exit 1
fi
got_checksum=$(./mc cat myminio/versioned/dsync/drwmutex.go | md5sum)
if [ "${expected_checksum}" != "${got_checksum}" ]; then
echo "BUG: decommission failed on encrypted objects: expected ${expected_checksum} got ${got_checksum}"
exit 1
fi
kill $pid

View File

@ -1,16 +1,19 @@
#!/bin/bash
if [ -n "$DEBUG" ]; then
if [ -n "$TEST_DEBUG" ]; then
set -x
fi
pkill minio
rm -rf /tmp/xl
wget --quiet -O mc https://dl.minio.io/client/mc/release/linux-amd64/mc && \
chmod +x mc
if [ ! -f ./mc ]; then
wget --quiet -O mc https://dl.minio.io/client/mc/release/linux-amd64/mc && \
chmod +x mc
fi
export CI=true
(minio server /tmp/xl/{1...10}/disk{0...1} 2>&1 >/dev/null)&
pid=$!
@ -37,6 +40,8 @@ export MC_HOST_myminio="http://minioadmin:minioadmin@localhost:9000/"
## mirror again to create another set of version on top
./mc mirror internal myminio/versioned/ --quiet >/dev/null
expected_checksum=$(./mc cat internal/dsync/drwmutex.go | md5sum)
user_count=$(./mc admin user list myminio/ | wc -l)
policy_count=$(./mc admin policy list myminio/ | wc -l)
@ -113,12 +118,20 @@ out=$(diff -qpruN expanded_ns.txt decommissioned_ns.txt)
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected no missing entries after decommission: $out"
exit 1
fi
out=$(diff -qpruN expanded_ns_versions.txt decommissioned_ns_versions.txt)
ret=$?
if [ $ret -ne 0 ]; then
echo "BUG: expected no missing entries after decommission: $out"
exit 1
fi
got_checksum=$(./mc cat myminio/versioned/dsync/drwmutex.go | md5sum)
if [ "${expected_checksum}" != "${got_checksum}" ]; then
echo "BUG: decommission failed on encrypted objects: expected ${expected_checksum} got ${got_checksum}"
exit 1
fi
kill $pid