mirror of
https://github.com/minio/minio.git
synced 2025-11-22 02:35:30 -05:00
feat: implement prefix-level versioning exclusion (#14828)
Spark/Hadoop workloads which use Hadoop MR Committer v1/v2 algorithm upload objects to a temporary prefix in a bucket. These objects are 'renamed' to a different prefix on Job commit. Object storage admins are forced to configure separate ILM policies to expire these objects and their versions to reclaim space. Our solution: This can be avoided by simply marking objects under these prefixes to be excluded from versioning, as shown below. Consequently, these objects are excluded from replication, and don't require ILM policies to prune unnecessary versions. - MinIO Extension to Bucket Version Configuration ```xml <VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/"> <Status>Enabled</Status> <ExcludeFolders>true</ExcludeFolders> <ExcludedPrefixes> <Prefix>app1-jobs/*/_temporary/</Prefix> </ExcludedPrefixes> <ExcludedPrefixes> <Prefix>app2-jobs/*/__magic/</Prefix> </ExcludedPrefixes> <!-- .. up to 10 prefixes in all --> </VersioningConfiguration> ``` Note: `ExcludeFolders` excludes all folders in a bucket from versioning. This is required to prevent the parent folders from accumulating delete markers, especially those which are shared across spark workloads spanning projects/teams. - To enable version exclusion on a list of prefixes ``` mc version enable --excluded-prefixes "app1-jobs/*/_temporary/,app2-jobs/*/_magic," --exclude-prefix-marker myminio/test ```
This commit is contained in:
committed by
GitHub
parent
3ec1844e4a
commit
ad8e611098
@@ -204,7 +204,7 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
|
||||
|
||||
objInfo, err := getObjectInfo(ctx, bucket, object, opts)
|
||||
if err != nil {
|
||||
if globalBucketVersioningSys.Enabled(bucket) {
|
||||
if globalBucketVersioningSys.PrefixEnabled(bucket, object) {
|
||||
// Versioning enabled quite possibly object is deleted might be delete-marker
|
||||
// if present set the headers, no idea why AWS S3 sets these headers.
|
||||
if objInfo.VersionID != "" && objInfo.DeleteMarker {
|
||||
@@ -445,7 +445,7 @@ func (api objectAPIHandlers) getObjectHandler(ctx context.Context, objectAPI Obj
|
||||
writeErrorResponse(ctx, w, toAPIError(ctx, proxy.Err), r.URL)
|
||||
return
|
||||
}
|
||||
if globalBucketVersioningSys.Enabled(bucket) && gr != nil {
|
||||
if globalBucketVersioningSys.PrefixEnabled(bucket, object) && gr != nil {
|
||||
if !gr.ObjInfo.VersionPurgeStatus.Empty() {
|
||||
// Shows the replication status of a permanent delete of a version
|
||||
w.Header()[xhttp.MinIODeleteReplicationStatus] = []string{string(gr.ObjInfo.VersionPurgeStatus)}
|
||||
@@ -673,7 +673,7 @@ func (api objectAPIHandlers) headObjectHandler(ctx context.Context, objectAPI Ob
|
||||
}
|
||||
}
|
||||
if !proxy.Proxy {
|
||||
if globalBucketVersioningSys.Enabled(bucket) {
|
||||
if globalBucketVersioningSys.PrefixEnabled(bucket, object) {
|
||||
switch {
|
||||
case !objInfo.VersionPurgeStatus.Empty():
|
||||
w.Header()[xhttp.MinIODeleteReplicationStatus] = []string{string(objInfo.VersionPurgeStatus)}
|
||||
@@ -1087,7 +1087,7 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re
|
||||
if isErrPreconditionFailed(err) {
|
||||
return
|
||||
}
|
||||
if globalBucketVersioningSys.Enabled(srcBucket) && gr != nil {
|
||||
if globalBucketVersioningSys.PrefixEnabled(srcBucket, srcObject) && gr != nil {
|
||||
// Versioning enabled quite possibly object is deleted might be delete-marker
|
||||
// if present set the headers, no idea why AWS S3 sets these headers.
|
||||
if gr.ObjInfo.VersionID != "" && gr.ObjInfo.DeleteMarker {
|
||||
@@ -2480,7 +2480,7 @@ func (api objectAPIHandlers) CopyObjectPartHandler(w http.ResponseWriter, r *htt
|
||||
if isErrPreconditionFailed(err) {
|
||||
return
|
||||
}
|
||||
if globalBucketVersioningSys.Enabled(srcBucket) && gr != nil {
|
||||
if globalBucketVersioningSys.PrefixEnabled(srcBucket, srcObject) && gr != nil {
|
||||
// Versioning enabled quite possibly object is deleted might be delete-marker
|
||||
// if present set the headers, no idea why AWS S3 sets these headers.
|
||||
if gr.ObjInfo.VersionID != "" && gr.ObjInfo.DeleteMarker {
|
||||
@@ -3211,8 +3211,8 @@ func (api objectAPIHandlers) CompleteMultipartUploadHandler(w http.ResponseWrite
|
||||
w.Write(encodedErrorResponse)
|
||||
}
|
||||
|
||||
versioned := globalBucketVersioningSys.Enabled(bucket)
|
||||
suspended := globalBucketVersioningSys.Suspended(bucket)
|
||||
versioned := globalBucketVersioningSys.PrefixEnabled(bucket, object)
|
||||
suspended := globalBucketVersioningSys.PrefixSuspended(bucket, object)
|
||||
os := newObjSweeper(bucket, object).WithVersioning(versioned, suspended)
|
||||
if !globalTierConfigMgr.Empty() {
|
||||
// Get appropriate object info to identify the remote object to delete
|
||||
|
||||
Reference in New Issue
Block a user