fix: multiple fixes in prefix exclude implementation (#14877)

- do not need to restrict prefix exclusions that do not
  have `/` as suffix, relax this requirement as spark may
  have staging folders with other autogenerated characters
  , so we are better off doing full prefix March and skip. 

- multiple delete objects was incorrectly creating a
  null delete marker on a versioned bucket instead of
  creating a proper versioned delete marker.

- do not suspend paths on the excluded prefixes during
  delete operations to avoid creating `null` delete markers,
  honor suspension of versioning only at bucket level for
  delete markers.
This commit is contained in:
Harshavardhana 2022-05-07 22:06:44 -07:00 committed by GitHub
parent def75ffcfe
commit 5cffd3780a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 25 additions and 48 deletions

View File

@ -512,7 +512,7 @@ func (api objectAPIHandlers) DeleteMultipleObjectsHandler(w http.ResponseWriter,
opts := ObjectOptions{
VersionID: object.VersionID,
Versioned: vc.PrefixEnabled(object.ObjectName),
VersionSuspended: vc.PrefixSuspended(object.ObjectName),
VersionSuspended: vc.Suspended(),
}
if replicateDeletes || object.VersionID != "" && hasLockEnabled || !globalTierConfigMgr.Empty() {
@ -579,7 +579,7 @@ func (api objectAPIHandlers) DeleteMultipleObjectsHandler(w http.ResponseWriter,
deleteList := toNames(objectsToDelete)
dObjects, errs := deleteObjectsFn(ctx, bucket, deleteList, ObjectOptions{
PrefixEnabledFn: vc.PrefixEnabled,
PrefixSuspendedFn: vc.PrefixSuspended,
VersionSuspended: vc.Suspended(),
})
for i := range errs {
@ -635,23 +635,13 @@ func (api objectAPIHandlers) DeleteMultipleObjectsHandler(w http.ResponseWriter,
continue
}
if replicateDeletes {
if dobj.DeleteMarkerReplicationStatus() == replication.Pending || dobj.VersionPurgeStatus() == Pending {
if replicateDeletes && (dobj.DeleteMarkerReplicationStatus() == replication.Pending || dobj.VersionPurgeStatus() == Pending) {
dv := DeletedObjectReplicationInfo{
DeletedObject: dobj,
Bucket: bucket,
}
scheduleReplicationDelete(ctx, dv, objectAPI)
}
}
}
// Notify deleted event for objects.
for _, dobj := range deletedObjects {
if dobj.ObjectName == "" {
continue
}
eventName := event.ObjectRemovedDelete
objInfo := ObjectInfo{

View File

@ -271,7 +271,7 @@ func checkReplicateDelete(ctx context.Context, bucket string, dobj ObjectToDelet
}
// Skip replication if this object's prefix is excluded from being
// versioned.
if delOpts.VersionSuspended {
if !delOpts.Versioned {
return
}
opts := replication.ObjectOpts{

View File

@ -1189,9 +1189,6 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec
if objects[i].VersionID == "" {
// MinIO extension to bucket version configuration
suspended := opts.VersionSuspended
if opts.PrefixSuspendedFn != nil {
suspended = opts.PrefixSuspendedFn(objects[i].ObjectName)
}
versioned := opts.Versioned
if opts.PrefixEnabledFn != nil {
versioned = opts.PrefixEnabledFn(objects[i].ObjectName)
@ -1204,7 +1201,7 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec
// Versioning suspended means that we add a `null` version
// delete marker, if not add a new version for this delete
// marker.
if opts.Versioned {
if versioned {
vr.VersionID = mustGetUUID()
}
}

View File

@ -78,7 +78,6 @@ type ObjectOptions struct {
WalkAscending bool // return Walk results in ascending order of versions
PrefixEnabledFn func(prefix string) bool // function which returns true if versioning is enabled on prefix
PrefixSuspendedFn func(prefix string) bool // function which returns true if versioning is suspended on prefix
}
// ExpirationOptions represents object options for object expiration at objectLayer.

View File

@ -166,7 +166,7 @@ func delOpts(ctx context.Context, r *http.Request, bucket, object string) (opts
return opts, err
}
opts.Versioned = globalBucketVersioningSys.PrefixEnabled(bucket, object)
opts.VersionSuspended = globalBucketVersioningSys.PrefixSuspended(bucket, object)
opts.VersionSuspended = globalBucketVersioningSys.Suspended(bucket)
delMarker := strings.TrimSpace(r.Header.Get(xhttp.MinIOSourceDeleteMarker))
if delMarker != "" {
switch delMarker {

View File

@ -276,7 +276,7 @@ func deleteObjectVersions(ctx context.Context, o ObjectLayer, bucket string, toD
vc, _ := globalBucketVersioningSys.Get(bucket)
deletedObjs, errs := o.DeleteObjects(ctx, bucket, toDel, ObjectOptions{
PrefixEnabledFn: vc.PrefixEnabled,
PrefixSuspendedFn: vc.PrefixSuspended,
VersionSuspended: vc.Suspended(),
})
var logged bool
for i, err := range errs {

View File

@ -65,6 +65,8 @@ Similarly to suspend versioning set the configuration with Status set to `Suspen
## MinIO extension to Bucket Versioning
### Motivation
**PLEASE READ: This feature is meant for advanced usecases only where the setup is using bucket versioning or with replicated buckets, use this feature to optimize versioning behavior for some specific applications. MinIO experts will evaluate and guide on the benefits for your application, please reach out to us on https://subnet.min.io.**
Spark/Hadoop workloads which use Hadoop MR Committer v1/v2 algorithm upload objects to a temporary prefix in a bucket. These objects are 'renamed' to a different prefix on Job commit. Object storage admins are forced to configure separate ILM policies to expire these objects and their versions to reclaim space.
### Solution
@ -76,19 +78,23 @@ To exclude objects under a list of prefix (glob) patterns from being versioned,
<ExcludeFolders>true</ExcludeFolders>
<ExcludedPrefixes>
<Prefix>app1-jobs/*/_temporary/</Prefix>
<Prefix>*/_temporary</Prefix>
</ExcludedPrefixes>
<ExcludedPrefixes>
<Prefix>app2-jobs/*/_magic/</Prefix>
<Prefix>*/__magic</Prefix>
</ExcludedPrefixes>
<ExcludedPrefixes>
<Prefix>*/_staging</Prefix>
</ExcludedPrefixes>
<!-- .. up to 10 prefixes in all -->
</VersioningConfiguration>
```
Note: Objects matching these prefixes will behave as though versioning were suspended. These objects **will not** be replicated if replication is configured.
Only users with explicit permissions or the root credential can configure the versioning state of any bucket.
### Features
- Objects matching these prefixes will behave as though versioning were suspended. These objects **will not** be replicated if bucket has replication configured.
- Objects matching these prefixes will also not leave delete markers, dramatically reduces namespace pollution while keeping the benefits of replication.
- Users with explicit permissions or the root credential can configure the versioning state of any bucket.
## Examples of enabling bucket versioning using MinIO Java SDK

View File

@ -39,7 +39,6 @@ const (
var (
errExcludedPrefixNotSupported = Errorf("excluded prefixes extension supported only when versioning is enabled")
errTooManyExcludedPrefixes = Errorf("too many excluded prefixes")
errInvalidPrefixPattern = Errorf("invalid prefix pattern")
)
// ExcludedPrefix - holds individual prefixes excluded from being versioned.
@ -73,11 +72,6 @@ func (v Versioning) Validate() error {
if len(v.ExcludedPrefixes) > maxExcludedPrefixes {
return errTooManyExcludedPrefixes
}
for _, sprefix := range v.ExcludedPrefixes {
if !strings.HasSuffix(sprefix.Prefix, "/") {
return errInvalidPrefixPattern
}
}
case Suspended:
if len(v.ExcludedPrefixes) > 0 {

View File

@ -112,15 +112,6 @@ func TestParseConfig(t *testing.T) {
excludedPrefixes: []string{"path/to/my/workload/_staging/", "path/to/my/workload/_temporary/"},
excludeFolders: true,
},
{
input: `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
<Status>Enabled</Status>
<ExcludedPrefixes>
<Prefix>path/to/my/workload/_staging</Prefix>
</ExcludedPrefixes>
</VersioningConfiguration>`,
err: errInvalidPrefixPattern,
},
}
for i, tc := range testcases {