feat: implement prefix-level versioning exclusion (#14828)

Spark/Hadoop workloads which use Hadoop MR 
Committer v1/v2 algorithm upload objects to a 
temporary prefix in a bucket. These objects are 
'renamed' to a different prefix on Job commit. 
Object storage admins are forced to configure 
separate ILM policies to expire these objects 
and their versions to reclaim space.

Our solution:

This can be avoided by simply marking objects 
under these prefixes to be excluded from versioning, 
as shown below. Consequently, these objects are 
excluded from replication, and don't require ILM 
policies to prune unnecessary versions.

-  MinIO Extension to Bucket Version Configuration
```xml
<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/"> 
        <Status>Enabled</Status>
        <ExcludeFolders>true</ExcludeFolders>
        <ExcludedPrefixes>
          <Prefix>app1-jobs/*/_temporary/</Prefix>
        </ExcludedPrefixes>
        <ExcludedPrefixes>
          <Prefix>app2-jobs/*/__magic/</Prefix>
        </ExcludedPrefixes>

        <!-- .. up to 10 prefixes in all -->     
</VersioningConfiguration>
```
Note: `ExcludeFolders` excludes all folders in a bucket 
from versioning. This is required to prevent the parent 
folders from accumulating delete markers, especially
those which are shared across spark workloads 
spanning projects/teams.

- To enable version exclusion on a list of prefixes

```
mc version enable --excluded-prefixes "app1-jobs/*/_temporary/,app2-jobs/*/_magic," --exclude-prefix-marker myminio/test
```
This commit is contained in:
Krishnan Parthasarathi
2022-05-06 19:05:28 -07:00
committed by GitHub
parent 3ec1844e4a
commit ad8e611098
20 changed files with 478 additions and 59 deletions

View File

@@ -20,6 +20,9 @@ package versioning
import (
"encoding/xml"
"io"
"strings"
"github.com/minio/pkg/wildcard"
)
// State - enabled/disabled/suspended states
@@ -33,12 +36,27 @@ const (
Suspended State = "Suspended"
)
var (
errExcludedPrefixNotSupported = Errorf("excluded prefixes extension supported only when versioning is enabled")
errTooManyExcludedPrefixes = Errorf("too many excluded prefixes")
errInvalidPrefixPattern = Errorf("invalid prefix pattern")
)
// ExcludedPrefix - holds individual prefixes excluded from being versioned.
type ExcludedPrefix struct {
Prefix string
}
// Versioning - Configuration for bucket versioning.
type Versioning struct {
XMLNS string `xml:"xmlns,attr,omitempty"`
XMLName xml.Name `xml:"VersioningConfiguration"`
// MFADelete State `xml:"MFADelete,omitempty"` // not supported yet.
Status State `xml:"Status,omitempty"`
// MinIO extension - allows selective, prefix-level versioning exclusion.
// Requires versioning to be enabled
ExcludedPrefixes []ExcludedPrefix `xml:",omitempty"`
ExcludeFolders bool `xml:",omitempty"`
}
// Validate - validates the versioning configuration
@@ -50,7 +68,21 @@ func (v Versioning) Validate() error {
// return Errorf("unsupported MFADelete state %s", v.MFADelete)
// }
switch v.Status {
case Enabled, Suspended:
case Enabled:
const maxExcludedPrefixes = 10
if len(v.ExcludedPrefixes) > maxExcludedPrefixes {
return errTooManyExcludedPrefixes
}
for _, sprefix := range v.ExcludedPrefixes {
if !strings.HasSuffix(sprefix.Prefix, "/") {
return errInvalidPrefixPattern
}
}
case Suspended:
if len(v.ExcludedPrefixes) > 0 {
return errExcludedPrefixNotSupported
}
default:
return Errorf("unsupported Versioning status %s", v.Status)
}
@@ -62,11 +94,67 @@ func (v Versioning) Enabled() bool {
return v.Status == Enabled
}
// PrefixEnabled - returns true if versioning is enabled at the bucket and given
// prefix, false otherwise.
func (v Versioning) PrefixEnabled(prefix string) bool {
if v.Status != Enabled {
return false
}
if prefix == "" {
return true
}
if v.ExcludeFolders && strings.HasSuffix(prefix, "/") {
return false
}
for _, sprefix := range v.ExcludedPrefixes {
// Note: all excluded prefix patterns end with `/` (See Validate)
sprefix.Prefix += "*"
if matched := wildcard.MatchSimple(sprefix.Prefix, prefix); matched {
return false
}
}
return true
}
// Suspended - returns true if versioning is suspended
func (v Versioning) Suspended() bool {
return v.Status == Suspended
}
// PrefixSuspended - returns true if versioning is suspended at the bucket level
// or suspended on the given prefix.
func (v Versioning) PrefixSuspended(prefix string) bool {
if v.Status == Suspended {
return true
}
if v.Status == Enabled {
if prefix == "" {
return false
}
if v.ExcludeFolders && strings.HasSuffix(prefix, "/") {
return true
}
for _, sprefix := range v.ExcludedPrefixes {
// Note: all excluded prefix patterns end with `/` (See Validate)
sprefix.Prefix += "*"
if matched := wildcard.MatchSimple(sprefix.Prefix, prefix); matched {
return true
}
}
}
return false
}
// PrefixesExcluded returns true if v contains one or more excluded object
// prefixes or if ExcludeFolders is true.
func (v Versioning) PrefixesExcluded() bool {
return len(v.ExcludedPrefixes) > 0 || v.ExcludeFolders
}
// ParseConfig - parses data in given reader to VersioningConfiguration.
func ParseConfig(reader io.Reader) (*Versioning, error) {
var v Versioning