mirror of
https://github.com/minio/minio.git
synced 2025-11-07 21:02:58 -05:00
feat: implement prefix-level versioning exclusion (#14828)
Spark/Hadoop workloads which use the Hadoop MR Committer v1/v2 algorithms upload objects to a temporary prefix in a bucket. These objects are 'renamed' to a different prefix on Job commit. Object storage admins are forced to configure separate ILM policies to expire these objects and their versions to reclaim space. Our solution: This can be avoided by simply marking objects under these prefixes to be excluded from versioning, as shown below. Consequently, these objects are excluded from replication, and don't require ILM policies to prune unnecessary versions. - MinIO Extension to Bucket Version Configuration ```xml <VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/"> <Status>Enabled</Status> <ExcludeFolders>true</ExcludeFolders> <ExcludedPrefixes> <Prefix>app1-jobs/*/_temporary/</Prefix> </ExcludedPrefixes> <ExcludedPrefixes> <Prefix>app2-jobs/*/__magic/</Prefix> </ExcludedPrefixes> <!-- .. up to 10 prefixes in all --> </VersioningConfiguration> ``` Note: `ExcludeFolders` excludes all folders in a bucket from versioning. This is required to prevent the parent folders from accumulating delete markers, especially those which are shared across spark workloads spanning projects/teams. - To enable version exclusion on a list of prefixes (every prefix pattern must end with `/`) ``` mc version enable --excluded-prefixes "app1-jobs/*/_temporary/,app2-jobs/*/__magic/" --exclude-prefix-marker myminio/test ```
This commit is contained in:
committed by
GitHub
parent
3ec1844e4a
commit
ad8e611098
@@ -20,6 +20,9 @@ package versioning
|
||||
import (
|
||||
"encoding/xml"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/minio/pkg/wildcard"
|
||||
)
|
||||
|
||||
// State - enabled/disabled/suspended states
|
||||
@@ -33,12 +36,27 @@ const (
|
||||
Suspended State = "Suspended"
|
||||
)
|
||||
|
||||
var (
|
||||
errExcludedPrefixNotSupported = Errorf("excluded prefixes extension supported only when versioning is enabled")
|
||||
errTooManyExcludedPrefixes = Errorf("too many excluded prefixes")
|
||||
errInvalidPrefixPattern = Errorf("invalid prefix pattern")
|
||||
)
|
||||
|
||||
// ExcludedPrefix is a single entry of Versioning.ExcludedPrefixes: one
// prefix pattern whose matching objects are excluded from versioning.
type ExcludedPrefix struct {
	// Prefix is the pattern text. Versioning.Validate rejects patterns
	// that do not end with "/".
	Prefix string
}
|
||||
|
||||
// Versioning - Configuration for bucket versioning.
|
||||
type Versioning struct {
|
||||
XMLNS string `xml:"xmlns,attr,omitempty"`
|
||||
XMLName xml.Name `xml:"VersioningConfiguration"`
|
||||
// MFADelete State `xml:"MFADelete,omitempty"` // not supported yet.
|
||||
Status State `xml:"Status,omitempty"`
|
||||
// MinIO extension - allows selective, prefix-level versioning exclusion.
|
||||
// Requires versioning to be enabled
|
||||
ExcludedPrefixes []ExcludedPrefix `xml:",omitempty"`
|
||||
ExcludeFolders bool `xml:",omitempty"`
|
||||
}
|
||||
|
||||
// Validate - validates the versioning configuration
|
||||
@@ -50,7 +68,21 @@ func (v Versioning) Validate() error {
|
||||
// return Errorf("unsupported MFADelete state %s", v.MFADelete)
|
||||
// }
|
||||
switch v.Status {
|
||||
case Enabled, Suspended:
|
||||
case Enabled:
|
||||
const maxExcludedPrefixes = 10
|
||||
if len(v.ExcludedPrefixes) > maxExcludedPrefixes {
|
||||
return errTooManyExcludedPrefixes
|
||||
}
|
||||
for _, sprefix := range v.ExcludedPrefixes {
|
||||
if !strings.HasSuffix(sprefix.Prefix, "/") {
|
||||
return errInvalidPrefixPattern
|
||||
}
|
||||
}
|
||||
|
||||
case Suspended:
|
||||
if len(v.ExcludedPrefixes) > 0 {
|
||||
return errExcludedPrefixNotSupported
|
||||
}
|
||||
default:
|
||||
return Errorf("unsupported Versioning status %s", v.Status)
|
||||
}
|
||||
@@ -62,11 +94,67 @@ func (v Versioning) Enabled() bool {
|
||||
return v.Status == Enabled
|
||||
}
|
||||
|
||||
// PrefixEnabled - returns true if versioning is enabled at the bucket and given
|
||||
// prefix, false otherwise.
|
||||
func (v Versioning) PrefixEnabled(prefix string) bool {
|
||||
if v.Status != Enabled {
|
||||
return false
|
||||
}
|
||||
|
||||
if prefix == "" {
|
||||
return true
|
||||
}
|
||||
if v.ExcludeFolders && strings.HasSuffix(prefix, "/") {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, sprefix := range v.ExcludedPrefixes {
|
||||
// Note: all excluded prefix patterns end with `/` (See Validate)
|
||||
sprefix.Prefix += "*"
|
||||
|
||||
if matched := wildcard.MatchSimple(sprefix.Prefix, prefix); matched {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// Suspended - returns true if versioning is suspended
|
||||
func (v Versioning) Suspended() bool {
|
||||
return v.Status == Suspended
|
||||
}
|
||||
|
||||
// PrefixSuspended - returns true if versioning is suspended at the bucket level
|
||||
// or suspended on the given prefix.
|
||||
func (v Versioning) PrefixSuspended(prefix string) bool {
|
||||
if v.Status == Suspended {
|
||||
return true
|
||||
}
|
||||
if v.Status == Enabled {
|
||||
if prefix == "" {
|
||||
return false
|
||||
}
|
||||
if v.ExcludeFolders && strings.HasSuffix(prefix, "/") {
|
||||
return true
|
||||
}
|
||||
|
||||
for _, sprefix := range v.ExcludedPrefixes {
|
||||
// Note: all excluded prefix patterns end with `/` (See Validate)
|
||||
sprefix.Prefix += "*"
|
||||
if matched := wildcard.MatchSimple(sprefix.Prefix, prefix); matched {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// PrefixesExcluded returns true if v contains one or more excluded object
|
||||
// prefixes or if ExcludeFolders is true.
|
||||
func (v Versioning) PrefixesExcluded() bool {
|
||||
return len(v.ExcludedPrefixes) > 0 || v.ExcludeFolders
|
||||
}
|
||||
|
||||
// ParseConfig - parses data in given reader to VersioningConfiguration.
|
||||
func ParseConfig(reader io.Reader) (*Versioning, error) {
|
||||
var v Versioning
|
||||
|
||||
250
internal/bucket/versioning/versioning_test.go
Normal file
250
internal/bucket/versioning/versioning_test.go
Normal file
@@ -0,0 +1,250 @@
|
||||
// Copyright (c) 2015-2022 MinIO, Inc.
|
||||
//
|
||||
// This file is part of MinIO Object Storage stack
|
||||
//
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Affero General Public License as published by
|
||||
// the Free Software Foundation, either version 3 of the License, or
|
||||
// (at your option) any later version.
|
||||
//
|
||||
// This program is distributed in the hope that it will be useful
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU Affero General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
package versioning
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseConfig(t *testing.T) {
|
||||
testcases := []struct {
|
||||
input string
|
||||
err error
|
||||
excludedPrefixes []string
|
||||
excludeFolders bool
|
||||
}{
|
||||
{
|
||||
input: `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
|
||||
<Status>Enabled</Status>
|
||||
</VersioningConfiguration>`,
|
||||
err: nil,
|
||||
},
|
||||
{
|
||||
input: `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
|
||||
<Status>Enabled</Status>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_temporary/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
</VersioningConfiguration>`,
|
||||
err: nil,
|
||||
excludedPrefixes: []string{"path/to/my/workload/_staging/", "path/to/my/workload/_temporary/"},
|
||||
},
|
||||
{
|
||||
input: `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
|
||||
<Status>Suspended</Status>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
</VersioningConfiguration>`,
|
||||
err: errExcludedPrefixNotSupported,
|
||||
},
|
||||
{
|
||||
input: `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
|
||||
<Status>Enabled</Status>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/ab/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/cd/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/ef/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/gh/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/ij/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/kl/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/mn/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/op/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/qr/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/st/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/uv/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
</VersioningConfiguration>`,
|
||||
err: errTooManyExcludedPrefixes,
|
||||
},
|
||||
{
|
||||
input: `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
|
||||
<Status>Enabled</Status>
|
||||
<ExcludeFolders>true</ExcludeFolders>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_temporary/</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
</VersioningConfiguration>`,
|
||||
err: nil,
|
||||
excludedPrefixes: []string{"path/to/my/workload/_staging/", "path/to/my/workload/_temporary/"},
|
||||
excludeFolders: true,
|
||||
},
|
||||
{
|
||||
input: `<VersioningConfiguration xmlns="http://s3.amazonaws.com/doc/2006-03-01/">
|
||||
<Status>Enabled</Status>
|
||||
<ExcludedPrefixes>
|
||||
<Prefix>path/to/my/workload/_staging</Prefix>
|
||||
</ExcludedPrefixes>
|
||||
</VersioningConfiguration>`,
|
||||
err: errInvalidPrefixPattern,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testcases {
|
||||
var v *Versioning
|
||||
var err error
|
||||
v, err = ParseConfig(strings.NewReader(tc.input))
|
||||
if tc.err != err {
|
||||
t.Fatalf("Test %d: expected %v but got %v", i+1, tc.err, err)
|
||||
}
|
||||
if err != nil {
|
||||
if tc.err == nil {
|
||||
t.Fatalf("Test %d: failed due to %v", i+1, err)
|
||||
}
|
||||
} else {
|
||||
if err := v.Validate(); tc.err != err {
|
||||
t.Fatalf("Test %d: validation failed due to %v", i+1, err)
|
||||
}
|
||||
if len(tc.excludedPrefixes) > 0 {
|
||||
var mismatch bool
|
||||
if len(v.ExcludedPrefixes) != len(tc.excludedPrefixes) {
|
||||
t.Fatalf("Test %d: Expected length of excluded prefixes %d but got %d", i+1, len(tc.excludedPrefixes), len(v.ExcludedPrefixes))
|
||||
}
|
||||
var i int
|
||||
var eprefix string
|
||||
for i, eprefix = range tc.excludedPrefixes {
|
||||
if eprefix != v.ExcludedPrefixes[i].Prefix {
|
||||
mismatch = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if mismatch {
|
||||
t.Fatalf("Test %d: Expected excluded prefix %s but got %s", i+1, tc.excludedPrefixes[i], v.ExcludedPrefixes[i].Prefix)
|
||||
}
|
||||
}
|
||||
if tc.excludeFolders != v.ExcludeFolders {
|
||||
t.Fatalf("Test %d: Expected ExcludeFoldersr=%v but got %v", i+1, tc.excludeFolders, v.ExcludeFolders)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMarshalXML(t *testing.T) {
|
||||
// Validates if Versioning with no excluded prefixes omits
|
||||
// ExcludedPrefixes tags
|
||||
v := Versioning{
|
||||
Status: Enabled,
|
||||
}
|
||||
buf, err := xml.Marshal(v)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to marshal %v: %v", v, err)
|
||||
}
|
||||
|
||||
str := string(buf)
|
||||
if strings.Contains(str, "ExcludedPrefixes") {
|
||||
t.Fatalf("XML shouldn't contain ExcludedPrefixes tag - %s", str)
|
||||
}
|
||||
}
|
||||
|
||||
func TestVersioningZero(t *testing.T) {
|
||||
var v Versioning
|
||||
if v.Enabled() {
|
||||
t.Fatalf("Expected to be disabled but got enabled")
|
||||
}
|
||||
if v.Suspended() {
|
||||
t.Fatalf("Expected to be disabled but got suspended")
|
||||
}
|
||||
}
|
||||
|
||||
func TestExcludeFolders(t *testing.T) {
|
||||
v := Versioning{
|
||||
Status: Enabled,
|
||||
ExcludeFolders: true,
|
||||
}
|
||||
testPrefixes := []string{"jobs/output/_temporary/", "jobs/output/", "jobs/"}
|
||||
for i, prefix := range testPrefixes {
|
||||
if v.PrefixEnabled(prefix) || !v.PrefixSuspended(prefix) {
|
||||
t.Fatalf("Test %d: Expected versioning to be excluded for %s", i+1, prefix)
|
||||
}
|
||||
}
|
||||
|
||||
// Test applicability for regular objects
|
||||
if prefix := "prefix-1/obj-1"; !v.PrefixEnabled(prefix) || v.PrefixSuspended(prefix) {
|
||||
t.Fatalf("Expected versioning to be enabled for %s", prefix)
|
||||
}
|
||||
|
||||
// Test when ExcludeFolders is disabled
|
||||
v.ExcludeFolders = false
|
||||
for i, prefix := range testPrefixes {
|
||||
if !v.PrefixEnabled(prefix) || v.PrefixSuspended(prefix) {
|
||||
t.Fatalf("Test %d: Expected versioning to be enabled for %s", i+1, prefix)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestExcludedPrefixesMatch(t *testing.T) {
|
||||
v := Versioning{
|
||||
Status: Enabled,
|
||||
ExcludedPrefixes: []ExcludedPrefix{{"*/_temporary/"}},
|
||||
}
|
||||
|
||||
if err := v.Validate(); err != nil {
|
||||
t.Fatalf("Invalid test versioning config %v: %v", v, err)
|
||||
}
|
||||
tests := []struct {
|
||||
prefix string
|
||||
excluded bool
|
||||
}{
|
||||
{
|
||||
prefix: "app1-jobs/output/_temporary/attempt1/data.csv",
|
||||
excluded: true,
|
||||
},
|
||||
{
|
||||
prefix: "app1-jobs/output/final/attempt1/data.csv",
|
||||
excluded: false,
|
||||
},
|
||||
}
|
||||
|
||||
for i, test := range tests {
|
||||
if v.PrefixSuspended(test.prefix) != test.excluded {
|
||||
if test.excluded {
|
||||
t.Fatalf("Test %d: Expected prefix %s to be excluded from versioning", i+1, test.prefix)
|
||||
} else {
|
||||
t.Fatalf("Test %d: Expected prefix %s to have versioning enabled", i+1, test.prefix)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user