mirror of
https://github.com/minio/minio.git
synced 2025-02-09 12:48:08 -05:00
Currently, DeleteObjects() tries to find the object's pool before sending a delete request. This only works well when an object has multiple versions in different pools since looking for the pool does not consider the version-id. When an S3 client wants to remove a version-id that exists in pool 2, the delete request will be directed to pool 1 because it has another version of the same object. This commit will remove looking for pool logic and will send a delete request to all pools in parallel. This should not cause any performance regression in most of the cases since the object is unlikely to exist in only one pool, and the performance price will be similar to getPoolIndex() in that case.
This commit is contained in:
parent
dcc000ae2c
commit
079d64c801
@ -32,6 +32,8 @@ type DeletedObject struct {
|
||||
DeleteMarkerMTime DeleteMarkerMTime `xml:"-"`
|
||||
// MinIO extensions to support delete marker replication
|
||||
ReplicationState ReplicationState `xml:"-"`
|
||||
|
||||
found bool // the object was found during deletion
|
||||
}
|
||||
|
||||
// DeleteMarkerMTime is an embedded type containing time.Time for XML marshal
|
||||
|
@ -27,6 +27,8 @@ import (
|
||||
"net/http"
|
||||
"path"
|
||||
"runtime"
|
||||
"slices"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@ -1706,8 +1708,21 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec
|
||||
}
|
||||
|
||||
dedupVersions := make([]FileInfoVersions, 0, len(versionsMap))
|
||||
for _, version := range versionsMap {
|
||||
dedupVersions = append(dedupVersions, version)
|
||||
for _, fivs := range versionsMap {
|
||||
// Removal of existing versions and adding a delete marker in the same
|
||||
// request is supported. At the same time, we cannot allow adding
|
||||
// two delete markers on top of any object. To avoid this situation,
|
||||
// we will sort deletions to execute existing deletion first,
|
||||
// then add only one delete marker if requested
|
||||
sort.SliceStable(fivs.Versions, func(i, j int) bool {
|
||||
return !fivs.Versions[i].Deleted
|
||||
})
|
||||
if idx := slices.IndexFunc(fivs.Versions, func(fi FileInfo) bool {
|
||||
return fi.Deleted
|
||||
}); idx > -1 {
|
||||
fivs.Versions = fivs.Versions[:idx+1]
|
||||
}
|
||||
dedupVersions = append(dedupVersions, fivs)
|
||||
}
|
||||
|
||||
// Initialize list of errors.
|
||||
@ -1732,12 +1747,6 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec
|
||||
continue
|
||||
}
|
||||
for _, v := range dedupVersions[i].Versions {
|
||||
if err == errFileNotFound || err == errFileVersionNotFound {
|
||||
if !dobjects[v.Idx].DeleteMarker {
|
||||
// Not delete marker, if not found, ok.
|
||||
continue
|
||||
}
|
||||
}
|
||||
delObjErrs[index][v.Idx] = err
|
||||
}
|
||||
}
|
||||
@ -1757,6 +1766,13 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec
|
||||
}
|
||||
}
|
||||
err := reduceWriteQuorumErrs(ctx, diskErrs, objectOpIgnoredErrs, writeQuorums[objIndex])
|
||||
if err == nil {
|
||||
dobjects[objIndex].found = true
|
||||
} else if isErrVersionNotFound(err) || isErrObjectNotFound(err) {
|
||||
if !dobjects[objIndex].DeleteMarker {
|
||||
err = nil
|
||||
}
|
||||
}
|
||||
if objects[objIndex].VersionID != "" {
|
||||
errs[objIndex] = toObjectErr(err, bucket, objects[objIndex].ObjectName, objects[objIndex].VersionID)
|
||||
} else {
|
||||
|
@ -131,6 +131,75 @@ func TestErasureDeleteObjectBasic(t *testing.T) {
|
||||
removeRoots(fsDirs)
|
||||
}
|
||||
|
||||
func TestDeleteObjectsVersionedTwoPools(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
obj, fsDirs, err := prepareErasurePools()
|
||||
if err != nil {
|
||||
t.Fatal("Unable to initialize 'Erasure' object layer.", err)
|
||||
}
|
||||
// Remove all dirs.
|
||||
for _, dir := range fsDirs {
|
||||
defer os.RemoveAll(dir)
|
||||
}
|
||||
|
||||
bucketName := "bucket"
|
||||
objectName := "myobject"
|
||||
err = obj.MakeBucket(ctx, bucketName, MakeBucketOptions{
|
||||
VersioningEnabled: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
z, ok := obj.(*erasureServerPools)
|
||||
if !ok {
|
||||
t.Fatal("unexpected object layer type")
|
||||
}
|
||||
|
||||
versions := make([]string, 2)
|
||||
for i := range z.serverPools {
|
||||
objInfo, err := z.serverPools[i].PutObject(ctx, bucketName, objectName,
|
||||
mustGetPutObjReader(t, bytes.NewReader([]byte("abcd")), int64(len("abcd")), "", ""), ObjectOptions{
|
||||
Versioned: true,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("Erasure Object upload failed: <ERROR> %s", err)
|
||||
}
|
||||
versions[i] = objInfo.VersionID
|
||||
}
|
||||
|
||||
// Remove and check the version in the second pool, then
|
||||
// remove and check the version in the first pool
|
||||
for testIdx, vid := range []string{versions[1], versions[0]} {
|
||||
names := []ObjectToDelete{
|
||||
{
|
||||
ObjectV: ObjectV{
|
||||
ObjectName: objectName,
|
||||
VersionID: vid,
|
||||
},
|
||||
},
|
||||
}
|
||||
_, delErrs := obj.DeleteObjects(ctx, bucketName, names, ObjectOptions{
|
||||
Versioned: true,
|
||||
})
|
||||
for i := range delErrs {
|
||||
if delErrs[i] != nil {
|
||||
t.Errorf("Test %d: Failed to remove object `%v` with the error: `%v`", testIdx, names[i], delErrs[i])
|
||||
}
|
||||
_, statErr := obj.GetObjectInfo(ctx, bucketName, objectName, ObjectOptions{
|
||||
VersionID: names[i].ObjectV.VersionID,
|
||||
})
|
||||
switch statErr.(type) {
|
||||
case VersionNotFound:
|
||||
default:
|
||||
t.Errorf("Test %d: Object %s is not removed", testIdx, objectName)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeleteObjectsVersioned(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
@ -32,9 +32,9 @@ func prepareErasurePools() (ObjectLayer, []string, error) {
|
||||
pools := mustGetPoolEndpoints(0, fsDirs[:16]...)
|
||||
pools = append(pools, mustGetPoolEndpoints(1, fsDirs[16:]...)...)
|
||||
|
||||
// Everything is fine, should return nil
|
||||
objLayer, err := newErasureServerPools(context.Background(), pools)
|
||||
objLayer, _, err := initObjectLayer(context.Background(), pools)
|
||||
if err != nil {
|
||||
removeRoots(fsDirs)
|
||||
return nil, nil, err
|
||||
}
|
||||
return objLayer, fsDirs, nil
|
||||
|
@ -1248,89 +1248,42 @@ func (z *erasureServerPools) DeleteObjects(ctx context.Context, bucket string, o
|
||||
ctx = lkctx.Context()
|
||||
defer multiDeleteLock.Unlock(lkctx)
|
||||
|
||||
// Fetch location of up to 10 objects concurrently.
|
||||
poolObjIdxMap := map[int][]ObjectToDelete{}
|
||||
origIndexMap := map[int][]int{}
|
||||
dObjectsByPool := make([][]DeletedObject, len(z.serverPools))
|
||||
dErrsByPool := make([][]error, len(z.serverPools))
|
||||
|
||||
// Always perform 1/10th of the number of objects per delete
|
||||
concurrent := len(objects) / 10
|
||||
if concurrent <= 10 {
|
||||
// if we cannot get 1/10th then choose the number of
|
||||
// objects as concurrent.
|
||||
concurrent = len(objects)
|
||||
}
|
||||
|
||||
var mu sync.Mutex
|
||||
eg := errgroup.WithNErrs(len(objects)).WithConcurrency(concurrent)
|
||||
for j, obj := range objects {
|
||||
j := j
|
||||
obj := obj
|
||||
eg := errgroup.WithNErrs(len(z.serverPools)).WithConcurrency(len(z.serverPools))
|
||||
for i, pool := range z.serverPools {
|
||||
i := i
|
||||
pool := pool
|
||||
eg.Go(func() error {
|
||||
pinfo, _, err := z.getPoolInfoExistingWithOpts(ctx, bucket, obj.ObjectName, ObjectOptions{
|
||||
NoLock: true,
|
||||
})
|
||||
if err != nil {
|
||||
if !isErrObjectNotFound(err) && !isErrVersionNotFound(err) {
|
||||
derrs[j] = err
|
||||
}
|
||||
dobjects[j] = DeletedObject{
|
||||
ObjectName: decodeDirObject(obj.ObjectName),
|
||||
VersionID: obj.VersionID,
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Delete marker already present we are not going to create new delete markers.
|
||||
if pinfo.ObjInfo.DeleteMarker && obj.VersionID == "" {
|
||||
dobjects[j] = DeletedObject{
|
||||
DeleteMarker: pinfo.ObjInfo.DeleteMarker,
|
||||
DeleteMarkerVersionID: pinfo.ObjInfo.VersionID,
|
||||
DeleteMarkerMTime: DeleteMarkerMTime{pinfo.ObjInfo.ModTime},
|
||||
ObjectName: decodeDirObject(pinfo.ObjInfo.Name),
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
idx := pinfo.Index
|
||||
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
|
||||
poolObjIdxMap[idx] = append(poolObjIdxMap[idx], obj)
|
||||
origIndexMap[idx] = append(origIndexMap[idx], j)
|
||||
dObjectsByPool[i], dErrsByPool[i] = pool.DeleteObjects(ctx, bucket, objects, opts)
|
||||
return nil
|
||||
}, j)
|
||||
}, i)
|
||||
}
|
||||
|
||||
eg.Wait() // wait to check all the pools.
|
||||
|
||||
if len(poolObjIdxMap) > 0 {
|
||||
// Delete concurrently in all server pools.
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(len(z.serverPools))
|
||||
for idx, pool := range z.serverPools {
|
||||
go func(idx int, pool *erasureSets) {
|
||||
defer wg.Done()
|
||||
objs := poolObjIdxMap[idx]
|
||||
if len(objs) == 0 {
|
||||
return
|
||||
}
|
||||
orgIndexes := origIndexMap[idx]
|
||||
deletedObjects, errs := pool.DeleteObjects(ctx, bucket, objs, opts)
|
||||
mu.Lock()
|
||||
for i, derr := range errs {
|
||||
if derr != nil {
|
||||
derrs[orgIndexes[i]] = derr
|
||||
}
|
||||
deletedObjects[i].ObjectName = decodeDirObject(deletedObjects[i].ObjectName)
|
||||
dobjects[orgIndexes[i]] = deletedObjects[i]
|
||||
}
|
||||
mu.Unlock()
|
||||
}(idx, pool)
|
||||
for i := range dobjects {
|
||||
// Iterate over pools
|
||||
for pool := range z.serverPools {
|
||||
if dErrsByPool[pool][i] == nil && dObjectsByPool[pool][i].found {
|
||||
// A fast exit when the object is found and removed
|
||||
dobjects[i] = dObjectsByPool[pool][i]
|
||||
derrs[i] = nil
|
||||
break
|
||||
}
|
||||
if derrs[i] == nil {
|
||||
// No error related to this object is found, assign this pool result
|
||||
// whether it is nil because there is no object found or because of
|
||||
// some other errors such erasure quorum errors.
|
||||
dobjects[i] = dObjectsByPool[pool][i]
|
||||
derrs[i] = dErrsByPool[pool][i]
|
||||
}
|
||||
}
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
for i := range dobjects {
|
||||
dobjects[i].ObjectName = decodeDirObject(dobjects[i].ObjectName)
|
||||
}
|
||||
return dobjects, derrs
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user