mirror of https://github.com/minio/minio.git
add CopyObject optimization when source and destination are same (#10170)
When the source and destination are the same object and versioning is enabled on the destination bucket, we do not need to re-create the entire object; re-using the existing data optimizes space utilization.

Cases this PR does not support:

- any pre-existing legacy object will not be preserved in this manner, meaning a new dataDir will be created
- key rotation and storage-class changes will, of course, never re-use the dataDir
This commit is contained in:
parent e99bc177c0
commit 5ce82b45da
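For context, the path optimized here is a server-side copy where source and destination are the same object in a versioning-enabled bucket, for example a metadata-replacing copy. Below is a rough client-side sketch of such a request using the minio-go v7 SDK; the endpoint, credentials, bucket, and metadata values are placeholders, and the option field names reflect my reading of that SDK rather than anything in this commit.

```go
package main

import (
	"context"
	"log"

	"github.com/minio/minio-go/v7"
	"github.com/minio/minio-go/v7/pkg/credentials"
)

func main() {
	// Placeholder endpoint and credentials; assumes "mybucket" has versioning enabled.
	client, err := minio.New("play.min.io", &minio.Options{
		Creds:  credentials.NewStaticV4("ACCESS-KEY", "SECRET-KEY", ""),
		Secure: true,
	})
	if err != nil {
		log.Fatalln(err)
	}

	src := minio.CopySrcOptions{Bucket: "mybucket", Object: "myobject"}
	dst := minio.CopyDestOptions{
		Bucket:          "mybucket",
		Object:          "myobject", // same source and destination
		UserMetadata:    map[string]string{"owner": "alice"},
		ReplaceMetadata: true,
	}

	// With this change, such a copy should create a new version that
	// references the existing data directory instead of rewriting the data.
	if _, err := client.CopyObject(context.Background(), dst, src); err != nil {
		log.Fatalln(err)
	}
}
```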
```diff
@@ -112,6 +112,7 @@ func (fi FileInfo) ToObjectInfo(bucket, object string) ObjectInfo {
 		DeleteMarker:    fi.Deleted,
 		Size:            fi.Size,
 		ModTime:         fi.ModTime,
+		Legacy:          fi.XLV1,
 		ContentType:     fi.Metadata["content-type"],
 		ContentEncoding: fi.Metadata["content-encoding"],
 	}
```

```diff
@@ -65,7 +65,7 @@ func (er erasureObjects) putObjectDir(ctx context.Context, bucket, object string
 // if source object and destination object are same we only
 // update metadata.
 func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (oi ObjectInfo, e error) {
-	// This call shouldn't be used for anything other than metadata updates.
+	// This call shouldn't be used for anything other than metadata updates or adding self referential versions.
 	if !srcInfo.metadataOnly {
 		return oi, NotImplemented{}
 	}
```

```diff
@@ -97,8 +97,23 @@ func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, d
 		return fi.ToObjectInfo(srcBucket, srcObject), toObjectErr(errMethodNotAllowed, srcBucket, srcObject)
 	}

 	versionID := srcInfo.VersionID
 	if srcInfo.versionOnly {
 		versionID = dstOpts.VersionID
 		// preserve destination versionId if specified.
 		if versionID == "" {
 			versionID = mustGetUUID()
 		}
 		modTime = UTCNow()
 	}

 	fi.VersionID = versionID // set any new versionID we might have created
 	fi.ModTime = modTime     // set modTime for the new versionID

 	// Update `xl.meta` content on each disks.
 	for index := range metaArr {
 		metaArr[index].ModTime = modTime
 		metaArr[index].VersionID = versionID
 		metaArr[index].Metadata = srcInfo.UserDefined
 		metaArr[index].Metadata["etag"] = srcInfo.ETag
 	}
```

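The heart of the new path is the versionID selection in the hunk above: a self-referential copy reuses the caller-supplied destination version ID when one is given, otherwise it mints a fresh UUID, and the added version gets a new modification time. A distilled, standalone sketch of just that rule follows; `pickVersionID` is a hypothetical helper, and `uuid.New` / `time.Now().UTC()` stand in for MinIO's `mustGetUUID()` / `UTCNow()`.

```go
package main

import (
	"fmt"
	"time"

	"github.com/google/uuid"
)

// pickVersionID mirrors the selection above: for a versionOnly (self-referential)
// copy, prefer the version ID requested for the destination, otherwise generate
// a new one; the new version also gets a fresh modification time.
func pickVersionID(srcVersionID, dstVersionID string, versionOnly bool, srcModTime time.Time) (string, time.Time) {
	if !versionOnly {
		// Plain metadata update: keep the source version and its timestamp.
		return srcVersionID, srcModTime
	}
	versionID := dstVersionID
	if versionID == "" {
		versionID = uuid.New().String() // stand-in for mustGetUUID()
	}
	return versionID, time.Now().UTC() // stand-in for UTCNow()
}

func main() {
	id, mod := pickVersionID("", "", true, time.Time{})
	fmt.Println("new self-referential version:", id, "created at", mod)
}
```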
```diff
@@ -738,14 +738,24 @@ func (s *erasureSets) CopyObject(ctx context.Context, srcBucket, srcObject, dstB
 	srcSet := s.getHashedSet(srcObject)
 	dstSet := s.getHashedSet(dstObject)

 	cpSrcDstSame := srcSet == dstSet

 	// Check if this request is only metadata update.
-	if srcSet == dstSet && srcInfo.metadataOnly {
+	if cpSrcDstSame && srcInfo.metadataOnly {
 		if dstOpts.VersionID != "" && srcOpts.VersionID == dstOpts.VersionID {
 			return srcSet.CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
 		}
 		if !dstOpts.Versioned && srcOpts.VersionID == "" {
 			return srcSet.CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
 		}
 		// CopyObject optimization where we don't create an entire copy
 		// of the content, instead we add a reference, we disallow legacy
 		// objects to be self referenced in this manner so make sure
 		// that we actually create a new dataDir for legacy objects.
 		if dstOpts.Versioned && srcOpts.VersionID != dstOpts.VersionID && !srcInfo.Legacy {
 			srcInfo.versionOnly = true
 			return srcSet.CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
 		}
 	}

 	putOpts := ObjectOptions{
```

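Reading the branches above: when source and destination land on the same erasure set and only metadata changes, the request is (1) applied in place if it addresses the exact same version, (2) applied in place on an unversioned bucket with no source version, or (3) turned into a versionOnly copy, i.e. a new version referencing the existing data, when the destination is versioned, the requested version IDs differ, and the source is not a legacy-format object; anything else falls through to the regular full-copy path starting at putOpts. A hedged restatement of that dispatch as a standalone helper (simplified parameters and descriptive return strings; a reading aid, not MinIO code):

```go
package main

import "fmt"

// copyStrategy restates the branch order above with plain booleans and strings.
// It is illustrative only; the real code dispatches to srcSet.CopyObject.
func copyStrategy(sameSet, metadataOnly, dstVersioned, legacySrc bool,
	srcVersionID, dstVersionID string) string {

	if !sameSet || !metadataOnly {
		return "full copy (different erasure set, or object data changes)"
	}
	if dstVersionID != "" && srcVersionID == dstVersionID {
		return "in-place metadata update of the addressed version"
	}
	if !dstVersioned && srcVersionID == "" {
		return "in-place metadata update (unversioned bucket)"
	}
	if dstVersioned && srcVersionID != dstVersionID && !legacySrc {
		return "versionOnly: add a new version referencing the existing dataDir"
	}
	// Legacy-format sources (and anything not matched above) get a real copy.
	return "full copy with a new dataDir"
}

func main() {
	fmt.Println(copyStrategy(true, true, true, false, "v1", ""))
	fmt.Println(copyStrategy(true, true, true, true, "v1", ""))
}
```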
```diff
@@ -626,22 +626,24 @@ func (z *erasureZones) CopyObject(ctx context.Context, srcBucket, srcObject, dst
 		defer lk.Unlock()
 	}

 	if z.SingleZone() {
 		return z.zones[0].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
 	}

 	zoneIdx, err := z.getZoneIdx(ctx, dstBucket, dstObject, dstOpts, srcInfo.Size)
 	if err != nil {
 		return objInfo, err
 	}

-	if cpSrcDstSame && srcInfo.metadataOnly && srcOpts.VersionID == dstOpts.VersionID {
+	if cpSrcDstSame && srcInfo.metadataOnly {
 		if dstOpts.VersionID != "" && srcOpts.VersionID == dstOpts.VersionID {
 			return z.zones[zoneIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
 		}
 		if !dstOpts.Versioned && srcOpts.VersionID == "" {
 			return z.zones[zoneIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
 		}
 		if dstOpts.Versioned && srcOpts.VersionID != dstOpts.VersionID && !srcInfo.Legacy {
 			// CopyObject optimization where we don't create an entire copy
 			// of the content, instead we add a reference.
 			srcInfo.versionOnly = true
 			return z.zones[zoneIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
 		}
 	}

 	putOpts := ObjectOptions{
```

```diff
@@ -198,11 +198,14 @@ type ObjectInfo struct {
 	PutObjReader *PutObjReader `json:"-"`

 	metadataOnly bool
 	versionOnly  bool // adds a new version, only used by CopyObject
 	keyRotation  bool

 	// Date and time when the object was last accessed.
 	AccTime time.Time

 	Legacy bool // indicates object on disk is in legacy data format

 	// backendType indicates which backend filled this structure
 	backendType BackendType
 }
```

```diff
@@ -442,11 +442,6 @@ func (z *xlMetaV2) DeleteVersion(fi FileInfo) (string, bool, error) {
 				z.Versions = append(z.Versions[:i], z.Versions[i+1:]...)
 				return version.ObjectV1.DataDir, len(z.Versions) == 0, nil
 			}
 		case ObjectType:
 			if bytes.Equal(version.ObjectV2.VersionID[:], uv[:]) {
 				z.Versions = append(z.Versions[:i], z.Versions[i+1:]...)
 				return uuid.UUID(version.ObjectV2.DataDir).String(), len(z.Versions) == 0, nil
 			}
 		case DeleteType:
 			if bytes.Equal(version.DeleteMarker.VersionID[:], uv[:]) {
 				z.Versions = append(z.Versions[:i], z.Versions[i+1:]...)
```

```diff
@@ -454,6 +449,38 @@ func (z *xlMetaV2) DeleteVersion(fi FileInfo) (string, bool, error) {
 			}
 		}
 	}

 	findDataDir := func(dataDir [16]byte, versions []xlMetaV2Version) int {
 		var sameDataDirCount int
 		for _, version := range versions {
 			switch version.Type {
 			case ObjectType:
 				if bytes.Equal(version.ObjectV2.DataDir[:], dataDir[:]) {
 					sameDataDirCount++
 				}
 			}
 		}
 		return sameDataDirCount
 	}

 	for i, version := range z.Versions {
 		if !version.Valid() {
 			return "", false, errFileCorrupt
 		}
 		switch version.Type {
 		case ObjectType:
 			if bytes.Equal(version.ObjectV2.VersionID[:], uv[:]) {
 				z.Versions = append(z.Versions[:i], z.Versions[i+1:]...)
 				if findDataDir(version.ObjectV2.DataDir, z.Versions) > 0 {
 					// Found that another version references the same dataDir
 					// we shouldn't remove it, and only remove the version instead
 					return "", len(z.Versions) == 0, nil
 				}
 				return uuid.UUID(version.ObjectV2.DataDir).String(), len(z.Versions) == 0, nil
 			}
 		}
 	}

 	return "", false, errFileVersionNotFound
 }
```

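Since two versions can now point at the same dataDir, DeleteVersion only reports the data directory for removal when no surviving version still references it; the findDataDir closure above counts the remaining references after the version entry is dropped. A small self-contained sketch of that reference-count check with simplified types (not the actual xlMetaV2 structures):

```go
package main

import "fmt"

// version is a simplified stand-in for xlMetaV2Version: just an ID and the
// data directory it points at.
type version struct {
	id      string
	dataDir string
}

// deleteVersion removes the version with the given id and reports whether the
// caller may also delete the on-disk dataDir, i.e. whether no surviving version
// still references it. Mirrors the findDataDir logic in the diff above.
func deleteVersion(versions []version, id string) (remaining []version, removeDataDir string) {
	var deleted *version
	for _, v := range versions {
		if v.id == id {
			vv := v
			deleted = &vv
			continue
		}
		remaining = append(remaining, v)
	}
	if deleted == nil {
		return versions, "" // version not found, nothing to delete
	}
	for _, v := range remaining {
		if v.dataDir == deleted.dataDir {
			return remaining, "" // still referenced, keep the data
		}
	}
	return remaining, deleted.dataDir // safe to purge the data directory
}

func main() {
	vs := []version{
		{id: "v1", dataDir: "d1"},
		{id: "v2", dataDir: "d1"}, // self-referential copy of v1
	}
	vs, dir := deleteVersion(vs, "v2")
	fmt.Println(dir == "", "-> dataDir d1 kept, still used by v1")
	_, dir = deleteVersion(vs, "v1")
	fmt.Println(dir, "-> last reference gone, dataDir can be removed")
}
```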