add CopyObject optimization when source and destination are same (#10170)

When the source and destination are the same and versioning is enabled
on the destination bucket, we do not need to re-create the entire
object. Instead, a new version that references the existing data is
added, which optimizes space utilization.

Cases this PR does not support:

- any pre-existing legacy object will not be
  self-referenced in this manner, meaning a new
  dataDir will be created for it.

- key-rotation and storage class changes
  of course will never re-use the dataDir
This commit is contained in:
Harshavardhana 2020-08-03 16:21:10 -07:00 committed by GitHub
parent e99bc177c0
commit 5ce82b45da
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 70 additions and 12 deletions

View File

@ -112,6 +112,7 @@ func (fi FileInfo) ToObjectInfo(bucket, object string) ObjectInfo {
DeleteMarker: fi.Deleted,
Size: fi.Size,
ModTime: fi.ModTime,
Legacy: fi.XLV1,
ContentType: fi.Metadata["content-type"],
ContentEncoding: fi.Metadata["content-encoding"],
}

View File

@ -65,7 +65,7 @@ func (er erasureObjects) putObjectDir(ctx context.Context, bucket, object string
// if source object and destination object are same we only
// update metadata.
func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject string, srcInfo ObjectInfo, srcOpts, dstOpts ObjectOptions) (oi ObjectInfo, e error) {
// This call shouldn't be used for anything other than metadata updates.
// This call shouldn't be used for anything other than metadata updates or adding self referential versions.
if !srcInfo.metadataOnly {
return oi, NotImplemented{}
}
@ -97,8 +97,23 @@ func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, d
return fi.ToObjectInfo(srcBucket, srcObject), toObjectErr(errMethodNotAllowed, srcBucket, srcObject)
}
versionID := srcInfo.VersionID
if srcInfo.versionOnly {
versionID = dstOpts.VersionID
// preserve destination versionId if specified.
if versionID == "" {
versionID = mustGetUUID()
}
modTime = UTCNow()
}
fi.VersionID = versionID // set any new versionID we might have created
fi.ModTime = modTime // set modTime for the new versionID
// Update `xl.meta` content on each disks.
for index := range metaArr {
metaArr[index].ModTime = modTime
metaArr[index].VersionID = versionID
metaArr[index].Metadata = srcInfo.UserDefined
metaArr[index].Metadata["etag"] = srcInfo.ETag
}

View File

@ -738,14 +738,24 @@ func (s *erasureSets) CopyObject(ctx context.Context, srcBucket, srcObject, dstB
srcSet := s.getHashedSet(srcObject)
dstSet := s.getHashedSet(dstObject)
cpSrcDstSame := srcSet == dstSet
// Check if this request is only metadata update.
if srcSet == dstSet && srcInfo.metadataOnly {
if cpSrcDstSame && srcInfo.metadataOnly {
if dstOpts.VersionID != "" && srcOpts.VersionID == dstOpts.VersionID {
return srcSet.CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
}
if !dstOpts.Versioned && srcOpts.VersionID == "" {
return srcSet.CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
}
// CopyObject optimization where we don't create an entire copy
// of the content, instead we add a reference, we disallow legacy
// objects to be self referenced in this manner so make sure
// that we actually create a new dataDir for legacy objects.
if dstOpts.Versioned && srcOpts.VersionID != dstOpts.VersionID && !srcInfo.Legacy {
srcInfo.versionOnly = true
return srcSet.CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
}
}
putOpts := ObjectOptions{

View File

@ -626,22 +626,24 @@ func (z *erasureZones) CopyObject(ctx context.Context, srcBucket, srcObject, dst
defer lk.Unlock()
}
if z.SingleZone() {
return z.zones[0].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
}
zoneIdx, err := z.getZoneIdx(ctx, dstBucket, dstObject, dstOpts, srcInfo.Size)
if err != nil {
return objInfo, err
}
if cpSrcDstSame && srcInfo.metadataOnly && srcOpts.VersionID == dstOpts.VersionID {
if cpSrcDstSame && srcInfo.metadataOnly {
if dstOpts.VersionID != "" && srcOpts.VersionID == dstOpts.VersionID {
return z.zones[zoneIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
}
if !dstOpts.Versioned && srcOpts.VersionID == "" {
return z.zones[zoneIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
}
if dstOpts.Versioned && srcOpts.VersionID != dstOpts.VersionID && !srcInfo.Legacy {
// CopyObject optimization where we don't create an entire copy
// of the content, instead we add a reference.
srcInfo.versionOnly = true
return z.zones[zoneIdx].CopyObject(ctx, srcBucket, srcObject, dstBucket, dstObject, srcInfo, srcOpts, dstOpts)
}
}
putOpts := ObjectOptions{

View File

@ -198,11 +198,14 @@ type ObjectInfo struct {
PutObjReader *PutObjReader `json:"-"`
metadataOnly bool
versionOnly bool // adds a new version, only used by CopyObject
keyRotation bool
// Date and time when the object was last accessed.
AccTime time.Time
Legacy bool // indicates object on disk is in legacy data format
// backendType indicates which backend filled this structure
backendType BackendType
}

View File

@ -442,11 +442,6 @@ func (z *xlMetaV2) DeleteVersion(fi FileInfo) (string, bool, error) {
z.Versions = append(z.Versions[:i], z.Versions[i+1:]...)
return version.ObjectV1.DataDir, len(z.Versions) == 0, nil
}
case ObjectType:
if bytes.Equal(version.ObjectV2.VersionID[:], uv[:]) {
z.Versions = append(z.Versions[:i], z.Versions[i+1:]...)
return uuid.UUID(version.ObjectV2.DataDir).String(), len(z.Versions) == 0, nil
}
case DeleteType:
if bytes.Equal(version.DeleteMarker.VersionID[:], uv[:]) {
z.Versions = append(z.Versions[:i], z.Versions[i+1:]...)
@ -454,6 +449,38 @@ func (z *xlMetaV2) DeleteVersion(fi FileInfo) (string, bool, error) {
}
}
}
findDataDir := func(dataDir [16]byte, versions []xlMetaV2Version) int {
var sameDataDirCount int
for _, version := range versions {
switch version.Type {
case ObjectType:
if bytes.Equal(version.ObjectV2.DataDir[:], dataDir[:]) {
sameDataDirCount++
}
}
}
return sameDataDirCount
}
for i, version := range z.Versions {
if !version.Valid() {
return "", false, errFileCorrupt
}
switch version.Type {
case ObjectType:
if bytes.Equal(version.ObjectV2.VersionID[:], uv[:]) {
z.Versions = append(z.Versions[:i], z.Versions[i+1:]...)
if findDataDir(version.ObjectV2.DataDir, z.Versions) > 0 {
// Found that another version references the same dataDir
// we shouldn't remove it, and only remove the version instead
return "", len(z.Versions) == 0, nil
}
return uuid.UUID(version.ObjectV2.DataDir).String(), len(z.Versions) == 0, nil
}
}
}
return "", false, errFileVersionNotFound
}