From 6386b45c08d30599eded95a27279f114692abd44 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Fri, 26 Feb 2021 09:52:27 -0800 Subject: [PATCH] [feat] use rename instead of recursive deletes (#11641) most of the delete calls today spend time in a blocking operation where multiple calls need to be recursively sent to delete the objects, instead we can use rename operation to atomically move the objects from the namespace to `tmp/.trash` we can schedule deletion of objects at this location once in 15, 30mins and we can also add wait times between each delete operation. this allows us to make deletes faster as well as less chatty on the drives, each server locally runs a goroutine which would clean this up regularly. --- cmd/erasure-multipart.go | 39 ++++++++++--------- cmd/erasure-sets.go | 44 ++++++++++++++++----- cmd/erasure.go | 25 ++++++++++++ cmd/fs-v1-helpers.go | 50 ++++++++++++++++++++++++ cmd/globals.go | 5 ++- cmd/metacache-server-pool.go | 3 +- cmd/object-api-putobject_test.go | 28 +++++++++++-- cmd/object-api-utils.go | 5 ++- cmd/xl-storage.go | 67 ++++++++------------------ 9 files changed, 179 insertions(+), 87 deletions(-) diff --git a/cmd/erasure-multipart.go b/cmd/erasure-multipart.go index 15cedd1a2..58b0f6719 100644 --- a/cmd/erasure-multipart.go +++ b/cmd/erasure-multipart.go @@ -20,6 +20,7 @@ import ( "context" "fmt" "io" + "os" "path" "sort" "strconv" @@ -138,39 +139,39 @@ func (er erasureObjects) deleteAll(ctx context.Context, bucket, prefix string) { // Remove the old multipart uploads on the given disk. 
func (er erasureObjects) cleanupStaleUploadsOnDisk(ctx context.Context, disk StorageAPI, expiry time.Duration) { now := time.Now() - shaDirs, err := disk.ListDir(ctx, minioMetaMultipartBucket, "", -1) - if err != nil { - return - } - for _, shaDir := range shaDirs { - uploadIDDirs, err := disk.ListDir(ctx, minioMetaMultipartBucket, shaDir, -1) - if err != nil { - continue - } - for _, uploadIDDir := range uploadIDDirs { + diskPath := disk.Endpoint().Path + + readDirFn(pathJoin(diskPath, minioMetaMultipartBucket), func(shaDir string, typ os.FileMode) error { + return readDirFn(pathJoin(diskPath, minioMetaMultipartBucket, shaDir), func(uploadIDDir string, typ os.FileMode) error { uploadIDPath := pathJoin(shaDir, uploadIDDir) fi, err := disk.ReadVersion(ctx, minioMetaMultipartBucket, uploadIDPath, "", false) if err != nil { - continue + return nil } + wait := er.deletedCleanupSleeper.Timer(ctx) if now.Sub(fi.ModTime) > expiry { er.renameAll(ctx, minioMetaMultipartBucket, uploadIDPath) } + wait() + return nil + }) + }) + + readDirFn(pathJoin(diskPath, minioMetaTmpBucket), func(tmpDir string, typ os.FileMode) error { + if tmpDir == ".trash/" { // do not remove .trash/ here, it has its own routines + return nil } - } - tmpDirs, err := disk.ListDir(ctx, minioMetaTmpBucket, "", -1) - if err != nil { - return - } - for _, tmpDir := range tmpDirs { vi, err := disk.StatVol(ctx, pathJoin(minioMetaTmpBucket, tmpDir)) if err != nil { - continue + return nil } + wait := er.deletedCleanupSleeper.Timer(ctx) if now.Sub(vi.Created) > expiry { er.deleteAll(ctx, minioMetaTmpBucket, tmpDir) } - } + wait() + return nil + }) } // ListMultipartUploads - lists all the pending multipart diff --git a/cmd/erasure-sets.go b/cmd/erasure-sets.go index 21fc37957..0d54d0370 100644 --- a/cmd/erasure-sets.go +++ b/cmd/erasure-sets.go @@ -403,21 +403,28 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto // Initialize erasure objects for a given set. 
s.sets[i] = &erasureObjects{ - setNumber: i, - setDriveCount: setDriveCount, - defaultParityCount: defaultParityCount, - getDisks: s.GetDisks(i), - getLockers: s.GetLockers(i), - getEndpoints: s.GetEndpoints(i), - nsMutex: mutex, - bp: bp, - mrfOpCh: make(chan partialOperation, 10000), + setNumber: i, + setDriveCount: setDriveCount, + defaultParityCount: defaultParityCount, + getDisks: s.GetDisks(i), + getLockers: s.GetLockers(i), + getEndpoints: s.GetEndpoints(i), + deletedCleanupSleeper: newDynamicSleeper(10, 10*time.Second), + nsMutex: mutex, + bp: bp, + mrfOpCh: make(chan partialOperation, 10000), } } + // cleanup ".trash/" folder every 30 minutes with sufficient sleep cycles. + const deletedObjectsCleanupInterval = 30 * time.Minute + // start cleanup stale uploads go-routine. go s.cleanupStaleUploads(ctx, GlobalStaleUploadsCleanupInterval, GlobalStaleUploadsExpiry) + // start cleanup of deleted objects. + go s.cleanupDeletedObjects(ctx, deletedObjectsCleanupInterval) + // Start the disk monitoring and connect routine. 
go s.monitorAndConnectEndpoints(ctx, defaultMonitorConnectEndpointInterval) go s.maintainMRFList() @@ -426,6 +433,25 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto return s, nil } +func (s *erasureSets) cleanupDeletedObjects(ctx context.Context, cleanupInterval time.Duration) { + timer := time.NewTimer(cleanupInterval) + defer timer.Stop() + + for { + select { + case <-ctx.Done(): + return + case <-timer.C: + // Reset for the next interval + timer.Reset(cleanupInterval) + + for _, set := range s.sets { + set.cleanupDeletedObjects(ctx) + } + } + } +} + func (s *erasureSets) cleanupStaleUploads(ctx context.Context, cleanupInterval, expiry time.Duration) { timer := time.NewTimer(cleanupInterval) defer timer.Stop() diff --git a/cmd/erasure.go b/cmd/erasure.go index 24784316d..9619e8eae 100644 --- a/cmd/erasure.go +++ b/cmd/erasure.go @@ -21,6 +21,7 @@ import ( "errors" "fmt" "math/rand" + "os" "sort" "sync" "time" @@ -71,6 +72,8 @@ type erasureObjects struct { bp *bpool.BytePoolCap mrfOpCh chan partialOperation + + deletedCleanupSleeper *dynamicSleeper } // NewNSLock - initialize a new namespace RWLocker instance. @@ -273,6 +276,28 @@ func (er erasureObjects) getOnlineDisksWithHealing() (newDisks []StorageAPI, hea return newDisks, healing } +// Clean-up previously deleted objects. from .minio.sys/tmp/.trash/ +func (er erasureObjects) cleanupDeletedObjects(ctx context.Context) { + // run multiple cleanup's local to this server. 
+ var wg sync.WaitGroup + for _, disk := range er.getLoadBalancedLocalDisks() { + if disk != nil { + wg.Add(1) + go func(disk StorageAPI) { + defer wg.Done() + diskPath := disk.Endpoint().Path + readDirFn(pathJoin(diskPath, minioMetaTmpDeletedBucket), func(ddir string, typ os.FileMode) error { + wait := er.deletedCleanupSleeper.Timer(ctx) + removeAll(pathJoin(diskPath, minioMetaTmpDeletedBucket, ddir)) + wait() + return nil + }) + }(disk) + } + } + wg.Wait() +} + // CrawlAndGetDataUsage will start crawling buckets and send updated totals as they are traversed. // Updates are sent on a regular basis and the caller *must* consume them. func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, updates chan<- dataUsageCache) error { diff --git a/cmd/fs-v1-helpers.go b/cmd/fs-v1-helpers.go index 4aa216a38..2adcb289b 100644 --- a/cmd/fs-v1-helpers.go +++ b/cmd/fs-v1-helpers.go @@ -22,6 +22,7 @@ import ( "os" pathutil "path" "runtime" + "strings" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/lock" @@ -392,6 +393,55 @@ func fsRenameFile(ctx context.Context, sourcePath, destPath string) error { return nil } +func deleteFile(basePath, deletePath string, recursive bool) error { + if basePath == "" || deletePath == "" { + return nil + } + isObjectDir := HasSuffix(deletePath, SlashSeparator) + basePath = pathutil.Clean(basePath) + deletePath = pathutil.Clean(deletePath) + if !strings.HasPrefix(deletePath, basePath) || deletePath == basePath { + return nil + } + + var err error + if recursive { + os.RemoveAll(deletePath) + } else { + err = os.Remove(deletePath) + } + if err != nil { + switch { + case isSysErrNotEmpty(err): + // if object is a directory, but if its not empty + // return FileNotFound to indicate its an empty prefix. + if isObjectDir { + return errFileNotFound + } + // Ignore errors if the directory is not empty. 
The server relies on + // this functionality, and sometimes uses recursion that should not + // error on parent directories. + return nil + case osIsNotExist(err): + return errFileNotFound + case osIsPermission(err): + return errFileAccessDenied + case isSysErrIO(err): + return errFaultyDisk + default: + return err + } + } + + deletePath = pathutil.Dir(deletePath) + + // Delete parent directory obviously not recursively. Errors for + // parent directories shouldn't trickle down. + deleteFile(basePath, deletePath, false) + + return nil +} + // fsDeleteFile is a wrapper for deleteFile(), after checking the path length. func fsDeleteFile(ctx context.Context, basePath, deletePath string) error { if err := checkPathLength(basePath); err != nil { diff --git a/cmd/globals.go b/cmd/globals.go index 94976ebde..32e1192e4 100644 --- a/cmd/globals.go +++ b/cmd/globals.go @@ -87,8 +87,9 @@ const ( // GlobalStaleUploadsExpiry - Expiry duration after which the uploads in multipart, tmp directory are deemed stale. GlobalStaleUploadsExpiry = time.Hour * 24 // 24 hrs. + // GlobalStaleUploadsCleanupInterval - Cleanup interval when the stale uploads cleanup is initiated. - GlobalStaleUploadsCleanupInterval = time.Hour * 24 // 24 hrs. + GlobalStaleUploadsCleanupInterval = time.Hour * 12 // 12 hrs. // GlobalServiceExecutionInterval - Executes the Lifecycle events. GlobalServiceExecutionInterval = time.Hour * 24 // 24 hrs. @@ -96,7 +97,7 @@ const ( // Refresh interval to update in-memory iam config cache. globalRefreshIAMInterval = 5 * time.Minute - // Limit of location constraint XML for unauthenticted PUT bucket operations. + // Limit of location constraint XML for unauthenticated PUT bucket operations. 
maxLocationConstraintSize = 3 * humanize.MiByte // Maximum size of default bucket encryption configuration allowed diff --git a/cmd/metacache-server-pool.go b/cmd/metacache-server-pool.go index 6dea435c7..58adc24ef 100644 --- a/cmd/metacache-server-pool.go +++ b/cmd/metacache-server-pool.go @@ -23,6 +23,7 @@ import ( "io" "os" "path" + pathutil "path" "strings" "sync" "time" @@ -35,7 +36,7 @@ func renameAllBucketMetacache(epPath string) error { // to `.minio.sys/tmp/` for deletion. return readDirFn(pathJoin(epPath, minioMetaBucket, bucketMetaPrefix), func(name string, typ os.FileMode) error { if typ == os.ModeDir { - tmpMetacacheOld := pathJoin(epPath, minioMetaTmpBucket+"-old", mustGetUUID()) + tmpMetacacheOld := pathutil.Join(epPath, minioMetaTmpDeletedBucket, mustGetUUID()) if err := renameAll(pathJoin(epPath, minioMetaBucket, metacachePrefixForID(name, slashSeparator)), tmpMetacacheOld); err != nil && err != errFileNotFound { return fmt.Errorf("unable to rename (%s -> %s) %w", diff --git a/cmd/object-api-putobject_test.go b/cmd/object-api-putobject_test.go index 43c22353a..cf1d4603d 100644 --- a/cmd/object-api-putobject_test.go +++ b/cmd/object-api-putobject_test.go @@ -340,8 +340,19 @@ func testObjectAPIPutObjectStaleFiles(obj ObjectLayer, instanceType string, disk for _, disk := range disks { tmpMetaDir := path.Join(disk, minioMetaTmpBucket) - if !isDirEmpty(tmpMetaDir) { - t.Fatalf("%s: expected: empty, got: non-empty", minioMetaTmpBucket) + files, err := ioutil.ReadDir(tmpMetaDir) + if err != nil { + t.Fatal(err) + } + var found bool + for _, fi := range files { + if fi.Name() == ".trash" { + continue + } + found = true + } + if found { + t.Fatalf("%s: expected: empty, got: non-empty %#v", minioMetaTmpBucket, files) } } } @@ -418,8 +429,17 @@ func testObjectAPIMultipartPutObjectStaleFiles(obj ObjectLayer, instanceType str t.Errorf("%s", err) } - if len(files) != 0 { - t.Fatalf("%s: expected: empty, got: non-empty. 
content: %s", tmpMetaDir, files) + var found bool + for _, fi := range files { + if fi.Name() == ".trash" { + continue + } + found = true + break + } + + if found { + t.Fatalf("%s: expected: empty, got: non-empty. content: %#v", tmpMetaDir, files) } } } diff --git a/cmd/object-api-utils.go b/cmd/object-api-utils.go index f3b789471..699a00cbc 100644 --- a/cmd/object-api-utils.go +++ b/cmd/object-api-utils.go @@ -58,8 +58,11 @@ const ( mpartMetaPrefix = "multipart" // MinIO Multipart meta prefix. minioMetaMultipartBucket = minioMetaBucket + SlashSeparator + mpartMetaPrefix - // MinIO Tmp meta prefix. + // MinIO tmp meta prefix. minioMetaTmpBucket = minioMetaBucket + "/tmp" + // MinIO tmp meta prefix for deleted objects. + minioMetaTmpDeletedBucket = minioMetaTmpBucket + "/.trash" + // DNS separator (period), used for bucket name validation. dnsDelimiter = "." // On compressed files bigger than this; diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 11874da98..47c8dd112 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -940,7 +940,7 @@ func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi F if !isXL2V1Format(buf) { // Delete the meta file, if there are no more versions the // top level parent is automatically removed. 
- return deleteFile(volumeDir, pathJoin(volumeDir, path), true) + return s.deleteFile(volumeDir, pathJoin(volumeDir, path), true) } var xlMeta xlMetaV2 @@ -967,7 +967,8 @@ func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi F return err } - if err = removeAll(filePath); err != nil { + tmpuuid := mustGetUUID() + if err = renameAll(filePath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, tmpuuid)); err != nil { return err } } @@ -985,7 +986,7 @@ func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi F return err } - return deleteFile(volumeDir, filePath, false) + return s.deleteFile(volumeDir, filePath, false) } // WriteMetadata - writes FileInfo metadata for path at `xl.meta` @@ -1828,7 +1829,7 @@ func (s *xlStorage) CheckFile(ctx context.Context, volume string, path string) e // move up the tree, deleting empty parent directories until it finds one // with files in it. Returns nil for a non-empty directory even when // recursive is set to false. -func deleteFile(basePath, deletePath string, recursive bool) error { +func (s *xlStorage) deleteFile(basePath, deletePath string, recursive bool) error { if basePath == "" || deletePath == "" { return nil } @@ -1841,7 +1842,8 @@ func deleteFile(basePath, deletePath string, recursive bool) error { var err error if recursive { - err = os.RemoveAll(deletePath) + tmpuuid := mustGetUUID() + err = renameAll(deletePath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, tmpuuid)) } else { err = os.Remove(deletePath) } @@ -1872,7 +1874,7 @@ func deleteFile(basePath, deletePath string, recursive bool) error { // Delete parent directory obviously not recursively. Errors for // parent directories shouldn't trickle down. 
- deleteFile(basePath, deletePath, false) + s.deleteFile(basePath, deletePath, false) return nil } @@ -1910,46 +1912,7 @@ func (s *xlStorage) Delete(ctx context.Context, volume string, path string, recu } // Delete file and delete parent directory as well if it's empty. - return deleteFile(volumeDir, filePath, recursive) -} - -func (s *xlStorage) DeleteFileBulk(volume string, paths []string) (errs []error, err error) { - atomic.AddInt32(&s.activeIOCount, 1) - defer func() { - atomic.AddInt32(&s.activeIOCount, -1) - }() - - volumeDir, err := s.getVolDir(volume) - if err != nil { - return nil, err - } - - // Stat a volume entry. - _, err = os.Lstat(volumeDir) - if err != nil { - if osIsNotExist(err) { - return nil, errVolumeNotFound - } else if osIsPermission(err) { - return nil, errVolumeAccessDenied - } else if isSysErrIO(err) { - return nil, errFaultyDisk - } - return nil, err - } - - errs = make([]error, len(paths)) - // Following code is needed so that we retain SlashSeparator - // suffix if any in path argument. - for idx, path := range paths { - filePath := pathJoin(volumeDir, path) - errs[idx] = checkPathLength(filePath) - if errs[idx] != nil { - continue - } - // Delete file and delete parent directory as well if its empty. - errs[idx] = deleteFile(volumeDir, filePath, false) - } - return + return s.deleteFile(volumeDir, filePath, recursive) } // RenameData - rename source path to destination path atomically, metadata and data directory. 
@@ -2180,8 +2143,10 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, // Commit data if srcDataPath != "" { - removeAll(oldDstDataPath) - removeAll(dstDataPath) + tmpuuid := mustGetUUID() + renameAll(oldDstDataPath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, tmpuuid)) + tmpuuid = mustGetUUID() + renameAll(dstDataPath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, tmpuuid)) if err = renameAll(srcDataPath, dstDataPath); err != nil { return osErrToFileErr(err) } @@ -2194,12 +2159,12 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, // Remove parent dir of the source file if empty if parentDir := pathutil.Dir(srcFilePath); isDirEmpty(parentDir) { - deleteFile(srcVolumeDir, parentDir, false) + s.deleteFile(srcVolumeDir, parentDir, false) } if srcDataPath != "" { if parentDir := pathutil.Dir(srcDataPath); isDirEmpty(parentDir) { - deleteFile(srcVolumeDir, parentDir, false) + s.deleteFile(srcVolumeDir, parentDir, false) } } @@ -2286,7 +2251,7 @@ func (s *xlStorage) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolum // Remove parent dir of the source file if empty if parentDir := pathutil.Dir(srcFilePath); isDirEmpty(parentDir) { - deleteFile(srcVolumeDir, parentDir, false) + s.deleteFile(srcVolumeDir, parentDir, false) } return nil