mirror of
https://github.com/minio/minio.git
synced 2025-11-22 18:47:43 -05:00
re-implement data usage crawler to be more efficient (#9075)
Implementation overview: https://gist.github.com/klauspost/1801c858d5e0df391114436fdad6987b
This commit is contained in:
41
cmd/fs-v1.go
41
cmd/fs-v1.go
@@ -37,12 +37,11 @@ import (
|
||||
"github.com/minio/minio/cmd/config"
|
||||
xhttp "github.com/minio/minio/cmd/http"
|
||||
"github.com/minio/minio/cmd/logger"
|
||||
|
||||
bucketsse "github.com/minio/minio/pkg/bucket/encryption"
|
||||
"github.com/minio/minio/pkg/bucket/lifecycle"
|
||||
"github.com/minio/minio/pkg/bucket/object/tagging"
|
||||
"github.com/minio/minio/pkg/bucket/policy"
|
||||
|
||||
"github.com/minio/minio/pkg/color"
|
||||
"github.com/minio/minio/pkg/lock"
|
||||
"github.com/minio/minio/pkg/madmin"
|
||||
"github.com/minio/minio/pkg/mimedb"
|
||||
@@ -112,7 +111,7 @@ func initMetaVolumeFS(fsPath, fsUUID string) error {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := os.MkdirAll(pathJoin(fsPath, minioMetaBackgroundOpsBucket), 0777); err != nil {
|
||||
if err := os.MkdirAll(pathJoin(fsPath, dataUsageBucket), 0777); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -235,9 +234,26 @@ func (fs *FSObjects) waitForLowActiveIO() {
|
||||
}
|
||||
|
||||
// CrawlAndGetDataUsage returns data usage stats of the current FS deployment
|
||||
func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo {
|
||||
dataUsageInfo := updateUsage(fs.fsPath, endCh, fs.waitForLowActiveIO, func(item Item) (int64, error) {
|
||||
// Get file size, symlinks which cannot bex
|
||||
func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, updates chan<- DataUsageInfo) error {
|
||||
// Load bucket totals
|
||||
var oldCache dataUsageCache
|
||||
err := oldCache.load(ctx, fs, dataUsageCacheName)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if oldCache.Info.Name == "" {
|
||||
oldCache.Info.Name = dataUsageRoot
|
||||
}
|
||||
if dataUsageDebug {
|
||||
logger.Info(color.Green("FSObjects.CrawlAndGetDataUsage:") + " Start crawl cycle")
|
||||
}
|
||||
buckets, err := fs.ListBuckets(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
t := time.Now()
|
||||
cache, err := updateUsage(ctx, fs.fsPath, oldCache, fs.waitForLowActiveIO, func(item Item) (int64, error) {
|
||||
// Get file size, symlinks which cannot be
|
||||
// followed are automatically filtered by fastwalk.
|
||||
fi, err := os.Stat(item.Path)
|
||||
if err != nil {
|
||||
@@ -245,11 +261,16 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan stru
|
||||
}
|
||||
return fi.Size(), nil
|
||||
})
|
||||
if dataUsageDebug {
|
||||
logger.Info(color.Green("FSObjects.CrawlAndGetDataUsage:")+" Crawl time: %v", time.Since(t))
|
||||
}
|
||||
// Even if there was an error, the new cache may have better info.
|
||||
if cache.Info.LastUpdate.After(oldCache.Info.LastUpdate) {
|
||||
logger.LogIf(ctx, cache.save(ctx, fs, dataUsageCacheName))
|
||||
updates <- cache.dui(dataUsageRoot, buckets)
|
||||
}
|
||||
|
||||
dataUsageInfo.LastUpdate = UTCNow()
|
||||
atomic.StoreUint64(&fs.totalUsed, dataUsageInfo.ObjectsTotalSize)
|
||||
|
||||
return dataUsageInfo
|
||||
return err
|
||||
}
|
||||
|
||||
/// Bucket operations
|
||||
|
||||
Reference in New Issue
Block a user