mirror of https://github.com/minio/minio.git
crawler: Remove waitForLowActiveIO (#10667)
Only use dynamic delays for the crawler. Even though the max wait was 1 second, the number of waits could severely impact crawler speed. Instead of relying on a global metric, we use stateless local delays to keep the crawler running at a speed better adjusted to current conditions. The only place where waitForLowActiveIO is kept is before bitrot checks, when those are enabled.
This commit is contained in:
parent
9c042a503b
commit
03991c5d41
|
@ -135,12 +135,11 @@ type cachedFolder struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
type folderScanner struct {
|
type folderScanner struct {
|
||||||
root string
|
root string
|
||||||
getSize getSizeFn
|
getSize getSizeFn
|
||||||
oldCache dataUsageCache
|
oldCache dataUsageCache
|
||||||
newCache dataUsageCache
|
newCache dataUsageCache
|
||||||
withFilter *bloomFilter
|
withFilter *bloomFilter
|
||||||
waitForLowActiveIO func()
|
|
||||||
|
|
||||||
dataUsageCrawlMult float64
|
dataUsageCrawlMult float64
|
||||||
dataUsageCrawlDebug bool
|
dataUsageCrawlDebug bool
|
||||||
|
@ -155,7 +154,7 @@ type folderScanner struct {
|
||||||
// The returned cache will always be valid, but may not be updated from the existing.
|
// The returned cache will always be valid, but may not be updated from the existing.
|
||||||
// Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler.
|
// Dynamic delays (sleepDuration) are applied between operations to throttle the crawler.
|
||||||
// If the supplied context is canceled the function will return at the first chance.
|
// If the supplied context is canceled the function will return at the first chance.
|
||||||
func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, waitForLowActiveIO func(), getSize getSizeFn) (dataUsageCache, error) {
|
func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, getSize getSizeFn) (dataUsageCache, error) {
|
||||||
t := UTCNow()
|
t := UTCNow()
|
||||||
|
|
||||||
logPrefix := color.Green("data-usage: ")
|
logPrefix := color.Green("data-usage: ")
|
||||||
|
@ -183,7 +182,6 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
|
||||||
getSize: getSize,
|
getSize: getSize,
|
||||||
oldCache: cache,
|
oldCache: cache,
|
||||||
newCache: dataUsageCache{Info: cache.Info},
|
newCache: dataUsageCache{Info: cache.Info},
|
||||||
waitForLowActiveIO: waitForLowActiveIO,
|
|
||||||
newFolders: nil,
|
newFolders: nil,
|
||||||
existingFolders: nil,
|
existingFolders: nil,
|
||||||
dataUsageCrawlMult: delayMult,
|
dataUsageCrawlMult: delayMult,
|
||||||
|
@ -376,7 +374,6 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
f.waitForLowActiveIO()
|
|
||||||
sleepDuration(dataCrawlSleepPerFolder, f.dataUsageCrawlMult)
|
sleepDuration(dataCrawlSleepPerFolder, f.dataUsageCrawlMult)
|
||||||
|
|
||||||
cache := dataUsageEntry{}
|
cache := dataUsageEntry{}
|
||||||
|
@ -424,7 +421,6 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
f.waitForLowActiveIO()
|
|
||||||
// Dynamic time delay.
|
// Dynamic time delay.
|
||||||
t := UTCNow()
|
t := UTCNow()
|
||||||
|
|
||||||
|
@ -484,7 +480,9 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
||||||
// If that doesn't bring it back we remove the folder and assume it was deleted.
|
// If that doesn't bring it back we remove the folder and assume it was deleted.
|
||||||
// This means that the next run will not look for it.
|
// This means that the next run will not look for it.
|
||||||
for k := range existing {
|
for k := range existing {
|
||||||
f.waitForLowActiveIO()
|
// Dynamic time delay.
|
||||||
|
t := UTCNow()
|
||||||
|
|
||||||
bucket, prefix := path2BucketObject(k)
|
bucket, prefix := path2BucketObject(k)
|
||||||
if f.dataUsageCrawlDebug {
|
if f.dataUsageCrawlDebug {
|
||||||
logger.Info(color.Green("folder-scanner:")+" checking disappeared folder: %v/%v", bucket, prefix)
|
logger.Info(color.Green("folder-scanner:")+" checking disappeared folder: %v/%v", bucket, prefix)
|
||||||
|
@ -498,6 +496,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
||||||
versionID: versionID,
|
versionID: versionID,
|
||||||
}, madmin.HealItemObject)
|
}, madmin.HealItemObject)
|
||||||
})
|
})
|
||||||
|
sleepDuration(time.Since(t), f.dataUsageCrawlMult)
|
||||||
|
|
||||||
if f.dataUsageCrawlDebug && err != nil {
|
if f.dataUsageCrawlDebug && err != nil {
|
||||||
logger.Info(color.Green("healObjects:")+" checking returned value %v", err)
|
logger.Info(color.Green("healObjects:")+" checking returned value %v", err)
|
||||||
|
@ -535,7 +534,6 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder)
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
|
||||||
f.waitForLowActiveIO()
|
|
||||||
if typ&os.ModeDir != 0 {
|
if typ&os.ModeDir != 0 {
|
||||||
dirStack = append(dirStack, entName)
|
dirStack = append(dirStack, entName)
|
||||||
err := readDirFn(path.Join(dirStack...), addDir)
|
err := readDirFn(path.Join(dirStack...), addDir)
|
||||||
|
|
|
@ -62,7 +62,7 @@ func TestDataUsageUpdate(t *testing.T) {
|
||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, func() {}, getSize)
|
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -183,7 +183,7 @@ func TestDataUsageUpdate(t *testing.T) {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
createUsageTestFiles(t, base, bucket, files)
|
createUsageTestFiles(t, base, bucket, files)
|
||||||
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
|
got, err = crawlDataFolder(context.Background(), base, got, getSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -268,7 +268,7 @@ func TestDataUsageUpdate(t *testing.T) {
|
||||||
}
|
}
|
||||||
// Changed dir must be picked up in this many cycles.
|
// Changed dir must be picked up in this many cycles.
|
||||||
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
||||||
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
|
got, err = crawlDataFolder(context.Background(), base, got, getSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -355,7 +355,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
|
||||||
}
|
}
|
||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, func() {}, getSize)
|
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -465,7 +465,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
createUsageTestFiles(t, base, "", files)
|
createUsageTestFiles(t, base, "", files)
|
||||||
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
|
got, err = crawlDataFolder(context.Background(), base, got, getSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -548,7 +548,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
|
||||||
}
|
}
|
||||||
// Changed dir must be picked up in this many cycles.
|
// Changed dir must be picked up in this many cycles.
|
||||||
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
for i := 0; i < dataUsageUpdateDirCycles; i++ {
|
||||||
got, err = crawlDataFolder(context.Background(), base, got, func() {}, getSize)
|
got, err = crawlDataFolder(context.Background(), base, got, getSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
@ -652,7 +652,7 @@ func TestDataUsageCacheSerialize(t *testing.T) {
|
||||||
}
|
}
|
||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, func() {}, getSize)
|
want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -327,7 +327,7 @@ func (fs *FSObjects) crawlBucket(ctx context.Context, bucket string, cache dataU
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load bucket info.
|
// Load bucket info.
|
||||||
cache, err = crawlDataFolder(ctx, fs.fsPath, cache, fs.waitForLowActiveIO, func(item crawlItem) (int64, error) {
|
cache, err = crawlDataFolder(ctx, fs.fsPath, cache, func(item crawlItem) (int64, error) {
|
||||||
bucket, object := item.bucket, item.objectPath()
|
bucket, object := item.bucket, item.objectPath()
|
||||||
fsMetaBytes, err := ioutil.ReadFile(pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile))
|
fsMetaBytes, err := ioutil.ReadFile(pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile))
|
||||||
if err != nil && !os.IsNotExist(err) {
|
if err != nil && !os.IsNotExist(err) {
|
||||||
|
|
|
@ -372,7 +372,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
|
||||||
}
|
}
|
||||||
opts := globalCrawlerConfig
|
opts := globalCrawlerConfig
|
||||||
|
|
||||||
dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, s.waitForLowActiveIO, func(item crawlItem) (int64, error) {
|
dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, func(item crawlItem) (int64, error) {
|
||||||
// Look for `xl.meta/xl.json' at the leaf.
|
// Look for `xl.meta/xl.json' at the leaf.
|
||||||
if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) &&
|
if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) &&
|
||||||
!strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {
|
!strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {
|
||||||
|
|
Loading…
Reference in New Issue