Mirror of https://github.com/minio/minio.git (synced 2025-11-20 18:06:10 -05:00)
fix: Avoid double usage calculation on every restart (#8856)
On every restart of the server, data usage was recalculated immediately, which is wasteful; instead, the routine now waits a sufficient amount of time before starting the crawl. This PR also avoids a lot of double allocations through strings, optimizes the usage of string builders, and avoids crawling through symbolic links. Fixes #8844
Committed by kannappanr
Parent: e2b3c083aa
Commit: f14f60a487
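
One of the allocation patterns the commit message alludes to is building object paths by repeated string concatenation, which reallocates on every `+`; a pre-sized strings.Builder does the whole join in a single allocation. The sketch below is a generic illustration of that technique under that assumption, not code taken from this commit (joinPath is a hypothetical helper):

package main

import (
    "fmt"
    "strings"
)

// joinPath builds "bucket/prefix/object" with one allocation by
// pre-sizing the builder, instead of one allocation per '+'.
func joinPath(elems ...string) string {
    if len(elems) == 0 {
        return ""
    }
    n := len(elems) - 1 // room for the '/' separators
    for _, e := range elems {
        n += len(e)
    }
    var b strings.Builder
    b.Grow(n) // single up-front allocation
    for i, e := range elems {
        if i > 0 {
            b.WriteByte('/')
        }
        b.WriteString(e)
    }
    return b.String()
}

func main() {
    fmt.Println(joinPath("bucket", "prefix", "object.txt"))
}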
@@ -20,6 +20,8 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"os"
+	"path/filepath"
 	"time"

 	jsoniter "github.com/json-iterator/go"
@@ -48,40 +50,36 @@ func runDataUsageInfoUpdateRoutine() {
 		break
 	}

-	ctx := context.Background()
-
-	switch v := objAPI.(type) {
-	case *xlZones:
-		runDataUsageInfoForXLZones(ctx, v, GlobalServiceDoneCh)
-	case *FSObjects:
-		runDataUsageInfoForFS(ctx, v, GlobalServiceDoneCh)
-	default:
-		return
-	}
+	runDataUsageInfo(context.Background(), objAPI, GlobalServiceDoneCh)
 }

-func runDataUsageInfoForFS(ctx context.Context, fsObj *FSObjects, endCh <-chan struct{}) {
-	t := time.NewTicker(dataUsageCrawlInterval)
-	defer t.Stop()
-	for {
-		// Get data usage info of the FS Object
-		usageInfo := fsObj.crawlAndGetDataUsageInfo(ctx, endCh)
-		// Save the data usage in the disk
-		err := storeDataUsageInBackend(ctx, fsObj, usageInfo)
-		if err != nil {
-			logger.LogIf(ctx, err)
-		}
-		select {
-		case <-endCh:
-			return
-		// Wait until the next crawl interval
-		case <-t.C:
-		}
-	}
-}
+// timeToNextCrawl returns the duration until next crawl should occur
+// this is validated by verifying the LastUpdate time.
+func timeToCrawl(ctx context.Context, objAPI ObjectLayer) time.Duration {
+	dataUsageInfo, err := loadDataUsageFromBackend(ctx, objAPI)
+	if err != nil {
+		// Upon an error wait for like 10
+		// seconds to start the crawler.
+		return 10 * time.Second
+	}
+	// File indeed doesn't exist when LastUpdate is zero
+	// so we have never crawled, start crawl right away.
+	if dataUsageInfo.LastUpdate.IsZero() {
+		return 1 * time.Second
+	}
+	waitDuration := dataUsageInfo.LastUpdate.Sub(UTCNow())
+	if waitDuration > dataUsageCrawlInterval {
+		// Waited long enough start crawl in a 1 second
+		return 1 * time.Second
+	}
+	// No crawling needed, ask the routine to wait until
+	// the daily interval 12hrs - delta between last update
+	// with current time.
+	return dataUsageCrawlInterval - waitDuration
+}

-func runDataUsageInfoForXLZones(ctx context.Context, z *xlZones, endCh <-chan struct{}) {
-	locker := z.NewNSLock(ctx, minioMetaBucket, "leader-data-usage-info")
+func runDataUsageInfo(ctx context.Context, objAPI ObjectLayer, endCh <-chan struct{}) {
+	locker := objAPI.NewNSLock(ctx, minioMetaBucket, "leader-data-usage-info")
 	for {
 		err := locker.GetLock(newDynamicTimeout(time.Millisecond, time.Millisecond))
 		if err != nil {
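The gating logic above is the heart of the fix: instead of crawling unconditionally on startup, timeToCrawl consults the persisted LastUpdate and only schedules an immediate crawl when no usage file exists yet or the interval has lapsed. For example, with a 12h interval and a crawl that finished 3h before a restart, the routine should sleep roughly 9h rather than crawl again. A self-contained sketch of that intended arithmetic, with illustrative names (crawlInterval, nextCrawlIn) rather than the server's identifiers:

package main

import (
    "fmt"
    "time"
)

const crawlInterval = 12 * time.Hour // stands in for dataUsageCrawlInterval

// nextCrawlIn mirrors the gating above: crawl almost immediately when we
// have never crawled or are overdue, otherwise wait out the remainder.
func nextCrawlIn(lastUpdate time.Time) time.Duration {
    if lastUpdate.IsZero() {
        return time.Second // no usage file yet: start right away
    }
    elapsed := time.Now().UTC().Sub(lastUpdate)
    if elapsed > crawlInterval {
        return time.Second // last crawl too long ago: start right away
    }
    return crawlInterval - elapsed // sleep out the rest of the interval
}

func main() {
    lastCrawl := time.Now().UTC().Add(-3 * time.Hour)
    fmt.Println(nextCrawlIn(lastCrawl)) // prints roughly 9h0m0s
}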
@@ -93,19 +91,17 @@ func runDataUsageInfoForXLZones(ctx context.Context, z *xlZones, endCh <-chan struct{}) {
 		break
 	}

-	t := time.NewTicker(dataUsageCrawlInterval)
-	defer t.Stop()
 	for {
-		usageInfo := z.crawlAndGetDataUsage(ctx, endCh)
-		err := storeDataUsageInBackend(ctx, z, usageInfo)
-		if err != nil {
-			logger.LogIf(ctx, err)
-		}
+		wait := timeToCrawl(ctx, objAPI)
 		select {
 		case <-endCh:
 			locker.Unlock()
 			return
-		case <-t.C:
+		case <-time.NewTimer(wait).C:
+			// Crawl only when no previous crawl has occurred,
+			// or its been too long since last crawl.
+			err := storeDataUsageInBackend(ctx, objAPI, objAPI.CrawlAndGetDataUsage(ctx, endCh))
+			logger.LogIf(ctx, err)
 		}
 	}
 }
@@ -131,7 +127,10 @@ func loadDataUsageFromBackend(ctx context.Context, objAPI ObjectLayer) (DataUsageInfo, error) {

 	err := objAPI.GetObject(ctx, minioMetaBackgroundOpsBucket, dataUsageObjName, 0, -1, &dataUsageInfoJSON, "", ObjectOptions{})
 	if err != nil {
-		return DataUsageInfo{}, nil
+		if isErrObjectNotFound(err) {
+			return DataUsageInfo{}, nil
+		}
+		return DataUsageInfo{}, toObjectErr(err, minioMetaBackgroundOpsBucket, dataUsageObjName)
 	}

 	var dataUsageInfo DataUsageInfo
@@ -143,3 +142,85 @@ func loadDataUsageFromBackend(ctx context.Context, objAPI ObjectLayer) (DataUsageInfo, error) {

 	return dataUsageInfo, nil
 }
+
+// Item represents each file while walking.
+type Item struct {
+	Path string
+	Typ  os.FileMode
+}
+
+type getSizeFn func(item Item) (int64, error)
+type activeIOFn func() error
+
+func updateUsage(basePath string, endCh <-chan struct{}, waitForLowActiveIO activeIOFn, getSize getSizeFn) DataUsageInfo {
+	var dataUsageInfo = DataUsageInfo{
+		BucketsSizes:          make(map[string]uint64),
+		ObjectsSizesHistogram: make(map[string]uint64),
+	}
+
+	itemCh := make(chan Item)
+	skipCh := make(chan error)
+	defer close(skipCh)
+
+	go func() {
+		defer close(itemCh)
+		fastWalk(basePath, func(path string, typ os.FileMode) error {
+			if err := waitForLowActiveIO(); err != nil {
+				return filepath.SkipDir
+			}
+
+			select {
+			case <-endCh:
+				return filepath.SkipDir
+			case itemCh <- Item{path, typ}:
+			}
+			return <-skipCh
+		})
+	}()
+
+	for {
+		select {
+		case <-endCh:
+			return dataUsageInfo
+		case item, ok := <-itemCh:
+			if !ok {
+				return dataUsageInfo
+			}
+
+			bucket, entry := path2BucketObjectWithBasePath(basePath, item.Path)
+			if bucket == "" {
+				skipCh <- nil
+				continue
+			}
+
+			if isReservedOrInvalidBucket(bucket, false) {
+				skipCh <- filepath.SkipDir
+				continue
+			}
+
+			if entry == "" && item.Typ&os.ModeDir != 0 {
+				dataUsageInfo.BucketsCount++
+				dataUsageInfo.BucketsSizes[bucket] = 0
+				skipCh <- nil
+				continue
+			}
+
+			if item.Typ&os.ModeDir != 0 {
+				skipCh <- nil
+				continue
+			}
+
+			size, err := getSize(item)
+			if err != nil {
+				skipCh <- errSkipFile
+				continue
+			}
+
+			dataUsageInfo.ObjectsCount++
+			dataUsageInfo.ObjectsTotalSize += uint64(size)
+			dataUsageInfo.BucketsSizes[bucket] += uint64(size)
+			dataUsageInfo.ObjectsSizesHistogram[objSizeToHistoInterval(uint64(size))]++
+			skipCh <- nil
+		}
+	}
+}
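
In updateUsage above, only entries whose os.FileMode has os.ModeDir set are treated as directories, so a symlinked directory (which carries os.ModeSymlink instead) is never descended into; that is consistent with the commit message's note about avoiding crawls through symbolic links. A minimal standalone walker with the same property, built on the standard library rather than fastWalk (walkNoSymlinks is an illustrative name, not part of this commit):

package main

import (
    "fmt"
    "os"
    "path/filepath"
)

// walkNoSymlinks visits regular files under root without following
// symbolic links; filepath.Walk lstats entries, so a link to a
// directory is reported as a symlink and never descended into.
func walkNoSymlinks(root string, visit func(path string, size int64)) error {
    return filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
        if err != nil {
            return nil // skip unreadable entries, keep walking
        }
        if info.Mode()&os.ModeSymlink != 0 {
            return nil // do not follow symbolic links
        }
        if info.Mode().IsRegular() {
            visit(path, info.Size())
        }
        return nil
    })
}

func main() {
    var files, bytes int64
    _ = walkNoSymlinks(".", func(_ string, size int64) {
        files++
        bytes += size
    })
    fmt.Printf("%d files, %d bytes\n", files, bytes)
}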