fix: data usage crawler env handling, usage-cache.bin location (#9163)

canonicalize the ENVs such that we can bring these ENVs 
as part of the config values, as a subsequent change.

- fix location of per bucket usage to `.minio.sys/buckets/<bucket_name>/usage-cache.bin`
- fix location of the overall usage in `json` at `.minio.sys/buckets/.usage.json`
  (avoid conflicts with a bucket named `usage.json` )
- fix location of the overall usage in `msgp` at `.minio.sys/buckets/.usage.bin`
  (avoid conflicts with a bucket named `usage.bin`
This commit is contained in:
Harshavardhana 2020-03-19 09:47:47 -07:00 committed by GitHub
parent d45a1808f2
commit b1a2169dcc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 94 additions and 105 deletions

View File

@ -29,7 +29,6 @@ import (
"github.com/cespare/xxhash/v2" "github.com/cespare/xxhash/v2"
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/color"
"github.com/minio/minio/pkg/hash" "github.com/minio/minio/pkg/hash"
"github.com/tinylib/msgp/msgp" "github.com/tinylib/msgp/msgp"
) )
@ -114,7 +113,7 @@ func (d *dataUsageCache) find(path string) *dataUsageEntry {
func (d *dataUsageCache) dui(path string, buckets []BucketInfo) DataUsageInfo { func (d *dataUsageCache) dui(path string, buckets []BucketInfo) DataUsageInfo {
e := d.find(path) e := d.find(path)
if e == nil { if e == nil {
return DataUsageInfo{LastUpdate: time.Now()} return DataUsageInfo{LastUpdate: UTCNow()}
} }
flat := d.flatten(*e) flat := d.flatten(*e)
return DataUsageInfo{ return DataUsageInfo{
@ -213,9 +212,6 @@ func (d *dataUsageCache) pathSizes(buckets []BucketInfo) map[string]uint64 {
for _, bucket := range buckets { for _, bucket := range buckets {
e := d.find(bucket.Name) e := d.find(bucket.Name)
if e == nil { if e == nil {
if dataUsageDebug {
logger.Info(color.Green("data-usage:")+" Bucket not found in cache: %v", bucket.Name)
}
continue continue
} }
flat := d.flatten(*e) flat := d.flatten(*e)

View File

@ -35,12 +35,11 @@ import (
) )
const ( const (
dataUsageObjName = "usage.json" dataUsageObjName = ".usage.json"
dataUsageCacheName = "usage-cache.bin" dataUsageCacheName = ".usage-cache.bin"
dataUsageBucketCacheDir = "usage-caches" envDataUsageCrawlConf = "MINIO_DISK_USAGE_CRAWL_ENABLE"
dataUsageCrawlConf = "MINIO_DISK_USAGE_CRAWL" envDataUsageCrawlDelay = "MINIO_DISK_USAGE_CRAWL_DELAY"
dataUsageCrawlDelay = "MINIO_DISK_USAGE_CRAWL_DELAY" envDataUsageCrawlDebug = "MINIO_DISK_USAGE_CRAWL_DEBUG"
dataUsageDebug = true
dataUsageSleepPerFolder = 1 * time.Millisecond dataUsageSleepPerFolder = 1 * time.Millisecond
dataUsageSleepDefMult = 10.0 dataUsageSleepDefMult = 10.0
dataUsageUpdateDirCycles = 16 dataUsageUpdateDirCycles = 16
@ -51,11 +50,9 @@ const (
// initDataUsageStats will start the crawler unless disabled. // initDataUsageStats will start the crawler unless disabled.
func initDataUsageStats() { func initDataUsageStats() {
dataUsageEnabled, err := config.ParseBool(env.Get(dataUsageCrawlConf, config.EnableOn)) if env.Get(envDataUsageCrawlConf, config.EnableOn) == config.EnableOn {
if err == nil && !dataUsageEnabled { go runDataUsageInfoUpdateRoutine()
return
} }
go runDataUsageInfoUpdateRoutine()
} }
// runDataUsageInfoUpdateRoutine will contain the main crawler. // runDataUsageInfoUpdateRoutine will contain the main crawler.
@ -89,9 +86,6 @@ func runDataUsageInfo(ctx context.Context, objAPI ObjectLayer) {
// data usage calculator role for its lifetime. // data usage calculator role for its lifetime.
break break
} }
if dataUsageDebug {
logger.Info(color.Green("runDataUsageInfo:") + " Starting crawler master")
}
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -111,14 +105,11 @@ func runDataUsageInfo(ctx context.Context, objAPI ObjectLayer) {
// storeDataUsageInBackend will store all objects sent on the gui channel until closed. // storeDataUsageInBackend will store all objects sent on the gui channel until closed.
func storeDataUsageInBackend(ctx context.Context, objAPI ObjectLayer, gui <-chan DataUsageInfo) { func storeDataUsageInBackend(ctx context.Context, objAPI ObjectLayer, gui <-chan DataUsageInfo) {
for dataUsageInfo := range gui { for dataUsageInfo := range gui {
dataUsageJSON, err := json.MarshalIndent(dataUsageInfo, "", " ") dataUsageJSON, err := json.Marshal(dataUsageInfo)
if err != nil { if err != nil {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
continue continue
} }
if dataUsageDebug {
logger.Info(color.Green("data-usage:")+" Received update: %s", string(dataUsageJSON))
}
size := int64(len(dataUsageJSON)) size := int64(len(dataUsageJSON))
r, err := hash.NewReader(bytes.NewReader(dataUsageJSON), size, "", "", size, false) r, err := hash.NewReader(bytes.NewReader(dataUsageJSON), size, "", "", size, false)
if err != nil { if err != nil {
@ -172,6 +163,9 @@ type folderScanner struct {
newCache dataUsageCache newCache dataUsageCache
waitForLowActiveIO func() waitForLowActiveIO func()
dataUsageCrawlMult float64
dataUsageCrawlDebug bool
newFolders []cachedFolder newFolders []cachedFolder
existingFolders []cachedFolder existingFolders []cachedFolder
} }
@ -194,12 +188,6 @@ func sleepDuration(d time.Duration, x float64) {
// If final is not provided the folders found are returned from the function. // If final is not provided the folders found are returned from the function.
func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFolder, final bool) ([]cachedFolder, error) { func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFolder, final bool) ([]cachedFolder, error) {
var nextFolders []cachedFolder var nextFolders []cachedFolder
delayMult := dataUsageSleepDefMult
if mult := os.Getenv(dataUsageCrawlDelay); mult != "" {
if d, err := strconv.ParseFloat(mult, 64); err == nil {
delayMult = d
}
}
done := ctx.Done() done := ctx.Done()
for _, folder := range folders { for _, folder := range folders {
select { select {
@ -207,8 +195,9 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
return nil, ctx.Err() return nil, ctx.Err()
default: default:
} }
f.waitForLowActiveIO() f.waitForLowActiveIO()
sleepDuration(dataUsageSleepPerFolder, delayMult) sleepDuration(dataUsageSleepPerFolder, f.dataUsageCrawlMult)
cache := dataUsageEntry{} cache := dataUsageEntry{}
thisHash := hashPath(folder.name) thisHash := hashPath(folder.name)
@ -218,14 +207,14 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
entName = path.Clean(path.Join(folder.name, entName)) entName = path.Clean(path.Join(folder.name, entName))
bucket, _ := path2BucketObjectWithBasePath(f.root, entName) bucket, _ := path2BucketObjectWithBasePath(f.root, entName)
if bucket == "" { if bucket == "" {
if dataUsageDebug { if f.dataUsageCrawlDebug {
logger.Info(color.Green("data-usage:")+" no bucket (%s,%s)", f.root, entName) logger.Info(color.Green("data-usage:")+" no bucket (%s,%s)", f.root, entName)
} }
return nil return nil
} }
if isReservedOrInvalidBucket(bucket, false) { if isReservedOrInvalidBucket(bucket, false) {
if dataUsageDebug { if f.dataUsageCrawlDebug {
logger.Info(color.Green("data-usage:")+" invalid bucket: %v, entry: %v", bucket, entName) logger.Info(color.Green("data-usage:")+" invalid bucket: %v, entry: %v", bucket, entName)
} }
return nil return nil
@ -257,12 +246,12 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
} }
f.waitForLowActiveIO() f.waitForLowActiveIO()
// Dynamic time delay. // Dynamic time delay.
t := time.Now() t := UTCNow()
// Get file size, ignore errors. // Get file size, ignore errors.
size, err := f.getSize(Item{Path: path.Join(f.root, entName), Typ: typ}) size, err := f.getSize(Item{Path: path.Join(f.root, entName), Typ: typ})
sleepDuration(time.Since(t), delayMult) sleepDuration(time.Since(t), f.dataUsageCrawlMult)
if err == errSkipFile { if err == errSkipFile {
return nil return nil
} }
@ -284,12 +273,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
// deepScanFolder will deep scan a folder and return the size if no error occurs. // deepScanFolder will deep scan a folder and return the size if no error occurs.
func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dataUsageEntry, error) { func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dataUsageEntry, error) {
var cache dataUsageEntry var cache dataUsageEntry
delayMult := dataUsageSleepDefMult
if mult := os.Getenv(dataUsageCrawlDelay); mult != "" {
if d, err := strconv.ParseFloat(mult, 64); err == nil {
delayMult = d
}
}
done := ctx.Done() done := ctx.Done()
var addDir func(entName string, typ os.FileMode) error var addDir func(entName string, typ os.FileMode) error
@ -307,11 +291,12 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dat
dirStack = append(dirStack, entName) dirStack = append(dirStack, entName)
err := readDirFn(path.Join(dirStack...), addDir) err := readDirFn(path.Join(dirStack...), addDir)
dirStack = dirStack[:len(dirStack)-1] dirStack = dirStack[:len(dirStack)-1]
sleepDuration(dataUsageSleepPerFolder, delayMult) sleepDuration(dataUsageSleepPerFolder, f.dataUsageCrawlMult)
return err return err
} }
// Dynamic time delay. // Dynamic time delay.
t := time.Now() t := UTCNow()
// Get file size, ignore errors. // Get file size, ignore errors.
dirStack = append(dirStack, entName) dirStack = append(dirStack, entName)
@ -321,7 +306,7 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dat
size, err := f.getSize(Item{Path: fileName, Typ: typ}) size, err := f.getSize(Item{Path: fileName, Typ: typ})
// Don't sleep for really small amount of time // Don't sleep for really small amount of time
sleepDuration(time.Since(t), delayMult) sleepDuration(time.Since(t), f.dataUsageCrawlMult)
if err == errSkipFile { if err == errSkipFile {
return nil return nil
@ -344,22 +329,39 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dat
// Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler. // Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler.
// If the supplied context is canceled the function will return at the first chance. // If the supplied context is canceled the function will return at the first chance.
func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, waitForLowActiveIO func(), getSize getSizeFn) (dataUsageCache, error) { func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, waitForLowActiveIO func(), getSize getSizeFn) (dataUsageCache, error) {
t := UTCNow()
dataUsageDebug := env.Get(envDataUsageCrawlDebug, config.EnableOff) == config.EnableOn
defer func() {
if dataUsageDebug {
logger.Info(color.Green("updateUsage")+" Crawl time at %s: %v", basePath, time.Since(t))
}
}()
if cache.Info.Name == "" { if cache.Info.Name == "" {
cache.Info.Name = dataUsageRoot cache.Info.Name = dataUsageRoot
} }
var logPrefix, logSuffix string
if dataUsageDebug { delayMult, err := strconv.ParseFloat(env.Get(envDataUsageCrawlDelay, "10.0"), 64)
logPrefix = color.Green("data-usage: ") if err != nil {
logSuffix = color.Blue(" - %v + %v", basePath, cache.Info.Name) logger.LogIf(ctx, err)
delayMult = dataUsageSleepDefMult
} }
s := folderScanner{ s := folderScanner{
root: basePath, root: basePath,
getSize: getSize, getSize: getSize,
oldCache: cache, oldCache: cache,
newCache: dataUsageCache{Info: cache.Info}, newCache: dataUsageCache{Info: cache.Info},
waitForLowActiveIO: waitForLowActiveIO, waitForLowActiveIO: waitForLowActiveIO,
newFolders: nil, newFolders: nil,
existingFolders: nil, existingFolders: nil,
dataUsageCrawlMult: delayMult,
dataUsageCrawlDebug: dataUsageDebug,
}
if s.dataUsageCrawlDebug {
logger.Info(color.Green("runDataUsageInfo:") + " Starting crawler master")
} }
done := ctx.Done() done := ctx.Done()
@ -369,14 +371,21 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
if cache.Info.Name != dataUsageRoot { if cache.Info.Name != dataUsageRoot {
flattenLevels-- flattenLevels--
} }
if dataUsageDebug {
var logPrefix, logSuffix string
if s.dataUsageCrawlDebug {
logPrefix = color.Green("data-usage: ")
logSuffix = color.Blue(" - %v + %v", basePath, cache.Info.Name)
}
if s.dataUsageCrawlDebug {
logger.Info(logPrefix+"Cycle: %v"+logSuffix, cache.Info.NextCycle) logger.Info(logPrefix+"Cycle: %v"+logSuffix, cache.Info.NextCycle)
} }
// Always scan flattenLevels deep. Cache root is level 0. // Always scan flattenLevels deep. Cache root is level 0.
todo := []cachedFolder{{name: cache.Info.Name}} todo := []cachedFolder{{name: cache.Info.Name}}
for i := 0; i < flattenLevels; i++ { for i := 0; i < flattenLevels; i++ {
if dataUsageDebug { if s.dataUsageCrawlDebug {
logger.Info(logPrefix+"Level %v, scanning %v directories."+logSuffix, i, len(todo)) logger.Info(logPrefix+"Level %v, scanning %v directories."+logSuffix, i, len(todo))
} }
select { select {
@ -392,9 +401,10 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
} }
} }
if dataUsageDebug { if s.dataUsageCrawlDebug {
logger.Info(logPrefix+"New folders: %v"+logSuffix, s.newFolders) logger.Info(logPrefix+"New folders: %v"+logSuffix, s.newFolders)
} }
// Add new folders first // Add new folders first
for _, folder := range s.newFolders { for _, folder := range s.newFolders {
select { select {
@ -419,9 +429,10 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
} }
} }
if dataUsageDebug { if s.dataUsageCrawlDebug {
logger.Info(logPrefix+"Existing folders: %v"+logSuffix, len(s.existingFolders)) logger.Info(logPrefix+"Existing folders: %v"+logSuffix, len(s.existingFolders))
} }
// Do selective scanning of existing folders. // Do selective scanning of existing folders.
for _, folder := range s.existingFolders { for _, folder := range s.existingFolders {
select { select {
@ -448,7 +459,7 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
s.newCache.replaceHashed(h, folder.parent, *du) s.newCache.replaceHashed(h, folder.parent, *du)
} }
s.newCache.Info.LastUpdate = time.Now() s.newCache.Info.LastUpdate = UTCNow()
s.newCache.Info.NextCycle++ s.newCache.Info.NextCycle++
return s.newCache, nil return s.newCache, nil
} }

View File

@ -18,7 +18,6 @@ package cmd
import ( import (
"context" "context"
"fmt"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
@ -30,8 +29,8 @@ type usageTestFile struct {
size int size int
} }
func Test_updateUsage(t *testing.T) { func TestDataUsageUpdate(t *testing.T) {
base, err := ioutil.TempDir("", "Test_updateUsage") base, err := ioutil.TempDir("", "TestDataUsageUpdate")
if err != nil { if err != nil {
t.Skip(err) t.Skip(err)
} }
@ -58,6 +57,7 @@ func Test_updateUsage(t *testing.T) {
} }
return 0, nil return 0, nil
} }
got, err := updateUsage(context.Background(), base, dataUsageCache{}, func() {}, getSize) got, err := updateUsage(context.Background(), base, dataUsageCache{}, func() {}, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
@ -316,17 +316,10 @@ func Test_updateUsage(t *testing.T) {
} }
}) })
} }
t.Log(got.StringAll())
t.Logf("Root, flat: %+v", got.flatten(*got.root()))
t.Logf("Root: %+v", *got.root())
t.Logf("/dir1/dira: %+v", *got.find("/dir1/dira"))
} }
func Test_updateUsagePrefix(t *testing.T) { func TestDataUsageUpdatePrefix(t *testing.T) {
base, err := ioutil.TempDir("", "Test_updateUsagePrefix") base, err := ioutil.TempDir("", "TestDataUpdateUsagePrefix")
if err != nil { if err != nil {
t.Skip(err) t.Skip(err)
} }
@ -593,12 +586,6 @@ func Test_updateUsagePrefix(t *testing.T) {
} }
}) })
} }
t.Log(got.StringAll())
t.Logf("Root, flat: %+v", got.flatten(*got.root()))
t.Logf("Root: %+v", *got.root())
t.Logf("bucket/dir1/dira: %+v", *got.find("bucket/dir1/dira"))
} }
func createUsageTestFiles(t *testing.T, base string, files []usageTestFile) { func createUsageTestFiles(t *testing.T, base string, files []usageTestFile) {
@ -614,8 +601,8 @@ func createUsageTestFiles(t *testing.T, base string, files []usageTestFile) {
} }
} }
func Test_dataUsageCacheSerialize(t *testing.T) { func TestDataUsageCacheSerialize(t *testing.T) {
base, err := ioutil.TempDir("", "Test_dataUsageCacheSerialize") base, err := ioutil.TempDir("", "TestDataUsageCacheSerialize")
if err != nil { if err != nil {
t.Skip(err) t.Skip(err)
} }
@ -646,19 +633,19 @@ func Test_dataUsageCacheSerialize(t *testing.T) {
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
b := want.serialize()
t.Log("serialize -> ", len(b), "bytes")
b := want.serialize()
var got dataUsageCache var got dataUsageCache
err = got.deserialize(b) err = got.deserialize(b)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
if got.Info.LastUpdate.IsZero() { if got.Info.LastUpdate.IsZero() {
t.Error("lastupdate not set") t.Error("lastupdate not set")
} }
if fmt.Sprint(want) == fmt.Sprint(got) { if !want.Info.LastUpdate.Equal(got.Info.LastUpdate) {
t.Fatalf("deserialize mismatch\nwant: %+v\ngot: %+v", want, got) t.Fatalf("deserialize mismatch\nwant: %+v\ngot: %+v", want, got)
} }
} }

View File

@ -41,7 +41,6 @@ import (
"github.com/minio/minio/pkg/bucket/lifecycle" "github.com/minio/minio/pkg/bucket/lifecycle"
"github.com/minio/minio/pkg/bucket/object/tagging" "github.com/minio/minio/pkg/bucket/object/tagging"
"github.com/minio/minio/pkg/bucket/policy" "github.com/minio/minio/pkg/bucket/policy"
"github.com/minio/minio/pkg/color"
"github.com/minio/minio/pkg/lock" "github.com/minio/minio/pkg/lock"
"github.com/minio/minio/pkg/madmin" "github.com/minio/minio/pkg/madmin"
"github.com/minio/minio/pkg/mimedb" "github.com/minio/minio/pkg/mimedb"
@ -244,14 +243,10 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, updates chan<- Da
if oldCache.Info.Name == "" { if oldCache.Info.Name == "" {
oldCache.Info.Name = dataUsageRoot oldCache.Info.Name = dataUsageRoot
} }
if dataUsageDebug {
logger.Info(color.Green("FSObjects.CrawlAndGetDataUsage:") + " Start crawl cycle")
}
buckets, err := fs.ListBuckets(ctx) buckets, err := fs.ListBuckets(ctx)
if err != nil { if err != nil {
return err return err
} }
t := time.Now()
cache, err := updateUsage(ctx, fs.fsPath, oldCache, fs.waitForLowActiveIO, func(item Item) (int64, error) { cache, err := updateUsage(ctx, fs.fsPath, oldCache, fs.waitForLowActiveIO, func(item Item) (int64, error) {
// Get file size, symlinks which cannot be // Get file size, symlinks which cannot be
// followed are automatically filtered by fastwalk. // followed are automatically filtered by fastwalk.
@ -261,9 +256,7 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, updates chan<- Da
} }
return fi.Size(), nil return fi.Size(), nil
}) })
if dataUsageDebug {
logger.Info(color.Green("FSObjects.CrawlAndGetDataUsage:")+" Crawl time: %v", time.Since(t))
}
// Even if there was an error, the new cache may have better info. // Even if there was an error, the new cache may have better info.
if cache.Info.LastUpdate.After(oldCache.Info.LastUpdate) { if cache.Info.LastUpdate.After(oldCache.Info.LastUpdate) {
logger.LogIf(ctx, cache.save(ctx, fs, dataUsageCacheName)) logger.LogIf(ctx, cache.save(ctx, fs, dataUsageCacheName))

View File

@ -19,14 +19,12 @@ package cmd
import ( import (
"context" "context"
"fmt" "fmt"
"path"
"sort" "sort"
"sync" "sync"
"time" "time"
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/bpool" "github.com/minio/minio/pkg/bpool"
"github.com/minio/minio/pkg/color"
"github.com/minio/minio/pkg/dsync" "github.com/minio/minio/pkg/dsync"
"github.com/minio/minio/pkg/madmin" "github.com/minio/minio/pkg/madmin"
"github.com/minio/minio/pkg/sync/errgroup" "github.com/minio/minio/pkg/sync/errgroup"
@ -321,11 +319,9 @@ func (xl xlObjects) crawlAndGetDataUsage(ctx context.Context, buckets []BucketIn
return return
default: default:
} }
if dataUsageDebug {
logger.Info(color.Green("crawlAndGetDataUsage:")+" Scanning bucket %v.", bucket.Name)
}
// Load cache for bucket // Load cache for bucket
cacheName := path.Join(dataUsageBucketCacheDir, bucket.Name+".bin") cacheName := pathJoin(bucket.Name, dataUsageCacheName)
cache := dataUsageCache{} cache := dataUsageCache{}
logger.LogIf(ctx, cache.load(ctx, xl, cacheName)) logger.LogIf(ctx, cache.load(ctx, xl, cacheName))
if cache.Info.Name == "" { if cache.Info.Name == "" {

View File

@ -32,7 +32,6 @@ import (
"github.com/minio/minio/pkg/bucket/lifecycle" "github.com/minio/minio/pkg/bucket/lifecycle"
"github.com/minio/minio/pkg/bucket/object/tagging" "github.com/minio/minio/pkg/bucket/object/tagging"
"github.com/minio/minio/pkg/bucket/policy" "github.com/minio/minio/pkg/bucket/policy"
"github.com/minio/minio/pkg/color"
"github.com/minio/minio/pkg/madmin" "github.com/minio/minio/pkg/madmin"
"github.com/minio/minio/pkg/sync/errgroup" "github.com/minio/minio/pkg/sync/errgroup"
) )
@ -227,10 +226,6 @@ func (z *xlZones) CrawlAndGetDataUsage(ctx context.Context, updates chan<- DataU
var knownBuckets = make(map[string]struct{}) // used to deduplicate buckets. var knownBuckets = make(map[string]struct{}) // used to deduplicate buckets.
var allBuckets []BucketInfo var allBuckets []BucketInfo
t := time.Now()
if dataUsageDebug {
logger.Info(color.Green("xlZones.CrawlAndGetDataUsage:") + " Start crawl cycle")
}
// Collect for each set in zones. // Collect for each set in zones.
for _, z := range z.zones { for _, z := range z.zones {
for _, xlObj := range z.sets { for _, xlObj := range z.sets {
@ -314,9 +309,6 @@ func (z *xlZones) CrawlAndGetDataUsage(ctx context.Context, updates chan<- DataU
}() }()
wg.Wait() wg.Wait()
if dataUsageDebug {
logger.Info(color.Green("xlZones.CrawlAndGetDataUsage:")+" Cycle scan time: %v", time.Since(t))
}
ch := make(chan struct{}) ch := make(chan struct{})
updateCloser <- ch updateCloser <- ch
<-ch <-ch

View File

@ -235,8 +235,22 @@ This behavior is consistent across all keys, each key self documents itself with
## Environment only settings (not in config) ## Environment only settings (not in config)
#### Worm #### Usage crawler
Enable this to turn on Write-Once-Read-Many. By default it is set to `off`. Set ``MINIO_WORM=on`` environment variable to enable WORM mode. Data usage crawler is enabled by default, following ENVs allow for more staggered delay in terms of usage calculation.
The crawler adapts to the system speed and completely pauses when the system is under load. It is possible to adjust the speed of the crawler and thereby the latency of updates being reflected. The delays between each operation of the crawl can be adjusted by the `MINIO_DISK_USAGE_CRAWL_DELAY` environment variable. By default the value is `10`. This means the crawler will sleep *10x* the time each operation takes.
This will in most setups make the crawler slow enough to not impact overall system performance. Setting `MINIO_DISK_USAGE_CRAWL_DELAY` to a *lower* value will make the crawler faster and setting it to 0 will make the crawler run at full speed (not recommended). Setting it to a higher value will make the crawler slower, further consume less resources.
Example: Following setting will decrease the crawler speed by a factor of 3, reducing the system resource use, but increasing the latency of updates being reflected.
```sh
export MINIO_DISK_USAGE_CRAWL_DELAY=30
minio server /data
```
#### Worm (deprecated)
Enable this to turn on Write-Once-Read-Many. By default it is set to `off`. Set ``MINIO_WORM=on`` environment variable to enable WORM mode. This ENV setting is not recommended anymore, please use Object Locking and Object Retention APIs documented [here](https://github.com/minio/minio/tree/master/docs/retention).
Example: Example: