mirror of
https://github.com/minio/minio.git
synced 2025-01-12 15:33:22 -05:00
fix: data usage crawler env handling, usage-cache.bin location (#9163)
Canonicalize the ENVs so that they can be brought in as part of the config values in a subsequent change. - fix location of per bucket usage to `.minio.sys/buckets/<bucket_name>/usage-cache.bin` - fix location of the overall usage in `json` at `.minio.sys/buckets/.usage.json` (avoid conflicts with a bucket named `usage.json`) - fix location of the overall usage in `msgp` at `.minio.sys/buckets/.usage.bin` (avoid conflicts with a bucket named `usage.bin`)
This commit is contained in:
parent
d45a1808f2
commit
b1a2169dcc
@ -29,7 +29,6 @@ import (
|
|||||||
|
|
||||||
"github.com/cespare/xxhash/v2"
|
"github.com/cespare/xxhash/v2"
|
||||||
"github.com/minio/minio/cmd/logger"
|
"github.com/minio/minio/cmd/logger"
|
||||||
"github.com/minio/minio/pkg/color"
|
|
||||||
"github.com/minio/minio/pkg/hash"
|
"github.com/minio/minio/pkg/hash"
|
||||||
"github.com/tinylib/msgp/msgp"
|
"github.com/tinylib/msgp/msgp"
|
||||||
)
|
)
|
||||||
@ -114,7 +113,7 @@ func (d *dataUsageCache) find(path string) *dataUsageEntry {
|
|||||||
func (d *dataUsageCache) dui(path string, buckets []BucketInfo) DataUsageInfo {
|
func (d *dataUsageCache) dui(path string, buckets []BucketInfo) DataUsageInfo {
|
||||||
e := d.find(path)
|
e := d.find(path)
|
||||||
if e == nil {
|
if e == nil {
|
||||||
return DataUsageInfo{LastUpdate: time.Now()}
|
return DataUsageInfo{LastUpdate: UTCNow()}
|
||||||
}
|
}
|
||||||
flat := d.flatten(*e)
|
flat := d.flatten(*e)
|
||||||
return DataUsageInfo{
|
return DataUsageInfo{
|
||||||
@ -213,9 +212,6 @@ func (d *dataUsageCache) pathSizes(buckets []BucketInfo) map[string]uint64 {
|
|||||||
for _, bucket := range buckets {
|
for _, bucket := range buckets {
|
||||||
e := d.find(bucket.Name)
|
e := d.find(bucket.Name)
|
||||||
if e == nil {
|
if e == nil {
|
||||||
if dataUsageDebug {
|
|
||||||
logger.Info(color.Green("data-usage:")+" Bucket not found in cache: %v", bucket.Name)
|
|
||||||
}
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
flat := d.flatten(*e)
|
flat := d.flatten(*e)
|
||||||
|
@ -35,12 +35,11 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
dataUsageObjName = "usage.json"
|
dataUsageObjName = ".usage.json"
|
||||||
dataUsageCacheName = "usage-cache.bin"
|
dataUsageCacheName = ".usage-cache.bin"
|
||||||
dataUsageBucketCacheDir = "usage-caches"
|
envDataUsageCrawlConf = "MINIO_DISK_USAGE_CRAWL_ENABLE"
|
||||||
dataUsageCrawlConf = "MINIO_DISK_USAGE_CRAWL"
|
envDataUsageCrawlDelay = "MINIO_DISK_USAGE_CRAWL_DELAY"
|
||||||
dataUsageCrawlDelay = "MINIO_DISK_USAGE_CRAWL_DELAY"
|
envDataUsageCrawlDebug = "MINIO_DISK_USAGE_CRAWL_DEBUG"
|
||||||
dataUsageDebug = true
|
|
||||||
dataUsageSleepPerFolder = 1 * time.Millisecond
|
dataUsageSleepPerFolder = 1 * time.Millisecond
|
||||||
dataUsageSleepDefMult = 10.0
|
dataUsageSleepDefMult = 10.0
|
||||||
dataUsageUpdateDirCycles = 16
|
dataUsageUpdateDirCycles = 16
|
||||||
@ -51,12 +50,10 @@ const (
|
|||||||
|
|
||||||
// initDataUsageStats will start the crawler unless disabled.
|
// initDataUsageStats will start the crawler unless disabled.
|
||||||
func initDataUsageStats() {
|
func initDataUsageStats() {
|
||||||
dataUsageEnabled, err := config.ParseBool(env.Get(dataUsageCrawlConf, config.EnableOn))
|
if env.Get(envDataUsageCrawlConf, config.EnableOn) == config.EnableOn {
|
||||||
if err == nil && !dataUsageEnabled {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
go runDataUsageInfoUpdateRoutine()
|
go runDataUsageInfoUpdateRoutine()
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// runDataUsageInfoUpdateRoutine will contain the main crawler.
|
// runDataUsageInfoUpdateRoutine will contain the main crawler.
|
||||||
func runDataUsageInfoUpdateRoutine() {
|
func runDataUsageInfoUpdateRoutine() {
|
||||||
@ -89,9 +86,6 @@ func runDataUsageInfo(ctx context.Context, objAPI ObjectLayer) {
|
|||||||
// data usage calculator role for its lifetime.
|
// data usage calculator role for its lifetime.
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
if dataUsageDebug {
|
|
||||||
logger.Info(color.Green("runDataUsageInfo:") + " Starting crawler master")
|
|
||||||
}
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
@ -111,14 +105,11 @@ func runDataUsageInfo(ctx context.Context, objAPI ObjectLayer) {
|
|||||||
// storeDataUsageInBackend will store all objects sent on the gui channel until closed.
|
// storeDataUsageInBackend will store all objects sent on the gui channel until closed.
|
||||||
func storeDataUsageInBackend(ctx context.Context, objAPI ObjectLayer, gui <-chan DataUsageInfo) {
|
func storeDataUsageInBackend(ctx context.Context, objAPI ObjectLayer, gui <-chan DataUsageInfo) {
|
||||||
for dataUsageInfo := range gui {
|
for dataUsageInfo := range gui {
|
||||||
dataUsageJSON, err := json.MarshalIndent(dataUsageInfo, "", " ")
|
dataUsageJSON, err := json.Marshal(dataUsageInfo)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.LogIf(ctx, err)
|
logger.LogIf(ctx, err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if dataUsageDebug {
|
|
||||||
logger.Info(color.Green("data-usage:")+" Received update: %s", string(dataUsageJSON))
|
|
||||||
}
|
|
||||||
size := int64(len(dataUsageJSON))
|
size := int64(len(dataUsageJSON))
|
||||||
r, err := hash.NewReader(bytes.NewReader(dataUsageJSON), size, "", "", size, false)
|
r, err := hash.NewReader(bytes.NewReader(dataUsageJSON), size, "", "", size, false)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -172,6 +163,9 @@ type folderScanner struct {
|
|||||||
newCache dataUsageCache
|
newCache dataUsageCache
|
||||||
waitForLowActiveIO func()
|
waitForLowActiveIO func()
|
||||||
|
|
||||||
|
dataUsageCrawlMult float64
|
||||||
|
dataUsageCrawlDebug bool
|
||||||
|
|
||||||
newFolders []cachedFolder
|
newFolders []cachedFolder
|
||||||
existingFolders []cachedFolder
|
existingFolders []cachedFolder
|
||||||
}
|
}
|
||||||
@ -194,12 +188,6 @@ func sleepDuration(d time.Duration, x float64) {
|
|||||||
// If final is not provided the folders found are returned from the function.
|
// If final is not provided the folders found are returned from the function.
|
||||||
func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFolder, final bool) ([]cachedFolder, error) {
|
func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFolder, final bool) ([]cachedFolder, error) {
|
||||||
var nextFolders []cachedFolder
|
var nextFolders []cachedFolder
|
||||||
delayMult := dataUsageSleepDefMult
|
|
||||||
if mult := os.Getenv(dataUsageCrawlDelay); mult != "" {
|
|
||||||
if d, err := strconv.ParseFloat(mult, 64); err == nil {
|
|
||||||
delayMult = d
|
|
||||||
}
|
|
||||||
}
|
|
||||||
done := ctx.Done()
|
done := ctx.Done()
|
||||||
for _, folder := range folders {
|
for _, folder := range folders {
|
||||||
select {
|
select {
|
||||||
@ -207,8 +195,9 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
|||||||
return nil, ctx.Err()
|
return nil, ctx.Err()
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
|
|
||||||
f.waitForLowActiveIO()
|
f.waitForLowActiveIO()
|
||||||
sleepDuration(dataUsageSleepPerFolder, delayMult)
|
sleepDuration(dataUsageSleepPerFolder, f.dataUsageCrawlMult)
|
||||||
|
|
||||||
cache := dataUsageEntry{}
|
cache := dataUsageEntry{}
|
||||||
thisHash := hashPath(folder.name)
|
thisHash := hashPath(folder.name)
|
||||||
@ -218,14 +207,14 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
|||||||
entName = path.Clean(path.Join(folder.name, entName))
|
entName = path.Clean(path.Join(folder.name, entName))
|
||||||
bucket, _ := path2BucketObjectWithBasePath(f.root, entName)
|
bucket, _ := path2BucketObjectWithBasePath(f.root, entName)
|
||||||
if bucket == "" {
|
if bucket == "" {
|
||||||
if dataUsageDebug {
|
if f.dataUsageCrawlDebug {
|
||||||
logger.Info(color.Green("data-usage:")+" no bucket (%s,%s)", f.root, entName)
|
logger.Info(color.Green("data-usage:")+" no bucket (%s,%s)", f.root, entName)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if isReservedOrInvalidBucket(bucket, false) {
|
if isReservedOrInvalidBucket(bucket, false) {
|
||||||
if dataUsageDebug {
|
if f.dataUsageCrawlDebug {
|
||||||
logger.Info(color.Green("data-usage:")+" invalid bucket: %v, entry: %v", bucket, entName)
|
logger.Info(color.Green("data-usage:")+" invalid bucket: %v, entry: %v", bucket, entName)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
@ -257,12 +246,12 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
|||||||
}
|
}
|
||||||
f.waitForLowActiveIO()
|
f.waitForLowActiveIO()
|
||||||
// Dynamic time delay.
|
// Dynamic time delay.
|
||||||
t := time.Now()
|
t := UTCNow()
|
||||||
|
|
||||||
// Get file size, ignore errors.
|
// Get file size, ignore errors.
|
||||||
size, err := f.getSize(Item{Path: path.Join(f.root, entName), Typ: typ})
|
size, err := f.getSize(Item{Path: path.Join(f.root, entName), Typ: typ})
|
||||||
|
|
||||||
sleepDuration(time.Since(t), delayMult)
|
sleepDuration(time.Since(t), f.dataUsageCrawlMult)
|
||||||
if err == errSkipFile {
|
if err == errSkipFile {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -284,12 +273,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
|
|||||||
// deepScanFolder will deep scan a folder and return the size if no error occurs.
|
// deepScanFolder will deep scan a folder and return the size if no error occurs.
|
||||||
func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dataUsageEntry, error) {
|
func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dataUsageEntry, error) {
|
||||||
var cache dataUsageEntry
|
var cache dataUsageEntry
|
||||||
delayMult := dataUsageSleepDefMult
|
|
||||||
if mult := os.Getenv(dataUsageCrawlDelay); mult != "" {
|
|
||||||
if d, err := strconv.ParseFloat(mult, 64); err == nil {
|
|
||||||
delayMult = d
|
|
||||||
}
|
|
||||||
}
|
|
||||||
done := ctx.Done()
|
done := ctx.Done()
|
||||||
|
|
||||||
var addDir func(entName string, typ os.FileMode) error
|
var addDir func(entName string, typ os.FileMode) error
|
||||||
@ -307,11 +291,12 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dat
|
|||||||
dirStack = append(dirStack, entName)
|
dirStack = append(dirStack, entName)
|
||||||
err := readDirFn(path.Join(dirStack...), addDir)
|
err := readDirFn(path.Join(dirStack...), addDir)
|
||||||
dirStack = dirStack[:len(dirStack)-1]
|
dirStack = dirStack[:len(dirStack)-1]
|
||||||
sleepDuration(dataUsageSleepPerFolder, delayMult)
|
sleepDuration(dataUsageSleepPerFolder, f.dataUsageCrawlMult)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dynamic time delay.
|
// Dynamic time delay.
|
||||||
t := time.Now()
|
t := UTCNow()
|
||||||
|
|
||||||
// Get file size, ignore errors.
|
// Get file size, ignore errors.
|
||||||
dirStack = append(dirStack, entName)
|
dirStack = append(dirStack, entName)
|
||||||
@ -321,7 +306,7 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dat
|
|||||||
size, err := f.getSize(Item{Path: fileName, Typ: typ})
|
size, err := f.getSize(Item{Path: fileName, Typ: typ})
|
||||||
|
|
||||||
// Don't sleep for really small amount of time
|
// Don't sleep for really small amount of time
|
||||||
sleepDuration(time.Since(t), delayMult)
|
sleepDuration(time.Since(t), f.dataUsageCrawlMult)
|
||||||
|
|
||||||
if err == errSkipFile {
|
if err == errSkipFile {
|
||||||
return nil
|
return nil
|
||||||
@ -344,14 +329,25 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dat
|
|||||||
// Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler.
|
// Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler.
|
||||||
// If the supplied context is canceled the function will return at the first chance.
|
// If the supplied context is canceled the function will return at the first chance.
|
||||||
func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, waitForLowActiveIO func(), getSize getSizeFn) (dataUsageCache, error) {
|
func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, waitForLowActiveIO func(), getSize getSizeFn) (dataUsageCache, error) {
|
||||||
|
t := UTCNow()
|
||||||
|
|
||||||
|
dataUsageDebug := env.Get(envDataUsageCrawlDebug, config.EnableOff) == config.EnableOn
|
||||||
|
defer func() {
|
||||||
|
if dataUsageDebug {
|
||||||
|
logger.Info(color.Green("updateUsage")+" Crawl time at %s: %v", basePath, time.Since(t))
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
if cache.Info.Name == "" {
|
if cache.Info.Name == "" {
|
||||||
cache.Info.Name = dataUsageRoot
|
cache.Info.Name = dataUsageRoot
|
||||||
}
|
}
|
||||||
var logPrefix, logSuffix string
|
|
||||||
if dataUsageDebug {
|
delayMult, err := strconv.ParseFloat(env.Get(envDataUsageCrawlDelay, "10.0"), 64)
|
||||||
logPrefix = color.Green("data-usage: ")
|
if err != nil {
|
||||||
logSuffix = color.Blue(" - %v + %v", basePath, cache.Info.Name)
|
logger.LogIf(ctx, err)
|
||||||
|
delayMult = dataUsageSleepDefMult
|
||||||
}
|
}
|
||||||
|
|
||||||
s := folderScanner{
|
s := folderScanner{
|
||||||
root: basePath,
|
root: basePath,
|
||||||
getSize: getSize,
|
getSize: getSize,
|
||||||
@ -360,6 +356,12 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
|
|||||||
waitForLowActiveIO: waitForLowActiveIO,
|
waitForLowActiveIO: waitForLowActiveIO,
|
||||||
newFolders: nil,
|
newFolders: nil,
|
||||||
existingFolders: nil,
|
existingFolders: nil,
|
||||||
|
dataUsageCrawlMult: delayMult,
|
||||||
|
dataUsageCrawlDebug: dataUsageDebug,
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.dataUsageCrawlDebug {
|
||||||
|
logger.Info(color.Green("runDataUsageInfo:") + " Starting crawler master")
|
||||||
}
|
}
|
||||||
|
|
||||||
done := ctx.Done()
|
done := ctx.Done()
|
||||||
@ -369,14 +371,21 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
|
|||||||
if cache.Info.Name != dataUsageRoot {
|
if cache.Info.Name != dataUsageRoot {
|
||||||
flattenLevels--
|
flattenLevels--
|
||||||
}
|
}
|
||||||
if dataUsageDebug {
|
|
||||||
|
var logPrefix, logSuffix string
|
||||||
|
if s.dataUsageCrawlDebug {
|
||||||
|
logPrefix = color.Green("data-usage: ")
|
||||||
|
logSuffix = color.Blue(" - %v + %v", basePath, cache.Info.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
if s.dataUsageCrawlDebug {
|
||||||
logger.Info(logPrefix+"Cycle: %v"+logSuffix, cache.Info.NextCycle)
|
logger.Info(logPrefix+"Cycle: %v"+logSuffix, cache.Info.NextCycle)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Always scan flattenLevels deep. Cache root is level 0.
|
// Always scan flattenLevels deep. Cache root is level 0.
|
||||||
todo := []cachedFolder{{name: cache.Info.Name}}
|
todo := []cachedFolder{{name: cache.Info.Name}}
|
||||||
for i := 0; i < flattenLevels; i++ {
|
for i := 0; i < flattenLevels; i++ {
|
||||||
if dataUsageDebug {
|
if s.dataUsageCrawlDebug {
|
||||||
logger.Info(logPrefix+"Level %v, scanning %v directories."+logSuffix, i, len(todo))
|
logger.Info(logPrefix+"Level %v, scanning %v directories."+logSuffix, i, len(todo))
|
||||||
}
|
}
|
||||||
select {
|
select {
|
||||||
@ -392,9 +401,10 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if dataUsageDebug {
|
if s.dataUsageCrawlDebug {
|
||||||
logger.Info(logPrefix+"New folders: %v"+logSuffix, s.newFolders)
|
logger.Info(logPrefix+"New folders: %v"+logSuffix, s.newFolders)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add new folders first
|
// Add new folders first
|
||||||
for _, folder := range s.newFolders {
|
for _, folder := range s.newFolders {
|
||||||
select {
|
select {
|
||||||
@ -419,9 +429,10 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if dataUsageDebug {
|
if s.dataUsageCrawlDebug {
|
||||||
logger.Info(logPrefix+"Existing folders: %v"+logSuffix, len(s.existingFolders))
|
logger.Info(logPrefix+"Existing folders: %v"+logSuffix, len(s.existingFolders))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Do selective scanning of existing folders.
|
// Do selective scanning of existing folders.
|
||||||
for _, folder := range s.existingFolders {
|
for _, folder := range s.existingFolders {
|
||||||
select {
|
select {
|
||||||
@ -448,7 +459,7 @@ func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, wai
|
|||||||
s.newCache.replaceHashed(h, folder.parent, *du)
|
s.newCache.replaceHashed(h, folder.parent, *du)
|
||||||
}
|
}
|
||||||
|
|
||||||
s.newCache.Info.LastUpdate = time.Now()
|
s.newCache.Info.LastUpdate = UTCNow()
|
||||||
s.newCache.Info.NextCycle++
|
s.newCache.Info.NextCycle++
|
||||||
return s.newCache, nil
|
return s.newCache, nil
|
||||||
}
|
}
|
||||||
|
@ -18,7 +18,6 @@ package cmd
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
@ -30,8 +29,8 @@ type usageTestFile struct {
|
|||||||
size int
|
size int
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_updateUsage(t *testing.T) {
|
func TestDataUsageUpdate(t *testing.T) {
|
||||||
base, err := ioutil.TempDir("", "Test_updateUsage")
|
base, err := ioutil.TempDir("", "TestDataUsageUpdate")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Skip(err)
|
t.Skip(err)
|
||||||
}
|
}
|
||||||
@ -58,6 +57,7 @@ func Test_updateUsage(t *testing.T) {
|
|||||||
}
|
}
|
||||||
return 0, nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
got, err := updateUsage(context.Background(), base, dataUsageCache{}, func() {}, getSize)
|
got, err := updateUsage(context.Background(), base, dataUsageCache{}, func() {}, getSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
@ -316,17 +316,10 @@ func Test_updateUsage(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Log(got.StringAll())
|
|
||||||
|
|
||||||
t.Logf("Root, flat: %+v", got.flatten(*got.root()))
|
|
||||||
t.Logf("Root: %+v", *got.root())
|
|
||||||
t.Logf("/dir1/dira: %+v", *got.find("/dir1/dira"))
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_updateUsagePrefix(t *testing.T) {
|
func TestDataUsageUpdatePrefix(t *testing.T) {
|
||||||
base, err := ioutil.TempDir("", "Test_updateUsagePrefix")
|
base, err := ioutil.TempDir("", "TestDataUpdateUsagePrefix")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Skip(err)
|
t.Skip(err)
|
||||||
}
|
}
|
||||||
@ -593,12 +586,6 @@ func Test_updateUsagePrefix(t *testing.T) {
|
|||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
t.Log(got.StringAll())
|
|
||||||
|
|
||||||
t.Logf("Root, flat: %+v", got.flatten(*got.root()))
|
|
||||||
t.Logf("Root: %+v", *got.root())
|
|
||||||
t.Logf("bucket/dir1/dira: %+v", *got.find("bucket/dir1/dira"))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func createUsageTestFiles(t *testing.T, base string, files []usageTestFile) {
|
func createUsageTestFiles(t *testing.T, base string, files []usageTestFile) {
|
||||||
@ -614,8 +601,8 @@ func createUsageTestFiles(t *testing.T, base string, files []usageTestFile) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func Test_dataUsageCacheSerialize(t *testing.T) {
|
func TestDataUsageCacheSerialize(t *testing.T) {
|
||||||
base, err := ioutil.TempDir("", "Test_dataUsageCacheSerialize")
|
base, err := ioutil.TempDir("", "TestDataUsageCacheSerialize")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Skip(err)
|
t.Skip(err)
|
||||||
}
|
}
|
||||||
@ -646,19 +633,19 @@ func Test_dataUsageCacheSerialize(t *testing.T) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
b := want.serialize()
|
|
||||||
t.Log("serialize -> ", len(b), "bytes")
|
|
||||||
|
|
||||||
|
b := want.serialize()
|
||||||
var got dataUsageCache
|
var got dataUsageCache
|
||||||
err = got.deserialize(b)
|
err = got.deserialize(b)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatal(err)
|
t.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if got.Info.LastUpdate.IsZero() {
|
if got.Info.LastUpdate.IsZero() {
|
||||||
t.Error("lastupdate not set")
|
t.Error("lastupdate not set")
|
||||||
}
|
}
|
||||||
|
|
||||||
if fmt.Sprint(want) == fmt.Sprint(got) {
|
if !want.Info.LastUpdate.Equal(got.Info.LastUpdate) {
|
||||||
t.Fatalf("deserialize mismatch\nwant: %+v\ngot: %+v", want, got)
|
t.Fatalf("deserialize mismatch\nwant: %+v\ngot: %+v", want, got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -41,7 +41,6 @@ import (
|
|||||||
"github.com/minio/minio/pkg/bucket/lifecycle"
|
"github.com/minio/minio/pkg/bucket/lifecycle"
|
||||||
"github.com/minio/minio/pkg/bucket/object/tagging"
|
"github.com/minio/minio/pkg/bucket/object/tagging"
|
||||||
"github.com/minio/minio/pkg/bucket/policy"
|
"github.com/minio/minio/pkg/bucket/policy"
|
||||||
"github.com/minio/minio/pkg/color"
|
|
||||||
"github.com/minio/minio/pkg/lock"
|
"github.com/minio/minio/pkg/lock"
|
||||||
"github.com/minio/minio/pkg/madmin"
|
"github.com/minio/minio/pkg/madmin"
|
||||||
"github.com/minio/minio/pkg/mimedb"
|
"github.com/minio/minio/pkg/mimedb"
|
||||||
@ -244,14 +243,10 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, updates chan<- Da
|
|||||||
if oldCache.Info.Name == "" {
|
if oldCache.Info.Name == "" {
|
||||||
oldCache.Info.Name = dataUsageRoot
|
oldCache.Info.Name = dataUsageRoot
|
||||||
}
|
}
|
||||||
if dataUsageDebug {
|
|
||||||
logger.Info(color.Green("FSObjects.CrawlAndGetDataUsage:") + " Start crawl cycle")
|
|
||||||
}
|
|
||||||
buckets, err := fs.ListBuckets(ctx)
|
buckets, err := fs.ListBuckets(ctx)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
t := time.Now()
|
|
||||||
cache, err := updateUsage(ctx, fs.fsPath, oldCache, fs.waitForLowActiveIO, func(item Item) (int64, error) {
|
cache, err := updateUsage(ctx, fs.fsPath, oldCache, fs.waitForLowActiveIO, func(item Item) (int64, error) {
|
||||||
// Get file size, symlinks which cannot be
|
// Get file size, symlinks which cannot be
|
||||||
// followed are automatically filtered by fastwalk.
|
// followed are automatically filtered by fastwalk.
|
||||||
@ -261,9 +256,7 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, updates chan<- Da
|
|||||||
}
|
}
|
||||||
return fi.Size(), nil
|
return fi.Size(), nil
|
||||||
})
|
})
|
||||||
if dataUsageDebug {
|
|
||||||
logger.Info(color.Green("FSObjects.CrawlAndGetDataUsage:")+" Crawl time: %v", time.Since(t))
|
|
||||||
}
|
|
||||||
// Even if there was an error, the new cache may have better info.
|
// Even if there was an error, the new cache may have better info.
|
||||||
if cache.Info.LastUpdate.After(oldCache.Info.LastUpdate) {
|
if cache.Info.LastUpdate.After(oldCache.Info.LastUpdate) {
|
||||||
logger.LogIf(ctx, cache.save(ctx, fs, dataUsageCacheName))
|
logger.LogIf(ctx, cache.save(ctx, fs, dataUsageCacheName))
|
||||||
|
@ -19,14 +19,12 @@ package cmd
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"path"
|
|
||||||
"sort"
|
"sort"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/minio/minio/cmd/logger"
|
"github.com/minio/minio/cmd/logger"
|
||||||
"github.com/minio/minio/pkg/bpool"
|
"github.com/minio/minio/pkg/bpool"
|
||||||
"github.com/minio/minio/pkg/color"
|
|
||||||
"github.com/minio/minio/pkg/dsync"
|
"github.com/minio/minio/pkg/dsync"
|
||||||
"github.com/minio/minio/pkg/madmin"
|
"github.com/minio/minio/pkg/madmin"
|
||||||
"github.com/minio/minio/pkg/sync/errgroup"
|
"github.com/minio/minio/pkg/sync/errgroup"
|
||||||
@ -321,11 +319,9 @@ func (xl xlObjects) crawlAndGetDataUsage(ctx context.Context, buckets []BucketIn
|
|||||||
return
|
return
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
if dataUsageDebug {
|
|
||||||
logger.Info(color.Green("crawlAndGetDataUsage:")+" Scanning bucket %v.", bucket.Name)
|
|
||||||
}
|
|
||||||
// Load cache for bucket
|
// Load cache for bucket
|
||||||
cacheName := path.Join(dataUsageBucketCacheDir, bucket.Name+".bin")
|
cacheName := pathJoin(bucket.Name, dataUsageCacheName)
|
||||||
cache := dataUsageCache{}
|
cache := dataUsageCache{}
|
||||||
logger.LogIf(ctx, cache.load(ctx, xl, cacheName))
|
logger.LogIf(ctx, cache.load(ctx, xl, cacheName))
|
||||||
if cache.Info.Name == "" {
|
if cache.Info.Name == "" {
|
||||||
|
@ -32,7 +32,6 @@ import (
|
|||||||
"github.com/minio/minio/pkg/bucket/lifecycle"
|
"github.com/minio/minio/pkg/bucket/lifecycle"
|
||||||
"github.com/minio/minio/pkg/bucket/object/tagging"
|
"github.com/minio/minio/pkg/bucket/object/tagging"
|
||||||
"github.com/minio/minio/pkg/bucket/policy"
|
"github.com/minio/minio/pkg/bucket/policy"
|
||||||
"github.com/minio/minio/pkg/color"
|
|
||||||
"github.com/minio/minio/pkg/madmin"
|
"github.com/minio/minio/pkg/madmin"
|
||||||
"github.com/minio/minio/pkg/sync/errgroup"
|
"github.com/minio/minio/pkg/sync/errgroup"
|
||||||
)
|
)
|
||||||
@ -227,10 +226,6 @@ func (z *xlZones) CrawlAndGetDataUsage(ctx context.Context, updates chan<- DataU
|
|||||||
var knownBuckets = make(map[string]struct{}) // used to deduplicate buckets.
|
var knownBuckets = make(map[string]struct{}) // used to deduplicate buckets.
|
||||||
var allBuckets []BucketInfo
|
var allBuckets []BucketInfo
|
||||||
|
|
||||||
t := time.Now()
|
|
||||||
if dataUsageDebug {
|
|
||||||
logger.Info(color.Green("xlZones.CrawlAndGetDataUsage:") + " Start crawl cycle")
|
|
||||||
}
|
|
||||||
// Collect for each set in zones.
|
// Collect for each set in zones.
|
||||||
for _, z := range z.zones {
|
for _, z := range z.zones {
|
||||||
for _, xlObj := range z.sets {
|
for _, xlObj := range z.sets {
|
||||||
@ -314,9 +309,6 @@ func (z *xlZones) CrawlAndGetDataUsage(ctx context.Context, updates chan<- DataU
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
wg.Wait()
|
wg.Wait()
|
||||||
if dataUsageDebug {
|
|
||||||
logger.Info(color.Green("xlZones.CrawlAndGetDataUsage:")+" Cycle scan time: %v", time.Since(t))
|
|
||||||
}
|
|
||||||
ch := make(chan struct{})
|
ch := make(chan struct{})
|
||||||
updateCloser <- ch
|
updateCloser <- ch
|
||||||
<-ch
|
<-ch
|
||||||
|
@ -235,8 +235,22 @@ This behavior is consistent across all keys, each key self documents itself with
|
|||||||
|
|
||||||
## Environment only settings (not in config)
|
## Environment only settings (not in config)
|
||||||
|
|
||||||
#### Worm
|
#### Usage crawler
|
||||||
Enable this to turn on Write-Once-Read-Many. By default it is set to `off`. Set ``MINIO_WORM=on`` environment variable to enable WORM mode.
|
The data usage crawler is enabled by default. The following ENVs allow for a more staggered delay in usage calculation.
|
||||||
|
|
||||||
|
The crawler adapts to the system speed and completely pauses when the system is under load. It is possible to adjust the speed of the crawler and thereby the latency of updates being reflected. The delays between each operation of the crawl can be adjusted by the `MINIO_DISK_USAGE_CRAWL_DELAY` environment variable. By default the value is `10`. This means the crawler will sleep *10x* the time each operation takes.
|
||||||
|
|
||||||
|
In most setups this will make the crawler slow enough to not impact overall system performance. Setting `MINIO_DISK_USAGE_CRAWL_DELAY` to a *lower* value will make the crawler faster, and setting it to 0 will make the crawler run at full speed (not recommended). Setting it to a *higher* value will make the crawler slower, so that it consumes fewer resources.
|
||||||
|
|
||||||
|
Example: The following setting will decrease the crawler speed by a factor of 3, reducing system resource use but increasing the latency of updates being reflected.
|
||||||
|
|
||||||
|
```sh
|
||||||
|
export MINIO_DISK_USAGE_CRAWL_DELAY=30
|
||||||
|
minio server /data
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Worm (deprecated)
|
||||||
|
Enable this to turn on Write-Once-Read-Many. By default it is set to `off`. Set ``MINIO_WORM=on`` environment variable to enable WORM mode. This ENV setting is no longer recommended; please use the Object Locking and Object Retention APIs documented [here](https://github.com/minio/minio/tree/master/docs/retention) instead.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user