rename all references from crawl -> scanner (#11621)

This commit is contained in:
Harshavardhana 2021-02-26 15:11:42 -08:00 committed by GitHub
parent 6386b45c08
commit 9171d6ef65
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 133 additions and 133 deletions

View File

@ -118,7 +118,7 @@ func initHelp() {
}, },
config.HelpKV{ config.HelpKV{
Key: config.ScannerSubSys, Key: config.ScannerSubSys,
Description: "manage scanner for usage calculation, lifecycle, healing and more", Description: "manage namespace scanning for usage calculation, lifecycle, healing and more",
}, },
config.HelpKV{ config.HelpKV{
Key: config.LoggerWebhookSubSys, Key: config.LoggerWebhookSubSys,

View File

@ -66,7 +66,7 @@ var (
Help = config.HelpKVS{ Help = config.HelpKVS{
config.HelpKV{ config.HelpKV{
Key: Bitrot, Key: Bitrot,
Description: `perform bitrot scan on disks when checking objects during crawl`, Description: `perform bitrot scan on disks when checking objects during scanner`,
Optional: true, Optional: true,
Type: "on|off", Type: "on|off",
}, },

View File

@ -43,9 +43,9 @@ import (
) )
const ( const (
dataCrawlSleepPerFolder = time.Millisecond // Time to wait between folders. dataScannerSleepPerFolder = time.Millisecond // Time to wait between folders.
dataCrawlStartDelay = 1 * time.Minute // Time to wait on startup and between cycles. dataScannerStartDelay = 1 * time.Minute // Time to wait on startup and between cycles.
dataUsageUpdateDirCycles = 16 // Visit all folders every n cycles. dataUsageUpdateDirCycles = 16 // Visit all folders every n cycles.
healDeleteDangling = true healDeleteDangling = true
healFolderIncludeProb = 32 // Include a clean folder one in n cycles. healFolderIncludeProb = 32 // Include a clean folder one in n cycles.
@ -76,7 +76,7 @@ func runDataScanner(ctx context.Context, objAPI ObjectLayer) {
for { for {
err := locker.GetLock(ctx, dataScannerLeaderLockTimeout) err := locker.GetLock(ctx, dataScannerLeaderLockTimeout)
if err != nil { if err != nil {
time.Sleep(time.Duration(r.Float64() * float64(dataCrawlStartDelay))) time.Sleep(time.Duration(r.Float64() * float64(dataScannerStartDelay)))
continue continue
} }
break break
@ -100,16 +100,16 @@ func runDataScanner(ctx context.Context, objAPI ObjectLayer) {
br.Close() br.Close()
} }
crawlTimer := time.NewTimer(dataCrawlStartDelay) scannerTimer := time.NewTimer(dataScannerStartDelay)
defer crawlTimer.Stop() defer scannerTimer.Stop()
for { for {
select { select {
case <-ctx.Done(): case <-ctx.Done():
return return
case <-crawlTimer.C: case <-scannerTimer.C:
// Reset the timer for next cycle. // Reset the timer for next cycle.
crawlTimer.Reset(dataCrawlStartDelay) scannerTimer.Reset(dataScannerStartDelay)
if intDataUpdateTracker.debug { if intDataUpdateTracker.debug {
console.Debugln("starting scanner cycle") console.Debugln("starting scanner cycle")
@ -120,7 +120,7 @@ func runDataScanner(ctx context.Context, objAPI ObjectLayer) {
go storeDataUsageInBackend(ctx, objAPI, results) go storeDataUsageInBackend(ctx, objAPI, results)
bf, err := globalNotificationSys.updateBloomFilter(ctx, nextBloomCycle) bf, err := globalNotificationSys.updateBloomFilter(ctx, nextBloomCycle)
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
err = objAPI.CrawlAndGetDataUsage(ctx, bf, results) err = objAPI.NSScanner(ctx, bf, results)
close(results) close(results)
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
if err == nil { if err == nil {
@ -156,27 +156,27 @@ type folderScanner struct {
newCache dataUsageCache newCache dataUsageCache
withFilter *bloomFilter withFilter *bloomFilter
dataUsageCrawlDebug bool dataUsageScannerDebug bool
healFolderInclude uint32 // Include a clean folder one in n cycles. healFolderInclude uint32 // Include a clean folder one in n cycles.
healObjectSelect uint32 // Do a heal check on an object once every n cycles. Must divide into healFolderInclude healObjectSelect uint32 // Do a heal check on an object once every n cycles. Must divide into healFolderInclude
newFolders []cachedFolder newFolders []cachedFolder
existingFolders []cachedFolder existingFolders []cachedFolder
disks []StorageAPI disks []StorageAPI
} }
// crawlDataFolder will crawl the basepath+cache.Info.Name and return an updated cache. // scanDataFolder will scan the basepath+cache.Info.Name and return an updated cache.
// The returned cache will always be valid, but may not be updated from the existing. // The returned cache will always be valid, but may not be updated from the existing.
// Before each operation sleepDuration is called which can be used to temporarily halt the scanner. // Before each operation sleepDuration is called which can be used to temporarily halt the scanner.
// If the supplied context is canceled the function will return at the first chance. // If the supplied context is canceled the function will return at the first chance.
func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, getSize getSizeFn) (dataUsageCache, error) { func scanDataFolder(ctx context.Context, basePath string, cache dataUsageCache, getSize getSizeFn) (dataUsageCache, error) {
t := UTCNow() t := UTCNow()
logPrefix := color.Green("data-usage: ") logPrefix := color.Green("data-usage: ")
logSuffix := color.Blue("- %v + %v", basePath, cache.Info.Name) logSuffix := color.Blue("- %v + %v", basePath, cache.Info.Name)
if intDataUpdateTracker.debug { if intDataUpdateTracker.debug {
defer func() { defer func() {
console.Debugf(logPrefix+" Crawl time: %v %s\n", time.Since(t), logSuffix) console.Debugf(logPrefix+" Scanner time: %v %s\n", time.Since(t), logSuffix)
}() }()
} }
@ -189,15 +189,15 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
skipHeal := cache.Info.SkipHealing skipHeal := cache.Info.SkipHealing
s := folderScanner{ s := folderScanner{
root: basePath, root: basePath,
getSize: getSize, getSize: getSize,
oldCache: cache, oldCache: cache,
newCache: dataUsageCache{Info: cache.Info}, newCache: dataUsageCache{Info: cache.Info},
newFolders: nil, newFolders: nil,
existingFolders: nil, existingFolders: nil,
dataUsageCrawlDebug: intDataUpdateTracker.debug, dataUsageScannerDebug: intDataUpdateTracker.debug,
healFolderInclude: 0, healFolderInclude: 0,
healObjectSelect: 0, healObjectSelect: 0,
} }
// Add disks for set healing. // Add disks for set healing.
@ -227,21 +227,21 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
s.withFilter = nil s.withFilter = nil
} }
} }
if s.dataUsageCrawlDebug { if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Start crawling. Bloom filter: %v %s\n", s.withFilter != nil, logSuffix) console.Debugf(logPrefix+"Start scanning. Bloom filter: %v %s\n", s.withFilter != nil, logSuffix)
} }
done := ctx.Done() done := ctx.Done()
var flattenLevels = 2 var flattenLevels = 2
if s.dataUsageCrawlDebug { if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Cycle: %v, Entries: %v %s\n", cache.Info.NextCycle, len(cache.Cache), logSuffix) console.Debugf(logPrefix+"Cycle: %v, Entries: %v %s\n", cache.Info.NextCycle, len(cache.Cache), logSuffix)
} }
// Always scan flattenLevels deep. Cache root is level 0. // Always scan flattenLevels deep. Cache root is level 0.
todo := []cachedFolder{{name: cache.Info.Name, objectHealProbDiv: 1}} todo := []cachedFolder{{name: cache.Info.Name, objectHealProbDiv: 1}}
for i := 0; i < flattenLevels; i++ { for i := 0; i < flattenLevels; i++ {
if s.dataUsageCrawlDebug { if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Level %v, scanning %v directories. %s\n", i, len(todo), logSuffix) console.Debugf(logPrefix+"Level %v, scanning %v directories. %s\n", i, len(todo), logSuffix)
} }
select { select {
@ -257,7 +257,7 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
} }
} }
if s.dataUsageCrawlDebug { if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"New folders: %v %s\n", s.newFolders, logSuffix) console.Debugf(logPrefix+"New folders: %v %s\n", s.newFolders, logSuffix)
} }
@ -286,7 +286,7 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
} }
} }
if s.dataUsageCrawlDebug { if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Existing folders: %v %s\n", len(s.existingFolders), logSuffix) console.Debugf(logPrefix+"Existing folders: %v %s\n", len(s.existingFolders), logSuffix)
} }
@ -313,13 +313,13 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
// If folder isn't in filter, skip it completely. // If folder isn't in filter, skip it completely.
if !s.withFilter.containsDir(folder.name) { if !s.withFilter.containsDir(folder.name) {
if !h.mod(s.oldCache.Info.NextCycle, s.healFolderInclude/folder.objectHealProbDiv) { if !h.mod(s.oldCache.Info.NextCycle, s.healFolderInclude/folder.objectHealProbDiv) {
if s.dataUsageCrawlDebug { if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Skipping non-updated folder: %v %s\n", folder, logSuffix) console.Debugf(logPrefix+"Skipping non-updated folder: %v %s\n", folder, logSuffix)
} }
s.newCache.replaceHashed(h, folder.parent, s.oldCache.Cache[h.Key()]) s.newCache.replaceHashed(h, folder.parent, s.oldCache.Cache[h.Key()])
continue continue
} else { } else {
if s.dataUsageCrawlDebug { if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Adding non-updated folder to heal check: %v %s\n", folder.name, logSuffix) console.Debugf(logPrefix+"Adding non-updated folder to heal check: %v %s\n", folder.name, logSuffix)
} }
// Update probability of including objects // Update probability of including objects
@ -341,8 +341,8 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
} }
s.newCache.replaceHashed(h, folder.parent, *du) s.newCache.replaceHashed(h, folder.parent, *du)
} }
if s.dataUsageCrawlDebug { if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Finished crawl, %v entries %s\n", len(s.newCache.Cache), logSuffix) console.Debugf(logPrefix+"Finished scanner, %v entries %s\n", len(s.newCache.Cache), logSuffix)
} }
s.newCache.Info.LastUpdate = UTCNow() s.newCache.Info.LastUpdate = UTCNow()
s.newCache.Info.NextCycle++ s.newCache.Info.NextCycle++
@ -371,7 +371,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
_, prefix := path2BucketObjectWithBasePath(f.root, folder.name) _, prefix := path2BucketObjectWithBasePath(f.root, folder.name)
var activeLifeCycle *lifecycle.Lifecycle var activeLifeCycle *lifecycle.Lifecycle
if f.oldCache.Info.lifeCycle != nil && f.oldCache.Info.lifeCycle.HasActiveRules(prefix, true) { if f.oldCache.Info.lifeCycle != nil && f.oldCache.Info.lifeCycle.HasActiveRules(prefix, true) {
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" Prefix %q has active rules\n", prefix) console.Debugf(scannerLogPrefix+" Prefix %q has active rules\n", prefix)
} }
activeLifeCycle = f.oldCache.Info.lifeCycle activeLifeCycle = f.oldCache.Info.lifeCycle
@ -382,12 +382,12 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
if folder.name != dataUsageRoot && !filter.containsDir(folder.name) { if folder.name != dataUsageRoot && !filter.containsDir(folder.name) {
if !thisHash.mod(f.oldCache.Info.NextCycle, f.healFolderInclude/folder.objectHealProbDiv) { if !thisHash.mod(f.oldCache.Info.NextCycle, f.healFolderInclude/folder.objectHealProbDiv) {
f.newCache.copyWithChildren(&f.oldCache, thisHash, folder.parent) f.newCache.copyWithChildren(&f.oldCache, thisHash, folder.parent)
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" Skipping non-updated folder: %v\n", folder.name) console.Debugf(scannerLogPrefix+" Skipping non-updated folder: %v\n", folder.name)
} }
continue continue
} else { } else {
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" Adding non-updated folder to heal check: %v\n", folder.name) console.Debugf(scannerLogPrefix+" Adding non-updated folder to heal check: %v\n", folder.name)
} }
// If probability was already scannerHealFolderInclude, keep it. // If probability was already scannerHealFolderInclude, keep it.
@ -395,7 +395,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
} }
} }
} }
scannerSleeper.Sleep(ctx, dataCrawlSleepPerFolder) scannerSleeper.Sleep(ctx, dataScannerSleepPerFolder)
cache := dataUsageEntry{} cache := dataUsageEntry{}
@ -404,14 +404,14 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
entName = path.Clean(path.Join(folder.name, entName)) entName = path.Clean(path.Join(folder.name, entName))
bucket, prefix := path2BucketObjectWithBasePath(f.root, entName) bucket, prefix := path2BucketObjectWithBasePath(f.root, entName)
if bucket == "" { if bucket == "" {
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" no bucket (%s,%s)\n", f.root, entName) console.Debugf(scannerLogPrefix+" no bucket (%s,%s)\n", f.root, entName)
} }
return errDoneForNow return errDoneForNow
} }
if isReservedOrInvalidBucket(bucket, false) { if isReservedOrInvalidBucket(bucket, false) {
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" invalid bucket: %v, entry: %v\n", bucket, entName) console.Debugf(scannerLogPrefix+" invalid bucket: %v, entry: %v\n", bucket, entName)
} }
return errDoneForNow return errDoneForNow
@ -450,13 +450,13 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
wait := scannerSleeper.Timer(ctx) wait := scannerSleeper.Timer(ctx)
// Get file size, ignore errors. // Get file size, ignore errors.
item := crawlItem{ item := scannerItem{
Path: path.Join(f.root, entName), Path: path.Join(f.root, entName),
Typ: typ, Typ: typ,
bucket: bucket, bucket: bucket,
prefix: path.Dir(prefix), prefix: path.Dir(prefix),
objectName: path.Base(entName), objectName: path.Base(entName),
debug: f.dataUsageCrawlDebug, debug: f.dataUsageScannerDebug,
lifeCycle: activeLifeCycle, lifeCycle: activeLifeCycle,
heal: thisHash.mod(f.oldCache.Info.NextCycle, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure, heal: thisHash.mod(f.oldCache.Info.NextCycle, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure,
} }
@ -532,7 +532,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
healObjectsPrefix := color.Green("healObjects:") healObjectsPrefix := color.Green("healObjects:")
for k := range existing { for k := range existing {
bucket, prefix := path2BucketObject(k) bucket, prefix := path2BucketObject(k)
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" checking disappeared folder: %v/%v\n", bucket, prefix) console.Debugf(scannerLogPrefix+" checking disappeared folder: %v/%v\n", bucket, prefix)
} }
@ -552,13 +552,13 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
minDisks: len(f.disks), // We want full consistency. minDisks: len(f.disks), // We want full consistency.
// Weird, maybe transient error. // Weird, maybe transient error.
agreed: func(entry metaCacheEntry) { agreed: func(entry metaCacheEntry) {
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(healObjectsPrefix+" got agreement: %v\n", entry.name) console.Debugf(healObjectsPrefix+" got agreement: %v\n", entry.name)
} }
}, },
// Some disks have data for this. // Some disks have data for this.
partial: func(entries metaCacheEntries, nAgreed int, errs []error) { partial: func(entries metaCacheEntries, nAgreed int, errs []error) {
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(healObjectsPrefix+" got partial, %d agreed, errs: %v\n", nAgreed, errs) console.Debugf(healObjectsPrefix+" got partial, %d agreed, errs: %v\n", nAgreed, errs)
} }
@ -580,7 +580,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
entry, _ = entries.firstFound() entry, _ = entries.firstFound()
} }
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(healObjectsPrefix+" resolved to: %v, dir: %v\n", entry.name, entry.isDir()) console.Debugf(healObjectsPrefix+" resolved to: %v, dir: %v\n", entry.name, entry.isDir())
} }
@ -618,20 +618,20 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
}, },
// Too many disks failed. // Too many disks failed.
finished: func(errs []error) { finished: func(errs []error) {
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(healObjectsPrefix+" too many errors: %v\n", errs) console.Debugf(healObjectsPrefix+" too many errors: %v\n", errs)
} }
cancel() cancel()
}, },
}) })
if f.dataUsageCrawlDebug && err != nil && err != errFileNotFound { if f.dataUsageScannerDebug && err != nil && err != errFileNotFound {
console.Debugf(healObjectsPrefix+" checking returned value %v (%T)\n", err, err) console.Debugf(healObjectsPrefix+" checking returned value %v (%T)\n", err, err)
} }
// If we found one or more disks with this folder, delete it. // If we found one or more disks with this folder, delete it.
if err == nil && dangling { if err == nil && dangling {
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(healObjectsPrefix+" deleting dangling directory %s\n", prefix) console.Debugf(healObjectsPrefix+" deleting dangling directory %s\n", prefix)
} }
@ -690,7 +690,7 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder,
dirStack = append(dirStack, entName) dirStack = append(dirStack, entName)
err := readDirFn(path.Join(dirStack...), addDir) err := readDirFn(path.Join(dirStack...), addDir)
dirStack = dirStack[:len(dirStack)-1] dirStack = dirStack[:len(dirStack)-1]
scannerSleeper.Sleep(ctx, dataCrawlSleepPerFolder) scannerSleeper.Sleep(ctx, dataScannerSleepPerFolder)
return err return err
} }
@ -705,19 +705,19 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder,
bucket, prefix := path2BucketObjectWithBasePath(f.root, fileName) bucket, prefix := path2BucketObjectWithBasePath(f.root, fileName)
var activeLifeCycle *lifecycle.Lifecycle var activeLifeCycle *lifecycle.Lifecycle
if f.oldCache.Info.lifeCycle != nil && f.oldCache.Info.lifeCycle.HasActiveRules(prefix, false) { if f.oldCache.Info.lifeCycle != nil && f.oldCache.Info.lifeCycle.HasActiveRules(prefix, false) {
if f.dataUsageCrawlDebug { if f.dataUsageScannerDebug {
console.Debugf(deepScannerLogPrefix+" Prefix %q has active rules\n", prefix) console.Debugf(deepScannerLogPrefix+" Prefix %q has active rules\n", prefix)
} }
activeLifeCycle = f.oldCache.Info.lifeCycle activeLifeCycle = f.oldCache.Info.lifeCycle
} }
item := crawlItem{ item := scannerItem{
Path: fileName, Path: fileName,
Typ: typ, Typ: typ,
bucket: bucket, bucket: bucket,
prefix: path.Dir(prefix), prefix: path.Dir(prefix),
objectName: path.Base(entName), objectName: path.Base(entName),
debug: f.dataUsageCrawlDebug, debug: f.dataUsageScannerDebug,
lifeCycle: activeLifeCycle, lifeCycle: activeLifeCycle,
heal: hashPath(path.Join(prefix, entName)).mod(f.oldCache.Info.NextCycle, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure, heal: hashPath(path.Join(prefix, entName)).mod(f.oldCache.Info.NextCycle, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure,
} }
@ -752,8 +752,8 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder,
return &cache, nil return &cache, nil
} }
// crawlItem represents each file while walking. // scannerItem represents each file while walking.
type crawlItem struct { type scannerItem struct {
Path string Path string
Typ os.FileMode Typ os.FileMode
@ -773,10 +773,10 @@ type sizeSummary struct {
replicaSize int64 replicaSize int64
} }
type getSizeFn func(item crawlItem) (sizeSummary, error) type getSizeFn func(item scannerItem) (sizeSummary, error)
// transformMetaDir will transform a directory to prefix/file.ext // transformMetaDir will transform a directory to prefix/file.ext
func (i *crawlItem) transformMetaDir() { func (i *scannerItem) transformMetaDir() {
split := strings.Split(i.prefix, SlashSeparator) split := strings.Split(i.prefix, SlashSeparator)
if len(split) > 1 { if len(split) > 1 {
i.prefix = path.Join(split[:len(split)-1]...) i.prefix = path.Join(split[:len(split)-1]...)
@ -799,7 +799,7 @@ var applyActionsLogPrefix = color.Green("applyActions:")
// The resulting size on disk will always be returned. // The resulting size on disk will always be returned.
// The metadata will be compared to consensus on the object layer before any changes are applied. // The metadata will be compared to consensus on the object layer before any changes are applied.
// If no metadata is supplied, -1 is returned if no action is taken. // If no metadata is supplied, -1 is returned if no action is taken.
func (i *crawlItem) applyActions(ctx context.Context, o ObjectLayer, meta actionMeta) (size int64) { func (i *scannerItem) applyActions(ctx context.Context, o ObjectLayer, meta actionMeta) (size int64) {
size, err := meta.oi.GetActualSize() size, err := meta.oi.GetActualSize()
if i.debug { if i.debug {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
@ -1052,12 +1052,12 @@ func applyLifecycleAction(ctx context.Context, action lifecycle.Action, objLayer
} }
// objectPath returns the prefix and object name. // objectPath returns the prefix and object name.
func (i *crawlItem) objectPath() string { func (i *scannerItem) objectPath() string {
return path.Join(i.prefix, i.objectName) return path.Join(i.prefix, i.objectName)
} }
// healReplication will heal a scanned item that has failed replication. // healReplication will heal a scanned item that has failed replication.
func (i *crawlItem) healReplication(ctx context.Context, o ObjectLayer, oi ObjectInfo, sizeS *sizeSummary) { func (i *scannerItem) healReplication(ctx context.Context, o ObjectLayer, oi ObjectInfo, sizeS *sizeSummary) {
if oi.DeleteMarker || !oi.VersionPurgeStatus.Empty() { if oi.DeleteMarker || !oi.VersionPurgeStatus.Empty() {
// heal delete marker replication failure or versioned delete replication failure // heal delete marker replication failure or versioned delete replication failure
if oi.ReplicationStatus == replication.Pending || if oi.ReplicationStatus == replication.Pending ||
@ -1082,7 +1082,7 @@ func (i *crawlItem) healReplication(ctx context.Context, o ObjectLayer, oi Objec
} }
// healReplicationDeletes will heal a scanned deleted item that failed to replicate deletes. // healReplicationDeletes will heal a scanned deleted item that failed to replicate deletes.
func (i *crawlItem) healReplicationDeletes(ctx context.Context, o ObjectLayer, oi ObjectInfo) { func (i *scannerItem) healReplicationDeletes(ctx context.Context, o ObjectLayer, oi ObjectInfo) {
// handle soft delete and permanent delete failures here. // handle soft delete and permanent delete failures here.
if oi.DeleteMarker || !oi.VersionPurgeStatus.Empty() { if oi.DeleteMarker || !oi.VersionPurgeStatus.Empty() {
versionID := "" versionID := ""

View File

@ -80,7 +80,7 @@ func newDataUpdateTracker() *dataUpdateTracker {
Current: dataUpdateFilter{ Current: dataUpdateFilter{
idx: 1, idx: 1,
}, },
debug: env.Get(envDataUsageCrawlDebug, config.EnableOff) == config.EnableOn || serverDebugLog, debug: env.Get(envDataUsageScannerDebug, config.EnableOff) == config.EnableOn || serverDebugLog,
input: make(chan string, dataUpdateTrackerQueueSize), input: make(chan string, dataUpdateTrackerQueueSize),
save: make(chan struct{}, 1), save: make(chan struct{}, 1),
saveExited: make(chan struct{}), saveExited: make(chan struct{}),

View File

@ -28,7 +28,7 @@ import (
) )
const ( const (
envDataUsageCrawlDebug = "MINIO_DISK_USAGE_CRAWL_DEBUG" envDataUsageScannerDebug = "MINIO_DISK_USAGE_SCANNER_DEBUG"
dataUsageRoot = SlashSeparator dataUsageRoot = SlashSeparator
dataUsageBucket = minioMetaBucket + SlashSeparator + bucketMetaPrefix dataUsageBucket = minioMetaBucket + SlashSeparator + bucketMetaPrefix

View File

@ -51,7 +51,7 @@ func TestDataUsageUpdate(t *testing.T) {
} }
createUsageTestFiles(t, base, bucket, files) createUsageTestFiles(t, base, bucket, files)
getSize := func(item crawlItem) (sizeS sizeSummary, err error) { getSize := func(item scannerItem) (sizeS sizeSummary, err error) {
if item.Typ&os.ModeDir == 0 { if item.Typ&os.ModeDir == 0 {
var s os.FileInfo var s os.FileInfo
s, err = os.Stat(item.Path) s, err = os.Stat(item.Path)
@ -64,7 +64,7 @@ func TestDataUsageUpdate(t *testing.T) {
return return
} }
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize) got, err := scanDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -185,7 +185,7 @@ func TestDataUsageUpdate(t *testing.T) {
}, },
} }
createUsageTestFiles(t, base, bucket, files) createUsageTestFiles(t, base, bucket, files)
got, err = crawlDataFolder(context.Background(), base, got, getSize) got, err = scanDataFolder(context.Background(), base, got, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -270,7 +270,7 @@ func TestDataUsageUpdate(t *testing.T) {
} }
// Changed dir must be picked up in this many cycles. // Changed dir must be picked up in this many cycles.
for i := 0; i < dataUsageUpdateDirCycles; i++ { for i := 0; i < dataUsageUpdateDirCycles; i++ {
got, err = crawlDataFolder(context.Background(), base, got, getSize) got, err = scanDataFolder(context.Background(), base, got, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -347,7 +347,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
} }
createUsageTestFiles(t, base, "", files) createUsageTestFiles(t, base, "", files)
getSize := func(item crawlItem) (sizeS sizeSummary, err error) { getSize := func(item scannerItem) (sizeS sizeSummary, err error) {
if item.Typ&os.ModeDir == 0 { if item.Typ&os.ModeDir == 0 {
var s os.FileInfo var s os.FileInfo
s, err = os.Stat(item.Path) s, err = os.Stat(item.Path)
@ -359,7 +359,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
} }
return return
} }
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize) got, err := scanDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -469,7 +469,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
}, },
} }
createUsageTestFiles(t, base, "", files) createUsageTestFiles(t, base, "", files)
got, err = crawlDataFolder(context.Background(), base, got, getSize) got, err = scanDataFolder(context.Background(), base, got, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -552,7 +552,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
} }
// Changed dir must be picked up in this many cycles. // Changed dir must be picked up in this many cycles.
for i := 0; i < dataUsageUpdateDirCycles; i++ { for i := 0; i < dataUsageUpdateDirCycles; i++ {
got, err = crawlDataFolder(context.Background(), base, got, getSize) got, err = scanDataFolder(context.Background(), base, got, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }
@ -646,7 +646,7 @@ func TestDataUsageCacheSerialize(t *testing.T) {
} }
createUsageTestFiles(t, base, bucket, files) createUsageTestFiles(t, base, bucket, files)
getSize := func(item crawlItem) (sizeS sizeSummary, err error) { getSize := func(item scannerItem) (sizeS sizeSummary, err error) {
if item.Typ&os.ModeDir == 0 { if item.Typ&os.ModeDir == 0 {
var s os.FileInfo var s os.FileInfo
s, err = os.Stat(item.Path) s, err = os.Stat(item.Path)
@ -658,7 +658,7 @@ func TestDataUsageCacheSerialize(t *testing.T) {
} }
return return
} }
want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize) want, err := scanDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
if err != nil { if err != nil {
t.Fatal(err) t.Fatal(err)
} }

View File

@ -360,7 +360,7 @@ func (z *erasureServerPools) StorageInfo(ctx context.Context) (StorageInfo, []er
return storageInfo, errs return storageInfo, errs
} }
func (z *erasureServerPools) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error { func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error {
ctx, cancel := context.WithCancel(ctx) ctx, cancel := context.WithCancel(ctx)
defer cancel() defer cancel()
@ -379,7 +379,7 @@ func (z *erasureServerPools) CrawlAndGetDataUsage(ctx context.Context, bf *bloom
return nil return nil
} }
// Crawl latest allBuckets first. // Scan latest allBuckets first.
sort.Slice(allBuckets, func(i, j int) bool { sort.Slice(allBuckets, func(i, j int) bool {
return allBuckets[i].Created.After(allBuckets[j].Created) return allBuckets[i].Created.After(allBuckets[j].Created)
}) })
@ -402,7 +402,7 @@ func (z *erasureServerPools) CrawlAndGetDataUsage(ctx context.Context, bf *bloom
} }
}() }()
// Start scanner. Blocks until done. // Start scanner. Blocks until done.
err := erObj.crawlAndGetDataUsage(ctx, allBuckets, bf, updates) err := erObj.nsScanner(ctx, allBuckets, bf, updates)
if err != nil { if err != nil {
logger.LogIf(ctx, err) logger.LogIf(ctx, err)
mu.Lock() mu.Lock()

View File

@ -298,9 +298,9 @@ func (er erasureObjects) cleanupDeletedObjects(ctx context.Context) {
wg.Wait() wg.Wait()
} }
// CrawlAndGetDataUsage will start crawling buckets and send updated totals as they are traversed. // nsScanner will start scanning buckets and send updated totals as they are traversed.
// Updates are sent on a regular basis and the caller *must* consume them. // Updates are sent on a regular basis and the caller *must* consume them.
func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, updates chan<- dataUsageCache) error { func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, updates chan<- dataUsageCache) error {
if len(buckets) == 0 { if len(buckets) == 0 {
return nil return nil
} }
@ -308,7 +308,7 @@ func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []Buc
// Collect disks we can use. // Collect disks we can use.
disks, healing := er.getOnlineDisksWithHealing() disks, healing := er.getOnlineDisksWithHealing()
if len(disks) == 0 { if len(disks) == 0 {
logger.Info(color.Green("data-crawl:") + " all disks are offline or being healed, skipping crawl") logger.Info(color.Green("data-scanner:") + " all disks are offline or being healed, skipping scanner")
return nil return nil
} }
@ -442,7 +442,7 @@ func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []Buc
// Calc usage // Calc usage
before := cache.Info.LastUpdate before := cache.Info.LastUpdate
var err error var err error
cache, err = disk.CrawlAndGetDataUsage(ctx, cache) cache, err = disk.NSScanner(ctx, cache)
cache.Info.BloomFilter = nil cache.Info.BloomFilter = nil
if err != nil { if err != nil {
if !cache.Info.LastUpdate.IsZero() && cache.Info.LastUpdate.After(before) { if !cache.Info.LastUpdate.IsZero() && cache.Info.LastUpdate.After(before) {

View File

@ -231,8 +231,8 @@ func (fs *FSObjects) StorageInfo(ctx context.Context) (StorageInfo, []error) {
return storageInfo, nil return storageInfo, nil
} }
// CrawlAndGetDataUsage returns data usage stats of the current FS deployment // NSScanner returns data usage stats of the current FS deployment
func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error { func (fs *FSObjects) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error {
// Load bucket totals // Load bucket totals
var totalCache dataUsageCache var totalCache dataUsageCache
err := totalCache.load(ctx, fs, dataUsageCacheName) err := totalCache.load(ctx, fs, dataUsageCacheName)
@ -268,7 +268,7 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter,
} }
bCache.Info.BloomFilter = totalCache.Info.BloomFilter bCache.Info.BloomFilter = totalCache.Info.BloomFilter
cache, err := fs.crawlBucket(ctx, b.Name, bCache) cache, err := fs.scanBucket(ctx, b.Name, bCache)
select { select {
case <-ctx.Done(): case <-ctx.Done():
return ctx.Err() return ctx.Err()
@ -279,13 +279,13 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter,
if cache.root() == nil { if cache.root() == nil {
if intDataUpdateTracker.debug { if intDataUpdateTracker.debug {
logger.Info(color.Green("CrawlAndGetDataUsage:") + " No root added. Adding empty") logger.Info(color.Green("NSScanner:") + " No root added. Adding empty")
} }
cache.replace(cache.Info.Name, dataUsageRoot, dataUsageEntry{}) cache.replace(cache.Info.Name, dataUsageRoot, dataUsageEntry{})
} }
if cache.Info.LastUpdate.After(bCache.Info.LastUpdate) { if cache.Info.LastUpdate.After(bCache.Info.LastUpdate) {
if intDataUpdateTracker.debug { if intDataUpdateTracker.debug {
logger.Info(color.Green("CrawlAndGetDataUsage:")+" Saving bucket %q cache with %d entries", b.Name, len(cache.Cache)) logger.Info(color.Green("NSScanner:")+" Saving bucket %q cache with %d entries", b.Name, len(cache.Cache))
} }
logger.LogIf(ctx, cache.save(ctx, fs, path.Join(b.Name, dataUsageCacheName))) logger.LogIf(ctx, cache.save(ctx, fs, path.Join(b.Name, dataUsageCacheName)))
} }
@ -295,7 +295,7 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter,
entry := cl.flatten(*cl.root()) entry := cl.flatten(*cl.root())
totalCache.replace(cl.Info.Name, dataUsageRoot, entry) totalCache.replace(cl.Info.Name, dataUsageRoot, entry)
if intDataUpdateTracker.debug { if intDataUpdateTracker.debug {
logger.Info(color.Green("CrawlAndGetDataUsage:")+" Saving totals cache with %d entries", len(totalCache.Cache)) logger.Info(color.Green("NSScanner:")+" Saving totals cache with %d entries", len(totalCache.Cache))
} }
totalCache.Info.LastUpdate = time.Now() totalCache.Info.LastUpdate = time.Now()
logger.LogIf(ctx, totalCache.save(ctx, fs, dataUsageCacheName)) logger.LogIf(ctx, totalCache.save(ctx, fs, dataUsageCacheName))
@ -307,27 +307,27 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter,
return nil return nil
} }
// crawlBucket crawls a single bucket in FS mode. // scanBucket scans a single bucket in FS mode.
// The updated cache for the bucket is returned. // The updated cache for the bucket is returned.
// A partially updated bucket may be returned. // A partially updated bucket may be returned.
func (fs *FSObjects) crawlBucket(ctx context.Context, bucket string, cache dataUsageCache) (dataUsageCache, error) { func (fs *FSObjects) scanBucket(ctx context.Context, bucket string, cache dataUsageCache) (dataUsageCache, error) {
// Get bucket policy // Get bucket policy
// Check if the current bucket has a configured lifecycle policy // Check if the current bucket has a configured lifecycle policy
lc, err := globalLifecycleSys.Get(bucket) lc, err := globalLifecycleSys.Get(bucket)
if err == nil && lc.HasActiveRules("", true) { if err == nil && lc.HasActiveRules("", true) {
if intDataUpdateTracker.debug { if intDataUpdateTracker.debug {
logger.Info(color.Green("crawlBucket:") + " lifecycle: Active rules found") logger.Info(color.Green("scanBucket:") + " lifecycle: Active rules found")
} }
cache.Info.lifeCycle = lc cache.Info.lifeCycle = lc
} }
// Load bucket info. // Load bucket info.
cache, err = crawlDataFolder(ctx, fs.fsPath, cache, func(item crawlItem) (sizeSummary, error) { cache, err = scanDataFolder(ctx, fs.fsPath, cache, func(item scannerItem) (sizeSummary, error) {
bucket, object := item.bucket, item.objectPath() bucket, object := item.bucket, item.objectPath()
fsMetaBytes, err := xioutil.ReadFile(pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile)) fsMetaBytes, err := xioutil.ReadFile(pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile))
if err != nil && !osIsNotExist(err) { if err != nil && !osIsNotExist(err) {
if intDataUpdateTracker.debug { if intDataUpdateTracker.debug {
logger.Info(color.Green("crawlBucket:")+" object return unexpected error: %v/%v: %w", item.bucket, item.objectPath(), err) logger.Info(color.Green("scanBucket:")+" object return unexpected error: %v/%v: %w", item.bucket, item.objectPath(), err)
} }
return sizeSummary{}, errSkipFile return sizeSummary{}, errSkipFile
} }
@ -348,7 +348,7 @@ func (fs *FSObjects) crawlBucket(ctx context.Context, bucket string, cache dataU
fi, fiErr := os.Stat(item.Path) fi, fiErr := os.Stat(item.Path)
if fiErr != nil { if fiErr != nil {
if intDataUpdateTracker.debug { if intDataUpdateTracker.debug {
logger.Info(color.Green("crawlBucket:")+" object path missing: %v: %w", item.Path, fiErr) logger.Info(color.Green("scanBucket:")+" object path missing: %v: %w", item.Path, fiErr)
} }
return sizeSummary{}, errSkipFile return sizeSummary{}, errSkipFile
} }

View File

@ -39,8 +39,8 @@ func (a GatewayUnsupported) BackendInfo() BackendInfo {
return BackendInfo{Type: BackendGateway} return BackendInfo{Type: BackendGateway}
} }
// CrawlAndGetDataUsage - crawl is not implemented for gateway // NSScanner - scanner is not implemented for gateway
func (a GatewayUnsupported) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error { func (a GatewayUnsupported) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error {
logger.CriticalIf(ctx, errors.New("not implemented")) logger.CriticalIf(ctx, errors.New("not implemented"))
return NotImplemented{} return NotImplemented{}
} }

View File

@ -32,7 +32,7 @@ const (
) )
// NewBgHealSequence creates a background healing sequence // NewBgHealSequence creates a background healing sequence
// operation which crawls all objects and heal them. // operation which scans all objects and heals them.
func newBgHealSequence() *healSequence { func newBgHealSequence() *healSequence {
reqInfo := &logger.ReqInfo{API: "BackgroundHeal"} reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo)) ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo))

View File

@ -34,7 +34,7 @@ import (
// WalkDirOptions provides options for WalkDir operations. // WalkDirOptions provides options for WalkDir operations.
type WalkDirOptions struct { type WalkDirOptions struct {
// Bucket to crawl // Bucket to scan
Bucket string Bucket string
// Directory inside the bucket. // Directory inside the bucket.

View File

@ -98,8 +98,8 @@ func (d *naughtyDisk) SetDiskID(id string) {
d.disk.SetDiskID(id) d.disk.SetDiskID(id)
} }
func (d *naughtyDisk) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (info dataUsageCache, err error) { func (d *naughtyDisk) NSScanner(ctx context.Context, cache dataUsageCache) (info dataUsageCache, err error) {
return d.disk.CrawlAndGetDataUsage(ctx, cache) return d.disk.NSScanner(ctx, cache)
} }
func (d *naughtyDisk) DiskInfo(ctx context.Context) (info DiskInfo, err error) { func (d *naughtyDisk) DiskInfo(ctx context.Context) (info DiskInfo, err error) {

View File

@ -87,7 +87,7 @@ type ObjectLayer interface {
// Storage operations. // Storage operations.
Shutdown(context.Context) error Shutdown(context.Context) error
CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error
BackendInfo() BackendInfo BackendInfo() BackendInfo
StorageInfo(ctx context.Context) (StorageInfo, []error) // local queries only local disks StorageInfo(ctx context.Context) (StorageInfo, []error) // local queries only local disks

View File

@ -39,7 +39,7 @@ type StorageAPI interface {
Healing() bool // Returns if disk is healing. Healing() bool // Returns if disk is healing.
DiskInfo(ctx context.Context) (info DiskInfo, err error) DiskInfo(ctx context.Context) (info DiskInfo, err error)
CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) NSScanner(ctx context.Context, cache dataUsageCache) (dataUsageCache, error)
// Volume operations. // Volume operations.
MakeVol(ctx context.Context, volume string) (err error) MakeVol(ctx context.Context, volume string) (err error)

View File

@ -170,12 +170,12 @@ func (client *storageRESTClient) Healing() bool {
return false return false
} }
func (client *storageRESTClient) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) { func (client *storageRESTClient) NSScanner(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) {
pr, pw := io.Pipe() pr, pw := io.Pipe()
go func() { go func() {
pw.CloseWithError(cache.serializeTo(pw)) pw.CloseWithError(cache.serializeTo(pw))
}() }()
respBody, err := client.call(ctx, storageRESTMethodCrawlAndGetDataUsage, url.Values{}, pr, -1) respBody, err := client.call(ctx, storageRESTMethodNSScanner, url.Values{}, pr, -1)
defer http.DrainBody(respBody) defer http.DrainBody(respBody)
if err != nil { if err != nil {
pr.Close() pr.Close()

View File

@ -17,20 +17,20 @@
package cmd package cmd
const ( const (
storageRESTVersion = "v27" // Add force-delete-marker to DeleteVersion storageRESTVersion = "v28" // Renamed crawl -> scanner
storageRESTVersionPrefix = SlashSeparator + storageRESTVersion storageRESTVersionPrefix = SlashSeparator + storageRESTVersion
storageRESTPrefix = minioReservedBucketPath + "/storage" storageRESTPrefix = minioReservedBucketPath + "/storage"
) )
const ( const (
storageRESTMethodHealth = "/health" storageRESTMethodHealth = "/health"
storageRESTMethodDiskInfo = "/diskinfo" storageRESTMethodDiskInfo = "/diskinfo"
storageRESTMethodCrawlAndGetDataUsage = "/crawlandgetdatausage" storageRESTMethodNSScanner = "/nsscanner"
storageRESTMethodMakeVol = "/makevol" storageRESTMethodMakeVol = "/makevol"
storageRESTMethodMakeVolBulk = "/makevolbulk" storageRESTMethodMakeVolBulk = "/makevolbulk"
storageRESTMethodStatVol = "/statvol" storageRESTMethodStatVol = "/statvol"
storageRESTMethodDeleteVol = "/deletevol" storageRESTMethodDeleteVol = "/deletevol"
storageRESTMethodListVols = "/listvols" storageRESTMethodListVols = "/listvols"
storageRESTMethodAppendFile = "/appendfile" storageRESTMethodAppendFile = "/appendfile"
storageRESTMethodCreateFile = "/createfile" storageRESTMethodCreateFile = "/createfile"

View File

@ -156,7 +156,7 @@ func (s *storageRESTServer) DiskInfoHandler(w http.ResponseWriter, r *http.Reque
logger.LogIf(r.Context(), msgp.Encode(w, &info)) logger.LogIf(r.Context(), msgp.Encode(w, &info))
} }
func (s *storageRESTServer) CrawlAndGetDataUsageHandler(w http.ResponseWriter, r *http.Request) { func (s *storageRESTServer) NSScannerHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) { if !s.IsValid(w, r) {
return return
} }
@ -172,7 +172,7 @@ func (s *storageRESTServer) CrawlAndGetDataUsageHandler(w http.ResponseWriter, r
} }
resp := streamHTTPResponse(w) resp := streamHTTPResponse(w)
usageInfo, err := s.storage.CrawlAndGetDataUsage(r.Context(), cache) usageInfo, err := s.storage.NSScanner(r.Context(), cache)
if err != nil { if err != nil {
resp.CloseWithError(err) resp.CloseWithError(err)
return return
@ -684,7 +684,7 @@ func (s *storageRESTServer) RenameFileHandler(w http.ResponseWriter, r *http.Req
} }
// keepHTTPResponseAlive can be used to avoid timeouts with long storage // keepHTTPResponseAlive can be used to avoid timeouts with long storage
// operations, such as bitrot verification or data usage crawling. // operations, such as bitrot verification or data usage scanning.
// Every 10 seconds a space character is sent. // Every 10 seconds a space character is sent.
// The returned function should always be called to release resources. // The returned function should always be called to release resources.
// An optional error can be sent which will be picked as text only error, // An optional error can be sent which will be picked as text only error,
@ -808,7 +808,7 @@ func (h *httpStreamResponse) CloseWithError(err error) {
} }
// streamHTTPResponse can be used to avoid timeouts with long storage // streamHTTPResponse can be used to avoid timeouts with long storage
// operations, such as bitrot verification or data usage crawling. // operations, such as bitrot verification or data usage scanning.
// Every 10 seconds a space character is sent. // Every 10 seconds a space character is sent.
// The returned function should always be called to release resources. // The returned function should always be called to release resources.
// An optional error can be sent which will be picked as text only error, // An optional error can be sent which will be picked as text only error,
@ -1029,7 +1029,7 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodHealth).HandlerFunc(httpTraceHdrs(server.HealthHandler)) subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodHealth).HandlerFunc(httpTraceHdrs(server.HealthHandler))
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodDiskInfo).HandlerFunc(httpTraceHdrs(server.DiskInfoHandler)) subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodDiskInfo).HandlerFunc(httpTraceHdrs(server.DiskInfoHandler))
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodCrawlAndGetDataUsage).HandlerFunc(httpTraceHdrs(server.CrawlAndGetDataUsageHandler)) subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodNSScanner).HandlerFunc(httpTraceHdrs(server.NSScannerHandler))
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodMakeVol).HandlerFunc(httpTraceHdrs(server.MakeVolHandler)).Queries(restQueries(storageRESTVolume)...) subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodMakeVol).HandlerFunc(httpTraceHdrs(server.MakeVolHandler)).Queries(restQueries(storageRESTVolume)...)
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodMakeVolBulk).HandlerFunc(httpTraceHdrs(server.MakeVolBulkHandler)).Queries(restQueries(storageRESTVolumes)...) subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodMakeVolBulk).HandlerFunc(httpTraceHdrs(server.MakeVolBulkHandler)).Queries(restQueries(storageRESTVolumes)...)
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodStatVol).HandlerFunc(httpTraceHdrs(server.StatVolHandler)).Queries(restQueries(storageRESTVolume)...) subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodStatVol).HandlerFunc(httpTraceHdrs(server.StatVolHandler)).Queries(restQueries(storageRESTVolume)...)

View File

@ -55,11 +55,11 @@ func (p *xlStorageDiskIDCheck) Healing() bool {
return p.storage.Healing() return p.storage.Healing()
} }
func (p *xlStorageDiskIDCheck) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) { func (p *xlStorageDiskIDCheck) NSScanner(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) {
if err := p.checkDiskStale(); err != nil { if err := p.checkDiskStale(); err != nil {
return dataUsageCache{}, err return dataUsageCache{}, err
} }
return p.storage.CrawlAndGetDataUsage(ctx, cache) return p.storage.NSScanner(ctx, cache)
} }
func (p *xlStorageDiskIDCheck) Close() error { func (p *xlStorageDiskIDCheck) Close() error {

View File

@ -353,7 +353,7 @@ func (s *xlStorage) Healing() bool {
return err == nil return err == nil
} }
func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) { func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) {
var lc *lifecycle.Lifecycle var lc *lifecycle.Lifecycle
var err error var err error
@ -363,7 +363,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
if err == nil && lc.HasActiveRules("", true) { if err == nil && lc.HasActiveRules("", true) {
cache.Info.lifeCycle = lc cache.Info.lifeCycle = lc
if intDataUpdateTracker.debug { if intDataUpdateTracker.debug {
console.Debugln(color.Green("crawlDisk:") + " lifecycle: Active rules found") console.Debugln(color.Green("scannerDisk:") + " lifecycle: Active rules found")
} }
} }
} }
@ -375,7 +375,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
healOpts := globalHealConfig healOpts := globalHealConfig
globalHealConfigMu.Unlock() globalHealConfigMu.Unlock()
dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, func(item crawlItem) (sizeSummary, error) { dataUsageInfo, err := scanDataFolder(ctx, s.diskPath, cache, func(item scannerItem) (sizeSummary, error) {
// Look for `xl.meta/xl.json' at the leaf. // Look for `xl.meta/xl.json' at the leaf.
if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) && if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) &&
!strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) { !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {
@ -386,7 +386,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
buf, err := xioutil.ReadFile(item.Path) buf, err := xioutil.ReadFile(item.Path)
if err != nil { if err != nil {
if intDataUpdateTracker.debug { if intDataUpdateTracker.debug {
console.Debugf(color.Green("crawlBucket:")+" object path missing: %v: %w\n", item.Path, err) console.Debugf(color.Green("scannerBucket:")+" object path missing: %v: %w\n", item.Path, err)
} }
return sizeSummary{}, errSkipFile return sizeSummary{}, errSkipFile
} }
@ -397,7 +397,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
fivs, err := getFileInfoVersions(buf, item.bucket, item.objectPath()) fivs, err := getFileInfoVersions(buf, item.bucket, item.objectPath())
if err != nil { if err != nil {
if intDataUpdateTracker.debug { if intDataUpdateTracker.debug {
console.Debugf(color.Green("crawlBucket:")+" reading xl.meta failed: %v: %w\n", item.Path, err) console.Debugf(color.Green("scannerBucket:")+" reading xl.meta failed: %v: %w\n", item.Path, err)
} }
return sizeSummary{}, errSkipFile return sizeSummary{}, errSkipFile
} }

View File

@ -139,7 +139,7 @@ The replication configuration follows [AWS S3 Spec](https://docs.aws.amazon.com/
When object locking is used in conjunction with replication, both source and destination buckets need to have [object locking](https://docs.min.io/docs/minio-bucket-object-lock-guide.html) enabled. Similarly, objects encrypted on the server side will be replicated if the destination also supports encryption. When object locking is used in conjunction with replication, both source and destination buckets need to have [object locking](https://docs.min.io/docs/minio-bucket-object-lock-guide.html) enabled. Similarly, objects encrypted on the server side will be replicated if the destination also supports encryption.
Replication status can be seen in the metadata on the source and destination objects. On the source side, the `X-Amz-Replication-Status` changes from `PENDING` to `COMPLETED` or `FAILED` after replication attempt either succeeded or failed respectively. On the destination side, a `X-Amz-Replication-Status` status of `REPLICA` indicates that the object was replicated successfully. Any replication failures are automatically re-attempted during a periodic disk crawl cycle. Replication status can be seen in the metadata on the source and destination objects. On the source side, the `X-Amz-Replication-Status` changes from `PENDING` to `COMPLETED` or `FAILED` after replication attempt either succeeded or failed respectively. On the destination side, a `X-Amz-Replication-Status` status of `REPLICA` indicates that the object was replicated successfully. Any replication failures are automatically re-attempted during a periodic disk scanner cycle.
To perform bi-directional replication, repeat the above process on the target site - this time setting the source bucket as the replication target. It is recommended that replication be run in a system with at least two CPUs available to the process, so that replication can run in its own thread. To perform bi-directional replication, repeat the above process on the target site - this time setting the source bucket as the replication target. It is recommended that replication be run in a system with at least two CPUs available to the process, so that replication can run in its own thread.

View File

@ -263,21 +263,21 @@ The following sub-systems are dynamic i.e., configuration parameters for each su
``` ```
api manage global HTTP API call specific features, such as throttling, authentication types, etc. api manage global HTTP API call specific features, such as throttling, authentication types, etc.
heal manage object healing frequency and bitrot verification checks heal manage object healing frequency and bitrot verification checks
scanner manage crawling for usage calculation, lifecycle, healing and more scanner manage namespace scanning for usage calculation, lifecycle, healing and more
``` ```
> NOTE: if you set any of the following sub-system configuration using ENVs, dynamic behavior is not supported. > NOTE: if you set any of the following sub-system configuration using ENVs, dynamic behavior is not supported.
### Usage scanner ### Usage scanner
Data usage scanner is enabled by default. The following configuration settings allow for more staggered delay in terms of usage calculation. The scanner adapts to the system speed and completely pauses when the system is under load. It is possible to adjust the speed of the scanner and thereby the latency of updates being reflected. The delays between each operation of the crawl can be adjusted by the `mc admin config set alias/ delay=15.0`. By default the value is `10.0`. This means the scanner will sleep *10x* the time each operation takes. Data usage scanner is enabled by default. The following configuration settings allow for more staggered delay in terms of usage calculation. The scanner adapts to the system speed and completely pauses when the system is under load. It is possible to adjust the speed of the scanner and thereby the latency of updates being reflected. The delays between each operation of the scanner can be adjusted by the `mc admin config set alias/ scanner delay=15.0`. By default the value is `10.0`. This means the scanner will sleep *10x* the time each operation takes.
In most setups this will keep the scanner slow enough to not impact overall system performance. Setting the `delay` key to a *lower* value will make the scanner faster and setting it to 0 will make the scanner run at full speed (not recommended in production). Setting it to a higher value will make the scanner slower, consuming less resources with the trade off of not collecting metrics for operations like healing and disk usage as fast. In most setups this will keep the scanner slow enough to not impact overall system performance. Setting the `delay` key to a *lower* value will make the scanner faster and setting it to 0 will make the scanner run at full speed (not recommended in production). Setting it to a higher value will make the scanner slower, consuming less resources with the trade off of not collecting metrics for operations like healing and disk usage as fast.
``` ```
~ mc admin config set alias/ scanner ~ mc admin config set alias/ scanner
KEY: KEY:
scanner manage crawling for usage calculation, lifecycle, healing and more scanner manage namespace scanning for usage calculation, lifecycle, healing and more
ARGS: ARGS:
delay (float) scanner delay multiplier, defaults to '10.0' delay (float) scanner delay multiplier, defaults to '10.0'
@ -306,7 +306,7 @@ KEY:
heal manage object healing frequency and bitrot verification checks heal manage object healing frequency and bitrot verification checks
ARGS: ARGS:
bitrotscan (on|off) perform bitrot scan on disks when checking objects during crawl bitrotscan (on|off) perform bitrot scan on disks when checking objects during scanner
max_sleep (duration) maximum sleep duration between objects to slow down heal operation. eg. 2s max_sleep (duration) maximum sleep duration between objects to slow down heal operation. eg. 2s
max_io (int) maximum IO requests allowed between objects to slow down heal operation. eg. 3 max_io (int) maximum IO requests allowed between objects to slow down heal operation. eg. 3
``` ```

View File

@ -267,15 +267,15 @@ MINIO_ETCD_COMMENT (sentence) optionally add a comment to this setting
数据使用情况采集器默认是启用的通过Envs可以设置更多的交错延迟。 数据使用情况采集器默认是启用的通过Envs可以设置更多的交错延迟。
采集器能适应系统速度,并在系统负载时完全暂停。 可以调整采集器的速度,从而达到延迟更新的效果。 每次采集操作之间的延迟都可以通过环境变量`MINIO_DISK_USAGE_CRAWL_DELAY`来调整。 默认情况下该值为10。 这意味着采集每次操作都将休眠*10x*的时间。 采集器能适应系统速度,并在系统负载时完全暂停。 可以调整采集器的速度,从而达到延迟更新的效果。 每次采集操作之间的延迟都可以通过环境变量`MINIO_SCANNER_DELAY`来调整。 默认情况下该值为10。 这意味着采集每次操作都将休眠*10x*的时间。
大多数设置要让采集器足够慢,这样不会影响整体的系统性能。 大多数设置要让采集器足够慢,这样不会影响整体的系统性能。
设置 `MINIO_DISK_USAGE_CRAWL_DELAY` 为一个 *较低* 的值可以让采集器更快并且设置为0的时候可以让采集器全速运行不推荐。 设置一个较高的值可以让采集器变慢,进一步减少资源的消耗。 设置 `MINIO_SCANNER_DELAY` 为一个 *较低* 的值可以让采集器更快并且设置为0的时候可以让采集器全速运行不推荐。 设置一个较高的值可以让采集器变慢,进一步减少资源的消耗。
示例: 如下设置将使采集器的速度降低三倍, 减少了系统资源的使用,但是反映到更新的延迟会增加。 示例: 如下设置将使采集器的速度降低三倍, 减少了系统资源的使用,但是反映到更新的延迟会增加。
```sh ```sh
export MINIO_DISK_USAGE_CRAWL_DELAY=30 export MINIO_SCANNER_DELAY=30
minio server /data minio server /data
``` ```