rename all references from crawl -> scanner (#11621)

This commit is contained in:
Harshavardhana 2021-02-26 15:11:42 -08:00 committed by GitHub
parent 6386b45c08
commit 9171d6ef65
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
23 changed files with 133 additions and 133 deletions

View File

@ -118,7 +118,7 @@ func initHelp() {
},
config.HelpKV{
Key: config.ScannerSubSys,
Description: "manage scanner for usage calculation, lifecycle, healing and more",
Description: "manage namespace scanning for usage calculation, lifecycle, healing and more",
},
config.HelpKV{
Key: config.LoggerWebhookSubSys,

View File

@ -66,7 +66,7 @@ var (
Help = config.HelpKVS{
config.HelpKV{
Key: Bitrot,
Description: `perform bitrot scan on disks when checking objects during crawl`,
Description: `perform bitrot scan on disks when checking objects during scanner`,
Optional: true,
Type: "on|off",
},

View File

@ -43,9 +43,9 @@ import (
)
const (
dataCrawlSleepPerFolder = time.Millisecond // Time to wait between folders.
dataCrawlStartDelay = 1 * time.Minute // Time to wait on startup and between cycles.
dataUsageUpdateDirCycles = 16 // Visit all folders every n cycles.
dataScannerSleepPerFolder = time.Millisecond // Time to wait between folders.
dataScannerStartDelay = 1 * time.Minute // Time to wait on startup and between cycles.
dataUsageUpdateDirCycles = 16 // Visit all folders every n cycles.
healDeleteDangling = true
healFolderIncludeProb = 32 // Include a clean folder one in n cycles.
@ -76,7 +76,7 @@ func runDataScanner(ctx context.Context, objAPI ObjectLayer) {
for {
err := locker.GetLock(ctx, dataScannerLeaderLockTimeout)
if err != nil {
time.Sleep(time.Duration(r.Float64() * float64(dataCrawlStartDelay)))
time.Sleep(time.Duration(r.Float64() * float64(dataScannerStartDelay)))
continue
}
break
@ -100,16 +100,16 @@ func runDataScanner(ctx context.Context, objAPI ObjectLayer) {
br.Close()
}
crawlTimer := time.NewTimer(dataCrawlStartDelay)
defer crawlTimer.Stop()
scannerTimer := time.NewTimer(dataScannerStartDelay)
defer scannerTimer.Stop()
for {
select {
case <-ctx.Done():
return
case <-crawlTimer.C:
case <-scannerTimer.C:
// Reset the timer for next cycle.
crawlTimer.Reset(dataCrawlStartDelay)
scannerTimer.Reset(dataScannerStartDelay)
if intDataUpdateTracker.debug {
console.Debugln("starting scanner cycle")
@ -120,7 +120,7 @@ func runDataScanner(ctx context.Context, objAPI ObjectLayer) {
go storeDataUsageInBackend(ctx, objAPI, results)
bf, err := globalNotificationSys.updateBloomFilter(ctx, nextBloomCycle)
logger.LogIf(ctx, err)
err = objAPI.CrawlAndGetDataUsage(ctx, bf, results)
err = objAPI.NSScanner(ctx, bf, results)
close(results)
logger.LogIf(ctx, err)
if err == nil {
@ -156,27 +156,27 @@ type folderScanner struct {
newCache dataUsageCache
withFilter *bloomFilter
dataUsageCrawlDebug bool
healFolderInclude uint32 // Include a clean folder one in n cycles.
healObjectSelect uint32 // Do a heal check on an object once every n cycles. Must divide into healFolderInclude
dataUsageScannerDebug bool
healFolderInclude uint32 // Include a clean folder one in n cycles.
healObjectSelect uint32 // Do a heal check on an object once every n cycles. Must divide into healFolderInclude
newFolders []cachedFolder
existingFolders []cachedFolder
disks []StorageAPI
}
// crawlDataFolder will crawl the basepath+cache.Info.Name and return an updated cache.
// scanDataFolder will scanner the basepath+cache.Info.Name and return an updated cache.
// The returned cache will always be valid, but may not be updated from the existing.
// Before each operation sleepDuration is called which can be used to temporarily halt the scanner.
// If the supplied context is canceled the function will return at the first chance.
func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache, getSize getSizeFn) (dataUsageCache, error) {
func scanDataFolder(ctx context.Context, basePath string, cache dataUsageCache, getSize getSizeFn) (dataUsageCache, error) {
t := UTCNow()
logPrefix := color.Green("data-usage: ")
logSuffix := color.Blue("- %v + %v", basePath, cache.Info.Name)
if intDataUpdateTracker.debug {
defer func() {
console.Debugf(logPrefix+" Crawl time: %v %s\n", time.Since(t), logSuffix)
console.Debugf(logPrefix+" Scanner time: %v %s\n", time.Since(t), logSuffix)
}()
}
@ -189,15 +189,15 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
skipHeal := cache.Info.SkipHealing
s := folderScanner{
root: basePath,
getSize: getSize,
oldCache: cache,
newCache: dataUsageCache{Info: cache.Info},
newFolders: nil,
existingFolders: nil,
dataUsageCrawlDebug: intDataUpdateTracker.debug,
healFolderInclude: 0,
healObjectSelect: 0,
root: basePath,
getSize: getSize,
oldCache: cache,
newCache: dataUsageCache{Info: cache.Info},
newFolders: nil,
existingFolders: nil,
dataUsageScannerDebug: intDataUpdateTracker.debug,
healFolderInclude: 0,
healObjectSelect: 0,
}
// Add disks for set healing.
@ -227,21 +227,21 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
s.withFilter = nil
}
}
if s.dataUsageCrawlDebug {
console.Debugf(logPrefix+"Start crawling. Bloom filter: %v %s\n", s.withFilter != nil, logSuffix)
if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Start scanning. Bloom filter: %v %s\n", s.withFilter != nil, logSuffix)
}
done := ctx.Done()
var flattenLevels = 2
if s.dataUsageCrawlDebug {
if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Cycle: %v, Entries: %v %s\n", cache.Info.NextCycle, len(cache.Cache), logSuffix)
}
// Always scan flattenLevels deep. Cache root is level 0.
todo := []cachedFolder{{name: cache.Info.Name, objectHealProbDiv: 1}}
for i := 0; i < flattenLevels; i++ {
if s.dataUsageCrawlDebug {
if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Level %v, scanning %v directories. %s\n", i, len(todo), logSuffix)
}
select {
@ -257,7 +257,7 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
}
}
if s.dataUsageCrawlDebug {
if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"New folders: %v %s\n", s.newFolders, logSuffix)
}
@ -286,7 +286,7 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
}
}
if s.dataUsageCrawlDebug {
if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Existing folders: %v %s\n", len(s.existingFolders), logSuffix)
}
@ -313,13 +313,13 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
// If folder isn't in filter, skip it completely.
if !s.withFilter.containsDir(folder.name) {
if !h.mod(s.oldCache.Info.NextCycle, s.healFolderInclude/folder.objectHealProbDiv) {
if s.dataUsageCrawlDebug {
if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Skipping non-updated folder: %v %s\n", folder, logSuffix)
}
s.newCache.replaceHashed(h, folder.parent, s.oldCache.Cache[h.Key()])
continue
} else {
if s.dataUsageCrawlDebug {
if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Adding non-updated folder to heal check: %v %s\n", folder.name, logSuffix)
}
// Update probability of including objects
@ -341,8 +341,8 @@ func crawlDataFolder(ctx context.Context, basePath string, cache dataUsageCache,
}
s.newCache.replaceHashed(h, folder.parent, *du)
}
if s.dataUsageCrawlDebug {
console.Debugf(logPrefix+"Finished crawl, %v entries %s\n", len(s.newCache.Cache), logSuffix)
if s.dataUsageScannerDebug {
console.Debugf(logPrefix+"Finished scanner, %v entries %s\n", len(s.newCache.Cache), logSuffix)
}
s.newCache.Info.LastUpdate = UTCNow()
s.newCache.Info.NextCycle++
@ -371,7 +371,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
_, prefix := path2BucketObjectWithBasePath(f.root, folder.name)
var activeLifeCycle *lifecycle.Lifecycle
if f.oldCache.Info.lifeCycle != nil && f.oldCache.Info.lifeCycle.HasActiveRules(prefix, true) {
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" Prefix %q has active rules\n", prefix)
}
activeLifeCycle = f.oldCache.Info.lifeCycle
@ -382,12 +382,12 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
if folder.name != dataUsageRoot && !filter.containsDir(folder.name) {
if !thisHash.mod(f.oldCache.Info.NextCycle, f.healFolderInclude/folder.objectHealProbDiv) {
f.newCache.copyWithChildren(&f.oldCache, thisHash, folder.parent)
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" Skipping non-updated folder: %v\n", folder.name)
}
continue
} else {
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" Adding non-updated folder to heal check: %v\n", folder.name)
}
// If probability was already scannerHealFolderInclude, keep it.
@ -395,7 +395,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
}
}
}
scannerSleeper.Sleep(ctx, dataCrawlSleepPerFolder)
scannerSleeper.Sleep(ctx, dataScannerSleepPerFolder)
cache := dataUsageEntry{}
@ -404,14 +404,14 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
entName = path.Clean(path.Join(folder.name, entName))
bucket, prefix := path2BucketObjectWithBasePath(f.root, entName)
if bucket == "" {
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" no bucket (%s,%s)\n", f.root, entName)
}
return errDoneForNow
}
if isReservedOrInvalidBucket(bucket, false) {
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" invalid bucket: %v, entry: %v\n", bucket, entName)
}
return errDoneForNow
@ -450,13 +450,13 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
wait := scannerSleeper.Timer(ctx)
// Get file size, ignore errors.
item := crawlItem{
item := scannerItem{
Path: path.Join(f.root, entName),
Typ: typ,
bucket: bucket,
prefix: path.Dir(prefix),
objectName: path.Base(entName),
debug: f.dataUsageCrawlDebug,
debug: f.dataUsageScannerDebug,
lifeCycle: activeLifeCycle,
heal: thisHash.mod(f.oldCache.Info.NextCycle, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure,
}
@ -532,7 +532,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
healObjectsPrefix := color.Green("healObjects:")
for k := range existing {
bucket, prefix := path2BucketObject(k)
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(scannerLogPrefix+" checking disappeared folder: %v/%v\n", bucket, prefix)
}
@ -552,13 +552,13 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
minDisks: len(f.disks), // We want full consistency.
// Weird, maybe transient error.
agreed: func(entry metaCacheEntry) {
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(healObjectsPrefix+" got agreement: %v\n", entry.name)
}
},
// Some disks have data for this.
partial: func(entries metaCacheEntries, nAgreed int, errs []error) {
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(healObjectsPrefix+" got partial, %d agreed, errs: %v\n", nAgreed, errs)
}
@ -580,7 +580,7 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
entry, _ = entries.firstFound()
}
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(healObjectsPrefix+" resolved to: %v, dir: %v\n", entry.name, entry.isDir())
}
@ -618,20 +618,20 @@ func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFo
},
// Too many disks failed.
finished: func(errs []error) {
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(healObjectsPrefix+" too many errors: %v\n", errs)
}
cancel()
},
})
if f.dataUsageCrawlDebug && err != nil && err != errFileNotFound {
if f.dataUsageScannerDebug && err != nil && err != errFileNotFound {
console.Debugf(healObjectsPrefix+" checking returned value %v (%T)\n", err, err)
}
// If we found one or more disks with this folder, delete it.
if err == nil && dangling {
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(healObjectsPrefix+" deleting dangling directory %s\n", prefix)
}
@ -690,7 +690,7 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder,
dirStack = append(dirStack, entName)
err := readDirFn(path.Join(dirStack...), addDir)
dirStack = dirStack[:len(dirStack)-1]
scannerSleeper.Sleep(ctx, dataCrawlSleepPerFolder)
scannerSleeper.Sleep(ctx, dataScannerSleepPerFolder)
return err
}
@ -705,19 +705,19 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder,
bucket, prefix := path2BucketObjectWithBasePath(f.root, fileName)
var activeLifeCycle *lifecycle.Lifecycle
if f.oldCache.Info.lifeCycle != nil && f.oldCache.Info.lifeCycle.HasActiveRules(prefix, false) {
if f.dataUsageCrawlDebug {
if f.dataUsageScannerDebug {
console.Debugf(deepScannerLogPrefix+" Prefix %q has active rules\n", prefix)
}
activeLifeCycle = f.oldCache.Info.lifeCycle
}
item := crawlItem{
item := scannerItem{
Path: fileName,
Typ: typ,
bucket: bucket,
prefix: path.Dir(prefix),
objectName: path.Base(entName),
debug: f.dataUsageCrawlDebug,
debug: f.dataUsageScannerDebug,
lifeCycle: activeLifeCycle,
heal: hashPath(path.Join(prefix, entName)).mod(f.oldCache.Info.NextCycle, f.healObjectSelect/folder.objectHealProbDiv) && globalIsErasure,
}
@ -752,8 +752,8 @@ func (f *folderScanner) deepScanFolder(ctx context.Context, folder cachedFolder,
return &cache, nil
}
// crawlItem represents each file while walking.
type crawlItem struct {
// scannerItem represents each file while walking.
type scannerItem struct {
Path string
Typ os.FileMode
@ -773,10 +773,10 @@ type sizeSummary struct {
replicaSize int64
}
type getSizeFn func(item crawlItem) (sizeSummary, error)
type getSizeFn func(item scannerItem) (sizeSummary, error)
// transformMetaDir will transform a directory to prefix/file.ext
func (i *crawlItem) transformMetaDir() {
func (i *scannerItem) transformMetaDir() {
split := strings.Split(i.prefix, SlashSeparator)
if len(split) > 1 {
i.prefix = path.Join(split[:len(split)-1]...)
@ -799,7 +799,7 @@ var applyActionsLogPrefix = color.Green("applyActions:")
// The resulting size on disk will always be returned.
// The metadata will be compared to consensus on the object layer before any changes are applied.
// If no metadata is supplied, -1 is returned if no action is taken.
func (i *crawlItem) applyActions(ctx context.Context, o ObjectLayer, meta actionMeta) (size int64) {
func (i *scannerItem) applyActions(ctx context.Context, o ObjectLayer, meta actionMeta) (size int64) {
size, err := meta.oi.GetActualSize()
if i.debug {
logger.LogIf(ctx, err)
@ -1052,12 +1052,12 @@ func applyLifecycleAction(ctx context.Context, action lifecycle.Action, objLayer
}
// objectPath returns the prefix and object name.
func (i *crawlItem) objectPath() string {
func (i *scannerItem) objectPath() string {
return path.Join(i.prefix, i.objectName)
}
// healReplication will heal a scanned item that has failed replication.
func (i *crawlItem) healReplication(ctx context.Context, o ObjectLayer, oi ObjectInfo, sizeS *sizeSummary) {
func (i *scannerItem) healReplication(ctx context.Context, o ObjectLayer, oi ObjectInfo, sizeS *sizeSummary) {
if oi.DeleteMarker || !oi.VersionPurgeStatus.Empty() {
// heal delete marker replication failure or versioned delete replication failure
if oi.ReplicationStatus == replication.Pending ||
@ -1082,7 +1082,7 @@ func (i *crawlItem) healReplication(ctx context.Context, o ObjectLayer, oi Objec
}
// healReplicationDeletes will heal a scanned deleted item that failed to replicate deletes.
func (i *crawlItem) healReplicationDeletes(ctx context.Context, o ObjectLayer, oi ObjectInfo) {
func (i *scannerItem) healReplicationDeletes(ctx context.Context, o ObjectLayer, oi ObjectInfo) {
// handle soft delete and permanent delete failures here.
if oi.DeleteMarker || !oi.VersionPurgeStatus.Empty() {
versionID := ""

View File

@ -80,7 +80,7 @@ func newDataUpdateTracker() *dataUpdateTracker {
Current: dataUpdateFilter{
idx: 1,
},
debug: env.Get(envDataUsageCrawlDebug, config.EnableOff) == config.EnableOn || serverDebugLog,
debug: env.Get(envDataUsageScannerDebug, config.EnableOff) == config.EnableOn || serverDebugLog,
input: make(chan string, dataUpdateTrackerQueueSize),
save: make(chan struct{}, 1),
saveExited: make(chan struct{}),

View File

@ -28,7 +28,7 @@ import (
)
const (
envDataUsageCrawlDebug = "MINIO_DISK_USAGE_CRAWL_DEBUG"
envDataUsageScannerDebug = "MINIO_DISK_USAGE_SCANNER_DEBUG"
dataUsageRoot = SlashSeparator
dataUsageBucket = minioMetaBucket + SlashSeparator + bucketMetaPrefix

View File

@ -51,7 +51,7 @@ func TestDataUsageUpdate(t *testing.T) {
}
createUsageTestFiles(t, base, bucket, files)
getSize := func(item crawlItem) (sizeS sizeSummary, err error) {
getSize := func(item scannerItem) (sizeS sizeSummary, err error) {
if item.Typ&os.ModeDir == 0 {
var s os.FileInfo
s, err = os.Stat(item.Path)
@ -64,7 +64,7 @@ func TestDataUsageUpdate(t *testing.T) {
return
}
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
got, err := scanDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
if err != nil {
t.Fatal(err)
}
@ -185,7 +185,7 @@ func TestDataUsageUpdate(t *testing.T) {
},
}
createUsageTestFiles(t, base, bucket, files)
got, err = crawlDataFolder(context.Background(), base, got, getSize)
got, err = scanDataFolder(context.Background(), base, got, getSize)
if err != nil {
t.Fatal(err)
}
@ -270,7 +270,7 @@ func TestDataUsageUpdate(t *testing.T) {
}
// Changed dir must be picked up in this many cycles.
for i := 0; i < dataUsageUpdateDirCycles; i++ {
got, err = crawlDataFolder(context.Background(), base, got, getSize)
got, err = scanDataFolder(context.Background(), base, got, getSize)
if err != nil {
t.Fatal(err)
}
@ -347,7 +347,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
}
createUsageTestFiles(t, base, "", files)
getSize := func(item crawlItem) (sizeS sizeSummary, err error) {
getSize := func(item scannerItem) (sizeS sizeSummary, err error) {
if item.Typ&os.ModeDir == 0 {
var s os.FileInfo
s, err = os.Stat(item.Path)
@ -359,7 +359,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
}
return
}
got, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize)
got, err := scanDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, getSize)
if err != nil {
t.Fatal(err)
}
@ -469,7 +469,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
},
}
createUsageTestFiles(t, base, "", files)
got, err = crawlDataFolder(context.Background(), base, got, getSize)
got, err = scanDataFolder(context.Background(), base, got, getSize)
if err != nil {
t.Fatal(err)
}
@ -552,7 +552,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
}
// Changed dir must be picked up in this many cycles.
for i := 0; i < dataUsageUpdateDirCycles; i++ {
got, err = crawlDataFolder(context.Background(), base, got, getSize)
got, err = scanDataFolder(context.Background(), base, got, getSize)
if err != nil {
t.Fatal(err)
}
@ -646,7 +646,7 @@ func TestDataUsageCacheSerialize(t *testing.T) {
}
createUsageTestFiles(t, base, bucket, files)
getSize := func(item crawlItem) (sizeS sizeSummary, err error) {
getSize := func(item scannerItem) (sizeS sizeSummary, err error) {
if item.Typ&os.ModeDir == 0 {
var s os.FileInfo
s, err = os.Stat(item.Path)
@ -658,7 +658,7 @@ func TestDataUsageCacheSerialize(t *testing.T) {
}
return
}
want, err := crawlDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
want, err := scanDataFolder(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: bucket}}, getSize)
if err != nil {
t.Fatal(err)
}

View File

@ -360,7 +360,7 @@ func (z *erasureServerPools) StorageInfo(ctx context.Context) (StorageInfo, []er
return storageInfo, errs
}
func (z *erasureServerPools) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error {
func (z *erasureServerPools) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
@ -379,7 +379,7 @@ func (z *erasureServerPools) CrawlAndGetDataUsage(ctx context.Context, bf *bloom
return nil
}
// Crawl latest allBuckets first.
// Scanner latest allBuckets first.
sort.Slice(allBuckets, func(i, j int) bool {
return allBuckets[i].Created.After(allBuckets[j].Created)
})
@ -402,7 +402,7 @@ func (z *erasureServerPools) CrawlAndGetDataUsage(ctx context.Context, bf *bloom
}
}()
// Start scanner. Blocks until done.
err := erObj.crawlAndGetDataUsage(ctx, allBuckets, bf, updates)
err := erObj.nsScanner(ctx, allBuckets, bf, updates)
if err != nil {
logger.LogIf(ctx, err)
mu.Lock()

View File

@ -298,9 +298,9 @@ func (er erasureObjects) cleanupDeletedObjects(ctx context.Context) {
wg.Wait()
}
// CrawlAndGetDataUsage will start crawling buckets and send updated totals as they are traversed.
// nsScanner will start scanning buckets and send updated totals as they are traversed.
// Updates are sent on a regular basis and the caller *must* consume them.
func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, updates chan<- dataUsageCache) error {
func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, bf *bloomFilter, updates chan<- dataUsageCache) error {
if len(buckets) == 0 {
return nil
}
@ -308,7 +308,7 @@ func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []Buc
// Collect disks we can use.
disks, healing := er.getOnlineDisksWithHealing()
if len(disks) == 0 {
logger.Info(color.Green("data-crawl:") + " all disks are offline or being healed, skipping crawl")
logger.Info(color.Green("data-scanner:") + " all disks are offline or being healed, skipping scanner")
return nil
}
@ -442,7 +442,7 @@ func (er erasureObjects) crawlAndGetDataUsage(ctx context.Context, buckets []Buc
// Calc usage
before := cache.Info.LastUpdate
var err error
cache, err = disk.CrawlAndGetDataUsage(ctx, cache)
cache, err = disk.NSScanner(ctx, cache)
cache.Info.BloomFilter = nil
if err != nil {
if !cache.Info.LastUpdate.IsZero() && cache.Info.LastUpdate.After(before) {

View File

@ -231,8 +231,8 @@ func (fs *FSObjects) StorageInfo(ctx context.Context) (StorageInfo, []error) {
return storageInfo, nil
}
// CrawlAndGetDataUsage returns data usage stats of the current FS deployment
func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error {
// NSScanner returns data usage stats of the current FS deployment
func (fs *FSObjects) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error {
// Load bucket totals
var totalCache dataUsageCache
err := totalCache.load(ctx, fs, dataUsageCacheName)
@ -268,7 +268,7 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter,
}
bCache.Info.BloomFilter = totalCache.Info.BloomFilter
cache, err := fs.crawlBucket(ctx, b.Name, bCache)
cache, err := fs.scanBucket(ctx, b.Name, bCache)
select {
case <-ctx.Done():
return ctx.Err()
@ -279,13 +279,13 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter,
if cache.root() == nil {
if intDataUpdateTracker.debug {
logger.Info(color.Green("CrawlAndGetDataUsage:") + " No root added. Adding empty")
logger.Info(color.Green("NSScanner:") + " No root added. Adding empty")
}
cache.replace(cache.Info.Name, dataUsageRoot, dataUsageEntry{})
}
if cache.Info.LastUpdate.After(bCache.Info.LastUpdate) {
if intDataUpdateTracker.debug {
logger.Info(color.Green("CrawlAndGetDataUsage:")+" Saving bucket %q cache with %d entries", b.Name, len(cache.Cache))
logger.Info(color.Green("NSScanner:")+" Saving bucket %q cache with %d entries", b.Name, len(cache.Cache))
}
logger.LogIf(ctx, cache.save(ctx, fs, path.Join(b.Name, dataUsageCacheName)))
}
@ -295,7 +295,7 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter,
entry := cl.flatten(*cl.root())
totalCache.replace(cl.Info.Name, dataUsageRoot, entry)
if intDataUpdateTracker.debug {
logger.Info(color.Green("CrawlAndGetDataUsage:")+" Saving totals cache with %d entries", len(totalCache.Cache))
logger.Info(color.Green("NSScanner:")+" Saving totals cache with %d entries", len(totalCache.Cache))
}
totalCache.Info.LastUpdate = time.Now()
logger.LogIf(ctx, totalCache.save(ctx, fs, dataUsageCacheName))
@ -307,27 +307,27 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter,
return nil
}
// crawlBucket crawls a single bucket in FS mode.
// scanBucket scans a single bucket in FS mode.
// The updated cache for the bucket is returned.
// A partially updated bucket may be returned.
func (fs *FSObjects) crawlBucket(ctx context.Context, bucket string, cache dataUsageCache) (dataUsageCache, error) {
func (fs *FSObjects) scanBucket(ctx context.Context, bucket string, cache dataUsageCache) (dataUsageCache, error) {
// Get bucket policy
// Check if the current bucket has a configured lifecycle policy
lc, err := globalLifecycleSys.Get(bucket)
if err == nil && lc.HasActiveRules("", true) {
if intDataUpdateTracker.debug {
logger.Info(color.Green("crawlBucket:") + " lifecycle: Active rules found")
logger.Info(color.Green("scanBucket:") + " lifecycle: Active rules found")
}
cache.Info.lifeCycle = lc
}
// Load bucket info.
cache, err = crawlDataFolder(ctx, fs.fsPath, cache, func(item crawlItem) (sizeSummary, error) {
cache, err = scanDataFolder(ctx, fs.fsPath, cache, func(item scannerItem) (sizeSummary, error) {
bucket, object := item.bucket, item.objectPath()
fsMetaBytes, err := xioutil.ReadFile(pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile))
if err != nil && !osIsNotExist(err) {
if intDataUpdateTracker.debug {
logger.Info(color.Green("crawlBucket:")+" object return unexpected error: %v/%v: %w", item.bucket, item.objectPath(), err)
logger.Info(color.Green("scanBucket:")+" object return unexpected error: %v/%v: %w", item.bucket, item.objectPath(), err)
}
return sizeSummary{}, errSkipFile
}
@ -348,7 +348,7 @@ func (fs *FSObjects) crawlBucket(ctx context.Context, bucket string, cache dataU
fi, fiErr := os.Stat(item.Path)
if fiErr != nil {
if intDataUpdateTracker.debug {
logger.Info(color.Green("crawlBucket:")+" object path missing: %v: %w", item.Path, fiErr)
logger.Info(color.Green("scanBucket:")+" object path missing: %v: %w", item.Path, fiErr)
}
return sizeSummary{}, errSkipFile
}

View File

@ -39,8 +39,8 @@ func (a GatewayUnsupported) BackendInfo() BackendInfo {
return BackendInfo{Type: BackendGateway}
}
// CrawlAndGetDataUsage - crawl is not implemented for gateway
func (a GatewayUnsupported) CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error {
// NSScanner - scanner is not implemented for gateway
func (a GatewayUnsupported) NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error {
logger.CriticalIf(ctx, errors.New("not implemented"))
return NotImplemented{}
}

View File

@ -32,7 +32,7 @@ const (
)
// NewBgHealSequence creates a background healing sequence
// operation which crawls all objects and heal them.
// operation which scans all objects and heal them.
func newBgHealSequence() *healSequence {
reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
ctx, cancelCtx := context.WithCancel(logger.SetReqInfo(GlobalContext, reqInfo))

View File

@ -34,7 +34,7 @@ import (
// WalkDirOptions provides options for WalkDir operations.
type WalkDirOptions struct {
// Bucket to crawl
// Bucket to scanner
Bucket string
// Directory inside the bucket.

View File

@ -98,8 +98,8 @@ func (d *naughtyDisk) SetDiskID(id string) {
d.disk.SetDiskID(id)
}
func (d *naughtyDisk) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (info dataUsageCache, err error) {
return d.disk.CrawlAndGetDataUsage(ctx, cache)
func (d *naughtyDisk) NSScanner(ctx context.Context, cache dataUsageCache) (info dataUsageCache, err error) {
return d.disk.NSScanner(ctx, cache)
}
func (d *naughtyDisk) DiskInfo(ctx context.Context) (info DiskInfo, err error) {

View File

@ -87,7 +87,7 @@ type ObjectLayer interface {
// Storage operations.
Shutdown(context.Context) error
CrawlAndGetDataUsage(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error
NSScanner(ctx context.Context, bf *bloomFilter, updates chan<- DataUsageInfo) error
BackendInfo() BackendInfo
StorageInfo(ctx context.Context) (StorageInfo, []error) // local queries only local disks

View File

@ -39,7 +39,7 @@ type StorageAPI interface {
Healing() bool // Returns if disk is healing.
DiskInfo(ctx context.Context) (info DiskInfo, err error)
CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error)
NSScanner(ctx context.Context, cache dataUsageCache) (dataUsageCache, error)
// Volume operations.
MakeVol(ctx context.Context, volume string) (err error)

View File

@ -170,12 +170,12 @@ func (client *storageRESTClient) Healing() bool {
return false
}
func (client *storageRESTClient) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) {
func (client *storageRESTClient) NSScanner(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) {
pr, pw := io.Pipe()
go func() {
pw.CloseWithError(cache.serializeTo(pw))
}()
respBody, err := client.call(ctx, storageRESTMethodCrawlAndGetDataUsage, url.Values{}, pr, -1)
respBody, err := client.call(ctx, storageRESTMethodNSScanner, url.Values{}, pr, -1)
defer http.DrainBody(respBody)
if err != nil {
pr.Close()

View File

@ -17,20 +17,20 @@
package cmd
const (
storageRESTVersion = "v27" // Add force-delete-marker to DeleteVersion
storageRESTVersion = "v28" // Renamed crawl -> scanner
storageRESTVersionPrefix = SlashSeparator + storageRESTVersion
storageRESTPrefix = minioReservedBucketPath + "/storage"
)
const (
storageRESTMethodHealth = "/health"
storageRESTMethodDiskInfo = "/diskinfo"
storageRESTMethodCrawlAndGetDataUsage = "/crawlandgetdatausage"
storageRESTMethodMakeVol = "/makevol"
storageRESTMethodMakeVolBulk = "/makevolbulk"
storageRESTMethodStatVol = "/statvol"
storageRESTMethodDeleteVol = "/deletevol"
storageRESTMethodListVols = "/listvols"
storageRESTMethodHealth = "/health"
storageRESTMethodDiskInfo = "/diskinfo"
storageRESTMethodNSScanner = "/nsscanner"
storageRESTMethodMakeVol = "/makevol"
storageRESTMethodMakeVolBulk = "/makevolbulk"
storageRESTMethodStatVol = "/statvol"
storageRESTMethodDeleteVol = "/deletevol"
storageRESTMethodListVols = "/listvols"
storageRESTMethodAppendFile = "/appendfile"
storageRESTMethodCreateFile = "/createfile"

View File

@ -156,7 +156,7 @@ func (s *storageRESTServer) DiskInfoHandler(w http.ResponseWriter, r *http.Reque
logger.LogIf(r.Context(), msgp.Encode(w, &info))
}
func (s *storageRESTServer) CrawlAndGetDataUsageHandler(w http.ResponseWriter, r *http.Request) {
func (s *storageRESTServer) NSScannerHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
return
}
@ -172,7 +172,7 @@ func (s *storageRESTServer) CrawlAndGetDataUsageHandler(w http.ResponseWriter, r
}
resp := streamHTTPResponse(w)
usageInfo, err := s.storage.CrawlAndGetDataUsage(r.Context(), cache)
usageInfo, err := s.storage.NSScanner(r.Context(), cache)
if err != nil {
resp.CloseWithError(err)
return
@ -684,7 +684,7 @@ func (s *storageRESTServer) RenameFileHandler(w http.ResponseWriter, r *http.Req
}
// keepHTTPResponseAlive can be used to avoid timeouts with long storage
// operations, such as bitrot verification or data usage crawling.
// operations, such as bitrot verification or data usage scanning.
// Every 10 seconds a space character is sent.
// The returned function should always be called to release resources.
// An optional error can be sent which will be picked as text only error,
@ -808,7 +808,7 @@ func (h *httpStreamResponse) CloseWithError(err error) {
}
// streamHTTPResponse can be used to avoid timeouts with long storage
// operations, such as bitrot verification or data usage crawling.
// operations, such as bitrot verification or data usage scanning.
// Every 10 seconds a space character is sent.
// The returned function should always be called to release resources.
// An optional error can be sent which will be picked as text only error,
@ -1029,7 +1029,7 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodHealth).HandlerFunc(httpTraceHdrs(server.HealthHandler))
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodDiskInfo).HandlerFunc(httpTraceHdrs(server.DiskInfoHandler))
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodCrawlAndGetDataUsage).HandlerFunc(httpTraceHdrs(server.CrawlAndGetDataUsageHandler))
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodNSScanner).HandlerFunc(httpTraceHdrs(server.NSScannerHandler))
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodMakeVol).HandlerFunc(httpTraceHdrs(server.MakeVolHandler)).Queries(restQueries(storageRESTVolume)...)
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodMakeVolBulk).HandlerFunc(httpTraceHdrs(server.MakeVolBulkHandler)).Queries(restQueries(storageRESTVolumes)...)
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodStatVol).HandlerFunc(httpTraceHdrs(server.StatVolHandler)).Queries(restQueries(storageRESTVolume)...)

View File

@ -55,11 +55,11 @@ func (p *xlStorageDiskIDCheck) Healing() bool {
return p.storage.Healing()
}
func (p *xlStorageDiskIDCheck) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) {
func (p *xlStorageDiskIDCheck) NSScanner(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) {
if err := p.checkDiskStale(); err != nil {
return dataUsageCache{}, err
}
return p.storage.CrawlAndGetDataUsage(ctx, cache)
return p.storage.NSScanner(ctx, cache)
}
func (p *xlStorageDiskIDCheck) Close() error {

View File

@ -353,7 +353,7 @@ func (s *xlStorage) Healing() bool {
return err == nil
}
func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) {
func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) {
var lc *lifecycle.Lifecycle
var err error
@ -363,7 +363,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
if err == nil && lc.HasActiveRules("", true) {
cache.Info.lifeCycle = lc
if intDataUpdateTracker.debug {
console.Debugln(color.Green("crawlDisk:") + " lifecycle: Active rules found")
console.Debugln(color.Green("scannerDisk:") + " lifecycle: Active rules found")
}
}
}
@ -375,7 +375,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
healOpts := globalHealConfig
globalHealConfigMu.Unlock()
dataUsageInfo, err := crawlDataFolder(ctx, s.diskPath, cache, func(item crawlItem) (sizeSummary, error) {
dataUsageInfo, err := scanDataFolder(ctx, s.diskPath, cache, func(item scannerItem) (sizeSummary, error) {
// Look for `xl.meta/xl.json' at the leaf.
if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) &&
!strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {
@ -386,7 +386,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
buf, err := xioutil.ReadFile(item.Path)
if err != nil {
if intDataUpdateTracker.debug {
console.Debugf(color.Green("crawlBucket:")+" object path missing: %v: %w\n", item.Path, err)
console.Debugf(color.Green("scannerBucket:")+" object path missing: %v: %w\n", item.Path, err)
}
return sizeSummary{}, errSkipFile
}
@ -397,7 +397,7 @@ func (s *xlStorage) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCac
fivs, err := getFileInfoVersions(buf, item.bucket, item.objectPath())
if err != nil {
if intDataUpdateTracker.debug {
console.Debugf(color.Green("crawlBucket:")+" reading xl.meta failed: %v: %w\n", item.Path, err)
console.Debugf(color.Green("scannerBucket:")+" reading xl.meta failed: %v: %w\n", item.Path, err)
}
return sizeSummary{}, errSkipFile
}

View File

@ -139,7 +139,7 @@ The replication configuration follows [AWS S3 Spec](https://docs.aws.amazon.com/
When object locking is used in conjunction with replication, both source and destination buckets needs to have [object locking](https://docs.min.io/docs/minio-bucket-object-lock-guide.html) enabled. Similarly objects encrypted on the server side, will be replicated if destination also supports encryption.
Replication status can be seen in the metadata on the source and destination objects. On the source side, the `X-Amz-Replication-Status` changes from `PENDING` to `COMPLETED` or `FAILED` after replication attempt either succeeded or failed respectively. On the destination side, a `X-Amz-Replication-Status` status of `REPLICA` indicates that the object was replicated successfully. Any replication failures are automatically re-attempted during a periodic disk crawl cycle.
Replication status can be seen in the metadata on the source and destination objects. On the source side, the `X-Amz-Replication-Status` changes from `PENDING` to `COMPLETED` or `FAILED` after replication attempt either succeeded or failed respectively. On the destination side, a `X-Amz-Replication-Status` status of `REPLICA` indicates that the object was replicated successfully. Any replication failures are automatically re-attempted during a periodic disk scanner cycle.
To perform bi-directional replication, repeat the above process on the target site - this time setting the source bucket as the replication target. It is recommended that replication be run in a system with atleast two CPU's available to the process, so that replication can run in its own thread.

View File

@ -263,21 +263,21 @@ The following sub-systems are dynamic i.e., configuration parameters for each su
```
api manage global HTTP API call specific features, such as throttling, authentication types, etc.
heal manage object healing frequency and bitrot verification checks
scanner manage crawling for usage calculation, lifecycle, healing and more
scanner manage namespace scanning for usage calculation, lifecycle, healing and more
```
> NOTE: if you set any of the following sub-system configuration using ENVs, dynamic behavior is not supported.
### Usage scanner
Data usage scanner is enabled by default. The following configuration settings allow for more staggered delay in terms of usage calculation. The scanner adapts to the system speed and completely pauses when the system is under load. It is possible to adjust the speed of the scanner and thereby the latency of updates being reflected. The delays between each operation of the crawl can be adjusted by the `mc admin config set alias/ delay=15.0`. By default the value is `10.0`. This means the scanner will sleep *10x* the time each operation takes.
Data usage scanner is enabled by default. The following configuration settings allow for more staggered delay in terms of usage calculation. The scanner adapts to the system speed and completely pauses when the system is under load. It is possible to adjust the speed of the scanner and thereby the latency of updates being reflected. The delays between each operation of the scanner can be adjusted by the `mc admin config set alias/ delay=15.0`. By default the value is `10.0`. This means the scanner will sleep *10x* the time each operation takes.
In most setups this will keep the scanner slow enough to not impact overall system performance. Setting the `delay` key to a *lower* value will make the scanner faster and setting it to 0 will make the scanner run at full speed (not recommended in production). Setting it to a higher value will make the scanner slower, consuming less resources with the trade off of not collecting metrics for operations like healing and disk usage as fast.
```
~ mc admin config set alias/ scanner
KEY:
scanner manage crawling for usage calculation, lifecycle, healing and more
scanner manage namespace scanning for usage calculation, lifecycle, healing and more
ARGS:
delay (float) scanner delay multiplier, defaults to '10.0'
@ -306,7 +306,7 @@ KEY:
heal manage object healing frequency and bitrot verification checks
ARGS:
bitrotscan (on|off) perform bitrot scan on disks when checking objects during crawl
bitrotscan (on|off) perform bitrot scan on disks when checking objects during scanner
max_sleep (duration) maximum sleep duration between objects to slow down heal operation. eg. 2s
max_io (int) maximum IO requests allowed between objects to slow down heal operation. eg. 3
```

View File

@ -267,15 +267,15 @@ MINIO_ETCD_COMMENT (sentence) optionally add a comment to this setting
数据使用情况采集器默认是启用的通过Envs可以设置更多的交错延迟。
采集器能适应系统速度,并在系统负载时完全暂停。 可以调整采集器的速度,从而达到延迟更新的效果。 每次采集操作之间的延迟都可以通过环境变量`MINIO_DISK_USAGE_CRAWL_DELAY`来调整。 默认情况下该值为10。 这意味着采集每次操作都将休眠*10x*的时间。
采集器能适应系统速度,并在系统负载时完全暂停。 可以调整采集器的速度,从而达到延迟更新的效果。 每次采集操作之间的延迟都可以通过环境变量`MINIO_SCANNER_DELAY`来调整。 默认情况下该值为10。 这意味着采集每次操作都将休眠*10x*的时间。
大多数设置要让采集器足够慢,这样不会影响整体的系统性能。
设置 `MINIO_DISK_USAGE_CRAWL_DELAY` 为一个 *较低* 的值可以让采集器更快并且设置为0的时候可以让采集器全速运行不推荐。 设置一个较高的值可以让采集器变慢,进一步减少资源的消耗。
设置 `MINIO_SCANNER_DELAY` 为一个 *较低* 的值可以让采集器更快并且设置为0的时候可以让采集器全速运行不推荐。 设置一个较高的值可以让采集器变慢,进一步减少资源的消耗。
示例: 如下设置将使采集器的速度降低三倍, 减少了系统资源的使用,但是反映到更新的延迟会增加。
```sh
export MINIO_DISK_USAGE_CRAWL_DELAY=30
export MINIO_SCANNER_DELAY=30
minio server /data
```