/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cmd

import (
	"context"
	"errors"
	"fmt"
	"path"
	"strings"
	"time"

	"github.com/minio/minio/cmd/logger"
)

type scanStatus uint8

const (
	scanStateNone scanStatus = iota
	scanStateStarted
	scanStateSuccess
	scanStateError

	// Time in which the initiator of a scan must have reported back.
	metacacheMaxRunningAge = time.Minute

	// metacacheBlockSize is the number of file/directory entries to have in each block.
	metacacheBlockSize = 5000

	// metacacheSharePrefix controls whether prefixes on dirty paths are always shared.
	// This will make `test/a` and `test/b` share listings if they are concurrent.
	// Enabling this will make cache sharing more likely and cause less IO,
	// but may cause additional latency to some calls.
	metacacheSharePrefix = false
)

//go:generate msgp -file $GOFILE -unexported

// metacache contains a tracked cache entry.
type metacache struct {
	id           string     `msg:"id"`
	bucket       string     `msg:"b"`
	root         string     `msg:"root"`
	recursive    bool       `msg:"rec"`
	filter       string     `msg:"flt"`
	status       scanStatus `msg:"stat"`
	fileNotFound bool       `msg:"fnf"`
	error        string     `msg:"err"`
	started      time.Time  `msg:"st"`
	ended        time.Time  `msg:"end"`
	lastUpdate   time.Time  `msg:"u"`
	lastHandout  time.Time  `msg:"lh"`
	startedCycle uint64     `msg:"stc"`
	endedCycle   uint64     `msg:"endc"`
	dataVersion  uint8      `msg:"v"`
}
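
// A minimal illustrative sketch of a freshly tracked cache entry, as a scan
// initiator might create one. All values here are hypothetical; the id would
// normally be a UUID and the cycle counters come from the crawler state.
func exampleNewMetacache(bucket, root string) metacache {
	now := UTCNow()
	return metacache{
		id:          "example-cache-id", // hypothetical id
		bucket:      bucket,
		root:        root,
		recursive:   false,
		status:      scanStateStarted,
		started:     now,
		lastUpdate:  now,
		lastHandout: now,
		dataVersion: metacacheStreamVersion,
	}
}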

func (m *metacache) finished() bool {
	return !m.ended.IsZero()
}

// matches returns whether the metacache matches the options given.
func (m *metacache) matches(o *listPathOptions, extend time.Duration) bool {
	if o == nil {
		return false
	}

	// Never return transient caches if there is no id.
	if m.status == scanStateError || m.status == scanStateNone || m.dataVersion != metacacheStreamVersion {
		o.debugf("cache %s state or stream version mismatch", m.id)
		return false
	}
	if m.startedCycle < o.OldestCycle {
		o.debugf("cache %s cycle too old", m.id)
		return false
	}

	// Root of what we are looking for must at least have the same prefix.
	if !strings.HasPrefix(o.BaseDir, m.root) {
		o.debugf("cache %s prefix mismatch, cached:%v, want:%v", m.id, m.root, o.BaseDir)
		return false
	}
	if m.filter != "" && strings.HasPrefix(m.filter, o.FilterPrefix) {
		o.debugf("cache %s cannot be used because of filter %s", m.id, m.filter)
		return false
	}
	if o.Recursive && !m.recursive {
		// If the request is recursive the cached listing must be as well.
		o.debugf("cache %s not recursive", m.id)
		return false
	}
	if o.Separator != slashSeparator && !m.recursive {
		// A non-slash separator requires a recursive listing.
		o.debugf("cache %s not slashsep and not recursive", m.id)
		return false
	}
	if !m.finished() && time.Since(m.lastUpdate) > metacacheMaxRunningAge {
		// Abandoned scan.
		o.debugf("cache %s not running, time: %v", m.id, time.Since(m.lastUpdate))
		return false
	}
	if m.finished() && m.endedCycle <= o.OldestCycle {
		if extend <= 0 {
			// If the scan has ended, the oldest requested cycle must be newer.
			o.debugf("cache %s ended and cycle (%v) <= oldest allowed (%v)", m.id, m.endedCycle, o.OldestCycle)
			return false
		}
		if time.Since(m.lastUpdate) > metacacheMaxRunningAge+extend {
			// Cache ended within the bloom cycle, but its extended life has passed.
			o.debugf("cache %s ended (%v) and beyond extended life (%v)", m.id, m.lastUpdate, metacacheMaxRunningAge+extend)
			return false
		}
	}

	return true
}
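
// An illustrative sketch of how matches decides whether a tracked cache can
// serve a listing request. Only fields that matches actually consults are
// set, and all values are hypothetical.
func exampleMetacacheMatches() {
	cache := metacache{
		id:          "example-cache-id", // hypothetical id
		root:        "prefix/",
		recursive:   true,
		status:      scanStateSuccess,
		started:     UTCNow().Add(-time.Minute),
		ended:       UTCNow(),
		lastUpdate:  UTCNow(),
		dataVersion: metacacheStreamVersion,
	}
	opts := listPathOptions{
		BaseDir:   "prefix/folder/", // must have the cached root as prefix
		Separator: slashSeparator,
		Recursive: false, // a non-recursive request can use a recursive cache
	}
	if cache.matches(&opts, time.Minute) {
		// The cached listing can be reused instead of rescanning the disks.
	}
}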

// worthKeeping indicates if the cache by itself is worth keeping.
func (m *metacache) worthKeeping(currentCycle uint64) bool {
	if m == nil {
		return false
	}
	cache := m
	switch {
	case !cache.finished() && time.Since(cache.lastUpdate) > metacacheMaxRunningAge:
		// Not finished and no update for metacacheMaxRunningAge, discard it.
		return false
	case cache.finished() && cache.startedCycle > currentCycle:
		// Cycle is somehow bigger than the current one.
		return false
	case cache.finished() && time.Since(cache.lastHandout) > 48*time.Hour:
		// Keep for a maximum of 2 days; fallback if the crawler is clogged.
		return false
	case cache.finished() && currentCycle >= dataUsageUpdateDirCycles && cache.startedCycle < currentCycle-dataUsageUpdateDirCycles:
		// Cycle is too old to be valuable.
		return false
	case cache.status == scanStateError || cache.status == scanStateNone:
		// Remove failed listings after 5 minutes.
		return time.Since(cache.lastUpdate) < 5*time.Minute
	}
	return true
}
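
// An illustrative sketch of the most common discard case: a stalled scan is
// dropped once it has gone metacacheMaxRunningAge without an update. The
// currentCycle value is hypothetical.
func exampleWorthKeeping(currentCycle uint64) {
	stalled := metacache{
		status:     scanStateStarted,
		lastUpdate: UTCNow().Add(-2 * metacacheMaxRunningAge),
	}
	if !stalled.worthKeeping(currentCycle) {
		// The entry is removed from the tracked set and its data deleted.
	}
}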

// canBeReplacedBy returns whether this metacache can be replaced by the other.
// Both must pass the worthKeeping check.
func (m *metacache) canBeReplacedBy(other *metacache) bool {
	// If the other is older it can never replace.
	if other.started.Before(m.started) || m.id == other.id {
		return false
	}
	if other.status == scanStateNone || other.status == scanStateError {
		return false
	}
	if m.status == scanStateStarted && time.Since(m.lastUpdate) < metacacheMaxRunningAge {
		return false
	}

	// Keep it around a bit longer.
	if time.Since(m.lastHandout) < 30*time.Minute || time.Since(m.lastUpdate) < metacacheMaxRunningAge {
		return false
	}

	// Go through recursive combinations.
	switch {
	case !m.recursive && !other.recursive:
		// If both are not recursive, the roots must match.
		return m.root == other.root && strings.HasPrefix(m.filter, other.filter)
	case m.recursive && !other.recursive:
		// A recursive listing can never be replaced by a non-recursive one.
		return false
	case !m.recursive && other.recursive:
		// If the other is recursive it must contain this root.
		return strings.HasPrefix(m.root, other.root) && other.filter == ""
	case m.recursive && other.recursive:
		// Similarly if both are recursive.
		return strings.HasPrefix(m.root, other.root) && other.filter == ""
	}
	panic("should be unreachable")
}
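
// An illustrative sketch: a newer recursive listing covering the same root
// may replace an older, idle, non-recursive one. All values are hypothetical.
func exampleCanBeReplaced() {
	old := metacache{
		id:          "old-id",
		root:        "prefix/folder/",
		recursive:   false,
		status:      scanStateSuccess,
		started:     UTCNow().Add(-2 * time.Hour),
		ended:       UTCNow().Add(-2 * time.Hour),
		lastUpdate:  UTCNow().Add(-2 * time.Hour),
		lastHandout: UTCNow().Add(-time.Hour),
	}
	newer := metacache{
		id:        "new-id",
		root:      "prefix/", // contains the old root
		recursive: true,
		status:    scanStateSuccess,
		started:   UTCNow(),
	}
	if old.canBeReplacedBy(&newer) {
		// The old entry can be dropped in favor of the newer listing.
	}
}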

// baseDirFromPrefix will return the base directory given an object path.
// For example an object with name prefix/folder/object.ext will return `prefix/folder/`.
func baseDirFromPrefix(prefix string) string {
	b := path.Dir(prefix)
	if b == "." || b == "./" || b == "/" {
		b = ""
	}
	if !strings.Contains(prefix, slashSeparator) {
		b = ""
	}
	if len(b) > 0 && !strings.HasSuffix(b, slashSeparator) {
		b += slashSeparator
	}
	return b
}
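
// An illustrative sketch of the mapping baseDirFromPrefix performs; the
// inputs are hypothetical object prefixes.
func exampleBaseDirFromPrefix() {
	_ = baseDirFromPrefix("prefix/folder/object.ext") // "prefix/folder/"
	_ = baseDirFromPrefix("prefix/folder/")           // "prefix/folder/"
	_ = baseDirFromPrefix("object.ext")               // "" (no directory component)
}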

// update cache with new status.
// The updates are conditional so multiple callers can update with different states.
func (m *metacache) update(update metacache) {
	m.lastUpdate = UTCNow()

	if m.status == scanStateStarted && update.status == scanStateSuccess {
		m.ended = UTCNow()
		m.endedCycle = update.endedCycle
	}

	if m.status == scanStateStarted && update.status != scanStateStarted {
		m.status = update.status
	}

	if m.error == "" && update.error != "" {
		m.error = update.error
		m.status = scanStateError
		m.ended = UTCNow()
	}
	m.fileNotFound = m.fileNotFound || update.fileNotFound
}
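
// An illustrative sketch of conditional updates from two different callers;
// the cycle value is hypothetical. A success report closes a started scan,
// while a later bare status change is ignored once the scan has left
// scanStateStarted.
func exampleMetacacheUpdate() {
	m := metacache{status: scanStateStarted}
	m.update(metacache{status: scanStateSuccess, endedCycle: 10}) // sets ended and endedCycle
	m.update(metacache{status: scanStateError})                   // no error string, status stays scanStateSuccess
}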

// delete all cache data on disks.
func (m *metacache) delete(ctx context.Context) {
	if m.bucket == "" || m.id == "" {
		logger.LogIf(ctx, fmt.Errorf("metacache.delete: bucket (%s) or id (%s) empty", m.bucket, m.id))
	}
	objAPI := newObjectLayerFn()
	if objAPI == nil {
		logger.LogIf(ctx, errors.New("metacache.delete: no object layer"))
		return
	}
	ez, ok := objAPI.(*erasureServerPools)
	if !ok {
		logger.LogIf(ctx, errors.New("metacache.delete: expected objAPI to be *erasureServerPools"))
		return
	}
	ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(m.bucket, m.id))
}