minio/cmd/metacache.go

/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package cmd

import (
	"context"
	"errors"
	"fmt"
	"path"
	"strings"
	"time"

	"github.com/minio/minio/cmd/logger"
)

type scanStatus uint8

const (
	scanStateNone scanStatus = iota
	scanStateStarted
	scanStateSuccess
	scanStateError

	// Time in which the initiator of a scan must have reported back.
	metacacheMaxRunningAge = time.Minute

	// metacacheBlockSize is the number of file/directory entries to have in each block.
	metacacheBlockSize = 5000

	// metacacheSharePrefix controls whether prefixes on dirty paths are always shared.
	// This will make `test/a` and `test/b` share listings if they are concurrent.
	// Enabling this will make cache sharing more likely and cause less IO,
	// but may cause additional latency to some calls.
	metacacheSharePrefix = false
)
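
// Scan status lifecycle (see update below): an entry typically moves from
// scanStateNone to scanStateStarted when a scan begins, and to
// scanStateSuccess or scanStateError when the scan finishes or fails.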

//go:generate msgp -file $GOFILE -unexported

// metacache contains a tracked cache entry.
type metacache struct {
	id           string     `msg:"id"`
	bucket       string     `msg:"b"`
	root         string     `msg:"root"`
	recursive    bool       `msg:"rec"`
	filter       string     `msg:"flt"`
	status       scanStatus `msg:"stat"`
	fileNotFound bool       `msg:"fnf"`
	error        string     `msg:"err"`
	started      time.Time  `msg:"st"`
	ended        time.Time  `msg:"end"`
	lastUpdate   time.Time  `msg:"u"`
	lastHandout  time.Time  `msg:"lh"`
	startedCycle uint64     `msg:"stc"`
	endedCycle   uint64     `msg:"endc"`
	dataVersion  uint8      `msg:"v"`
}
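
// Construction sketch (illustrative only; the field values and the UUID helper
// are hypothetical): a new entry is created in the started state and can be
// serialized with the msgp-generated MarshalMsg from the go:generate directive
// above.
//
//	m := metacache{
//		id:          mustGetUUID(),
//		bucket:      "bucket",
//		root:        "prefix/",
//		status:      scanStateStarted,
//		started:     UTCNow(),
//		dataVersion: metacacheStreamVersion,
//	}
//	b, err := m.MarshalMsg(nil)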

// finished returns whether the scan that populated this cache has ended.
func (m *metacache) finished() bool {
	return !m.ended.IsZero()
}

// matches returns whether the metacache matches the options given.
func (m *metacache) matches(o *listPathOptions, extend time.Duration) bool {
	if o == nil {
		return false
	}

	// Never return caches in an error or unstarted state, or with a mismatched stream version.
	if m.status == scanStateError || m.status == scanStateNone || m.dataVersion != metacacheStreamVersion {
		o.debugf("cache %s state or stream version mismatch", m.id)
		return false
	}
	if m.startedCycle < o.OldestCycle {
		o.debugf("cache %s cycle too old", m.id)
		return false
	}

	// Root of what we are looking for must at least have the same prefix as the cached root.
	if !strings.HasPrefix(o.BaseDir, m.root) {
		o.debugf("cache %s prefix mismatch, cached:%v, want:%v", m.id, m.root, o.BaseDir)
		return false
	}
	if m.filter != "" && strings.HasPrefix(m.filter, o.FilterPrefix) {
		o.debugf("cache %s cannot be used because of filter %s", m.id, m.filter)
		return false
	}

	if o.Recursive && !m.recursive {
		// If the request is recursive the cached listing must be as well.
		o.debugf("cache %s not recursive", m.id)
		return false
	}
	if o.Separator != slashSeparator && !m.recursive {
		// A non-slash separator requires a recursive listing.
		o.debugf("cache %s not slashsep and not recursive", m.id)
		return false
	}
	if !m.finished() && time.Since(m.lastUpdate) > metacacheMaxRunningAge {
		// Abandoned scan.
		o.debugf("cache %s not running, time: %v", m.id, time.Since(m.lastUpdate))
		return false
	}

	if m.finished() && m.endedCycle <= o.OldestCycle {
		if extend <= 0 {
			// If the scan has ended, the oldest requested cycle must be less.
			o.debugf("cache %s ended and cycle (%v) <= oldest allowed (%v)", m.id, m.endedCycle, o.OldestCycle)
			return false
		}
		if time.Since(m.lastUpdate) > metacacheMaxRunningAge+extend {
			// Cache ended within the bloom cycle, but we can extend its life.
			o.debugf("cache %s ended (%v) and beyond extended life (%v)", m.id, m.lastUpdate, metacacheMaxRunningAge+extend)
			return false
		}
	}

	return true
}
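
// Usage sketch (the option values are hypothetical): a previously handed-out
// cache is only reused when it matches the incoming listing options.
//
//	o := listPathOptions{BaseDir: "prefix/", Recursive: true, Separator: slashSeparator}
//	if cache.matches(&o, time.Minute) {
//		// Serve the listing from this cache.
//	}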

// worthKeeping indicates if the cache by itself is worth keeping.
func (m *metacache) worthKeeping(currentCycle uint64) bool {
	if m == nil {
		return false
	}
	cache := m
	switch {
	case !cache.finished() && time.Since(cache.lastUpdate) > metacacheMaxRunningAge:
		// Not finished and no update within metacacheMaxRunningAge; discard it.
		return false
	case cache.finished() && cache.startedCycle > currentCycle:
		// Cycle is somehow bigger.
		return false
	case cache.finished() && time.Since(cache.lastHandout) > 48*time.Hour:
		// Keep only for 2 days. Fallback if the crawler is clogged.
		return false
	case cache.finished() && currentCycle >= dataUsageUpdateDirCycles && cache.startedCycle < currentCycle-dataUsageUpdateDirCycles:
		// Cycle is too old to be valuable.
		return false
	case cache.status == scanStateError || cache.status == scanStateNone:
		// Remove failed listings after 5 minutes.
		return time.Since(cache.lastUpdate) < 5*time.Minute
	}
	return true
}
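
// Pruning sketch (hypothetical caller): a cache manager would typically drop
// entries that are no longer worth keeping.
//
//	if !cache.worthKeeping(currentCycle) {
//		cache.delete(ctx)
//	}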

// canBeReplacedBy returns whether this cache can be replaced by the other.
// Both must pass the worthKeeping check.
func (m *metacache) canBeReplacedBy(other *metacache) bool {
	// If the other is older it can never replace.
	if other.started.Before(m.started) || m.id == other.id {
		return false
	}
	if other.status == scanStateNone || other.status == scanStateError {
		return false
	}
	if m.status == scanStateStarted && time.Since(m.lastUpdate) < metacacheMaxRunningAge {
		return false
	}

	// Keep it around a bit longer.
	if time.Since(m.lastHandout) < 30*time.Minute || time.Since(m.lastUpdate) < metacacheMaxRunningAge {
		return false
	}

	// Go through recursive combinations.
	switch {
	case !m.recursive && !other.recursive:
		// If both are not recursive, the roots must match.
		return m.root == other.root && strings.HasPrefix(m.filter, other.filter)
	case m.recursive && !other.recursive:
		// A recursive cache can never be replaced by a non-recursive one.
		return false
	case !m.recursive && other.recursive:
		// If the other is recursive it must contain this root.
		return strings.HasPrefix(m.root, other.root) && other.filter == ""
	case m.recursive && other.recursive:
		// Similarly if both are recursive.
		return strings.HasPrefix(m.root, other.root) && other.filter == ""
	}
	panic("should be unreachable")
}
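
// Replacement sketch (illustrative values): assuming both entries pass
// worthKeeping, a non-recursive cache rooted at "prefix/a/" may be replaced by
// a newer recursive cache rooted at "prefix/" with an empty filter.
//
//	if old.worthKeeping(cycle) && candidate.worthKeeping(cycle) && old.canBeReplacedBy(candidate) {
//		// Drop old and keep candidate.
//	}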

// baseDirFromPrefix will return the base directory given an object path.
// For example an object with name prefix/folder/object.ext will return `prefix/folder/`.
func baseDirFromPrefix(prefix string) string {
	b := path.Dir(prefix)
	if b == "." || b == "./" || b == "/" {
		b = ""
	}
	if !strings.Contains(prefix, slashSeparator) {
		b = ""
	}
	if len(b) > 0 && !strings.HasSuffix(b, slashSeparator) {
		b += slashSeparator
	}
	return b
}
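
// Further examples, derived from the logic above:
//
//	baseDirFromPrefix("prefix/folder/object.ext") // "prefix/folder/"
//	baseDirFromPrefix("prefix/")                  // "prefix/"
//	baseDirFromPrefix("object.ext")               // ""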

// update cache with new status.
// The updates are conditional so multiple callers can update with different states.
func (m *metacache) update(update metacache) {
	m.lastUpdate = UTCNow()

	if m.status == scanStateStarted && update.status == scanStateSuccess {
		m.ended = UTCNow()
		m.endedCycle = update.endedCycle
	}

	if m.status == scanStateStarted && update.status != scanStateStarted {
		m.status = update.status
	}

	if m.error == "" && update.error != "" {
		m.error = update.error
		m.status = scanStateError
		m.ended = UTCNow()
	}
	m.fileNotFound = m.fileNotFound || update.fileNotFound
}
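
// Update sketch (hypothetical cycle value): a worker reporting completion only
// transitions an entry that is still in the started state; errors always win.
//
//	m.update(metacache{status: scanStateSuccess, endedCycle: currentCycle})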

// delete all cache data on disks.
func (m *metacache) delete(ctx context.Context) {
	if m.bucket == "" || m.id == "" {
		logger.LogIf(ctx, fmt.Errorf("metacache.delete: bucket (%s) or id (%s) empty", m.bucket, m.id))
	}
	objAPI := newObjectLayerFn()
	if objAPI == nil {
		logger.LogIf(ctx, errors.New("metacache.delete: no object layer"))
		return
	}
	ez, ok := objAPI.(*erasureServerPools)
	if !ok {
		logger.LogIf(ctx, errors.New("metacache.delete: expected objAPI to be *erasureServerPools"))
		return
	}
	ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(m.bucket, m.id))
}