diff --git a/cmd/metacache-bucket.go b/cmd/metacache-bucket.go index b06eef4c2..e151de6ab 100644 --- a/cmd/metacache-bucket.go +++ b/cmd/metacache-bucket.go @@ -73,6 +73,12 @@ func newBucketMetacache(bucket string, cleanup bool) *bucketMetacache { } } +func (b *bucketMetacache) debugf(format string, data ...interface{}) { + if metacacheDebug { + console.Debugf(format, data...) + } +} + // loadBucketMetaCache will load the cache from the object layer. // If the cache cannot be found a new one is created. func loadBucketMetaCache(ctx context.Context, bucket string) (*bucketMetacache, error) { @@ -199,7 +205,6 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache { } extend := globalAPIConfig.getExtendListLife() - const debugPrint = false // Grab a write lock, since we create one if we cannot find one. if o.Create { @@ -212,9 +217,7 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache { // Check if exists already. if c, ok := b.caches[o.ID]; ok { - if debugPrint { - console.Info("returning existing %v", o.ID) - } + b.debugf("returning existing %v", o.ID) return c } // No need to do expensive checks on transients. @@ -231,103 +234,30 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache { best := o.newMetacache() b.caches[o.ID] = best b.updated = true - if debugPrint { - console.Info("returning new cache %s, bucket: %v", best.id, best.bucket) - } + b.debugf("returning new cache %s, bucket: %v", best.id, best.bucket) return best } - interesting := interestingCaches(o.BaseDir, b.cachesRoot) - var best metacache - for _, id := range interesting { - cached, ok := b.caches[id] - if !ok { - continue - } + rootSplit := strings.Split(o.BaseDir, slashSeparator) + for i := range rootSplit { + interesting := b.cachesRoot[path.Join(rootSplit[:i+1]...)] - // Never return transient caches if there is no id. - if cached.status == scanStateError || cached.status == scanStateNone || cached.dataVersion != metacacheStreamVersion { - if debugPrint { - console.Info("cache %s state or stream version mismatch", cached.id) - } - continue - } - if cached.startedCycle < o.OldestCycle { - if debugPrint { - console.Info("cache %s cycle too old", cached.id) - } - continue - } - - // If the existing listing wasn't recursive root must match. - if !cached.recursive && o.BaseDir != cached.root { - if debugPrint { - console.Info("cache %s non rec prefix mismatch, cached:%v, want:%v", cached.id, cached.root, o.BaseDir) - } - continue - } - - // Root of what we are looking for must at least have - if !strings.HasPrefix(o.BaseDir, cached.root) { - if debugPrint { - console.Info("cache %s prefix mismatch, cached:%v, want:%v", cached.id, cached.root, o.BaseDir) - } - continue - } - if cached.filter != "" && strings.HasPrefix(cached.filter, o.FilterPrefix) { - if debugPrint { - console.Info("cache %s cannot be used because of filter %s", cached.id, cached.filter) - } - continue - } - - if o.Recursive && !cached.recursive { - if debugPrint { - console.Info("cache %s not recursive", cached.id) - } - // If this is recursive the cached listing must be as well. - continue - } - if o.Separator != slashSeparator && !cached.recursive { - if debugPrint { - console.Info("cache %s not slashsep and not recursive", cached.id) - } - // Non slash separator requires recursive. - continue - } - if !cached.finished() && time.Since(cached.lastUpdate) > metacacheMaxRunningAge { - if debugPrint { - console.Info("cache %s not running, time: %v", cached.id, time.Since(cached.lastUpdate)) - } - // Abandoned - continue - } - - if cached.finished() && cached.endedCycle <= o.OldestCycle { - if extend <= 0 { - // If scan has ended the oldest requested must be less. - if debugPrint { - console.Info("cache %s ended and cycle (%v) <= oldest allowed (%v)", cached.id, cached.endedCycle, o.OldestCycle) - } + for _, id := range interesting { + cached, ok := b.caches[id] + if !ok { continue } - if time.Since(cached.lastUpdate) > metacacheMaxRunningAge+extend { - // Cache ended within bloom cycle, but we can extend the life. - if debugPrint { - console.Info("cache %s ended (%v) and beyond extended life (%v)", cached.id, cached.lastUpdate, extend+metacacheMaxRunningAge) - } + if !cached.matches(&o, extend) { continue } - } - if cached.started.Before(best.started) { - if debugPrint { - console.Info("cache %s disregarded - we have a better", cached.id) + if cached.started.Before(best.started) { + b.debugf("cache %s disregarded - we have a better", cached.id) + // If we already have a newer, keep that. + continue } - // If we already have a newer, keep that. - continue + best = cached } - best = cached } if !best.started.IsZero() { if o.Create { @@ -335,9 +265,7 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache { b.caches[best.id] = best b.updated = true } - if debugPrint { - console.Info("returning cached %s, status: %v, ended: %v", best.id, best.status, best.ended) - } + b.debugf("returning cached %s, status: %v, ended: %v", best.id, best.status, best.ended) return best } if !o.Create { @@ -353,9 +281,7 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache { b.caches[o.ID] = best b.cachesRoot[best.root] = append(b.cachesRoot[best.root], best.id) b.updated = true - if debugPrint { - console.Info("returning new cache %s, bucket: %v", best.id, best.bucket) - } + b.debugf("returning new cache %s, bucket: %v", best.id, best.bucket) return best } @@ -365,8 +291,6 @@ func (b *bucketMetacache) cleanup() { remove := make(map[string]struct{}) currentCycle := intDataUpdateTracker.current() - const debugPrint = false - // Test on a copy // cleanup is the only one deleting caches. caches, rootIdx := b.cloneCaches() @@ -378,9 +302,7 @@ func (b *bucketMetacache) cleanup() { continue } if !cache.worthKeeping(currentCycle) { - if debugPrint { - logger.Info("cache %s not worth keeping", id) - } + b.debugf("cache %s not worth keeping", id) remove[id] = struct{}{} continue } @@ -418,15 +340,11 @@ func (b *bucketMetacache) cleanup() { } if cache.canBeReplacedBy(&cache2) { - if debugPrint { - logger.Info("cache %s can be replaced by %s", id, cache2.id) - } + b.debugf("cache %s can be replaced by %s", id, cache2.id) remove[id] = struct{}{} break } else { - if debugPrint { - logger.Info("cache %s can be NOT replaced by %s", id, cache2.id) - } + b.debugf("cache %s can be NOT replaced by %s", id, cache2.id) } } } @@ -515,9 +433,6 @@ func (b *bucketMetacache) getCache(id string) *metacache { // deleteAll will delete all on disk data for ALL caches. // Deletes are performed concurrently. func (b *bucketMetacache) deleteAll() { - b.mu.Lock() - defer b.mu.Unlock() - ctx := context.Background() ez, ok := newObjectLayerFn().(*erasureServerPools) if !ok { @@ -525,6 +440,9 @@ func (b *bucketMetacache) deleteAll() { return } + b.mu.Lock() + defer b.mu.Unlock() + b.updated = true if !b.transient { // Delete all. diff --git a/cmd/metacache-bucket_test.go b/cmd/metacache-bucket_test.go new file mode 100644 index 000000000..1f157a4f2 --- /dev/null +++ b/cmd/metacache-bucket_test.go @@ -0,0 +1,55 @@ +package cmd + +import ( + "fmt" + "testing" +) + +func Benchmark_bucketMetacache_findCache(b *testing.B) { + bm := newBucketMetacache("", false) + const elements = 50000 + const paths = 100 + if elements%paths != 0 { + b.Fatal("elements must be divisible by the number of paths") + } + var pathNames [paths]string + for i := range pathNames[:] { + pathNames[i] = fmt.Sprintf("prefix/%d", i) + } + for i := 0; i < elements; i++ { + bm.findCache(listPathOptions{ + ID: mustGetUUID(), + Bucket: "", + BaseDir: pathNames[i%paths], + Prefix: "", + FilterPrefix: "", + Marker: "", + Limit: 0, + AskDisks: 0, + Recursive: false, + Separator: slashSeparator, + Create: true, + CurrentCycle: uint64(i), + OldestCycle: uint64(i - 1), + }) + } + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + bm.findCache(listPathOptions{ + ID: mustGetUUID(), + Bucket: "", + BaseDir: pathNames[i%paths], + Prefix: "", + FilterPrefix: "", + Marker: "", + Limit: 0, + AskDisks: 0, + Recursive: false, + Separator: slashSeparator, + Create: true, + CurrentCycle: uint64(i % elements), + OldestCycle: uint64(0), + }) + } +} diff --git a/cmd/metacache-set.go b/cmd/metacache-set.go index ca32b39fc..b1174e474 100644 --- a/cmd/metacache-set.go +++ b/cmd/metacache-set.go @@ -29,6 +29,7 @@ import ( "sync" "time" + jsoniter "github.com/json-iterator/go" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/console" "github.com/minio/minio/pkg/hash" @@ -122,11 +123,22 @@ func (o listPathOptions) newMetacache() metacache { } } +func (o *listPathOptions) debugf(format string, data ...interface{}) { + if metacacheDebug { + console.Debugf(format, data...) + } +} + +func (o *listPathOptions) debugln(data ...interface{}) { + if metacacheDebug { + console.Debugln(data...) + } +} + // gatherResults will collect all results on the input channel and filter results according to the options. // Caller should close the channel when done. // The returned function will return the results once there is enough or input is closed. func (o *listPathOptions) gatherResults(in <-chan metaCacheEntry) func() (metaCacheEntriesSorted, error) { - const debugPrint = false var resultsDone = make(chan metaCacheEntriesSorted) // Copy so we can mutate resCh := resultsDone @@ -142,32 +154,22 @@ func (o *listPathOptions) gatherResults(in <-chan metaCacheEntry) func() (metaCa if !o.IncludeDirectories && entry.isDir() { continue } - if debugPrint { - console.Infoln("gather got:", entry.name) - } + o.debugln("gather got:", entry.name) if o.Marker != "" && entry.name <= o.Marker { - if debugPrint { - console.Infoln("pre marker") - } + o.debugln("pre marker") continue } if !strings.HasPrefix(entry.name, o.Prefix) { - if debugPrint { - console.Infoln("not in prefix") - } + o.debugln("not in prefix") continue } if !o.Recursive && !entry.isInDir(o.Prefix, o.Separator) { - if debugPrint { - console.Infoln("not in dir", o.Prefix, o.Separator) - } + o.debugln("not in dir", o.Prefix, o.Separator) continue } if !o.InclDeleted && entry.isObject() { if entry.isLatestDeletemarker() { - if debugPrint { - console.Infoln("latest delete") - } + o.debugln("latest delete") continue } } @@ -181,9 +183,7 @@ func (o *listPathOptions) gatherResults(in <-chan metaCacheEntry) func() (metaCa } continue } - if debugPrint { - console.Infoln("adding...") - } + o.debugln("adding...") results.o = append(results.o, entry) } if resCh != nil { @@ -207,19 +207,15 @@ func (o *listPathOptions) findFirstPart(fi FileInfo) (int, error) { if search == "" { return 0, nil } - const debugPrint = false - if debugPrint { - console.Infoln("searching for ", search) - } + o.debugln("searching for ", search) var tmp metacacheBlock + var json = jsoniter.ConfigCompatibleWithStandardLibrary i := 0 for { partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, i) v, ok := fi.Metadata[partKey] if !ok { - if debugPrint { - console.Infoln("no match in metadata, waiting") - } + o.debugln("no match in metadata, waiting") return -1, io.ErrUnexpectedEOF } err := json.Unmarshal([]byte(v), &tmp) @@ -231,27 +227,18 @@ func (o *listPathOptions) findFirstPart(fi FileInfo) (int, error) { return 0, errFileNotFound } if tmp.First >= search { - if debugPrint { - console.Infoln("First >= search", v) - } + o.debugln("First >= search", v) return i, nil } if tmp.Last >= search { - if debugPrint { - - console.Infoln("Last >= search", v) - } + o.debugln("Last >= search", v) return i, nil } if tmp.EOS { - if debugPrint { - console.Infoln("no match, at EOS", v) - } + o.debugln("no match, at EOS", v) return -3, io.EOF } - if debugPrint { - console.Infoln("First ", tmp.First, "<", search, " search", i) - } + o.debugln("First ", tmp.First, "<", search, " search", i) i++ } } @@ -314,7 +301,6 @@ func (o *listPathOptions) SetFilter() { // Will return io.EOF if there are no more entries with the same filter. // The last entry can be used as a marker to resume the listing. func (r *metacacheReader) filter(o listPathOptions) (entries metaCacheEntriesSorted, err error) { - const debugPrint = false // Forward to prefix, if any err = r.forwardTo(o.Prefix) if err != nil { @@ -336,9 +322,8 @@ func (r *metacacheReader) filter(o listPathOptions) (entries metaCacheEntriesSor } } } - if debugPrint { - console.Infoln("forwarded to ", o.Prefix, "marker:", o.Marker, "sep:", o.Separator) - } + o.debugln("forwarded to ", o.Prefix, "marker:", o.Marker, "sep:", o.Separator) + // Filter if !o.Recursive { entries.o = make(metaCacheEntries, 0, o.Limit) @@ -557,10 +542,7 @@ func (er erasureObjects) SetDriveCount() int { // Will return io.EOF if continuing would not yield more results. func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) { - const debugPrint = false - if debugPrint { - console.Printf("listPath with options: %#v\n", o) - } + o.debugf("listPath with options: %#v\n", o) // See if we have the listing stored. if !o.Create && !o.singleObject { @@ -586,9 +568,7 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr rpc := globalNotificationSys.restClientFromHash(o.Bucket) var metaMu sync.Mutex - if debugPrint { - console.Println("listPath: scanning bucket:", o.Bucket, "basedir:", o.BaseDir, "prefix:", o.Prefix, "marker:", o.Marker) - } + o.debugln("listPath: scanning bucket:", o.Bucket, "basedir:", o.BaseDir, "prefix:", o.Prefix, "marker:", o.Marker) // Disconnect from call above, but cancel on exit. ctx, cancel := context.WithCancel(GlobalContext) @@ -596,9 +576,7 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr disks := er.getOnlineDisks() defer func() { - if debugPrint { - console.Println("listPath returning:", entries.len(), "err:", err) - } + o.debugln("listPath returning:", entries.len(), "err:", err) if err != nil && !errors.Is(err, io.EOF) { go func(err string) { metaMu.Lock() @@ -684,9 +662,7 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr // Don't save single object listings. return nil } - if debugPrint { - console.Println("listPath: saving block", b.n, "to", o.objectPath(b.n)) - } + o.debugln("listPath: saving block", b.n, "to", o.objectPath(b.n)) r, err := hash.NewReader(bytes.NewBuffer(b.data), int64(len(b.data)), "", "", int64(len(b.data)), false) logger.LogIf(ctx, err) custom := b.headerKV() diff --git a/cmd/metacache.go b/cmd/metacache.go index 3af0ca37a..7b0750989 100644 --- a/cmd/metacache.go +++ b/cmd/metacache.go @@ -46,6 +46,9 @@ const ( // Enabling this will make cache sharing more likely and cause less IO, // but may cause additional latency to some calls. metacacheSharePrefix = false + + // metacacheDebug will enable debug printing + metacacheDebug = false ) //go:generate msgp -file $GOFILE -unexported @@ -73,6 +76,64 @@ func (m *metacache) finished() bool { return !m.ended.IsZero() } +// matches returns whether the metacache matches the options given. +func (m *metacache) matches(o *listPathOptions, extend time.Duration) bool { + if o == nil { + return false + } + + // Never return transient caches if there is no id. + if m.status == scanStateError || m.status == scanStateNone || m.dataVersion != metacacheStreamVersion { + o.debugf("cache %s state or stream version mismatch", m.id) + return false + } + if m.startedCycle < o.OldestCycle { + o.debugf("cache %s cycle too old", m.id) + return false + } + + // Root of what we are looking for must at least have the same + if !strings.HasPrefix(o.BaseDir, m.root) { + o.debugf("cache %s prefix mismatch, cached:%v, want:%v", m.id, m.root, o.BaseDir) + return false + } + if m.filter != "" && strings.HasPrefix(m.filter, o.FilterPrefix) { + o.debugf("cache %s cannot be used because of filter %s", m.id, m.filter) + return false + } + + if o.Recursive && !m.recursive { + o.debugf("cache %s not recursive", m.id) + // If this is recursive the cached listing must be as well. + return false + } + if o.Separator != slashSeparator && !m.recursive { + o.debugf("cache %s not slashsep and not recursive", m.id) + // Non slash separator requires recursive. + return false + } + if !m.finished() && time.Since(m.lastUpdate) > metacacheMaxRunningAge { + o.debugf("cache %s not running, time: %v", m.id, time.Since(m.lastUpdate)) + // Abandoned + return false + } + + if m.finished() && m.endedCycle <= o.OldestCycle { + if extend <= 0 { + // If scan has ended the oldest requested must be less. + o.debugf("cache %s ended and cycle (%v) <= oldest allowed (%v)", m.id, m.endedCycle, o.OldestCycle) + return false + } + if time.Since(m.lastUpdate) > metacacheMaxRunningAge+extend { + // Cache ended within bloom cycle, but we can extend the life. + o.debugf("cache %s ended (%v) and beyond extended life (%v)", m.id, m.lastUpdate, extend+metacacheMaxRunningAge) + return false + } + } + + return true +} + // worthKeeping indicates if the cache by itself is worth keeping. func (m *metacache) worthKeeping(currentCycle uint64) bool { if m == nil {