metacache: Speed up cleanup operation (#11078)

Perform cleanup operations on copied data. This avoids read-locking the
data while determining which caches to keep.

Also, reduce the O(N*N) operation to O(N*M), where M is the number of caches
with the same root or below, when checking potential replacements.
Klaus Post 2020-12-10 12:30:28 -08:00 committed by GitHub
parent 4550ac6fff
commit 82e2be4239
1 changed file with 57 additions and 24 deletions

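The core pattern of the change: hold the read lock only long enough to copy the cache maps, scan the copy with no lock held, then delete the losers. Below is a minimal, self-contained sketch of that pattern, not the MinIO code itself; the names registry, entry and cloneEntries are illustrative stand-ins for bucketMetacache, metacache and cloneCaches in the diff that follows.

package main

import (
	"fmt"
	"sync"
	"time"
)

// entry stands in for a metacache record; only the fields the sketch needs.
type entry struct {
	id         string
	lastUpdate time.Time
}

// registry stands in for bucketMetacache: a mutex-guarded map of entries.
type registry struct {
	mu      sync.RWMutex
	entries map[string]entry
}

// cloneEntries copies the map under a short read lock and releases it
// immediately, so writers are only blocked for the duration of the copy.
func (r *registry) cloneEntries() map[string]entry {
	r.mu.RLock()
	defer r.mu.RUnlock()
	dst := make(map[string]entry, len(r.entries))
	for k, v := range r.entries {
		dst[k] = v
	}
	return dst
}

// cleanup decides what to delete while holding no lock at all, then takes
// the write lock only for the deletions themselves.
func (r *registry) cleanup(maxAge time.Duration) {
	snapshot := r.cloneEntries()

	remove := make(map[string]struct{})
	for id, e := range snapshot {
		if time.Since(e.lastUpdate) > maxAge {
			remove[id] = struct{}{}
		}
	}

	r.mu.Lock()
	defer r.mu.Unlock()
	for id := range remove {
		delete(r.entries, id)
	}
}

func main() {
	r := &registry{entries: map[string]entry{
		"fresh": {id: "fresh", lastUpdate: time.Now()},
		"stale": {id: "stale", lastUpdate: time.Now().Add(-time.Hour)},
	}}
	r.cleanup(15 * time.Minute)
	fmt.Println(len(r.entries)) // 1: only "fresh" survives
}

The write lock in the sketch is taken only around the deletions, which mirrors the intent of cloneCaches() plus deleteCache() in the diff below.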

@@ -237,17 +237,7 @@ func (b *bucketMetacache) findCache(o listPathOptions) metacache {
 		return best
 	}
 
-	// Potentially interesting caches.
-	// Will only add root if request is for root.
-	var interesting []string
-	rootSplit := strings.Split(o.BaseDir, slashSeparator)
-	for i := range rootSplit {
-		want := path.Join(rootSplit[:i+1]...)
-		if debugPrint {
-			console.Info("base: %s, want: %s", o.BaseDir, want)
-		}
-		interesting = append(interesting, b.cachesRoot[want]...)
-	}
+	interesting := interestingCaches(o.BaseDir, b.cachesRoot)
 
 	var best metacache
 	for _, id := range interesting {
@@ -375,20 +365,22 @@ func (b *bucketMetacache) cleanup() {
 	remove := make(map[string]struct{})
 	currentCycle := intDataUpdateTracker.current()
 
-	debugPrint := func(msg string, data ...interface{}) {}
-	if false {
-		debugPrint = logger.Info
-	}
-	b.mu.RLock()
-	for id, cache := range b.caches {
+	const debugPrint = false
+
+	// Test on a copy
+	// cleanup is the only one deleting caches.
+	caches, rootIdx := b.cloneCaches()
+	for id, cache := range caches {
 		if b.transient && time.Since(cache.lastUpdate) > 15*time.Minute && time.Since(cache.lastHandout) > 15*time.Minute {
 			// Keep transient caches only for 15 minutes.
 			remove[id] = struct{}{}
 			continue
 		}
 		if !cache.worthKeeping(currentCycle) {
-			debugPrint("cache %s not worth keeping", id)
+			if debugPrint {
+				logger.Info("cache %s not worth keeping", id)
+			}
 			remove[id] = struct{}{}
 			continue
 		}
@@ -406,34 +398,56 @@ func (b *bucketMetacache) cleanup() {
 	// Check all non-deleted against eachother.
 	// O(n*n), but should still be rather quick.
-	for id, cache := range b.caches {
+	for id, cache := range caches {
 		if b.transient {
 			break
 		}
 		if _, ok := remove[id]; ok {
 			continue
 		}
-		for id2, cache2 := range b.caches {
-			if _, ok := remove[id2]; ok {
+
+		interesting := interestingCaches(cache.root, rootIdx)
+		for _, id2 := range interesting {
+			if _, ok := remove[id2]; ok || id2 == id {
 				// Don't check against one we are already removing
 				continue
 			}
+			cache2, ok := caches[id2]
+			if !ok {
+				continue
+			}
 			if cache.canBeReplacedBy(&cache2) {
-				debugPrint("cache %s can be replaced by %s", id, cache2.id)
+				if debugPrint {
+					logger.Info("cache %s can be replaced by %s", id, cache2.id)
+				}
 				remove[id] = struct{}{}
 				break
 			} else {
-				debugPrint("cache %s can be NOT replaced by %s", id, cache2.id)
+				if debugPrint {
+					logger.Info("cache %s can be NOT replaced by %s", id, cache2.id)
+				}
 			}
 		}
 	}
-	b.mu.RUnlock()
+
 	for id := range remove {
 		b.deleteCache(id)
	}
 }
 
+// Potentially interesting caches.
+// Will only add root if request is for root.
+func interestingCaches(root string, cachesRoot map[string][]string) []string {
+	var interesting []string
+	rootSplit := strings.Split(root, slashSeparator)
+	for i := range rootSplit {
+		want := path.Join(rootSplit[:i+1]...)
+		interesting = append(interesting, cachesRoot[want]...)
+	}
+	return interesting
+}
+
 // updateCache will update a cache by id.
 // If the cache cannot be found nil is returned.
 // The bucket cache will be locked until the done .
@@ -467,6 +481,25 @@ func (b *bucketMetacache) updateCacheEntry(update metacache) (metacache, error)
 	return existing, nil
 }
 
+// cloneCaches will return a clone of all current caches.
+func (b *bucketMetacache) cloneCaches() (map[string]metacache, map[string][]string) {
+	b.mu.RLock()
+	defer b.mu.RUnlock()
+	dst := make(map[string]metacache, len(b.caches))
+	for k, v := range b.caches {
+		dst[k] = v
+	}
+	// Copy indexes
+	dst2 := make(map[string][]string, len(b.cachesRoot))
+	for k, v := range b.cachesRoot {
+		tmp := make([]string, len(v))
+		copy(tmp, v)
+		dst2[k] = tmp
+	}
+	return dst, dst2
+}
+
 // getCache will return a clone of a specific metacache.
 // Will return nil if the cache doesn't exist.
 func (b *bucketMetacache) getCache(id string) *metacache {
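To make the O(N*N) to O(N*M) reduction from the commit message concrete, here is a small, self-contained sketch of the root-prefix lookup that the new interestingCaches helper performs; the index contents and cache IDs are made up for illustration and are not taken from MinIO.

package main

import (
	"fmt"
	"path"
	"strings"
)

const slashSeparator = "/"

// interestingIDs returns the cache IDs registered at root or at any parent
// prefix of root, mirroring what the interestingCaches helper in the diff does.
func interestingIDs(root string, byRoot map[string][]string) []string {
	var ids []string
	parts := strings.Split(root, slashSeparator)
	for i := range parts {
		want := path.Join(parts[:i+1]...)
		ids = append(ids, byRoot[want]...)
	}
	return ids
}

func main() {
	// Hypothetical index: root path -> IDs of caches rooted there.
	byRoot := map[string][]string{
		"bucket":        {"cache-root"},
		"bucket/a":      {"cache-a"},
		"bucket/a/deep": {"cache-a-deep"},
		"bucket/b":      {"cache-b"},
	}

	// A cache listing "bucket/a/deep" only needs to be checked against the
	// caches rooted at "bucket", "bucket/a" and "bucket/a/deep"; "cache-b"
	// is never visited.
	fmt.Println(interestingIDs("bucket/a/deep", byRoot))
	// Output: [cache-root cache-a cache-a-deep]
}

With caches spread across many distinct roots, each cache is compared only against the M caches registered along its own prefix chain instead of against all N caches.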