Improve caching (#19130)

* Remove locking for cached operations.
* Rename `Relax` to `ReturnLastGood`.
* Add `CacheError` to allow caching values even on errors.
* Add `NoWait`, which returns the current value and fetches asynchronously when within 2x TTL; see the usage sketch after this list.
* Make the benchmark somewhat representative.

```
Before: BenchmarkCache-12    16408370    63.12 ns/op    0 B/op
After:  BenchmarkCache-12   428282187    2.789 ns/op    0 B/op
```
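
(The speedup comes from the hot read path: it is now a single `atomic.Pointer` load instead of an `RWMutex` acquisition; see the `cachevalue` diff below.)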

* Remove `storageRESTClient.scanning`. Nonsensical: RPC clients will not have any idea about scanning.
* Always fetch remote `DiskInfo` metrics and cache them, since most calls request metrics anyway.
* Fetch usage caches asynchronously.
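
Taken together, the options compose as below. This is a minimal usage sketch, not code from this commit: the value type `diskUsage`, the fetcher `fetchUsage`, and the import path are illustrative assumptions.

```go
package example

import (
	"time"

	"github.com/minio/minio/internal/cachevalue" // assumed package path
)

// diskUsage and fetchUsage are hypothetical stand-ins for a real
// value type and its (possibly failing) fetcher.
type diskUsage struct{ Total, Free uint64 }

func fetchUsage() (diskUsage, error) { return diskUsage{Total: 100, Free: 42}, nil }

var usageCache = cachevalue.New[diskUsage]()

func cachedUsage() (diskUsage, error) {
	usageCache.Once.Do(func() {
		usageCache.TTL = 10 * time.Second
		// If Update fails, keep serving the last good value
		// (the error is returned alongside it).
		usageCache.ReturnLastGood = true
		// Between TTL and 2x TTL, return the stale value immediately
		// and refresh in a background goroutine.
		usageCache.NoWait = true
		usageCache.Update = fetchUsage
	})
	return usageCache.Get()
}
```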
Klaus Post, 2024-02-26 10:49:19 -08:00 (committed by GitHub)
parent 85bcb5874a
commit 2b5e4b853c
8 changed files with 84 additions and 67 deletions

```diff
@@ -1203,7 +1203,8 @@ func (a adminAPIHandlers) AccountInfoHandler(w http.ResponseWriter, r *http.Requ
 
 	bucketStorageCache.TTL = 10 * time.Second
 	// Rely on older value if usage loading fails from disk.
-	bucketStorageCache.Relax = true
+	bucketStorageCache.ReturnLastGood = true
+	bucketStorageCache.NoWait = true
 	bucketStorageCache.Update = func() (DataUsageInfo, error) {
 		ctx, done := context.WithTimeout(context.Background(), 2*time.Second)
 		defer done()
```

```diff
@@ -52,7 +52,8 @@ func (sys *BucketQuotaSys) Init(objAPI ObjectLayer) {
 	// does not update the bucket usage values frequently.
 	bucketStorageCache.TTL = 10 * time.Second
 	// Rely on older value if usage loading fails from disk.
-	bucketStorageCache.Relax = true
+	bucketStorageCache.ReturnLastGood = true
+	bucketStorageCache.NoWait = true
 	bucketStorageCache.Update = func() (DataUsageInfo, error) {
 		ctx, done := context.WithTimeout(context.Background(), 2*time.Second)
 		defer done()
```

```diff
@@ -81,7 +81,8 @@ func loadPrefixUsageFromBackend(ctx context.Context, objAPI ObjectLayer, bucket
 
 	prefixUsageCache.TTL = 30 * time.Second
 	// No need to fail upon Update() error, fallback to old value.
-	prefixUsageCache.Relax = true
+	prefixUsageCache.ReturnLastGood = true
+	prefixUsageCache.NoWait = true
 	prefixUsageCache.Update = func() (map[string]uint64, error) {
 		m := make(map[string]uint64)
 		for _, pool := range z.serverPools {
```

```diff
@@ -1851,7 +1851,8 @@ func (z *erasureServerPools) ListBuckets(ctx context.Context, opts BucketOptions
 
 	listBucketsCache.Once.Do(func() {
 		listBucketsCache.TTL = time.Second
-		listBucketsCache.Relax = true
+		listBucketsCache.ReturnLastGood = true
+		listBucketsCache.NoWait = true
 		listBucketsCache.Update = func() ([]BucketInfo, error) {
 			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 			buckets, err = z.s3Peer.ListBuckets(ctx, opts)
```

```diff
@@ -357,7 +357,7 @@ type MetricsGroupOpts struct {
 func (g *MetricsGroup) RegisterRead(read func(ctx context.Context) []Metric) {
 	g.metricsCache = cachevalue.New[[]Metric]()
 	g.metricsCache.Once.Do(func() {
-		g.metricsCache.Relax = true
+		g.metricsCache.ReturnLastGood = true
		g.metricsCache.TTL = g.cacheInterval
 		g.metricsCache.Update = func() ([]Metric, error) {
 			if g.metricsGroupOpts.dependGlobalObjectAPI {
```

```diff
@@ -31,7 +31,6 @@ import (
 	"strconv"
 	"strings"
 	"sync"
-	"sync/atomic"
 	"time"
 
 	"github.com/minio/madmin-go/v3"
@@ -157,9 +156,6 @@ func toStorageErr(err error) error {
 
 // Abstracts a remote disk.
 type storageRESTClient struct {
-	// Indicate of NSScanner is in progress in this disk
-	scanning int32
-
 	endpoint   Endpoint
 	restClient *rest.Client
 	gridConn   *grid.Subroute
@@ -236,8 +232,6 @@ func (client *storageRESTClient) Healing() *healingTracker {
 }
 
 func (client *storageRESTClient) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode, _ func() bool) (dataUsageCache, error) {
-	atomic.AddInt32(&client.scanning, 1)
-	defer atomic.AddInt32(&client.scanning, -1)
 	defer xioutil.SafeClose(updates)
 
 	st, err := storageNSScannerRPC.Call(ctx, client.gridConn, &nsScannerOptions{
@@ -310,8 +304,8 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
 		return info, errDiskNotFound
 	}
 
-	// if metrics was asked, or it was a NoOp we do not need to cache the value.
-	if opts.Metrics || opts.NoOp {
+	// if 'NoOp' we do not cache the value.
+	if opts.NoOp {
 		ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
 		defer cancel()
 
@@ -325,17 +319,17 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
 		if info.Error != "" {
 			return info, toStorageErr(errors.New(info.Error))
 		}
-		info.Scanning = atomic.LoadInt32(&client.scanning) == 1
 		return info, nil
-	}
+	} // In all other cases cache the value upto 1sec.
 
 	client.diskInfoCache.Once.Do(func() {
 		client.diskInfoCache.TTL = time.Second
+		client.diskInfoCache.CacheError = true
 		client.diskInfoCache.Update = func() (info DiskInfo, err error) {
 			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 			defer cancel()
 
-			nopts := DiskInfoOptions{DiskID: client.diskID}
+			nopts := DiskInfoOptions{DiskID: client.diskID, Metrics: true}
 			infop, err := storageDiskInfoRPC.Call(ctx, client.gridConn, &nopts)
 			if err != nil {
 				return info, toStorageErr(err)
@@ -348,9 +342,7 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
 		}
 	})
 
-	info, err = client.diskInfoCache.Get()
-	info.Scanning = atomic.LoadInt32(&client.scanning) == 1
-	return info, err
+	return client.diskInfoCache.Get()
 }
 
 // MakeVolBulk - create multiple volumes in a bulk operation.
```
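
With `CacheError` set here, a failing remote is not re-dialed on every `DiskInfo` call; the error itself is served from cache until the TTL expires. A tiny standalone sketch of that behavior (the import path is an assumption; this is not code from the commit):

```go
package main

import (
	"errors"
	"fmt"
	"time"

	"github.com/minio/minio/internal/cachevalue" // assumed package path
)

func main() {
	calls := 0
	c := cachevalue.New[int]()
	c.Once.Do(func() {
		c.TTL = time.Second
		c.CacheError = true // cache the error too; don't retry until TTL expires
		c.Update = func() (int, error) {
			calls++
			return 0, errors.New("disk offline")
		}
	})
	for i := 0; i < 3; i++ {
		_, err := c.Get()
		fmt.Println(err) // "disk offline" on every call
	}
	fmt.Println("updates:", calls) // 1: the error was served from cache
}
```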

```diff
@@ -19,19 +19,20 @@ package cachevalue
 
 import (
 	"sync"
+	"sync/atomic"
 	"time"
 )
 
 // Cache contains a synchronized value that is considered valid
 // for a specific amount of time.
 // An Update function must be set to provide an updated value when needed.
-type Cache[I any] struct {
+type Cache[T any] struct {
 	// Update must return an updated value.
 	// If an error is returned the cached value is not set.
 	// Only one caller will call this function at any time, others will be blocking.
 	// The returned value can no longer be modified once returned.
 	// Should be set before calling Get().
-	Update func() (item I, err error)
+	Update func() (T, error)
 
 	// TTL for a cached value.
 	// If not set 1 second TTL is assumed.
@@ -39,18 +40,31 @@ type Cache[I any] struct {
 	TTL time.Duration
 
 	// When set to true, return the last cached value
-	// even if updating the value errors out
-	Relax bool
+	// even if updating the value errors out.
+	// Returns the last good value AND the error.
+	ReturnLastGood bool
+
+	// If CacheError is set, errors will be cached as well
+	// and not continuously try to update.
+	// Should not be combined with ReturnLastGood.
+	CacheError bool
+
+	// If NoWait is set, Get() will return the last good value,
+	// if TTL has expired but 2x TTL has not yet passed,
+	// but will fetch a new value in the background.
+	NoWait bool
 
 	// Once can be used to initialize values for lazy initialization.
 	// Should be set before calling Get().
 	Once sync.Once
 
 	// Managed values.
-	value      I         // our cached value
-	valueSet   bool      // 'true' if the value 'I' has a value
-	lastUpdate time.Time // indicates when value 'I' was updated last, used for invalidation.
-	mu         sync.RWMutex
+	valErr atomic.Pointer[struct {
+		v T
+		e error
+	}]
+	lastUpdateMs atomic.Int64
+	updating     sync.Mutex
 }
 
 // New initializes a new cached value instance.
@@ -61,30 +75,42 @@ func New[I any]() *Cache[I] {
 // Get will return a cached value or fetch a new one.
 // If the Update function returns an error the value is forwarded as is and not cached.
 func (t *Cache[I]) Get() (item I, err error) {
-	item, ok := t.get(t.ttl())
-	if ok {
-		return item, nil
+	v := t.valErr.Load()
+	ttl := t.ttl()
+	vTime := t.lastUpdateMs.Load()
+	tNow := time.Now().UnixMilli()
+	if v != nil && tNow-vTime < ttl.Milliseconds() {
+		if v.e == nil {
+			return v.v, nil
+		}
+		if v.e != nil && t.CacheError || t.ReturnLastGood {
+			return v.v, v.e
+		}
 	}
 
-	item, err = t.Update()
-	if err != nil {
-		if t.Relax {
-			// if update fails, return current
-			// cached value along with error.
-			//
-			// Let the caller decide if they want
-			// to use the returned value based
-			// on error.
-			item, ok = t.get(0)
-			if ok {
-				return item, err
-			}
+	// Fetch new value.
+	if t.NoWait && v != nil && tNow-vTime < ttl.Milliseconds()*2 && (v.e == nil || t.CacheError) {
+		if t.updating.TryLock() {
+			go func() {
+				defer t.updating.Unlock()
+				t.update()
+			}()
 		}
-		return item, err
+		return v.v, v.e
 	}
 
-	t.update(item)
-	return item, nil
+	// Get lock. Either we get it or we wait for it.
+	t.updating.Lock()
+	if time.Since(time.UnixMilli(t.lastUpdateMs.Load())) < ttl {
+		// There is a new value, release lock and return it.
+		v = t.valErr.Load()
+		t.updating.Unlock()
+		return v.v, v.e
+	}
+	t.update()
+	v = t.valErr.Load()
+	t.updating.Unlock()
+	return v.v, v.e
 }
 
 func (t *Cache[_]) ttl() time.Duration {
@@ -95,25 +121,20 @@ func (t *Cache[_]) ttl() time.Duration {
 	return ttl
 }
 
-func (t *Cache[I]) get(ttl time.Duration) (item I, ok bool) {
-	t.mu.RLock()
-	defer t.mu.RUnlock()
-	if t.valueSet {
-		item = t.value
-		if ttl <= 0 {
-			return item, true
-		}
-		if time.Since(t.lastUpdate) < ttl {
-			return item, true
-		}
-	}
-	return item, false
-}
-
-func (t *Cache[I]) update(item I) {
-	t.mu.Lock()
-	defer t.mu.Unlock()
-	t.value = item
-	t.valueSet = true
-	t.lastUpdate = time.Now()
+func (t *Cache[T]) update() {
+	val, err := t.Update()
+	if err != nil {
+		if t.ReturnLastGood {
+			// Keep last good value.
+			v := t.valErr.Load()
+			if v != nil {
+				val = v.v
+			}
+		}
+	}
+	t.valErr.Store(&struct {
+		v T
+		e error
+	}{v: val, e: err})
+	t.lastUpdateMs.Store(time.Now().UnixMilli())
 }
```
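
This is where the lock leaves the hot path: a `Get()` that hits a fresh value is now one `atomic.Pointer` load plus a few integer comparisons, and `TryLock` keeps the background refresh single-flight. A generic sketch of that pattern (illustrative names, not code from the commit):

```go
package example

import (
	"sync"
	"sync/atomic"
)

// entry pairs a value with the error from the fetch that produced it.
type entry[T any] struct {
	v T
	e error
}

type lazy[T any] struct {
	cur      atomic.Pointer[entry[T]] // lock-free reads on the hot path
	updating sync.Mutex               // serializes refreshes
}

// refreshAsync starts at most one background refresh at a time.
func (l *lazy[T]) refreshAsync(update func() (T, error)) {
	if l.updating.TryLock() { // another refresh in flight? just skip
		go func() {
			defer l.updating.Unlock()
			v, e := update()
			l.cur.Store(&entry[T]{v: v, e: e})
		}()
	}
}
```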

```diff
@@ -51,7 +51,7 @@ func TestCache(t *testing.T) {
 func BenchmarkCache(b *testing.B) {
 	cache := New[time.Time]()
 	cache.Once.Do(func() {
-		cache.TTL = 1 * time.Microsecond
+		cache.TTL = 1 * time.Millisecond
 		cache.Update = func() (time.Time, error) {
 			return time.Now(), nil
 		}
```
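
With a 1µs TTL nearly every `Get()` triggered an `Update()`, so the old benchmark mostly measured the update path; at 1ms most iterations hit the cached fast path, which is what production callers see. To reproduce the before/after numbers, something like the following should work (the package path is an assumption based on `package cachevalue` above):

```
go test -run=NONE -bench=BenchmarkCache -benchmem ./internal/cachevalue/
```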