Improve caching (#19130)

* Remove lock for cached operations.
* Rename "Relax" to `ReturnLastGood`.
* Add `CacheError` to allow caching values even on errors.
* Add NoWait that will return current value with async fetching if within 2xTTL.
* Make benchmark somewhat representative.

```
Before: BenchmarkCache-12       16408370                63.12 ns/op            0 B/op
After:  BenchmarkCache-12       428282187                2.789 ns/op           0 B/op
```

* Remove `storageRESTClient.scanning`. Nonsensical - RPC clients will not have any idea about scanning.
* Always fetch remote diskinfo metrics and cache them. Seems most calls are requesting metrics.
* Do async fetching of usage caches.
This commit is contained in:
Klaus Post 2024-02-26 10:49:19 -08:00 committed by GitHub
parent 85bcb5874a
commit 2b5e4b853c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 84 additions and 67 deletions

View File

@ -1203,7 +1203,8 @@ func (a adminAPIHandlers) AccountInfoHandler(w http.ResponseWriter, r *http.Requ
bucketStorageCache.TTL = 10 * time.Second
// Rely on older value if usage loading fails from disk.
bucketStorageCache.Relax = true
bucketStorageCache.ReturnLastGood = true
bucketStorageCache.NoWait = true
bucketStorageCache.Update = func() (DataUsageInfo, error) {
ctx, done := context.WithTimeout(context.Background(), 2*time.Second)
defer done()

View File

@ -52,7 +52,8 @@ func (sys *BucketQuotaSys) Init(objAPI ObjectLayer) {
// does not update the bucket usage values frequently.
bucketStorageCache.TTL = 10 * time.Second
// Rely on older value if usage loading fails from disk.
bucketStorageCache.Relax = true
bucketStorageCache.ReturnLastGood = true
bucketStorageCache.NoWait = true
bucketStorageCache.Update = func() (DataUsageInfo, error) {
ctx, done := context.WithTimeout(context.Background(), 2*time.Second)
defer done()

View File

@ -81,7 +81,8 @@ func loadPrefixUsageFromBackend(ctx context.Context, objAPI ObjectLayer, bucket
prefixUsageCache.TTL = 30 * time.Second
// No need to fail upon Update() error, fallback to old value.
prefixUsageCache.Relax = true
prefixUsageCache.ReturnLastGood = true
prefixUsageCache.NoWait = true
prefixUsageCache.Update = func() (map[string]uint64, error) {
m := make(map[string]uint64)
for _, pool := range z.serverPools {

View File

@ -1851,7 +1851,8 @@ func (z *erasureServerPools) ListBuckets(ctx context.Context, opts BucketOptions
listBucketsCache.Once.Do(func() {
listBucketsCache.TTL = time.Second
listBucketsCache.Relax = true
listBucketsCache.ReturnLastGood = true
listBucketsCache.NoWait = true
listBucketsCache.Update = func() ([]BucketInfo, error) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
buckets, err = z.s3Peer.ListBuckets(ctx, opts)

View File

@ -357,7 +357,7 @@ type MetricsGroupOpts struct {
func (g *MetricsGroup) RegisterRead(read func(ctx context.Context) []Metric) {
g.metricsCache = cachevalue.New[[]Metric]()
g.metricsCache.Once.Do(func() {
g.metricsCache.Relax = true
g.metricsCache.ReturnLastGood = true
g.metricsCache.TTL = g.cacheInterval
g.metricsCache.Update = func() ([]Metric, error) {
if g.metricsGroupOpts.dependGlobalObjectAPI {

View File

@ -31,7 +31,6 @@ import (
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/minio/madmin-go/v3"
@ -157,9 +156,6 @@ func toStorageErr(err error) error {
// Abstracts a remote disk.
type storageRESTClient struct {
// Indicate of NSScanner is in progress in this disk
scanning int32
endpoint Endpoint
restClient *rest.Client
gridConn *grid.Subroute
@ -236,8 +232,6 @@ func (client *storageRESTClient) Healing() *healingTracker {
}
func (client *storageRESTClient) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode, _ func() bool) (dataUsageCache, error) {
atomic.AddInt32(&client.scanning, 1)
defer atomic.AddInt32(&client.scanning, -1)
defer xioutil.SafeClose(updates)
st, err := storageNSScannerRPC.Call(ctx, client.gridConn, &nsScannerOptions{
@ -310,8 +304,8 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
return info, errDiskNotFound
}
// if metrics was asked, or it was a NoOp we do not need to cache the value.
if opts.Metrics || opts.NoOp {
// if 'NoOp' we do not cache the value.
if opts.NoOp {
ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()
@ -325,17 +319,17 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
if info.Error != "" {
return info, toStorageErr(errors.New(info.Error))
}
info.Scanning = atomic.LoadInt32(&client.scanning) == 1
return info, nil
} // In all other cases cache the value upto 1sec.
client.diskInfoCache.Once.Do(func() {
client.diskInfoCache.TTL = time.Second
client.diskInfoCache.CacheError = true
client.diskInfoCache.Update = func() (info DiskInfo, err error) {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
nopts := DiskInfoOptions{DiskID: client.diskID}
nopts := DiskInfoOptions{DiskID: client.diskID, Metrics: true}
infop, err := storageDiskInfoRPC.Call(ctx, client.gridConn, &nopts)
if err != nil {
return info, toStorageErr(err)
@ -348,9 +342,7 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
}
})
info, err = client.diskInfoCache.Get()
info.Scanning = atomic.LoadInt32(&client.scanning) == 1
return info, err
return client.diskInfoCache.Get()
}
// MakeVolBulk - create multiple volumes in a bulk operation.

View File

@ -19,19 +19,20 @@ package cachevalue
import (
"sync"
"sync/atomic"
"time"
)
// Cache contains a synchronized value that is considered valid
// for a specific amount of time.
// An Update function must be set to provide an updated value when needed.
type Cache[I any] struct {
type Cache[T any] struct {
// Update must return an updated value.
// If an error is returned the cached value is not set.
// Only one caller will call this function at any time, others will be blocking.
// The returned value can no longer be modified once returned.
// Should be set before calling Get().
Update func() (item I, err error)
Update func() (T, error)
// TTL for a cached value.
// If not set 1 second TTL is assumed.
@ -39,18 +40,31 @@ type Cache[I any] struct {
TTL time.Duration
// When set to true, return the last cached value
// even if updating the value errors out
Relax bool
// even if updating the value errors out.
// Returns the last good value AND the error.
ReturnLastGood bool
// If CacheError is set, errors will be cached as well
// and not continuously try to update.
// Should not be combined with ReturnLastGood.
CacheError bool
// If NoWait is set, Get() will return the last good value,
// if TTL has expired but 2x TTL has not yet passed,
// but will fetch a new value in the background.
NoWait bool
// Once can be used to initialize values for lazy initialization.
// Should be set before calling Get().
Once sync.Once
// Managed values.
value I // our cached value
valueSet bool // 'true' if the value 'I' has a value
lastUpdate time.Time // indicates when value 'I' was updated last, used for invalidation.
mu sync.RWMutex
valErr atomic.Pointer[struct {
v T
e error
}]
lastUpdateMs atomic.Int64
updating sync.Mutex
}
// New initializes a new cached value instance.
@ -61,30 +75,42 @@ func New[I any]() *Cache[I] {
// Get will return a cached value or fetch a new one.
// If the Update function returns an error the value is forwarded as is and not cached.
func (t *Cache[I]) Get() (item I, err error) {
item, ok := t.get(t.ttl())
if ok {
return item, nil
}
item, err = t.Update()
if err != nil {
if t.Relax {
// if update fails, return current
// cached value along with error.
//
// Let the caller decide if they want
// to use the returned value based
// on error.
item, ok = t.get(0)
if ok {
return item, err
}
v := t.valErr.Load()
ttl := t.ttl()
vTime := t.lastUpdateMs.Load()
tNow := time.Now().UnixMilli()
if v != nil && tNow-vTime < ttl.Milliseconds() {
if v.e == nil {
return v.v, nil
}
if v.e != nil && t.CacheError || t.ReturnLastGood {
return v.v, v.e
}
return item, err
}
t.update(item)
return item, nil
// Fetch new value.
if t.NoWait && v != nil && tNow-vTime < ttl.Milliseconds()*2 && (v.e == nil || t.CacheError) {
if t.updating.TryLock() {
go func() {
defer t.updating.Unlock()
t.update()
}()
}
return v.v, v.e
}
// Get lock. Either we get it or we wait for it.
t.updating.Lock()
if time.Since(time.UnixMilli(t.lastUpdateMs.Load())) < ttl {
// There is a new value, release lock and return it.
v = t.valErr.Load()
t.updating.Unlock()
return v.v, v.e
}
t.update()
v = t.valErr.Load()
t.updating.Unlock()
return v.v, v.e
}
func (t *Cache[_]) ttl() time.Duration {
@ -95,25 +121,20 @@ func (t *Cache[_]) ttl() time.Duration {
return ttl
}
func (t *Cache[I]) get(ttl time.Duration) (item I, ok bool) {
t.mu.RLock()
defer t.mu.RUnlock()
if t.valueSet {
item = t.value
if ttl <= 0 {
return item, true
}
if time.Since(t.lastUpdate) < ttl {
return item, true
func (t *Cache[T]) update() {
val, err := t.Update()
if err != nil {
if t.ReturnLastGood {
// Keep last good value.
v := t.valErr.Load()
if v != nil {
val = v.v
}
}
}
return item, false
}
func (t *Cache[I]) update(item I) {
t.mu.Lock()
defer t.mu.Unlock()
t.value = item
t.valueSet = true
t.lastUpdate = time.Now()
t.valErr.Store(&struct {
v T
e error
}{v: val, e: err})
t.lastUpdateMs.Store(time.Now().UnixMilli())
}

View File

@ -51,7 +51,7 @@ func TestCache(t *testing.T) {
func BenchmarkCache(b *testing.B) {
cache := New[time.Time]()
cache.Once.Do(func() {
cache.TTL = 1 * time.Microsecond
cache.TTL = 1 * time.Millisecond
cache.Update = func() (time.Time, error) {
return time.Now(), nil
}