Improve caching (#19130)

* Remove the lock for cached operations.
* Rename `Relax` to `ReturnLastGood`.
* Add `CacheError` to allow caching values even when the update returns an error.
* Add `NoWait`, which returns the current value and refreshes it asynchronously, as long as the cached value is within 2x the TTL (see the configuration sketch below).
* Make the benchmark somewhat representative.

```
Before: BenchmarkCache-12       16408370                63.12 ns/op            0 B/op
After:  BenchmarkCache-12       428282187                2.789 ns/op           0 B/op
```

* Remove `storageRESTClient.scanning`; it was nonsensical, since RPC clients have no knowledge of whether scanning is in progress.
* Always fetch remote disk info with metrics and cache the result, since most calls request metrics anyway (a short caller-side note follows the storage client hunks below).
* Fetch usage caches asynchronously.
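
For reference, a minimal sketch of how the new options are wired up at a call site, modeled on the diffs below. The import path, value type, and `fetchUsage` helper are illustrative assumptions, not part of this commit:

```go
package main

import (
	"context"
	"fmt"
	"time"

	// Assumed import path for the internal cache package used in this commit.
	"github.com/minio/minio/internal/cachevalue"
)

// fetchUsage is a hypothetical stand-in for an expensive backend call
// (disk usage, disk info, bucket listing, ...).
func fetchUsage(ctx context.Context) (uint64, error) {
	select {
	case <-ctx.Done():
		return 0, ctx.Err()
	case <-time.After(100 * time.Millisecond):
		return 42, nil
	}
}

func main() {
	cache := cachevalue.New[uint64]()
	cache.Once.Do(func() {
		cache.TTL = 10 * time.Second
		// ReturnLastGood (formerly "Relax"): if Update fails, keep serving
		// the last successfully fetched value.
		cache.ReturnLastGood = true
		// NoWait: if a cached value exists and is younger than 2x TTL,
		// return it immediately and refresh asynchronously instead of
		// blocking the caller.
		cache.NoWait = true
		cache.Update = func() (uint64, error) {
			ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
			defer cancel()
			return fetchUsage(ctx)
		}
	})

	// Get serves from the cache; depending on the value's age it may return
	// the cached value, trigger an async refresh, or fetch synchronously.
	v, err := cache.Get()
	fmt.Println(v, err)
}
```

The disk-info cache in the last file additionally sets `CacheError = true`, allowing a result to be cached even when `Update` returns an error.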
Author:    Klaus Post
Date:      2024-02-26 10:49:19 -08:00
Committer: GitHub
Parent:    85bcb5874a
Commit:    2b5e4b853c
8 changed files with 84 additions and 67 deletions


```diff
@@ -1203,7 +1203,8 @@ func (a adminAPIHandlers) AccountInfoHandler(w http.ResponseWriter, r *http.Requ
 		bucketStorageCache.TTL = 10 * time.Second
 		// Rely on older value if usage loading fails from disk.
-		bucketStorageCache.Relax = true
+		bucketStorageCache.ReturnLastGood = true
+		bucketStorageCache.NoWait = true
 		bucketStorageCache.Update = func() (DataUsageInfo, error) {
 			ctx, done := context.WithTimeout(context.Background(), 2*time.Second)
 			defer done()
```


```diff
@@ -52,7 +52,8 @@ func (sys *BucketQuotaSys) Init(objAPI ObjectLayer) {
 		// does not update the bucket usage values frequently.
 		bucketStorageCache.TTL = 10 * time.Second
 		// Rely on older value if usage loading fails from disk.
-		bucketStorageCache.Relax = true
+		bucketStorageCache.ReturnLastGood = true
+		bucketStorageCache.NoWait = true
 		bucketStorageCache.Update = func() (DataUsageInfo, error) {
 			ctx, done := context.WithTimeout(context.Background(), 2*time.Second)
 			defer done()
```


```diff
@@ -81,7 +81,8 @@ func loadPrefixUsageFromBackend(ctx context.Context, objAPI ObjectLayer, bucket
 		prefixUsageCache.TTL = 30 * time.Second
 		// No need to fail upon Update() error, fallback to old value.
-		prefixUsageCache.Relax = true
+		prefixUsageCache.ReturnLastGood = true
+		prefixUsageCache.NoWait = true
 		prefixUsageCache.Update = func() (map[string]uint64, error) {
 			m := make(map[string]uint64)
 			for _, pool := range z.serverPools {
```


```diff
@@ -1851,7 +1851,8 @@ func (z *erasureServerPools) ListBuckets(ctx context.Context, opts BucketOptions
 	listBucketsCache.Once.Do(func() {
 		listBucketsCache.TTL = time.Second
-		listBucketsCache.Relax = true
+		listBucketsCache.ReturnLastGood = true
+		listBucketsCache.NoWait = true
 		listBucketsCache.Update = func() ([]BucketInfo, error) {
 			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 			buckets, err = z.s3Peer.ListBuckets(ctx, opts)
```


```diff
@@ -357,7 +357,7 @@ type MetricsGroupOpts struct {
 func (g *MetricsGroup) RegisterRead(read func(ctx context.Context) []Metric) {
 	g.metricsCache = cachevalue.New[[]Metric]()
 	g.metricsCache.Once.Do(func() {
-		g.metricsCache.Relax = true
+		g.metricsCache.ReturnLastGood = true
 		g.metricsCache.TTL = g.cacheInterval
 		g.metricsCache.Update = func() ([]Metric, error) {
 			if g.metricsGroupOpts.dependGlobalObjectAPI {
```


```diff
@@ -31,7 +31,6 @@ import (
 	"strconv"
 	"strings"
 	"sync"
-	"sync/atomic"
 	"time"
 
 	"github.com/minio/madmin-go/v3"
@@ -157,9 +156,6 @@ func toStorageErr(err error) error {
 
 // Abstracts a remote disk.
 type storageRESTClient struct {
-	// Indicate of NSScanner is in progress in this disk
-	scanning int32
-
 	endpoint Endpoint
 	restClient *rest.Client
 	gridConn *grid.Subroute
@@ -236,8 +232,6 @@ func (client *storageRESTClient) Healing() *healingTracker {
 }
 
 func (client *storageRESTClient) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry, scanMode madmin.HealScanMode, _ func() bool) (dataUsageCache, error) {
-	atomic.AddInt32(&client.scanning, 1)
-	defer atomic.AddInt32(&client.scanning, -1)
 	defer xioutil.SafeClose(updates)
 	st, err := storageNSScannerRPC.Call(ctx, client.gridConn, &nsScannerOptions{
@@ -310,8 +304,8 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
 		return info, errDiskNotFound
 	}
 
-	// if metrics was asked, or it was a NoOp we do not need to cache the value.
-	if opts.Metrics || opts.NoOp {
+	// if 'NoOp' we do not cache the value.
+	if opts.NoOp {
 		ctx, cancel := context.WithTimeout(ctx, 5*time.Second)
 		defer cancel()
@@ -325,17 +319,17 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
 		if info.Error != "" {
 			return info, toStorageErr(errors.New(info.Error))
 		}
-		info.Scanning = atomic.LoadInt32(&client.scanning) == 1
 		return info, nil
 	} // In all other cases cache the value upto 1sec.
 
 	client.diskInfoCache.Once.Do(func() {
 		client.diskInfoCache.TTL = time.Second
+		client.diskInfoCache.CacheError = true
 		client.diskInfoCache.Update = func() (info DiskInfo, err error) {
 			ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 			defer cancel()
 
-			nopts := DiskInfoOptions{DiskID: client.diskID}
+			nopts := DiskInfoOptions{DiskID: client.diskID, Metrics: true}
 			infop, err := storageDiskInfoRPC.Call(ctx, client.gridConn, &nopts)
 			if err != nil {
 				return info, toStorageErr(err)
@@ -348,9 +342,7 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context, opts DiskInfoOpti
 		}
 	})
 
-	info, err = client.diskInfoCache.Get()
-	info.Scanning = atomic.LoadInt32(&client.scanning) == 1
-	return info, err
+	return client.diskInfoCache.Get()
 }
 
 // MakeVolBulk - create multiple volumes in a bulk operation.
```
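
Caller-side note on the storage client hunks above: a hedged sketch of the new behavior, assuming it lives in the `cmd` package next to `storageRESTClient`; the helper name is hypothetical.

```go
// getCachedDiskInfo is a hypothetical helper showing the caller-side effect
// of this change: DiskInfo requests that ask for metrics are now served from
// the ~1s diskInfoCache (whose updater always sets Metrics: true), and only
// opts.NoOp bypasses the cache. Scanning is no longer overwritten locally
// from the removed client.scanning counter; the remote's value is returned
// as-is.
func getCachedDiskInfo(ctx context.Context, client *storageRESTClient) (DiskInfo, error) {
	return client.DiskInfo(ctx, DiskInfoOptions{DiskID: client.diskID, Metrics: true})
}
```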