ListObjects Metadata Caching (#10648)

Design: https://gist.github.com/klauspost/025c09b48ed4a1293c917cecfabdf21c

Gist of improvements:

* Cross-server caching and listing will use the same data across servers and requests.
* Lists can be arbitrarily resumed at a constant speed.
* Metadata for all files scanned is stored for streaming retrieval.
* The existing bloom filters controlled by the crawler is used for validating caches.
* Concurrent requests for the same data (or parts of it) will not spawn additional walkers.
* Listing a subdirectory of an existing recursive cache will use the cache.
* All listing operations are fully streamable so the number of objects in a bucket no 
  longer dictates the amount of memory.
* Listings can be handled by any server within the cluster.
* Caches are cleaned up when out of date or superseded by a more recent one.
This commit is contained in:
Klaus Post
2020-10-28 09:18:35 -07:00
committed by GitHub
parent 51222cc664
commit a982baff27
65 changed files with 6328 additions and 742 deletions

View File

@@ -29,6 +29,7 @@ import (
"sync"
"time"
"github.com/cespare/xxhash/v2"
"github.com/klauspost/compress/zip"
"github.com/minio/minio-go/v7/pkg/set"
"github.com/minio/minio/cmd/crypto"
@@ -51,6 +52,7 @@ type NotificationSys struct {
bucketRulesMap map[string]event.RulesMap
bucketRemoteTargetRulesMap map[string]map[event.TargetID]event.RulesMap
peerClients []*peerRESTClient
allPeerClients []*peerRESTClient
}
// GetARNList - returns available ARNs.
@@ -451,7 +453,7 @@ func (sys *NotificationSys) updateBloomFilter(ctx context.Context, current uint6
// Load initial state from local...
var bf *bloomFilter
bfr, err := intDataUpdateTracker.cycleFilter(ctx, req.Oldest, req.Current)
bfr, err := intDataUpdateTracker.cycleFilter(ctx, req)
logger.LogIf(ctx, err)
if err == nil && bfr.Complete {
nbf := intDataUpdateTracker.newBloomFilter()
@@ -507,6 +509,124 @@ func (sys *NotificationSys) updateBloomFilter(ctx context.Context, current uint6
return bf, nil
}
// collectBloomFilter will collect bloom filters from all servers from the specified cycle.
func (sys *NotificationSys) collectBloomFilter(ctx context.Context, from uint64) (*bloomFilter, error) {
var req = bloomFilterRequest{
Current: 0,
Oldest: from,
}
// Load initial state from local...
var bf *bloomFilter
bfr, err := intDataUpdateTracker.cycleFilter(ctx, req)
logger.LogIf(ctx, err)
if err == nil && bfr.Complete {
nbf := intDataUpdateTracker.newBloomFilter()
bf = &nbf
_, err = bf.ReadFrom(bytes.NewBuffer(bfr.Filter))
logger.LogIf(ctx, err)
}
if !bfr.Complete {
// If local isn't complete just return early
return nil, nil
}
var mu sync.Mutex
g := errgroup.WithNErrs(len(sys.peerClients))
for idx, client := range sys.peerClients {
if client == nil {
continue
}
client := client
g.Go(func() error {
serverBF, err := client.cycleServerBloomFilter(ctx, req)
if false && intDataUpdateTracker.debug {
b, _ := json.MarshalIndent(serverBF, "", " ")
logger.Info("Disk %v, Bloom filter: %v", client.host.Name, string(b))
}
// Keep lock while checking result.
mu.Lock()
defer mu.Unlock()
if err != nil || !serverBF.Complete || bf == nil {
logger.LogIf(ctx, err)
bf = nil
return nil
}
var tmp bloom.BloomFilter
_, err = tmp.ReadFrom(bytes.NewBuffer(serverBF.Filter))
if err != nil {
logger.LogIf(ctx, err)
bf = nil
return nil
}
if bf.BloomFilter == nil {
bf.BloomFilter = &tmp
} else {
err = bf.Merge(&tmp)
if err != nil {
logger.LogIf(ctx, err)
bf = nil
return nil
}
}
return nil
}, idx)
}
g.Wait()
return bf, nil
}
// findEarliestCleanBloomFilter will find the earliest bloom filter across the cluster
// where the directory is clean.
// Due to how objects are stored this can include object names.
func (sys *NotificationSys) findEarliestCleanBloomFilter(ctx context.Context, dir string) uint64 {
// Load initial state from local...
current := intDataUpdateTracker.current()
best := intDataUpdateTracker.latestWithDir(dir)
if best == current {
// If the current is dirty no need to check others.
return current
}
var req = bloomFilterRequest{
Current: 0,
Oldest: best,
OldestClean: dir,
}
var mu sync.Mutex
g := errgroup.WithNErrs(len(sys.peerClients))
for idx, client := range sys.peerClients {
if client == nil {
continue
}
client := client
g.Go(func() error {
serverBF, err := client.cycleServerBloomFilter(ctx, req)
// Keep lock while checking result.
mu.Lock()
defer mu.Unlock()
if err != nil {
// Error, don't assume clean.
best = current
logger.LogIf(ctx, err)
return nil
}
if serverBF.OldestIdx > best {
best = serverBF.OldestIdx
}
return nil
}, idx)
}
g.Wait()
return best
}
// GetLocks - makes GetLocks RPC call on all peers.
func (sys *NotificationSys) GetLocks(ctx context.Context, r *http.Request) []*PeerLocks {
locksResp := make([]*PeerLocks, len(sys.peerClients))
@@ -1156,15 +1276,27 @@ func (sys *NotificationSys) GetLocalDiskIDs(ctx context.Context) (localDiskIDs [
return localDiskIDs
}
// restClientFromHash will return a deterministic peerRESTClient based on s.
// Will return nil if client is local.
func (sys *NotificationSys) restClientFromHash(s string) (client *peerRESTClient) {
if len(sys.peerClients) == 0 {
return nil
}
idx := xxhash.Sum64String(s) % uint64(len(sys.allPeerClients))
return sys.allPeerClients[idx]
}
// NewNotificationSys - creates new notification system object.
func NewNotificationSys(endpoints EndpointServerSets) *NotificationSys {
// targetList/bucketRulesMap/bucketRemoteTargetRulesMap are populated by NotificationSys.Init()
remote, all := newPeerRestClients(endpoints)
return &NotificationSys{
targetList: event.NewTargetList(),
targetResCh: make(chan event.TargetIDResult),
bucketRulesMap: make(map[string]event.RulesMap),
bucketRemoteTargetRulesMap: make(map[string]map[event.TargetID]event.RulesMap),
peerClients: newPeerRestClients(endpoints),
peerClients: remote,
allPeerClients: all,
}
}