ListObjects Metadata Caching (#10648)

Design: https://gist.github.com/klauspost/025c09b48ed4a1293c917cecfabdf21c

Gist of improvements:

* Cross-server caching and listing will use the same data across servers and requests.
* Lists can be arbitrarily resumed at a constant speed.
* Metadata for all files scanned is stored for streaming retrieval.
* The existing bloom filters controlled by the crawler is used for validating caches.
* Concurrent requests for the same data (or parts of it) will not spawn additional walkers.
* Listing a subdirectory of an existing recursive cache will use the cache.
* All listing operations are fully streamable so the number of objects in a bucket no 
  longer dictates the amount of memory.
* Listings can be handled by any server within the cluster.
* Caches are cleaned up when out of date or superseded by a more recent one.
This commit is contained in:
Klaus Post
2020-10-28 09:18:35 -07:00
committed by GitHub
parent 51222cc664
commit a982baff27
65 changed files with 6328 additions and 742 deletions

View File

@@ -34,6 +34,7 @@ import (
"github.com/minio/minio/pkg/event"
"github.com/minio/minio/pkg/madmin"
trace "github.com/minio/minio/pkg/trace"
"github.com/tinylib/msgp/msgp"
)
// To abstract a node over network.
@@ -616,7 +617,7 @@ func (s *peerRESTServer) ReloadFormatHandler(w http.ResponseWriter, r *http.Requ
w.(http.Flusher).Flush()
}
// CycleServerBloomFilterHandler cycles bllom filter on server.
// CycleServerBloomFilterHandler cycles bloom filter on server.
func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
s.writeErrorResponse(w, errors.New("Invalid request"))
@@ -631,7 +632,7 @@ func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r
s.writeErrorResponse(w, err)
return
}
bf, err := intDataUpdateTracker.cycleFilter(ctx, req.Oldest, req.Current)
bf, err := intDataUpdateTracker.cycleFilter(ctx, req)
if err != nil {
s.writeErrorResponse(w, err)
return
@@ -640,6 +641,51 @@ func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r
logger.LogIf(ctx, gob.NewEncoder(w).Encode(bf))
}
func (s *peerRESTServer) GetMetacacheListingHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
s.writeErrorResponse(w, errors.New("Invalid request"))
return
}
ctx := newContext(r, w, "GetMetacacheListing")
var opts listPathOptions
err := gob.NewDecoder(r.Body).Decode(&opts)
if err != nil && err != io.EOF {
s.writeErrorResponse(w, err)
return
}
resp := localMetacacheMgr.getBucket(ctx, opts.Bucket).findCache(opts)
logger.LogIf(ctx, msgp.Encode(w, &resp))
}
func (s *peerRESTServer) UpdateMetacacheListingHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
s.writeErrorResponse(w, errors.New("Invalid request"))
return
}
ctx := newContext(r, w, "UpdateMetacacheListing")
var req metacache
err := msgp.Decode(r.Body, &req)
if err != nil {
s.writeErrorResponse(w, err)
return
}
b := localMetacacheMgr.getBucket(ctx, req.bucket)
if b == nil {
s.writeErrorResponse(w, errServerNotInitialized)
return
}
cache, err := b.updateCacheEntry(req)
if err != nil {
s.writeErrorResponse(w, err)
return
}
// Return updated metadata.
logger.LogIf(ctx, msgp.Encode(w, &cache))
}
// PutBucketNotificationHandler - Set bucket policy.
func (s *peerRESTServer) PutBucketNotificationHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
@@ -1054,4 +1100,6 @@ func registerPeerRESTHandlers(router *mux.Router) {
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodLog).HandlerFunc(server.ConsoleLogHandler)
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetLocalDiskIDs).HandlerFunc(httpTraceHdrs(server.GetLocalDiskIDs))
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetBandwidth).HandlerFunc(httpTraceHdrs(server.GetBandwidth))
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetMetacacheListing).HandlerFunc(httpTraceHdrs(server.GetMetacacheListingHandler))
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodUpdateMetacacheListing).HandlerFunc(httpTraceHdrs(server.UpdateMetacacheListingHandler))
}