ListObjects Metadata Caching (#10648)

Design: https://gist.github.com/klauspost/025c09b48ed4a1293c917cecfabdf21c

Gist of improvements:

* Cross-server caching and listing will use the same data across servers and requests.
* Lists can be arbitrarily resumed at a constant speed.
* Metadata for all files scanned is stored for streaming retrieval.
* The existing bloom filters controlled by the crawler is used for validating caches.
* Concurrent requests for the same data (or parts of it) will not spawn additional walkers.
* Listing a subdirectory of an existing recursive cache will use the cache.
* All listing operations are fully streamable so the number of objects in a bucket no 
  longer dictates the amount of memory.
* Listings can be handled by any server within the cluster.
* Caches are cleaned up when out of date or superseded by a more recent one.
This commit is contained in:
Klaus Post
2020-10-28 09:18:35 -07:00
committed by GitHub
parent 51222cc664
commit a982baff27
65 changed files with 6328 additions and 742 deletions

View File

@@ -18,7 +18,6 @@ package cmd
import (
"context"
"fmt"
"net/http"
"strconv"
"strings"
@@ -114,15 +113,6 @@ func (api objectAPIHandlers) ListObjectVersionsHandler(w http.ResponseWriter, r
return
}
// Forward the request using Source IP or bucket
forwardStr := handlers.GetSourceIPFromHeaders(r)
if forwardStr == "" {
forwardStr = bucket
}
if proxyRequestByStringHash(ctx, w, r, forwardStr) {
return
}
listObjectVersions := objectAPI.ListObjectVersions
// Inititate a list object versions operation based on the input params.
@@ -145,7 +135,7 @@ func (api objectAPIHandlers) ListObjectVersionsHandler(w http.ResponseWriter, r
// ListObjectsV2MHandler - GET Bucket (List Objects) Version 2 with metadata.
// --------------------------
// This implementation of the GET operation returns some or all (up to 10000)
// of the objects in a bucket. You can use the request parame<ters as selection
// of the objects in a bucket. You can use the request parameters as selection
// criteria to return a subset of the objects in a bucket.
//
// NOTE: It is recommended that this API to be used for application development.
@@ -185,13 +175,6 @@ func (api objectAPIHandlers) ListObjectsV2MHandler(w http.ResponseWriter, r *htt
return
}
// Analyze continuation token and route the request accordingly
var success bool
token, success = proxyRequestByToken(ctx, w, r, token)
if success {
return
}
listObjectsV2 := objectAPI.ListObjectsV2
// Inititate a list objects operation based on the input params.
@@ -207,9 +190,6 @@ func (api objectAPIHandlers) ListObjectsV2MHandler(w http.ResponseWriter, r *htt
// The next continuation token has id@node_index format to optimize paginated listing
nextContinuationToken := listObjectsV2Info.NextContinuationToken
if nextContinuationToken != "" && listObjectsV2Info.IsTruncated {
nextContinuationToken = fmt.Sprintf("%s@%d", listObjectsV2Info.NextContinuationToken, getLocalNodeIndex())
}
response := generateListObjectsV2Response(bucket, prefix, token, nextContinuationToken, startAfter,
delimiter, encodingType, fetchOwner, listObjectsV2Info.IsTruncated,
@@ -262,13 +242,6 @@ func (api objectAPIHandlers) ListObjectsV2Handler(w http.ResponseWriter, r *http
return
}
// Analyze continuation token and route the request accordingly
var success bool
token, success = proxyRequestByToken(ctx, w, r, token)
if success {
return
}
listObjectsV2 := objectAPI.ListObjectsV2
// Inititate a list objects operation based on the input params.
@@ -282,13 +255,7 @@ func (api objectAPIHandlers) ListObjectsV2Handler(w http.ResponseWriter, r *http
concurrentDecryptETag(ctx, listObjectsV2Info.Objects)
// The next continuation token has id@node_index format to optimize paginated listing
nextContinuationToken := listObjectsV2Info.NextContinuationToken
if nextContinuationToken != "" && listObjectsV2Info.IsTruncated {
nextContinuationToken = fmt.Sprintf("%s@%d", listObjectsV2Info.NextContinuationToken, getLocalNodeIndex())
}
response := generateListObjectsV2Response(bucket, prefix, token, nextContinuationToken, startAfter,
response := generateListObjectsV2Response(bucket, prefix, token, listObjectsV2Info.NextContinuationToken, startAfter,
delimiter, encodingType, fetchOwner, listObjectsV2Info.IsTruncated,
maxKeys, listObjectsV2Info.Objects, listObjectsV2Info.Prefixes, false)
@@ -296,18 +263,6 @@ func (api objectAPIHandlers) ListObjectsV2Handler(w http.ResponseWriter, r *http
writeSuccessResponseXML(w, encodeResponse(response))
}
func getLocalNodeIndex() int {
if len(globalProxyEndpoints) == 0 {
return -1
}
for i, ep := range globalProxyEndpoints {
if ep.IsLocal {
return i
}
}
return -1
}
func parseRequestToken(token string) (subToken string, nodeIndex int) {
if token == "" {
return token, -1