ListObjects Metadata Caching (#10648)

Design: https://gist.github.com/klauspost/025c09b48ed4a1293c917cecfabdf21c

Gist of improvements:

* Caching is cross-server: listings share the same cached data across servers and requests.
* Lists can be resumed from any point at constant speed.
* Metadata for all files scanned is stored for streaming retrieval.
* The existing bloom filters controlled by the crawler are used to validate caches.
* Concurrent requests for the same data (or parts of it) will not spawn additional walkers.
* Listing a subdirectory of an existing recursive cache will reuse that cache (see the sketch after this list).
* All listing operations are fully streamable, so the number of objects in a bucket no longer dictates the amount of memory required.
* Listings can be handled by any server within the cluster.
* A cache is cleaned up when it is out of date or superseded by a more recent one.
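
The walker-deduplication and subdirectory-reuse behavior above can be sketched in a few lines of Go. This is an illustration only, not the PR's API: `cacheEntry`, `cacheManager`, and `lookupOrWalk` are hypothetical names, and the real implementation persists scanned metadata for streaming retrieval rather than holding object lists in memory.

```go
package main

import (
	"fmt"
	"strings"
	"sync"
)

// cacheEntry tracks one completed listing walk. (Hypothetical type; the
// real design streams persisted metadata instead of keeping it in memory.)
type cacheEntry struct {
	bucket    string
	prefix    string
	recursive bool
	objects   []string
}

// covers reports whether this entry can serve a request for (prefix,
// recursive): a recursive walk of a parent prefix covers any listing
// of a subdirectory beneath it.
func (e *cacheEntry) covers(prefix string, recursive bool) bool {
	if !strings.HasPrefix(prefix, e.prefix) {
		return false
	}
	if !e.recursive {
		// A non-recursive cache only covers its exact prefix.
		return e.prefix == prefix && !recursive
	}
	return true
}

// cacheManager deduplicates walkers: requests covered by an existing
// cache reuse it instead of spawning another walk.
type cacheManager struct {
	mu      sync.Mutex
	entries []*cacheEntry
}

// lookupOrWalk returns a covering cache entry, walking only on a miss.
// (Simplified: a real implementation would not hold the lock during the
// walk, and would let concurrent requests follow an in-flight walker.)
func (m *cacheManager) lookupOrWalk(bucket, prefix string, recursive bool, walk func() []string) *cacheEntry {
	m.mu.Lock()
	defer m.mu.Unlock()
	for _, e := range m.entries {
		if e.bucket == bucket && e.covers(prefix, recursive) {
			return e
		}
	}
	e := &cacheEntry{bucket: bucket, prefix: prefix, recursive: recursive, objects: walk()}
	m.entries = append(m.entries, e)
	return e
}

func main() {
	m := &cacheManager{}
	walks := 0
	walk := func() []string {
		walks++
		return []string{"photos/2020/a.jpg", "photos/2021/b.jpg"}
	}
	m.lookupOrWalk("bucket", "photos/", true, walk)      // miss: spawns the walk
	m.lookupOrWalk("bucket", "photos/2020/", true, walk) // hit: served by the recursive cache
	fmt.Println("walks performed:", walks)               // prints 1
}
```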
Author: Klaus Post (committed by GitHub)
Date: 2020-10-28 09:18:35 -07:00
Commit: a982baff27 (parent: 51222cc664)
65 changed files with 6328 additions and 742 deletions

@@ -197,27 +197,13 @@ func TestNewErasureSets(t *testing.T) {
 // TestHashedLayer - tests the hashed layer which will be returned
 // consistently for a given object name.
 func TestHashedLayer(t *testing.T) {
-	ctx, cancel := context.WithCancel(context.Background())
-	defer cancel()
-	var objs []*erasureObjects
-	for i := 0; i < 16; i++ {
-		obj, fsDirs, err := prepareErasure16(ctx)
-		if err != nil {
-			t.Fatal("Unable to initialize 'Erasure' object layer.", err)
-		}
-		defer obj.Shutdown(ctx)
-		// Remove all dirs.
-		for _, dir := range fsDirs {
-			defer os.RemoveAll(dir)
-		}
-		z := obj.(*erasureServerSets)
-		objs = append(objs, z.serverSets[0].sets[0])
-	}
+	// Test distribution with 16 sets.
+	var objs [16]*erasureObjects
+	for i := range objs {
+		objs[i] = &erasureObjects{}
+	}
-	sets := &erasureSets{sets: objs, distributionAlgo: "CRCMOD"}
+	sets := &erasureSets{sets: objs[:], distributionAlgo: "CRCMOD"}
 	testCases := []struct {
 		objectName string
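
For reference, the rewritten test only exercises name-to-set hashing, which is why the sets no longer need real disks behind them. Below is a minimal sketch of what CRCMOD-style distribution computes; the helper name and exact reduction are assumptions for illustration, not necessarily MinIO's internals.

```go
package main

import (
	"fmt"
	"hash/crc32"
)

// crcHashMod sketches "CRCMOD" distribution: CRC32 (IEEE polynomial)
// of the object name, reduced modulo the set count, so a given name
// always hashes to the same erasure set. (Assumed semantics.)
func crcHashMod(key string, cardinality int) int {
	if cardinality <= 0 {
		return -1
	}
	return int(crc32.ChecksumIEEE([]byte(key)) % uint32(cardinality))
}

func main() {
	const sets = 16 // matches the 16 sets in the test above
	for _, name := range []string{"object", "a/b/c/object", "deep/nested/prefix/object"} {
		fmt.Printf("%-28q -> set %d\n", name, crcHashMod(name, sets))
	}
}
```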