ListObjects Metadata Caching (#10648)

Design: https://gist.github.com/klauspost/025c09b48ed4a1293c917cecfabdf21c

Gist of improvements:

* Cross-server caching and listing will use the same data across servers and requests.
* Lists can be arbitrarily resumed at a constant speed.
* Metadata for all files scanned is stored for streaming retrieval.
* The existing bloom filters controlled by the crawler is used for validating caches.
* Concurrent requests for the same data (or parts of it) will not spawn additional walkers.
* Listing a subdirectory of an existing recursive cache will use the cache.
* All listing operations are fully streamable so the number of objects in a bucket no 
  longer dictates the amount of memory.
* Listings can be handled by any server within the cluster.
* Caches are cleaned up when out of date or superseded by a more recent one.
This commit is contained in:
Klaus Post
2020-10-28 09:18:35 -07:00
committed by GitHub
parent 51222cc664
commit a982baff27
65 changed files with 6328 additions and 742 deletions

View File

@@ -26,17 +26,19 @@ import (
"path/filepath"
"reflect"
"runtime"
"sort"
"strconv"
"strings"
"time"
humanize "github.com/dustin/go-humanize"
"github.com/dustin/go-humanize"
"github.com/minio/minio-go/v7/pkg/set"
"github.com/minio/minio/cmd/config"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/cmd/rest"
"github.com/minio/minio/pkg/env"
"github.com/minio/minio/pkg/mountinfo"
xnet "github.com/minio/minio/pkg/net"
)
// EndpointType - enum for endpoint type.
@@ -269,6 +271,52 @@ func (l EndpointServerSets) Hostnames() []string {
return foundSet.ToSlice()
}
// hostsSorted will return all hosts found.
// The LOCAL host will be nil, but the indexes of all hosts should
// remain consistent across the cluster.
func (l EndpointServerSets) hostsSorted() []*xnet.Host {
peers, localPeer := l.peers()
sort.Strings(peers)
hosts := make([]*xnet.Host, len(peers))
for i, hostStr := range peers {
if hostStr == localPeer {
continue
}
host, err := xnet.ParseHost(hostStr)
if err != nil {
logger.LogIf(GlobalContext, err)
continue
}
hosts[i] = host
}
return hosts
}
// peers will return all peers, including local.
// The local peer is returned as a separate string.
func (l EndpointServerSets) peers() (peers []string, local string) {
allSet := set.NewStringSet()
for _, ep := range l {
for _, endpoint := range ep.Endpoints {
if endpoint.Type() != URLEndpointType {
continue
}
peer := endpoint.Host
if endpoint.IsLocal {
if _, port := mustSplitHostPort(peer); port == globalMinioPort {
local = peer
}
}
allSet.Add(peer)
}
}
return allSet.ToSlice(), local
}
// Endpoints - list of same type of endpoint.
type Endpoints []Endpoint
@@ -712,28 +760,6 @@ func GetLocalPeer(endpointServerSets EndpointServerSets) (localPeer string) {
return peerSet.ToSlice()[0]
}
// GetRemotePeers - get hosts information other than this minio service.
func GetRemotePeers(endpointServerSets EndpointServerSets) []string {
peerSet := set.NewStringSet()
for _, ep := range endpointServerSets {
for _, endpoint := range ep.Endpoints {
if endpoint.Type() != URLEndpointType {
continue
}
peer := endpoint.Host
if endpoint.IsLocal {
if _, port := mustSplitHostPort(peer); port == globalMinioPort {
continue
}
}
peerSet.Add(peer)
}
}
return peerSet.ToSlice()
}
// GetProxyEndpointLocalIndex returns index of the local proxy endpoint
func GetProxyEndpointLocalIndex(proxyEps []ProxyEndpoint) int {
for i, pep := range proxyEps {