ListObjects Metadata Caching (#10648)

Design: https://gist.github.com/klauspost/025c09b48ed4a1293c917cecfabdf21c

Gist of improvements:

* Cross-server caching and listing will use the same data across servers and requests.
* Lists can be arbitrarily resumed at a constant speed (see the sketch after this list).
* Metadata for all files scanned is stored for streaming retrieval.
* The existing bloom filters controlled by the crawler are used to validate caches.
* Concurrent requests for the same data (or parts of it) will not spawn additional walkers.
* Listing a subdirectory of an existing recursive cache will use the cache.
* All listing operations are fully streamable, so the number of objects in a bucket no 
  longer dictates the amount of memory used.
* Listings can be handled by any server within the cluster.
* Caches are cleaned up when out of date or superseded by a more recent one.
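
A rough illustration of the resume property above: every truncated listing response encodes the metacache listing ID into NextMarker (see encodeMarker in the erasure-server-sets changes below), so the follow-up request re-attaches to the same cached walk instead of rescanning. A minimal sketch of a paginated drain against the internal ObjectLayer interface, assuming objAPI, ctx and a process callback already exist; the loop itself is illustrative, not part of this commit:

marker := ""
for {
	// After the first page, the marker embeds the listID, letting any
	// server in the cluster resume the existing metacache stream.
	loi, err := objAPI.ListObjects(ctx, "mybucket", "prefix/", marker, "", maxObjectList)
	if err != nil {
		return err
	}
	for _, obj := range loi.Objects {
		process(obj)
	}
	if !loi.IsTruncated {
		return nil
	}
	marker = loi.NextMarker
}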
Klaus Post, 2020-10-28 09:18:35 -07:00 (committed by GitHub)
parent 51222cc664
commit a982baff27
65 changed files with 6328 additions and 742 deletions

View File

@ -1035,7 +1035,7 @@ func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) {
// Use buffered channel to take care of burst sends or slow w.Write()
traceCh := make(chan interface{}, 4000)
peers := newPeerRestClients(globalEndpoints)
peers, _ := newPeerRestClients(globalEndpoints)
globalHTTPTrace.Subscribe(traceCh, ctx.Done(), func(entry interface{}) bool {
return mustTrace(entry, trcAll, trcErr)
@ -1103,7 +1103,7 @@ func (a adminAPIHandlers) ConsoleLogHandler(w http.ResponseWriter, r *http.Reque
logCh := make(chan interface{}, 4000)
peers := newPeerRestClients(globalEndpoints)
peers, _ := newPeerRestClients(globalEndpoints)
globalConsoleSys.Subscribe(logCh, ctx.Done(), node, limitLines, logKind, nil)

View File

@ -221,5 +221,5 @@ func registerAdminRouter(router *mux.Router, enableConfigOps, enableIAMOps bool)
// If none of the routes match add default error handler routes
adminRouter.NotFoundHandler = httpTraceAll(errorResponseHandler)
adminRouter.MethodNotAllowedHandler = httpTraceAll(errorResponseHandler)
adminRouter.MethodNotAllowedHandler = httpTraceAll(methodNotAllowedHandler("Admin"))
}

View File

@ -35,11 +35,11 @@ import (
const (
// RFC3339 a subset of the ISO8601 timestamp format. e.g 2014-04-29T18:30:38Z
iso8601TimeFormat = "2006-01-02T15:04:05.000Z" // Reply date format with nanosecond precision.
maxObjectList = 1000 // Limit number of objects in a listObjectsResponse/listObjectsVersionsResponse.
maxDeleteList = 10000 // Limit number of objects deleted in a delete call.
maxUploadsList = 10000 // Limit number of uploads in a listUploadsResponse.
maxPartsList = 10000 // Limit number of parts in a listPartsResponse.
iso8601TimeFormat = "2006-01-02T15:04:05.000Z" // Reply date format with nanosecond precision.
maxObjectList = metacacheBlockSize - (metacacheBlockSize / 10) // Limit number of objects in a listObjectsResponse/listObjectsVersionsResponse.
maxDeleteList = 10000 // Limit number of objects deleted in a delete call.
maxUploadsList = 10000 // Limit number of uploads in a listUploadsResponse.
maxPartsList = 10000 // Limit number of parts in a listPartsResponse.
)
// LocationResponse - format for location response.

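The listing page limit above is now derived from the metacache block size, which this commit defines in the new metacache code (not visible in this hunk). A sketch of the arithmetic with a purely hypothetical block size, only to make the 10% slack concrete:

// Hypothetical value for illustration; the real constant is defined
// elsewhere in this commit.
const metacacheBlockSize = 5000

// A listing page is kept roughly 10% below one metacache block.
const maxObjectList = metacacheBlockSize - (metacacheBlockSize / 10) // 4500
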
View File

@ -44,6 +44,12 @@ func newCachedObjectLayerFn() CacheObjectLayer {
return globalCacheObjectAPI
}
func setObjectLayer(o ObjectLayer) {
globalObjLayerMutex.Lock()
globalObjectAPI = o
globalObjLayerMutex.Unlock()
}
// objectAPIHandler implements and provides http handlers for S3 API.
type objectAPIHandlers struct {
ObjectAPI func() ObjectLayer
@ -320,7 +326,7 @@ func registerAPIRouter(router *mux.Router) {
// If none of the routes match add default error handler routes
apiRouter.NotFoundHandler = collectAPIStats("notfound", httpTraceAll(errorResponseHandler))
apiRouter.MethodNotAllowedHandler = collectAPIStats("methodnotallowed", httpTraceAll(errorResponseHandler))
apiRouter.MethodNotAllowedHandler = collectAPIStats("methodnotallowed", httpTraceAll(methodNotAllowedHandler("S3")))
}

View File

@ -175,8 +175,8 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerSets, bgSeq *
logger.Info("Healing disk '%s' on %s zone complete", disk, humanize.Ordinal(i+1))
if err := disk.DeleteFile(ctx, pathJoin(minioMetaBucket, bucketMetaPrefix),
healingTrackerFilename); err != nil && !errors.Is(err, errFileNotFound) {
if err := disk.Delete(ctx, pathJoin(minioMetaBucket, bucketMetaPrefix),
healingTrackerFilename, false); err != nil && !errors.Is(err, errFileNotFound) {
logger.LogIf(ctx, err)
continue
}

View File

@ -19,7 +19,6 @@ package cmd
import (
"bytes"
"encoding/xml"
"fmt"
"io/ioutil"
"net/http"
"net/http/httptest"
@ -837,7 +836,7 @@ func testAPIDeleteMultipleObjectsHandler(obj ObjectLayer, instanceType, bucketNa
// Verify whether the bucket obtained object is same as the one created.
if testCase.expectedContent != nil && !bytes.Equal(testCase.expectedContent, actualContent) {
fmt.Println(string(testCase.expectedContent), string(actualContent))
t.Log(string(testCase.expectedContent), string(actualContent))
t.Errorf("Test %d : MinIO %s: Object content differs from expected value.", i+1, instanceType)
}
}

View File

@ -18,7 +18,6 @@ package cmd
import (
"context"
"fmt"
"net/http"
"strconv"
"strings"
@ -114,15 +113,6 @@ func (api objectAPIHandlers) ListObjectVersionsHandler(w http.ResponseWriter, r
return
}
// Forward the request using Source IP or bucket
forwardStr := handlers.GetSourceIPFromHeaders(r)
if forwardStr == "" {
forwardStr = bucket
}
if proxyRequestByStringHash(ctx, w, r, forwardStr) {
return
}
listObjectVersions := objectAPI.ListObjectVersions
// Initiate a list object versions operation based on the input params.
@ -145,7 +135,7 @@ func (api objectAPIHandlers) ListObjectVersionsHandler(w http.ResponseWriter, r
// ListObjectsV2MHandler - GET Bucket (List Objects) Version 2 with metadata.
// --------------------------
// This implementation of the GET operation returns some or all (up to 10000)
// of the objects in a bucket. You can use the request parame<ters as selection
// of the objects in a bucket. You can use the request parameters as selection
// criteria to return a subset of the objects in a bucket.
//
// NOTE: It is recommended that this API be used for application development.
@ -185,13 +175,6 @@ func (api objectAPIHandlers) ListObjectsV2MHandler(w http.ResponseWriter, r *htt
return
}
// Analyze continuation token and route the request accordingly
var success bool
token, success = proxyRequestByToken(ctx, w, r, token)
if success {
return
}
listObjectsV2 := objectAPI.ListObjectsV2
// Initiate a list objects operation based on the input params.
@ -207,9 +190,6 @@ func (api objectAPIHandlers) ListObjectsV2MHandler(w http.ResponseWriter, r *htt
// The next continuation token has id@node_index format to optimize paginated listing
nextContinuationToken := listObjectsV2Info.NextContinuationToken
if nextContinuationToken != "" && listObjectsV2Info.IsTruncated {
nextContinuationToken = fmt.Sprintf("%s@%d", listObjectsV2Info.NextContinuationToken, getLocalNodeIndex())
}
response := generateListObjectsV2Response(bucket, prefix, token, nextContinuationToken, startAfter,
delimiter, encodingType, fetchOwner, listObjectsV2Info.IsTruncated,
@ -262,13 +242,6 @@ func (api objectAPIHandlers) ListObjectsV2Handler(w http.ResponseWriter, r *http
return
}
// Analyze continuation token and route the request accordingly
var success bool
token, success = proxyRequestByToken(ctx, w, r, token)
if success {
return
}
listObjectsV2 := objectAPI.ListObjectsV2
// Initiate a list objects operation based on the input params.
@ -282,13 +255,7 @@ func (api objectAPIHandlers) ListObjectsV2Handler(w http.ResponseWriter, r *http
concurrentDecryptETag(ctx, listObjectsV2Info.Objects)
// The next continuation token has id@node_index format to optimize paginated listing
nextContinuationToken := listObjectsV2Info.NextContinuationToken
if nextContinuationToken != "" && listObjectsV2Info.IsTruncated {
nextContinuationToken = fmt.Sprintf("%s@%d", listObjectsV2Info.NextContinuationToken, getLocalNodeIndex())
}
response := generateListObjectsV2Response(bucket, prefix, token, nextContinuationToken, startAfter,
response := generateListObjectsV2Response(bucket, prefix, token, listObjectsV2Info.NextContinuationToken, startAfter,
delimiter, encodingType, fetchOwner, listObjectsV2Info.IsTruncated,
maxKeys, listObjectsV2Info.Objects, listObjectsV2Info.Prefixes, false)
@ -296,18 +263,6 @@ func (api objectAPIHandlers) ListObjectsV2Handler(w http.ResponseWriter, r *http
writeSuccessResponseXML(w, encodeResponse(response))
}
func getLocalNodeIndex() int {
if len(globalProxyEndpoints) == 0 {
return -1
}
for i, ep := range globalProxyEndpoints {
if ep.IsLocal {
return i
}
}
return -1
}
func parseRequestToken(token string) (subToken string, nodeIndex int) {
if token == "" {
return token, -1

View File

@ -235,7 +235,8 @@ func replicateObject(ctx context.Context, objInfo ObjectInfo, objectAPI ObjectLa
replicationStatus := replication.Complete
// Setup bandwidth throttling
totalNodesCount := len(GetRemotePeers(globalEndpoints)) + 1
peers, _ := globalEndpoints.peers()
totalNodesCount := len(peers)
b := target.BandwidthLimit / int64(totalNodesCount)
var headerSize int
for k, v := range putOpts.Header() {

View File

@ -40,13 +40,13 @@ import (
const (
// Estimate bloom filter size. With this many items
dataUpdateTrackerEstItems = 1000000
dataUpdateTrackerEstItems = 10000000
// ... we want this false positive rate:
dataUpdateTrackerFP = 0.99
dataUpdateTrackerQueueSize = 10000
dataUpdateTrackerFilename = dataUsageBucket + SlashSeparator + ".tracker.bin"
dataUpdateTrackerVersion = 3
dataUpdateTrackerVersion = 4
dataUpdateTrackerSaveInterval = 5 * time.Minute
)
@ -168,6 +168,43 @@ func (d *dataUpdateTracker) current() uint64 {
return d.Current.idx
}
// latestWithDir returns the highest index that contains the directory.
// This means that any cycle higher than this does NOT contain the entry.
func (d *dataUpdateTracker) latestWithDir(dir string) uint64 {
bucket, _ := path2BucketObjectWithBasePath("", dir)
if bucket == "" {
if d.debug && len(dir) > 0 {
logger.Info(color.Green("dataUpdateTracker:")+" no bucket (%s)", dir)
}
return d.current()
}
if isReservedOrInvalidBucket(bucket, false) {
if d.debug {
logger.Info(color.Green("dataUpdateTracker:")+" isReservedOrInvalidBucket: %v, entry: %v", bucket, dir)
}
return d.current()
}
d.mu.Lock()
defer d.mu.Unlock()
if d.Current.bf.containsDir(dir) || d.Current.idx == 0 {
return d.Current.idx
}
if d.debug {
logger.Info("current bloom does NOT contains dir %s", dir)
}
idx := d.Current.idx - 1
for {
f := d.History.find(idx)
if f == nil || f.bf.containsDir(dir) || idx == 0 {
break
}
idx--
}
return idx
}
// start will load the current data from the drives start collecting information and
// start a saver goroutine.
// All of these will exit when the context is canceled.
@ -445,26 +482,30 @@ func (d *dataUpdateTracker) startCollector(ctx context.Context) {
case <-ctx.Done():
return
case in := <-d.input:
if d.debug {
logger.Info(color.Green("dataUpdateTracker:")+" got (%s)", in)
}
bucket, _ := path2BucketObjectWithBasePath("", in)
if bucket == "" {
if d.debug && len(in) > 0 {
logger.Info(color.Green("data-usage:")+" no bucket (%s)", in)
logger.Info(color.Green("dataUpdateTracker:")+" no bucket (%s)", in)
}
continue
}
if isReservedOrInvalidBucket(bucket, false) {
if false && d.debug {
logger.Info(color.Green("data-usage:")+" isReservedOrInvalidBucket: %v, entry: %v", bucket, in)
if d.debug {
logger.Info(color.Green("dataUpdateTracker:")+" isReservedOrInvalidBucket: %v, entry: %v", bucket, in)
}
continue
}
split := splitPathDeterministic(in)
// Add all paths until level 3.
// Add all paths until done.
d.mu.Lock()
for i := range split {
if d.debug && false {
if d.debug {
logger.Info(color.Green("dataUpdateTracker:") + " Marking path dirty: " + color.Blue(path.Join(split[:i+1]...)))
}
d.Current.bf.AddString(hashPath(path.Join(split[:i+1]...)).String())
@ -534,8 +575,13 @@ func (d *dataUpdateTracker) filterFrom(ctx context.Context, oldest, newest uint6
// cycleFilter will cycle the bloom filter to start recording to index y if not already.
// The response will contain a bloom filter starting at index x up to, but not including index y.
// If y is 0, the response will not update y, but return the currently recorded information
// from the up until and including current y.
func (d *dataUpdateTracker) cycleFilter(ctx context.Context, oldest, current uint64) (*bloomFilterResponse, error) {
// from the oldest (or all, when oldest is 0) up to and including the current y.
func (d *dataUpdateTracker) cycleFilter(ctx context.Context, req bloomFilterRequest) (*bloomFilterResponse, error) {
if req.OldestClean != "" {
return &bloomFilterResponse{OldestIdx: d.latestWithDir(req.OldestClean)}, nil
}
current := req.Current
oldest := req.Oldest
d.mu.Lock()
defer d.mu.Unlock()
if current == 0 {
@ -543,7 +589,10 @@ func (d *dataUpdateTracker) cycleFilter(ctx context.Context, oldest, current uin
return d.filterFrom(ctx, d.Current.idx, d.Current.idx), nil
}
d.History.sort()
return d.filterFrom(ctx, d.History[len(d.History)-1].idx, d.Current.idx), nil
if oldest == 0 {
oldest = d.History[len(d.History)-1].idx
}
return d.filterFrom(ctx, oldest, d.Current.idx), nil
}
// Move current to history if new one requested
@ -587,10 +636,6 @@ func splitPathDeterministic(in string) []string {
split = split[:len(split)-1]
}
// Return up to 3 parts.
if len(split) > 3 {
split = split[:3]
}
return split
}
@ -599,6 +644,9 @@ func splitPathDeterministic(in string) []string {
type bloomFilterRequest struct {
Oldest uint64
Current uint64
// If set the oldest clean version will be returned in OldestIdx
// and the rest of the request will be ignored.
OldestClean string
}
type bloomFilterResponse struct {
@ -617,6 +665,9 @@ type bloomFilterResponse struct {
// ObjectPathUpdated indicates a path has been updated.
// The function will never block.
func ObjectPathUpdated(s string) {
if strings.HasPrefix(s, minioMetaBucket) {
return
}
select {
case objectUpdatedCh <- s:
default:

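The new latestWithDir lookup and the OldestClean request field above give listing caches a cheap validity test: a cache can be trusted if no bloom cycle newer than the one it was built in recorded writes under its prefix. A rough sketch of such a check, assuming the cache remembers the cycle it was created at; the helper and its parameter names are illustrative, the real check lives in the new metacache code:

// cacheStillValid reports whether a listing cache built for prefix at
// bloom cycle createdCycle can be reused without rescanning the disks.
func cacheStillValid(ctx context.Context, d *dataUpdateTracker, prefix string, createdCycle uint64) bool {
	resp, err := d.cycleFilter(ctx, bloomFilterRequest{OldestClean: prefix})
	if err != nil {
		return false
	}
	// resp.OldestIdx is the most recent cycle whose filter still contains
	// the prefix, i.e. the last cycle that saw writes under it. A cache
	// created at or after that cycle has seen every write so far.
	return createdCycle >= resp.OldestIdx
}
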
View File

@ -169,7 +169,12 @@ func TestDataUpdateTracker(t *testing.T) {
})
}
// Cycle to history
_, err = dut.cycleFilter(ctx, 1, 2)
req := bloomFilterRequest{
Oldest: 1,
Current: 2,
}
_, err = dut.cycleFilter(ctx, req)
if err != nil {
t.Fatal(err)
}
@ -200,7 +205,11 @@ func TestDataUpdateTracker(t *testing.T) {
if dut.current() != 2 {
t.Fatal("current idx after load not preserved. want 2, got:", dut.current())
}
bfr2, err := dut.cycleFilter(ctx, 1, 3)
req = bloomFilterRequest{
Oldest: 1,
Current: 3,
}
bfr2, err := dut.cycleFilter(ctx, req)
if err != nil {
t.Fatal(err)
}

View File

@ -26,17 +26,19 @@ import (
"path/filepath"
"reflect"
"runtime"
"sort"
"strconv"
"strings"
"time"
humanize "github.com/dustin/go-humanize"
"github.com/dustin/go-humanize"
"github.com/minio/minio-go/v7/pkg/set"
"github.com/minio/minio/cmd/config"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/cmd/rest"
"github.com/minio/minio/pkg/env"
"github.com/minio/minio/pkg/mountinfo"
xnet "github.com/minio/minio/pkg/net"
)
// EndpointType - enum for endpoint type.
@ -269,6 +271,52 @@ func (l EndpointServerSets) Hostnames() []string {
return foundSet.ToSlice()
}
// hostsSorted will return all hosts found.
// The LOCAL host will be nil, but the indexes of all hosts should
// remain consistent across the cluster.
func (l EndpointServerSets) hostsSorted() []*xnet.Host {
peers, localPeer := l.peers()
sort.Strings(peers)
hosts := make([]*xnet.Host, len(peers))
for i, hostStr := range peers {
if hostStr == localPeer {
continue
}
host, err := xnet.ParseHost(hostStr)
if err != nil {
logger.LogIf(GlobalContext, err)
continue
}
hosts[i] = host
}
return hosts
}
// peers will return all peers, including local.
// The local peer is returned as a separate string.
func (l EndpointServerSets) peers() (peers []string, local string) {
allSet := set.NewStringSet()
for _, ep := range l {
for _, endpoint := range ep.Endpoints {
if endpoint.Type() != URLEndpointType {
continue
}
peer := endpoint.Host
if endpoint.IsLocal {
if _, port := mustSplitHostPort(peer); port == globalMinioPort {
local = peer
}
}
allSet.Add(peer)
}
}
return allSet.ToSlice(), local
}
// Endpoints - list of same type of endpoint.
type Endpoints []Endpoint
@ -712,28 +760,6 @@ func GetLocalPeer(endpointServerSets EndpointServerSets) (localPeer string) {
return peerSet.ToSlice()[0]
}
// GetRemotePeers - get hosts information other than this minio service.
func GetRemotePeers(endpointServerSets EndpointServerSets) []string {
peerSet := set.NewStringSet()
for _, ep := range endpointServerSets {
for _, endpoint := range ep.Endpoints {
if endpoint.Type() != URLEndpointType {
continue
}
peer := endpoint.Host
if endpoint.IsLocal {
if _, port := mustSplitHostPort(peer); port == globalMinioPort {
continue
}
}
peerSet.Add(peer)
}
}
return peerSet.ToSlice()
}
// GetProxyEndpointLocalIndex returns index of the local proxy endpoint
func GetProxyEndpointLocalIndex(proxyEps []ProxyEndpoint) int {
for i, pep := range proxyEps {

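Because hostsSorted keeps the peer slice sorted identically on every node and leaves the local host as a nil slot, an index into that slice names the same server cluster-wide, which is what allows any node to own or resume a listing. A small sketch of deterministic owner selection by hashing an ID into that slice; pickHost is hypothetical, not part of this commit:

import "hash/fnv"

// pickHost maps a listing/cache ID to the same host index on every node.
// A nil result means the local server is the owner.
func pickHost(hosts []*xnet.Host, id string) *xnet.Host {
	if len(hosts) == 0 {
		return nil
	}
	h := fnv.New32a()
	h.Write([]byte(id))
	return hosts[int(h.Sum32())%len(hosts)]
}
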
View File

@ -380,24 +380,28 @@ func TestGetRemotePeers(t *testing.T) {
testCases := []struct {
endpointArgs []string
expectedResult []string
expectedLocal string
}{
{[]string{"/d1", "/d2", "d3", "d4"}, []string{}},
{[]string{"http://localhost:9000/d1", "http://localhost:9000/d2", "http://example.org:9000/d3", "http://example.com:9000/d4"}, []string{"example.com:9000", "example.org:9000"}},
{[]string{"http://localhost:9000/d1", "http://localhost:10000/d2", "http://example.org:9000/d3", "http://example.com:9000/d4"}, []string{"example.com:9000", "example.org:9000", "localhost:10000"}},
{[]string{"http://localhost:9000/d1", "http://example.org:9000/d2", "http://example.com:9000/d3", "http://example.net:9000/d4"}, []string{"example.com:9000", "example.net:9000", "example.org:9000"}},
{[]string{"http://localhost:9000/d1", "http://localhost:9001/d2", "http://localhost:9002/d3", "http://localhost:9003/d4"}, []string{"localhost:9001", "localhost:9002", "localhost:9003"}},
{[]string{"/d1", "/d2", "d3", "d4"}, []string{}, ""},
{[]string{"http://localhost:9000/d1", "http://localhost:9000/d2", "http://example.org:9000/d3", "http://example.com:9000/d4"}, []string{"example.com:9000", "example.org:9000", "localhost:9000"}, "localhost:9000"},
{[]string{"http://localhost:9000/d1", "http://localhost:10000/d2", "http://example.org:9000/d3", "http://example.com:9000/d4"}, []string{"example.com:9000", "example.org:9000", "localhost:10000", "localhost:9000"}, "localhost:9000"},
{[]string{"http://localhost:9000/d1", "http://example.org:9000/d2", "http://example.com:9000/d3", "http://example.net:9000/d4"}, []string{"example.com:9000", "example.net:9000", "example.org:9000", "localhost:9000"}, "localhost:9000"},
{[]string{"http://localhost:9000/d1", "http://localhost:9001/d2", "http://localhost:9002/d3", "http://localhost:9003/d4"}, []string{"localhost:9000", "localhost:9001", "localhost:9002", "localhost:9003"}, "localhost:9000"},
}
for _, testCase := range testCases {
zendpoints := mustGetZoneEndpoints(testCase.endpointArgs...)
if !zendpoints[0].Endpoints[0].IsLocal {
if err := zendpoints[0].Endpoints.UpdateIsLocal(false); err != nil {
t.Fatalf("error: expected = <nil>, got = %v", err)
t.Errorf("error: expected = <nil>, got = %v", err)
}
}
remotePeers := GetRemotePeers(zendpoints)
remotePeers, local := zendpoints.peers()
if !reflect.DeepEqual(remotePeers, testCase.expectedResult) {
t.Fatalf("expected: %v, got: %v", testCase.expectedResult, remotePeers)
t.Errorf("expected: %v, got: %v", testCase.expectedResult, remotePeers)
}
if local != testCase.expectedLocal {
t.Errorf("expected: %v, got: %v", testCase.expectedLocal, local)
}
}
}

View File

@ -158,6 +158,7 @@ func deleteDanglingBucket(ctx context.Context, storageDisks []StorageAPI, dErrs
// DeleteBucket - deletes a bucket.
func (er erasureObjects) DeleteBucket(ctx context.Context, bucket string, forceDelete bool) error {
// Collect if all disks report volume not found.
defer ObjectPathUpdated(bucket + slashSeparator)
storageDisks := er.getDisks()
g := errgroup.WithNErrs(len(storageDisks))

View File

@ -51,6 +51,7 @@ func (er erasureObjects) getOnlineDisks() (newDisks []StorageAPI) {
}
di, err := disks[i-1].DiskInfo(context.Background())
if err != nil || di.Healing {
// - Do not consume disks which are not reachable
// unformatted or simply not accessible for some reason.
//

View File

@ -195,7 +195,7 @@ func benchmarkErasureEncode(data, parity, dataDown, parityDown int, size int64,
if disk == OfflineDisk {
continue
}
disk.DeleteFile(context.Background(), "testbucket", "object")
disk.Delete(context.Background(), "testbucket", "object", false)
writers[i] = newBitrotWriter(disk, "testbucket", "object", erasure.ShardFileSize(size), DefaultBitrotAlgorithm, erasure.ShardSize())
}
_, err := erasure.Encode(context.Background(), bytes.NewReader(content), writers, buffer, erasure.dataBlocks+1)

View File

@ -211,7 +211,7 @@ func TestListOnlineDisks(t *testing.T) {
// and check if that disk
// appears in outDatedDisks.
tamperedIndex = index
dErr := erasureDisks[index].DeleteFile(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"))
dErr := erasureDisks[index].Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
if dErr != nil {
t.Fatalf("Test %d: Failed to delete %s - %v", i+1,
filepath.Join(object, "part.1"), dErr)

View File

@ -21,6 +21,7 @@ import (
"errors"
"fmt"
"io"
"path"
"sync"
"time"
@ -304,6 +305,10 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
})
}
if isAllNotFound(errs) {
return defaultHealResult(latestFileInfo, storageDisks, storageEndpoints, errs, bucket, object), nil
}
// If less than read quorum number of disks have all the parts
// of the data, we can't reconstruct the erasure-coded data.
if numAvailableDisks < dataBlocks {
@ -342,6 +347,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s
if pErr != nil {
return result, toObjectErr(pErr, bucket, object)
}
defer ObjectPathUpdated(pathJoin(bucket, object))
cleanFileInfo := func(fi FileInfo) FileInfo {
// Returns a copy of the 'fi' with checksums and parts nil'ed.
@ -518,10 +524,11 @@ func (er erasureObjects) healObjectDir(ctx context.Context, bucket, object strin
wg.Add(1)
go func(index int, disk StorageAPI) {
defer wg.Done()
_ = disk.DeleteFile(ctx, bucket, object)
_ = disk.Delete(ctx, bucket, object, false)
}(index, disk)
}
wg.Wait()
ObjectPathUpdated(path.Join(bucket, object))
}
}
@ -544,7 +551,7 @@ func (er erasureObjects) healObjectDir(ctx context.Context, bucket, object strin
hr.After.Drives[i] = madmin.HealDriveInfo{Endpoint: drive, State: madmin.DriveStateCorrupt}
}
}
if dryRun || danglingObject {
if dryRun || danglingObject || isAllNotFound(errs) {
return hr, nil
}
for i, err := range errs {
@ -649,9 +656,23 @@ func statAllDirs(ctx context.Context, storageDisks []StorageAPI, bucket, prefix
return g.Wait()
}
// isAllNotFound returns whether every element of the error slice is
// errFileNotFound, errFileVersionNotFound or errVolumeNotFound.
// A 0 length slice will always return false.
func isAllNotFound(errs []error) bool {
for _, err := range errs {
if errors.Is(err, errFileNotFound) || errors.Is(err, errVolumeNotFound) || errors.Is(err, errFileVersionNotFound) {
continue
}
return false
}
return len(errs) > 0
}
// ObjectDir is considered dangling/corrupted if and only
// if total disks - a combination of corrupted and missing
// files - is less than N/2+1 number of disks.
// If no files were found, false will be returned.
func isObjectDirDangling(errs []error) (ok bool) {
var found int
var notFound int
@ -668,7 +689,8 @@ func isObjectDirDangling(errs []error) (ok bool) {
otherFound++
}
}
return found+foundNotEmpty+otherFound < notFound
found = found + foundNotEmpty + otherFound
return found < notFound && found > 0
}
// Object is considered dangling/corrupted if and only
@ -748,6 +770,10 @@ func (er erasureObjects) HealObject(ctx context.Context, bucket, object, version
// Read metadata files from all the disks
partsMetadata, errs := readAllFileInfo(healCtx, storageDisks, bucket, object, versionID)
if isAllNotFound(errs) {
// Nothing to do
return defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs, bucket, object), nil
}
// Check if the object is dangling, if yes and user requested
// remove we simply delete it from namespace.
if m, ok := isObjectDangling(partsMetadata, errs, []error{}); ok {

View File

@ -201,7 +201,7 @@ func TestHealObjectCorrupted(t *testing.T) {
er := z.serverSets[0].sets[0]
erasureDisks := er.getDisks()
firstDisk := erasureDisks[0]
err = firstDisk.DeleteFile(context.Background(), bucket, pathJoin(object, xlStorageFormatFile))
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
if err != nil {
t.Fatalf("Failed to delete a file - %v", err)
}
@ -221,7 +221,7 @@ func TestHealObjectCorrupted(t *testing.T) {
t.Errorf("Expected er.meta file to be present but stat failed - %v", err)
}
err = firstDisk.DeleteFile(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"))
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
if err != nil {
t.Errorf("Failure during deleting part.1 - %v", err)
}
@ -246,7 +246,7 @@ func TestHealObjectCorrupted(t *testing.T) {
t.Fatalf("FileInfo not equal after healing")
}
err = firstDisk.DeleteFile(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"))
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
if err != nil {
t.Errorf("Failure during deleting part.1 - %v", err)
}
@ -275,7 +275,7 @@ func TestHealObjectCorrupted(t *testing.T) {
// Test 4: checks if HealObject returns an error when xl.meta is not found
// in more than read quorum number of disks, to create a corrupted situation.
for i := 0; i <= len(er.getDisks())/2; i++ {
er.getDisks()[i].DeleteFile(context.Background(), bucket, pathJoin(object, xlStorageFormatFile))
er.getDisks()[i].Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
}
// Try healing now, expect to receive errFileNotFound.
@ -351,7 +351,7 @@ func TestHealObjectErasure(t *testing.T) {
t.Fatalf("Failed to complete multipart upload - %v", err)
}
err = firstDisk.DeleteFile(context.Background(), bucket, pathJoin(object, xlStorageFormatFile))
err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false)
if err != nil {
t.Fatalf("Failed to delete a file - %v", err)
}

View File

@ -81,7 +81,7 @@ func (er erasureObjects) removeObjectPart(bucket, object, uploadID, dataDir stri
// Ignoring failure to remove parts that weren't present in CompleteMultipartUpload
// requests. xl.meta is the authoritative source of truth on which parts constitute
// the object. The presence of parts that don't belong in the object doesn't affect correctness.
_ = storageDisks[index].DeleteFile(context.TODO(), minioMetaMultipartBucket, curpartPath)
_ = storageDisks[index].Delete(context.TODO(), minioMetaMultipartBucket, curpartPath, false)
return nil
}, index)
}

View File

@ -177,7 +177,7 @@ func (er erasureObjects) GetObjectNInfo(ctx context.Context, bucket, object stri
pr, pw := io.Pipe()
go func() {
err := er.getObjectWithFileInfo(ctx, bucket, object, off, length, pw, "", opts, fi, metaArr, onlineDisks)
err := er.getObjectWithFileInfo(ctx, bucket, object, off, length, pw, fi, metaArr, onlineDisks)
pw.CloseWithError(err)
}()
@ -214,11 +214,10 @@ func (er erasureObjects) GetObject(ctx context.Context, bucket, object string, s
return errUnexpected
}
return er.getObject(ctx, bucket, object, startOffset, length, writer, etag, opts)
return er.getObject(ctx, bucket, object, startOffset, length, writer, opts)
}
func (er erasureObjects) getObjectWithFileInfo(ctx context.Context, bucket, object string, startOffset int64, length int64, writer io.Writer, etag string, opts ObjectOptions, fi FileInfo, metaArr []FileInfo, onlineDisks []StorageAPI) error {
func (er erasureObjects) getObjectWithFileInfo(ctx context.Context, bucket, object string, startOffset int64, length int64, writer io.Writer, fi FileInfo, metaArr []FileInfo, onlineDisks []StorageAPI) error {
// Reorder online disks based on erasure distribution order.
// Reorder parts metadata based on erasure distribution order.
onlineDisks, metaArr = shuffleDisksAndPartsMetadataByIndex(onlineDisks, metaArr, fi.Erasure.Distribution)
@ -325,7 +324,7 @@ func (er erasureObjects) getObjectWithFileInfo(ctx context.Context, bucket, obje
}
// getObject wrapper for erasure GetObject
func (er erasureObjects) getObject(ctx context.Context, bucket, object string, startOffset int64, length int64, writer io.Writer, etag string, opts ObjectOptions) error {
func (er erasureObjects) getObject(ctx context.Context, bucket, object string, startOffset, length int64, writer io.Writer, opts ObjectOptions) error {
fi, metaArr, onlineDisks, err := er.getObjectFileInfo(ctx, bucket, object, opts)
if err != nil {
return toObjectErr(err, bucket, object)
@ -338,7 +337,7 @@ func (er erasureObjects) getObject(ctx context.Context, bucket, object string, s
return toObjectErr(errMethodNotAllowed, bucket, object)
}
return er.getObjectWithFileInfo(ctx, bucket, object, startOffset, length, writer, etag, opts, fi, metaArr, onlineDisks)
return er.getObjectWithFileInfo(ctx, bucket, object, startOffset, length, writer, fi, metaArr, onlineDisks)
}
// GetObjectInfo - reads object metadata and replies back ObjectInfo.
@ -426,6 +425,9 @@ func undoRename(disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry str
// Similar to rename but renames data from srcEntry to dstEntry at dataDir
func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry, dataDir, dstBucket, dstEntry string, writeQuorum int, ignoredErr []error) ([]StorageAPI, error) {
dataDir = retainSlash(dataDir)
defer ObjectPathUpdated(path.Join(srcBucket, srcEntry))
defer ObjectPathUpdated(path.Join(dstBucket, dstEntry))
g := errgroup.WithNErrs(len(disks))
// Rename file on all underlying storage disks.
@ -473,11 +475,12 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry, da
// rename - common function that renamePart and renameObject use to rename
// the respective underlying storage layer representations.
func rename(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry string, isDir bool, writeQuorum int, ignoredErr []error) ([]StorageAPI, error) {
if isDir {
dstEntry = retainSlash(dstEntry)
srcEntry = retainSlash(srcEntry)
}
defer ObjectPathUpdated(path.Join(srcBucket, srcEntry))
defer ObjectPathUpdated(path.Join(dstBucket, dstEntry))
g := errgroup.WithNErrs(len(disks))
@ -705,10 +708,9 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
}
func (er erasureObjects) deleteObjectVersion(ctx context.Context, bucket, object string, writeQuorum int, fi FileInfo) error {
defer ObjectPathUpdated(pathJoin(bucket, object))
disks := er.getDisks()
g := errgroup.WithNErrs(len(disks))
for index := range disks {
index := index
g.Go(func() error {
@ -853,6 +855,7 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec
}
errs[objIndex] = reduceWriteQuorumErrs(ctx, diskErrs, objectOpIgnoredErrs, writeQuorums[objIndex])
if errs[objIndex] == nil {
ObjectPathUpdated(pathJoin(bucket, objects[objIndex].ObjectName))
if versions[objIndex].Deleted {
dobjects[objIndex] = DeletedObject{
DeleteMarker: versions[objIndex].Deleted,
@ -892,6 +895,7 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec
// any error as it is not necessary for the handler to reply back a
// response to the client request.
func (er erasureObjects) DeleteObject(ctx context.Context, bucket, object string, opts ObjectOptions) (objInfo ObjectInfo, err error) {
defer ObjectPathUpdated(path.Join(bucket, object))
goi, gerr := er.GetObjectInfo(ctx, bucket, object, opts)
if gerr != nil && goi.Name == "" {
switch gerr.(type) {
@ -1021,6 +1025,66 @@ func (er erasureObjects) PutObjectTags(ctx context.Context, bucket, object strin
return nil
}
// updateObjectMeta will update the metadata of a file.
func (er erasureObjects) updateObjectMeta(ctx context.Context, bucket, object string, meta map[string]string, opts ObjectOptions) error {
if len(meta) == 0 {
return nil
}
disks := er.getDisks()
// Read metadata associated with the object from all disks.
metaArr, errs := readAllFileInfo(ctx, disks, bucket, object, opts.VersionID)
readQuorum, writeQuorum, err := objectQuorumFromMeta(ctx, er, metaArr, errs)
if err != nil {
return toObjectErr(err, bucket, object)
}
// List all online disks.
_, modTime := listOnlineDisks(disks, metaArr, errs)
// Pick latest valid metadata.
fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum)
if err != nil {
return toObjectErr(err, bucket, object)
}
// Update metadata
for k, v := range meta {
fi.Metadata[k] = v
}
if fi.Deleted {
if opts.VersionID == "" {
return toObjectErr(errFileNotFound, bucket, object)
}
return toObjectErr(errMethodNotAllowed, bucket, object)
}
for i := range metaArr {
if errs[i] != nil {
// Avoid disks where loading metadata fails
continue
}
metaArr[i].Metadata = fi.Metadata
}
tempObj := mustGetUUID()
// Write unique `xl.meta` for each disk.
if disks, err = writeUniqueFileInfo(ctx, disks, minioMetaTmpBucket, tempObj, metaArr, writeQuorum); err != nil {
return toObjectErr(err, bucket, object)
}
// Atomically rename metadata from tmp location to destination for each disk.
if _, err = renameFileInfo(ctx, disks, minioMetaTmpBucket, tempObj, bucket, object, writeQuorum); err != nil {
return toObjectErr(err, bucket, object)
}
return nil
}
// DeleteObjectTags - delete object tags from an existing object
func (er erasureObjects) DeleteObjectTags(ctx context.Context, bucket, object string, opts ObjectOptions) error {
return er.PutObjectTags(ctx, bucket, object, "", opts)

View File

@ -31,7 +31,6 @@ import (
"github.com/minio/minio-go/v7/pkg/set"
"github.com/minio/minio-go/v7/pkg/tags"
"github.com/minio/minio/cmd/config/storageclass"
xhttp "github.com/minio/minio/cmd/http"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/dsync"
"github.com/minio/minio/pkg/madmin"
@ -659,274 +658,7 @@ func (z *erasureServerSets) ListObjectsV2(ctx context.Context, bucket, prefix, c
return listObjectsV2Info, err
}
func (z *erasureServerSets) listObjectsNonSlash(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (loi ListObjectsInfo, err error) {
serverSetsEntryChs := make([][]FileInfoCh, 0, len(z.serverSets))
serverSetsListTolerancePerSet := make([]int, 0, len(z.serverSets))
endWalkCh := make(chan struct{})
defer close(endWalkCh)
for _, zone := range z.serverSets {
serverSetsEntryChs = append(serverSetsEntryChs,
zone.startMergeWalksN(ctx, bucket, prefix, "", true, endWalkCh, zone.listTolerancePerSet, false))
if zone.listTolerancePerSet == -1 {
serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.setDriveCount/2)
} else {
serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.listTolerancePerSet-2)
}
}
var objInfos []ObjectInfo
var eof bool
var prevPrefix string
serverSetsEntriesInfos := make([][]FileInfo, 0, len(serverSetsEntryChs))
serverSetsEntriesValid := make([][]bool, 0, len(serverSetsEntryChs))
for _, entryChs := range serverSetsEntryChs {
serverSetsEntriesInfos = append(serverSetsEntriesInfos, make([]FileInfo, len(entryChs)))
serverSetsEntriesValid = append(serverSetsEntriesValid, make([]bool, len(entryChs)))
}
for {
if len(objInfos) == maxKeys {
break
}
result, quorumCount, zoneIndex, ok := lexicallySortedEntryZone(serverSetsEntryChs, serverSetsEntriesInfos, serverSetsEntriesValid)
if !ok {
eof = true
break
}
if quorumCount < serverSetsListTolerancePerSet[zoneIndex] {
// Skip entries which are not found on upto expected tolerance
continue
}
var objInfo ObjectInfo
index := strings.Index(strings.TrimPrefix(result.Name, prefix), delimiter)
if index == -1 {
objInfo = ObjectInfo{
IsDir: false,
Bucket: bucket,
Name: result.Name,
ModTime: result.ModTime,
Size: result.Size,
ContentType: result.Metadata["content-type"],
ContentEncoding: result.Metadata["content-encoding"],
}
// Extract etag from metadata.
objInfo.ETag = extractETag(result.Metadata)
// All the parts per object.
objInfo.Parts = result.Parts
// etag/md5Sum has already been extracted. We need to
// remove to avoid it from appearing as part of
// response headers. e.g, X-Minio-* or X-Amz-*.
objInfo.UserDefined = cleanMetadata(result.Metadata)
// Update storage class
if sc, ok := result.Metadata[xhttp.AmzStorageClass]; ok {
objInfo.StorageClass = sc
} else {
objInfo.StorageClass = globalMinioDefaultStorageClass
}
} else {
index = len(prefix) + index + len(delimiter)
currPrefix := result.Name[:index]
if currPrefix == prevPrefix {
continue
}
prevPrefix = currPrefix
objInfo = ObjectInfo{
Bucket: bucket,
Name: currPrefix,
IsDir: true,
}
}
if objInfo.Name <= marker {
continue
}
objInfos = append(objInfos, objInfo)
}
result := ListObjectsInfo{}
for _, objInfo := range objInfos {
if objInfo.IsDir {
result.Prefixes = append(result.Prefixes, objInfo.Name)
continue
}
result.Objects = append(result.Objects, objInfo)
}
if !eof {
result.IsTruncated = true
if len(objInfos) > 0 {
result.NextMarker = objInfos[len(objInfos)-1].Name
}
}
return result, nil
}
func (z *erasureServerSets) listObjectsSplunk(ctx context.Context, bucket, prefix, marker string, maxKeys int) (loi ListObjectsInfo, err error) {
if strings.Contains(prefix, guidSplunk) {
logger.LogIf(ctx, NotImplemented{})
return loi, NotImplemented{}
}
recursive := true
serverSetsEntryChs := make([][]FileInfoCh, 0, len(z.serverSets))
serverSetsEndWalkCh := make([]chan struct{}, 0, len(z.serverSets))
serverSetsListTolerancePerSet := make([]int, 0, len(z.serverSets))
for _, zone := range z.serverSets {
entryChs, endWalkCh := zone.poolSplunk.Release(listParams{bucket, recursive, marker, prefix})
if entryChs == nil {
endWalkCh = make(chan struct{})
entryChs = zone.startMergeWalksN(ctx, bucket, prefix, marker, recursive, endWalkCh, zone.listTolerancePerSet, true)
}
serverSetsEntryChs = append(serverSetsEntryChs, entryChs)
serverSetsEndWalkCh = append(serverSetsEndWalkCh, endWalkCh)
if zone.listTolerancePerSet == -1 {
serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.setDriveCount/2)
} else {
serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.listTolerancePerSet-2)
}
}
entries := mergeServerSetsEntriesCh(serverSetsEntryChs, maxKeys, serverSetsListTolerancePerSet)
if len(entries.Files) == 0 {
return loi, nil
}
loi.IsTruncated = entries.IsTruncated
if loi.IsTruncated {
loi.NextMarker = entries.Files[len(entries.Files)-1].Name
}
for _, entry := range entries.Files {
objInfo := entry.ToObjectInfo(bucket, entry.Name)
splits := strings.Split(objInfo.Name, guidSplunk)
if len(splits) == 0 {
loi.Objects = append(loi.Objects, objInfo)
continue
}
loi.Prefixes = append(loi.Prefixes, splits[0]+guidSplunk)
}
if loi.IsTruncated {
for i, zone := range z.serverSets {
zone.poolSplunk.Set(listParams{bucket, recursive, loi.NextMarker, prefix}, serverSetsEntryChs[i],
serverSetsEndWalkCh[i])
}
}
return loi, nil
}
func (z *erasureServerSets) listObjects(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
loi := ListObjectsInfo{}
if err := checkListObjsArgs(ctx, bucket, prefix, marker, z); err != nil {
return loi, err
}
// Marker is set validate pre-condition.
if marker != "" {
// Marker not common with prefix is not implemented. Send an empty response
if !HasPrefix(marker, prefix) {
return loi, nil
}
}
// With max keys of zero we have reached eof, return right here.
if maxKeys == 0 {
return loi, nil
}
// For delimiter and prefix as '/' we do not list anything at all
// since according to s3 spec we stop at the 'delimiter'
// along // with the prefix. On a flat namespace with 'prefix'
// as '/' we don't have any entries, since all the keys are
// of form 'keyName/...'
if delimiter == SlashSeparator && prefix == SlashSeparator {
return loi, nil
}
// Over flowing count - reset to maxObjectList.
if maxKeys < 0 || maxKeys > maxObjectList {
maxKeys = maxObjectList
}
if delimiter != SlashSeparator && delimiter != "" {
if delimiter == guidSplunk {
return z.listObjectsSplunk(ctx, bucket, prefix, marker, maxKeys)
}
return z.listObjectsNonSlash(ctx, bucket, prefix, marker, delimiter, maxKeys)
}
// Default is recursive, if delimiter is set then list non recursive.
recursive := true
if delimiter == SlashSeparator {
recursive = false
}
serverSetsEntryChs := make([][]FileInfoCh, 0, len(z.serverSets))
serverSetsEndWalkCh := make([]chan struct{}, 0, len(z.serverSets))
serverSetsListTolerancePerSet := make([]int, 0, len(z.serverSets))
for _, zone := range z.serverSets {
entryChs, endWalkCh := zone.pool.Release(listParams{bucket, recursive, marker, prefix})
if entryChs == nil {
endWalkCh = make(chan struct{})
entryChs = zone.startMergeWalksN(ctx, bucket, prefix, marker, recursive, endWalkCh, zone.listTolerancePerSet, false)
}
serverSetsEntryChs = append(serverSetsEntryChs, entryChs)
serverSetsEndWalkCh = append(serverSetsEndWalkCh, endWalkCh)
if zone.listTolerancePerSet == -1 {
serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.setDriveCount/2)
} else {
serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.listTolerancePerSet-2)
}
}
entries := mergeServerSetsEntriesCh(serverSetsEntryChs, maxKeys, serverSetsListTolerancePerSet)
if len(entries.Files) == 0 {
return loi, nil
}
loi.IsTruncated = entries.IsTruncated
if loi.IsTruncated {
loi.NextMarker = entries.Files[len(entries.Files)-1].Name
}
for _, entry := range entries.Files {
objInfo := entry.ToObjectInfo(entry.Volume, entry.Name)
if HasSuffix(objInfo.Name, SlashSeparator) && !recursive {
loi.Prefixes = append(loi.Prefixes, objInfo.Name)
continue
}
loi.Objects = append(loi.Objects, objInfo)
}
if loi.IsTruncated {
for i, zone := range z.serverSets {
zone.pool.Set(listParams{bucket, recursive, loi.NextMarker, prefix}, serverSetsEntryChs[i],
serverSetsEndWalkCh[i])
}
}
return loi, nil
}
// Calculate least entry across serverSets and across multiple FileInfo
// Calculate least entry across zones and across multiple FileInfo
// channels, returns the least common entry and the total number of times
// we found this entry. Additionally also returns a boolean
// to indicate if the caller needs to call this function
@ -1110,236 +842,57 @@ func lexicallySortedEntryZoneVersions(zoneEntryChs [][]FileInfoVersionsCh, zoneE
return lentry, lexicallySortedEntryCount, zoneIndex, isTruncated
}
// mergeServerSetsEntriesVersionsCh - merges FileInfoVersions channel to entries upto maxKeys.
func mergeServerSetsEntriesVersionsCh(serverSetsEntryChs [][]FileInfoVersionsCh, maxKeys int, serverSetsListTolerancePerSet []int) (entries FilesInfoVersions) {
var i = 0
serverSetsEntriesInfos := make([][]FileInfoVersions, 0, len(serverSetsEntryChs))
serverSetsEntriesValid := make([][]bool, 0, len(serverSetsEntryChs))
for _, entryChs := range serverSetsEntryChs {
serverSetsEntriesInfos = append(serverSetsEntriesInfos, make([]FileInfoVersions, len(entryChs)))
serverSetsEntriesValid = append(serverSetsEntriesValid, make([]bool, len(entryChs)))
}
for {
fi, quorumCount, zoneIndex, ok := lexicallySortedEntryZoneVersions(serverSetsEntryChs, serverSetsEntriesInfos, serverSetsEntriesValid)
if !ok {
// We have reached EOF across all entryChs, break the loop.
break
}
if quorumCount < serverSetsListTolerancePerSet[zoneIndex] {
// Skip entries which are not found upto the expected tolerance
continue
}
entries.FilesVersions = append(entries.FilesVersions, fi)
i++
if i == maxKeys {
entries.IsTruncated = isTruncatedServerSetsVersions(serverSetsEntryChs, serverSetsEntriesInfos, serverSetsEntriesValid)
break
}
}
return entries
}
// mergeServerSetsEntriesCh - merges FileInfo channel to entries upto maxKeys.
func mergeServerSetsEntriesCh(serverSetsEntryChs [][]FileInfoCh, maxKeys int, serverSetsListTolerancePerSet []int) (entries FilesInfo) {
var i = 0
serverSetsEntriesInfos := make([][]FileInfo, 0, len(serverSetsEntryChs))
serverSetsEntriesValid := make([][]bool, 0, len(serverSetsEntryChs))
for _, entryChs := range serverSetsEntryChs {
serverSetsEntriesInfos = append(serverSetsEntriesInfos, make([]FileInfo, len(entryChs)))
serverSetsEntriesValid = append(serverSetsEntriesValid, make([]bool, len(entryChs)))
}
var prevEntry string
for {
fi, quorumCount, zoneIndex, ok := lexicallySortedEntryZone(serverSetsEntryChs, serverSetsEntriesInfos, serverSetsEntriesValid)
if !ok {
// We have reached EOF across all entryChs, break the loop.
break
}
if quorumCount < serverSetsListTolerancePerSet[zoneIndex] {
// Skip entries which are not found upto configured tolerance.
continue
}
if HasSuffix(fi.Name, slashSeparator) && fi.Name == prevEntry {
continue
}
entries.Files = append(entries.Files, fi)
i++
if i == maxKeys {
entries.IsTruncated = isTruncatedServerSets(serverSetsEntryChs, serverSetsEntriesInfos, serverSetsEntriesValid)
break
}
prevEntry = fi.Name
}
return entries
}
func isTruncatedServerSets(zoneEntryChs [][]FileInfoCh, zoneEntries [][]FileInfo, zoneEntriesValid [][]bool) bool {
for i, entryChs := range zoneEntryChs {
for j := range entryChs {
zoneEntries[i][j], zoneEntriesValid[i][j] = entryChs[j].Pop()
}
}
var isTruncated = false
for _, entriesValid := range zoneEntriesValid {
for _, valid := range entriesValid {
if valid {
isTruncated = true
break
}
}
if isTruncated {
break
}
}
for i, entryChs := range zoneEntryChs {
for j := range entryChs {
if zoneEntriesValid[i][j] {
zoneEntryChs[i][j].Push(zoneEntries[i][j])
}
}
}
return isTruncated
}
func isTruncatedServerSetsVersions(zoneEntryChs [][]FileInfoVersionsCh, zoneEntries [][]FileInfoVersions, zoneEntriesValid [][]bool) bool {
for i, entryChs := range zoneEntryChs {
for j := range entryChs {
zoneEntries[i][j], zoneEntriesValid[i][j] = entryChs[j].Pop()
}
}
var isTruncated = false
for _, entriesValid := range zoneEntriesValid {
for _, valid := range entriesValid {
if !valid {
continue
}
isTruncated = true
break
}
if isTruncated {
break
}
}
for i, entryChs := range zoneEntryChs {
for j := range entryChs {
if zoneEntriesValid[i][j] {
zoneEntryChs[i][j].Push(zoneEntries[i][j])
}
}
}
return isTruncated
}
func (z *erasureServerSets) listObjectVersions(ctx context.Context, bucket, prefix, marker, versionMarker, delimiter string, maxKeys int) (ListObjectVersionsInfo, error) {
func (z *erasureServerSets) ListObjectVersions(ctx context.Context, bucket, prefix, marker, versionMarker, delimiter string, maxKeys int) (ListObjectVersionsInfo, error) {
loi := ListObjectVersionsInfo{}
if err := checkListObjsArgs(ctx, bucket, prefix, marker, z); err != nil {
return loi, err
}
// Marker is set validate pre-condition.
if marker != "" {
// Marker not common with prefix is not implemented. Send an empty response
if !HasPrefix(marker, prefix) {
return loi, nil
}
}
if marker == "" && versionMarker != "" {
return loi, NotImplemented{}
}
// With max keys of zero we have reached eof, return right here.
if maxKeys == 0 {
return loi, nil
merged, err := z.listPath(ctx, listPathOptions{
Bucket: bucket,
Prefix: prefix,
Separator: delimiter,
Limit: maxKeys,
Marker: marker,
InclDeleted: true,
})
if err != nil && err != io.EOF {
return loi, err
}
// For delimiter and prefix as '/' we do not list anything at all
// since according to s3 spec we stop at the 'delimiter'
// along // with the prefix. On a flat namespace with 'prefix'
// as '/' we don't have any entries, since all the keys are
// of form 'keyName/...'
if delimiter == SlashSeparator && prefix == SlashSeparator {
return loi, nil
}
// Over flowing count - reset to maxObjectList.
if maxKeys < 0 || maxKeys > maxObjectList {
maxKeys = maxObjectList
}
if delimiter != SlashSeparator && delimiter != "" {
return loi, NotImplemented{}
}
// Default is recursive, if delimiter is set then list non recursive.
recursive := true
if delimiter == SlashSeparator {
recursive = false
}
serverSetsEntryChs := make([][]FileInfoVersionsCh, 0, len(z.serverSets))
serverSetsEndWalkCh := make([]chan struct{}, 0, len(z.serverSets))
serverSetsListTolerancePerSet := make([]int, 0, len(z.serverSets))
for _, zone := range z.serverSets {
entryChs, endWalkCh := zone.poolVersions.Release(listParams{bucket, recursive, marker, prefix})
if entryChs == nil {
endWalkCh = make(chan struct{})
entryChs = zone.startMergeWalksVersionsN(ctx, bucket, prefix, marker, recursive, endWalkCh, zone.listTolerancePerSet)
}
serverSetsEntryChs = append(serverSetsEntryChs, entryChs)
serverSetsEndWalkCh = append(serverSetsEndWalkCh, endWalkCh)
if zone.listTolerancePerSet == -1 {
serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.setDriveCount/2)
} else {
serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.listTolerancePerSet-2)
}
}
entries := mergeServerSetsEntriesVersionsCh(serverSetsEntryChs, maxKeys, serverSetsListTolerancePerSet)
if len(entries.FilesVersions) == 0 {
return loi, nil
}
loi.IsTruncated = entries.IsTruncated
if loi.IsTruncated {
loi.NextMarker = entries.FilesVersions[len(entries.FilesVersions)-1].Name
}
for _, entry := range entries.FilesVersions {
for _, version := range entry.Versions {
objInfo := version.ToObjectInfo(bucket, entry.Name)
if HasSuffix(objInfo.Name, SlashSeparator) && !recursive {
loi.Prefixes = append(loi.Prefixes, objInfo.Name)
continue
}
loi.Objects = append(loi.Objects, objInfo)
}
loi.Objects, loi.Prefixes = merged.fileInfoVersions(bucket, prefix, delimiter, versionMarker)
loi.IsTruncated = err == nil && len(loi.Objects) > 0
if maxKeys > 0 && len(loi.Objects) > maxKeys {
loi.Objects = loi.Objects[:maxKeys]
loi.IsTruncated = true
}
if loi.IsTruncated {
for i, zone := range z.serverSets {
zone.poolVersions.Set(listParams{bucket, recursive, loi.NextMarker, prefix}, serverSetsEntryChs[i],
serverSetsEndWalkCh[i])
}
last := loi.Objects[len(loi.Objects)-1]
loi.NextMarker = encodeMarker(last.Name, merged.listID)
loi.NextVersionIDMarker = last.VersionID
}
return loi, nil
}
func (z *erasureServerSets) ListObjectVersions(ctx context.Context, bucket, prefix, marker, versionMarker, delimiter string, maxKeys int) (ListObjectVersionsInfo, error) {
return z.listObjectVersions(ctx, bucket, prefix, marker, versionMarker, delimiter, maxKeys)
}
func (z *erasureServerSets) ListObjects(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
return z.listObjects(ctx, bucket, prefix, marker, delimiter, maxKeys)
var loi ListObjectsInfo
merged, err := z.listPath(ctx, listPathOptions{
Bucket: bucket,
Prefix: prefix,
Separator: delimiter,
Limit: maxKeys,
Marker: marker,
InclDeleted: false,
})
if err != nil && err != io.EOF {
logger.LogIf(ctx, err)
return loi, err
}
// Default is recursive, if delimiter is set then list non recursive.
loi.Objects, loi.Prefixes = merged.fileInfos(bucket, prefix, delimiter)
loi.IsTruncated = err == nil && len(loi.Objects) > 0
if loi.IsTruncated {
loi.NextMarker = encodeMarker(loi.Objects[len(loi.Objects)-1].Name, merged.listID)
}
return loi, nil
}
func (z *erasureServerSets) ListMultipartUploads(ctx context.Context, bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
@ -1634,6 +1187,30 @@ func (z *erasureServerSets) DeleteBucket(ctx context.Context, bucket string, for
return nil
}
// deleteAll will delete a bucket+prefix unconditionally across all disks.
// Note that set distribution is ignored so it should only be used in cases where
// data is not distributed across sets.
// Errors are logged but individual disk failures are not returned.
func (z *erasureServerSets) deleteAll(ctx context.Context, bucket, prefix string) error {
var wg sync.WaitGroup
for _, servers := range z.serverSets {
for _, set := range servers.sets {
for _, disk := range set.getDisks() {
if disk == nil {
continue
}
wg.Add(1)
go func(disk StorageAPI) {
defer wg.Done()
logger.LogIf(ctx, disk.Delete(ctx, bucket, prefix, true))
}(disk)
}
}
}
wg.Wait()
return nil
}
// This function is used to undo a successful DeleteBucket operation.
func undoDeleteBucketServerSets(ctx context.Context, bucket string, serverSets []*erasureSets, errs []error) {
g := errgroup.WithNErrs(len(serverSets))

View File

@ -197,27 +197,13 @@ func TestNewErasureSets(t *testing.T) {
// TestHashedLayer - tests the hashed layer which will be returned
// consistently for a given object name.
func TestHashedLayer(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []*erasureObjects
for i := 0; i < 16; i++ {
obj, fsDirs, err := prepareErasure16(ctx)
if err != nil {
t.Fatal("Unable to initialize 'Erasure' object layer.", err)
}
defer obj.Shutdown(ctx)
// Remove all dirs.
for _, dir := range fsDirs {
defer os.RemoveAll(dir)
}
z := obj.(*erasureServerSets)
objs = append(objs, z.serverSets[0].sets[0])
// Test distribution with 16 sets.
var objs [16]*erasureObjects
for i := range objs {
objs[i] = &erasureObjects{}
}
sets := &erasureSets{sets: objs, distributionAlgo: "CRCMOD"}
sets := &erasureSets{sets: objs[:], distributionAlgo: "CRCMOD"}
testCases := []struct {
objectName string

View File

@ -360,7 +360,7 @@ func saveFormatErasure(disk StorageAPI, format *formatErasureV3, heal bool) erro
tmpFormat := mustGetUUID()
// Purge any existing temporary file, okay to ignore errors here.
defer disk.DeleteFile(context.TODO(), minioMetaBucket, tmpFormat)
defer disk.Delete(context.TODO(), minioMetaBucket, tmpFormat, false)
// write to unique file.
if err = disk.WriteAll(context.TODO(), minioMetaBucket, tmpFormat, bytes.NewReader(formatBytes)); err != nil {

View File

@ -297,7 +297,7 @@ func TestFSDeletes(t *testing.T) {
srcPath: "success-file",
expectedErr: nil,
},
// The file was deleted in the last case, so DeleteFile should fail.
// The file was deleted in the last case, so Delete should fail.
{
basePath: path,
srcVol: "success-vol",
@ -423,7 +423,7 @@ func TestFSRemoves(t *testing.T) {
expectedErr: nil,
},
// Test case - 2.
// The file was deleted in the last case, so DeleteFile should fail.
// The file was deleted in the last case, so Delete should fail.
{
srcFSPath: path,
srcVol: "success-vol",

View File

@ -402,6 +402,7 @@ func (fs *FSObjects) MakeBucketWithLocation(ctx context.Context, bucket string,
return BucketNameInvalid{Bucket: bucket}
}
defer ObjectPathUpdated(bucket + slashSeparator)
atomic.AddInt64(&fs.activeIOCount, 1)
defer func() {
atomic.AddInt64(&fs.activeIOCount, -1)

View File

@ -424,6 +424,17 @@ func extractAPIVersion(r *http.Request) string {
return regexVersion.FindString(r.URL.Path)
}
func methodNotAllowedHandler(api string) func(w http.ResponseWriter, r *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
code := "XMinio" + api + "VersionMismatch"
writeErrorResponseString(r.Context(), w, APIError{
Code: code,
Description: "Not allowed (" + r.Method + " " + r.URL.String() + " on " + api + " API)",
HTTPStatusCode: http.StatusMethodNotAllowed,
}, r.URL)
}
}
// If none of the http routes match respond with appropriate errors
func errorResponseHandler(w http.ResponseWriter, r *http.Request) {
if r.Method == http.MethodOptions {

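With this handler wired into the admin and S3 routers (see the router hunks earlier in this commit), a request using an unsupported method now gets a deliberate 405 naming the API instead of a generic error. For example, per the construction above, a stray PUT against the Admin API would produce (the URL shown is illustrative):

APIError{
	Code:           "XMinioAdminVersionMismatch",
	Description:    "Not allowed (PUT /minio/admin/... on Admin API)",
	HTTPStatusCode: http.StatusMethodNotAllowed,
}
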
View File

@ -124,7 +124,7 @@ func (api objectAPIHandlers) ListenNotificationHandler(w http.ResponseWriter, r
// Use buffered channel to take care of burst sends or slow w.Write()
listenCh := make(chan interface{}, 4000)
peers := newPeerRestClients(globalEndpoints)
peers, _ := newPeerRestClients(globalEndpoints)
globalHTTPListen.Subscribe(listenCh, ctx.Done(), func(evI interface{}) bool {
ev, ok := evI.(event.Event)

View File

@ -337,7 +337,7 @@ func logIf(ctx context.Context, err error, errKind ...interface{}) {
trace := getTrace(3)
// Get the cause for the Error
message := err.Error()
message := fmt.Sprintf("%v (%T)", err, err)
if req.DeploymentID == "" {
req.DeploymentID = globalDeploymentID
}

cmd/metacache-bucket.go (new file, 427 lines)
View File

@ -0,0 +1,427 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"runtime/debug"
"strings"
"sync"
"time"
"github.com/klauspost/compress/s2"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/hash"
"github.com/tinylib/msgp/msgp"
)
//go:generate msgp -file $GOFILE -unexported
// a bucketMetacache keeps track of all caches generated
// for a bucket.
type bucketMetacache struct {
// Name of bucket
bucket string
// caches indexed by id.
caches map[string]metacache
// Internal state
mu sync.RWMutex `msg:"-"`
updated bool `msg:"-"`
transient bool `msg:"-"` // bucket used for non-persisted caches.
}
// newBucketMetacache creates a new bucketMetacache.
func newBucketMetacache(bucket string) *bucketMetacache {
return &bucketMetacache{
bucket: bucket,
caches: make(map[string]metacache, 10),
}
}
// loadBucketMetaCache will load the cache from the object layer.
// If the cache cannot be found a new one is created.
func loadBucketMetaCache(ctx context.Context, bucket string) (*bucketMetacache, error) {
objAPI := newObjectLayerFn()
for objAPI == nil {
select {
case <-ctx.Done():
return nil, ctx.Err()
case <-time.After(250 * time.Millisecond):
}
objAPI = newObjectLayerFn()
if objAPI == nil {
logger.LogIf(ctx, fmt.Errorf("loadBucketMetaCache: object layer not ready. bucket: %q", bucket))
}
}
var meta bucketMetacache
var decErr error
var wg sync.WaitGroup
wg.Add(1)
r, w := io.Pipe()
go func() {
defer wg.Done()
dec := s2DecPool.Get().(*s2.Reader)
dec.Reset(r)
decErr = meta.DecodeMsg(msgp.NewReader(dec))
dec.Reset(nil)
s2DecPool.Put(dec)
r.CloseWithError(decErr)
}()
// Use global context for this.
err := objAPI.GetObject(GlobalContext, minioMetaBucket, pathJoin("buckets", bucket, ".metacache", "index.s2"), 0, -1, w, "", ObjectOptions{})
logger.LogIf(ctx, w.CloseWithError(err))
if err != nil {
if isErrObjectNotFound(err) {
err = nil
} else {
logger.LogIf(ctx, err)
}
return newBucketMetacache(bucket), err
}
wg.Wait()
if decErr != nil {
// Log the error, but assume the data is lost and return a fresh bucket.
// Otherwise a broken cache will never recover.
logger.LogIf(ctx, decErr)
return newBucketMetacache(bucket), nil
}
// Sanity check...
if meta.bucket != bucket {
logger.Info("loadBucketMetaCache: loaded cache name mismatch, want %s, got %s. Discarding.", bucket, meta.bucket)
return newBucketMetacache(bucket), nil
}
return &meta, nil
}
// save the bucket cache to the object storage.
func (b *bucketMetacache) save(ctx context.Context) error {
if b.transient {
return nil
}
objAPI := newObjectLayerFn()
if objAPI == nil {
return errServerNotInitialized
}
// Keep lock while we marshal.
// We need a write lock since we update 'updated'
b.mu.Lock()
if !b.updated {
b.mu.Unlock()
return nil
}
// Save as s2 compressed msgpack
tmp := bytes.NewBuffer(make([]byte, 0, b.Msgsize()))
enc := s2.NewWriter(tmp)
err := msgp.Encode(enc, b)
if err != nil {
b.mu.Unlock()
return err
}
err = enc.Close()
if err != nil {
b.mu.Unlock()
return err
}
b.updated = false
b.mu.Unlock()
hr, err := hash.NewReader(tmp, int64(tmp.Len()), "", "", int64(tmp.Len()), false)
if err != nil {
return err
}
_, err = objAPI.PutObject(ctx, minioMetaBucket, pathJoin("buckets", b.bucket, ".metacache", "index.s2"), NewPutObjReader(hr, nil, nil), ObjectOptions{})
logger.LogIf(ctx, err)
return err
}
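// A minimal sketch (illustrative, not part of this change) of the
// s2-compressed msgpack round trip that save() writes and
// loadBucketMetaCache() reads back; the bucket name is hypothetical.
func exampleBucketMetacacheRoundTrip() error {
	src := newBucketMetacache("example-bucket")
	var buf bytes.Buffer
	enc := s2.NewWriter(&buf)
	if err := msgp.Encode(enc, src); err != nil {
		return err
	}
	if err := enc.Close(); err != nil {
		return err
	}
	var dst bucketMetacache
	if err := msgp.Decode(s2.NewReader(&buf), &dst); err != nil {
		return err
	}
	// dst.bucket now equals "example-bucket".
	return nil
}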
// findCache will attempt to find a matching cache for the provided options.
// If a cache with the same ID exists already it will be returned.
// If none can be found, a new one is created with the provided ID.
func (b *bucketMetacache) findCache(o listPathOptions) metacache {
if b == nil {
logger.Info("bucketMetacache.findCache: nil cache for bucket %s", o.Bucket)
return metacache{}
}
if o.Bucket != b.bucket && !b.transient {
logger.Info("bucketMetacache.findCache: bucket %s does not match this bucket %s", o.Bucket, b.bucket)
debug.PrintStack()
return metacache{}
}
debugPrint := func(msg string, data ...interface{}) {}
if false {
debugPrint = logger.Info
}
// Grab a write lock, since we create one if we cannot find one.
if o.Create {
b.mu.Lock()
defer b.mu.Unlock()
} else {
b.mu.RLock()
defer b.mu.RUnlock()
}
// Check if exists already.
if c, ok := b.caches[o.ID]; ok {
return c
}
var best metacache
for _, cached := range b.caches {
// Never return transient caches if there is no id.
if b.transient {
break
}
if cached.status == scanStateError || cached.dataVersion != metacacheStreamVersion {
debugPrint("cache %s state or stream version mismatch", cached.id)
continue
}
if cached.startedCycle < o.OldestCycle {
debugPrint("cache %s cycle too old", cached.id)
continue
}
// Root of what we are looking for must at least have the cached root as its prefix.
if !strings.HasPrefix(o.BaseDir, cached.root) {
debugPrint("cache %s prefix mismatch, cached:%v, want:%v", cached.id, cached.root, o.BaseDir)
continue
}
// If the existing listing wasn't recursive root must match.
if !cached.recursive && o.BaseDir != cached.root {
debugPrint("cache %s non rec prefix mismatch, cached:%v, want:%v", cached.id, cached.root, o.BaseDir)
continue
}
if o.Recursive && !cached.recursive {
debugPrint("cache %s not recursive", cached.id)
// If this is recursive the cached listing must be as well.
continue
}
if o.Separator != slashSeparator && !cached.recursive {
debugPrint("cache %s not slashsep and not recursive", cached.id)
// Non slash separator requires recursive.
continue
}
if cached.ended.IsZero() && time.Since(cached.lastUpdate) > metacacheMaxRunningAge {
debugPrint("cache %s not running, time: %v", cached.id, time.Since(cached.lastUpdate))
// Abandoned
continue
}
if !cached.ended.IsZero() && cached.endedCycle <= o.OldestCycle {
debugPrint("cache %s ended and cycle (%v) <= oldest allowed (%v)", cached.id, cached.endedCycle, o.OldestCycle)
// If scan has ended the oldest requested must be less.
continue
}
if cached.started.Before(best.started) {
debugPrint("cache %s disregarded - we have a better", cached.id)
// If we already have a newer, keep that.
continue
}
best = cached
}
if !best.started.IsZero() {
if o.Create {
best.lastHandout = UTCNow()
b.caches[best.id] = best
b.updated = true
}
debugPrint("returning cached")
return best
}
if !o.Create {
return metacache{
id: o.ID,
bucket: o.Bucket,
status: scanStateNone,
}
}
// Create new and add.
best = o.newMetacache()
b.caches[o.ID] = best
b.updated = true
return best
}
// cleanup removes redundant and outdated entries.
func (b *bucketMetacache) cleanup() {
// Entries to remove.
remove := make(map[string]struct{})
currentCycle := intDataUpdateTracker.current()
debugPrint := func(msg string, data ...interface{}) {}
if false {
debugPrint = logger.Info
}
b.mu.RLock()
for id, cache := range b.caches {
if b.transient && time.Since(cache.started) > time.Hour {
// Keep transient caches only for 1 hour.
remove[id] = struct{}{}
}
if !cache.worthKeeping(currentCycle) {
debugPrint("cache %s not worth keeping", id)
remove[id] = struct{}{}
}
if cache.id != id {
logger.Info("cache ID mismatch %s != %s", id, cache.id)
remove[id] = struct{}{}
}
if cache.bucket != b.bucket && !b.transient {
logger.Info("cache bucket mismatch %s != %s", b.bucket, cache.bucket)
remove[id] = struct{}{}
}
}
// Check all non-deleted against each other.
// O(n*n), but should still be rather quick.
for id, cache := range b.caches {
if _, ok := remove[id]; ok {
continue
}
for _, cache2 := range b.caches {
if cache.canBeReplacedBy(&cache2) {
debugPrint("cache %s can be replaced by %s", id, cache2.id)
remove[id] = struct{}{}
break
} else {
debugPrint("cache %s can be NOT replaced by %s", id, cache2.id)
}
}
}
b.mu.RUnlock()
for id := range remove {
b.deleteCache(id)
}
}
// updateCache will update a cache by id.
// If the cache cannot be found nil is returned.
// The bucket cache will be locked until done is called.
func (b *bucketMetacache) updateCache(id string) (cache *metacache, done func()) {
b.mu.Lock()
c, ok := b.caches[id]
if !ok {
b.mu.Unlock()
return nil, func() {}
}
return &c, func() {
c.lastUpdate = UTCNow()
b.caches[id] = c
b.mu.Unlock()
}
}
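// A minimal sketch (illustrative, not part of this change) of the
// update-until-done pattern: the bucket stays locked until done() is
// called, which stamps lastUpdate and publishes the modified copy.
func exampleUpdateCache(b *bucketMetacache, id string) {
	cache, done := b.updateCache(id)
	defer done()
	if cache == nil {
		return // unknown id; done() is a no-op here
	}
	cache.status = scanStateStarted // hypothetical state change
}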
// updateCacheEntry will update a cache.
// Returns the updated status.
func (b *bucketMetacache) updateCacheEntry(update metacache) (metacache, error) {
b.mu.Lock()
defer b.mu.Unlock()
existing, ok := b.caches[update.id]
if !ok {
logger.Info("updateCacheEntry: bucket %s list id %v not found", b.bucket, update.id)
return update, errFileNotFound
}
existing.lastUpdate = UTCNow()
if existing.status == scanStateStarted && update.status != scanStateStarted {
existing.status = update.status
}
if existing.status == scanStateSuccess && update.status == scanStateSuccess {
existing.ended = UTCNow()
existing.endedCycle = update.endedCycle
}
if existing.error == "" && update.error != "" {
existing.error = update.error
existing.status = scanStateError
}
existing.fileNotFound = existing.fileNotFound || update.fileNotFound
b.caches[update.id] = existing
b.updated = true
return existing, nil
}
// getCache will return a clone of a specific metacache.
// Will return nil if the cache doesn't exist.
func (b *bucketMetacache) getCache(id string) *metacache {
b.mu.RLock()
c, ok := b.caches[id]
b.mu.RUnlock()
if !ok {
return nil
}
return &c
}
// deleteAll will delete all on disk data for ALL caches.
// Deletes are performed concurrently.
func (b *bucketMetacache) deleteAll() {
b.mu.Lock()
defer b.mu.Unlock()
ctx := context.Background()
ez, ok := newObjectLayerFn().(*erasureServerSets)
if !ok {
logger.LogIf(ctx, errors.New("bucketMetacache: expected objAPI to be *erasureZones"))
return
}
var wg sync.WaitGroup
for id := range b.caches {
wg.Add(1)
go func(cache metacache) {
defer wg.Done()
logger.LogIf(ctx, ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(cache.bucket, cache.id)))
}(b.caches[id])
delete(b.caches, id)
}
wg.Wait()
}
// deleteCache will delete a specific cache and all files related to it across the cluster.
func (b *bucketMetacache) deleteCache(id string) {
b.mu.Lock()
c, ok := b.caches[id]
if ok {
delete(b.caches, id)
b.updated = true
}
b.mu.Unlock()
if ok {
ctx := context.Background()
objAPI := newObjectLayerFn()
if objAPI == nil {
logger.LogIf(ctx, errors.New("bucketMetacache: no object layer"))
return
}
ez, ok := objAPI.(*erasureServerSets)
if !ok {
logger.LogIf(ctx, errors.New("bucketMetacache: expected objAPI to be *erasureServerSets"))
return
}
logger.LogIf(ctx, ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(c.bucket, c.id)))
}
}

cmd/metacache-bucket_gen.go Normal file

@ -0,0 +1,209 @@
package cmd
// Code generated by github.com/tinylib/msgp DO NOT EDIT.
import (
"github.com/tinylib/msgp/msgp"
)
// DecodeMsg implements msgp.Decodable
func (z *bucketMetacache) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
var zb0001 uint32
zb0001, err = dc.ReadMapHeader()
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, err = dc.ReadMapKeyPtr()
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "bucket":
z.bucket, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "bucket")
return
}
case "caches":
var zb0002 uint32
zb0002, err = dc.ReadMapHeader()
if err != nil {
err = msgp.WrapError(err, "caches")
return
}
if z.caches == nil {
z.caches = make(map[string]metacache, zb0002)
} else if len(z.caches) > 0 {
for key := range z.caches {
delete(z.caches, key)
}
}
for zb0002 > 0 {
zb0002--
var za0001 string
var za0002 metacache
za0001, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "caches")
return
}
err = za0002.DecodeMsg(dc)
if err != nil {
err = msgp.WrapError(err, "caches", za0001)
return
}
z.caches[za0001] = za0002
}
default:
err = dc.Skip()
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
return
}
// EncodeMsg implements msgp.Encodable
func (z *bucketMetacache) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 2
// write "bucket"
err = en.Append(0x82, 0xa6, 0x62, 0x75, 0x63, 0x6b, 0x65, 0x74)
if err != nil {
return
}
err = en.WriteString(z.bucket)
if err != nil {
err = msgp.WrapError(err, "bucket")
return
}
// write "caches"
err = en.Append(0xa6, 0x63, 0x61, 0x63, 0x68, 0x65, 0x73)
if err != nil {
return
}
err = en.WriteMapHeader(uint32(len(z.caches)))
if err != nil {
err = msgp.WrapError(err, "caches")
return
}
for za0001, za0002 := range z.caches {
err = en.WriteString(za0001)
if err != nil {
err = msgp.WrapError(err, "caches")
return
}
err = za0002.EncodeMsg(en)
if err != nil {
err = msgp.WrapError(err, "caches", za0001)
return
}
}
return
}
// MarshalMsg implements msgp.Marshaler
func (z *bucketMetacache) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 2
// string "bucket"
o = append(o, 0x82, 0xa6, 0x62, 0x75, 0x63, 0x6b, 0x65, 0x74)
o = msgp.AppendString(o, z.bucket)
// string "caches"
o = append(o, 0xa6, 0x63, 0x61, 0x63, 0x68, 0x65, 0x73)
o = msgp.AppendMapHeader(o, uint32(len(z.caches)))
for za0001, za0002 := range z.caches {
o = msgp.AppendString(o, za0001)
o, err = za0002.MarshalMsg(o)
if err != nil {
err = msgp.WrapError(err, "caches", za0001)
return
}
}
return
}
// UnmarshalMsg implements msgp.Unmarshaler
func (z *bucketMetacache) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
var zb0001 uint32
zb0001, bts, err = msgp.ReadMapHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, bts, err = msgp.ReadMapKeyZC(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "bucket":
z.bucket, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "bucket")
return
}
case "caches":
var zb0002 uint32
zb0002, bts, err = msgp.ReadMapHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err, "caches")
return
}
if z.caches == nil {
z.caches = make(map[string]metacache, zb0002)
} else if len(z.caches) > 0 {
for key := range z.caches {
delete(z.caches, key)
}
}
for zb0002 > 0 {
var za0001 string
var za0002 metacache
zb0002--
za0001, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "caches")
return
}
bts, err = za0002.UnmarshalMsg(bts)
if err != nil {
err = msgp.WrapError(err, "caches", za0001)
return
}
z.caches[za0001] = za0002
}
default:
bts, err = msgp.Skip(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
o = bts
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *bucketMetacache) Msgsize() (s int) {
s = 1 + 7 + msgp.StringPrefixSize + len(z.bucket) + 7 + msgp.MapHeaderSize
if z.caches != nil {
for za0001, za0002 := range z.caches {
_ = za0002
s += msgp.StringPrefixSize + len(za0001) + za0002.Msgsize()
}
}
return
}


@ -0,0 +1,123 @@
package cmd
// Code generated by github.com/tinylib/msgp DO NOT EDIT.
import (
"bytes"
"testing"
"github.com/tinylib/msgp/msgp"
)
func TestMarshalUnmarshalbucketMetacache(t *testing.T) {
v := bucketMetacache{}
bts, err := v.MarshalMsg(nil)
if err != nil {
t.Fatal(err)
}
left, err := v.UnmarshalMsg(bts)
if err != nil {
t.Fatal(err)
}
if len(left) > 0 {
t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left)
}
left, err = msgp.Skip(bts)
if err != nil {
t.Fatal(err)
}
if len(left) > 0 {
t.Errorf("%d bytes left over after Skip(): %q", len(left), left)
}
}
func BenchmarkMarshalMsgbucketMetacache(b *testing.B) {
v := bucketMetacache{}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
v.MarshalMsg(nil)
}
}
func BenchmarkAppendMsgbucketMetacache(b *testing.B) {
v := bucketMetacache{}
bts := make([]byte, 0, v.Msgsize())
bts, _ = v.MarshalMsg(bts[0:0])
b.SetBytes(int64(len(bts)))
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
bts, _ = v.MarshalMsg(bts[0:0])
}
}
func BenchmarkUnmarshalbucketMetacache(b *testing.B) {
v := bucketMetacache{}
bts, _ := v.MarshalMsg(nil)
b.ReportAllocs()
b.SetBytes(int64(len(bts)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := v.UnmarshalMsg(bts)
if err != nil {
b.Fatal(err)
}
}
}
func TestEncodeDecodebucketMetacache(t *testing.T) {
v := bucketMetacache{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
m := v.Msgsize()
if buf.Len() > m {
t.Log("WARNING: TestEncodeDecodebucketMetacache Msgsize() is inaccurate")
}
vn := bucketMetacache{}
err := msgp.Decode(&buf, &vn)
if err != nil {
t.Error(err)
}
buf.Reset()
msgp.Encode(&buf, &v)
err = msgp.NewReader(&buf).Skip()
if err != nil {
t.Error(err)
}
}
func BenchmarkEncodebucketMetacache(b *testing.B) {
v := bucketMetacache{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
b.SetBytes(int64(buf.Len()))
en := msgp.NewWriter(msgp.Nowhere)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
v.EncodeMsg(en)
}
en.Flush()
}
func BenchmarkDecodebucketMetacache(b *testing.B) {
v := bucketMetacache{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
b.SetBytes(int64(buf.Len()))
rd := msgp.NewEndlessReader(buf.Bytes(), b)
dc := msgp.NewReader(rd)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
err := v.DecodeMsg(dc)
if err != nil {
b.Fatal(err)
}
}
}

cmd/metacache-entries.go Normal file

@ -0,0 +1,559 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bytes"
"io"
"os"
"sort"
"strings"
)
// metaCacheEntry is an object or a directory within an unknown bucket.
type metaCacheEntry struct {
// name is the full name of the object including prefixes
name string
// Metadata. If none is present it is not an object but only a prefix.
// Entries without metadata will only be present in non-recursive scans.
metadata []byte
// cached contains the metadata if decoded.
cached *FileInfo
}
// isDir returns if the entry is representing a prefix directory.
func (e metaCacheEntry) isDir() bool {
return len(e.metadata) == 0
}
// isObject returns if the entry is representing an object.
func (e metaCacheEntry) isObject() bool {
return len(e.metadata) > 0
}
// hasPrefix returns whether an entry has a specific prefix
func (e metaCacheEntry) hasPrefix(s string) bool {
return strings.HasPrefix(e.name, s)
}
// likelyMatches returns if the entries match by comparing name and metadata length.
func (e *metaCacheEntry) likelyMatches(other *metaCacheEntry) bool {
// This should reject 99%
if len(e.metadata) != len(other.metadata) || e.name != other.name {
return false
}
return true
}
// matches returns if the entries match by comparing their latest version fileinfo.
func (e *metaCacheEntry) matches(other *metaCacheEntry, bucket string) bool {
if e == nil && other == nil {
return true
}
if e == nil || other == nil {
return false
}
// This should reject 99%
if len(e.metadata) != len(other.metadata) || e.name != other.name {
return false
}
eFi, eErr := e.fileInfo(bucket)
oFi, oErr := other.fileInfo(bucket)
if eErr != nil || oErr != nil {
return eErr == oErr
}
return eFi.ModTime.Equal(oFi.ModTime) && eFi.Size == oFi.Size && eFi.VersionID == oFi.VersionID
}
// isInDir returns whether the entry is in the dir when considering the separator.
func (e metaCacheEntry) isInDir(dir, separator string) bool {
if len(dir) == 0 {
// Root
idx := strings.Index(e.name, separator)
return idx == -1 || idx == len(e.name)-len(separator)
}
ext := strings.TrimPrefix(e.name, dir)
if len(ext) != len(e.name) {
idx := strings.Index(ext, separator)
// If separator is not found or is last entry, ok.
return idx == -1 || idx == len(ext)-len(separator)
}
return false
}
// isLatestDeletemarker returns whether the latest version is a delete marker.
// If metadata is NOT versioned false will always be returned.
// If the metadata is xlMetaV2 but cannot be loaded, true will be returned.
func (e *metaCacheEntry) isLatestDeletemarker() bool {
if e.cached != nil {
return e.cached.Deleted
}
if !isXL2V1Format(e.metadata) {
return false
}
var xlMeta xlMetaV2
if err := xlMeta.Load(e.metadata); err != nil || len(xlMeta.Versions) == 0 {
return true
}
return xlMeta.Versions[len(xlMeta.Versions)-1].Type == DeleteType
}
// fileInfo returns the decoded metadata.
// If the entry is a directory, a directory FileInfo is returned.
// If versioned the latest version will be returned.
func (e *metaCacheEntry) fileInfo(bucket string) (*FileInfo, error) {
if e.isDir() {
return &FileInfo{
Volume: bucket,
Name: e.name,
Mode: os.ModeDir,
}, nil
}
if e.cached == nil {
fi, err := getFileInfo(e.metadata, bucket, e.name, "")
if err != nil {
return nil, err
}
e.cached = &fi
}
return e.cached, nil
}
// fileInfoVersions returns the metadata as FileInfoVersions.
// If the entry is a directory, a single directory version is returned.
func (e *metaCacheEntry) fileInfoVersions(bucket string) (FileInfoVersions, error) {
if e.isDir() {
return FileInfoVersions{
Volume: bucket,
Name: e.name,
Versions: []FileInfo{
{
Volume: bucket,
Name: e.name,
Mode: os.ModeDir,
},
},
}, nil
}
return getFileInfoVersions(e.metadata, bucket, e.name)
}
// metaCacheEntries is a slice of metacache entries.
type metaCacheEntries []metaCacheEntry
// less function for sorting.
func (m metaCacheEntries) less(i, j int) bool {
return m[i].name < m[j].name
}
// sort entries by name.
// m is sorted and a sorted metadata object is returned.
// Changes to m will also be reflected in the returned object.
func (m metaCacheEntries) sort() metaCacheEntriesSorted {
if m.isSorted() {
return metaCacheEntriesSorted{o: m}
}
sort.Slice(m, m.less)
return metaCacheEntriesSorted{o: m}
}
// isSorted returns whether the objects are sorted.
// This is usually orders of magnitude faster than actually sorting.
func (m metaCacheEntries) isSorted() bool {
return sort.SliceIsSorted(m, m.less)
}
// shallowClone will create a shallow clone of the array objects,
// but object metadata will not be cloned.
func (m metaCacheEntries) shallowClone() metaCacheEntries {
dst := make(metaCacheEntries, len(m))
copy(dst, m)
return dst
}
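// A minimal sketch (illustrative, not part of this change): shallowClone
// copies the entry structs, but the metadata byte slices still point at
// the same backing arrays as the source.
func exampleShallowClone() {
	src := metaCacheEntries{{name: "a/obj", metadata: []byte("meta")}}
	dst := src.shallowClone()
	dst[0].name = "b/obj"    // independent copy of the struct
	dst[0].metadata[0] = 'M' // visible through src[0].metadata too
	_ = src
}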
type metadataResolutionParams struct {
dirQuorum int // Number of disks needed for a directory to 'exist'.
objQuorum int // Number of disks needed for an object to 'exist'.
bucket string // Name of the bucket. Used for generating cached fileinfo.
}
func (m metaCacheEntries) resolve(r *metadataResolutionParams) (selected *metaCacheEntry, ok bool) {
if len(m) == 0 {
return nil, false
}
dirExists := 0
objExists := 0
var selFIV *FileInfo
for i := range m {
entry := &m[i]
if entry.name == "" {
continue
}
if entry.isDir() {
dirExists++
selected = entry
continue
}
// Get new entry metadata
objExists++
fiv, err := entry.fileInfo(r.bucket)
if err != nil {
continue
}
if selFIV == nil {
selected = entry
selFIV = fiv
continue
}
if selected.matches(entry, r.bucket) {
continue
}
// Select latest modtime.
if fiv.ModTime.After(selFIV.ModTime) {
selected = entry
selFIV = fiv
continue
}
}
// If directory, we need quorum.
if dirExists > 0 && dirExists < r.dirQuorum {
return nil, false
}
if objExists < r.objQuorum {
return nil, false
}
// Take the latest selected.
return selected, selected != nil
}
// names will return all names in order.
// Since this allocates it should not be used in critical functions.
func (m metaCacheEntries) names() []string {
res := make([]string, 0, len(m))
for _, obj := range m {
res = append(res, obj.name)
}
return res
}
// metaCacheEntriesSorted contains metacache entries that are sorted.
type metaCacheEntriesSorted struct {
o metaCacheEntries
// list id is not serialized
listID string
}
// writeTo will write all objects to the provided output.
func (m metaCacheEntriesSorted) writeTo(writer io.Writer) error {
w := newMetacacheWriter(writer, 1<<20)
if err := w.write(m.o...); err != nil {
w.Close()
return err
}
return w.Close()
}
// shallowClone will create a shallow clone of the array objects,
// but object metadata will not be cloned.
func (m metaCacheEntriesSorted) shallowClone() metaCacheEntriesSorted {
// We have value receiver so we already have a copy.
m.o = m.o.shallowClone()
return m
}
// iterate the entries in order.
// If the iterator function returns false, iterating stops.
func (m *metaCacheEntriesSorted) iterate(fn func(entry metaCacheEntry) (cont bool)) {
if m == nil {
return
}
for _, o := range m.o {
if !fn(o) {
return
}
}
}
// fileInfoVersions converts the metadata to FileInfoVersions where possible.
// Metadata that cannot be decoded is skipped.
func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, afterV string) (versions []ObjectInfo, commonPrefixes []string) {
versions = make([]ObjectInfo, 0, m.len())
prevPrefix := ""
for _, entry := range m.o {
if entry.isObject() {
fiv, err := entry.fileInfoVersions(bucket)
if afterV != "" {
// Forward first entry to specified version
fiv.forwardPastVersion(afterV)
afterV = ""
}
if err == nil {
for _, version := range fiv.Versions {
versions = append(versions, version.ToObjectInfo(bucket, entry.name))
}
}
continue
}
if entry.isDir() {
if delimiter == "" {
continue
}
idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter)
if idx < 0 {
continue
}
idx = len(prefix) + idx + len(delimiter)
currPrefix := entry.name[:idx]
if currPrefix == prevPrefix {
continue
}
prevPrefix = currPrefix
commonPrefixes = append(commonPrefixes, currPrefix)
continue
}
}
return versions, commonPrefixes
}
// fileInfos converts the metadata to ObjectInfo where possible.
// Metadata that cannot be decoded is skipped.
func (m *metaCacheEntriesSorted) fileInfos(bucket, prefix, delimiter string) (objects []ObjectInfo, commonPrefixes []string) {
objects = make([]ObjectInfo, 0, m.len())
prevPrefix := ""
for _, entry := range m.o {
if entry.isObject() {
fi, err := entry.fileInfo(bucket)
if err == nil {
objects = append(objects, fi.ToObjectInfo(bucket, entry.name))
}
continue
}
if entry.isDir() {
if delimiter == "" {
continue
}
idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter)
if idx < 0 {
continue
}
idx = len(prefix) + idx + len(delimiter)
currPrefix := entry.name[:idx]
if currPrefix == prevPrefix {
continue
}
prevPrefix = currPrefix
commonPrefixes = append(commonPrefixes, currPrefix)
continue
}
}
return objects, commonPrefixes
}
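// Worked example (illustrative, not part of this change): with prefix
// "src/" and delimiter "/", the dir entry "src/compress/flate/" trims to
// "compress/flate/", the first "/" is at index 8, so idx becomes
// len("src/")+8+len("/") = 13 and the common prefix is
// entry.name[:13] == "src/compress/".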
// forwardTo will truncate m so only entries that are s or after remain in the list.
func (m *metaCacheEntriesSorted) forwardTo(s string) {
if s == "" {
return
}
idx := sort.Search(len(m.o), func(i int) bool {
return m.o[i].name >= s
})
m.o = m.o[idx:]
}
// merge will merge other into m.
// If the same entry exists in both and the metadata matches, only one is added,
// otherwise the entry from m will be placed first.
// Operation time is expected to be O(n+m).
func (m *metaCacheEntriesSorted) merge(other metaCacheEntriesSorted, limit int) {
merged := make(metaCacheEntries, 0, m.len()+other.len())
a := m.entries()
b := other.entries()
for len(a) > 0 && len(b) > 0 {
if a[0].name == b[0].name && bytes.Equal(a[0].metadata, b[0].metadata) {
// Same, discard one.
merged = append(merged, a[0])
a = a[1:]
b = b[1:]
} else if a[0].name < b[0].name {
merged = append(merged, a[0])
a = a[1:]
} else {
merged = append(merged, b[0])
b = b[1:]
}
if limit > 0 && len(merged) >= limit {
break
}
}
// Append anything left.
if limit < 0 || len(merged) < limit {
merged = append(merged, a...)
merged = append(merged, b...)
}
m.o = merged
}
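// A minimal sketch (illustrative, not part of this change) of the merge
// contract: global name order is kept and byte-identical entries collapse.
func exampleMerge() {
	a := metaCacheEntriesSorted{o: metaCacheEntries{{name: "a"}, {name: "c"}}}
	b := metaCacheEntriesSorted{o: metaCacheEntries{{name: "a"}, {name: "b"}}}
	a.merge(b, -1) // -1 means no limit
	// a.entries().names() is now ["a", "b", "c"]; the duplicate "a" was dropped.
}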
// filter allows selective filtering with the provided function.
func (m *metaCacheEntriesSorted) filter(fn func(entry *metaCacheEntry) bool) {
dst := m.o[:0]
for _, o := range m.o {
if fn(&o) {
dst = append(dst, o)
}
}
m.o = dst
}
// filterPrefix will filter m to only contain entries with the specified prefix.
func (m *metaCacheEntriesSorted) filterPrefix(s string) {
if s == "" {
return
}
m.forwardTo(s)
for i, o := range m.o {
if !o.hasPrefix(s) {
m.o = m.o[:i]
break
}
}
}
// filterObjectsOnly will remove prefix directories.
// Order is preserved, but the underlying slice is modified.
func (m *metaCacheEntriesSorted) filterObjectsOnly() {
dst := m.o[:0]
for _, o := range m.o {
if !o.isDir() {
dst = append(dst, o)
}
}
m.o = dst
}
// filterPrefixesOnly will remove objects.
// Order is preserved, but the underlying slice is modified.
func (m *metaCacheEntriesSorted) filterPrefixesOnly() {
dst := m.o[:0]
for _, o := range m.o {
if o.isDir() {
dst = append(dst, o)
}
}
m.o = dst
}
// filterRecursiveEntries will keep only entries that have the given prefix and whose remaining name does not contain the separator.
// This can be used to remove recursive listings.
// To return root elements only set prefix to an empty string.
// Order is preserved, but the underlying slice is modified.
func (m *metaCacheEntriesSorted) filterRecursiveEntries(prefix, separator string) {
dst := m.o[:0]
if prefix != "" {
m.forwardTo(prefix)
for _, o := range m.o {
ext := strings.TrimPrefix(o.name, prefix)
if len(ext) != len(o.name) {
if !strings.Contains(ext, separator) {
dst = append(dst, o)
}
}
}
} else {
// No prefix, simpler
for _, o := range m.o {
if !strings.Contains(o.name, separator) {
dst = append(dst, o)
}
}
}
m.o = dst
}
// truncate the number of entries to maximum n.
func (m *metaCacheEntriesSorted) truncate(n int) {
if m == nil {
return
}
if len(m.o) > n {
m.o = m.o[:n]
}
}
// len returns the number of objects and prefix dirs in m.
func (m *metaCacheEntriesSorted) len() int {
if m == nil {
return 0
}
return len(m.o)
}
// entries returns the underlying objects as they are currently represented.
func (m *metaCacheEntriesSorted) entries() metaCacheEntries {
if m == nil {
return nil
}
return m.o
}
// deduplicate entries in the list.
// If compareMeta is set it will be used to resolve conflicts.
// The function should return whether the existing entry should be replaced with other.
// If no compareMeta is provided duplicates may be left.
// This is indicated by the returned boolean.
func (m *metaCacheEntriesSorted) deduplicate(compareMeta func(existing, other *metaCacheEntry) (replace bool)) (dupesLeft bool) {
dst := m.o[:0]
for j := range m.o {
found := false
obj := &m.o[j]
for i := len(dst) - 1; i >= 0; i++ {
existing := &dst[i]
if existing.name != obj.name {
break
}
// Use given resolution function first if any.
if compareMeta != nil {
if compareMeta(existing, obj) {
dst[i] = *obj
}
found = true
break
}
if obj.likelyMatches(existing) {
found = true
break
}
// Name matches but metadata differs and no resolver was given; note the duplicate and keep scanning.
dupesLeft = true
continue
}
if !found {
dst = append(dst, *obj)
}
}
m.o = dst
return dupesLeft
}
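// A minimal sketch (illustrative, not part of this change): deduplicating
// with a resolver that keeps whichever entry carries more metadata.
func exampleDeduplicate(m *metaCacheEntriesSorted) {
	dupesLeft := m.deduplicate(func(existing, other *metaCacheEntry) (replace bool) {
		return len(other.metadata) > len(existing.metadata)
	})
	_ = dupesLeft // always false when a resolver is supplied
}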


@ -0,0 +1,265 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bytes"
"reflect"
"sort"
"testing"
)
func Test_metaCacheEntries_sort(t *testing.T) {
entries := loadMetacacheSampleEntries(t)
o := entries.entries()
if !o.isSorted() {
t.Fatal("Expected sorted objects")
}
// Swap first and last
o[0], o[len(o)-1] = o[len(o)-1], o[0]
if o.isSorted() {
t.Fatal("Expected unsorted objects")
}
sorted := o.sort()
if !o.isSorted() {
t.Fatal("Expected sorted o objects")
}
if !sorted.entries().isSorted() {
t.Fatal("Expected sorted wrapped objects")
}
want := loadMetacacheSampleNames
for i, got := range o {
if got.name != want[i] {
t.Errorf("entry %d, want %q, got %q", i, want[i], got.name)
}
}
}
func Test_metaCacheEntries_forwardTo(t *testing.T) {
org := loadMetacacheSampleEntries(t)
entries := org
want := []string{"src/compress/zlib/reader_test.go", "src/compress/zlib/writer.go", "src/compress/zlib/writer_test.go"}
entries.forwardTo("src/compress/zlib/reader_test.go")
got := entries.entries().names()
if !reflect.DeepEqual(got, want) {
t.Errorf("got unexpected result: %#v", got)
}
// Try with prefix
entries = org
entries.forwardTo("src/compress/zlib/reader_t")
got = entries.entries().names()
if !reflect.DeepEqual(got, want) {
t.Errorf("got unexpected result: %#v", got)
}
}
func Test_metaCacheEntries_merge(t *testing.T) {
org := loadMetacacheSampleEntries(t)
a, b := org.shallowClone(), org.shallowClone()
be := b.entries()
for i := range be {
// Modify b so it isn't deduplicated.
be[i].metadata = []byte("something-else")
}
// Merge b into a
a.merge(b, -1)
want := append(loadMetacacheSampleNames, loadMetacacheSampleNames...)
sort.Strings(want)
got := a.entries().names()
if len(got) != len(want) {
t.Errorf("unexpected count, want %v, got %v", len(want), len(got))
}
for i, name := range got {
if want[i] != name {
t.Errorf("unexpected name, want %q, got %q", want[i], name)
}
}
}
func Test_metaCacheEntries_dedupe(t *testing.T) {
org := loadMetacacheSampleEntries(t)
a, b := org.shallowClone(), org.shallowClone()
// Merge b into a
a.merge(b, -1)
if a.deduplicate(nil) {
t.Fatal("deduplicate returned duplicate entries left")
}
want := loadMetacacheSampleNames
got := a.entries().names()
if !reflect.DeepEqual(want, got) {
t.Errorf("got unexpected result: %#v", got)
}
}
func Test_metaCacheEntries_dedupe2(t *testing.T) {
org := loadMetacacheSampleEntries(t)
a, b := org.shallowClone(), org.shallowClone()
// Replace metadata in b
testMarker := []byte("sampleset")
for i := range b.o {
b.o[i].metadata = testMarker
}
// Merge b into a
a.merge(b, -1)
if a.deduplicate(func(existing, other *metaCacheEntry) (replace bool) {
a := bytes.Equal(existing.metadata, testMarker)
b := bytes.Equal(other.metadata, testMarker)
if a == b {
t.Fatal("got same number of testmarkers, only one should be given", a, b)
}
return b
}) {
t.Fatal("deduplicate returned duplicate entries left, we should always resolve")
}
want := loadMetacacheSampleNames
got := a.entries().names()
if !reflect.DeepEqual(want, got) {
t.Errorf("got unexpected result: %#v", got)
}
}
func Test_metaCacheEntries_filterObjects(t *testing.T) {
data := loadMetacacheSampleEntries(t)
data.filterObjectsOnly()
got := data.entries().names()
want := []string{"src/compress/bzip2/bit_reader.go", "src/compress/bzip2/bzip2.go", "src/compress/bzip2/bzip2_test.go", "src/compress/bzip2/huffman.go", "src/compress/bzip2/move_to_front.go", "src/compress/bzip2/testdata/Isaac.Newton-Opticks.txt.bz2", "src/compress/bzip2/testdata/e.txt.bz2", "src/compress/bzip2/testdata/fail-issue5747.bz2", "src/compress/bzip2/testdata/pass-random1.bin", "src/compress/bzip2/testdata/pass-random1.bz2", "src/compress/bzip2/testdata/pass-random2.bin", "src/compress/bzip2/testdata/pass-random2.bz2", "src/compress/bzip2/testdata/pass-sawtooth.bz2", "src/compress/bzip2/testdata/random.data.bz2", "src/compress/flate/deflate.go", "src/compress/flate/deflate_test.go", "src/compress/flate/deflatefast.go", "src/compress/flate/dict_decoder.go", "src/compress/flate/dict_decoder_test.go", "src/compress/flate/example_test.go", "src/compress/flate/flate_test.go", "src/compress/flate/huffman_bit_writer.go", "src/compress/flate/huffman_bit_writer_test.go", "src/compress/flate/huffman_code.go", "src/compress/flate/inflate.go", "src/compress/flate/inflate_test.go", "src/compress/flate/reader_test.go", "src/compress/flate/testdata/huffman-null-max.dyn.expect", "src/compress/flate/testdata/huffman-null-max.dyn.expect-noinput", "src/compress/flate/testdata/huffman-null-max.golden", "src/compress/flate/testdata/huffman-null-max.in", "src/compress/flate/testdata/huffman-null-max.wb.expect", "src/compress/flate/testdata/huffman-null-max.wb.expect-noinput", "src/compress/flate/testdata/huffman-pi.dyn.expect", "src/compress/flate/testdata/huffman-pi.dyn.expect-noinput", "src/compress/flate/testdata/huffman-pi.golden", "src/compress/flate/testdata/huffman-pi.in", "src/compress/flate/testdata/huffman-pi.wb.expect", "src/compress/flate/testdata/huffman-pi.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-1k.dyn.expect", "src/compress/flate/testdata/huffman-rand-1k.dyn.expect-noinput", "src/compress/flate/testdata/huffman-rand-1k.golden", "src/compress/flate/testdata/huffman-rand-1k.in", "src/compress/flate/testdata/huffman-rand-1k.wb.expect", "src/compress/flate/testdata/huffman-rand-1k.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-limit.dyn.expect", "src/compress/flate/testdata/huffman-rand-limit.dyn.expect-noinput", "src/compress/flate/testdata/huffman-rand-limit.golden", "src/compress/flate/testdata/huffman-rand-limit.in", "src/compress/flate/testdata/huffman-rand-limit.wb.expect", "src/compress/flate/testdata/huffman-rand-limit.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-max.golden", "src/compress/flate/testdata/huffman-rand-max.in", "src/compress/flate/testdata/huffman-shifts.dyn.expect", "src/compress/flate/testdata/huffman-shifts.dyn.expect-noinput", "src/compress/flate/testdata/huffman-shifts.golden", "src/compress/flate/testdata/huffman-shifts.in", "src/compress/flate/testdata/huffman-shifts.wb.expect", "src/compress/flate/testdata/huffman-shifts.wb.expect-noinput", "src/compress/flate/testdata/huffman-text-shift.dyn.expect", "src/compress/flate/testdata/huffman-text-shift.dyn.expect-noinput", "src/compress/flate/testdata/huffman-text-shift.golden", "src/compress/flate/testdata/huffman-text-shift.in", "src/compress/flate/testdata/huffman-text-shift.wb.expect", "src/compress/flate/testdata/huffman-text-shift.wb.expect-noinput", "src/compress/flate/testdata/huffman-text.dyn.expect", "src/compress/flate/testdata/huffman-text.dyn.expect-noinput", "src/compress/flate/testdata/huffman-text.golden", 
"src/compress/flate/testdata/huffman-text.in", "src/compress/flate/testdata/huffman-text.wb.expect", "src/compress/flate/testdata/huffman-text.wb.expect-noinput", "src/compress/flate/testdata/huffman-zero.dyn.expect", "src/compress/flate/testdata/huffman-zero.dyn.expect-noinput", "src/compress/flate/testdata/huffman-zero.golden", "src/compress/flate/testdata/huffman-zero.in", "src/compress/flate/testdata/huffman-zero.wb.expect", "src/compress/flate/testdata/huffman-zero.wb.expect-noinput", "src/compress/flate/testdata/null-long-match.dyn.expect-noinput", "src/compress/flate/testdata/null-long-match.wb.expect-noinput", "src/compress/flate/token.go", "src/compress/flate/writer_test.go", "src/compress/gzip/example_test.go", "src/compress/gzip/gunzip.go", "src/compress/gzip/gunzip_test.go", "src/compress/gzip/gzip.go", "src/compress/gzip/gzip_test.go", "src/compress/gzip/issue14937_test.go", "src/compress/gzip/testdata/issue6550.gz.base64", "src/compress/lzw/reader.go", "src/compress/lzw/reader_test.go", "src/compress/lzw/writer.go", "src/compress/lzw/writer_test.go", "src/compress/testdata/e.txt", "src/compress/testdata/gettysburg.txt", "src/compress/testdata/pi.txt", "src/compress/zlib/example_test.go", "src/compress/zlib/reader.go", "src/compress/zlib/reader_test.go", "src/compress/zlib/writer.go", "src/compress/zlib/writer_test.go"}
if !reflect.DeepEqual(want, got) {
t.Errorf("got unexpected result: %#v", got)
}
}
func Test_metaCacheEntries_filterPrefixes(t *testing.T) {
data := loadMetacacheSampleEntries(t)
data.filterPrefixesOnly()
got := data.entries().names()
want := []string{"src/compress/bzip2/", "src/compress/bzip2/testdata/", "src/compress/flate/", "src/compress/flate/testdata/", "src/compress/gzip/", "src/compress/gzip/testdata/", "src/compress/lzw/", "src/compress/testdata/", "src/compress/zlib/"}
if !reflect.DeepEqual(want, got) {
t.Errorf("got unexpected result: %#v", got)
}
}
func Test_metaCacheEntries_filterRecursive(t *testing.T) {
data := loadMetacacheSampleEntries(t)
data.filterRecursiveEntries("src/compress/bzip2/", slashSeparator)
got := data.entries().names()
want := []string{"src/compress/bzip2/", "src/compress/bzip2/bit_reader.go", "src/compress/bzip2/bzip2.go", "src/compress/bzip2/bzip2_test.go", "src/compress/bzip2/huffman.go", "src/compress/bzip2/move_to_front.go"}
if !reflect.DeepEqual(want, got) {
t.Errorf("got unexpected result: %#v", got)
}
}
func Test_metaCacheEntries_filterRecursiveRoot(t *testing.T) {
data := loadMetacacheSampleEntries(t)
data.filterRecursiveEntries("", slashSeparator)
got := data.entries().names()
want := []string{}
if !reflect.DeepEqual(want, got) {
t.Errorf("got unexpected result: %#v", got)
}
}
func Test_metaCacheEntries_filterRecursiveRootSep(t *testing.T) {
data := loadMetacacheSampleEntries(t)
// This will remove anything with "bzip2/" in the path since it is the separator
data.filterRecursiveEntries("", "bzip2/")
got := data.entries().names()
want := []string{"src/compress/flate/", "src/compress/flate/deflate.go", "src/compress/flate/deflate_test.go", "src/compress/flate/deflatefast.go", "src/compress/flate/dict_decoder.go", "src/compress/flate/dict_decoder_test.go", "src/compress/flate/example_test.go", "src/compress/flate/flate_test.go", "src/compress/flate/huffman_bit_writer.go", "src/compress/flate/huffman_bit_writer_test.go", "src/compress/flate/huffman_code.go", "src/compress/flate/inflate.go", "src/compress/flate/inflate_test.go", "src/compress/flate/reader_test.go", "src/compress/flate/testdata/", "src/compress/flate/testdata/huffman-null-max.dyn.expect", "src/compress/flate/testdata/huffman-null-max.dyn.expect-noinput", "src/compress/flate/testdata/huffman-null-max.golden", "src/compress/flate/testdata/huffman-null-max.in", "src/compress/flate/testdata/huffman-null-max.wb.expect", "src/compress/flate/testdata/huffman-null-max.wb.expect-noinput", "src/compress/flate/testdata/huffman-pi.dyn.expect", "src/compress/flate/testdata/huffman-pi.dyn.expect-noinput", "src/compress/flate/testdata/huffman-pi.golden", "src/compress/flate/testdata/huffman-pi.in", "src/compress/flate/testdata/huffman-pi.wb.expect", "src/compress/flate/testdata/huffman-pi.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-1k.dyn.expect", "src/compress/flate/testdata/huffman-rand-1k.dyn.expect-noinput", "src/compress/flate/testdata/huffman-rand-1k.golden", "src/compress/flate/testdata/huffman-rand-1k.in", "src/compress/flate/testdata/huffman-rand-1k.wb.expect", "src/compress/flate/testdata/huffman-rand-1k.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-limit.dyn.expect", "src/compress/flate/testdata/huffman-rand-limit.dyn.expect-noinput", "src/compress/flate/testdata/huffman-rand-limit.golden", "src/compress/flate/testdata/huffman-rand-limit.in", "src/compress/flate/testdata/huffman-rand-limit.wb.expect", "src/compress/flate/testdata/huffman-rand-limit.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-max.golden", "src/compress/flate/testdata/huffman-rand-max.in", "src/compress/flate/testdata/huffman-shifts.dyn.expect", "src/compress/flate/testdata/huffman-shifts.dyn.expect-noinput", "src/compress/flate/testdata/huffman-shifts.golden", "src/compress/flate/testdata/huffman-shifts.in", "src/compress/flate/testdata/huffman-shifts.wb.expect", "src/compress/flate/testdata/huffman-shifts.wb.expect-noinput", "src/compress/flate/testdata/huffman-text-shift.dyn.expect", "src/compress/flate/testdata/huffman-text-shift.dyn.expect-noinput", "src/compress/flate/testdata/huffman-text-shift.golden", "src/compress/flate/testdata/huffman-text-shift.in", "src/compress/flate/testdata/huffman-text-shift.wb.expect", "src/compress/flate/testdata/huffman-text-shift.wb.expect-noinput", "src/compress/flate/testdata/huffman-text.dyn.expect", "src/compress/flate/testdata/huffman-text.dyn.expect-noinput", "src/compress/flate/testdata/huffman-text.golden", "src/compress/flate/testdata/huffman-text.in", "src/compress/flate/testdata/huffman-text.wb.expect", "src/compress/flate/testdata/huffman-text.wb.expect-noinput", "src/compress/flate/testdata/huffman-zero.dyn.expect", "src/compress/flate/testdata/huffman-zero.dyn.expect-noinput", "src/compress/flate/testdata/huffman-zero.golden", "src/compress/flate/testdata/huffman-zero.in", "src/compress/flate/testdata/huffman-zero.wb.expect", "src/compress/flate/testdata/huffman-zero.wb.expect-noinput", "src/compress/flate/testdata/null-long-match.dyn.expect-noinput", 
"src/compress/flate/testdata/null-long-match.wb.expect-noinput", "src/compress/flate/token.go", "src/compress/flate/writer_test.go", "src/compress/gzip/", "src/compress/gzip/example_test.go", "src/compress/gzip/gunzip.go", "src/compress/gzip/gunzip_test.go", "src/compress/gzip/gzip.go", "src/compress/gzip/gzip_test.go", "src/compress/gzip/issue14937_test.go", "src/compress/gzip/testdata/", "src/compress/gzip/testdata/issue6550.gz.base64", "src/compress/lzw/", "src/compress/lzw/reader.go", "src/compress/lzw/reader_test.go", "src/compress/lzw/writer.go", "src/compress/lzw/writer_test.go", "src/compress/testdata/", "src/compress/testdata/e.txt", "src/compress/testdata/gettysburg.txt", "src/compress/testdata/pi.txt", "src/compress/zlib/", "src/compress/zlib/example_test.go", "src/compress/zlib/reader.go", "src/compress/zlib/reader_test.go", "src/compress/zlib/writer.go", "src/compress/zlib/writer_test.go"}
if !reflect.DeepEqual(want, got) {
t.Errorf("got unexpected result: %#v", got)
}
}
func Test_metaCacheEntries_filterPrefix(t *testing.T) {
data := loadMetacacheSampleEntries(t)
data.filterPrefix("src/compress/bzip2/")
got := data.entries().names()
want := []string{"src/compress/bzip2/", "src/compress/bzip2/bit_reader.go", "src/compress/bzip2/bzip2.go", "src/compress/bzip2/bzip2_test.go", "src/compress/bzip2/huffman.go", "src/compress/bzip2/move_to_front.go", "src/compress/bzip2/testdata/", "src/compress/bzip2/testdata/Isaac.Newton-Opticks.txt.bz2", "src/compress/bzip2/testdata/e.txt.bz2", "src/compress/bzip2/testdata/fail-issue5747.bz2", "src/compress/bzip2/testdata/pass-random1.bin", "src/compress/bzip2/testdata/pass-random1.bz2", "src/compress/bzip2/testdata/pass-random2.bin", "src/compress/bzip2/testdata/pass-random2.bz2", "src/compress/bzip2/testdata/pass-sawtooth.bz2", "src/compress/bzip2/testdata/random.data.bz2"}
if !reflect.DeepEqual(want, got) {
t.Errorf("got unexpected result: %#v", got)
}
}
func Test_metaCacheEntry_isInDir(t *testing.T) {
tests := []struct {
testName string
entry string
dir string
sep string
want bool
}{
{
testName: "basic-file",
entry: "src/file",
dir: "src/",
sep: slashSeparator,
want: true,
},
{
testName: "basic-dir",
entry: "src/dir/",
dir: "src/",
sep: slashSeparator,
want: true,
},
{
testName: "deeper-file",
entry: "src/dir/somewhere.ext",
dir: "src/",
sep: slashSeparator,
want: false,
},
{
testName: "deeper-dir",
entry: "src/dir/somewhere/",
dir: "src/",
sep: slashSeparator,
want: false,
},
{
testName: "root-dir",
entry: "doc/",
dir: "",
sep: slashSeparator,
want: true,
},
{
testName: "root-file",
entry: "word.doc",
dir: "",
sep: slashSeparator,
want: true,
},
}
for _, tt := range tests {
t.Run(tt.testName, func(t *testing.T) {
e := metaCacheEntry{
name: tt.entry,
}
if got := e.isInDir(tt.dir, tt.sep); got != tt.want {
t.Errorf("isInDir() = %v, want %v", got, tt.want)
}
})
}
}

cmd/metacache-manager.go Normal file

@ -0,0 +1,181 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"context"
"errors"
"fmt"
"runtime/debug"
"sync"
"time"
"github.com/minio/minio/cmd/logger"
)
// localMetacacheMgr is the *local* manager for this peer.
// It should only be accessed for buckets routed to this peer, since
// buckets are distributed deterministically across peers.
// Therefore no cluster locks are required.
var localMetacacheMgr = &metacacheManager{
buckets: make(map[string]*bucketMetacache),
}
type metacacheManager struct {
mu sync.RWMutex
init sync.Once
buckets map[string]*bucketMetacache
}
const metacacheManagerTransientBucket = "**transient**"
// initManager will start async saving the cache.
func (m *metacacheManager) initManager() {
// Add a transient bucket.
tb := newBucketMetacache(metacacheManagerTransientBucket)
tb.transient = true
m.buckets[metacacheManagerTransientBucket] = tb
// Start saver when object layer is ready.
go func() {
objAPI := newObjectLayerFn()
for objAPI == nil {
time.Sleep(time.Second)
objAPI = newObjectLayerFn()
}
if !globalIsErasure {
logger.Info("metacacheManager was initialized in non-erasure mode, skipping save")
return
}
t := time.NewTicker(time.Minute)
var exit bool
bg := context.Background()
for !exit {
select {
case <-t.C:
case <-GlobalContext.Done():
exit = true
}
m.mu.RLock()
for _, v := range m.buckets {
if !exit {
v.cleanup()
}
logger.LogIf(bg, v.save(bg))
}
m.mu.RUnlock()
}
m.getTransient().deleteAll()
}()
}
// getBucket will get a bucket metacache or load it from disk if needed.
func (m *metacacheManager) getBucket(ctx context.Context, bucket string) *bucketMetacache {
m.init.Do(m.initManager)
// Return a transient bucket for invalid or system buckets.
if isReservedOrInvalidBucket(bucket, false) {
return m.getTransient()
}
m.mu.RLock()
b, ok := m.buckets[bucket]
m.mu.RUnlock()
if ok {
if b.bucket != bucket {
logger.Info("getBucket: cached bucket %s does not match this bucket %s", b.bucket, bucket)
debug.PrintStack()
}
return b
}
m.mu.Lock()
// See if someone else fetched it while we waited for the lock.
b, ok = m.buckets[bucket]
if ok {
m.mu.Unlock()
if b.bucket != bucket {
logger.Info("getBucket: newly cached bucket %s does not match this bucket %s", b.bucket, bucket)
debug.PrintStack()
}
return b
}
// Load bucket. If we fail return the transient bucket.
b, err := loadBucketMetaCache(ctx, bucket)
if err != nil {
m.mu.Unlock()
return m.getTransient()
}
if b.bucket != bucket {
logger.LogIf(ctx, fmt.Errorf("getBucket: loaded bucket %s does not match this bucket %s", b.bucket, bucket))
}
m.buckets[bucket] = b
m.mu.Unlock()
return b
}
// getTransient will return a transient bucket.
func (m *metacacheManager) getTransient() *bucketMetacache {
m.init.Do(m.initManager)
m.mu.RLock()
bmc := m.buckets[metacacheManagerTransientBucket]
m.mu.RUnlock()
return bmc
}
// checkMetacacheState should be used if data is not updating.
// Should only be called if a failure occurred.
func (o listPathOptions) checkMetacacheState(ctx context.Context) error {
// We operate on a copy...
o.Create = false
var cache metacache
if !o.Transient {
rpc := globalNotificationSys.restClientFromHash(o.Bucket)
if rpc == nil {
// Local
cache = localMetacacheMgr.getBucket(ctx, o.Bucket).findCache(o)
} else {
c, err := rpc.GetMetacacheListing(ctx, o)
if err != nil {
return err
}
cache = *c
}
} else {
cache = localMetacacheMgr.getTransient().findCache(o)
}
if cache.status == scanStateNone {
return errFileNotFound
}
if cache.status == scanStateSuccess {
if time.Since(cache.lastUpdate) > 10*time.Second {
return fmt.Errorf("timeout: Finished and data not available after 10 seconds")
}
return nil
}
if cache.error != "" {
return errors.New(cache.error)
}
if cache.status == scanStateStarted {
if time.Since(cache.lastUpdate) > metacacheMaxRunningAge {
return errors.New("cache listing not updating")
}
}
return nil
}

cmd/metacache-marker.go Normal file

@ -0,0 +1,70 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"context"
"fmt"
"strings"
"github.com/minio/minio/cmd/logger"
)
// markerTagVersion is the marker version.
// Should not need to be updated unless a fundamental change is made to the marker format.
const markerTagVersion = "v1"
// parseMarker will parse a marker possibly encoded with encodeMarker
func parseMarker(s string) (marker, uuid string) {
if !strings.Contains(s, "[minio_cache:"+markerTagVersion) {
return s, ""
}
start := strings.LastIndex(s, "[")
marker = s[:start]
end := strings.LastIndex(s, "]")
tag := strings.Trim(s[start:end], "[]")
tags := strings.Split(tag, ",")
for _, tag := range tags {
kv := strings.Split(tag, ":")
if len(kv) < 2 {
continue
}
switch kv[0] {
case "minio_cache":
if kv[1] != markerTagVersion {
break
}
case "id":
uuid = kv[1]
default:
// Ignore unknown
}
}
return
}
// encodeMarker will encode a uuid and return it as a marker.
// uuid cannot contain '[', ':' or ','.
func encodeMarker(marker, uuid string) string {
if uuid == "" {
return marker
}
if strings.ContainsAny(uuid, "[:,") {
logger.LogIf(context.Background(), fmt.Errorf("encodeMarker: uuid %s contained invalid characters", uuid))
}
return fmt.Sprintf("%s[minio_cache:%s,id:%s]", marker, markerTagVersion, uuid)
}
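// A minimal sketch (illustrative, not part of this change) of a marker
// round trip; the object name and UUID are hypothetical.
func exampleMarkerRoundTrip() {
	m := encodeMarker("prefix/object.txt", "11111111-2222-3333-4444-555555555555")
	// m == "prefix/object.txt[minio_cache:v1,id:11111111-2222-3333-4444-555555555555]"
	marker, id := parseMarker(m)
	_ = marker // "prefix/object.txt"
	_ = id     // "11111111-2222-3333-4444-555555555555"
}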


@ -0,0 +1,213 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"context"
"io"
"path"
"sync"
"github.com/minio/minio/cmd/config"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/env"
)
// listPath will return the requested entries.
// If no more entries are in the listing io.EOF is returned,
// otherwise nil or an unexpected error is returned.
// The listPathOptions given will be checked and modified internally.
// Required important fields are Bucket, Prefix, Separator.
// Other important fields are Limit, Marker.
// The list ID is always derived from the Marker.
func (z *erasureServerSets) listPath(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) {
if err := checkListObjsArgs(ctx, o.Bucket, o.Prefix, o.Marker, z); err != nil {
return entries, err
}
// If a marker is set, validate the pre-condition.
if o.Marker != "" && o.Prefix != "" {
// Marker not common with prefix is not implemented. Send an empty response
if !HasPrefix(o.Marker, o.Prefix) {
return entries, io.EOF
}
}
// With max keys of zero we have reached eof, return right here.
if o.Limit == 0 {
return entries, io.EOF
}
// For delimiter and prefix as '/' we do not list anything at all
// since according to s3 spec we stop at the 'delimiter'
// along with the prefix. On a flat namespace with 'prefix'
// as '/' we don't have any entries, since all the keys are
// of form 'keyName/...'
if o.Separator == SlashSeparator && o.Prefix == SlashSeparator {
return entries, io.EOF
}
// Overflowing count - reset to maxObjectList.
if o.Limit < 0 || o.Limit > maxObjectList {
o.Limit = maxObjectList
}
// If delimiter is slashSeparator we must return directories of
// the non-recursive scan unless explicitly requested.
o.IncludeDirectories = o.Separator == slashSeparator
if (o.Separator == slashSeparator || o.Separator == "") && !o.Recursive {
o.Recursive = o.Separator != slashSeparator
o.Separator = slashSeparator
} else {
// Default is recursive, if delimiter is set then list non recursive.
o.Recursive = true
}
// Decode and get the optional list id from the marker.
o.Marker, o.ID = parseMarker(o.Marker)
o.Create = o.ID == ""
if o.ID == "" {
o.ID = mustGetUUID()
}
o.BaseDir = baseDirFromPrefix(o.Prefix)
var cache metacache
// If we don't have a list id we must ask the server if it has a cache or create a new.
if o.Create {
o.CurrentCycle = intDataUpdateTracker.current()
o.OldestCycle = globalNotificationSys.findEarliestCleanBloomFilter(ctx, path.Join(o.Bucket, o.BaseDir))
rpc := globalNotificationSys.restClientFromHash(o.Bucket)
if rpc == nil {
// Local
cache = localMetacacheMgr.getBucket(ctx, o.Bucket).findCache(o)
} else {
c, err := rpc.GetMetacacheListing(ctx, o)
if err != nil {
logger.LogIf(ctx, err)
cache = localMetacacheMgr.getTransient().findCache(o)
o.Transient = true
} else {
cache = *c
}
}
if cache.fileNotFound {
return entries, errFileNotFound
}
// Only create if we created a new cache.
o.Create = o.ID == cache.id
o.ID = cache.id
}
if o.AskDisks == 0 {
switch env.Get("MINIO_API_LIST_STRICT_QUORUM", config.EnableOff) {
case config.EnableOn:
// If strict, ask at least 50%.
o.AskDisks = -1
case "reduced":
// Reduced safety.
o.AskDisks = 2
case "disk":
// Ask single disk.
o.AskDisks = 1
}
}
var mu sync.Mutex
var wg sync.WaitGroup
var errs []error
allAtEOF := true
asked := 0
mu.Lock()
// Ask all sets and merge entries.
for _, zone := range z.serverSets {
for _, set := range zone.sets {
wg.Add(1)
asked++
go func(i int, set *erasureObjects) {
defer wg.Done()
e, err := set.listPath(ctx, o)
mu.Lock()
defer mu.Unlock()
if err == nil {
allAtEOF = false
}
errs[i] = err
entries.merge(e, -1)
// Resolve non-trivial conflicts
entries.deduplicate(func(existing, other *metaCacheEntry) (replace bool) {
if existing.isDir() {
return false
}
eFIV, err := existing.fileInfo(o.Bucket)
if err != nil {
return true
}
oFIV, err := other.fileInfo(o.Bucket)
if err != nil {
return false
}
return oFIV.ModTime.After(eFIV.ModTime)
})
if entries.len() > o.Limit {
allAtEOF = false
entries.truncate(o.Limit)
}
}(len(errs), set)
errs = append(errs, nil)
}
}
mu.Unlock()
wg.Wait()
if isAllNotFound(errs) {
// All sets returned not found.
// Update master cache with that information.
cache.status = scanStateSuccess
cache.fileNotFound = true
client := globalNotificationSys.restClientFromHash(o.Bucket)
if o.Transient {
cache, err = localMetacacheMgr.getTransient().updateCacheEntry(cache)
} else if client == nil {
cache, err = localMetacacheMgr.getBucket(GlobalContext, o.Bucket).updateCacheEntry(cache)
} else {
cache, err = client.UpdateMetacacheListing(context.Background(), cache)
}
logger.LogIf(ctx, err)
return entries, errFileNotFound
}
for _, err := range errs {
if err == nil {
allAtEOF = false
continue
}
if err == io.EOF {
continue
}
logger.LogIf(ctx, err)
return entries, err
}
truncated := entries.len() > o.Limit || !allAtEOF
entries.truncate(o.Limit)
entries.listID = o.ID
if !truncated {
return entries, io.EOF
}
return entries, nil
}
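
To make the flow concrete: a caller serving an S3 delimiter listing would drive listPath roughly as below. This is a hedged sketch; the real handler wiring lives elsewhere in this commit, and continuationToken is an illustrative variable.

opts := listPathOptions{
    Bucket:    "mybucket",
    Prefix:    "photos/2020/",
    Separator: SlashSeparator,    // non-recursive, S3 delimiter semantics
    Marker:    continuationToken, // may embed "[minio_cache:...,id:...]"
    Limit:     1000,
}
entries, err := z.listPath(ctx, opts)
if err != nil && err != io.EOF {
    return err
}
// err == io.EOF means the listing is complete. Otherwise the last entry
// name plus entries.listID becomes the next continuation token.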

cmd/metacache-set.go Normal file

@ -0,0 +1,769 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bytes"
"context"
"encoding/gob"
"encoding/json"
"errors"
"fmt"
"io"
"strconv"
"strings"
"sync"
"time"
"github.com/minio/minio/cmd/config/storageclass"
xhttp "github.com/minio/minio/cmd/http"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/console"
"github.com/minio/minio/pkg/hash"
)
type listPathOptions struct {
// ID of the listing.
// This will be used to persist the list.
ID string
// Bucket of the listing.
Bucket string
// Directory inside the bucket.
BaseDir string
// Scan/return only content with prefix.
Prefix string
// Marker to resume listing.
// The response will be the first entry AFTER this object name.
Marker string
// Limit the number of results.
Limit int
// The number of disks to ask. Special values:
// 0 uses default number of disks.
// -1 use at least 50% of disks or at least the default number.
AskDisks int
// InclDeleted will keep all entries where latest version is a delete marker.
InclDeleted bool
// Scan recursively.
// If false only main directory will be scanned.
// Should always be true if Separator is not SlashSeparator.
Recursive bool
// Separator to use.
Separator string
// Create indicates that the lister should not attempt to load an existing cache.
Create bool
// CurrentCycle indicates the current bloom cycle.
// Will be used if a new scan is started.
CurrentCycle uint64
// OldestCycle indicates the oldest cycle acceptable.
OldestCycle uint64
// Include pure directories.
IncludeDirectories bool
// Transient is set if the cache is transient due to an error or being a reserved bucket.
// This means the cache metadata will not be persisted on disk.
// A transient result will never be returned from the cache so knowing the list id is required.
Transient bool
}
func init() {
gob.Register(listPathOptions{})
}
// gatherResults will collect all results on the input channel and filter results according to the options.
// Caller should close the channel when done.
// The returned function will return the results once there is enough or input is closed.
func (o *listPathOptions) gatherResults(in <-chan metaCacheEntry) func() (metaCacheEntriesSorted, error) {
const debugPrint = false
var resultsDone = make(chan metaCacheEntriesSorted)
// Copy so we can mutate
resCh := resultsDone
resErr := io.EOF
go func() {
var results metaCacheEntriesSorted
for entry := range in {
if resCh == nil {
// past limit
continue
}
if !o.IncludeDirectories && entry.isDir() {
continue
}
if debugPrint {
console.Infoln("gather got:", entry.name)
}
if o.Marker != "" && entry.name <= o.Marker {
if debugPrint {
console.Infoln("pre marker")
}
continue
}
if !strings.HasPrefix(entry.name, o.Prefix) {
if debugPrint {
console.Infoln("not in prefix")
}
continue
}
if !o.Recursive && !entry.isInDir(o.Prefix, o.Separator) {
if debugPrint {
console.Infoln("not in dir", o.Prefix, o.Separator)
}
continue
}
if !o.InclDeleted && entry.isObject() {
if entry.isLatestDeletemarker() {
if debugPrint {
console.Infoln("latest delete")
}
continue
}
}
if o.Limit > 0 && results.len() >= o.Limit {
// We have enough and we have more.
// Do not return io.EOF
if resCh != nil {
resErr = nil
resCh <- results
resCh = nil
}
continue
}
if debugPrint {
console.Infoln("adding...")
}
results.o = append(results.o, entry)
}
if resCh != nil {
resErr = io.EOF
resCh <- results
}
}()
return func() (metaCacheEntriesSorted, error) {
return <-resultsDone, resErr
}
}
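The intended usage pattern: feed raw entries into the channel while the walk runs, close it when input is exhausted, then call the returned function. A sketch (walkedEntries stands in for the merged walker output):

filterCh := make(chan metaCacheEntry, 100)
getResults := o.gatherResults(filterCh)
go func() {
    defer close(filterCh) // caller must close when done
    for _, e := range walkedEntries {
        filterCh <- e
    }
}()
entries, err := getResults() // err is io.EOF only if input ended before the limit was hit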
// findFirstPart will find the part that corresponds to the marker in the options, with 0 being the first part.
// io.ErrUnexpectedEOF is returned if the place containing the marker hasn't been scanned yet.
// io.EOF indicates the marker is beyond the end of the stream and does not exist.
func (o *listPathOptions) findFirstPart(fi FileInfo) (int, error) {
search := o.Marker
if search == "" {
search = o.Prefix
}
if search == "" {
return 0, nil
}
const debugPrint = false
if debugPrint {
console.Infoln("searching for ", search)
}
var tmp metacacheBlock
i := 0
for {
partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, i)
v, ok := fi.Metadata[partKey]
if !ok {
if debugPrint {
console.Infoln("no match in metadata, waiting")
}
return -1, io.ErrUnexpectedEOF
}
err := json.Unmarshal([]byte(v), &tmp)
if err != nil {
logger.LogIf(context.Background(), err)
return -1, err
}
if tmp.First == "" && tmp.Last == "" && tmp.EOS {
return 0, errFileNotFound
}
if tmp.First >= search {
if debugPrint {
console.Infoln("First >= search", v)
}
return i, nil
}
if tmp.Last >= search {
if debugPrint {
console.Infoln("Last >= search", v)
}
return i, nil
}
if tmp.EOS {
if debugPrint {
console.Infoln("no match, at EOS", v)
}
return -3, io.EOF
}
if debugPrint {
console.Infoln("First ", tmp.First, "<", search, " search", i)
}
i++
}
}
// newMetacache constructs a new metacache from the options.
func (o listPathOptions) newMetacache() metacache {
return metacache{
id: o.ID,
bucket: o.Bucket,
root: o.BaseDir,
recursive: o.Recursive,
status: scanStateStarted,
error: "",
started: UTCNow(),
lastHandout: UTCNow(),
lastUpdate: UTCNow(),
ended: time.Time{},
startedCycle: o.CurrentCycle,
endedCycle: 0,
dataVersion: metacacheStreamVersion,
}
}
func getMetacacheBlockInfo(fi FileInfo, block int) (*metacacheBlock, error) {
var tmp metacacheBlock
partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, block)
v, ok := fi.Metadata[partKey]
if !ok {
return nil, io.ErrUnexpectedEOF
}
return &tmp, json.Unmarshal([]byte(v), &tmp)
}
func metacachePrefixForID(bucket, id string) string {
return pathJoin("buckets", bucket, ".metacache", id)
}
// objectPath returns the object path of the cache.
func (o *listPathOptions) objectPath(block int) string {
return pathJoin(metacachePrefixForID(o.Bucket, o.ID), "block-"+strconv.Itoa(block)+".s2")
}
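With illustrative values, the cache blocks land under the reserved metadata bucket like so:

o := listPathOptions{Bucket: "mybucket", ID: "7d3c9f2e"}
o.objectPath(0) // "buckets/mybucket/.metacache/7d3c9f2e/block-0.s2"
o.objectPath(3) // "buckets/mybucket/.metacache/7d3c9f2e/block-3.s2"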
// filter will apply the options and return the number of objects requested by the limit.
// Will return io.EOF if there are no more entries with the same filter.
// The last entry can be used as a marker to resume the listing.
func (r *metacacheReader) filter(o listPathOptions) (entries metaCacheEntriesSorted, err error) {
const debugPrint = false
// Forward to prefix, if any
err = r.forwardTo(o.Prefix)
if err != nil {
return entries, err
}
if o.Marker != "" {
err = r.forwardTo(o.Marker)
if err != nil {
return entries, err
}
next, err := r.peek()
if err != nil {
return entries, err
}
if next.name == o.Marker {
err := r.skip(1)
if err != nil {
return entries, err
}
}
}
if debugPrint {
console.Infoln("forwarded to ", o.Prefix, "marker:", o.Marker, "sep:", o.Separator)
}
// Filter
if !o.Recursive {
entries.o = make(metaCacheEntries, 0, o.Limit)
pastPrefix := false
err := r.readFn(func(entry metaCacheEntry) bool {
if o.Prefix != "" && !strings.HasPrefix(entry.name, o.Prefix) {
// We are past the prefix, don't continue.
pastPrefix = true
return false
}
if !o.IncludeDirectories && entry.isDir() {
return true
}
if !entry.isInDir(o.Prefix, o.Separator) {
return true
}
if !o.InclDeleted && entry.isObject() && entry.isLatestDeletemarker() {
return entries.len() < o.Limit
}
entries.o = append(entries.o, entry)
return entries.len() < o.Limit
})
if err == io.EOF || pastPrefix || r.nextEOF() {
return entries, io.EOF
}
return entries, err
}
// We should not need to filter more.
return r.readN(o.Limit, o.InclDeleted, o.IncludeDirectories, o.Prefix)
}
func (er *erasureObjects) streamMetadataParts(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) {
retries := 0
const debugPrint = false
for {
select {
case <-ctx.Done():
return entries, ctx.Err()
default:
}
// Load first part metadata...
// All operations are performed without locks, so we must be careful and allow for failures.
fi, metaArr, onlineDisks, err := er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(0), ObjectOptions{})
if err != nil {
if err == errFileNotFound || errors.Is(err, errErasureReadQuorum) || errors.Is(err, InsufficientReadQuorum{}) {
// Not ready yet...
if retries == 10 {
err := o.checkMetacacheState(ctx)
if debugPrint {
logger.Info("waiting for first part (%s), err: %v", o.objectPath(0), err)
}
if err != nil {
return entries, err
}
retries = 0
continue
}
retries++
time.Sleep(100 * time.Millisecond)
continue
}
if debugPrint {
console.Infoln("first getObjectFileInfo", o.objectPath(0), "returned err:", err)
console.Infof("err type: %T\n", err)
}
return entries, err
}
if fi.Deleted {
return entries, errFileNotFound
}
partN, err := o.findFirstPart(fi)
switch err {
case nil:
case io.ErrUnexpectedEOF, errErasureReadQuorum, InsufficientReadQuorum{}:
if retries == 10 {
err := o.checkMetacacheState(ctx)
if debugPrint {
logger.Info("waiting for metadata, err: %v", err)
}
if err != nil {
return entries, err
}
retries = 0
continue
}
retries++
time.Sleep(100 * time.Millisecond)
continue
case io.EOF:
return entries, io.EOF
}
// We got a stream to start at.
loadedPart := 0
var buf bytes.Buffer
for {
select {
case <-ctx.Done():
return entries, ctx.Err()
default:
}
if partN != loadedPart {
// Load first part metadata...
fi, metaArr, onlineDisks, err = er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(partN), ObjectOptions{})
switch err {
case errFileNotFound, errErasureReadQuorum, InsufficientReadQuorum{}:
if retries >= 10 {
err := o.checkMetacacheState(ctx)
if debugPrint {
logger.Info("waiting for part data (%v), err: %v", o.objectPath(partN), err)
}
if err != nil {
return entries, err
}
retries = 0
continue
}
retries++
time.Sleep(100 * time.Millisecond)
continue
default:
time.Sleep(100 * time.Millisecond)
if retries >= 20 {
// We had at least 10 retries without getting a result.
logger.LogIf(ctx, err)
return entries, err
}
retries++
continue
case nil:
loadedPart = partN
bi, err := getMetacacheBlockInfo(fi, partN)
logger.LogIf(ctx, err)
if err == nil {
if bi.pastPrefix(o.Prefix) {
return entries, io.EOF
}
}
}
if fi.Deleted {
return entries, io.ErrUnexpectedEOF
}
}
buf.Reset()
err := er.getObjectWithFileInfo(ctx, minioMetaBucket, o.objectPath(partN), 0, fi.Size, &buf, fi, metaArr, onlineDisks)
switch err {
case errFileNotFound, errErasureReadQuorum, InsufficientReadQuorum{}:
if retries >= 20 {
// We had at least 10 retries without getting a result.
logger.LogIf(ctx, err)
return entries, err
}
retries++
time.Sleep(100 * time.Millisecond)
continue
default:
logger.LogIf(ctx, err)
return entries, err
case nil:
}
tmp, err := newMetacacheReader(&buf)
if err != nil {
return entries, err
}
e, err := tmp.filter(o)
entries.o = append(entries.o, e.o...)
if o.Limit > 0 && entries.len() > o.Limit {
entries.truncate(o.Limit)
return entries, nil
}
switch err {
case io.EOF:
// We finished at the end of the block.
// And should not expect any more results.
bi, err := getMetacacheBlockInfo(fi, partN)
logger.LogIf(ctx, err)
if err != nil || bi.EOS {
// We are done and there are no more parts.
return entries, io.EOF
}
if bi.endedPrefix(o.Prefix) {
// Nothing more for prefix.
return entries, io.EOF
}
partN++
case nil:
// We stopped within the listing, we are done for now...
return entries, nil
default:
return entries, err
}
}
}
}
// Will return io.EOF if continuing would not yield more results.
func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) {
const debugPrint = false
if debugPrint {
console.Printf("listPath with options: %#v", o)
}
// See if we have the listing stored.
if !o.Create {
entries, err := er.streamMetadataParts(ctx, o)
switch err {
case nil, io.EOF, context.Canceled, context.DeadlineExceeded:
return entries, err
}
logger.LogIf(ctx, err)
return entries, err
}
rpcClient := globalNotificationSys.restClientFromHash(o.Bucket)
meta := o.newMetacache()
var metaMu sync.Mutex
defer func() {
if debugPrint {
console.Println("listPath returning:", entries.len(), "err:", err)
}
if err != nil {
metaMu.Lock()
if meta.status != scanStateError {
meta.error = err.Error()
meta.status = scanStateError
}
lm := meta
metaMu.Unlock()
if rpcClient == nil {
localMetacacheMgr.getBucket(GlobalContext, o.Bucket).updateCacheEntry(lm)
} else {
rpcClient.UpdateMetacacheListing(context.Background(), lm)
}
}
}()
if debugPrint {
console.Println("listPath: scanning bucket:", o.Bucket, "basedir:", o.BaseDir, "prefix:", o.Prefix, "marker:", o.Marker)
}
// Disconnect from call above, but cancel on exit.
ctx, cancel := context.WithCancel(GlobalContext)
// We need to ask disks.
disks := er.getOnlineDisks()
var askDisks = o.AskDisks
switch askDisks {
// 50% or at least 3.
case -1:
o.AskDisks = getReadQuorum(len(er.getDisks()))
if o.AskDisks < 3 {
o.AskDisks = 3
}
askDisks = o.AskDisks
// Default is 3 disks.
case 0:
askDisks = 3
}
if len(disks) < askDisks {
err = InsufficientReadQuorum{}
if debugPrint {
console.Errorf("listPath: Insufficient disks, %d of %d needed are available", len(disks), askDisks)
}
logger.LogIf(ctx, fmt.Errorf("listPath: Insufficient disks, %d of %d needed are available", len(disks), askDisks))
cancel()
return
}
// Select askDisks random disks, 3 is ok.
if len(disks) > askDisks {
disks = disks[:askDisks]
}
var readers = make([]*metacacheReader, askDisks)
for i := range disks {
r, w := io.Pipe()
d := disks[i]
readers[i], err = newMetacacheReader(r)
if err != nil {
cancel()
return entries, err
}
// Send request.
go func() {
err := d.WalkDir(ctx, WalkDirOptions{Bucket: o.Bucket, BaseDir: o.BaseDir, Recursive: o.Recursive || o.Separator != SlashSeparator}, w)
w.CloseWithError(err)
if err != io.EOF {
logger.LogIf(ctx, err)
}
}()
}
// Create output for our results.
cacheCh := make(chan metaCacheEntry, metacacheBlockSize)
// Create filter for results.
filterCh := make(chan metaCacheEntry, 100)
filteredResults := o.gatherResults(filterCh)
closeChannels := func() {
close(cacheCh)
close(filterCh)
}
go func() {
defer cancel()
// Save continuous updates
go func() {
ticker := time.NewTicker(10 * time.Second)
defer ticker.Stop()
var exit bool
for !exit {
select {
case <-ticker.C:
case <-ctx.Done():
exit = true
}
metaMu.Lock()
meta.endedCycle = intDataUpdateTracker.current()
lm := meta
metaMu.Unlock()
var err error
if o.Transient {
lm, err = localMetacacheMgr.getTransient().updateCacheEntry(lm)
} else if rpcClient == nil {
lm, err = localMetacacheMgr.getBucket(GlobalContext, o.Bucket).updateCacheEntry(lm)
} else {
lm, err = rpcClient.UpdateMetacacheListing(context.Background(), lm)
}
logger.LogIf(ctx, err)
if lm.status == scanStateError {
cancel()
exit = true
}
}
}()
// Write results to disk.
bw := newMetacacheBlockWriter(cacheCh, func(b *metacacheBlock) error {
if debugPrint {
console.Println("listPath: saving block", b.n, "to", o.objectPath(b.n))
}
r, err := hash.NewReader(bytes.NewBuffer(b.data), int64(len(b.data)), "", "", int64(len(b.data)), false)
logger.LogIf(ctx, err)
custom := b.headerKV()
custom[xhttp.AmzStorageClass] = storageclass.RRS
_, err = er.putObject(ctx, minioMetaBucket, o.objectPath(b.n), NewPutObjReader(r, nil, nil), ObjectOptions{UserDefined: custom})
if err != nil {
metaMu.Lock()
meta.status = scanStateError
meta.error = err.Error()
metaMu.Unlock()
cancel()
return err
}
if b.n == 0 {
return nil
}
// Update block 0 metadata.
for {
err := er.updateObjectMeta(ctx, minioMetaBucket, o.objectPath(0), b.headerKV(), ObjectOptions{})
if err == nil {
break
}
logger.LogIf(ctx, err)
time.Sleep(100 * time.Millisecond)
}
return nil
})
// How to resolve results.
resolver := metadataResolutionParams{
dirQuorum: askDisks - 1,
objQuorum: askDisks - 1,
bucket: o.Bucket,
}
topEntries := make(metaCacheEntries, len(readers))
for {
// Get the top entry from each
var current metaCacheEntry
var atEOF, agree int
for i, r := range readers {
topEntries[i].name = ""
entry, err := r.peek()
switch err {
case io.EOF:
atEOF++
continue
case nil:
default:
closeChannels()
metaMu.Lock()
meta.status = scanStateError
meta.error = err.Error()
metaMu.Unlock()
return
}
// If no current, add it.
if current.name == "" {
topEntries[i] = entry
current = entry
agree++
continue
}
// If exact match, we agree.
if current.matches(&entry, o.Bucket) {
topEntries[i] = entry
agree++
continue
}
// If only the name matches we didn't agree, but add it for resolution.
if entry.name == current.name {
topEntries[i] = entry
continue
}
// We got different entries
if entry.name > current.name {
continue
}
// We got a new, better current.
// Clear existing entries.
for i := range topEntries[:i] {
topEntries[i] = metaCacheEntry{}
}
agree = 1
current = entry
topEntries[i] = entry
}
// Break if all at EOF.
if atEOF == len(readers) {
break
}
if agree == len(readers) {
// Everybody agreed
for _, r := range readers {
r.skip(1)
}
cacheCh <- topEntries[0]
filterCh <- topEntries[0]
continue
}
// Results Disagree :-(
entry, ok := topEntries.resolve(&resolver)
if ok {
cacheCh <- *entry
filterCh <- *entry
}
// Skip the inputs we used.
for i, r := range readers {
if topEntries[i].name != "" {
r.skip(1)
}
}
}
closeChannels()
metaMu.Lock()
if meta.error == "" {
if err := bw.Close(); err != nil {
meta.error = err.Error()
meta.status = scanStateError
} else {
meta.status = scanStateSuccess
meta.endedCycle = intDataUpdateTracker.current()
}
}
metaMu.Unlock()
}()
return filteredResults()
}

cmd/metacache-stream.go Normal file

@ -0,0 +1,881 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"strings"
"sync"
"github.com/klauspost/compress/s2"
"github.com/minio/minio/cmd/logger"
"github.com/tinylib/msgp/msgp"
)
// metadata stream format:
//
// The stream is s2 compressed.
// https://github.com/klauspost/compress/tree/master/s2#s2-compression
// This ensures integrity and reduces the size typically by at least 50%.
//
// All stream elements are msgpack encoded.
//
// 1 Integer, metacacheStreamVersion of the writer.
// This can be used for managing breaking changes.
//
// For each element:
// 1. Bool. If false at end of stream.
// 2. String. Name of object. Directories contains a trailing slash.
// 3. Binary. Blob of metadata. Length 0 on directories.
// ... Next element.
//
// Streams can be assumed to be sorted in ascending order.
// If the stream ends before a false boolean it can be assumed it was truncated.
const metacacheStreamVersion = 1
// metacacheWriter provides a serializer of metacache objects.
type metacacheWriter struct {
mw *msgp.Writer
creator func() error
closer func() error
blockSize int
streamErr error
streamWg sync.WaitGroup
}
// newMetacacheWriter will create a serializer that will write objects in given order to the output.
// Provide a block size that affects latency. If below 8KiB a default of 128KiB will be used.
// Block size can be up to 4MiB.
func newMetacacheWriter(out io.Writer, blockSize int) *metacacheWriter {
if blockSize < 8<<10 {
blockSize = 128 << 10
}
w := metacacheWriter{
mw: nil,
blockSize: blockSize,
}
w.creator = func() error {
s2w := s2.NewWriter(out, s2.WriterBlockSize(blockSize))
w.mw = msgp.NewWriter(s2w)
w.creator = nil
if err := w.mw.WriteByte(metacacheStreamVersion); err != nil {
return err
}
w.closer = func() error {
if w.streamErr != nil {
return w.streamErr
}
if err := w.mw.WriteBool(false); err != nil {
return err
}
if err := w.mw.Flush(); err != nil {
return err
}
return s2w.Close()
}
return nil
}
return &w
}
func newMetacacheFile(file string) *metacacheWriter {
w := metacacheWriter{
mw: nil,
}
w.creator = func() error {
fw, err := os.Create(file)
if err != nil {
return err
}
s2w := s2.NewWriter(fw, s2.WriterBlockSize(1<<20))
w.mw = msgp.NewWriter(s2w)
w.creator = nil
if err := w.mw.WriteByte(metacacheStreamVersion); err != nil {
return err
}
w.closer = func() error {
if w.streamErr != nil {
fw.Close()
return w.streamErr
}
// Indicate EOS
if err := w.mw.WriteBool(false); err != nil {
return err
}
if err := w.mw.Flush(); err != nil {
fw.Close()
return err
}
if err := s2w.Close(); err != nil {
fw.Close()
return err
}
return fw.Close()
}
return nil
}
return &w
}
// write one or more objects to the stream in order.
// It is preferable to send as many objects as possible in a single write,
// but no more than math.MaxUint32
func (w *metacacheWriter) write(objs ...metaCacheEntry) error {
if w == nil {
return errors.New("metacacheWriter: nil writer")
}
if len(objs) == 0 {
return nil
}
if w.creator != nil {
err := w.creator()
w.creator = nil
if err != nil {
return fmt.Errorf("metacacheWriter: unable to create writer: %w", err)
}
if w.mw == nil {
return errors.New("metacacheWriter: writer not initialized")
}
}
for _, o := range objs {
if len(o.name) == 0 {
return errors.New("metacacheWriter: no name provided")
}
// Indicate another entry follows (false would mean EOS).
err := w.mw.WriteBool(true)
if err != nil {
return err
}
err = w.mw.WriteString(o.name)
if err != nil {
return err
}
err = w.mw.WriteBytes(o.metadata)
if err != nil {
return err
}
}
return nil
}
// stream entries to the output.
// The returned channel should be closed when done.
// Any error is reported when closing the metacacheWriter.
func (w *metacacheWriter) stream() (chan<- metaCacheEntry, error) {
if w.creator != nil {
err := w.creator()
w.creator = nil
if err != nil {
return nil, fmt.Errorf("metacacheWriter: unable to create writer: %w", err)
}
if w.mw == nil {
return nil, errors.New("metacacheWriter: writer not initialized")
}
}
var objs = make(chan metaCacheEntry, 100)
w.streamErr = nil
w.streamWg.Add(1)
go func() {
defer w.streamWg.Done()
for o := range objs {
if len(o.name) == 0 || w.streamErr != nil {
continue
}
// Indicate another entry follows (false would mean EOS).
err := w.mw.WriteBool(true)
if err != nil {
w.streamErr = err
continue
}
err = w.mw.WriteString(o.name)
if err != nil {
w.streamErr = err
continue
}
err = w.mw.WriteBytes(o.metadata)
if err != nil {
w.streamErr = err
continue
}
}
}()
return objs, nil
}
// Close and release resources.
func (w *metacacheWriter) Close() error {
if w == nil || w.closer == nil {
return nil
}
w.streamWg.Wait()
err := w.closer()
w.closer = nil
return err
}
// Reset and start writing to new writer.
// Close must have been called before this.
func (w *metacacheWriter) Reset(out io.Writer) {
w.streamErr = nil
w.creator = func() error {
s2w := s2.NewWriter(out, s2.WriterBlockSize(w.blockSize))
w.mw = msgp.NewWriter(s2w)
w.creator = nil
if err := w.mw.WriteByte(metacacheStreamVersion); err != nil {
return err
}
w.closer = func() error {
if w.streamErr != nil {
return w.streamErr
}
if err := w.mw.WriteBool(false); err != nil {
return err
}
if err := w.mw.Flush(); err != nil {
return err
}
return s2w.Close()
}
return nil
}
}
var s2DecPool = sync.Pool{New: func() interface{} {
return s2.NewReader(nil)
}}
// metacacheReader allows reading a cache stream.
type metacacheReader struct {
mr *msgp.Reader
current metaCacheEntry
err error // stateful error
closer func()
creator func() error
}
// newMetacacheReader creates a new cache reader.
// Nothing will be read from the stream yet.
func newMetacacheReader(r io.Reader) (*metacacheReader, error) {
dec := s2DecPool.Get().(*s2.Reader)
dec.Reset(r)
mr := msgp.NewReader(dec)
m := metacacheReader{
mr: mr,
closer: func() {
dec.Reset(nil)
s2DecPool.Put(dec)
},
creator: func() error {
v, err := mr.ReadByte()
if err != nil {
return err
}
switch v {
case metacacheStreamVersion:
default:
return fmt.Errorf("metacacheReader: Unknown version: %d", v)
}
return nil
},
}
return &m, nil
}
func (r *metacacheReader) checkInit() {
if r.creator == nil || r.err != nil {
return
}
r.err = r.creator()
r.creator = nil
}
// peek will return the name of the next object.
// Will return io.EOF if there are no more objects.
// Should be used sparingly.
func (r *metacacheReader) peek() (metaCacheEntry, error) {
r.checkInit()
if r.err != nil {
return metaCacheEntry{}, r.err
}
if r.current.name != "" {
return r.current, nil
}
if more, err := r.mr.ReadBool(); !more {
switch err {
case nil:
r.err = io.EOF
return metaCacheEntry{}, io.EOF
case io.EOF:
r.err = io.ErrUnexpectedEOF
return metaCacheEntry{}, io.ErrUnexpectedEOF
}
r.err = err
return metaCacheEntry{}, err
}
var err error
if r.current.name, err = r.mr.ReadString(); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return metaCacheEntry{}, err
}
r.current.metadata, err = r.mr.ReadBytes(r.current.metadata[:0])
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return r.current, err
}
// next will read one entry from the stream.
// Generally not recommended for fast operation.
func (r *metacacheReader) next() (metaCacheEntry, error) {
r.checkInit()
if r.err != nil {
return metaCacheEntry{}, r.err
}
var m metaCacheEntry
var err error
if r.current.name != "" {
m.name = r.current.name
m.metadata = r.current.metadata
r.current.name = ""
r.current.metadata = nil
return m, nil
}
if more, err := r.mr.ReadBool(); !more {
switch err {
case nil:
r.err = io.EOF
return m, io.EOF
case io.EOF:
r.err = io.ErrUnexpectedEOF
return m, io.ErrUnexpectedEOF
}
r.err = err
return m, err
}
if m.name, err = r.mr.ReadString(); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return m, err
}
m.metadata, err = r.mr.ReadBytes(nil)
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return m, err
}
// nextEOF peeks ahead and reports whether the stream
// is exhausted (the next read would return io.EOF).
func (r *metacacheReader) nextEOF() bool {
r.checkInit()
if r.err != nil {
return r.err == io.EOF
}
if r.current.name != "" {
return false
}
_, err := r.peek()
if err != nil {
r.err = err
return r.err == io.EOF
}
return false
}
// forwardTo will forward to the first entry that is >= s.
// Will return io.EOF if end of stream is reached without finding any.
func (r *metacacheReader) forwardTo(s string) error {
r.checkInit()
if r.err != nil {
return r.err
}
if s == "" {
return nil
}
if r.current.name != "" {
if r.current.name >= s {
return nil
}
r.current.name = ""
r.current.metadata = nil
}
// temporary name buffer.
var tmp = make([]byte, 0, 256)
for {
if more, err := r.mr.ReadBool(); !more {
switch err {
case nil:
r.err = io.EOF
return io.EOF
case io.EOF:
r.err = io.ErrUnexpectedEOF
return io.ErrUnexpectedEOF
}
r.err = err
return err
}
// Read name without allocating more than 1 buffer.
sz, err := r.mr.ReadStringHeader()
if err != nil {
r.err = err
return err
}
if cap(tmp) < int(sz) {
tmp = make([]byte, 0, sz+256)
}
tmp = tmp[:sz]
_, err = r.mr.R.ReadFull(tmp)
if err != nil {
r.err = err
return err
}
if string(tmp) >= s {
r.current.name = string(tmp)
r.current.metadata, err = r.mr.ReadBytes(nil)
return err
}
// Skip metadata
err = r.mr.Skip()
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return err
}
}
}
// readN will return all the requested number of entries in order
// or all if n < 0.
// Will return io.EOF if end of stream is reached.
// If 0 objects are requested a nil error is always returned, even at end of stream.
// Use peek to determine if at end of stream.
func (r *metacacheReader) readN(n int, inclDeleted, inclDirs bool, prefix string) (metaCacheEntriesSorted, error) {
r.checkInit()
if n == 0 {
return metaCacheEntriesSorted{}, nil
}
if r.err != nil {
return metaCacheEntriesSorted{}, r.err
}
var res metaCacheEntries
if n > 0 {
res = make(metaCacheEntries, 0, n)
}
if prefix != "" {
if err := r.forwardTo(prefix); err != nil {
return metaCacheEntriesSorted{}, err
}
}
next, err := r.peek()
if err != nil {
return metaCacheEntriesSorted{}, err
}
if !next.hasPrefix(prefix) {
return metaCacheEntriesSorted{}, io.EOF
}
if r.current.name != "" {
if (inclDeleted || !r.current.isLatestDeletemarker()) && r.current.hasPrefix(prefix) && (inclDirs || r.current.isObject()) {
res = append(res, r.current)
}
r.current.name = ""
r.current.metadata = nil
}
for n < 0 || len(res) < n {
if more, err := r.mr.ReadBool(); !more {
switch err {
case nil:
r.err = io.EOF
return metaCacheEntriesSorted{o: res}, io.EOF
case io.EOF:
r.err = io.ErrUnexpectedEOF
return metaCacheEntriesSorted{o: res}, io.ErrUnexpectedEOF
}
r.err = err
return metaCacheEntriesSorted{o: res}, err
}
var err error
var meta metaCacheEntry
if meta.name, err = r.mr.ReadString(); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return metaCacheEntriesSorted{o: res}, err
}
if !meta.hasPrefix(prefix) {
r.mr.R.Skip(1)
return metaCacheEntriesSorted{o: res}, io.EOF
}
if meta.metadata, err = r.mr.ReadBytes(nil); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return metaCacheEntriesSorted{o: res}, err
}
if !inclDirs && meta.isDir() {
continue
}
if !inclDeleted && meta.isObject() && meta.isLatestDeletemarker() {
continue
}
res = append(res, meta)
}
return metaCacheEntriesSorted{o: res}, nil
}
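A short usage sketch, assuming r is a *metacacheReader positioned at or before the prefix:

// Up to 100 entries under "photos/", skipping latest-version delete markers,
// including directory stubs. io.EOF means no further entries share the prefix.
entries, err := r.readN(100, false, true, "photos/")
if err == io.EOF {
    // the listing for this prefix is complete
}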
// readAll will return all remaining objects on the dst channel and close it when done.
// The context allows the operation to be canceled.
func (r *metacacheReader) readAll(ctx context.Context, dst chan<- metaCacheEntry) error {
r.checkInit()
if r.err != nil {
return r.err
}
defer close(dst)
if r.current.name != "" {
select {
case <-ctx.Done():
r.err = ctx.Err()
return ctx.Err()
case dst <- r.current:
}
r.current.name = ""
r.current.metadata = nil
}
for {
if more, err := r.mr.ReadBool(); !more {
switch err {
case io.EOF:
err = io.ErrUnexpectedEOF
}
r.err = err
return err
}
var err error
var meta metaCacheEntry
if meta.name, err = r.mr.ReadString(); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return err
}
if meta.metadata, err = r.mr.ReadBytes(nil); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return err
}
select {
case <-ctx.Done():
r.err = ctx.Err()
return ctx.Err()
case dst <- meta:
}
}
}
// readFn will return all remaining objects
// and provide a callback for each entry read in order
// as long as true is returned on the callback.
func (r *metacacheReader) readFn(fn func(entry metaCacheEntry) bool) error {
r.checkInit()
if r.err != nil {
return r.err
}
if r.current.name != "" {
fn(r.current)
r.current.name = ""
r.current.metadata = nil
}
for {
if more, err := r.mr.ReadBool(); !more {
switch err {
case io.EOF:
r.err = io.ErrUnexpectedEOF
return io.ErrUnexpectedEOF
case nil:
r.err = io.EOF
return io.EOF
}
return err
}
var err error
var meta metaCacheEntry
if meta.name, err = r.mr.ReadString(); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return err
}
if meta.metadata, err = r.mr.ReadBytes(nil); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return err
}
// Send it!
if !fn(meta) {
return nil
}
}
}
// readNames will return all the requested number of names in order
// or all if n < 0.
// Will return io.EOF if end of stream is reached.
func (r *metacacheReader) readNames(n int) ([]string, error) {
r.checkInit()
if r.err != nil {
return nil, r.err
}
if n == 0 {
return nil, nil
}
var res []string
if n > 0 {
res = make([]string, 0, n)
}
if r.current.name != "" {
res = append(res, r.current.name)
r.current.name = ""
r.current.metadata = nil
}
for n < 0 || len(res) < n {
if more, err := r.mr.ReadBool(); !more {
switch err {
case nil:
r.err = io.EOF
return res, io.EOF
case io.EOF:
r.err = io.ErrUnexpectedEOF
return res, io.ErrUnexpectedEOF
}
return res, err
}
var err error
var name string
if name, err = r.mr.ReadString(); err != nil {
r.err = err
return res, err
}
if err = r.mr.Skip(); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return res, err
}
res = append(res, name)
}
return res, nil
}
// skip n entries on the input stream.
// If there are fewer entries left io.EOF is returned.
func (r *metacacheReader) skip(n int) error {
r.checkInit()
if r.err != nil {
return r.err
}
if n <= 0 {
return nil
}
if r.current.name != "" {
n--
r.current.name = ""
r.current.metadata = nil
}
for n > 0 {
if more, err := r.mr.ReadBool(); !more {
switch err {
case nil:
r.err = io.EOF
return io.EOF
case io.EOF:
r.err = io.ErrUnexpectedEOF
return io.ErrUnexpectedEOF
}
return err
}
if err := r.mr.Skip(); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return err
}
if err := r.mr.Skip(); err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
r.err = err
return err
}
n--
}
return nil
}
// Close and release resources.
func (r *metacacheReader) Close() error {
if r == nil || r.closer == nil {
return nil
}
r.closer()
r.closer = nil
r.creator = nil
return nil
}
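Putting writer and reader together, a round trip over the stream format looks roughly like this (error handling abbreviated; rawXLMeta stands in for a serialized xl.meta blob):

var buf bytes.Buffer
w := newMetacacheWriter(&buf, 0) // below 8KiB selects the 128KiB default
_ = w.write(
    metaCacheEntry{name: "a/"},                             // directory: empty metadata
    metaCacheEntry{name: "a/obj.txt", metadata: rawXLMeta}, // object
)
_ = w.Close() // writes the trailing false bool (EOS) and flushes s2

r, _ := newMetacacheReader(&buf)
for {
    e, err := r.next()
    if err != nil { // io.EOF on a clean end of stream
        break
    }
    fmt.Println(e.name)
}
_ = r.Close()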
// metacacheBlockWriter collects blocks and provides a callback to store them.
type metacacheBlockWriter struct {
wg sync.WaitGroup
streamErr error
blockEntries int
}
// newMetacacheBlockWriter provides a streaming block writer.
// Each block is the size of the capacity of the input channel.
// The caller should close the channel to indicate the stream has ended.
func newMetacacheBlockWriter(in <-chan metaCacheEntry, nextBlock func(b *metacacheBlock) error) *metacacheBlockWriter {
w := metacacheBlockWriter{blockEntries: cap(in)}
w.wg.Add(1)
go func() {
defer w.wg.Done()
var current metacacheBlock
var n int
var buf bytes.Buffer
block := newMetacacheWriter(&buf, 1<<20)
finishBlock := func() {
err := block.Close()
if err != nil {
w.streamErr = err
return
}
current.data = buf.Bytes()
w.streamErr = nextBlock(&current)
// Prepare for next
current.n++
buf.Reset()
block.Reset(&buf)
current.First = ""
}
for o := range in {
if len(o.name) == 0 || w.streamErr != nil {
continue
}
if current.First == "" {
current.First = o.name
}
if n >= w.blockEntries-1 {
finishBlock()
n = 0
}
n++
w.streamErr = block.write(o)
if w.streamErr != nil {
continue
}
current.Last = o.name
}
if n > 0 || current.n == 0 {
current.EOS = true
finishBlock()
}
}()
return &w
}
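The listing code above sizes the channel to metacacheBlockSize, so each persisted block holds that many entries. The pattern, condensed (persistBlock and source are stand-ins for the er.putObject wiring and the entry feed in listPath):

cacheCh := make(chan metaCacheEntry, metacacheBlockSize)
bw := newMetacacheBlockWriter(cacheCh, func(b *metacacheBlock) error {
    // b.data is one finished s2-compressed block; b.headerKV() carries
    // First/Last/EOS so readers can later locate the right block.
    return persistBlock(b)
})
for e := range source {
    cacheCh <- e
}
close(cacheCh)    // flushes the final block with EOS set
err := bw.Close() // first error seen during writing, if any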
// Close the stream.
// The incoming channel must be closed before calling this.
// Returns the first error that occurred during writing, if any.
func (w *metacacheBlockWriter) Close() error {
w.wg.Wait()
return w.streamErr
}
type metacacheBlock struct {
data []byte
n int
First string `json:"f"`
Last string `json:"l"`
EOS bool `json:"eos,omitempty"`
}
func (b metacacheBlock) headerKV() map[string]string {
v, err := json.Marshal(b)
if err != nil {
logger.LogIf(context.Background(), err) // Unlikely
return nil
}
return map[string]string{fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, b.n): string(v)}
}
// pastPrefix returns true if the given prefix is before start of the block.
func (b metacacheBlock) pastPrefix(prefix string) bool {
if prefix == "" || strings.HasPrefix(b.First, prefix) {
return false
}
// We have checked if prefix matches, so we can do direct compare.
return b.First > prefix
}
// endedPrefix returns true if the given prefix ends within the block.
func (b metacacheBlock) endedPrefix(prefix string) bool {
if prefix == "" || strings.HasPrefix(b.Last, prefix) {
return false
}
// We have checked if prefix matches, so we can do direct compare.
return b.Last > prefix
}
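Concretely, given a block's First/Last markers the two checks behave as follows:

b := metacacheBlock{First: "photos/2020/a", Last: "photos/2020/z"}
b.pastPrefix("photos/2019/")  // true: the block starts after everything with that prefix
b.pastPrefix("photos/2020/")  // false: First still matches the prefix
b.endedPrefix("photos/2019/") // true: Last is beyond the prefix range
b.endedPrefix("photos/2020/") // false: entries with the prefix may continue in later blocks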

File diff suppressed because one or more lines are too long

cmd/metacache-walk.go Normal file

@ -0,0 +1,237 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"context"
"io"
"io/ioutil"
"net/http"
"net/url"
"os"
"sort"
"strconv"
"strings"
"sync/atomic"
"github.com/gorilla/mux"
"github.com/minio/minio/cmd/logger"
)
// WalkDirOptions provides options for WalkDir operations.
type WalkDirOptions struct {
// Bucket to crawl
Bucket string
// Directory inside the bucket.
BaseDir string
// Do a full recursive scan.
Recursive bool
}
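A hedged sketch of driving WalkDir locally and decoding the stream, where storage stands in for any StorageAPI implementation:

pr, pw := io.Pipe()
go func() {
    pw.CloseWithError(storage.WalkDir(ctx, WalkDirOptions{
        Bucket:    "mybucket",
        BaseDir:   "photos/",
        Recursive: true,
    }, pw))
}()
mr, _ := newMetacacheReader(pr)
names, err := mr.readNames(-1) // err == io.EOF after a fully drained stream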
// WalkDir will traverse a directory and return all entries found.
// On success a sorted meta cache stream will be returned.
func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error {
atomic.AddInt32(&s.activeIOCount, 1)
defer func() {
atomic.AddInt32(&s.activeIOCount, -1)
}()
// Verify if volume is valid and it exists.
volumeDir, err := s.getVolDir(opts.Bucket)
if err != nil {
return err
}
// Stat a volume entry.
_, err = os.Stat(volumeDir)
if err != nil {
if os.IsNotExist(err) {
return errVolumeNotFound
} else if isSysErrIO(err) {
return errFaultyDisk
}
return err
}
// Fast exit path to check if we are listing an object with
// a trailing slash; this avoids listing the object's content.
if HasSuffix(opts.BaseDir, SlashSeparator) {
if st, err := os.Stat(pathJoin(volumeDir, opts.BaseDir, xlStorageFormatFile)); err == nil && st.Mode().IsRegular() {
return errFileNotFound
}
}
// Use a small block size to start sending quickly
w := newMetacacheWriter(wr, 16<<10)
defer w.Close()
out, err := w.stream()
if err != nil {
return err
}
defer close(out)
var scanDir func(path string) error
scanDir = func(current string) error {
entries, err := s.ListDir(ctx, opts.Bucket, current, -1)
if err != nil {
// Folder could have gone away in-between
if err != errVolumeNotFound && err != errFileNotFound {
logger.LogIf(ctx, err)
}
// Forward some errors?
return nil
}
for i, entry := range entries {
if strings.HasSuffix(entry, slashSeparator) {
// Trim slash, maybe compiler is clever?
entries[i] = entries[i][:len(entry)-1]
continue
}
// Do not retain the file.
entries[i] = ""
// If root was an object return it as such.
if HasSuffix(entry, xlStorageFormatFile) {
var meta metaCacheEntry
meta.name = pathJoin(current, entry)
meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, meta.name))
if err != nil {
logger.LogIf(ctx, err)
continue
}
meta.name = strings.TrimSuffix(meta.name, xlStorageFormatFile)
meta.name = strings.TrimSuffix(meta.name, SlashSeparator)
out <- meta
return nil
}
// Check legacy.
if HasSuffix(entry, xlStorageFormatFileV1) {
var meta metaCacheEntry
meta.name = pathJoin(current, entry)
meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, meta.name))
if err != nil {
logger.LogIf(ctx, err)
continue
}
meta.name = strings.TrimSuffix(meta.name, xlStorageFormatFileV1)
meta.name = strings.TrimSuffix(meta.name, SlashSeparator)
out <- meta
return nil
}
// Skip all other files.
}
// Process in sort order.
sort.Strings(entries)
dirStack := make([]string, 0, 5)
for _, entry := range entries {
if entry == "" {
continue
}
meta := metaCacheEntry{name: pathJoin(current, entry)}
// If directory entry on stack before this, pop it now.
for len(dirStack) > 0 && dirStack[len(dirStack)-1] < meta.name {
pop := dirStack[len(dirStack)-1]
out <- metaCacheEntry{name: pop}
if opts.Recursive {
// Scan folder we found. Should be in correct sort order where we are.
err := scanDir(pop)
logger.LogIf(ctx, err)
}
dirStack = dirStack[:len(dirStack)-1]
}
// So far all entries are assumed to be directories, since no object check has run yet.
// Check by attempting to read the object metadata.
meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, meta.name, xlStorageFormatFile))
switch {
case err == nil:
// It was an object
out <- meta
case os.IsNotExist(err):
meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, meta.name, xlStorageFormatFileV1))
if err == nil {
// Maybe rename? Would make it inconsistent across disks though.
// os.Rename(pathJoin(volumeDir, meta.name, xlStorageFormatFileV1), pathJoin(volumeDir, meta.name, xlStorageFormatFile))
// It was an object
out <- meta
continue
}
// NOT an object, append to stack (with slash)
dirStack = append(dirStack, meta.name+slashSeparator)
default:
logger.LogIf(ctx, err)
}
}
// If directory entry left on stack, pop it now.
for len(dirStack) > 0 {
pop := dirStack[len(dirStack)-1]
out <- metaCacheEntry{name: pop}
if opts.Recursive {
// Scan folder we found. Should be in correct sort order where we are.
err := scanDir(pop)
logger.LogIf(ctx, err)
}
dirStack = dirStack[:len(dirStack)-1]
}
return nil
}
// Stream output.
return scanDir(opts.BaseDir)
}
func (p *xlStorageDiskIDCheck) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error {
if err := p.checkDiskStale(); err != nil {
return err
}
return p.storage.WalkDir(ctx, opts, wr)
}
// WalkDir will traverse a directory and return all entries found.
// On success a meta cache stream will be returned, that should be closed when done.
func (client *storageRESTClient) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error {
values := make(url.Values)
values.Set(storageRESTVolume, opts.Bucket)
values.Set(storageRESTDirPath, opts.BaseDir)
values.Set(storageRESTRecursive, strconv.FormatBool(opts.Recursive))
respBody, err := client.call(ctx, storageRESTMethodWalkDir, values, nil, -1)
if err != nil {
logger.LogIf(ctx, err)
return err
}
return waitForHTTPStream(respBody, wr)
}
// WalkDirHandler - remote caller to list files and folders in a requested directory path.
func (s *storageRESTServer) WalkDirHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
return
}
vars := mux.Vars(r)
volume := vars[storageRESTVolume]
dirPath := vars[storageRESTDirPath]
recursive, err := strconv.ParseBool(vars[storageRESTRecursive])
if err != nil {
s.writeErrorResponse(w, err)
return
}
writer := streamHTTPResponse(w)
writer.CloseWithError(s.storage.WalkDir(r.Context(), WalkDirOptions{Bucket: volume, BaseDir: dirPath, Recursive: recursive}, writer))
}

cmd/metacache.go Normal file

@ -0,0 +1,132 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"path"
"strings"
"time"
)
type scanStatus uint8
const (
scanStateNone scanStatus = iota
scanStateStarted
scanStateSuccess
scanStateError
// Time in which the initiator of a scan must have reported back.
metacacheMaxRunningAge = time.Minute
// metacacheBlockSize is the number of file/directory entries to have in each block.
metacacheBlockSize = 5000
)
//go:generate msgp -file $GOFILE -unexported
// metacache contains a tracked cache entry.
type metacache struct {
id string `msg:"id"`
bucket string `msg:"b"`
root string `msg:"root"`
recursive bool `msg:"rec"`
status scanStatus `msg:"stat"`
fileNotFound bool `msg:"fnf"`
error string `msg:"err"`
started time.Time `msg:"st"`
ended time.Time `msg:"end"`
lastUpdate time.Time `msg:"u"`
lastHandout time.Time `msg:"lh"`
startedCycle uint64 `msg:"stc"`
endedCycle uint64 `msg:"endc"`
dataVersion uint8 `msg:"v"`
}
func (m *metacache) finished() bool {
return !m.ended.IsZero()
}
// worthKeeping indicates if the cache by itself is worth keeping.
func (m *metacache) worthKeeping(currentCycle uint64) bool {
if m == nil {
return false
}
cache := m
switch {
case !cache.finished() && time.Since(cache.lastUpdate) > metacacheMaxRunningAge:
// Not finished and no update for metacacheMaxRunningAge, discard it.
return false
case cache.finished() && cache.startedCycle > currentCycle:
// Cycle is somehow bigger.
return false
case cache.finished() && currentCycle >= dataUsageUpdateDirCycles && cache.startedCycle < currentCycle-dataUsageUpdateDirCycles:
// Cycle is too old to be valuable.
return false
case cache.status == scanStateError || cache.status == scanStateNone:
// Remove failed listings
return false
}
return true
}
// canBeReplacedBy.
// Both must pass the worthKeeping check.
func (m *metacache) canBeReplacedBy(other *metacache) bool {
// If the other is older it can never replace.
if other.started.Before(m.started) || m.id == other.id {
return false
}
// Keep it around a bit longer.
if time.Since(m.lastHandout) < time.Hour {
return false
}
// Go through recursive combinations.
switch {
case !m.recursive && !other.recursive:
// If both not recursive root must match.
return m.root == other.root
case m.recursive && !other.recursive:
// A recursive can never be replaced by a non-recursive
return false
case !m.recursive && other.recursive:
// If other is recursive it must contain this root
return strings.HasPrefix(m.root, other.root)
case m.recursive && other.recursive:
// Similar if both are recursive
return strings.HasPrefix(m.root, other.root)
}
panic("should be unreachable")
}
// baseDirFromPrefix will return the base directory given an object path.
// For example an object with name prefix/folder/object.ext will return `prefix/folder/`.
func baseDirFromPrefix(prefix string) string {
b := path.Dir(prefix)
if b == "." || b == "./" || b == "/" {
b = ""
}
if !strings.Contains(prefix, slashSeparator) {
b = ""
}
if len(b) > 0 && !strings.HasSuffix(b, slashSeparator) {
b += slashSeparator
}
return b
}
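Representative inputs and outputs, following the rules above:

baseDirFromPrefix("prefix/folder/object.ext") // "prefix/folder/"
baseDirFromPrefix("prefix/folder/")           // "prefix/folder/"
baseDirFromPrefix("object.ext")               // "" (prefix contains no slash)
baseDirFromPrefix("/")                        // ""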

cmd/metacache_gen.go Normal file

@ -0,0 +1,495 @@
package cmd
// Code generated by github.com/tinylib/msgp DO NOT EDIT.
import (
"github.com/tinylib/msgp/msgp"
)
// DecodeMsg implements msgp.Decodable
func (z *metacache) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
var zb0001 uint32
zb0001, err = dc.ReadMapHeader()
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, err = dc.ReadMapKeyPtr()
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "id":
z.id, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "id")
return
}
case "b":
z.bucket, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "bucket")
return
}
case "root":
z.root, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "root")
return
}
case "rec":
z.recursive, err = dc.ReadBool()
if err != nil {
err = msgp.WrapError(err, "recursive")
return
}
case "stat":
{
var zb0002 uint8
zb0002, err = dc.ReadUint8()
if err != nil {
err = msgp.WrapError(err, "status")
return
}
z.status = scanStatus(zb0002)
}
case "fnf":
z.fileNotFound, err = dc.ReadBool()
if err != nil {
err = msgp.WrapError(err, "fileNotFound")
return
}
case "err":
z.error, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "error")
return
}
case "st":
z.started, err = dc.ReadTime()
if err != nil {
err = msgp.WrapError(err, "started")
return
}
case "end":
z.ended, err = dc.ReadTime()
if err != nil {
err = msgp.WrapError(err, "ended")
return
}
case "u":
z.lastUpdate, err = dc.ReadTime()
if err != nil {
err = msgp.WrapError(err, "lastUpdate")
return
}
case "lh":
z.lastHandout, err = dc.ReadTime()
if err != nil {
err = msgp.WrapError(err, "lastHandout")
return
}
case "stc":
z.startedCycle, err = dc.ReadUint64()
if err != nil {
err = msgp.WrapError(err, "startedCycle")
return
}
case "endc":
z.endedCycle, err = dc.ReadUint64()
if err != nil {
err = msgp.WrapError(err, "endedCycle")
return
}
case "v":
z.dataVersion, err = dc.ReadUint8()
if err != nil {
err = msgp.WrapError(err, "dataVersion")
return
}
default:
err = dc.Skip()
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
return
}
// EncodeMsg implements msgp.Encodable
func (z *metacache) EncodeMsg(en *msgp.Writer) (err error) {
// map header, size 14
// write "id"
err = en.Append(0x8e, 0xa2, 0x69, 0x64)
if err != nil {
return
}
err = en.WriteString(z.id)
if err != nil {
err = msgp.WrapError(err, "id")
return
}
// write "b"
err = en.Append(0xa1, 0x62)
if err != nil {
return
}
err = en.WriteString(z.bucket)
if err != nil {
err = msgp.WrapError(err, "bucket")
return
}
// write "root"
err = en.Append(0xa4, 0x72, 0x6f, 0x6f, 0x74)
if err != nil {
return
}
err = en.WriteString(z.root)
if err != nil {
err = msgp.WrapError(err, "root")
return
}
// write "rec"
err = en.Append(0xa3, 0x72, 0x65, 0x63)
if err != nil {
return
}
err = en.WriteBool(z.recursive)
if err != nil {
err = msgp.WrapError(err, "recursive")
return
}
// write "stat"
err = en.Append(0xa4, 0x73, 0x74, 0x61, 0x74)
if err != nil {
return
}
err = en.WriteUint8(uint8(z.status))
if err != nil {
err = msgp.WrapError(err, "status")
return
}
// write "fnf"
err = en.Append(0xa3, 0x66, 0x6e, 0x66)
if err != nil {
return
}
err = en.WriteBool(z.fileNotFound)
if err != nil {
err = msgp.WrapError(err, "fileNotFound")
return
}
// write "err"
err = en.Append(0xa3, 0x65, 0x72, 0x72)
if err != nil {
return
}
err = en.WriteString(z.error)
if err != nil {
err = msgp.WrapError(err, "error")
return
}
// write "st"
err = en.Append(0xa2, 0x73, 0x74)
if err != nil {
return
}
err = en.WriteTime(z.started)
if err != nil {
err = msgp.WrapError(err, "started")
return
}
// write "end"
err = en.Append(0xa3, 0x65, 0x6e, 0x64)
if err != nil {
return
}
err = en.WriteTime(z.ended)
if err != nil {
err = msgp.WrapError(err, "ended")
return
}
// write "u"
err = en.Append(0xa1, 0x75)
if err != nil {
return
}
err = en.WriteTime(z.lastUpdate)
if err != nil {
err = msgp.WrapError(err, "lastUpdate")
return
}
// write "lh"
err = en.Append(0xa2, 0x6c, 0x68)
if err != nil {
return
}
err = en.WriteTime(z.lastHandout)
if err != nil {
err = msgp.WrapError(err, "lastHandout")
return
}
// write "stc"
err = en.Append(0xa3, 0x73, 0x74, 0x63)
if err != nil {
return
}
err = en.WriteUint64(z.startedCycle)
if err != nil {
err = msgp.WrapError(err, "startedCycle")
return
}
// write "endc"
err = en.Append(0xa4, 0x65, 0x6e, 0x64, 0x63)
if err != nil {
return
}
err = en.WriteUint64(z.endedCycle)
if err != nil {
err = msgp.WrapError(err, "endedCycle")
return
}
// write "v"
err = en.Append(0xa1, 0x76)
if err != nil {
return
}
err = en.WriteUint8(z.dataVersion)
if err != nil {
err = msgp.WrapError(err, "dataVersion")
return
}
return
}
// MarshalMsg implements msgp.Marshaler
func (z *metacache) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// map header, size 14
// string "id"
o = append(o, 0x8e, 0xa2, 0x69, 0x64)
o = msgp.AppendString(o, z.id)
// string "b"
o = append(o, 0xa1, 0x62)
o = msgp.AppendString(o, z.bucket)
// string "root"
o = append(o, 0xa4, 0x72, 0x6f, 0x6f, 0x74)
o = msgp.AppendString(o, z.root)
// string "rec"
o = append(o, 0xa3, 0x72, 0x65, 0x63)
o = msgp.AppendBool(o, z.recursive)
// string "stat"
o = append(o, 0xa4, 0x73, 0x74, 0x61, 0x74)
o = msgp.AppendUint8(o, uint8(z.status))
// string "fnf"
o = append(o, 0xa3, 0x66, 0x6e, 0x66)
o = msgp.AppendBool(o, z.fileNotFound)
// string "err"
o = append(o, 0xa3, 0x65, 0x72, 0x72)
o = msgp.AppendString(o, z.error)
// string "st"
o = append(o, 0xa2, 0x73, 0x74)
o = msgp.AppendTime(o, z.started)
// string "end"
o = append(o, 0xa3, 0x65, 0x6e, 0x64)
o = msgp.AppendTime(o, z.ended)
// string "u"
o = append(o, 0xa1, 0x75)
o = msgp.AppendTime(o, z.lastUpdate)
// string "lh"
o = append(o, 0xa2, 0x6c, 0x68)
o = msgp.AppendTime(o, z.lastHandout)
// string "stc"
o = append(o, 0xa3, 0x73, 0x74, 0x63)
o = msgp.AppendUint64(o, z.startedCycle)
// string "endc"
o = append(o, 0xa4, 0x65, 0x6e, 0x64, 0x63)
o = msgp.AppendUint64(o, z.endedCycle)
// string "v"
o = append(o, 0xa1, 0x76)
o = msgp.AppendUint8(o, z.dataVersion)
return
}
// UnmarshalMsg implements msgp.Unmarshaler
func (z *metacache) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
var zb0001 uint32
zb0001, bts, err = msgp.ReadMapHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, bts, err = msgp.ReadMapKeyZC(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "id":
z.id, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "id")
return
}
case "b":
z.bucket, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "bucket")
return
}
case "root":
z.root, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "root")
return
}
case "rec":
z.recursive, bts, err = msgp.ReadBoolBytes(bts)
if err != nil {
err = msgp.WrapError(err, "recursive")
return
}
case "stat":
{
var zb0002 uint8
zb0002, bts, err = msgp.ReadUint8Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "status")
return
}
z.status = scanStatus(zb0002)
}
case "fnf":
z.fileNotFound, bts, err = msgp.ReadBoolBytes(bts)
if err != nil {
err = msgp.WrapError(err, "fileNotFound")
return
}
case "err":
z.error, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "error")
return
}
case "st":
z.started, bts, err = msgp.ReadTimeBytes(bts)
if err != nil {
err = msgp.WrapError(err, "started")
return
}
case "end":
z.ended, bts, err = msgp.ReadTimeBytes(bts)
if err != nil {
err = msgp.WrapError(err, "ended")
return
}
case "u":
z.lastUpdate, bts, err = msgp.ReadTimeBytes(bts)
if err != nil {
err = msgp.WrapError(err, "lastUpdate")
return
}
case "lh":
z.lastHandout, bts, err = msgp.ReadTimeBytes(bts)
if err != nil {
err = msgp.WrapError(err, "lastHandout")
return
}
case "stc":
z.startedCycle, bts, err = msgp.ReadUint64Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "startedCycle")
return
}
case "endc":
z.endedCycle, bts, err = msgp.ReadUint64Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "endedCycle")
return
}
case "v":
z.dataVersion, bts, err = msgp.ReadUint8Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "dataVersion")
return
}
default:
bts, err = msgp.Skip(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
o = bts
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *metacache) Msgsize() (s int) {
s = 1 + 3 + msgp.StringPrefixSize + len(z.id) + 2 + msgp.StringPrefixSize + len(z.bucket) + 5 + msgp.StringPrefixSize + len(z.root) + 4 + msgp.BoolSize + 5 + msgp.Uint8Size + 4 + msgp.BoolSize + 4 + msgp.StringPrefixSize + len(z.error) + 3 + msgp.TimeSize + 4 + msgp.TimeSize + 2 + msgp.TimeSize + 3 + msgp.TimeSize + 4 + msgp.Uint64Size + 5 + msgp.Uint64Size + 2 + msgp.Uint8Size
return
}
// DecodeMsg implements msgp.Decodable
func (z *scanStatus) DecodeMsg(dc *msgp.Reader) (err error) {
{
var zb0001 uint8
zb0001, err = dc.ReadUint8()
if err != nil {
err = msgp.WrapError(err)
return
}
(*z) = scanStatus(zb0001)
}
return
}
// EncodeMsg implements msgp.Encodable
func (z scanStatus) EncodeMsg(en *msgp.Writer) (err error) {
err = en.WriteUint8(uint8(z))
if err != nil {
err = msgp.WrapError(err)
return
}
return
}
// MarshalMsg implements msgp.Marshaler
func (z scanStatus) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
o = msgp.AppendUint8(o, uint8(z))
return
}
// UnmarshalMsg implements msgp.Unmarshaler
func (z *scanStatus) UnmarshalMsg(bts []byte) (o []byte, err error) {
{
var zb0001 uint8
zb0001, bts, err = msgp.ReadUint8Bytes(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
(*z) = scanStatus(zb0001)
}
o = bts
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z scanStatus) Msgsize() (s int) {
s = msgp.Uint8Size
return
}

cmd/metacache_gen_test.go
View File

@ -0,0 +1,123 @@
package cmd
// Code generated by github.com/tinylib/msgp DO NOT EDIT.
import (
"bytes"
"testing"
"github.com/tinylib/msgp/msgp"
)
func TestMarshalUnmarshalmetacache(t *testing.T) {
v := metacache{}
bts, err := v.MarshalMsg(nil)
if err != nil {
t.Fatal(err)
}
left, err := v.UnmarshalMsg(bts)
if err != nil {
t.Fatal(err)
}
if len(left) > 0 {
t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left)
}
left, err = msgp.Skip(bts)
if err != nil {
t.Fatal(err)
}
if len(left) > 0 {
t.Errorf("%d bytes left over after Skip(): %q", len(left), left)
}
}
func BenchmarkMarshalMsgmetacache(b *testing.B) {
v := metacache{}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
v.MarshalMsg(nil)
}
}
func BenchmarkAppendMsgmetacache(b *testing.B) {
v := metacache{}
bts := make([]byte, 0, v.Msgsize())
bts, _ = v.MarshalMsg(bts[0:0])
b.SetBytes(int64(len(bts)))
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
bts, _ = v.MarshalMsg(bts[0:0])
}
}
func BenchmarkUnmarshalmetacache(b *testing.B) {
v := metacache{}
bts, _ := v.MarshalMsg(nil)
b.ReportAllocs()
b.SetBytes(int64(len(bts)))
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := v.UnmarshalMsg(bts)
if err != nil {
b.Fatal(err)
}
}
}
func TestEncodeDecodemetacache(t *testing.T) {
v := metacache{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
m := v.Msgsize()
if buf.Len() > m {
t.Log("WARNING: TestEncodeDecodemetacache Msgsize() is inaccurate")
}
vn := metacache{}
err := msgp.Decode(&buf, &vn)
if err != nil {
t.Error(err)
}
buf.Reset()
msgp.Encode(&buf, &v)
err = msgp.NewReader(&buf).Skip()
if err != nil {
t.Error(err)
}
}
func BenchmarkEncodemetacache(b *testing.B) {
v := metacache{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
b.SetBytes(int64(buf.Len()))
en := msgp.NewWriter(msgp.Nowhere)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
v.EncodeMsg(en)
}
en.Flush()
}
func BenchmarkDecodemetacache(b *testing.B) {
v := metacache{}
var buf bytes.Buffer
msgp.Encode(&buf, &v)
b.SetBytes(int64(buf.Len()))
rd := msgp.NewEndlessReader(buf.Bytes(), b)
dc := msgp.NewReader(rd)
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
err := v.DecodeMsg(dc)
if err != nil {
b.Fatal(err)
}
}
}

cmd/metacache_test.go
View File

@ -0,0 +1,284 @@
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"testing"
"time"
)
var metaCacheTestsetTimestamp, _ = time.Parse(time.RFC822Z, time.RFC822Z)
var metaCacheTestset = []metacache{
0: {
id: "case-1-normal",
bucket: "bucket",
root: "folder/prefix",
recursive: false,
status: scanStateSuccess,
fileNotFound: false,
error: "",
started: metaCacheTestsetTimestamp,
ended: metaCacheTestsetTimestamp.Add(time.Minute),
lastUpdate: metaCacheTestsetTimestamp.Add(time.Minute),
lastHandout: metaCacheTestsetTimestamp,
startedCycle: 10,
endedCycle: 10,
dataVersion: metacacheStreamVersion,
},
1: {
id: "case-2-recursive",
bucket: "bucket",
root: "folder/prefix",
recursive: true,
status: scanStateSuccess,
fileNotFound: false,
error: "",
started: metaCacheTestsetTimestamp,
ended: metaCacheTestsetTimestamp.Add(time.Minute),
lastUpdate: metaCacheTestsetTimestamp.Add(time.Minute),
lastHandout: metaCacheTestsetTimestamp,
startedCycle: 10,
endedCycle: 10,
dataVersion: metacacheStreamVersion,
},
2: {
id: "case-3-older",
bucket: "bucket",
root: "folder/prefix",
recursive: false,
status: scanStateSuccess,
fileNotFound: true,
error: "",
started: metaCacheTestsetTimestamp.Add(-time.Minute),
ended: metaCacheTestsetTimestamp,
lastUpdate: metaCacheTestsetTimestamp,
lastHandout: metaCacheTestsetTimestamp,
startedCycle: 10,
endedCycle: 10,
dataVersion: metacacheStreamVersion,
},
3: {
id: "case-4-error",
bucket: "bucket",
root: "folder/prefix",
recursive: false,
status: scanStateError,
fileNotFound: false,
error: "an error lol",
started: metaCacheTestsetTimestamp.Add(time.Minute),
ended: metaCacheTestsetTimestamp.Add(2 * time.Minute),
lastUpdate: metaCacheTestsetTimestamp.Add(2 * time.Minute),
lastHandout: metaCacheTestsetTimestamp,
startedCycle: 10,
endedCycle: 10,
dataVersion: metacacheStreamVersion,
},
4: {
id: "case-5-noupdate",
bucket: "bucket",
root: "folder/prefix",
recursive: false,
status: scanStateStarted,
fileNotFound: false,
error: "",
started: metaCacheTestsetTimestamp.Add(-time.Minute),
ended: time.Time{},
lastUpdate: metaCacheTestsetTimestamp.Add(-time.Minute),
lastHandout: metaCacheTestsetTimestamp,
startedCycle: 10,
endedCycle: 10,
dataVersion: metacacheStreamVersion,
},
5: {
id: "case-6-404notfound",
bucket: "bucket",
root: "folder/notfound",
recursive: true,
status: scanStateSuccess,
fileNotFound: true,
error: "",
started: metaCacheTestsetTimestamp,
ended: metaCacheTestsetTimestamp.Add(time.Minute),
lastUpdate: metaCacheTestsetTimestamp.Add(time.Minute),
lastHandout: metaCacheTestsetTimestamp,
startedCycle: 10,
endedCycle: 10,
dataVersion: metacacheStreamVersion,
},
6: {
id: "case-7-oldcycle",
bucket: "bucket",
root: "folder/prefix",
recursive: true,
status: scanStateSuccess,
fileNotFound: false,
error: "",
started: metaCacheTestsetTimestamp.Add(-10 * time.Minute),
ended: metaCacheTestsetTimestamp.Add(-8 * time.Minute),
lastUpdate: metaCacheTestsetTimestamp.Add(-8 * time.Minute),
lastHandout: metaCacheTestsetTimestamp,
startedCycle: 6,
endedCycle: 8,
dataVersion: metacacheStreamVersion,
},
7: {
id: "case-8-running",
bucket: "bucket",
root: "folder/running",
recursive: false,
status: scanStateStarted,
fileNotFound: false,
error: "",
started: metaCacheTestsetTimestamp.Add(-1 * time.Minute),
ended: time.Time{},
lastUpdate: metaCacheTestsetTimestamp.Add(-1 * time.Minute),
lastHandout: metaCacheTestsetTimestamp,
startedCycle: 10,
endedCycle: 0,
dataVersion: metacacheStreamVersion,
},
}
func Test_baseDirFromPrefix(t *testing.T) {
tests := []struct {
name string
prefix string
want string
}{
{
name: "root",
prefix: "object.ext",
want: "",
},
{
name: "rootdotslash",
prefix: "./object.ext",
want: "",
},
{
name: "rootslash",
prefix: "/",
want: "",
},
{
name: "folder",
prefix: "prefix/",
want: "prefix/",
},
{
name: "folderobj",
prefix: "prefix/obj.ext",
want: "prefix/",
},
{
name: "folderfolderobj",
prefix: "prefix/prefix2/obj.ext",
want: "prefix/prefix2/",
},
{
name: "folderfolder",
prefix: "prefix/prefix2/",
want: "prefix/prefix2/",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := baseDirFromPrefix(tt.prefix); got != tt.want {
t.Errorf("baseDirFromPrefix() = %v, want %v", got, tt.want)
}
})
}
}
func Test_metacache_canBeReplacedBy(t *testing.T) {
testAgainst := metacache{
id: "case-1-modified",
bucket: "bucket",
root: "folder/prefix",
recursive: true,
status: scanStateSuccess,
fileNotFound: false,
error: "",
started: metaCacheTestsetTimestamp.Add(time.Minute),
ended: metaCacheTestsetTimestamp.Add(2 * time.Minute),
lastUpdate: metaCacheTestsetTimestamp.Add(2 * time.Minute),
lastHandout: metaCacheTestsetTimestamp.Add(time.Minute),
startedCycle: 10,
endedCycle: 10,
dataVersion: metacacheStreamVersion,
}
wantResults := []bool{0: true, 1: true, 2: true, 3: true, 4: true, 5: false, 6: true, 7: false}
for i, tt := range metaCacheTestset {
t.Run(tt.id, func(t *testing.T) {
var want bool
if i >= len(wantResults) {
t.Logf("no expected result for test #%d", i)
} else {
want = wantResults[i]
}
// Rewind lastHandout, otherwise the cache will never be replaced.
// We operate on a copy, so the original test set is unchanged.
tt.lastHandout = tt.lastHandout.Add(-2 * time.Hour)
got := tt.canBeReplacedBy(&testAgainst)
if got != want {
t.Errorf("#%d: want %v, got %v", i, want, got)
}
})
}
}
func Test_metacache_finished(t *testing.T) {
wantResults := []bool{0: true, 1: true, 2: true, 3: true, 4: false, 5: true, 6: true, 7: false}
for i, tt := range metaCacheTestset {
t.Run(tt.id, func(t *testing.T) {
var want bool
if i >= len(wantResults) {
t.Logf("no expected result for test #%d", i)
} else {
want = wantResults[i]
}
got := tt.finished()
if got != want {
t.Errorf("#%d: want %v, got %v", i, want, got)
}
})
}
}
func Test_metacache_worthKeeping(t *testing.T) {
wantResults := []bool{0: true, 1: true, 2: true, 3: false, 4: false, 5: true, 6: false, 7: false}
for i, tt := range metaCacheTestset {
t.Run(tt.id, func(t *testing.T) {
var want bool
if i >= len(wantResults) {
t.Logf("no expected result for test #%d", i)
} else {
want = wantResults[i]
}
got := tt.worthKeeping(7 + dataUsageUpdateDirCycles)
if got != want {
t.Errorf("#%d: want %v, got %v", i, want, got)
}
})
}
}

View File

@ -143,6 +143,13 @@ func (d *naughtyDisk) DeleteVol(ctx context.Context, volume string, forceDelete
return d.disk.DeleteVol(ctx, volume, forceDelete)
}
func (d *naughtyDisk) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error {
if err := d.calcError(); err != nil {
return err
}
return d.disk.WalkDir(ctx, opts, wr)
}
func (d *naughtyDisk) WalkSplunk(ctx context.Context, volume, dirPath, marker string, endWalkCh <-chan struct{}) (chan FileInfo, error) {
if err := d.calcError(); err != nil {
return nil, err
@ -227,11 +234,11 @@ func (d *naughtyDisk) CheckFile(ctx context.Context, volume string, path string)
return d.disk.CheckFile(ctx, volume, path)
}
func (d *naughtyDisk) DeleteFile(ctx context.Context, volume string, path string) (err error) {
func (d *naughtyDisk) Delete(ctx context.Context, volume string, path string, recursive bool) (err error) {
if err := d.calcError(); err != nil {
return err
}
return d.disk.DeleteFile(ctx, volume, path)
return d.disk.Delete(ctx, volume, path, recursive)
}
func (d *naughtyDisk) DeleteVersions(ctx context.Context, volume string, versions []FileInfo) []error {

View File

@ -29,6 +29,7 @@ import (
"sync"
"time"
"github.com/cespare/xxhash/v2"
"github.com/klauspost/compress/zip"
"github.com/minio/minio-go/v7/pkg/set"
"github.com/minio/minio/cmd/crypto"
@ -51,6 +52,7 @@ type NotificationSys struct {
bucketRulesMap map[string]event.RulesMap
bucketRemoteTargetRulesMap map[string]map[event.TargetID]event.RulesMap
peerClients []*peerRESTClient
allPeerClients []*peerRESTClient
}
// GetARNList - returns available ARNs.
@ -451,7 +453,7 @@ func (sys *NotificationSys) updateBloomFilter(ctx context.Context, current uint6
// Load initial state from local...
var bf *bloomFilter
bfr, err := intDataUpdateTracker.cycleFilter(ctx, req.Oldest, req.Current)
bfr, err := intDataUpdateTracker.cycleFilter(ctx, req)
logger.LogIf(ctx, err)
if err == nil && bfr.Complete {
nbf := intDataUpdateTracker.newBloomFilter()
@ -507,6 +509,124 @@ func (sys *NotificationSys) updateBloomFilter(ctx context.Context, current uint6
return bf, nil
}
// collectBloomFilter will collect bloom filters from all servers from the specified cycle.
func (sys *NotificationSys) collectBloomFilter(ctx context.Context, from uint64) (*bloomFilter, error) {
var req = bloomFilterRequest{
Current: 0,
Oldest: from,
}
// Load initial state from local...
var bf *bloomFilter
bfr, err := intDataUpdateTracker.cycleFilter(ctx, req)
logger.LogIf(ctx, err)
if err == nil && bfr.Complete {
nbf := intDataUpdateTracker.newBloomFilter()
bf = &nbf
_, err = bf.ReadFrom(bytes.NewBuffer(bfr.Filter))
logger.LogIf(ctx, err)
}
if !bfr.Complete {
// If local isn't complete, just return early.
return nil, nil
}
var mu sync.Mutex
g := errgroup.WithNErrs(len(sys.peerClients))
for idx, client := range sys.peerClients {
if client == nil {
continue
}
client := client
g.Go(func() error {
serverBF, err := client.cycleServerBloomFilter(ctx, req)
if false && intDataUpdateTracker.debug {
b, _ := json.MarshalIndent(serverBF, "", " ")
logger.Info("Disk %v, Bloom filter: %v", client.host.Name, string(b))
}
// Keep lock while checking result.
mu.Lock()
defer mu.Unlock()
if err != nil || !serverBF.Complete || bf == nil {
logger.LogIf(ctx, err)
bf = nil
return nil
}
var tmp bloom.BloomFilter
_, err = tmp.ReadFrom(bytes.NewBuffer(serverBF.Filter))
if err != nil {
logger.LogIf(ctx, err)
bf = nil
return nil
}
if bf.BloomFilter == nil {
bf.BloomFilter = &tmp
} else {
err = bf.Merge(&tmp)
if err != nil {
logger.LogIf(ctx, err)
bf = nil
return nil
}
}
return nil
}, idx)
}
g.Wait()
return bf, nil
}
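The aggregate above is a plain bitwise OR of equal-sized filters, discarded entirely if any server cannot produce a complete one. A minimal sketch of the merge invariant, assuming the willf/bloom package that bloom.BloomFilter refers to here:

// Sketch (assumption: github.com/willf/bloom): filters with identical
// m and k merge into one that matches every key added to either side.
a := bloom.New(10000, 4)
b := bloom.New(10000, 4)
a.Add([]byte("bucket/obj1"))
b.Add([]byte("bucket/obj2"))
if err := a.Merge(b); err != nil {
	// Merge fails when m or k differ between the filters.
}
// After a successful merge, a.Test reports true for both keys.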
// findEarliestCleanBloomFilter will find the earliest bloom filter across the cluster
// where the directory is clean.
// Due to how objects are stored, this can include object names.
func (sys *NotificationSys) findEarliestCleanBloomFilter(ctx context.Context, dir string) uint64 {
// Load initial state from local...
current := intDataUpdateTracker.current()
best := intDataUpdateTracker.latestWithDir(dir)
if best == current {
// If the current is dirty no need to check others.
return current
}
var req = bloomFilterRequest{
Current: 0,
Oldest: best,
OldestClean: dir,
}
var mu sync.Mutex
g := errgroup.WithNErrs(len(sys.peerClients))
for idx, client := range sys.peerClients {
if client == nil {
continue
}
client := client
g.Go(func() error {
serverBF, err := client.cycleServerBloomFilter(ctx, req)
// Keep lock while checking result.
mu.Lock()
defer mu.Unlock()
if err != nil {
// Error, don't assume clean.
best = current
logger.LogIf(ctx, err)
return nil
}
if serverBF.OldestIdx > best {
best = serverBF.OldestIdx
}
return nil
}, idx)
}
g.Wait()
return best
}
// GetLocks - makes GetLocks RPC call on all peers.
func (sys *NotificationSys) GetLocks(ctx context.Context, r *http.Request) []*PeerLocks {
locksResp := make([]*PeerLocks, len(sys.peerClients))
@ -1156,15 +1276,27 @@ func (sys *NotificationSys) GetLocalDiskIDs(ctx context.Context) (localDiskIDs [
return localDiskIDs
}
// restClientFromHash will return a deterministic peerRESTClient based on s.
// Will return nil if client is local.
func (sys *NotificationSys) restClientFromHash(s string) (client *peerRESTClient) {
if len(sys.peerClients) == 0 {
return nil
}
idx := xxhash.Sum64String(s) % uint64(len(sys.allPeerClients))
return sys.allPeerClients[idx]
}
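Because the 'all' slice keeps the same host order on every node, hashing a listing identifier selects the same owner cluster-wide. A hedged usage sketch; the variable names are illustrative, not from this commit:

// Sketch: route a metacache listing to its deterministic owner.
owner := sys.restClientFromHash(pathJoin(bucket, listingID))
if owner == nil {
	// The hash landed on this server; serve the listing locally.
} else {
	// Forward to the owning peer so concurrent listers share one walker.
}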
// NewNotificationSys - creates new notification system object.
func NewNotificationSys(endpoints EndpointServerSets) *NotificationSys {
// targetList/bucketRulesMap/bucketRemoteTargetRulesMap are populated by NotificationSys.Init()
remote, all := newPeerRestClients(endpoints)
return &NotificationSys{
targetList: event.NewTargetList(),
targetResCh: make(chan event.TargetIDResult),
bucketRulesMap: make(map[string]event.RulesMap),
bucketRemoteTargetRulesMap: make(map[string]map[event.TargetID]event.RulesMap),
peerClients: newPeerRestClients(endpoints),
peerClients: remote,
allPeerClients: all,
}
}

View File

@ -89,7 +89,7 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string)
delFunc = func(entryPath string) error {
if !HasSuffix(entryPath, SlashSeparator) {
// Delete the file entry.
err := storage.DeleteFile(ctx, volume, entryPath)
err := storage.Delete(ctx, volume, entryPath, false)
if !IsErrIgnored(err, []error{
errDiskNotFound,
errUnformattedDisk,
@ -118,7 +118,7 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string)
// Entry path is empty, just delete it.
if len(entries) == 0 {
err = storage.DeleteFile(ctx, volume, entryPath)
err = storage.Delete(ctx, volume, entryPath, false)
if !IsErrIgnored(err, []error{
errDiskNotFound,
errUnformattedDisk,

View File

@ -169,6 +169,11 @@ func (e InsufficientReadQuorum) Error() string {
return "Storage resources are insufficient for the read operation."
}
// Unwrap the error.
func (e InsufficientReadQuorum) Unwrap() error {
return errErasureReadQuorum
}
// InsufficientWriteQuorum storage cannot satisfy quorum for write operation.
type InsufficientWriteQuorum struct{}
@ -176,6 +181,11 @@ func (e InsufficientWriteQuorum) Error() string {
return "Storage resources are insufficient for the write operation."
}
// Unwrap the error.
func (e InsufficientWriteQuorum) Unwrap() error {
return errErasureWriteQuorum
}
// GenericError - generic object layer error.
type GenericError struct {
Bucket string

View File

@ -578,6 +578,7 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) {
for i, testCase := range testCases {
testCase := testCase
t.Run(fmt.Sprintf("%s-Test%d", instanceType, i+1), func(t *testing.T) {
t.Log("ListObjects, bucket:", testCase.bucketName, "prefix:", testCase.prefix, "marker:", testCase.marker, "delimiter:", testCase.delimiter, "maxkeys:", testCase.maxKeys)
result, err := obj.ListObjects(context.Background(), testCase.bucketName,
testCase.prefix, testCase.marker, testCase.delimiter, int(testCase.maxKeys))
if err != nil && testCase.shouldPass {
@ -602,9 +603,15 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) {
// otherwise it may lead to index out of range error in
// assertion following this.
if len(testCase.result.Objects) != len(result.Objects) {
t.Fatalf("Test %d: %s: Expected number of object in the result to be '%d', but found '%d' objects instead", i+1, instanceType, len(testCase.result.Objects), len(result.Objects))
t.Logf("want: %v", objInfoNames(testCase.result.Objects))
t.Logf("got: %v", objInfoNames(result.Objects))
t.Errorf("Test %d: %s: Expected number of object in the result to be '%d', but found '%d' objects instead", i+1, instanceType, len(testCase.result.Objects), len(result.Objects))
}
for j := 0; j < len(testCase.result.Objects); j++ {
if j >= len(result.Objects) {
t.Errorf("Test %d: %s: Expected object name to be \"%s\", but not nothing instead", i+1, instanceType, testCase.result.Objects[j].Name)
continue
}
if testCase.result.Objects[j].Name != result.Objects[j].Name {
t.Errorf("Test %d: %s: Expected object name to be \"%s\", but found \"%s\" instead", i+1, instanceType, testCase.result.Objects[j].Name, result.Objects[j].Name)
}
@ -616,16 +623,25 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) {
}
if len(testCase.result.Prefixes) != len(result.Prefixes) {
t.Fatalf("Test %d: %s: Expected number of prefixes in the result to be '%d', but found '%d' prefixes instead", i+1, instanceType, len(testCase.result.Prefixes), len(result.Prefixes))
t.Logf("want: %v", testCase.result.Prefixes)
t.Logf("got: %v", result.Prefixes)
t.Errorf("Test %d: %s: Expected number of prefixes in the result to be '%d', but found '%d' prefixes instead", i+1, instanceType, len(testCase.result.Prefixes), len(result.Prefixes))
}
for j := 0; j < len(testCase.result.Prefixes); j++ {
if j >= len(result.Prefixes) {
t.Errorf("Test %d: %s: Expected prefix name to be \"%s\", but found no result", i+1, instanceType, testCase.result.Prefixes[j])
continue
}
if testCase.result.Prefixes[j] != result.Prefixes[j] {
t.Errorf("Test %d: %s: Expected prefix name to be \"%s\", but found \"%s\" instead", i+1, instanceType, testCase.result.Prefixes[j], result.Prefixes[j])
}
}
if testCase.result.IsTruncated != result.IsTruncated {
t.Errorf("Test %d: %s: Expected IsTruncated flag to be %v, but instead found it to be %v", i+1, instanceType, testCase.result.IsTruncated, result.IsTruncated)
// Allow an extra continuation token.
if !result.IsTruncated || len(result.Objects) == 0 {
t.Errorf("Test %d: %s: Expected IsTruncated flag to be %v, but instead found it to be %v", i+1, instanceType, testCase.result.IsTruncated, result.IsTruncated)
}
}
if testCase.result.IsTruncated && result.NextMarker == "" {
@ -633,22 +649,35 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) {
}
if !testCase.result.IsTruncated && result.NextMarker != "" {
t.Errorf("Test %d: %s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", i+1, instanceType, result.NextMarker)
if !result.IsTruncated || len(result.Objects) == 0 {
t.Errorf("Test %d: %s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", i+1, instanceType, result.NextMarker)
}
}
}
// Take ListObject treeWalk go-routine to completion, if available in the treewalk pool.
if result.IsTruncated {
_, err = obj.ListObjects(context.Background(), testCase.bucketName,
for result.IsTruncated {
result, err = obj.ListObjects(context.Background(), testCase.bucketName,
testCase.prefix, result.NextMarker, testCase.delimiter, 1000)
if err != nil {
t.Fatal(err)
}
if !testCase.result.IsTruncated && len(result.Objects) > 0 {
t.Errorf("expected to get all objects in the previous call, but got %d more", len(result.Objects))
}
}
})
}
}
func objInfoNames(o []ObjectInfo) []string {
var res = make([]string, len(o))
for i := range o {
res[i] = o[i].Name
}
return res
}
// Wrapper for calling ListObjectVersions tests for both Erasure multiple disks and single node setup.
func TestListObjectVersions(t *testing.T) {
ExecObjectLayerTest(t, testListObjectVersions)
@ -1240,7 +1269,7 @@ func testListObjectVersions(obj ObjectLayer, instanceType string, t1 TestErrHand
}
if len(testCase.result.Prefixes) != len(result.Prefixes) {
fmt.Println(testCase, testCase.result.Prefixes, result.Prefixes)
t.Log(testCase, testCase.result.Prefixes, result.Prefixes)
t.Fatalf("%s: Expected number of prefixes in the result to be '%d', but found '%d' prefixes instead", instanceType, len(testCase.result.Prefixes), len(result.Prefixes))
}
for j := 0; j < len(testCase.result.Prefixes); j++ {
@ -1250,7 +1279,10 @@ func testListObjectVersions(obj ObjectLayer, instanceType string, t1 TestErrHand
}
if testCase.result.IsTruncated != result.IsTruncated {
t.Errorf("%s: Expected IsTruncated flag to be %v, but instead found it to be %v", instanceType, testCase.result.IsTruncated, result.IsTruncated)
// Allow an extra continuation token.
if !result.IsTruncated || len(result.Objects) == 0 {
t.Errorf("%s: Expected IsTruncated flag to be %v, but instead found it to be %v", instanceType, testCase.result.IsTruncated, result.IsTruncated)
}
}
if testCase.result.IsTruncated && result.NextMarker == "" {
@ -1258,17 +1290,22 @@ func testListObjectVersions(obj ObjectLayer, instanceType string, t1 TestErrHand
}
if !testCase.result.IsTruncated && result.NextMarker != "" {
t.Errorf("%s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", instanceType, result.NextMarker)
if !result.IsTruncated || len(result.Objects) == 0 {
t.Errorf("%s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", instanceType, result.NextMarker)
}
}
}
// Take ListObject treeWalk go-routine to completion, if available in the treewalk pool.
if result.IsTruncated {
_, err = obj.ListObjectVersions(context.Background(), testCase.bucketName,
for result.IsTruncated {
result, err = obj.ListObjectVersions(context.Background(), testCase.bucketName,
testCase.prefix, result.NextMarker, "", testCase.delimiter, 1000)
if err != nil {
t.Fatal(err)
}
if !testCase.result.IsTruncated && len(result.Objects) > 0 {
t.Errorf("expected to get all objects in the previous call, but got %d more", len(result.Objects))
}
}
})
}

View File

@ -22,6 +22,7 @@ import (
"crypto/tls"
"encoding/gob"
"errors"
"fmt"
"io"
"math"
"net/url"
@ -40,7 +41,8 @@ import (
"github.com/minio/minio/pkg/event"
"github.com/minio/minio/pkg/madmin"
xnet "github.com/minio/minio/pkg/net"
trace "github.com/minio/minio/pkg/trace"
"github.com/minio/minio/pkg/trace"
"github.com/tinylib/msgp/msgp"
)
// client to talk to peer Nodes.
@ -657,6 +659,40 @@ func (client *peerRESTClient) GetLocalDiskIDs(ctx context.Context) (diskIDs []st
return diskIDs
}
// GetMetacacheListing - get a new or existing metacache.
func (client *peerRESTClient) GetMetacacheListing(ctx context.Context, o listPathOptions) (*metacache, error) {
var reader bytes.Buffer
err := gob.NewEncoder(&reader).Encode(o)
if err != nil {
return nil, err
}
respBody, err := client.callWithContext(ctx, peerRESTMethodGetMetacacheListing, nil, &reader, int64(reader.Len()))
if err != nil {
logger.LogIf(ctx, err)
return nil, err
}
var resp metacache
defer http.DrainBody(respBody)
return &resp, msgp.Decode(respBody, &resp)
}
// UpdateMetacacheListing - update an existing metacache; it will unconditionally be updated to the new state.
func (client *peerRESTClient) UpdateMetacacheListing(ctx context.Context, m metacache) (metacache, error) {
b, err := m.MarshalMsg(nil)
if err != nil {
return m, err
}
respBody, err := client.callWithContext(ctx, peerRESTMethodUpdateMetacacheListing, nil, bytes.NewBuffer(b), int64(len(b)))
if err != nil {
logger.LogIf(ctx, err)
return m, err
}
defer http.DrainBody(respBody)
var resp metacache
return resp, msgp.Decode(respBody, &resp)
}
func (client *peerRESTClient) doTrace(traceCh chan interface{}, doneCh <-chan struct{}, trcAll, trcErr bool) {
values := make(url.Values)
values.Set(peerRESTTraceAll, strconv.FormatBool(trcAll))
@ -811,30 +847,25 @@ func (client *peerRESTClient) ConsoleLog(logCh chan interface{}, doneCh <-chan s
}()
}
func getRemoteHosts(endpointServerSets EndpointServerSets) []*xnet.Host {
peers := GetRemotePeers(endpointServerSets)
remoteHosts := make([]*xnet.Host, 0, len(peers))
for _, hostStr := range peers {
host, err := xnet.ParseHost(hostStr)
if err != nil {
logger.LogIf(GlobalContext, err)
// newPeerRestClients creates new peer clients.
// The two slices will point to the same clients,
// but 'all' will contain a nil entry for the local client.
// The 'all' slice will be in the same order across the cluster.
func newPeerRestClients(endpoints EndpointServerSets) (remote, all []*peerRESTClient) {
hosts := endpoints.hostsSorted()
remote = make([]*peerRESTClient, 0, len(hosts))
all = make([]*peerRESTClient, len(hosts))
for i, host := range hosts {
if host == nil {
continue
}
remoteHosts = append(remoteHosts, host)
all[i] = newPeerRESTClient(host)
remote = append(remote, all[i])
}
return remoteHosts
}
// newPeerRestClients creates new peer clients.
func newPeerRestClients(endpoints EndpointServerSets) []*peerRESTClient {
peerHosts := getRemoteHosts(endpoints)
restClients := make([]*peerRESTClient, len(peerHosts))
for i, host := range peerHosts {
restClients[i] = newPeerRESTClient(host)
if len(all) != len(remote)+1 {
logger.LogIf(context.Background(), fmt.Errorf("WARNING: Expected number of all hosts (%v) to be remote +1 (%v)", len(all), len(remote)))
}
return restClients
return remote, all
}
// Returns a peer rest client.

View File

@ -24,39 +24,41 @@ const (
)
const (
peerRESTMethodHealth = "/health"
peerRESTMethodServerInfo = "/serverinfo"
peerRESTMethodDriveOBDInfo = "/driveobdinfo"
peerRESTMethodNetOBDInfo = "/netobdinfo"
peerRESTMethodCPUOBDInfo = "/cpuobdinfo"
peerRESTMethodDiskHwOBDInfo = "/diskhwobdinfo"
peerRESTMethodOsInfoOBDInfo = "/osinfoobdinfo"
peerRESTMethodMemOBDInfo = "/memobdinfo"
peerRESTMethodProcOBDInfo = "/procobdinfo"
peerRESTMethodDispatchNetOBDInfo = "/dispatchnetobdinfo"
peerRESTMethodDeleteBucketMetadata = "/deletebucketmetadata"
peerRESTMethodLoadBucketMetadata = "/loadbucketmetadata"
peerRESTMethodServerUpdate = "/serverupdate"
peerRESTMethodSignalService = "/signalservice"
peerRESTMethodBackgroundHealStatus = "/backgroundhealstatus"
peerRESTMethodGetLocks = "/getlocks"
peerRESTMethodLoadUser = "/loaduser"
peerRESTMethodLoadServiceAccount = "/loadserviceaccount"
peerRESTMethodDeleteUser = "/deleteuser"
peerRESTMethodDeleteServiceAccount = "/deleteserviceaccount"
peerRESTMethodLoadPolicy = "/loadpolicy"
peerRESTMethodLoadPolicyMapping = "/loadpolicymapping"
peerRESTMethodDeletePolicy = "/deletepolicy"
peerRESTMethodLoadGroup = "/loadgroup"
peerRESTMethodStartProfiling = "/startprofiling"
peerRESTMethodDownloadProfilingData = "/downloadprofilingdata"
peerRESTMethodReloadFormat = "/reloadformat"
peerRESTMethodCycleBloom = "/cyclebloom"
peerRESTMethodTrace = "/trace"
peerRESTMethodListen = "/listen"
peerRESTMethodLog = "/log"
peerRESTMethodGetLocalDiskIDs = "/getlocaldiskids"
peerRESTMethodGetBandwidth = "/bandwidth"
peerRESTMethodHealth = "/health"
peerRESTMethodServerInfo = "/serverinfo"
peerRESTMethodDriveOBDInfo = "/driveobdinfo"
peerRESTMethodNetOBDInfo = "/netobdinfo"
peerRESTMethodCPUOBDInfo = "/cpuobdinfo"
peerRESTMethodDiskHwOBDInfo = "/diskhwobdinfo"
peerRESTMethodOsInfoOBDInfo = "/osinfoobdinfo"
peerRESTMethodMemOBDInfo = "/memobdinfo"
peerRESTMethodProcOBDInfo = "/procobdinfo"
peerRESTMethodDispatchNetOBDInfo = "/dispatchnetobdinfo"
peerRESTMethodDeleteBucketMetadata = "/deletebucketmetadata"
peerRESTMethodLoadBucketMetadata = "/loadbucketmetadata"
peerRESTMethodServerUpdate = "/serverupdate"
peerRESTMethodSignalService = "/signalservice"
peerRESTMethodBackgroundHealStatus = "/backgroundhealstatus"
peerRESTMethodGetLocks = "/getlocks"
peerRESTMethodLoadUser = "/loaduser"
peerRESTMethodLoadServiceAccount = "/loadserviceaccount"
peerRESTMethodDeleteUser = "/deleteuser"
peerRESTMethodDeleteServiceAccount = "/deleteserviceaccount"
peerRESTMethodLoadPolicy = "/loadpolicy"
peerRESTMethodLoadPolicyMapping = "/loadpolicymapping"
peerRESTMethodDeletePolicy = "/deletepolicy"
peerRESTMethodLoadGroup = "/loadgroup"
peerRESTMethodStartProfiling = "/startprofiling"
peerRESTMethodDownloadProfilingData = "/downloadprofilingdata"
peerRESTMethodReloadFormat = "/reloadformat"
peerRESTMethodCycleBloom = "/cyclebloom"
peerRESTMethodTrace = "/trace"
peerRESTMethodListen = "/listen"
peerRESTMethodLog = "/log"
peerRESTMethodGetLocalDiskIDs = "/getlocaldiskids"
peerRESTMethodGetBandwidth = "/bandwidth"
peerRESTMethodGetMetacacheListing = "/getmetacache"
peerRESTMethodUpdateMetacacheListing = "/updatemetacache"
)
const (

View File

@ -34,6 +34,7 @@ import (
"github.com/minio/minio/pkg/event"
"github.com/minio/minio/pkg/madmin"
trace "github.com/minio/minio/pkg/trace"
"github.com/tinylib/msgp/msgp"
)
// To abstract a node over network.
@ -616,7 +617,7 @@ func (s *peerRESTServer) ReloadFormatHandler(w http.ResponseWriter, r *http.Requ
w.(http.Flusher).Flush()
}
// CycleServerBloomFilterHandler cycles bllom filter on server.
// CycleServerBloomFilterHandler cycles bloom filter on server.
func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
s.writeErrorResponse(w, errors.New("Invalid request"))
@ -631,7 +632,7 @@ func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r
s.writeErrorResponse(w, err)
return
}
bf, err := intDataUpdateTracker.cycleFilter(ctx, req.Oldest, req.Current)
bf, err := intDataUpdateTracker.cycleFilter(ctx, req)
if err != nil {
s.writeErrorResponse(w, err)
return
@ -640,6 +641,51 @@ func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r
logger.LogIf(ctx, gob.NewEncoder(w).Encode(bf))
}
func (s *peerRESTServer) GetMetacacheListingHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
s.writeErrorResponse(w, errors.New("Invalid request"))
return
}
ctx := newContext(r, w, "GetMetacacheListing")
var opts listPathOptions
err := gob.NewDecoder(r.Body).Decode(&opts)
if err != nil && err != io.EOF {
s.writeErrorResponse(w, err)
return
}
resp := localMetacacheMgr.getBucket(ctx, opts.Bucket).findCache(opts)
logger.LogIf(ctx, msgp.Encode(w, &resp))
}
func (s *peerRESTServer) UpdateMetacacheListingHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
s.writeErrorResponse(w, errors.New("Invalid request"))
return
}
ctx := newContext(r, w, "UpdateMetacacheListing")
var req metacache
err := msgp.Decode(r.Body, &req)
if err != nil {
s.writeErrorResponse(w, err)
return
}
b := localMetacacheMgr.getBucket(ctx, req.bucket)
if b == nil {
s.writeErrorResponse(w, errServerNotInitialized)
return
}
cache, err := b.updateCacheEntry(req)
if err != nil {
s.writeErrorResponse(w, err)
return
}
// Return updated metadata.
logger.LogIf(ctx, msgp.Encode(w, &cache))
}
// PutBucketNotificationHandler - Set bucket policy.
func (s *peerRESTServer) PutBucketNotificationHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
@ -1054,4 +1100,6 @@ func registerPeerRESTHandlers(router *mux.Router) {
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodLog).HandlerFunc(server.ConsoleLogHandler)
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetLocalDiskIDs).HandlerFunc(httpTraceHdrs(server.GetLocalDiskIDs))
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetBandwidth).HandlerFunc(httpTraceHdrs(server.GetBandwidth))
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetMetacacheListing).HandlerFunc(httpTraceHdrs(server.GetMetacacheListingHandler))
subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodUpdateMetacacheListing).HandlerFunc(httpTraceHdrs(server.UpdateMetacacheListingHandler))
}

View File

@ -27,6 +27,8 @@ import (
"sync/atomic"
"time"
"github.com/minio/minio/cmd/logger"
xhttp "github.com/minio/minio/cmd/http"
xnet "github.com/minio/minio/pkg/net"
)
@ -113,6 +115,7 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
resp, err := c.httpClient.Do(req)
if err != nil {
if xnet.IsNetworkOrHostDown(err) {
logger.LogIf(ctx, err, "marking disk offline")
c.MarkOffline()
}
return nil, &NetworkError{err}
@ -142,6 +145,7 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod
b, err := ioutil.ReadAll(io.LimitReader(resp.Body, c.MaxErrResponseSize))
if err != nil {
if xnet.IsNetworkOrHostDown(err) {
logger.LogIf(ctx, err, "marking disk offline")
c.MarkOffline()
}
return nil, err

View File

@ -59,6 +59,20 @@ type FileInfoVersions struct {
Versions []FileInfo
}
// forwardPastVersion will truncate the result to only contain versions after 'v'.
// If v is empty or the version isn't found, no changes will be made.
func (f *FileInfoVersions) forwardPastVersion(v string) {
if v == "" {
return
}
for i, ver := range f.Versions {
if ver.VersionID == v {
f.Versions = f.Versions[i+1:]
return
}
}
}
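A quick illustration of the truncation semantics, using hypothetical version ids:

// Sketch: with versions [v1, v2, v3], forwarding past "v2" keeps [v3].
fiv := FileInfoVersions{Versions: []FileInfo{
	{VersionID: "v1"}, {VersionID: "v2"}, {VersionID: "v3"},
}}
fiv.forwardPastVersion("v2")   // fiv.Versions is now [{VersionID: "v3"}]
fiv.forwardPastVersion("nope") // unknown id: slice left untouched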
// FileInfo - represents file stat information.
type FileInfo struct {
// Name of the volume.

View File

@ -48,6 +48,9 @@ type StorageAPI interface {
StatVol(ctx context.Context, volume string) (vol VolInfo, err error)
DeleteVol(ctx context.Context, volume string, forceDelete bool) (err error)
// WalkDir will walk a directory on disk and return a metacache stream on wr.
WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error
// WalkVersions in sorted order directly on disk.
WalkVersions(ctx context.Context, volume, dirPath, marker string, recursive bool, endWalkCh <-chan struct{}) (chan FileInfoVersions, error)
// Walk in sorted order directly on disk.
@ -71,7 +74,7 @@ type StorageAPI interface {
RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) error
CheckParts(ctx context.Context, volume string, path string, fi FileInfo) error
CheckFile(ctx context.Context, volume string, path string) (err error)
DeleteFile(ctx context.Context, volume string, path string) (err error)
Delete(ctx context.Context, volume string, path string, recursive bool) (err error)
VerifyFile(ctx context.Context, volume, path string, fi FileInfo) error
// Write all data, syncs the data to disk.

View File

@ -555,10 +555,11 @@ func (client *storageRESTClient) ListDir(ctx context.Context, volume, dirPath st
}
// DeleteFile - deletes a file.
func (client *storageRESTClient) DeleteFile(ctx context.Context, volume string, path string) error {
func (client *storageRESTClient) Delete(ctx context.Context, volume string, path string, recursive bool) error {
values := make(url.Values)
values.Set(storageRESTVolume, volume)
values.Set(storageRESTFilePath, path)
values.Set(storageRESTRecursive, strconv.FormatBool(recursive))
respBody, err := client.call(ctx, storageRESTMethodDeleteFile, values, nil, -1)
defer http.DrainBody(respBody)
return err

View File

@ -17,7 +17,7 @@
package cmd
const (
storageRESTVersion = "v21" // Add checkDataDir in ReadVersion API
storageRESTVersion = "v22" // Add dir listing and recursive delete operation.
storageRESTVersionPrefix = SlashSeparator + storageRESTVersion
storageRESTPrefix = minioReservedBucketPath + "/storage"
)
@ -52,6 +52,7 @@ const (
storageRESTMethodDeleteVersions = "/deleteverions"
storageRESTMethodRenameFile = "/renamefile"
storageRESTMethodVerifyFile = "/verifyfile"
storageRESTMethodWalkDir = "/walkdir"
)
const (

View File

@ -18,6 +18,8 @@ package cmd
import (
"bufio"
"bytes"
"encoding/binary"
"encoding/gob"
"encoding/hex"
"errors"
@ -633,8 +635,13 @@ func (s *storageRESTServer) DeleteFileHandler(w http.ResponseWriter, r *http.Req
vars := mux.Vars(r)
volume := vars[storageRESTVolume]
filePath := vars[storageRESTFilePath]
recursive, err := strconv.ParseBool(vars[storageRESTRecursive])
if err != nil {
s.writeErrorResponse(w, err)
return
}
err := s.storage.DeleteFile(r.Context(), volume, filePath)
err = s.storage.Delete(r.Context(), volume, filePath, recursive)
if err != nil {
s.writeErrorResponse(w, err)
}
@ -792,6 +799,161 @@ func waitForHTTPResponse(respBody io.Reader) (io.Reader, error) {
}
}
// drainCloser can be used for wrapping an http response.
// It will drain the body before closing.
type drainCloser struct {
rc io.ReadCloser
}
// Read forwards the read operation.
func (f drainCloser) Read(p []byte) (n int, err error) {
return f.rc.Read(p)
}
// Close drains the body and closes the upstream.
func (f drainCloser) Close() error {
xhttp.DrainBody(f.rc)
return nil
}
// httpStreamResponse allows streaming a response, but still send an error.
type httpStreamResponse struct {
done chan error
block chan []byte
err error
}
// Write part of the streaming response.
// Note that upstream errors are currently not forwarded, but may be in the future.
func (h *httpStreamResponse) Write(b []byte) (int, error) {
tmp := make([]byte, len(b))
copy(tmp, b)
h.block <- tmp
return len(b), h.err
}
// CloseWithError will close the stream and return the specified error.
// This can be done several times, but only the first error will be sent.
// After calling this the stream should not be written to.
func (h *httpStreamResponse) CloseWithError(err error) {
if h.done == nil {
return
}
h.done <- err
h.err = err
// Indicates that the response is done.
<-h.done
h.done = nil
}
// streamHTTPResponse can be used to avoid timeouts with long storage
// operations, such as bitrot verification or data usage crawling.
// Every 10 seconds a space character is sent.
// CloseWithError should always be called to release resources.
// An optional error can be sent; if it is gob-encodable the receiver
// recovers the typed error, otherwise only its text is preserved.
// waitForHTTPStream should be used on the receiving side.
func streamHTTPResponse(w http.ResponseWriter) *httpStreamResponse {
doneCh := make(chan error)
blockCh := make(chan []byte)
h := httpStreamResponse{done: doneCh, block: blockCh}
go func() {
ticker := time.NewTicker(time.Second * 10)
for {
select {
case <-ticker.C:
// Response not ready, write a filler byte.
w.Write([]byte{32})
w.(http.Flusher).Flush()
case err := <-doneCh:
ticker.Stop()
defer close(doneCh)
if err != nil {
var buf bytes.Buffer
enc := gob.NewEncoder(&buf)
if ee := enc.Encode(err); ee == nil {
w.Write([]byte{3})
w.Write(buf.Bytes())
} else {
w.Write([]byte{1})
w.Write([]byte(err.Error()))
}
} else {
w.Write([]byte{0})
}
return
case block := <-blockCh:
var tmp [5]byte
tmp[0] = 2
binary.LittleEndian.PutUint32(tmp[1:], uint32(len(block)))
w.Write(tmp[:])
w.Write(block)
w.(http.Flusher).Flush()
}
}
}()
return &h
}
// waitForHTTPStream will wait for responses where
// streamHTTPResponse has been used.
// The streamed payload is written to w; respBody is drained and closed.
func waitForHTTPStream(respBody io.ReadCloser, w io.Writer) error {
var tmp [1]byte
for {
_, err := io.ReadFull(respBody, tmp[:])
if err != nil {
return err
}
// Check if we have a response ready or a filler byte.
switch tmp[0] {
case 0:
// 0 is unbuffered, copy the rest.
_, err := io.Copy(w, respBody)
respBody.Close()
if err == io.EOF {
return nil
}
return err
case 1:
errorText, err := ioutil.ReadAll(respBody)
if err != nil {
return err
}
respBody.Close()
return errors.New(string(errorText))
case 3:
// Typed error
defer respBody.Close()
dec := gob.NewDecoder(respBody)
var err error
if de := dec.Decode(&err); de == nil {
return err
}
return errors.New("rpc error")
case 2:
// Block of data
var tmp [4]byte
_, err := io.ReadFull(respBody, tmp[:])
if err != nil {
return err
}
length := binary.LittleEndian.Uint32(tmp[:])
_, err = io.CopyN(w, respBody, int64(length))
if err != nil {
return err
}
continue
case 32:
continue
default:
go xhttp.DrainBody(respBody)
return fmt.Errorf("unexpected filler byte: %d", tmp[0])
}
}
}
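The wire framing is one leading byte per message: 32 is a keep-alive filler, 2 prefixes a length-delimited block, and 0, 1 or 3 terminate the stream (success, text error, gob-typed error). A minimal sketch of how the two halves pair up; the handler and helper names are hypothetical:

// Server side: stream blocks while filler bytes defeat idle timeouts.
func exampleWalkHandler(w http.ResponseWriter, r *http.Request) {
	resp := streamHTTPResponse(w)
	err := runLongOperation(resp) // hypothetical; writes output via resp.Write
	resp.CloseWithError(err)      // sends 0 on success, the encoded error otherwise
}

// Client side: reassemble the blocks and surface any terminal error.
func exampleClientRead(respBody io.ReadCloser, out io.Writer) error {
	return waitForHTTPStream(respBody, out)
}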
// VerifyFileResp - VerifyFile()'s response.
type VerifyFileResp struct {
Err error
@ -960,12 +1122,14 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerSets Endpoint
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodDeleteVersions).HandlerFunc(httpTraceHdrs(server.DeleteVersionsHandler)).
Queries(restQueries(storageRESTVolume, storageRESTTotalVersions)...)
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodDeleteFile).HandlerFunc(httpTraceHdrs(server.DeleteFileHandler)).
Queries(restQueries(storageRESTVolume, storageRESTFilePath)...)
Queries(restQueries(storageRESTVolume, storageRESTFilePath, storageRESTRecursive)...)
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodRenameFile).HandlerFunc(httpTraceHdrs(server.RenameFileHandler)).
Queries(restQueries(storageRESTSrcVolume, storageRESTSrcPath, storageRESTDstVolume, storageRESTDstPath)...)
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodVerifyFile).HandlerFunc(httpTraceHdrs(server.VerifyFileHandler)).
Queries(restQueries(storageRESTVolume, storageRESTFilePath)...)
subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodWalkDir).HandlerFunc(httpTraceHdrs(server.WalkDirHandler)).
Queries(restQueries(storageRESTVolume, storageRESTDirPath, storageRESTRecursive)...)
}
}
}

View File

@ -362,7 +362,7 @@ func testStorageAPIDeleteFile(t *testing.T, storage StorageAPI) {
}
for i, testCase := range testCases {
err := storage.DeleteFile(context.Background(), testCase.volumeName, testCase.objectName)
err := storage.Delete(context.Background(), testCase.volumeName, testCase.objectName, false)
expectErr := (err != nil)
if expectErr != testCase.expectErr {

View File

@ -34,6 +34,7 @@ import (
"encoding/pem"
"encoding/xml"
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
@ -65,8 +66,9 @@ import (
"github.com/minio/minio/pkg/hash"
)
// Tests should initNSLock only once.
func init() {
// TestMain to set up global env.
func TestMain(m *testing.M) {
flag.Parse()
globalActiveCred = auth.Credentials{
AccessKey: auth.DefaultAccessKey,
SecretKey: auth.DefaultSecretKey,
@ -89,8 +91,13 @@ func init() {
// Set as non-distributed.
globalIsDistErasure = false
// Disable printing console messages during tests.
color.Output = ioutil.Discard
if !testing.Verbose() {
// Disable printing console messages during tests.
color.Output = ioutil.Discard
logger.Disable = true
}
// Uncomment the following line to see trace logs during unit tests.
// logger.AddTarget(console.New())
// Set system resources to maximum.
setMaxResources()
@ -98,18 +105,16 @@ func init() {
// Initialize globalConsoleSys system
globalConsoleSys = NewConsoleLogger(context.Background())
logger.Disable = true
globalDNSCache = xhttp.NewDNSCache(3*time.Second, 10*time.Second)
initHelp()
resetTestGlobals()
// Uncomment the following line to see trace logs during unit tests.
// logger.AddTarget(console.New())
os.Exit(m.Run())
}
// concurreny level for certain parallel tests.
// concurrency level for certain parallel tests.
const testConcurrencyLevel = 10
///
@ -1874,10 +1879,13 @@ func ExecObjectLayerTest(t TestErrHandler, objTest objTestType) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
defer setObjectLayer(newObjectLayerFn())
objLayer, fsDir, err := prepareFS()
if err != nil {
t.Fatalf("Initialization of object layer failed for single node setup: %s", err)
}
setObjectLayer(objLayer)
newAllSubsystems()
@ -1893,11 +1901,12 @@ func ExecObjectLayerTest(t TestErrHandler, objTest objTestType) {
objTest(objLayer, FSTestStr, t)
newAllSubsystems()
objLayer, fsDirs, err := prepareErasureSets32(ctx)
if err != nil {
t.Fatalf("Initialization of object layer failed for Erasure setup: %s", err)
}
setObjectLayer(objLayer)
defer objLayer.Shutdown(context.Background())
initAllSubsystems(ctx, objLayer)

cmd/testdata/metacache.s2 (binary file not shown)

View File

@ -237,12 +237,12 @@ func (p *xlStorageDiskIDCheck) CheckFile(ctx context.Context, volume string, pat
return p.storage.CheckFile(ctx, volume, path)
}
func (p *xlStorageDiskIDCheck) DeleteFile(ctx context.Context, volume string, path string) (err error) {
func (p *xlStorageDiskIDCheck) Delete(ctx context.Context, volume string, path string, recursive bool) (err error) {
if err = p.checkDiskStale(); err != nil {
return err
}
return p.storage.DeleteFile(ctx, volume, path)
return p.storage.Delete(ctx, volume, path, recursive)
}
func (p *xlStorageDiskIDCheck) DeleteVersions(ctx context.Context, volume string, versions []FileInfo) (errs []error) {

View File

@ -1146,7 +1146,7 @@ func (s *xlStorage) DeleteVersions(ctx context.Context, volume string, versions
// DeleteVersion - deletes FileInfo metadata for path at `xl.meta`
func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi FileInfo) error {
if HasSuffix(path, SlashSeparator) {
return s.DeleteFile(ctx, volume, path)
return s.Delete(ctx, volume, path, false)
}
buf, err := s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile))
@ -1980,7 +1980,7 @@ func deleteFile(basePath, deletePath string, recursive bool) error {
}
// DeleteFile - delete a file at path.
func (s *xlStorage) DeleteFile(ctx context.Context, volume string, path string) (err error) {
func (s *xlStorage) Delete(ctx context.Context, volume string, path string, recursive bool) (err error) {
atomic.AddInt32(&s.activeIOCount, 1)
defer func() {
atomic.AddInt32(&s.activeIOCount, -1)
@ -2011,8 +2011,8 @@ func (s *xlStorage) DeleteFile(ctx context.Context, volume string, path string)
return err
}
// Delete file and delete parent directory as well if its empty.
return deleteFile(volumeDir, filePath, false)
// Delete file and delete parent directory as well if it's empty.
return deleteFile(volumeDir, filePath, recursive)
}
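With the recursive flag plumbed through, callers can now remove an entire directory tree in one storage call. A hedged sketch; the metacache path shown is illustrative:

// Sketch: drop a stale metacache directory recursively.
err := storage.Delete(ctx, minioMetaBucket, "buckets/mybucket/.metacache/abc", true)
if err != nil {
	logger.LogIf(ctx, err)
}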
func (s *xlStorage) DeleteFileBulk(volume string, paths []string) (errs []error, err error) {

View File

@ -817,14 +817,14 @@ func TestXLStorageXlStorageListDir(t *testing.T) {
t.Fatalf("Unable to initialize xlStorage, %s", err)
}
if err = xlStorageNew.DeleteFile(context.Background(), "mybucket", "myobject"); err != errFileAccessDenied {
if err = xlStorageNew.Delete(context.Background(), "mybucket", "myobject", false); err != errFileAccessDenied {
t.Errorf("expected: %s, got: %s", errFileAccessDenied, err)
}
}
// TestXLStorage for delete on a removed disk.
// should fail with disk not found.
err = xlStorageDeletedStorage.DeleteFile(context.Background(), "del-vol", "my-file")
err = xlStorageDeletedStorage.Delete(context.Background(), "del-vol", "my-file", false)
if err != errDiskNotFound {
t.Errorf("Expected: \"Disk not found\", got \"%s\"", err)
}
@ -878,7 +878,7 @@ func TestXLStorageDeleteFile(t *testing.T) {
expectedErr: nil,
},
// TestXLStorage case - 2.
// The file was deleted in the last case, so DeleteFile should fail.
// The file was deleted in the last case, so Delete should fail.
{
srcVol: "success-vol",
srcPath: "success-file",
@ -916,7 +916,7 @@ func TestXLStorageDeleteFile(t *testing.T) {
}
for i, testCase := range testCases {
if err = xlStorage.DeleteFile(context.Background(), testCase.srcVol, testCase.srcPath); err != testCase.expectedErr {
if err = xlStorage.Delete(context.Background(), testCase.srcVol, testCase.srcPath, false); err != testCase.expectedErr {
t.Errorf("TestXLStorage case %d: Expected: \"%s\", got: \"%s\"", i+1, testCase.expectedErr, err)
}
}
@ -941,14 +941,14 @@ func TestXLStorageDeleteFile(t *testing.T) {
t.Fatalf("Unable to initialize xlStorage, %s", err)
}
if err = xlStorageNew.DeleteFile(context.Background(), "mybucket", "myobject"); err != errFileAccessDenied {
if err = xlStorageNew.Delete(context.Background(), "mybucket", "myobject", false); err != errFileAccessDenied {
t.Errorf("expected: %s, got: %s", errFileAccessDenied, err)
}
}
// TestXLStorage for delete on a removed disk.
// should fail with disk not found.
err = xlStorageDeletedStorage.DeleteFile(context.Background(), "del-vol", "my-file")
err = xlStorageDeletedStorage.Delete(context.Background(), "del-vol", "my-file", false)
if err != errDiskNotFound {
t.Errorf("Expected: \"Disk not found\", got \"%s\"", err)
}
@ -1671,7 +1671,7 @@ func TestXLStorageVerifyFile(t *testing.T) {
t.Fatal("expected to fail bitrot check")
}
if err := xlStorage.DeleteFile(context.Background(), volName, fileName); err != nil {
if err := xlStorage.Delete(context.Background(), volName, fileName, false); err != nil {
t.Fatal(err)
}

View File

@ -68,7 +68,7 @@ func TestUNCPaths(t *testing.T) {
} else if err == nil && !test.pass {
t.Error(err)
}
fs.DeleteFile(context.Background(), "voldir", test.objName)
fs.Delete(context.Background(), "voldir", test.objName, false)
})
}
}