diff --git a/cmd/admin-handlers.go b/cmd/admin-handlers.go index 871df24e3..0f3a66554 100644 --- a/cmd/admin-handlers.go +++ b/cmd/admin-handlers.go @@ -1035,7 +1035,7 @@ func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) { // Use buffered channel to take care of burst sends or slow w.Write() traceCh := make(chan interface{}, 4000) - peers := newPeerRestClients(globalEndpoints) + peers, _ := newPeerRestClients(globalEndpoints) globalHTTPTrace.Subscribe(traceCh, ctx.Done(), func(entry interface{}) bool { return mustTrace(entry, trcAll, trcErr) @@ -1103,7 +1103,7 @@ func (a adminAPIHandlers) ConsoleLogHandler(w http.ResponseWriter, r *http.Reque logCh := make(chan interface{}, 4000) - peers := newPeerRestClients(globalEndpoints) + peers, _ := newPeerRestClients(globalEndpoints) globalConsoleSys.Subscribe(logCh, ctx.Done(), node, limitLines, logKind, nil) diff --git a/cmd/admin-router.go b/cmd/admin-router.go index 3c52c1ce5..8865c6b93 100644 --- a/cmd/admin-router.go +++ b/cmd/admin-router.go @@ -221,5 +221,5 @@ func registerAdminRouter(router *mux.Router, enableConfigOps, enableIAMOps bool) // If none of the routes match add default error handler routes adminRouter.NotFoundHandler = httpTraceAll(errorResponseHandler) - adminRouter.MethodNotAllowedHandler = httpTraceAll(errorResponseHandler) + adminRouter.MethodNotAllowedHandler = httpTraceAll(methodNotAllowedHandler("Admin")) } diff --git a/cmd/api-response.go b/cmd/api-response.go index 968a46b8d..7f99d81ff 100644 --- a/cmd/api-response.go +++ b/cmd/api-response.go @@ -35,11 +35,11 @@ import ( const ( // RFC3339 a subset of the ISO8601 timestamp format. e.g 2014-04-29T18:30:38Z - iso8601TimeFormat = "2006-01-02T15:04:05.000Z" // Reply date format with nanosecond precision. - maxObjectList = 1000 // Limit number of objects in a listObjectsResponse/listObjectsVersionsResponse. - maxDeleteList = 10000 // Limit number of objects deleted in a delete call. 
- maxUploadsList = 10000 // Limit number of uploads in a listUploadsResponse. - maxPartsList = 10000 // Limit number of parts in a listPartsResponse. + iso8601TimeFormat = "2006-01-02T15:04:05.000Z" // Reply date format with nanosecond precision. + maxObjectList = metacacheBlockSize - (metacacheBlockSize / 10) // Limit number of objects in a listObjectsResponse/listObjectsVersionsResponse. + maxDeleteList = 10000 // Limit number of objects deleted in a delete call. + maxUploadsList = 10000 // Limit number of uploads in a listUploadsResponse. + maxPartsList = 10000 // Limit number of parts in a listPartsResponse. ) // LocationResponse - format for location response. diff --git a/cmd/api-router.go b/cmd/api-router.go index f69945dc9..2af41df4b 100644 --- a/cmd/api-router.go +++ b/cmd/api-router.go @@ -44,6 +44,12 @@ func newCachedObjectLayerFn() CacheObjectLayer { return globalCacheObjectAPI } +func setObjectLayer(o ObjectLayer) { + globalObjLayerMutex.Lock() + globalObjectAPI = o + globalObjLayerMutex.Unlock() +} + // objectAPIHandler implements and provides http handlers for S3 API. 
type objectAPIHandlers struct { ObjectAPI func() ObjectLayer @@ -320,7 +326,7 @@ func registerAPIRouter(router *mux.Router) { // If none of the routes match add default error handler routes apiRouter.NotFoundHandler = collectAPIStats("notfound", httpTraceAll(errorResponseHandler)) - apiRouter.MethodNotAllowedHandler = collectAPIStats("methodnotallowed", httpTraceAll(errorResponseHandler)) + apiRouter.MethodNotAllowedHandler = collectAPIStats("methodnotallowed", httpTraceAll(methodNotAllowedHandler("S3"))) } diff --git a/cmd/background-newdisks-heal-ops.go b/cmd/background-newdisks-heal-ops.go index 5bfc28242..33808db9d 100644 --- a/cmd/background-newdisks-heal-ops.go +++ b/cmd/background-newdisks-heal-ops.go @@ -175,8 +175,8 @@ func monitorLocalDisksAndHeal(ctx context.Context, z *erasureServerSets, bgSeq * logger.Info("Healing disk '%s' on %s zone complete", disk, humanize.Ordinal(i+1)) - if err := disk.DeleteFile(ctx, pathJoin(minioMetaBucket, bucketMetaPrefix), - healingTrackerFilename); err != nil && !errors.Is(err, errFileNotFound) { + if err := disk.Delete(ctx, pathJoin(minioMetaBucket, bucketMetaPrefix), + healingTrackerFilename, false); err != nil && !errors.Is(err, errFileNotFound) { logger.LogIf(ctx, err) continue } diff --git a/cmd/bucket-handlers_test.go b/cmd/bucket-handlers_test.go index a0df1724f..70c5bdd44 100644 --- a/cmd/bucket-handlers_test.go +++ b/cmd/bucket-handlers_test.go @@ -19,7 +19,6 @@ package cmd import ( "bytes" "encoding/xml" - "fmt" "io/ioutil" "net/http" "net/http/httptest" @@ -837,7 +836,7 @@ func testAPIDeleteMultipleObjectsHandler(obj ObjectLayer, instanceType, bucketNa // Verify whether the bucket obtained object is same as the one created. 
if testCase.expectedContent != nil && !bytes.Equal(testCase.expectedContent, actualContent) { - fmt.Println(string(testCase.expectedContent), string(actualContent)) + t.Log(string(testCase.expectedContent), string(actualContent)) t.Errorf("Test %d : MinIO %s: Object content differs from expected value.", i+1, instanceType) } } diff --git a/cmd/bucket-listobjects-handlers.go b/cmd/bucket-listobjects-handlers.go index 30c09c899..93e8997de 100644 --- a/cmd/bucket-listobjects-handlers.go +++ b/cmd/bucket-listobjects-handlers.go @@ -18,7 +18,6 @@ package cmd import ( "context" - "fmt" "net/http" "strconv" "strings" @@ -114,15 +113,6 @@ func (api objectAPIHandlers) ListObjectVersionsHandler(w http.ResponseWriter, r return } - // Forward the request using Source IP or bucket - forwardStr := handlers.GetSourceIPFromHeaders(r) - if forwardStr == "" { - forwardStr = bucket - } - if proxyRequestByStringHash(ctx, w, r, forwardStr) { - return - } - listObjectVersions := objectAPI.ListObjectVersions // Inititate a list object versions operation based on the input params. @@ -145,7 +135,7 @@ func (api objectAPIHandlers) ListObjectVersionsHandler(w http.ResponseWriter, r // ListObjectsV2MHandler - GET Bucket (List Objects) Version 2 with metadata. // -------------------------- // This implementation of the GET operation returns some or all (up to 10000) -// of the objects in a bucket. 
You can use the request parame 0 { + logger.Info(color.Green("dataUpdateTracker:")+" no bucket (%s)", dir) + } + return d.current() + } + if isReservedOrInvalidBucket(bucket, false) { + if d.debug { + logger.Info(color.Green("dataUpdateTracker:")+" isReservedOrInvalidBucket: %v, entry: %v", bucket, dir) + } + return d.current() + } + + d.mu.Lock() + defer d.mu.Unlock() + if d.Current.bf.containsDir(dir) || d.Current.idx == 0 { + return d.Current.idx + } + if d.debug { + logger.Info("current bloom does NOT contains dir %s", dir) + } + + idx := d.Current.idx - 1 + for { + f := d.History.find(idx) + if f == nil || f.bf.containsDir(dir) || idx == 0 { + break + } + idx-- + } + return idx +} + // start will load the current data from the drives start collecting information and // start a saver goroutine. // All of these will exit when the context is canceled. @@ -445,26 +482,30 @@ func (d *dataUpdateTracker) startCollector(ctx context.Context) { case <-ctx.Done(): return case in := <-d.input: + if d.debug { + logger.Info(color.Green("dataUpdateTracker:")+" got (%s)", in) + } + bucket, _ := path2BucketObjectWithBasePath("", in) if bucket == "" { if d.debug && len(in) > 0 { - logger.Info(color.Green("data-usage:")+" no bucket (%s)", in) + logger.Info(color.Green("dataUpdateTracker:")+" no bucket (%s)", in) } continue } if isReservedOrInvalidBucket(bucket, false) { - if false && d.debug { - logger.Info(color.Green("data-usage:")+" isReservedOrInvalidBucket: %v, entry: %v", bucket, in) + if d.debug { + logger.Info(color.Green("dataUpdateTracker:")+" isReservedOrInvalidBucket: %v, entry: %v", bucket, in) } continue } split := splitPathDeterministic(in) - // Add all paths until level 3. + // Add all paths until done. 
d.mu.Lock() for i := range split { - if d.debug && false { + if d.debug { logger.Info(color.Green("dataUpdateTracker:") + " Marking path dirty: " + color.Blue(path.Join(split[:i+1]...))) } d.Current.bf.AddString(hashPath(path.Join(split[:i+1]...)).String()) @@ -534,8 +575,13 @@ func (d *dataUpdateTracker) filterFrom(ctx context.Context, oldest, newest uint6 // cycleFilter will cycle the bloom filter to start recording to index y if not already. // The response will contain a bloom filter starting at index x up to, but not including index y. // If y is 0, the response will not update y, but return the currently recorded information -// from the up until and including current y. -func (d *dataUpdateTracker) cycleFilter(ctx context.Context, oldest, current uint64) (*bloomFilterResponse, error) { +// from the oldest (unless 0, then it will be all) until and including current y. +func (d *dataUpdateTracker) cycleFilter(ctx context.Context, req bloomFilterRequest) (*bloomFilterResponse, error) { + if req.OldestClean != "" { + return &bloomFilterResponse{OldestIdx: d.latestWithDir(req.OldestClean)}, nil + } + current := req.Current + oldest := req.Oldest d.mu.Lock() defer d.mu.Unlock() if current == 0 { @@ -543,7 +589,10 @@ func (d *dataUpdateTracker) cycleFilter(ctx context.Context, oldest, current uin return d.filterFrom(ctx, d.Current.idx, d.Current.idx), nil } d.History.sort() - return d.filterFrom(ctx, d.History[len(d.History)-1].idx, d.Current.idx), nil + if oldest == 0 { + oldest = d.History[len(d.History)-1].idx + } + return d.filterFrom(ctx, oldest, d.Current.idx), nil } // Move current to history if new one requested @@ -587,10 +636,6 @@ func splitPathDeterministic(in string) []string { split = split[:len(split)-1] } - // Return up to 3 parts. 
- if len(split) > 3 { - split = split[:3] - } return split } @@ -599,6 +644,9 @@ func splitPathDeterministic(in string) []string { type bloomFilterRequest struct { Oldest uint64 Current uint64 + // If set the oldest clean version will be returned in OldestIdx + // and the rest of the request will be ignored. + OldestClean string } type bloomFilterResponse struct { @@ -617,6 +665,9 @@ type bloomFilterResponse struct { // ObjectPathUpdated indicates a path has been updated. // The function will never block. func ObjectPathUpdated(s string) { + if strings.HasPrefix(s, minioMetaBucket) { + return + } select { case objectUpdatedCh <- s: default: diff --git a/cmd/data-update-tracker_test.go b/cmd/data-update-tracker_test.go index 7ca9496ec..4eaf38590 100644 --- a/cmd/data-update-tracker_test.go +++ b/cmd/data-update-tracker_test.go @@ -169,7 +169,12 @@ func TestDataUpdateTracker(t *testing.T) { }) } // Cycle to history - _, err = dut.cycleFilter(ctx, 1, 2) + req := bloomFilterRequest{ + Oldest: 1, + Current: 2, + } + + _, err = dut.cycleFilter(ctx, req) if err != nil { t.Fatal(err) } @@ -200,7 +205,11 @@ func TestDataUpdateTracker(t *testing.T) { if dut.current() != 2 { t.Fatal("current idx after load not preserved. 
want 2, got:", dut.current()) } - bfr2, err := dut.cycleFilter(ctx, 1, 3) + req = bloomFilterRequest{ + Oldest: 1, + Current: 3, + } + bfr2, err := dut.cycleFilter(ctx, req) if err != nil { t.Fatal(err) } diff --git a/cmd/endpoint.go b/cmd/endpoint.go index ed56ebe69..a16398729 100644 --- a/cmd/endpoint.go +++ b/cmd/endpoint.go @@ -26,17 +26,19 @@ import ( "path/filepath" "reflect" "runtime" + "sort" "strconv" "strings" "time" - humanize "github.com/dustin/go-humanize" + "github.com/dustin/go-humanize" "github.com/minio/minio-go/v7/pkg/set" "github.com/minio/minio/cmd/config" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/rest" "github.com/minio/minio/pkg/env" "github.com/minio/minio/pkg/mountinfo" + xnet "github.com/minio/minio/pkg/net" ) // EndpointType - enum for endpoint type. @@ -269,6 +271,52 @@ func (l EndpointServerSets) Hostnames() []string { return foundSet.ToSlice() } +// hostsSorted will return all hosts found. +// The LOCAL host will be nil, but the indexes of all hosts should +// remain consistent across the cluster. +func (l EndpointServerSets) hostsSorted() []*xnet.Host { + peers, localPeer := l.peers() + sort.Strings(peers) + hosts := make([]*xnet.Host, len(peers)) + for i, hostStr := range peers { + if hostStr == localPeer { + continue + } + host, err := xnet.ParseHost(hostStr) + if err != nil { + logger.LogIf(GlobalContext, err) + continue + } + hosts[i] = host + } + + return hosts +} + +// peers will return all peers, including local. +// The local peer is returned as a separate string. 
+func (l EndpointServerSets) peers() (peers []string, local string) { + allSet := set.NewStringSet() + for _, ep := range l { + for _, endpoint := range ep.Endpoints { + if endpoint.Type() != URLEndpointType { + continue + } + + peer := endpoint.Host + if endpoint.IsLocal { + if _, port := mustSplitHostPort(peer); port == globalMinioPort { + local = peer + } + } + + allSet.Add(peer) + } + } + + return allSet.ToSlice(), local +} + // Endpoints - list of same type of endpoint. type Endpoints []Endpoint @@ -712,28 +760,6 @@ func GetLocalPeer(endpointServerSets EndpointServerSets) (localPeer string) { return peerSet.ToSlice()[0] } -// GetRemotePeers - get hosts information other than this minio service. -func GetRemotePeers(endpointServerSets EndpointServerSets) []string { - peerSet := set.NewStringSet() - for _, ep := range endpointServerSets { - for _, endpoint := range ep.Endpoints { - if endpoint.Type() != URLEndpointType { - continue - } - - peer := endpoint.Host - if endpoint.IsLocal { - if _, port := mustSplitHostPort(peer); port == globalMinioPort { - continue - } - } - - peerSet.Add(peer) - } - } - return peerSet.ToSlice() -} - // GetProxyEndpointLocalIndex returns index of the local proxy endpoint func GetProxyEndpointLocalIndex(proxyEps []ProxyEndpoint) int { for i, pep := range proxyEps { diff --git a/cmd/endpoint_test.go b/cmd/endpoint_test.go index f06695e93..145c10420 100644 --- a/cmd/endpoint_test.go +++ b/cmd/endpoint_test.go @@ -380,24 +380,28 @@ func TestGetRemotePeers(t *testing.T) { testCases := []struct { endpointArgs []string expectedResult []string + expectedLocal string }{ - {[]string{"/d1", "/d2", "d3", "d4"}, []string{}}, - {[]string{"http://localhost:9000/d1", "http://localhost:9000/d2", "http://example.org:9000/d3", "http://example.com:9000/d4"}, []string{"example.com:9000", "example.org:9000"}}, - {[]string{"http://localhost:9000/d1", "http://localhost:10000/d2", "http://example.org:9000/d3", "http://example.com:9000/d4"}, 
[]string{"example.com:9000", "example.org:9000", "localhost:10000"}}, - {[]string{"http://localhost:9000/d1", "http://example.org:9000/d2", "http://example.com:9000/d3", "http://example.net:9000/d4"}, []string{"example.com:9000", "example.net:9000", "example.org:9000"}}, - {[]string{"http://localhost:9000/d1", "http://localhost:9001/d2", "http://localhost:9002/d3", "http://localhost:9003/d4"}, []string{"localhost:9001", "localhost:9002", "localhost:9003"}}, + {[]string{"/d1", "/d2", "d3", "d4"}, []string{}, ""}, + {[]string{"http://localhost:9000/d1", "http://localhost:9000/d2", "http://example.org:9000/d3", "http://example.com:9000/d4"}, []string{"example.com:9000", "example.org:9000", "localhost:9000"}, "localhost:9000"}, + {[]string{"http://localhost:9000/d1", "http://localhost:10000/d2", "http://example.org:9000/d3", "http://example.com:9000/d4"}, []string{"example.com:9000", "example.org:9000", "localhost:10000", "localhost:9000"}, "localhost:9000"}, + {[]string{"http://localhost:9000/d1", "http://example.org:9000/d2", "http://example.com:9000/d3", "http://example.net:9000/d4"}, []string{"example.com:9000", "example.net:9000", "example.org:9000", "localhost:9000"}, "localhost:9000"}, + {[]string{"http://localhost:9000/d1", "http://localhost:9001/d2", "http://localhost:9002/d3", "http://localhost:9003/d4"}, []string{"localhost:9000", "localhost:9001", "localhost:9002", "localhost:9003"}, "localhost:9000"}, } for _, testCase := range testCases { zendpoints := mustGetZoneEndpoints(testCase.endpointArgs...) 
if !zendpoints[0].Endpoints[0].IsLocal { if err := zendpoints[0].Endpoints.UpdateIsLocal(false); err != nil { - t.Fatalf("error: expected = , got = %v", err) + t.Errorf("error: expected = , got = %v", err) } } - remotePeers := GetRemotePeers(zendpoints) + remotePeers, local := zendpoints.peers() if !reflect.DeepEqual(remotePeers, testCase.expectedResult) { - t.Fatalf("expected: %v, got: %v", testCase.expectedResult, remotePeers) + t.Errorf("expected: %v, got: %v", testCase.expectedResult, remotePeers) + } + if local != testCase.expectedLocal { + t.Errorf("expected: %v, got: %v", testCase.expectedLocal, local) } } } diff --git a/cmd/erasure-bucket.go b/cmd/erasure-bucket.go index 08d45e559..5a3dba98d 100644 --- a/cmd/erasure-bucket.go +++ b/cmd/erasure-bucket.go @@ -158,6 +158,7 @@ func deleteDanglingBucket(ctx context.Context, storageDisks []StorageAPI, dErrs // DeleteBucket - deletes a bucket. func (er erasureObjects) DeleteBucket(ctx context.Context, bucket string, forceDelete bool) error { // Collect if all disks report volume not found. + defer ObjectPathUpdated(bucket + slashSeparator) storageDisks := er.getDisks() g := errgroup.WithNErrs(len(storageDisks)) diff --git a/cmd/erasure-common.go b/cmd/erasure-common.go index 174bc2a1a..3591d9d7e 100644 --- a/cmd/erasure-common.go +++ b/cmd/erasure-common.go @@ -51,6 +51,7 @@ func (er erasureObjects) getOnlineDisks() (newDisks []StorageAPI) { } di, err := disks[i-1].DiskInfo(context.Background()) if err != nil || di.Healing { + // - Do not consume disks which are not reachable // unformatted or simply not accessible for some reason. 
// diff --git a/cmd/erasure-encode_test.go b/cmd/erasure-encode_test.go index bb78b4dab..d3972f728 100644 --- a/cmd/erasure-encode_test.go +++ b/cmd/erasure-encode_test.go @@ -195,7 +195,7 @@ func benchmarkErasureEncode(data, parity, dataDown, parityDown int, size int64, if disk == OfflineDisk { continue } - disk.DeleteFile(context.Background(), "testbucket", "object") + disk.Delete(context.Background(), "testbucket", "object", false) writers[i] = newBitrotWriter(disk, "testbucket", "object", erasure.ShardFileSize(size), DefaultBitrotAlgorithm, erasure.ShardSize()) } _, err := erasure.Encode(context.Background(), bytes.NewReader(content), writers, buffer, erasure.dataBlocks+1) diff --git a/cmd/erasure-healing-common_test.go b/cmd/erasure-healing-common_test.go index b8aa31b27..8280a5f83 100644 --- a/cmd/erasure-healing-common_test.go +++ b/cmd/erasure-healing-common_test.go @@ -211,7 +211,7 @@ func TestListOnlineDisks(t *testing.T) { // and check if that disk // appears in outDatedDisks. tamperedIndex = index - dErr := erasureDisks[index].DeleteFile(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1")) + dErr := erasureDisks[index].Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false) if dErr != nil { t.Fatalf("Test %d: Failed to delete %s - %v", i+1, filepath.Join(object, "part.1"), dErr) diff --git a/cmd/erasure-healing.go b/cmd/erasure-healing.go index 754aa5e2c..522c5c499 100644 --- a/cmd/erasure-healing.go +++ b/cmd/erasure-healing.go @@ -21,6 +21,7 @@ import ( "errors" "fmt" "io" + "path" "sync" "time" @@ -304,6 +305,10 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s }) } + if isAllNotFound(errs) { + return defaultHealResult(latestFileInfo, storageDisks, storageEndpoints, errs, bucket, object), nil + } + // If less than read quorum number of disks have all the parts // of the data, we can't reconstruct the erasure-coded data. 
if numAvailableDisks < dataBlocks { @@ -342,6 +347,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s if pErr != nil { return result, toObjectErr(pErr, bucket, object) } + defer ObjectPathUpdated(pathJoin(bucket, object)) cleanFileInfo := func(fi FileInfo) FileInfo { // Returns a copy of the 'fi' with checksums and parts nil'ed. @@ -518,10 +524,11 @@ func (er erasureObjects) healObjectDir(ctx context.Context, bucket, object strin wg.Add(1) go func(index int, disk StorageAPI) { defer wg.Done() - _ = disk.DeleteFile(ctx, bucket, object) + _ = disk.Delete(ctx, bucket, object, false) }(index, disk) } wg.Wait() + ObjectPathUpdated(path.Join(bucket, object)) } } @@ -544,7 +551,7 @@ func (er erasureObjects) healObjectDir(ctx context.Context, bucket, object strin hr.After.Drives[i] = madmin.HealDriveInfo{Endpoint: drive, State: madmin.DriveStateCorrupt} } } - if dryRun || danglingObject { + if dryRun || danglingObject || isAllNotFound(errs) { return hr, nil } for i, err := range errs { @@ -649,9 +656,23 @@ func statAllDirs(ctx context.Context, storageDisks []StorageAPI, bucket, prefix return g.Wait() } +// isAllNotFound will return if any element of the error slice is not +// errFileNotFound, errFileVersionNotFound or errVolumeNotFound. +// A 0 length slice will always return false. +func isAllNotFound(errs []error) bool { + for _, err := range errs { + if errors.Is(err, errFileNotFound) || errors.Is(err, errVolumeNotFound) || errors.Is(err, errFileVersionNotFound) { + continue + } + return false + } + return len(errs) > 0 +} + // ObjectDir is considered dangling/corrupted if any only // if total disks - a combination of corrupted and missing // files is lesser than N/2+1 number of disks. +// If no files were found false will be returned. 
func isObjectDirDangling(errs []error) (ok bool) { var found int var notFound int @@ -668,7 +689,8 @@ func isObjectDirDangling(errs []error) (ok bool) { otherFound++ } } - return found+foundNotEmpty+otherFound < notFound + found = found + foundNotEmpty + otherFound + return found < notFound && found > 0 } // Object is considered dangling/corrupted if any only @@ -748,6 +770,10 @@ func (er erasureObjects) HealObject(ctx context.Context, bucket, object, version // Read metadata files from all the disks partsMetadata, errs := readAllFileInfo(healCtx, storageDisks, bucket, object, versionID) + if isAllNotFound(errs) { + // Nothing to do + return defaultHealResult(FileInfo{}, storageDisks, storageEndpoints, errs, bucket, object), nil + } // Check if the object is dangling, if yes and user requested // remove we simply delete it from namespace. if m, ok := isObjectDangling(partsMetadata, errs, []error{}); ok { diff --git a/cmd/erasure-healing_test.go b/cmd/erasure-healing_test.go index b00041a83..1d5c742d6 100644 --- a/cmd/erasure-healing_test.go +++ b/cmd/erasure-healing_test.go @@ -201,7 +201,7 @@ func TestHealObjectCorrupted(t *testing.T) { er := z.serverSets[0].sets[0] erasureDisks := er.getDisks() firstDisk := erasureDisks[0] - err = firstDisk.DeleteFile(context.Background(), bucket, pathJoin(object, xlStorageFormatFile)) + err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false) if err != nil { t.Fatalf("Failed to delete a file - %v", err) } @@ -221,7 +221,7 @@ func TestHealObjectCorrupted(t *testing.T) { t.Errorf("Expected er.meta file to be present but stat failed - %v", err) } - err = firstDisk.DeleteFile(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1")) + err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false) if err != nil { t.Errorf("Failure during deleting part.1 - %v", err) } @@ -246,7 +246,7 @@ func TestHealObjectCorrupted(t *testing.T) { 
t.Fatalf("FileInfo not equal after healing") } - err = firstDisk.DeleteFile(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1")) + err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false) if err != nil { t.Errorf("Failure during deleting part.1 - %v", err) } @@ -275,7 +275,7 @@ func TestHealObjectCorrupted(t *testing.T) { // Test 4: checks if HealObject returns an error when xl.meta is not found // in more than read quorum number of disks, to create a corrupted situation. for i := 0; i <= len(er.getDisks())/2; i++ { - er.getDisks()[i].DeleteFile(context.Background(), bucket, pathJoin(object, xlStorageFormatFile)) + er.getDisks()[i].Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false) } // Try healing now, expect to receive errFileNotFound. @@ -351,7 +351,7 @@ func TestHealObjectErasure(t *testing.T) { t.Fatalf("Failed to complete multipart upload - %v", err) } - err = firstDisk.DeleteFile(context.Background(), bucket, pathJoin(object, xlStorageFormatFile)) + err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, xlStorageFormatFile), false) if err != nil { t.Fatalf("Failed to delete a file - %v", err) } diff --git a/cmd/erasure-multipart.go b/cmd/erasure-multipart.go index 15c5c65a7..25390cb82 100644 --- a/cmd/erasure-multipart.go +++ b/cmd/erasure-multipart.go @@ -81,7 +81,7 @@ func (er erasureObjects) removeObjectPart(bucket, object, uploadID, dataDir stri // Ignoring failure to remove parts that weren't present in CompleteMultipartUpload // requests. xl.meta is the authoritative source of truth on which parts constitute // the object. The presence of parts that don't belong in the object doesn't affect correctness. 
- _ = storageDisks[index].DeleteFile(context.TODO(), minioMetaMultipartBucket, curpartPath) + _ = storageDisks[index].Delete(context.TODO(), minioMetaMultipartBucket, curpartPath, false) return nil }, index) } diff --git a/cmd/erasure-object.go b/cmd/erasure-object.go index e2a15cebb..2e899db39 100644 --- a/cmd/erasure-object.go +++ b/cmd/erasure-object.go @@ -177,7 +177,7 @@ func (er erasureObjects) GetObjectNInfo(ctx context.Context, bucket, object stri pr, pw := io.Pipe() go func() { - err := er.getObjectWithFileInfo(ctx, bucket, object, off, length, pw, "", opts, fi, metaArr, onlineDisks) + err := er.getObjectWithFileInfo(ctx, bucket, object, off, length, pw, fi, metaArr, onlineDisks) pw.CloseWithError(err) }() @@ -214,11 +214,10 @@ func (er erasureObjects) GetObject(ctx context.Context, bucket, object string, s return errUnexpected } - return er.getObject(ctx, bucket, object, startOffset, length, writer, etag, opts) + return er.getObject(ctx, bucket, object, startOffset, length, writer, opts) } -func (er erasureObjects) getObjectWithFileInfo(ctx context.Context, bucket, object string, startOffset int64, length int64, writer io.Writer, etag string, opts ObjectOptions, fi FileInfo, metaArr []FileInfo, onlineDisks []StorageAPI) error { - +func (er erasureObjects) getObjectWithFileInfo(ctx context.Context, bucket, object string, startOffset int64, length int64, writer io.Writer, fi FileInfo, metaArr []FileInfo, onlineDisks []StorageAPI) error { // Reorder online disks based on erasure distribution order. // Reorder parts metadata based on erasure distribution order. 
onlineDisks, metaArr = shuffleDisksAndPartsMetadataByIndex(onlineDisks, metaArr, fi.Erasure.Distribution) @@ -325,7 +324,7 @@ func (er erasureObjects) getObjectWithFileInfo(ctx context.Context, bucket, obje } // getObject wrapper for erasure GetObject -func (er erasureObjects) getObject(ctx context.Context, bucket, object string, startOffset int64, length int64, writer io.Writer, etag string, opts ObjectOptions) error { +func (er erasureObjects) getObject(ctx context.Context, bucket, object string, startOffset, length int64, writer io.Writer, opts ObjectOptions) error { fi, metaArr, onlineDisks, err := er.getObjectFileInfo(ctx, bucket, object, opts) if err != nil { return toObjectErr(err, bucket, object) @@ -338,7 +337,7 @@ func (er erasureObjects) getObject(ctx context.Context, bucket, object string, s return toObjectErr(errMethodNotAllowed, bucket, object) } - return er.getObjectWithFileInfo(ctx, bucket, object, startOffset, length, writer, etag, opts, fi, metaArr, onlineDisks) + return er.getObjectWithFileInfo(ctx, bucket, object, startOffset, length, writer, fi, metaArr, onlineDisks) } // GetObjectInfo - reads object metadata and replies back ObjectInfo. @@ -426,6 +425,9 @@ func undoRename(disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry str // Similar to rename but renames data from srcEntry to dstEntry at dataDir func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry, dataDir, dstBucket, dstEntry string, writeQuorum int, ignoredErr []error) ([]StorageAPI, error) { dataDir = retainSlash(dataDir) + defer ObjectPathUpdated(path.Join(srcBucket, srcEntry)) + defer ObjectPathUpdated(path.Join(dstBucket, dstEntry)) + g := errgroup.WithNErrs(len(disks)) // Rename file on all underlying storage disks. @@ -473,11 +475,12 @@ func renameData(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry, da // rename - common function that renamePart and renameObject use to rename // the respective underlying storage layer representations. 
func rename(ctx context.Context, disks []StorageAPI, srcBucket, srcEntry, dstBucket, dstEntry string, isDir bool, writeQuorum int, ignoredErr []error) ([]StorageAPI, error) { - if isDir { dstEntry = retainSlash(dstEntry) srcEntry = retainSlash(srcEntry) } + defer ObjectPathUpdated(path.Join(srcBucket, srcEntry)) + defer ObjectPathUpdated(path.Join(dstBucket, dstEntry)) g := errgroup.WithNErrs(len(disks)) @@ -705,10 +708,9 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st } func (er erasureObjects) deleteObjectVersion(ctx context.Context, bucket, object string, writeQuorum int, fi FileInfo) error { + defer ObjectPathUpdated(pathJoin(bucket, object)) disks := er.getDisks() - g := errgroup.WithNErrs(len(disks)) - for index := range disks { index := index g.Go(func() error { @@ -853,6 +855,7 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec } errs[objIndex] = reduceWriteQuorumErrs(ctx, diskErrs, objectOpIgnoredErrs, writeQuorums[objIndex]) if errs[objIndex] == nil { + ObjectPathUpdated(pathJoin(bucket, objects[objIndex].ObjectName)) if versions[objIndex].Deleted { dobjects[objIndex] = DeletedObject{ DeleteMarker: versions[objIndex].Deleted, @@ -892,6 +895,7 @@ func (er erasureObjects) DeleteObjects(ctx context.Context, bucket string, objec // any error as it is not necessary for the handler to reply back a // response to the client request. func (er erasureObjects) DeleteObject(ctx context.Context, bucket, object string, opts ObjectOptions) (objInfo ObjectInfo, err error) { + defer ObjectPathUpdated(path.Join(bucket, object)) goi, gerr := er.GetObjectInfo(ctx, bucket, object, opts) if gerr != nil && goi.Name == "" { switch gerr.(type) { @@ -1021,6 +1025,66 @@ func (er erasureObjects) PutObjectTags(ctx context.Context, bucket, object strin return nil } +// updateObjectMeta will update the metadata of a file. 
+func (er erasureObjects) updateObjectMeta(ctx context.Context, bucket, object string, meta map[string]string, opts ObjectOptions) error { + if len(meta) == 0 { + return nil + } + disks := er.getDisks() + + // Read metadata associated with the object from all disks. + metaArr, errs := readAllFileInfo(ctx, disks, bucket, object, opts.VersionID) + + readQuorum, writeQuorum, err := objectQuorumFromMeta(ctx, er, metaArr, errs) + if err != nil { + return toObjectErr(err, bucket, object) + } + + // List all online disks. + _, modTime := listOnlineDisks(disks, metaArr, errs) + + // Pick latest valid metadata. + fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum) + if err != nil { + return toObjectErr(err, bucket, object) + } + + // Update metadata + for k, v := range meta { + fi.Metadata[k] = v + } + + if fi.Deleted { + if opts.VersionID == "" { + return toObjectErr(errFileNotFound, bucket, object) + } + return toObjectErr(errMethodNotAllowed, bucket, object) + } + + for i := range metaArr { + if errs[i] != nil { + // Avoid disks where loading metadata fail + continue + } + + metaArr[i].Metadata = fi.Metadata + } + + tempObj := mustGetUUID() + + // Write unique `xl.meta` for each disk. + if disks, err = writeUniqueFileInfo(ctx, disks, minioMetaTmpBucket, tempObj, metaArr, writeQuorum); err != nil { + return toObjectErr(err, bucket, object) + } + + // Atomically rename metadata from tmp location to destination for each disk. 
+ if _, err = renameFileInfo(ctx, disks, minioMetaTmpBucket, tempObj, bucket, object, writeQuorum); err != nil { + return toObjectErr(err, bucket, object) + } + + return nil +} + // DeleteObjectTags - delete object tags from an existing object func (er erasureObjects) DeleteObjectTags(ctx context.Context, bucket, object string, opts ObjectOptions) error { return er.PutObjectTags(ctx, bucket, object, "", opts) diff --git a/cmd/erasure-server-sets.go b/cmd/erasure-server-sets.go index 4481fd389..3e2b926bb 100644 --- a/cmd/erasure-server-sets.go +++ b/cmd/erasure-server-sets.go @@ -31,7 +31,6 @@ import ( "github.com/minio/minio-go/v7/pkg/set" "github.com/minio/minio-go/v7/pkg/tags" "github.com/minio/minio/cmd/config/storageclass" - xhttp "github.com/minio/minio/cmd/http" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/dsync" "github.com/minio/minio/pkg/madmin" @@ -659,274 +658,7 @@ func (z *erasureServerSets) ListObjectsV2(ctx context.Context, bucket, prefix, c return listObjectsV2Info, err } -func (z *erasureServerSets) listObjectsNonSlash(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (loi ListObjectsInfo, err error) { - - serverSetsEntryChs := make([][]FileInfoCh, 0, len(z.serverSets)) - serverSetsListTolerancePerSet := make([]int, 0, len(z.serverSets)) - - endWalkCh := make(chan struct{}) - defer close(endWalkCh) - - for _, zone := range z.serverSets { - serverSetsEntryChs = append(serverSetsEntryChs, - zone.startMergeWalksN(ctx, bucket, prefix, "", true, endWalkCh, zone.listTolerancePerSet, false)) - if zone.listTolerancePerSet == -1 { - serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.setDriveCount/2) - } else { - serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.listTolerancePerSet-2) - } - } - - var objInfos []ObjectInfo - var eof bool - var prevPrefix string - - serverSetsEntriesInfos := make([][]FileInfo, 0, len(serverSetsEntryChs)) - serverSetsEntriesValid := 
make([][]bool, 0, len(serverSetsEntryChs)) - for _, entryChs := range serverSetsEntryChs { - serverSetsEntriesInfos = append(serverSetsEntriesInfos, make([]FileInfo, len(entryChs))) - serverSetsEntriesValid = append(serverSetsEntriesValid, make([]bool, len(entryChs))) - } - - for { - if len(objInfos) == maxKeys { - break - } - - result, quorumCount, zoneIndex, ok := lexicallySortedEntryZone(serverSetsEntryChs, serverSetsEntriesInfos, serverSetsEntriesValid) - if !ok { - eof = true - break - } - - if quorumCount < serverSetsListTolerancePerSet[zoneIndex] { - // Skip entries which are not found on upto expected tolerance - continue - } - - var objInfo ObjectInfo - - index := strings.Index(strings.TrimPrefix(result.Name, prefix), delimiter) - if index == -1 { - objInfo = ObjectInfo{ - IsDir: false, - Bucket: bucket, - Name: result.Name, - ModTime: result.ModTime, - Size: result.Size, - ContentType: result.Metadata["content-type"], - ContentEncoding: result.Metadata["content-encoding"], - } - - // Extract etag from metadata. - objInfo.ETag = extractETag(result.Metadata) - - // All the parts per object. - objInfo.Parts = result.Parts - - // etag/md5Sum has already been extracted. We need to - // remove to avoid it from appearing as part of - // response headers. e.g, X-Minio-* or X-Amz-*. 
- objInfo.UserDefined = cleanMetadata(result.Metadata) - - // Update storage class - if sc, ok := result.Metadata[xhttp.AmzStorageClass]; ok { - objInfo.StorageClass = sc - } else { - objInfo.StorageClass = globalMinioDefaultStorageClass - } - } else { - index = len(prefix) + index + len(delimiter) - currPrefix := result.Name[:index] - if currPrefix == prevPrefix { - continue - } - prevPrefix = currPrefix - - objInfo = ObjectInfo{ - Bucket: bucket, - Name: currPrefix, - IsDir: true, - } - } - - if objInfo.Name <= marker { - continue - } - - objInfos = append(objInfos, objInfo) - } - - result := ListObjectsInfo{} - for _, objInfo := range objInfos { - if objInfo.IsDir { - result.Prefixes = append(result.Prefixes, objInfo.Name) - continue - } - result.Objects = append(result.Objects, objInfo) - } - - if !eof { - result.IsTruncated = true - if len(objInfos) > 0 { - result.NextMarker = objInfos[len(objInfos)-1].Name - } - } - - return result, nil -} - -func (z *erasureServerSets) listObjectsSplunk(ctx context.Context, bucket, prefix, marker string, maxKeys int) (loi ListObjectsInfo, err error) { - if strings.Contains(prefix, guidSplunk) { - logger.LogIf(ctx, NotImplemented{}) - return loi, NotImplemented{} - } - - recursive := true - - serverSetsEntryChs := make([][]FileInfoCh, 0, len(z.serverSets)) - serverSetsEndWalkCh := make([]chan struct{}, 0, len(z.serverSets)) - serverSetsListTolerancePerSet := make([]int, 0, len(z.serverSets)) - - for _, zone := range z.serverSets { - entryChs, endWalkCh := zone.poolSplunk.Release(listParams{bucket, recursive, marker, prefix}) - if entryChs == nil { - endWalkCh = make(chan struct{}) - entryChs = zone.startMergeWalksN(ctx, bucket, prefix, marker, recursive, endWalkCh, zone.listTolerancePerSet, true) - } - serverSetsEntryChs = append(serverSetsEntryChs, entryChs) - serverSetsEndWalkCh = append(serverSetsEndWalkCh, endWalkCh) - if zone.listTolerancePerSet == -1 { - serverSetsListTolerancePerSet = 
append(serverSetsListTolerancePerSet, zone.setDriveCount/2) - } else { - serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.listTolerancePerSet-2) - } - } - - entries := mergeServerSetsEntriesCh(serverSetsEntryChs, maxKeys, serverSetsListTolerancePerSet) - if len(entries.Files) == 0 { - return loi, nil - } - - loi.IsTruncated = entries.IsTruncated - if loi.IsTruncated { - loi.NextMarker = entries.Files[len(entries.Files)-1].Name - } - - for _, entry := range entries.Files { - objInfo := entry.ToObjectInfo(bucket, entry.Name) - splits := strings.Split(objInfo.Name, guidSplunk) - if len(splits) == 0 { - loi.Objects = append(loi.Objects, objInfo) - continue - } - - loi.Prefixes = append(loi.Prefixes, splits[0]+guidSplunk) - } - - if loi.IsTruncated { - for i, zone := range z.serverSets { - zone.poolSplunk.Set(listParams{bucket, recursive, loi.NextMarker, prefix}, serverSetsEntryChs[i], - serverSetsEndWalkCh[i]) - } - } - return loi, nil -} - -func (z *erasureServerSets) listObjects(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) { - loi := ListObjectsInfo{} - - if err := checkListObjsArgs(ctx, bucket, prefix, marker, z); err != nil { - return loi, err - } - - // Marker is set validate pre-condition. - if marker != "" { - // Marker not common with prefix is not implemented. Send an empty response - if !HasPrefix(marker, prefix) { - return loi, nil - } - } - - // With max keys of zero we have reached eof, return right here. - if maxKeys == 0 { - return loi, nil - } - - // For delimiter and prefix as '/' we do not list anything at all - // since according to s3 spec we stop at the 'delimiter' - // along // with the prefix. On a flat namespace with 'prefix' - // as '/' we don't have any entries, since all the keys are - // of form 'keyName/...' - if delimiter == SlashSeparator && prefix == SlashSeparator { - return loi, nil - } - - // Over flowing count - reset to maxObjectList. 
- if maxKeys < 0 || maxKeys > maxObjectList { - maxKeys = maxObjectList - } - - if delimiter != SlashSeparator && delimiter != "" { - if delimiter == guidSplunk { - return z.listObjectsSplunk(ctx, bucket, prefix, marker, maxKeys) - } - return z.listObjectsNonSlash(ctx, bucket, prefix, marker, delimiter, maxKeys) - } - - // Default is recursive, if delimiter is set then list non recursive. - recursive := true - if delimiter == SlashSeparator { - recursive = false - } - - serverSetsEntryChs := make([][]FileInfoCh, 0, len(z.serverSets)) - serverSetsEndWalkCh := make([]chan struct{}, 0, len(z.serverSets)) - serverSetsListTolerancePerSet := make([]int, 0, len(z.serverSets)) - - for _, zone := range z.serverSets { - entryChs, endWalkCh := zone.pool.Release(listParams{bucket, recursive, marker, prefix}) - if entryChs == nil { - endWalkCh = make(chan struct{}) - entryChs = zone.startMergeWalksN(ctx, bucket, prefix, marker, recursive, endWalkCh, zone.listTolerancePerSet, false) - } - serverSetsEntryChs = append(serverSetsEntryChs, entryChs) - serverSetsEndWalkCh = append(serverSetsEndWalkCh, endWalkCh) - if zone.listTolerancePerSet == -1 { - serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.setDriveCount/2) - } else { - serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.listTolerancePerSet-2) - } - } - - entries := mergeServerSetsEntriesCh(serverSetsEntryChs, maxKeys, serverSetsListTolerancePerSet) - if len(entries.Files) == 0 { - return loi, nil - } - - loi.IsTruncated = entries.IsTruncated - if loi.IsTruncated { - loi.NextMarker = entries.Files[len(entries.Files)-1].Name - } - - for _, entry := range entries.Files { - objInfo := entry.ToObjectInfo(entry.Volume, entry.Name) - if HasSuffix(objInfo.Name, SlashSeparator) && !recursive { - loi.Prefixes = append(loi.Prefixes, objInfo.Name) - continue - } - loi.Objects = append(loi.Objects, objInfo) - } - if loi.IsTruncated { - for i, zone := range z.serverSets { - 
zone.pool.Set(listParams{bucket, recursive, loi.NextMarker, prefix}, serverSetsEntryChs[i], - serverSetsEndWalkCh[i]) - } - } - return loi, nil -} - -// Calculate least entry across serverSets and across multiple FileInfo +// Calculate least entry across zones and across multiple FileInfo // channels, returns the least common entry and the total number of times // we found this entry. Additionally also returns a boolean // to indicate if the caller needs to call this function @@ -1110,236 +842,57 @@ func lexicallySortedEntryZoneVersions(zoneEntryChs [][]FileInfoVersionsCh, zoneE return lentry, lexicallySortedEntryCount, zoneIndex, isTruncated } -// mergeServerSetsEntriesVersionsCh - merges FileInfoVersions channel to entries upto maxKeys. -func mergeServerSetsEntriesVersionsCh(serverSetsEntryChs [][]FileInfoVersionsCh, maxKeys int, serverSetsListTolerancePerSet []int) (entries FilesInfoVersions) { - var i = 0 - serverSetsEntriesInfos := make([][]FileInfoVersions, 0, len(serverSetsEntryChs)) - serverSetsEntriesValid := make([][]bool, 0, len(serverSetsEntryChs)) - for _, entryChs := range serverSetsEntryChs { - serverSetsEntriesInfos = append(serverSetsEntriesInfos, make([]FileInfoVersions, len(entryChs))) - serverSetsEntriesValid = append(serverSetsEntriesValid, make([]bool, len(entryChs))) - } - - for { - fi, quorumCount, zoneIndex, ok := lexicallySortedEntryZoneVersions(serverSetsEntryChs, serverSetsEntriesInfos, serverSetsEntriesValid) - if !ok { - // We have reached EOF across all entryChs, break the loop. 
- break - } - - if quorumCount < serverSetsListTolerancePerSet[zoneIndex] { - // Skip entries which are not found upto the expected tolerance - continue - } - - entries.FilesVersions = append(entries.FilesVersions, fi) - i++ - if i == maxKeys { - entries.IsTruncated = isTruncatedServerSetsVersions(serverSetsEntryChs, serverSetsEntriesInfos, serverSetsEntriesValid) - break - } - } - return entries -} - -// mergeServerSetsEntriesCh - merges FileInfo channel to entries upto maxKeys. -func mergeServerSetsEntriesCh(serverSetsEntryChs [][]FileInfoCh, maxKeys int, serverSetsListTolerancePerSet []int) (entries FilesInfo) { - var i = 0 - serverSetsEntriesInfos := make([][]FileInfo, 0, len(serverSetsEntryChs)) - serverSetsEntriesValid := make([][]bool, 0, len(serverSetsEntryChs)) - for _, entryChs := range serverSetsEntryChs { - serverSetsEntriesInfos = append(serverSetsEntriesInfos, make([]FileInfo, len(entryChs))) - serverSetsEntriesValid = append(serverSetsEntriesValid, make([]bool, len(entryChs))) - } - var prevEntry string - for { - fi, quorumCount, zoneIndex, ok := lexicallySortedEntryZone(serverSetsEntryChs, serverSetsEntriesInfos, serverSetsEntriesValid) - if !ok { - // We have reached EOF across all entryChs, break the loop. - break - } - - if quorumCount < serverSetsListTolerancePerSet[zoneIndex] { - // Skip entries which are not found upto configured tolerance. 
- continue - } - - if HasSuffix(fi.Name, slashSeparator) && fi.Name == prevEntry { - continue - } - - entries.Files = append(entries.Files, fi) - i++ - if i == maxKeys { - entries.IsTruncated = isTruncatedServerSets(serverSetsEntryChs, serverSetsEntriesInfos, serverSetsEntriesValid) - break - } - prevEntry = fi.Name - } - return entries -} - -func isTruncatedServerSets(zoneEntryChs [][]FileInfoCh, zoneEntries [][]FileInfo, zoneEntriesValid [][]bool) bool { - for i, entryChs := range zoneEntryChs { - for j := range entryChs { - zoneEntries[i][j], zoneEntriesValid[i][j] = entryChs[j].Pop() - } - } - - var isTruncated = false - for _, entriesValid := range zoneEntriesValid { - for _, valid := range entriesValid { - if valid { - isTruncated = true - break - } - } - if isTruncated { - break - } - } - for i, entryChs := range zoneEntryChs { - for j := range entryChs { - if zoneEntriesValid[i][j] { - zoneEntryChs[i][j].Push(zoneEntries[i][j]) - } - } - - } - return isTruncated -} - -func isTruncatedServerSetsVersions(zoneEntryChs [][]FileInfoVersionsCh, zoneEntries [][]FileInfoVersions, zoneEntriesValid [][]bool) bool { - for i, entryChs := range zoneEntryChs { - for j := range entryChs { - zoneEntries[i][j], zoneEntriesValid[i][j] = entryChs[j].Pop() - } - } - - var isTruncated = false - for _, entriesValid := range zoneEntriesValid { - for _, valid := range entriesValid { - if !valid { - continue - } - isTruncated = true - break - } - if isTruncated { - break - } - } - for i, entryChs := range zoneEntryChs { - for j := range entryChs { - if zoneEntriesValid[i][j] { - zoneEntryChs[i][j].Push(zoneEntries[i][j]) - } - } - } - return isTruncated -} - -func (z *erasureServerSets) listObjectVersions(ctx context.Context, bucket, prefix, marker, versionMarker, delimiter string, maxKeys int) (ListObjectVersionsInfo, error) { +func (z *erasureServerSets) ListObjectVersions(ctx context.Context, bucket, prefix, marker, versionMarker, delimiter string, maxKeys int) 
(ListObjectVersionsInfo, error) { loi := ListObjectVersionsInfo{} - - if err := checkListObjsArgs(ctx, bucket, prefix, marker, z); err != nil { - return loi, err - } - - // Marker is set validate pre-condition. - if marker != "" { - // Marker not common with prefix is not implemented. Send an empty response - if !HasPrefix(marker, prefix) { - return loi, nil - } - } - if marker == "" && versionMarker != "" { return loi, NotImplemented{} } - - // With max keys of zero we have reached eof, return right here. - if maxKeys == 0 { - return loi, nil + merged, err := z.listPath(ctx, listPathOptions{ + Bucket: bucket, + Prefix: prefix, + Separator: delimiter, + Limit: maxKeys, + Marker: marker, + InclDeleted: true, + }) + if err != nil && err != io.EOF { + return loi, err } - - // For delimiter and prefix as '/' we do not list anything at all - // since according to s3 spec we stop at the 'delimiter' - // along // with the prefix. On a flat namespace with 'prefix' - // as '/' we don't have any entries, since all the keys are - // of form 'keyName/...' - if delimiter == SlashSeparator && prefix == SlashSeparator { - return loi, nil - } - - // Over flowing count - reset to maxObjectList. - if maxKeys < 0 || maxKeys > maxObjectList { - maxKeys = maxObjectList - } - - if delimiter != SlashSeparator && delimiter != "" { - return loi, NotImplemented{} - } - - // Default is recursive, if delimiter is set then list non recursive. 
- recursive := true - if delimiter == SlashSeparator { - recursive = false - } - - serverSetsEntryChs := make([][]FileInfoVersionsCh, 0, len(z.serverSets)) - serverSetsEndWalkCh := make([]chan struct{}, 0, len(z.serverSets)) - serverSetsListTolerancePerSet := make([]int, 0, len(z.serverSets)) - for _, zone := range z.serverSets { - entryChs, endWalkCh := zone.poolVersions.Release(listParams{bucket, recursive, marker, prefix}) - if entryChs == nil { - endWalkCh = make(chan struct{}) - entryChs = zone.startMergeWalksVersionsN(ctx, bucket, prefix, marker, recursive, endWalkCh, zone.listTolerancePerSet) - } - serverSetsEntryChs = append(serverSetsEntryChs, entryChs) - serverSetsEndWalkCh = append(serverSetsEndWalkCh, endWalkCh) - if zone.listTolerancePerSet == -1 { - serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.setDriveCount/2) - } else { - serverSetsListTolerancePerSet = append(serverSetsListTolerancePerSet, zone.listTolerancePerSet-2) - } - } - - entries := mergeServerSetsEntriesVersionsCh(serverSetsEntryChs, maxKeys, serverSetsListTolerancePerSet) - if len(entries.FilesVersions) == 0 { - return loi, nil - } - - loi.IsTruncated = entries.IsTruncated - if loi.IsTruncated { - loi.NextMarker = entries.FilesVersions[len(entries.FilesVersions)-1].Name - } - - for _, entry := range entries.FilesVersions { - for _, version := range entry.Versions { - objInfo := version.ToObjectInfo(bucket, entry.Name) - if HasSuffix(objInfo.Name, SlashSeparator) && !recursive { - loi.Prefixes = append(loi.Prefixes, objInfo.Name) - continue - } - loi.Objects = append(loi.Objects, objInfo) - } + loi.Objects, loi.Prefixes = merged.fileInfoVersions(bucket, prefix, delimiter, versionMarker) + loi.IsTruncated = err == nil && len(loi.Objects) > 0 + if maxKeys > 0 && len(loi.Objects) > maxKeys { + loi.Objects = loi.Objects[:maxKeys] + loi.IsTruncated = true } if loi.IsTruncated { - for i, zone := range z.serverSets { - zone.poolVersions.Set(listParams{bucket, 
recursive, loi.NextMarker, prefix}, serverSetsEntryChs[i], - serverSetsEndWalkCh[i]) - } + last := loi.Objects[len(loi.Objects)-1] + loi.NextMarker = encodeMarker(last.Name, merged.listID) + loi.NextVersionIDMarker = last.VersionID } return loi, nil } -func (z *erasureServerSets) ListObjectVersions(ctx context.Context, bucket, prefix, marker, versionMarker, delimiter string, maxKeys int) (ListObjectVersionsInfo, error) { - return z.listObjectVersions(ctx, bucket, prefix, marker, versionMarker, delimiter, maxKeys) -} - func (z *erasureServerSets) ListObjects(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) { - return z.listObjects(ctx, bucket, prefix, marker, delimiter, maxKeys) + var loi ListObjectsInfo + merged, err := z.listPath(ctx, listPathOptions{ + Bucket: bucket, + Prefix: prefix, + Separator: delimiter, + Limit: maxKeys, + Marker: marker, + InclDeleted: false, + }) + if err != nil && err != io.EOF { + logger.LogIf(ctx, err) + return loi, err + } + // Default is recursive, if delimiter is set then list non recursive. + loi.Objects, loi.Prefixes = merged.fileInfos(bucket, prefix, delimiter) + loi.IsTruncated = err == nil && len(loi.Objects) > 0 + if loi.IsTruncated { + loi.NextMarker = encodeMarker(loi.Objects[len(loi.Objects)-1].Name, merged.listID) + } + return loi, nil } func (z *erasureServerSets) ListMultipartUploads(ctx context.Context, bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) { @@ -1634,6 +1187,30 @@ func (z *erasureServerSets) DeleteBucket(ctx context.Context, bucket string, for return nil } +// deleteAll will delete a bucket+prefix unconditionally across all disks. +// Note that set distribution is ignored so it should only be used in cases where +// data is not distributed across sets. +// Errors are logged but individual disk failures are not returned. 
+func (z *erasureServerSets) deleteAll(ctx context.Context, bucket, prefix string) error { + var wg sync.WaitGroup + for _, servers := range z.serverSets { + for _, set := range servers.sets { + for _, disk := range set.getDisks() { + if disk == nil { + continue + } + wg.Add(1) + go func(disk StorageAPI) { + defer wg.Done() + logger.LogIf(ctx, disk.Delete(ctx, bucket, prefix, true)) + }(disk) + } + } + } + wg.Wait() + return nil +} + // This function is used to undo a successful DeleteBucket operation. func undoDeleteBucketServerSets(ctx context.Context, bucket string, serverSets []*erasureSets, errs []error) { g := errgroup.WithNErrs(len(serverSets)) diff --git a/cmd/erasure-sets_test.go b/cmd/erasure-sets_test.go index 80ba8d9b1..806072723 100644 --- a/cmd/erasure-sets_test.go +++ b/cmd/erasure-sets_test.go @@ -197,27 +197,13 @@ func TestNewErasureSets(t *testing.T) { // TestHashedLayer - tests the hashed layer which will be returned // consistently for a given object name. func TestHashedLayer(t *testing.T) { - ctx, cancel := context.WithCancel(context.Background()) - defer cancel() - - var objs []*erasureObjects - for i := 0; i < 16; i++ { - obj, fsDirs, err := prepareErasure16(ctx) - if err != nil { - t.Fatal("Unable to initialize 'Erasure' object layer.", err) - } - defer obj.Shutdown(ctx) - - // Remove all dirs. - for _, dir := range fsDirs { - defer os.RemoveAll(dir) - } - - z := obj.(*erasureServerSets) - objs = append(objs, z.serverSets[0].sets[0]) + // Test distribution with 16 sets. 
+ var objs [16]*erasureObjects + for i := range objs { + objs[i] = &erasureObjects{} } - sets := &erasureSets{sets: objs, distributionAlgo: "CRCMOD"} + sets := &erasureSets{sets: objs[:], distributionAlgo: "CRCMOD"} testCases := []struct { objectName string diff --git a/cmd/format-erasure.go b/cmd/format-erasure.go index 24c646875..24bda8d03 100644 --- a/cmd/format-erasure.go +++ b/cmd/format-erasure.go @@ -360,7 +360,7 @@ func saveFormatErasure(disk StorageAPI, format *formatErasureV3, heal bool) erro tmpFormat := mustGetUUID() // Purge any existing temporary file, okay to ignore errors here. - defer disk.DeleteFile(context.TODO(), minioMetaBucket, tmpFormat) + defer disk.Delete(context.TODO(), minioMetaBucket, tmpFormat, false) // write to unique file. if err = disk.WriteAll(context.TODO(), minioMetaBucket, tmpFormat, bytes.NewReader(formatBytes)); err != nil { diff --git a/cmd/fs-v1-helpers_test.go b/cmd/fs-v1-helpers_test.go index ed0d906f7..21c8be76e 100644 --- a/cmd/fs-v1-helpers_test.go +++ b/cmd/fs-v1-helpers_test.go @@ -297,7 +297,7 @@ func TestFSDeletes(t *testing.T) { srcPath: "success-file", expectedErr: nil, }, - // The file was deleted in the last case, so DeleteFile should fail. + // The file was deleted in the last case, so Delete should fail. { basePath: path, srcVol: "success-vol", @@ -423,7 +423,7 @@ func TestFSRemoves(t *testing.T) { expectedErr: nil, }, // Test case - 2. - // The file was deleted in the last case, so DeleteFile should fail. + // The file was deleted in the last case, so Delete should fail. 
{ srcFSPath: path, srcVol: "success-vol", diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go index e09a3ebbc..963fd1f02 100644 --- a/cmd/fs-v1.go +++ b/cmd/fs-v1.go @@ -402,6 +402,7 @@ func (fs *FSObjects) MakeBucketWithLocation(ctx context.Context, bucket string, return BucketNameInvalid{Bucket: bucket} } + defer ObjectPathUpdated(bucket + slashSeparator) atomic.AddInt64(&fs.activeIOCount, 1) defer func() { atomic.AddInt64(&fs.activeIOCount, -1) diff --git a/cmd/handler-utils.go b/cmd/handler-utils.go index 1e4a9d039..174c55a15 100644 --- a/cmd/handler-utils.go +++ b/cmd/handler-utils.go @@ -424,6 +424,17 @@ func extractAPIVersion(r *http.Request) string { return regexVersion.FindString(r.URL.Path) } +func methodNotAllowedHandler(api string) func(w http.ResponseWriter, r *http.Request) { + return func(w http.ResponseWriter, r *http.Request) { + code := "XMinio" + api + "VersionMismatch" + writeErrorResponseString(r.Context(), w, APIError{ + Code: code, + Description: "Not allowed (" + r.Method + " " + r.URL.String() + " on " + api + " API)", + HTTPStatusCode: http.StatusMethodNotAllowed, + }, r.URL) + } +} + // If none of the http routes match respond with appropriate errors func errorResponseHandler(w http.ResponseWriter, r *http.Request) { if r.Method == http.MethodOptions { diff --git a/cmd/listen-notification-handlers.go b/cmd/listen-notification-handlers.go index 09bb1e02c..311d5bdd5 100644 --- a/cmd/listen-notification-handlers.go +++ b/cmd/listen-notification-handlers.go @@ -124,7 +124,7 @@ func (api objectAPIHandlers) ListenNotificationHandler(w http.ResponseWriter, r // Use buffered channel to take care of burst sends or slow w.Write() listenCh := make(chan interface{}, 4000) - peers := newPeerRestClients(globalEndpoints) + peers, _ := newPeerRestClients(globalEndpoints) globalHTTPListen.Subscribe(listenCh, ctx.Done(), func(evI interface{}) bool { ev, ok := evI.(event.Event) diff --git a/cmd/logger/logger.go b/cmd/logger/logger.go index 3c8b3b8f8..7011f514e 100644 
--- a/cmd/logger/logger.go +++ b/cmd/logger/logger.go @@ -337,7 +337,7 @@ func logIf(ctx context.Context, err error, errKind ...interface{}) { trace := getTrace(3) // Get the cause for the Error - message := err.Error() + message := fmt.Sprintf("%v (%T)", err, err) if req.DeploymentID == "" { req.DeploymentID = globalDeploymentID } diff --git a/cmd/metacache-bucket.go b/cmd/metacache-bucket.go new file mode 100644 index 000000000..21a77813a --- /dev/null +++ b/cmd/metacache-bucket.go @@ -0,0 +1,427 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "bytes" + "context" + "errors" + "fmt" + "io" + "runtime/debug" + "strings" + "sync" + "time" + + "github.com/klauspost/compress/s2" + "github.com/minio/minio/cmd/logger" + "github.com/minio/minio/pkg/hash" + "github.com/tinylib/msgp/msgp" +) + +//go:generate msgp -file $GOFILE -unexported + +// a bucketMetacache keeps track of all caches generated +// for a bucket. +type bucketMetacache struct { + // Name of bucket + bucket string + + // caches indexed by id. + caches map[string]metacache + + // Internal state + mu sync.RWMutex `msg:"-"` + updated bool `msg:"-"` + transient bool `msg:"-"` // bucket used for non-persisted caches. +} + +// newBucketMetacache creates a new bucketMetacache. 
+func newBucketMetacache(bucket string) *bucketMetacache { + return &bucketMetacache{ + bucket: bucket, + caches: make(map[string]metacache, 10), + } +} + +// loadBucketMetaCache will load the cache from the object layer. +// If the cache cannot be found a new one is created. +func loadBucketMetaCache(ctx context.Context, bucket string) (*bucketMetacache, error) { + objAPI := newObjectLayerFn() + for objAPI == nil { + select { + case <-ctx.Done(): + return nil, ctx.Err() + case <-time.After(250 * time.Millisecond): + } + objAPI = newObjectLayerFn() + if objAPI == nil { + logger.LogIf(ctx, fmt.Errorf("loadBucketMetaCache: object layer not ready. bucket: %q", bucket)) + } + } + var meta bucketMetacache + var decErr error + var wg sync.WaitGroup + wg.Add(1) + + r, w := io.Pipe() + go func() { + defer wg.Done() + dec := s2DecPool.Get().(*s2.Reader) + dec.Reset(r) + decErr = meta.DecodeMsg(msgp.NewReader(dec)) + dec.Reset(nil) + s2DecPool.Put(dec) + r.CloseWithError(decErr) + }() + // Use global context for this. + err := objAPI.GetObject(GlobalContext, minioMetaBucket, pathJoin("buckets", bucket, ".metacache", "index.s2"), 0, -1, w, "", ObjectOptions{}) + logger.LogIf(ctx, w.CloseWithError(err)) + if err != nil { + if isErrObjectNotFound(err) { + err = nil + } else { + logger.LogIf(ctx, err) + } + return newBucketMetacache(bucket), err + } + wg.Wait() + if decErr != nil { + // Log the error, but assume the data is lost and return a fresh bucket. + // Otherwise a broken cache will never recover. + logger.LogIf(ctx, decErr) + return newBucketMetacache(bucket), nil + } + // Sanity check... + if meta.bucket != bucket { + logger.Info("loadBucketMetaCache: loaded cache name mismatch, want %s, got %s. Discarding.", bucket, meta.bucket) + return newBucketMetacache(bucket), nil + } + return &meta, nil +} + +// save the bucket cache to the object storage. 
+func (b *bucketMetacache) save(ctx context.Context) error { + if b.transient { + return nil + } + objAPI := newObjectLayerFn() + if objAPI == nil { + return errServerNotInitialized + } + + // Keep lock while we marshal. + // We need a write lock since we update 'updated' + b.mu.Lock() + if !b.updated { + b.mu.Unlock() + return nil + } + // Save as s2 compressed msgpack + tmp := bytes.NewBuffer(make([]byte, 0, b.Msgsize())) + enc := s2.NewWriter(tmp) + err := msgp.Encode(enc, b) + if err != nil { + b.mu.Unlock() + return err + } + err = enc.Close() + if err != nil { + b.mu.Unlock() + return err + } + b.updated = false + b.mu.Unlock() + + hr, err := hash.NewReader(tmp, int64(tmp.Len()), "", "", int64(tmp.Len()), false) + if err != nil { + return err + } + _, err = objAPI.PutObject(ctx, minioMetaBucket, pathJoin("buckets", b.bucket, ".metacache", "index.s2"), NewPutObjReader(hr, nil, nil), ObjectOptions{}) + logger.LogIf(ctx, err) + return err +} + +// findCache will attempt to find a matching cache for the provided options. +// If a cache with the same ID exists already it will be returned. +// If none can be found a new is created with the provided ID. +func (b *bucketMetacache) findCache(o listPathOptions) metacache { + if b == nil { + logger.Info("bucketMetacache.findCache: nil cache for bucket %s", o.Bucket) + return metacache{} + } + + if o.Bucket != b.bucket && !b.transient { + logger.Info("bucketMetacache.findCache: bucket %s does not match this bucket %s", o.Bucket, b.bucket) + debug.PrintStack() + return metacache{} + } + + debugPrint := func(msg string, data ...interface{}) {} + if false { + debugPrint = logger.Info + } + + // Grab a write lock, since we create one if we cannot find one. + if o.Create { + b.mu.Lock() + defer b.mu.Unlock() + } else { + b.mu.RLock() + defer b.mu.RUnlock() + } + + // Check if exists already. 
+	if c, ok := b.caches[o.ID]; ok {
+		return c
+	}
+
+	var best metacache
+	for _, cached := range b.caches {
+		// Never return transient caches if there is no id.
+		if b.transient {
+			break
+		}
+		if cached.status == scanStateError || cached.dataVersion != metacacheStreamVersion {
+			debugPrint("cache %s state or stream version mismatch", cached.id)
+			continue
+		}
+		if cached.startedCycle < o.OldestCycle {
+			debugPrint("cache %s cycle too old", cached.id)
+			continue
+		}
+		// Root of what we are looking for must at least have the cached root as prefix.
+		if !strings.HasPrefix(o.BaseDir, cached.root) {
+			debugPrint("cache %s prefix mismatch, cached:%v, want:%v", cached.id, cached.root, o.BaseDir)
+			continue
+		}
+		// If the existing listing wasn't recursive root must match.
+		if !cached.recursive && o.BaseDir != cached.root {
+			debugPrint("cache %s non rec prefix mismatch, cached:%v, want:%v", cached.id, cached.root, o.BaseDir)
+			continue
+		}
+		if o.Recursive && !cached.recursive {
+			debugPrint("cache %s not recursive", cached.id)
+			// If this is recursive the cached listing must be as well.
+			continue
+		}
+		if o.Separator != slashSeparator && !cached.recursive {
+			debugPrint("cache %s not slashsep and not recursive", cached.id)
+			// Non slash separator requires recursive.
+			continue
+		}
+		if cached.ended.IsZero() && time.Since(cached.lastUpdate) > metacacheMaxRunningAge {
+			debugPrint("cache %s not running, time: %v", cached.id, time.Since(cached.lastUpdate))
+			// Abandoned
+			continue
+		}
+		if !cached.ended.IsZero() && cached.endedCycle <= o.OldestCycle {
+			debugPrint("cache %s ended and cycle (%v) <= oldest allowed (%v)", cached.id, cached.endedCycle, o.OldestCycle)
+			// If scan has ended the oldest requested must be less.
+			continue
+		}
+		if cached.started.Before(best.started) {
+			debugPrint("cache %s disregarded - we have a better", cached.id)
+			// If we already have a newer, keep that.
+			continue
+		}
+		best = cached
+	}
+	if !best.started.IsZero() {
+		if o.Create {
+			best.lastHandout = UTCNow()
+			b.caches[best.id] = best
+			b.updated = true
+		}
+		debugPrint("returning cached")
+		return best
+	}
+	if !o.Create {
+		return metacache{
+			id:     o.ID,
+			bucket: o.Bucket,
+			status: scanStateNone,
+		}
+	}
+
+	// Create new and add.
+	best = o.newMetacache()
+	b.caches[o.ID] = best
+	b.updated = true
+	return best
+}
+
+// cleanup removes redundant and outdated entries.
+func (b *bucketMetacache) cleanup() {
+	// Entries to remove.
+	remove := make(map[string]struct{})
+	currentCycle := intDataUpdateTracker.current()
+
+	debugPrint := func(msg string, data ...interface{}) {}
+	if false {
+		debugPrint = logger.Info
+	}
+
+	b.mu.RLock()
+	for id, cache := range b.caches {
+		if b.transient && time.Since(cache.started) > time.Hour {
+			// Keep transient caches only for 1 hour.
+			remove[id] = struct{}{}
+		}
+		if !cache.worthKeeping(currentCycle) {
+			debugPrint("cache %s not worth keeping", id)
+			remove[id] = struct{}{}
+		}
+		if cache.id != id {
+			logger.Info("cache ID mismatch %s != %s", id, cache.id)
+			remove[id] = struct{}{}
+		}
+		if cache.bucket != b.bucket && !b.transient {
+			logger.Info("cache bucket mismatch %s != %s", b.bucket, cache.bucket)
+			remove[id] = struct{}{}
+		}
+	}
+
+	// Check all non-deleted against each other.
+	// O(n*n), but should still be rather quick.
+	for id, cache := range b.caches {
+		if _, ok := remove[id]; ok {
+			continue
+		}
+		for _, cache2 := range b.caches {
+			if cache.canBeReplacedBy(&cache2) {
+				debugPrint("cache %s can be replaced by %s", id, cache2.id)
+				remove[id] = struct{}{}
+				break
+			} else {
+				debugPrint("cache %s can be NOT replaced by %s", id, cache2.id)
+			}
+		}
+	}
+
+	b.mu.RUnlock()
+	for id := range remove {
+		b.deleteCache(id)
+	}
+}
+
+// updateCache will update a cache by id.
+// If the cache cannot be found nil is returned.
+// The bucket cache will be locked until done is called.
+func (b *bucketMetacache) updateCache(id string) (cache *metacache, done func()) { + b.mu.Lock() + c, ok := b.caches[id] + if !ok { + b.mu.Unlock() + return nil, func() {} + } + return &c, func() { + c.lastUpdate = UTCNow() + b.caches[id] = c + b.mu.Unlock() + } +} + +// updateCacheEntry will update a cache. +// Returns the updated status. +func (b *bucketMetacache) updateCacheEntry(update metacache) (metacache, error) { + b.mu.Lock() + defer b.mu.Unlock() + existing, ok := b.caches[update.id] + if !ok { + logger.Info("updateCacheEntry: bucket %s list id %v not found", b.bucket, update.id) + return update, errFileNotFound + } + + existing.lastUpdate = UTCNow() + if existing.status == scanStateStarted && update.status != scanStateStarted { + existing.status = update.status + } + if existing.status == scanStateSuccess && update.status == scanStateSuccess { + existing.ended = UTCNow() + existing.endedCycle = update.endedCycle + } + if existing.error == "" && update.error != "" { + existing.error = update.error + existing.status = scanStateError + } + existing.fileNotFound = existing.fileNotFound || update.fileNotFound + b.caches[update.id] = existing + b.updated = true + return existing, nil +} + +// getCache will return a clone of a specific metacache. +// Will return nil if the cache doesn't exist. +func (b *bucketMetacache) getCache(id string) *metacache { + b.mu.RLock() + c, ok := b.caches[id] + b.mu.RUnlock() + if !ok { + return nil + } + return &c +} + +// deleteAll will delete all on disk data for ALL caches. +// Deletes are performed concurrently. 
+func (b *bucketMetacache) deleteAll() {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+
+	ctx := context.Background()
+	ez, ok := newObjectLayerFn().(*erasureServerSets)
+	if !ok {
+		logger.LogIf(ctx, errors.New("bucketMetacache: expected objAPI to be *erasureServerSets"))
+		return
+	}
+	var wg sync.WaitGroup
+	for id := range b.caches {
+		wg.Add(1)
+		go func(cache metacache) {
+			defer wg.Done()
+			logger.LogIf(ctx, ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(cache.bucket, cache.id)))
+		}(b.caches[id])
+		delete(b.caches, id)
+	}
+	wg.Wait()
+}
+
+// deleteCache will delete a specific cache and all files related to it across the cluster.
+func (b *bucketMetacache) deleteCache(id string) {
+	b.mu.Lock()
+	c, ok := b.caches[id]
+	if ok {
+		delete(b.caches, id)
+		b.updated = true
+	}
+	b.mu.Unlock()
+	if ok {
+		ctx := context.Background()
+		objAPI := newObjectLayerFn()
+		if objAPI == nil {
+			logger.LogIf(ctx, errors.New("bucketMetacache: no object layer"))
+			return
+		}
+		ez, ok := objAPI.(*erasureServerSets)
+		if !ok {
+			logger.LogIf(ctx, errors.New("bucketMetacache: expected objAPI to be *erasureServerSets"))
+			return
+		}
+		logger.LogIf(ctx, ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(c.bucket, c.id)))
+	}
+}
diff --git a/cmd/metacache-bucket_gen.go b/cmd/metacache-bucket_gen.go
new file mode 100644
index 000000000..580ce51ba
--- /dev/null
+++ b/cmd/metacache-bucket_gen.go
@@ -0,0 +1,209 @@
+package cmd
+
+// Code generated by github.com/tinylib/msgp DO NOT EDIT.
+ +import ( + "github.com/tinylib/msgp/msgp" +) + +// DecodeMsg implements msgp.Decodable +func (z *bucketMetacache) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zb0001 uint32 + zb0001, err = dc.ReadMapHeader() + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "bucket": + z.bucket, err = dc.ReadString() + if err != nil { + err = msgp.WrapError(err, "bucket") + return + } + case "caches": + var zb0002 uint32 + zb0002, err = dc.ReadMapHeader() + if err != nil { + err = msgp.WrapError(err, "caches") + return + } + if z.caches == nil { + z.caches = make(map[string]metacache, zb0002) + } else if len(z.caches) > 0 { + for key := range z.caches { + delete(z.caches, key) + } + } + for zb0002 > 0 { + zb0002-- + var za0001 string + var za0002 metacache + za0001, err = dc.ReadString() + if err != nil { + err = msgp.WrapError(err, "caches") + return + } + err = za0002.DecodeMsg(dc) + if err != nil { + err = msgp.WrapError(err, "caches", za0001) + return + } + z.caches[za0001] = za0002 + } + default: + err = dc.Skip() + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *bucketMetacache) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 2 + // write "bucket" + err = en.Append(0x82, 0xa6, 0x62, 0x75, 0x63, 0x6b, 0x65, 0x74) + if err != nil { + return + } + err = en.WriteString(z.bucket) + if err != nil { + err = msgp.WrapError(err, "bucket") + return + } + // write "caches" + err = en.Append(0xa6, 0x63, 0x61, 0x63, 0x68, 0x65, 0x73) + if err != nil { + return + } + err = en.WriteMapHeader(uint32(len(z.caches))) + if err != nil { + err = msgp.WrapError(err, "caches") + return + } + for za0001, za0002 := range z.caches { + err = en.WriteString(za0001) + if err != nil { + err = 
msgp.WrapError(err, "caches") + return + } + err = za0002.EncodeMsg(en) + if err != nil { + err = msgp.WrapError(err, "caches", za0001) + return + } + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *bucketMetacache) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 2 + // string "bucket" + o = append(o, 0x82, 0xa6, 0x62, 0x75, 0x63, 0x6b, 0x65, 0x74) + o = msgp.AppendString(o, z.bucket) + // string "caches" + o = append(o, 0xa6, 0x63, 0x61, 0x63, 0x68, 0x65, 0x73) + o = msgp.AppendMapHeader(o, uint32(len(z.caches))) + for za0001, za0002 := range z.caches { + o = msgp.AppendString(o, za0001) + o, err = za0002.MarshalMsg(o) + if err != nil { + err = msgp.WrapError(err, "caches", za0001) + return + } + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *bucketMetacache) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zb0001 uint32 + zb0001, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "bucket": + z.bucket, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "bucket") + return + } + case "caches": + var zb0002 uint32 + zb0002, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err, "caches") + return + } + if z.caches == nil { + z.caches = make(map[string]metacache, zb0002) + } else if len(z.caches) > 0 { + for key := range z.caches { + delete(z.caches, key) + } + } + for zb0002 > 0 { + var za0001 string + var za0002 metacache + zb0002-- + za0001, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "caches") + return + } + bts, err = za0002.UnmarshalMsg(bts) + if err != nil { + err = msgp.WrapError(err, "caches", za0001) + 
return + } + z.caches[za0001] = za0002 + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *bucketMetacache) Msgsize() (s int) { + s = 1 + 7 + msgp.StringPrefixSize + len(z.bucket) + 7 + msgp.MapHeaderSize + if z.caches != nil { + for za0001, za0002 := range z.caches { + _ = za0002 + s += msgp.StringPrefixSize + len(za0001) + za0002.Msgsize() + } + } + return +} diff --git a/cmd/metacache-bucket_gen_test.go b/cmd/metacache-bucket_gen_test.go new file mode 100644 index 000000000..38a5ca2d8 --- /dev/null +++ b/cmd/metacache-bucket_gen_test.go @@ -0,0 +1,123 @@ +package cmd + +// Code generated by github.com/tinylib/msgp DO NOT EDIT. + +import ( + "bytes" + "testing" + + "github.com/tinylib/msgp/msgp" +) + +func TestMarshalUnmarshalbucketMetacache(t *testing.T) { + v := bucketMetacache{} + bts, err := v.MarshalMsg(nil) + if err != nil { + t.Fatal(err) + } + left, err := v.UnmarshalMsg(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) + } + + left, err = msgp.Skip(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after Skip(): %q", len(left), left) + } +} + +func BenchmarkMarshalMsgbucketMetacache(b *testing.B) { + v := bucketMetacache{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.MarshalMsg(nil) + } +} + +func BenchmarkAppendMsgbucketMetacache(b *testing.B) { + v := bucketMetacache{} + bts := make([]byte, 0, v.Msgsize()) + bts, _ = v.MarshalMsg(bts[0:0]) + b.SetBytes(int64(len(bts))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + bts, _ = v.MarshalMsg(bts[0:0]) + } +} + +func BenchmarkUnmarshalbucketMetacache(b *testing.B) { + v := bucketMetacache{} + bts, _ := v.MarshalMsg(nil) + b.ReportAllocs() + 
b.SetBytes(int64(len(bts))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := v.UnmarshalMsg(bts) + if err != nil { + b.Fatal(err) + } + } +} + +func TestEncodeDecodebucketMetacache(t *testing.T) { + v := bucketMetacache{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + + m := v.Msgsize() + if buf.Len() > m { + t.Log("WARNING: TestEncodeDecodebucketMetacache Msgsize() is inaccurate") + } + + vn := bucketMetacache{} + err := msgp.Decode(&buf, &vn) + if err != nil { + t.Error(err) + } + + buf.Reset() + msgp.Encode(&buf, &v) + err = msgp.NewReader(&buf).Skip() + if err != nil { + t.Error(err) + } +} + +func BenchmarkEncodebucketMetacache(b *testing.B) { + v := bucketMetacache{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + b.SetBytes(int64(buf.Len())) + en := msgp.NewWriter(msgp.Nowhere) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.EncodeMsg(en) + } + en.Flush() +} + +func BenchmarkDecodebucketMetacache(b *testing.B) { + v := bucketMetacache{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + b.SetBytes(int64(buf.Len())) + rd := msgp.NewEndlessReader(buf.Bytes(), b) + dc := msgp.NewReader(rd) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + err := v.DecodeMsg(dc) + if err != nil { + b.Fatal(err) + } + } +} diff --git a/cmd/metacache-entries.go b/cmd/metacache-entries.go new file mode 100644 index 000000000..ae686cbd7 --- /dev/null +++ b/cmd/metacache-entries.go @@ -0,0 +1,559 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "bytes" + "io" + "os" + "sort" + "strings" +) + +// metaCacheEntry is an object or a directory within an unknown bucket. +type metaCacheEntry struct { + // name is the full name of the object including prefixes + name string + // Metadata. If none is present it is not an object but only a prefix. + // Entries without metadata will only be present in non-recursive scans. + metadata []byte + + // cached contains the metadata if decoded. + cached *FileInfo +} + +// isDir returns if the entry is representing a prefix directory. +func (e metaCacheEntry) isDir() bool { + return len(e.metadata) == 0 +} + +// isObject returns if the entry is representing an object. +func (e metaCacheEntry) isObject() bool { + return len(e.metadata) > 0 +} + +// hasPrefix returns whether an entry has a specific prefix +func (e metaCacheEntry) hasPrefix(s string) bool { + return strings.HasPrefix(e.name, s) +} + +// likelyMatches returns if the entries match by comparing name and metadata length. +func (e *metaCacheEntry) likelyMatches(other *metaCacheEntry) bool { + // This should reject 99% + if len(e.metadata) != len(other.metadata) || e.name != other.name { + return false + } + return true +} + +// matches returns if the entries match by comparing their latest version fileinfo. 
+func (e *metaCacheEntry) matches(other *metaCacheEntry, bucket string) bool {
+	if e == nil && other == nil {
+		return true
+	}
+	if e == nil || other == nil {
+		return false
+	}
+
+	// This should reject 99%
+	if len(e.metadata) != len(other.metadata) || e.name != other.name {
+		return false
+	}
+	eFi, eErr := e.fileInfo(bucket)
+	oFi, oErr := other.fileInfo(bucket)
+	if eErr != nil || oErr != nil {
+		return eErr == oErr
+	}
+	return eFi.ModTime.Equal(oFi.ModTime) && eFi.Size == oFi.Size && eFi.VersionID == oFi.VersionID
+}
+
+// isInDir returns whether the entry is in the dir when considering the separator.
+func (e metaCacheEntry) isInDir(dir, separator string) bool {
+	if len(dir) == 0 {
+		// Root
+		idx := strings.Index(e.name, separator)
+		return idx == -1 || idx == len(e.name)-len(separator)
+	}
+	ext := strings.TrimPrefix(e.name, dir)
+	if len(ext) != len(e.name) {
+		idx := strings.Index(ext, separator)
+		// If separator is not found or is last entry, ok.
+		return idx == -1 || idx == len(ext)-len(separator)
+	}
+	return false
+}
+
+// isLatestDeletemarker returns whether the latest version is a delete marker.
+// If metadata is NOT versioned false will always be returned.
+// If v2 and UNABLE to load metadata true will be returned.
+func (e *metaCacheEntry) isLatestDeletemarker() bool {
+	if e.cached != nil {
+		return e.cached.Deleted
+	}
+	if !isXL2V1Format(e.metadata) {
+		return false
+	}
+	var xlMeta xlMetaV2
+	if err := xlMeta.Load(e.metadata); err != nil || len(xlMeta.Versions) == 0 {
+		return true
+	}
+	return xlMeta.Versions[len(xlMeta.Versions)-1].Type == DeleteType
+}
+
+// fileInfo returns the decoded metadata.
+// If entry is a directory it is returned as that.
+// If versioned the latest version will be returned.
+func (e *metaCacheEntry) fileInfo(bucket string) (*FileInfo, error) { + if e.isDir() { + return &FileInfo{ + Volume: bucket, + Name: e.name, + Mode: os.ModeDir, + }, nil + } + if e.cached == nil { + fi, err := getFileInfo(e.metadata, bucket, e.name, "") + if err != nil { + return nil, err + } + e.cached = &fi + } + return e.cached, nil +} + +// fileInfoVersions returns the metadata as FileInfoVersions. +// If entry is a directory it is returned as that. +func (e *metaCacheEntry) fileInfoVersions(bucket string) (FileInfoVersions, error) { + if e.isDir() { + return FileInfoVersions{ + Volume: bucket, + Name: e.name, + Versions: []FileInfo{ + { + Volume: bucket, + Name: e.name, + Mode: os.ModeDir, + }, + }, + }, nil + } + return getFileInfoVersions(e.metadata, bucket, e.name) +} + +// metaCacheEntries is a slice of metacache entries. +type metaCacheEntries []metaCacheEntry + +// less function for sorting. +func (m metaCacheEntries) less(i, j int) bool { + return m[i].name < m[j].name +} + +// sort entries by name. +// m is sorted and a sorted metadata object is returned. +// Changes to m will also be reflected in the returned object. +func (m metaCacheEntries) sort() metaCacheEntriesSorted { + if m.isSorted() { + return metaCacheEntriesSorted{o: m} + } + sort.Slice(m, m.less) + return metaCacheEntriesSorted{o: m} +} + +// isSorted returns whether the objects are sorted. +// This is usually orders of magnitude faster than actually sorting. +func (m metaCacheEntries) isSorted() bool { + return sort.SliceIsSorted(m, m.less) +} + +// shallowClone will create a shallow clone of the array objects, +// but object metadata will not be cloned. +func (m metaCacheEntries) shallowClone() metaCacheEntries { + dst := make(metaCacheEntries, len(m)) + copy(dst, m) + return dst +} + +type metadataResolutionParams struct { + dirQuorum int // Number if disks needed for a directory to 'exist'. + objQuorum int // Number of disks needed for an object to 'exist'. 
+ bucket string // Name of the bucket. Used for generating cached fileinfo. +} + +func (m metaCacheEntries) resolve(r *metadataResolutionParams) (selected *metaCacheEntry, ok bool) { + if len(m) == 0 { + return nil, false + } + + dirExists := 0 + objExists := 0 + var selFIV *FileInfo + for i := range m { + entry := &m[i] + if entry.name == "" { + continue + } + if entry.isDir() { + dirExists++ + selected = entry + continue + } + + // Get new entry metadata + objExists++ + fiv, err := entry.fileInfo(r.bucket) + if err != nil { + continue + } + if selFIV == nil { + selected = entry + selFIV = fiv + continue + } + + if selected.matches(entry, r.bucket) { + continue + } + + // Select latest modtime. + if fiv.ModTime.After(selFIV.ModTime) { + selected = entry + selFIV = fiv + continue + } + } + // If directory, we need quorum. + if dirExists > 0 && dirExists < r.dirQuorum { + return nil, false + } + if objExists < r.objQuorum { + return nil, false + } + // Take the latest selected. + return selected, selected != nil +} + +// names will return all names in order. +// Since this allocates it should not be used in critical functions. +func (m metaCacheEntries) names() []string { + res := make([]string, 0, len(m)) + for _, obj := range m { + res = append(res, obj.name) + } + return res +} + +// metaCacheEntriesSorted contains metacache entries that are sorted. +type metaCacheEntriesSorted struct { + o metaCacheEntries + // list id is not serialized + listID string +} + +// writeTo will write all objects to the provided output. +func (m metaCacheEntriesSorted) writeTo(writer io.Writer) error { + w := newMetacacheWriter(writer, 1<<20) + if err := w.write(m.o...); err != nil { + w.Close() + return err + } + return w.Close() +} + +// shallowClone will create a shallow clone of the array objects, +// but object metadata will not be cloned. +func (m metaCacheEntriesSorted) shallowClone() metaCacheEntriesSorted { + // We have value receiver so we already have a copy. 
+ m.o = m.o.shallowClone() + return m +} + +// iterate the entries in order. +// If the iterator function returns iterating stops. +func (m *metaCacheEntriesSorted) iterate(fn func(entry metaCacheEntry) (cont bool)) { + if m == nil { + return + } + for _, o := range m.o { + if !fn(o) { + return + } + } +} + +// fileInfoVersions converts the metadata to FileInfoVersions where possible. +// Metadata that cannot be decoded is skipped. +func (m *metaCacheEntriesSorted) fileInfoVersions(bucket, prefix, delimiter, afterV string) (versions []ObjectInfo, commonPrefixes []string) { + versions = make([]ObjectInfo, 0, m.len()) + prevPrefix := "" + for _, entry := range m.o { + if entry.isObject() { + fiv, err := entry.fileInfoVersions(bucket) + if afterV != "" { + // Forward first entry to specified version + fiv.forwardPastVersion(afterV) + afterV = "" + } + if err == nil { + for _, version := range fiv.Versions { + versions = append(versions, version.ToObjectInfo(bucket, entry.name)) + } + } + continue + } + if entry.isDir() { + if delimiter == "" { + continue + } + idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter) + if idx < 0 { + continue + } + idx = len(prefix) + idx + len(delimiter) + currPrefix := entry.name[:idx] + if currPrefix == prevPrefix { + continue + } + prevPrefix = currPrefix + commonPrefixes = append(commonPrefixes, currPrefix) + continue + } + } + + return versions, commonPrefixes +} + +// fileInfoVersions converts the metadata to FileInfoVersions where possible. +// Metadata that cannot be decoded is skipped. 
+func (m *metaCacheEntriesSorted) fileInfos(bucket, prefix, delimiter string) (objects []ObjectInfo, commonPrefixes []string) { + objects = make([]ObjectInfo, 0, m.len()) + prevPrefix := "" + for _, entry := range m.o { + if entry.isObject() { + fi, err := entry.fileInfo(bucket) + if err == nil { + objects = append(objects, fi.ToObjectInfo(bucket, entry.name)) + } + continue + } + if entry.isDir() { + if delimiter == "" { + continue + } + idx := strings.Index(strings.TrimPrefix(entry.name, prefix), delimiter) + if idx < 0 { + continue + } + idx = len(prefix) + idx + len(delimiter) + currPrefix := entry.name[:idx] + if currPrefix == prevPrefix { + continue + } + prevPrefix = currPrefix + commonPrefixes = append(commonPrefixes, currPrefix) + continue + } + } + + return objects, commonPrefixes +} + +// forwardTo will truncate m so only entries that are s or after is in the list. +func (m *metaCacheEntriesSorted) forwardTo(s string) { + if s == "" { + return + } + idx := sort.Search(len(m.o), func(i int) bool { + return m.o[i].name >= s + }) + m.o = m.o[idx:] +} + +// merge will merge other into m. +// If the same entries exists in both and metadata matches only one is added, +// otherwise the entry from m will be placed first. +// Operation time is expected to be O(n+m). +func (m *metaCacheEntriesSorted) merge(other metaCacheEntriesSorted, limit int) { + merged := make(metaCacheEntries, 0, m.len()+other.len()) + a := m.entries() + b := other.entries() + for len(a) > 0 && len(b) > 0 { + if a[0].name == b[0].name && bytes.Equal(a[0].metadata, b[0].metadata) { + // Same, discard one. + merged = append(merged, a[0]) + a = a[1:] + b = b[1:] + } else if a[0].name < b[0].name { + merged = append(merged, a[0]) + a = a[1:] + } else { + merged = append(merged, b[0]) + b = b[1:] + } + if limit > 0 && len(merged) >= limit { + break + } + } + // Append anything left. + if limit < 0 || len(merged) < limit { + merged = append(merged, a...) + merged = append(merged, b...) 
+ } + m.o = merged +} + +// filter allows selective filtering with the provided function. +func (m *metaCacheEntriesSorted) filter(fn func(entry *metaCacheEntry) bool) { + dst := m.o[:0] + for _, o := range m.o { + if fn(&o) { + dst = append(dst, o) + } + } + m.o = dst +} + +// filterPrefix will filter m to only contain entries with the specified prefix. +func (m *metaCacheEntriesSorted) filterPrefix(s string) { + if s == "" { + return + } + m.forwardTo(s) + for i, o := range m.o { + if !o.hasPrefix(s) { + m.o = m.o[:i] + break + } + } +} + +// filterObjectsOnly will remove prefix directories. +// Order is preserved, but the underlying slice is modified. +func (m *metaCacheEntriesSorted) filterObjectsOnly() { + dst := m.o[:0] + for _, o := range m.o { + if !o.isDir() { + dst = append(dst, o) + } + } + m.o = dst +} + +// filterPrefixesOnly will remove objects. +// Order is preserved, but the underlying slice is modified. +func (m *metaCacheEntriesSorted) filterPrefixesOnly() { + dst := m.o[:0] + for _, o := range m.o { + if o.isDir() { + dst = append(dst, o) + } + } + m.o = dst +} + +// filterRecursiveEntries will keep entries only with the prefix that doesn't contain separator. +// This can be used to remove recursive listings. +// To return root elements only set prefix to an empty string. +// Order is preserved, but the underlying slice is modified. +func (m *metaCacheEntriesSorted) filterRecursiveEntries(prefix, separator string) { + dst := m.o[:0] + if prefix != "" { + m.forwardTo(prefix) + for _, o := range m.o { + ext := strings.TrimPrefix(o.name, prefix) + if len(ext) != len(o.name) { + if !strings.Contains(ext, separator) { + dst = append(dst, o) + } + } + } + } else { + // No prefix, simpler + for _, o := range m.o { + if !strings.Contains(o.name, separator) { + dst = append(dst, o) + } + } + } + m.o = dst +} + +// truncate the number of entries to maximum n. 
+func (m *metaCacheEntriesSorted) truncate(n int) {
+	if m == nil {
+		return
+	}
+	if len(m.o) > n {
+		m.o = m.o[:n]
+	}
+}
+
+// len returns the number of objects and prefix dirs in m.
+func (m *metaCacheEntriesSorted) len() int {
+	if m == nil {
+		return 0
+	}
+	return len(m.o)
+}
+
+// entries returns the underlying objects as is currently represented.
+func (m *metaCacheEntriesSorted) entries() metaCacheEntries {
+	if m == nil {
+		return nil
+	}
+	return m.o
+}
+
+// deduplicate entries in the list.
+// If compareMeta is set it will be used to resolve conflicts.
+// The function should return whether the existing entry should be replaced with other.
+// If no compareMeta is provided duplicates may be left.
+// This is indicated by the returned boolean.
+func (m *metaCacheEntriesSorted) deduplicate(compareMeta func(existing, other *metaCacheEntry) (replace bool)) (dupesLeft bool) {
+	dst := m.o[:0]
+	for j := range m.o {
+		found := false
+		obj := &m.o[j]
+		for i := len(dst) - 1; i >= 0; i-- {
+			existing := &dst[i]
+			if existing.name != obj.name {
+				break
+			}
+
+			// Use given resolution function first if any.
+			if compareMeta != nil {
+				if compareMeta(existing, obj) {
+					dst[i] = *obj
+				}
+				found = true
+				break
+			}
+			if obj.likelyMatches(existing) {
+				found = true
+				break
+			}
+
+			// Matches, move on.
+			dupesLeft = true
+			continue
+		}
+		if !found {
+			dst = append(dst, *obj)
+		}
+	}
+	m.o = dst
+	return dupesLeft
+}
diff --git a/cmd/metacache-entries_test.go b/cmd/metacache-entries_test.go
new file mode 100644
index 000000000..75379ccba
--- /dev/null
+++ b/cmd/metacache-entries_test.go
@@ -0,0 +1,265 @@
+/*
+ * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "bytes" + "reflect" + "sort" + "testing" +) + +func Test_metaCacheEntries_sort(t *testing.T) { + entries := loadMetacacheSampleEntries(t) + + o := entries.entries() + if !o.isSorted() { + t.Fatal("Expected sorted objects") + } + + // Swap first and last + o[0], o[len(o)-1] = o[len(o)-1], o[0] + if o.isSorted() { + t.Fatal("Expected unsorted objects") + } + + sorted := o.sort() + if !o.isSorted() { + t.Fatal("Expected sorted o objects") + } + if !sorted.entries().isSorted() { + t.Fatal("Expected sorted wrapped objects") + } + want := loadMetacacheSampleNames + for i, got := range o { + if got.name != want[i] { + t.Errorf("entry %d, want %q, got %q", i, want[i], got.name) + } + } +} + +func Test_metaCacheEntries_forwardTo(t *testing.T) { + org := loadMetacacheSampleEntries(t) + entries := org + want := []string{"src/compress/zlib/reader_test.go", "src/compress/zlib/writer.go", "src/compress/zlib/writer_test.go"} + entries.forwardTo("src/compress/zlib/reader_test.go") + got := entries.entries().names() + if !reflect.DeepEqual(got, want) { + t.Errorf("got unexpected result: %#v", got) + } + + // Try with prefix + entries = org + entries.forwardTo("src/compress/zlib/reader_t") + got = entries.entries().names() + if !reflect.DeepEqual(got, want) { + t.Errorf("got unexpected result: %#v", got) + } +} + +func Test_metaCacheEntries_merge(t *testing.T) { + org := loadMetacacheSampleEntries(t) + a, b := org.shallowClone(), org.shallowClone() + be := b.entries() + for i := range be { + // Modify b so it isn't deduplicated. 
+ be[i].metadata = []byte("something-else") + } + // Merge b into a + a.merge(b, -1) + want := append(loadMetacacheSampleNames, loadMetacacheSampleNames...) + sort.Strings(want) + got := a.entries().names() + if len(got) != len(want) { + t.Errorf("unexpected count, want %v, got %v", len(want), len(got)) + } + + for i, name := range got { + if want[i] != name { + t.Errorf("unexpected name, want %q, got %q", want[i], name) + } + } +} + +func Test_metaCacheEntries_dedupe(t *testing.T) { + org := loadMetacacheSampleEntries(t) + a, b := org.shallowClone(), org.shallowClone() + + // Merge b into a + a.merge(b, -1) + if a.deduplicate(nil) { + t.Fatal("deduplicate returned duplicate entries left") + } + want := loadMetacacheSampleNames + got := a.entries().names() + if !reflect.DeepEqual(want, got) { + t.Errorf("got unexpected result: %#v", got) + } +} + +func Test_metaCacheEntries_dedupe2(t *testing.T) { + org := loadMetacacheSampleEntries(t) + a, b := org.shallowClone(), org.shallowClone() + + // Replace metadata in b + testMarker := []byte("sampleset") + for i := range b.o { + b.o[i].metadata = testMarker + } + + // Merge b into a + a.merge(b, -1) + if a.deduplicate(func(existing, other *metaCacheEntry) (replace bool) { + a := bytes.Equal(existing.metadata, testMarker) + b := bytes.Equal(other.metadata, testMarker) + if a == b { + t.Fatal("got same number of testmarkers, only one should be given", a, b) + } + return b + }) { + t.Fatal("deduplicate returned duplicate entries left, we should always resolve") + } + want := loadMetacacheSampleNames + got := a.entries().names() + if !reflect.DeepEqual(want, got) { + t.Errorf("got unexpected result: %#v", got) + } +} + +func Test_metaCacheEntries_filterObjects(t *testing.T) { + data := loadMetacacheSampleEntries(t) + data.filterObjectsOnly() + got := data.entries().names() + want := []string{"src/compress/bzip2/bit_reader.go", "src/compress/bzip2/bzip2.go", "src/compress/bzip2/bzip2_test.go", "src/compress/bzip2/huffman.go", 
"src/compress/bzip2/move_to_front.go", "src/compress/bzip2/testdata/Isaac.Newton-Opticks.txt.bz2", "src/compress/bzip2/testdata/e.txt.bz2", "src/compress/bzip2/testdata/fail-issue5747.bz2", "src/compress/bzip2/testdata/pass-random1.bin", "src/compress/bzip2/testdata/pass-random1.bz2", "src/compress/bzip2/testdata/pass-random2.bin", "src/compress/bzip2/testdata/pass-random2.bz2", "src/compress/bzip2/testdata/pass-sawtooth.bz2", "src/compress/bzip2/testdata/random.data.bz2", "src/compress/flate/deflate.go", "src/compress/flate/deflate_test.go", "src/compress/flate/deflatefast.go", "src/compress/flate/dict_decoder.go", "src/compress/flate/dict_decoder_test.go", "src/compress/flate/example_test.go", "src/compress/flate/flate_test.go", "src/compress/flate/huffman_bit_writer.go", "src/compress/flate/huffman_bit_writer_test.go", "src/compress/flate/huffman_code.go", "src/compress/flate/inflate.go", "src/compress/flate/inflate_test.go", "src/compress/flate/reader_test.go", "src/compress/flate/testdata/huffman-null-max.dyn.expect", "src/compress/flate/testdata/huffman-null-max.dyn.expect-noinput", "src/compress/flate/testdata/huffman-null-max.golden", "src/compress/flate/testdata/huffman-null-max.in", "src/compress/flate/testdata/huffman-null-max.wb.expect", "src/compress/flate/testdata/huffman-null-max.wb.expect-noinput", "src/compress/flate/testdata/huffman-pi.dyn.expect", "src/compress/flate/testdata/huffman-pi.dyn.expect-noinput", "src/compress/flate/testdata/huffman-pi.golden", "src/compress/flate/testdata/huffman-pi.in", "src/compress/flate/testdata/huffman-pi.wb.expect", "src/compress/flate/testdata/huffman-pi.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-1k.dyn.expect", "src/compress/flate/testdata/huffman-rand-1k.dyn.expect-noinput", "src/compress/flate/testdata/huffman-rand-1k.golden", "src/compress/flate/testdata/huffman-rand-1k.in", "src/compress/flate/testdata/huffman-rand-1k.wb.expect", 
"src/compress/flate/testdata/huffman-rand-1k.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-limit.dyn.expect", "src/compress/flate/testdata/huffman-rand-limit.dyn.expect-noinput", "src/compress/flate/testdata/huffman-rand-limit.golden", "src/compress/flate/testdata/huffman-rand-limit.in", "src/compress/flate/testdata/huffman-rand-limit.wb.expect", "src/compress/flate/testdata/huffman-rand-limit.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-max.golden", "src/compress/flate/testdata/huffman-rand-max.in", "src/compress/flate/testdata/huffman-shifts.dyn.expect", "src/compress/flate/testdata/huffman-shifts.dyn.expect-noinput", "src/compress/flate/testdata/huffman-shifts.golden", "src/compress/flate/testdata/huffman-shifts.in", "src/compress/flate/testdata/huffman-shifts.wb.expect", "src/compress/flate/testdata/huffman-shifts.wb.expect-noinput", "src/compress/flate/testdata/huffman-text-shift.dyn.expect", "src/compress/flate/testdata/huffman-text-shift.dyn.expect-noinput", "src/compress/flate/testdata/huffman-text-shift.golden", "src/compress/flate/testdata/huffman-text-shift.in", "src/compress/flate/testdata/huffman-text-shift.wb.expect", "src/compress/flate/testdata/huffman-text-shift.wb.expect-noinput", "src/compress/flate/testdata/huffman-text.dyn.expect", "src/compress/flate/testdata/huffman-text.dyn.expect-noinput", "src/compress/flate/testdata/huffman-text.golden", "src/compress/flate/testdata/huffman-text.in", "src/compress/flate/testdata/huffman-text.wb.expect", "src/compress/flate/testdata/huffman-text.wb.expect-noinput", "src/compress/flate/testdata/huffman-zero.dyn.expect", "src/compress/flate/testdata/huffman-zero.dyn.expect-noinput", "src/compress/flate/testdata/huffman-zero.golden", "src/compress/flate/testdata/huffman-zero.in", "src/compress/flate/testdata/huffman-zero.wb.expect", "src/compress/flate/testdata/huffman-zero.wb.expect-noinput", "src/compress/flate/testdata/null-long-match.dyn.expect-noinput", 
"src/compress/flate/testdata/null-long-match.wb.expect-noinput", "src/compress/flate/token.go", "src/compress/flate/writer_test.go", "src/compress/gzip/example_test.go", "src/compress/gzip/gunzip.go", "src/compress/gzip/gunzip_test.go", "src/compress/gzip/gzip.go", "src/compress/gzip/gzip_test.go", "src/compress/gzip/issue14937_test.go", "src/compress/gzip/testdata/issue6550.gz.base64", "src/compress/lzw/reader.go", "src/compress/lzw/reader_test.go", "src/compress/lzw/writer.go", "src/compress/lzw/writer_test.go", "src/compress/testdata/e.txt", "src/compress/testdata/gettysburg.txt", "src/compress/testdata/pi.txt", "src/compress/zlib/example_test.go", "src/compress/zlib/reader.go", "src/compress/zlib/reader_test.go", "src/compress/zlib/writer.go", "src/compress/zlib/writer_test.go"} + if !reflect.DeepEqual(want, got) { + t.Errorf("got unexpected result: %#v", got) + } +} + +func Test_metaCacheEntries_filterPrefixes(t *testing.T) { + data := loadMetacacheSampleEntries(t) + data.filterPrefixesOnly() + got := data.entries().names() + want := []string{"src/compress/bzip2/", "src/compress/bzip2/testdata/", "src/compress/flate/", "src/compress/flate/testdata/", "src/compress/gzip/", "src/compress/gzip/testdata/", "src/compress/lzw/", "src/compress/testdata/", "src/compress/zlib/"} + if !reflect.DeepEqual(want, got) { + t.Errorf("got unexpected result: %#v", got) + } +} + +func Test_metaCacheEntries_filterRecursive(t *testing.T) { + data := loadMetacacheSampleEntries(t) + data.filterRecursiveEntries("src/compress/bzip2/", slashSeparator) + got := data.entries().names() + want := []string{"src/compress/bzip2/", "src/compress/bzip2/bit_reader.go", "src/compress/bzip2/bzip2.go", "src/compress/bzip2/bzip2_test.go", "src/compress/bzip2/huffman.go", "src/compress/bzip2/move_to_front.go"} + if !reflect.DeepEqual(want, got) { + t.Errorf("got unexpected result: %#v", got) + } +} + +func Test_metaCacheEntries_filterRecursiveRoot(t *testing.T) { + data := 
loadMetacacheSampleEntries(t) + data.filterRecursiveEntries("", slashSeparator) + got := data.entries().names() + want := []string{} + if !reflect.DeepEqual(want, got) { + t.Errorf("got unexpected result: %#v", got) + } +} + +func Test_metaCacheEntries_filterRecursiveRootSep(t *testing.T) { + data := loadMetacacheSampleEntries(t) + // This will remove anything with "bzip2/" in the path since it is separator + data.filterRecursiveEntries("", "bzip2/") + got := data.entries().names() + want := []string{"src/compress/flate/", "src/compress/flate/deflate.go", "src/compress/flate/deflate_test.go", "src/compress/flate/deflatefast.go", "src/compress/flate/dict_decoder.go", "src/compress/flate/dict_decoder_test.go", "src/compress/flate/example_test.go", "src/compress/flate/flate_test.go", "src/compress/flate/huffman_bit_writer.go", "src/compress/flate/huffman_bit_writer_test.go", "src/compress/flate/huffman_code.go", "src/compress/flate/inflate.go", "src/compress/flate/inflate_test.go", "src/compress/flate/reader_test.go", "src/compress/flate/testdata/", "src/compress/flate/testdata/huffman-null-max.dyn.expect", "src/compress/flate/testdata/huffman-null-max.dyn.expect-noinput", "src/compress/flate/testdata/huffman-null-max.golden", "src/compress/flate/testdata/huffman-null-max.in", "src/compress/flate/testdata/huffman-null-max.wb.expect", "src/compress/flate/testdata/huffman-null-max.wb.expect-noinput", "src/compress/flate/testdata/huffman-pi.dyn.expect", "src/compress/flate/testdata/huffman-pi.dyn.expect-noinput", "src/compress/flate/testdata/huffman-pi.golden", "src/compress/flate/testdata/huffman-pi.in", "src/compress/flate/testdata/huffman-pi.wb.expect", "src/compress/flate/testdata/huffman-pi.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-1k.dyn.expect", "src/compress/flate/testdata/huffman-rand-1k.dyn.expect-noinput", "src/compress/flate/testdata/huffman-rand-1k.golden", "src/compress/flate/testdata/huffman-rand-1k.in", 
"src/compress/flate/testdata/huffman-rand-1k.wb.expect", "src/compress/flate/testdata/huffman-rand-1k.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-limit.dyn.expect", "src/compress/flate/testdata/huffman-rand-limit.dyn.expect-noinput", "src/compress/flate/testdata/huffman-rand-limit.golden", "src/compress/flate/testdata/huffman-rand-limit.in", "src/compress/flate/testdata/huffman-rand-limit.wb.expect", "src/compress/flate/testdata/huffman-rand-limit.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-max.golden", "src/compress/flate/testdata/huffman-rand-max.in", "src/compress/flate/testdata/huffman-shifts.dyn.expect", "src/compress/flate/testdata/huffman-shifts.dyn.expect-noinput", "src/compress/flate/testdata/huffman-shifts.golden", "src/compress/flate/testdata/huffman-shifts.in", "src/compress/flate/testdata/huffman-shifts.wb.expect", "src/compress/flate/testdata/huffman-shifts.wb.expect-noinput", "src/compress/flate/testdata/huffman-text-shift.dyn.expect", "src/compress/flate/testdata/huffman-text-shift.dyn.expect-noinput", "src/compress/flate/testdata/huffman-text-shift.golden", "src/compress/flate/testdata/huffman-text-shift.in", "src/compress/flate/testdata/huffman-text-shift.wb.expect", "src/compress/flate/testdata/huffman-text-shift.wb.expect-noinput", "src/compress/flate/testdata/huffman-text.dyn.expect", "src/compress/flate/testdata/huffman-text.dyn.expect-noinput", "src/compress/flate/testdata/huffman-text.golden", "src/compress/flate/testdata/huffman-text.in", "src/compress/flate/testdata/huffman-text.wb.expect", "src/compress/flate/testdata/huffman-text.wb.expect-noinput", "src/compress/flate/testdata/huffman-zero.dyn.expect", "src/compress/flate/testdata/huffman-zero.dyn.expect-noinput", "src/compress/flate/testdata/huffman-zero.golden", "src/compress/flate/testdata/huffman-zero.in", "src/compress/flate/testdata/huffman-zero.wb.expect", "src/compress/flate/testdata/huffman-zero.wb.expect-noinput", 
"src/compress/flate/testdata/null-long-match.dyn.expect-noinput", "src/compress/flate/testdata/null-long-match.wb.expect-noinput", "src/compress/flate/token.go", "src/compress/flate/writer_test.go", "src/compress/gzip/", "src/compress/gzip/example_test.go", "src/compress/gzip/gunzip.go", "src/compress/gzip/gunzip_test.go", "src/compress/gzip/gzip.go", "src/compress/gzip/gzip_test.go", "src/compress/gzip/issue14937_test.go", "src/compress/gzip/testdata/", "src/compress/gzip/testdata/issue6550.gz.base64", "src/compress/lzw/", "src/compress/lzw/reader.go", "src/compress/lzw/reader_test.go", "src/compress/lzw/writer.go", "src/compress/lzw/writer_test.go", "src/compress/testdata/", "src/compress/testdata/e.txt", "src/compress/testdata/gettysburg.txt", "src/compress/testdata/pi.txt", "src/compress/zlib/", "src/compress/zlib/example_test.go", "src/compress/zlib/reader.go", "src/compress/zlib/reader_test.go", "src/compress/zlib/writer.go", "src/compress/zlib/writer_test.go"} + if !reflect.DeepEqual(want, got) { + t.Errorf("got unexpected result: %#v", got) + } +} + +func Test_metaCacheEntries_filterPrefix(t *testing.T) { + data := loadMetacacheSampleEntries(t) + data.filterPrefix("src/compress/bzip2/") + got := data.entries().names() + want := []string{"src/compress/bzip2/", "src/compress/bzip2/bit_reader.go", "src/compress/bzip2/bzip2.go", "src/compress/bzip2/bzip2_test.go", "src/compress/bzip2/huffman.go", "src/compress/bzip2/move_to_front.go", "src/compress/bzip2/testdata/", "src/compress/bzip2/testdata/Isaac.Newton-Opticks.txt.bz2", "src/compress/bzip2/testdata/e.txt.bz2", "src/compress/bzip2/testdata/fail-issue5747.bz2", "src/compress/bzip2/testdata/pass-random1.bin", "src/compress/bzip2/testdata/pass-random1.bz2", "src/compress/bzip2/testdata/pass-random2.bin", "src/compress/bzip2/testdata/pass-random2.bz2", "src/compress/bzip2/testdata/pass-sawtooth.bz2", "src/compress/bzip2/testdata/random.data.bz2"} + if !reflect.DeepEqual(want, got) { + t.Errorf("got unexpected 
result: %#v", got) + } +} + +func Test_metaCacheEntry_isInDir(t *testing.T) { + tests := []struct { + testName string + entry string + dir string + sep string + want bool + }{ + { + testName: "basic-file", + entry: "src/file", + dir: "src/", + sep: slashSeparator, + want: true, + }, + { + testName: "basic-dir", + entry: "src/dir/", + dir: "src/", + sep: slashSeparator, + want: true, + }, + { + testName: "deeper-file", + entry: "src/dir/somewhere.ext", + dir: "src/", + sep: slashSeparator, + want: false, + }, + { + testName: "deeper-dir", + entry: "src/dir/somewhere/", + dir: "src/", + sep: slashSeparator, + want: false, + }, + { + testName: "root-dir", + entry: "doc/", + dir: "", + sep: slashSeparator, + want: true, + }, + { + testName: "root-file", + entry: "word.doc", + dir: "", + sep: slashSeparator, + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.testName, func(t *testing.T) { + e := metaCacheEntry{ + name: tt.entry, + } + if got := e.isInDir(tt.dir, tt.sep); got != tt.want { + t.Errorf("isInDir() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/cmd/metacache-manager.go b/cmd/metacache-manager.go new file mode 100644 index 000000000..3e06d64f3 --- /dev/null +++ b/cmd/metacache-manager.go @@ -0,0 +1,181 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package cmd + +import ( + "context" + "errors" + "fmt" + "runtime/debug" + "sync" + "time" + + "github.com/minio/minio/cmd/logger" +) + +// localMetacacheMgr is the *local* manager for this peer. +// It should never be used directly since buckets are +// distributed deterministically. +// Therefore no cluster locks are required. +var localMetacacheMgr = &metacacheManager{ + buckets: make(map[string]*bucketMetacache), +} + +type metacacheManager struct { + mu sync.RWMutex + init sync.Once + buckets map[string]*bucketMetacache +} + +const metacacheManagerTransientBucket = "**transient**" + +// initManager will start async saving the cache. +func (m *metacacheManager) initManager() { + // Add a transient bucket. + tb := newBucketMetacache(metacacheManagerTransientBucket) + tb.transient = true + m.buckets[metacacheManagerTransientBucket] = tb + + // Start saver when object layer is ready. + go func() { + objAPI := newObjectLayerFn() + for objAPI == nil { + time.Sleep(time.Second) + objAPI = newObjectLayerFn() + } + if !globalIsErasure { + logger.Info("metacacheManager was initialized in non-erasure mode, skipping save") + return + } + + t := time.NewTicker(time.Minute) + var exit bool + bg := context.Background() + for !exit { + select { + case <-t.C: + case <-GlobalContext.Done(): + exit = true + } + m.mu.RLock() + for _, v := range m.buckets { + if !exit { + v.cleanup() + } + logger.LogIf(bg, v.save(bg)) + } + m.mu.RUnlock() + } + m.getTransient().deleteAll() + }() +} + +// getBucket will get a bucket metacache or load it from disk if needed. +func (m *metacacheManager) getBucket(ctx context.Context, bucket string) *bucketMetacache { + m.init.Do(m.initManager) + + // Return a transient bucket for invalid or system buckets. 
+ if isReservedOrInvalidBucket(bucket, false) { + return m.getTransient() + } + m.mu.RLock() + b, ok := m.buckets[bucket] + m.mu.RUnlock() + if ok { + if b.bucket != bucket { + logger.Info("getBucket: cached bucket %s does not match this bucket %s", b.bucket, bucket) + debug.PrintStack() + } + return b + } + + m.mu.Lock() + // See if someone else fetched it while we waited for the lock. + b, ok = m.buckets[bucket] + if ok { + m.mu.Unlock() + if b.bucket != bucket { + logger.Info("getBucket: newly cached bucket %s does not match this bucket %s", b.bucket, bucket) + debug.PrintStack() + } + return b + } + + // Load bucket. If we fail return the transient bucket. + b, err := loadBucketMetaCache(ctx, bucket) + if err != nil { + m.mu.Unlock() + return m.getTransient() + } + if b.bucket != bucket { + logger.LogIf(ctx, fmt.Errorf("getBucket: loaded bucket %s does not match this bucket %s", b.bucket, bucket)) + } + m.buckets[bucket] = b + m.mu.Unlock() + return b +} + +// getTransient will return a transient bucket. +func (m *metacacheManager) getTransient() *bucketMetacache { + m.init.Do(m.initManager) + m.mu.RLock() + bmc := m.buckets[metacacheManagerTransientBucket] + m.mu.RUnlock() + return bmc +} + +// checkMetacacheState should be used if data is not updating. +// Should only be called if a failure occurred. +func (o listPathOptions) checkMetacacheState(ctx context.Context) error { + // We operate on a copy... 
+ o.Create = false + var cache metacache + if !o.Transient { + rpc := globalNotificationSys.restClientFromHash(o.Bucket) + if rpc == nil { + // Local + cache = localMetacacheMgr.getBucket(ctx, o.Bucket).findCache(o) + } else { + c, err := rpc.GetMetacacheListing(ctx, o) + if err != nil { + return err + } + cache = *c + } + } else { + cache = localMetacacheMgr.getTransient().findCache(o) + } + + if cache.status == scanStateNone { + return errFileNotFound + } + if cache.status == scanStateSuccess { + if time.Since(cache.lastUpdate) > 10*time.Second { + return fmt.Errorf("timeout: Finished and data not available after 10 seconds") + } + return nil + } + if cache.error != "" { + return errors.New(cache.error) + } + if cache.status == scanStateStarted { + if time.Since(cache.lastUpdate) > metacacheMaxRunningAge { + return errors.New("cache listing not updating") + } + } + return nil +} diff --git a/cmd/metacache-marker.go b/cmd/metacache-marker.go new file mode 100644 index 000000000..b93c231ed --- /dev/null +++ b/cmd/metacache-marker.go @@ -0,0 +1,70 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "context" + "fmt" + "strings" + + "github.com/minio/minio/cmd/logger" +) + +// markerTagVersion is the marker version. +// Should not need to be updated unless a fundamental change is made to the marker format. 
+const markerTagVersion = "v1" + +// parseMarker will parse a marker possibly encoded with encodeMarker +func parseMarker(s string) (marker, uuid string) { + if !strings.Contains(s, "[minio_cache:"+markerTagVersion) { + return s, "" + } + start := strings.LastIndex(s, "[") + marker = s[:start] + end := strings.LastIndex(s, "]") + tag := strings.Trim(s[start:end], "[]") + tags := strings.Split(tag, ",") + for _, tag := range tags { + kv := strings.Split(tag, ":") + if len(kv) < 2 { + continue + } + switch kv[0] { + case "minio_cache": + if kv[1] != markerTagVersion { + break + } + case "id": + uuid = kv[1] + default: + // Ignore unknown + } + } + return +} + +// encodeMarker will encode a uuid and return it as a marker. +// uuid cannot contain '[', ':' or ','. +func encodeMarker(marker, uuid string) string { + if uuid == "" { + return marker + } + if strings.ContainsAny(uuid, "[:,") { + logger.LogIf(context.Background(), fmt.Errorf("encodeMarker: uuid %s contained invalid characters", uuid)) + } + return fmt.Sprintf("%s[minio_cache:%s,id:%s]", marker, markerTagVersion, uuid) +} diff --git a/cmd/metacache-server-sets.go b/cmd/metacache-server-sets.go new file mode 100644 index 000000000..4c03057e7 --- /dev/null +++ b/cmd/metacache-server-sets.go @@ -0,0 +1,213 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package cmd + +import ( + "context" + "io" + "path" + "sync" + + "github.com/minio/minio/cmd/config" + "github.com/minio/minio/cmd/logger" + "github.com/minio/minio/pkg/env" +) + +// listPath will return the requested entries. +// If no more entries are in the listing io.EOF is returned, +// otherwise nil or an unexpected error is returned. +// The listPathOptions given will be checked and modified internally. +// Required important fields are Bucket, Prefix, Separator. +// Other important fields are Limit, Marker. +// List ID always derived from the Marker. +func (z *erasureServerSets) listPath(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) { + if err := checkListObjsArgs(ctx, o.Bucket, o.Prefix, o.Marker, z); err != nil { + return entries, err + } + + // Marker is set validate pre-condition. + if o.Marker != "" && o.Prefix != "" { + // Marker not common with prefix is not implemented. Send an empty response + if !HasPrefix(o.Marker, o.Prefix) { + return entries, io.EOF + } + } + + // With max keys of zero we have reached eof, return right here. + if o.Limit == 0 { + return entries, io.EOF + } + + // For delimiter and prefix as '/' we do not list anything at all + // since according to s3 spec we stop at the 'delimiter' + // along // with the prefix. On a flat namespace with 'prefix' + // as '/' we don't have any entries, since all the keys are + // of form 'keyName/...' + if o.Separator == SlashSeparator && o.Prefix == SlashSeparator { + return entries, io.EOF + } + + // Over flowing count - reset to maxObjectList. + if o.Limit < 0 || o.Limit > maxObjectList { + o.Limit = maxObjectList + } + + // If delimiter is slashSeparator we must return directories of + // the non-recursive scan unless explicitly requested. 
+	o.IncludeDirectories = o.Separator == slashSeparator
+	if (o.Separator == slashSeparator || o.Separator == "") && !o.Recursive {
+		o.Recursive = o.Separator != slashSeparator
+		o.Separator = slashSeparator
+	} else {
+		// Default is recursive, if delimiter is set then list non recursive.
+		o.Recursive = true
+	}
+
+	// Decode and get the optional list id from the marker.
+	o.Marker, o.ID = parseMarker(o.Marker)
+	o.Create = o.ID == ""
+	if o.ID == "" {
+		o.ID = mustGetUUID()
+	}
+	o.BaseDir = baseDirFromPrefix(o.Prefix)
+
+	var cache metacache
+	// If we don't have a list id we must ask the server if it has a cache or create a new.
+	if o.Create {
+		o.CurrentCycle = intDataUpdateTracker.current()
+		o.OldestCycle = globalNotificationSys.findEarliestCleanBloomFilter(ctx, path.Join(o.Bucket, o.BaseDir))
+		// NOTE(review): a shadowing 'var cache metacache' was removed here so the outer cache is populated.
+		rpc := globalNotificationSys.restClientFromHash(o.Bucket)
+		if rpc == nil {
+			// Local
+			cache = localMetacacheMgr.getBucket(ctx, o.Bucket).findCache(o)
+		} else {
+			c, err := rpc.GetMetacacheListing(ctx, o)
+			if err != nil {
+				logger.LogIf(ctx, err)
+				cache = localMetacacheMgr.getTransient().findCache(o)
+				o.Transient = true
+			} else {
+				cache = *c
+			}
+		}
+		if cache.fileNotFound {
+			return entries, errFileNotFound
+		}
+		// Only create if we created a new.
+		o.Create = o.ID == cache.id
+		o.ID = cache.id
+	}
+
+	if o.AskDisks == 0 {
+		switch env.Get("MINIO_API_LIST_STRICT_QUORUM", config.EnableOff) {
+		case config.EnableOn:
+			// If strict, ask at least 50%.
+			o.AskDisks = -1
+		case "reduced":
+			// Reduced safety.
+			o.AskDisks = 2
+		case "disk":
+			// Ask single disk.
+			o.AskDisks = 1
+		}
+	}
+
+	var mu sync.Mutex
+	var wg sync.WaitGroup
+	var errs []error
+	allAtEOF := true
+	asked := 0
+	mu.Lock()
+	// Ask all sets and merge entries.
+	for _, zone := range z.serverSets {
+		for _, set := range zone.sets {
+			wg.Add(1)
+			asked++
+			go func(i int, set *erasureObjects) {
+				defer wg.Done()
+				e, err := set.listPath(ctx, o)
+				mu.Lock()
+				defer mu.Unlock()
+				if err == nil {
+					allAtEOF = false
+				}
+				errs[i] = err
+				entries.merge(e, -1)
+
+				// Resolve non-trivial conflicts
+				entries.deduplicate(func(existing, other *metaCacheEntry) (replace bool) {
+					if existing.isDir() {
+						return false
+					}
+					eFIV, err := existing.fileInfo(o.Bucket)
+					if err != nil {
+						return true
+					}
+					oFIV, err := other.fileInfo(o.Bucket)
+					if err != nil {
+						return false
+					}
+					return oFIV.ModTime.After(eFIV.ModTime)
+				})
+				if entries.len() > o.Limit {
+					allAtEOF = false
+					entries.truncate(o.Limit)
+				}
+			}(len(errs), set)
+			errs = append(errs, nil)
+		}
+	}
+	mu.Unlock()
+	wg.Wait()
+
+	if isAllNotFound(errs) {
+		// All sets returned not found.
+		// Update master cache with that information.
+		cache.status = scanStateSuccess
+		cache.fileNotFound = true
+		client := globalNotificationSys.restClientFromHash(o.Bucket)
+		if o.Transient {
+			cache, err = localMetacacheMgr.getTransient().updateCacheEntry(cache)
+		} else if client == nil {
+			cache, err = localMetacacheMgr.getBucket(GlobalContext, o.Bucket).updateCacheEntry(cache)
+		} else {
+			cache, err = client.UpdateMetacacheListing(context.Background(), cache)
+		}
+		logger.LogIf(ctx, err)
+		return entries, errFileNotFound
+	}
+
+	for _, err := range errs {
+		if err == nil {
+			allAtEOF = false
+			continue
+		}
+		if err == io.EOF {
+			continue
+		}
+		logger.LogIf(ctx, err)
+		return entries, err
+	}
+	truncated := entries.len() > o.Limit || !allAtEOF
+	entries.truncate(o.Limit)
+	entries.listID = o.ID
+	if !truncated {
+		return entries, io.EOF
+	}
+	return entries, nil
+}
diff --git a/cmd/metacache-set.go b/cmd/metacache-set.go
new file mode 100644
index 000000000..21692c4a2
--- /dev/null
+++ b/cmd/metacache-set.go
@@ -0,0 +1,769 @@
+/*
+ * MinIO Cloud Storage, (C) 2020 MinIO, Inc.
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "bytes" + "context" + "encoding/gob" + "encoding/json" + "errors" + "fmt" + "io" + "strconv" + "strings" + "sync" + "time" + + "github.com/minio/minio/cmd/config/storageclass" + xhttp "github.com/minio/minio/cmd/http" + "github.com/minio/minio/cmd/logger" + "github.com/minio/minio/pkg/console" + "github.com/minio/minio/pkg/hash" +) + +type listPathOptions struct { + // ID of the listing. + // This will be used to persist the list. + ID string + + // Bucket of the listing. + Bucket string + + // Directory inside the bucket. + BaseDir string + + // Scan/return only content with prefix. + Prefix string + + // Marker to resume listing. + // The response will be the first entry AFTER this object name. + Marker string + + // Limit the number of results. + Limit int + + // The number of disks to ask. Special values: + // 0 uses default number of disks. + // -1 use at least 50% of disks or at least the default number. + AskDisks int + + // InclDeleted will keep all entries where latest version is a delete marker. + InclDeleted bool + + // Scan recursively. + // If false only main directory will be scanned. + // Should always be true if Separator is n SlashSeparator. + Recursive bool + + // Separator to use. + Separator string + + // Create indicates that the lister should not attempt to load an existing cache. + Create bool + + // CurrentCycle indicates the current bloom cycle. 
+ // Will be used if a new scan is started. + CurrentCycle uint64 + + // OldestCycle indicates the oldest cycle acceptable. + OldestCycle uint64 + + // Include pure directories. + IncludeDirectories bool + + // Transient is set if the cache is transient due to an error or being a reserved bucket. + // This means the cache metadata will not be persisted on disk. + // A transient result will never be returned from the cache so knowing the list id is required. + Transient bool +} + +func init() { + gob.Register(listPathOptions{}) +} + +// gatherResults will collect all results on the input channel and filter results according to the options. +// Caller should close the channel when done. +// The returned function will return the results once there is enough or input is closed. +func (o *listPathOptions) gatherResults(in <-chan metaCacheEntry) func() (metaCacheEntriesSorted, error) { + const debugPrint = false + var resultsDone = make(chan metaCacheEntriesSorted) + // Copy so we can mutate + resCh := resultsDone + resErr := io.EOF + + go func() { + var results metaCacheEntriesSorted + for entry := range in { + if resCh == nil { + // past limit + continue + } + if !o.IncludeDirectories && entry.isDir() { + continue + } + if debugPrint { + console.Infoln("gather got:", entry.name) + } + if o.Marker != "" && entry.name <= o.Marker { + if debugPrint { + console.Infoln("pre marker") + } + continue + } + if !strings.HasPrefix(entry.name, o.Prefix) { + if debugPrint { + console.Infoln("not in prefix") + } + continue + } + if !o.Recursive && !entry.isInDir(o.Prefix, o.Separator) { + if debugPrint { + console.Infoln("not in dir", o.Prefix, o.Separator) + } + continue + } + if !o.InclDeleted && entry.isObject() { + if entry.isLatestDeletemarker() { + if debugPrint { + console.Infoln("latest delete") + } + continue + } + } + if o.Limit > 0 && results.len() >= o.Limit { + // We have enough and we have more. 
+				// Do not return io.EOF
+				if resCh != nil {
+					resErr = nil
+					resCh <- results
+					resCh = nil
+				}
+				continue
+			}
+			if debugPrint {
+				console.Infoln("adding...")
+			}
+			results.o = append(results.o, entry)
+		}
+		if resCh != nil {
+			resErr = io.EOF
+			resCh <- results
+		}
+	}()
+	return func() (metaCacheEntriesSorted, error) {
+		return <-resultsDone, resErr
+	}
+}
+
+// findFirstPart will find the part with 0 being the first that corresponds to the marker in the options.
+// io.ErrUnexpectedEOF is returned if the place containing the marker hasn't been scanned yet.
+// io.EOF indicates the marker is beyond the end of the stream and does not exist.
+func (o *listPathOptions) findFirstPart(fi FileInfo) (int, error) {
+	search := o.Marker
+	if search == "" {
+		search = o.Prefix
+	}
+	if search == "" {
+		return 0, nil
+	}
+	const debugPrint = false
+	if debugPrint {
+		console.Infoln("searching for ", search)
+	}
+	var tmp metacacheBlock
+	i := 0
+	for {
+		partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, i)
+		v, ok := fi.Metadata[partKey]
+		if !ok {
+			if debugPrint {
+				console.Infoln("no match in metadata, waiting")
+			}
+			return -1, io.ErrUnexpectedEOF
+		}
+		err := json.Unmarshal([]byte(v), &tmp)
+		if err != nil {
+			logger.LogIf(context.Background(), err)
+			return -1, err
+		}
+		if tmp.First == "" && tmp.Last == "" && tmp.EOS {
+			return 0, errFileNotFound
+		}
+		if tmp.First >= search {
+			if debugPrint {
+				console.Infoln("First >= search", v)
+			}
+			return i, nil
+		}
+		if tmp.Last >= search {
+			if debugPrint {
+
+				console.Infoln("Last >= search", v)
+			}
+			return i, nil
+		}
+		if tmp.EOS {
+			if debugPrint {
+				console.Infoln("no match, at EOS", v)
+			}
+			return -3, io.EOF
+		}
+		if debugPrint {
+			console.Infoln("First ", tmp.First, "<", search, " search", i)
+		}
+		i++
+	}
+}
+
+// newMetacache constructs a new metacache from the options.
+func (o listPathOptions) newMetacache() metacache { + return metacache{ + id: o.ID, + bucket: o.Bucket, + root: o.BaseDir, + recursive: o.Recursive, + status: scanStateStarted, + error: "", + started: UTCNow(), + lastHandout: UTCNow(), + lastUpdate: UTCNow(), + ended: time.Time{}, + startedCycle: o.CurrentCycle, + endedCycle: 0, + dataVersion: metacacheStreamVersion, + } +} + +func getMetacacheBlockInfo(fi FileInfo, block int) (*metacacheBlock, error) { + var tmp metacacheBlock + partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, block) + v, ok := fi.Metadata[partKey] + if !ok { + return nil, io.ErrUnexpectedEOF + } + return &tmp, json.Unmarshal([]byte(v), &tmp) +} + +func metacachePrefixForID(bucket, id string) string { + return pathJoin("buckets", bucket, ".metacache", id) +} + +// objectPath returns the object path of the cache. +func (o *listPathOptions) objectPath(block int) string { + return pathJoin(metacachePrefixForID(o.Bucket, o.ID), "block-"+strconv.Itoa(block)+".s2") +} + +// filter will apply the options and return the number of objects requested by the limit. +// Will return io.EOF if there are no more entries with the same filter. +// The last entry can be used as a marker to resume the listing. 
+func (r *metacacheReader) filter(o listPathOptions) (entries metaCacheEntriesSorted, err error) { + const debugPrint = false + // Forward to prefix, if any + err = r.forwardTo(o.Prefix) + if err != nil { + return entries, err + } + if o.Marker != "" { + err = r.forwardTo(o.Marker) + if err != nil { + return entries, err + } + next, err := r.peek() + if err != nil { + return entries, err + } + if next.name == o.Marker { + err := r.skip(1) + if err != nil { + return entries, err + } + } + } + if debugPrint { + console.Infoln("forwarded to ", o.Prefix, "marker:", o.Marker, "sep:", o.Separator) + } + // Filter + if !o.Recursive { + entries.o = make(metaCacheEntries, 0, o.Limit) + pastPrefix := false + err := r.readFn(func(entry metaCacheEntry) bool { + if o.Prefix != "" && !strings.HasPrefix(entry.name, o.Prefix) { + // We are past the prefix, don't continue. + pastPrefix = true + return false + } + if !o.IncludeDirectories && entry.isDir() { + return true + } + if !entry.isInDir(o.Prefix, o.Separator) { + return true + } + if !o.InclDeleted && entry.isObject() && entry.isLatestDeletemarker() { + return entries.len() < o.Limit + } + entries.o = append(entries.o, entry) + return entries.len() < o.Limit + }) + if err == io.EOF || pastPrefix || r.nextEOF() { + return entries, io.EOF + } + return entries, err + } + + // We should not need to filter more. + return r.readN(o.Limit, o.InclDeleted, o.IncludeDirectories, o.Prefix) +} + +func (er *erasureObjects) streamMetadataParts(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) { + retries := 0 + const debugPrint = false + for { + select { + case <-ctx.Done(): + return entries, ctx.Err() + default: + } + + // Load first part metadata... + // All operations are performed without locks, so we must be careful and allow for failures. 
+ fi, metaArr, onlineDisks, err := er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(0), ObjectOptions{}) + if err != nil { + if err == errFileNotFound || errors.Is(err, errErasureReadQuorum) || errors.Is(err, InsufficientReadQuorum{}) { + // Not ready yet... + if retries == 10 { + err := o.checkMetacacheState(ctx) + if debugPrint { + logger.Info("waiting for first part (%s), err: %v", o.objectPath(0), err) + } + if err != nil { + return entries, err + } + retries = 0 + continue + } + retries++ + time.Sleep(100 * time.Millisecond) + continue + } + if debugPrint { + console.Infoln("first getObjectFileInfo", o.objectPath(0), "returned err:", err) + console.Infof("err type: %T\n", err) + } + return entries, err + } + if fi.Deleted { + return entries, errFileNotFound + } + + partN, err := o.findFirstPart(fi) + switch err { + case nil: + case io.ErrUnexpectedEOF, errErasureReadQuorum, InsufficientReadQuorum{}: + if retries == 10 { + err := o.checkMetacacheState(ctx) + if debugPrint { + logger.Info("waiting for metadata, err: %v", err) + } + if err != nil { + return entries, err + } + retries = 0 + continue + } + retries++ + time.Sleep(100 * time.Millisecond) + continue + case io.EOF: + return entries, io.EOF + } + // We got a stream to start at. + loadedPart := 0 + var buf bytes.Buffer + for { + select { + case <-ctx.Done(): + return entries, ctx.Err() + default: + } + + if partN != loadedPart { + // Load first part metadata... 
+ fi, metaArr, onlineDisks, err = er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(partN), ObjectOptions{}) + switch err { + case errFileNotFound, errErasureReadQuorum, InsufficientReadQuorum{}: + if retries >= 10 { + err := o.checkMetacacheState(ctx) + if debugPrint { + logger.Info("waiting for part data (%v), err: %v", o.objectPath(partN), err) + } + if err != nil { + return entries, err + } + retries = 0 + continue + } + time.Sleep(100 * time.Millisecond) + continue + default: + time.Sleep(100 * time.Millisecond) + if retries >= 20 { + // We had at least 10 retries without getting a result. + logger.LogIf(ctx, err) + return entries, err + } + retries++ + continue + case nil: + loadedPart = partN + bi, err := getMetacacheBlockInfo(fi, partN) + logger.LogIf(ctx, err) + if err == nil { + if bi.pastPrefix(o.Prefix) { + return entries, io.EOF + } + } + } + if fi.Deleted { + return entries, io.ErrUnexpectedEOF + } + } + buf.Reset() + err := er.getObjectWithFileInfo(ctx, minioMetaBucket, o.objectPath(partN), 0, fi.Size, &buf, fi, metaArr, onlineDisks) + switch err { + case errFileNotFound, errErasureReadQuorum, InsufficientReadQuorum{}: + if retries >= 20 { + // We had at least 10 retries without getting a result. + logger.LogIf(ctx, err) + return entries, err + } + retries++ + time.Sleep(100 * time.Millisecond) + continue + default: + logger.LogIf(ctx, err) + return entries, err + case nil: + } + tmp, err := newMetacacheReader(&buf) + if err != nil { + return entries, err + } + e, err := tmp.filter(o) + entries.o = append(entries.o, e.o...) + if o.Limit > 0 && entries.len() > o.Limit { + entries.truncate(o.Limit) + return entries, nil + } + switch err { + case io.EOF: + // We finished at the end of the block. + // And should not expect any more results. + bi, err := getMetacacheBlockInfo(fi, partN) + logger.LogIf(ctx, err) + if err != nil || bi.EOS { + // We are done and there are no more parts. 
+ return entries, io.EOF + } + if bi.endedPrefix(o.Prefix) { + // Nothing more for prefix. + return entries, io.EOF + } + partN++ + case nil: + // We stopped within the listing, we are done for now... + return entries, nil + default: + return entries, err + } + } + } +} + +// Will return io.EOF if continuing would not yield more results. +func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) { + const debugPrint = false + if debugPrint { + console.Printf("listPath with options: %#v", o) + } + // See if we have the listing stored. + if !o.Create { + entries, err := er.streamMetadataParts(ctx, o) + switch err { + case nil, io.EOF, context.Canceled, context.DeadlineExceeded: + return entries, err + } + logger.LogIf(ctx, err) + return entries, err + } + + rpcClient := globalNotificationSys.restClientFromHash(o.Bucket) + meta := o.newMetacache() + var metaMu sync.Mutex + defer func() { + if debugPrint { + console.Println("listPath returning:", entries.len(), "err:", err) + } + if err != nil { + metaMu.Lock() + if meta.status != scanStateError { + meta.error = err.Error() + meta.status = scanStateError + } + lm := meta + metaMu.Unlock() + if rpcClient == nil { + localMetacacheMgr.getBucket(GlobalContext, o.Bucket).updateCacheEntry(lm) + } else { + rpcClient.UpdateMetacacheListing(context.Background(), lm) + } + } + }() + if debugPrint { + console.Println("listPath: scanning bucket:", o.Bucket, "basedir:", o.BaseDir, "prefix:", o.Prefix, "marker:", o.Marker) + } + + // Disconnect from call above, but cancel on exit. + ctx, cancel := context.WithCancel(GlobalContext) + // We need to ask disks. + disks := er.getOnlineDisks() + + var askDisks = o.AskDisks + switch askDisks { + // 50% or at least 3. + case -1: + o.AskDisks = getReadQuorum(len(er.getDisks())) + if o.AskDisks < 3 { + o.AskDisks = 3 + } + // Default is 3 disks. 
+ case 0: + askDisks = 3 + } + + if len(disks) < askDisks { + err = InsufficientReadQuorum{} + if debugPrint { + console.Errorf("listPath: Insufficient disks, %d of %d needed are available", len(disks), askDisks) + } + logger.LogIf(ctx, fmt.Errorf("listPath: Insufficient disks, %d of %d needed are available", len(disks), askDisks)) + cancel() + return + } + + // Select askDisks random disks, 3 is ok. + if len(disks) > askDisks { + disks = disks[:askDisks] + } + var readers = make([]*metacacheReader, askDisks) + for i := range disks { + r, w := io.Pipe() + d := disks[i] + readers[i], err = newMetacacheReader(r) + if err != nil { + cancel() + return entries, err + } + // Send request. + go func() { + err := d.WalkDir(ctx, WalkDirOptions{Bucket: o.Bucket, BaseDir: o.BaseDir, Recursive: o.Recursive || o.Separator != SlashSeparator}, w) + w.CloseWithError(err) + if err != io.EOF { + logger.LogIf(ctx, err) + } + }() + } + + // Create output for our results. + cacheCh := make(chan metaCacheEntry, metacacheBlockSize) + + // Create filter for results. + filterCh := make(chan metaCacheEntry, 100) + filteredResults := o.gatherResults(filterCh) + closeChannels := func() { + close(cacheCh) + close(filterCh) + } + + go func() { + defer cancel() + // Save continuous updates + go func() { + ticker := time.NewTicker(10 * time.Second) + defer ticker.Stop() + var exit bool + for !exit { + select { + case <-ticker.C: + case <-ctx.Done(): + exit = true + } + metaMu.Lock() + meta.endedCycle = intDataUpdateTracker.current() + lm := meta + metaMu.Unlock() + var err error + if o.Transient { + lm, err = localMetacacheMgr.getTransient().updateCacheEntry(lm) + } else if rpcClient == nil { + lm, err = localMetacacheMgr.getBucket(GlobalContext, o.Bucket).updateCacheEntry(lm) + } else { + lm, err = rpcClient.UpdateMetacacheListing(context.Background(), lm) + } + logger.LogIf(ctx, err) + if lm.status == scanStateError { + cancel() + exit = true + } + } + }() + + // Write results to disk. 
+ bw := newMetacacheBlockWriter(cacheCh, func(b *metacacheBlock) error { + if debugPrint { + console.Println("listPath: saving block", b.n, "to", o.objectPath(b.n)) + } + r, err := hash.NewReader(bytes.NewBuffer(b.data), int64(len(b.data)), "", "", int64(len(b.data)), false) + logger.LogIf(ctx, err) + custom := b.headerKV() + custom[xhttp.AmzStorageClass] = storageclass.RRS + _, err = er.putObject(ctx, minioMetaBucket, o.objectPath(b.n), NewPutObjReader(r, nil, nil), ObjectOptions{UserDefined: custom}) + if err != nil { + metaMu.Lock() + meta.status = scanStateError + meta.error = err.Error() + metaMu.Unlock() + cancel() + return err + } + if b.n == 0 { + return nil + } + // Update block 0 metadata. + for { + err := er.updateObjectMeta(ctx, minioMetaBucket, o.objectPath(0), b.headerKV(), ObjectOptions{}) + if err == nil { + break + } + logger.LogIf(ctx, err) + time.Sleep(100 * time.Millisecond) + } + return nil + }) + + // How to resolve results. + resolver := metadataResolutionParams{ + dirQuorum: askDisks - 1, + objQuorum: askDisks - 1, + bucket: o.Bucket, + } + + topEntries := make(metaCacheEntries, len(readers)) + for { + // Get the top entry from each + var current metaCacheEntry + var atEOF, agree int + for i, r := range readers { + topEntries[i].name = "" + entry, err := r.peek() + switch err { + case io.EOF: + atEOF++ + continue + case nil: + default: + closeChannels() + metaMu.Lock() + meta.status = scanStateError + meta.error = err.Error() + metaMu.Unlock() + return + } + // If no current, add it. + if current.name == "" { + topEntries[i] = entry + current = entry + agree++ + continue + } + // If exact match, we agree. + if current.matches(&entry, o.Bucket) { + topEntries[i] = entry + agree++ + continue + } + // If only the name matches we didn't agree, but add it for resolution. 
+ if entry.name == current.name { + topEntries[i] = entry + continue + } + // We got different entries + if entry.name > current.name { + continue + } + // We got a new, better current. + // Clear existing entries. + for i := range topEntries[:i] { + topEntries[i] = metaCacheEntry{} + } + agree = 1 + current = entry + topEntries[i] = entry + } + // Break if all at EOF. + if atEOF == len(readers) { + break + } + if agree == len(readers) { + // Everybody agreed + for _, r := range readers { + r.skip(1) + } + cacheCh <- topEntries[0] + filterCh <- topEntries[0] + continue + } + + // Results Disagree :-( + entry, ok := topEntries.resolve(&resolver) + if ok { + cacheCh <- *entry + filterCh <- *entry + } + // Skip the inputs we used. + for i, r := range readers { + if topEntries[i].name != "" { + r.skip(1) + } + } + } + closeChannels() + metaMu.Lock() + if meta.error == "" { + if err := bw.Close(); err != nil { + meta.error = err.Error() + meta.status = scanStateError + } else { + meta.status = scanStateSuccess + meta.endedCycle = intDataUpdateTracker.current() + } + } + metaMu.Unlock() + }() + + return filteredResults() +} diff --git a/cmd/metacache-stream.go b/cmd/metacache-stream.go new file mode 100644 index 000000000..fb5d71271 --- /dev/null +++ b/cmd/metacache-stream.go @@ -0,0 +1,881 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package cmd + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "io" + "os" + "strings" + "sync" + + "github.com/klauspost/compress/s2" + "github.com/minio/minio/cmd/logger" + "github.com/tinylib/msgp/msgp" +) + +// metadata stream format: +// +// The stream is s2 compressed. +// https://github.com/klauspost/compress/tree/master/s2#s2-compression +// This ensures integrity and reduces the size typically by at least 50%. +// +// All stream elements are msgpack encoded. +// +// 1 Integer, metacacheStreamVersion of the writer. +// This can be used for managing breaking changes. +// +// For each element: +// 1. Bool. If false at end of stream. +// 2. String. Name of object. Directories contains a trailing slash. +// 3. Binary. Blob of metadata. Length 0 on directories. +// ... Next element. +// +// Streams can be assumed to be sorted in ascending order. +// If the stream ends before a false boolean it can be assumed it was truncated. + +const metacacheStreamVersion = 1 + +// metacacheWriter provides a serializer of metacache objects. +type metacacheWriter struct { + mw *msgp.Writer + creator func() error + closer func() error + blockSize int + + streamErr error + streamWg sync.WaitGroup +} + +// newMetacacheWriter will create a serializer that will write objects in given order to the output. +// Provide a block size that affects latency. If 0 a default of 128KiB will be used. +// Block size can be up to 4MiB. 
+func newMetacacheWriter(out io.Writer, blockSize int) *metacacheWriter { + if blockSize < 8<<10 { + blockSize = 128 << 10 + } + w := metacacheWriter{ + mw: nil, + blockSize: blockSize, + } + w.creator = func() error { + s2w := s2.NewWriter(out, s2.WriterBlockSize(blockSize)) + w.mw = msgp.NewWriter(s2w) + w.creator = nil + if err := w.mw.WriteByte(metacacheStreamVersion); err != nil { + return err + } + + w.closer = func() error { + if w.streamErr != nil { + return w.streamErr + } + if err := w.mw.WriteBool(false); err != nil { + return err + } + if err := w.mw.Flush(); err != nil { + return err + } + return s2w.Close() + } + return nil + } + return &w +} + +func newMetacacheFile(file string) *metacacheWriter { + w := metacacheWriter{ + mw: nil, + } + w.creator = func() error { + fw, err := os.Create(file) + if err != nil { + return err + } + s2w := s2.NewWriter(fw, s2.WriterBlockSize(1<<20)) + w.mw = msgp.NewWriter(s2w) + w.creator = nil + if err := w.mw.WriteByte(metacacheStreamVersion); err != nil { + return err + } + w.closer = func() error { + if w.streamErr != nil { + fw.Close() + return w.streamErr + } + // Indicate EOS + if err := w.mw.WriteBool(false); err != nil { + return err + } + if err := w.mw.Flush(); err != nil { + fw.Close() + return err + } + if err := s2w.Close(); err != nil { + fw.Close() + return err + } + return fw.Close() + } + return nil + } + return &w +} + +// write one or more objects to the stream in order. 
+// It is favorable to send as many objects as possible in a single write,
+// but no more than math.MaxUint32
+func (w *metacacheWriter) write(objs ...metaCacheEntry) error {
+	if w == nil {
+		return errors.New("metacacheWriter: nil writer")
+	}
+	if len(objs) == 0 {
+		return nil
+	}
+	if w.creator != nil {
+		// Lazily initialize the underlying compressed msgpack stream.
+		err := w.creator()
+		w.creator = nil
+		if err != nil {
+			return fmt.Errorf("metacacheWriter: unable to create writer: %w", err)
+		}
+		if w.mw == nil {
+			return errors.New("metacacheWriter: writer not initialized")
+		}
+	}
+	for _, o := range objs {
+		if len(o.name) == 0 {
+			return errors.New("metacacheWriter: no name provided")
+		}
+		// 'true' means another entry follows; EOS is signaled by
+		// writing 'false', which the closer does.
+		err := w.mw.WriteBool(true)
+		if err != nil {
+			return err
+		}
+		err = w.mw.WriteString(o.name)
+		if err != nil {
+			return err
+		}
+		err = w.mw.WriteBytes(o.metadata)
+		if err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
+
+// stream entries to the output.
+// The returned channel should be closed when done.
+// Any error is reported when closing the metacacheWriter.
+func (w *metacacheWriter) stream() (chan<- metaCacheEntry, error) {
+	if w.creator != nil {
+		err := w.creator()
+		w.creator = nil
+		if err != nil {
+			return nil, fmt.Errorf("metacacheWriter: unable to create writer: %w", err)
+		}
+		if w.mw == nil {
+			return nil, errors.New("metacacheWriter: writer not initialized")
+		}
+	}
+	var objs = make(chan metaCacheEntry, 100)
+	w.streamErr = nil
+	w.streamWg.Add(1)
+	go func() {
+		defer w.streamWg.Done()
+		for o := range objs {
+			// Skip nameless entries; once streamErr is set, drain the
+			// channel without writing so the sender never blocks.
+			if len(o.name) == 0 || w.streamErr != nil {
+				continue
+			}
+			// 'true' means another entry follows; EOS is signaled by
+			// writing 'false', which the closer does.
+			err := w.mw.WriteBool(true)
+			if err != nil {
+				w.streamErr = err
+				continue
+			}
+			err = w.mw.WriteString(o.name)
+			if err != nil {
+				w.streamErr = err
+				continue
+			}
+			err = w.mw.WriteBytes(o.metadata)
+			if err != nil {
+				w.streamErr = err
+				continue
+			}
+		}
+	}()
+
+	return objs, nil
+}
+
+// Close and release resources.
+func (w *metacacheWriter) Close() error { + if w == nil || w.closer == nil { + return nil + } + w.streamWg.Wait() + err := w.closer() + w.closer = nil + return err +} + +// Reset and start writing to new writer. +// Close must have been called before this. +func (w *metacacheWriter) Reset(out io.Writer) { + w.streamErr = nil + w.creator = func() error { + s2w := s2.NewWriter(out, s2.WriterBlockSize(w.blockSize)) + w.mw = msgp.NewWriter(s2w) + w.creator = nil + if err := w.mw.WriteByte(metacacheStreamVersion); err != nil { + return err + } + + w.closer = func() error { + if w.streamErr != nil { + return w.streamErr + } + if err := w.mw.WriteBool(false); err != nil { + return err + } + if err := w.mw.Flush(); err != nil { + return err + } + return s2w.Close() + } + return nil + } +} + +var s2DecPool = sync.Pool{New: func() interface{} { + return s2.NewReader(nil) +}} + +// metacacheReader allows reading a cache stream. +type metacacheReader struct { + mr *msgp.Reader + current metaCacheEntry + err error // stateful error + closer func() + creator func() error +} + +// newMetacacheReader creates a new cache reader. +// Nothing will be read from the stream yet. +func newMetacacheReader(r io.Reader) (*metacacheReader, error) { + dec := s2DecPool.Get().(*s2.Reader) + dec.Reset(r) + mr := msgp.NewReader(dec) + m := metacacheReader{ + mr: mr, + closer: func() { + dec.Reset(nil) + s2DecPool.Put(dec) + }, + creator: func() error { + v, err := mr.ReadByte() + if err != nil { + return err + } + switch v { + case metacacheStreamVersion: + default: + return fmt.Errorf("metacacheReader: Unknown version: %d", v) + } + return nil + }, + } + return &m, nil +} + +func (r *metacacheReader) checkInit() { + if r.creator == nil || r.err != nil { + return + } + r.err = r.creator() + r.creator = nil +} + +// peek will return the name of the next object. +// Will return io.EOF if there are no more objects. +// Should be used sparingly. 
+func (r *metacacheReader) peek() (metaCacheEntry, error) {
+	r.checkInit()
+	if r.err != nil {
+		return metaCacheEntry{}, r.err
+	}
+	// Return the entry cached by a previous peek/forwardTo.
+	if r.current.name != "" {
+		return r.current, nil
+	}
+	if more, err := r.mr.ReadBool(); !more {
+		switch err {
+		case nil:
+			// 'false' bool is a clean end-of-stream marker.
+			r.err = io.EOF
+			return metaCacheEntry{}, io.EOF
+		case io.EOF:
+			// Stream ended without the EOS marker: truncated.
+			r.err = io.ErrUnexpectedEOF
+			return metaCacheEntry{}, io.ErrUnexpectedEOF
+		}
+		r.err = err
+		return metaCacheEntry{}, err
+	}
+
+	var err error
+	if r.current.name, err = r.mr.ReadString(); err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		r.err = err
+		return metaCacheEntry{}, err
+	}
+	// Reuse the cached metadata buffer to avoid an allocation.
+	r.current.metadata, err = r.mr.ReadBytes(r.current.metadata[:0])
+	if err == io.EOF {
+		err = io.ErrUnexpectedEOF
+	}
+	r.err = err
+	return r.current, err
+}
+
+// next will read one entry from the stream.
+// Generally not recommended for fast operation.
+func (r *metacacheReader) next() (metaCacheEntry, error) {
+	r.checkInit()
+	if r.err != nil {
+		return metaCacheEntry{}, r.err
+	}
+	var m metaCacheEntry
+	var err error
+	// Deliver and clear the entry cached by a previous peek.
+	if r.current.name != "" {
+		m.name = r.current.name
+		m.metadata = r.current.metadata
+		r.current.name = ""
+		r.current.metadata = nil
+		return m, nil
+	}
+	if more, err := r.mr.ReadBool(); !more {
+		switch err {
+		case nil:
+			r.err = io.EOF
+			return m, io.EOF
+		case io.EOF:
+			r.err = io.ErrUnexpectedEOF
+			return m, io.ErrUnexpectedEOF
+		}
+		r.err = err
+		return m, err
+	}
+	if m.name, err = r.mr.ReadString(); err != nil {
+		if err == io.EOF {
+			err = io.ErrUnexpectedEOF
+		}
+		r.err = err
+		return m, err
+	}
+	m.metadata, err = r.mr.ReadBytes(nil)
+	if err == io.EOF {
+		err = io.ErrUnexpectedEOF
+	}
+	r.err = err
+	return m, err
+}
+
+// nextEOF reports whether the stream is exhausted, i.e. whether the next
+// read would return io.EOF. It does not consume an entry (it peeks).
+func (r *metacacheReader) nextEOF() bool { + r.checkInit() + if r.err != nil { + return r.err == io.EOF + } + if r.current.name != "" { + return false + } + _, err := r.peek() + if err != nil { + r.err = err + return r.err == io.EOF + } + return false +} + +// forwardTo will forward to the first entry that is >= s. +// Will return io.EOF if end of stream is reached without finding any. +func (r *metacacheReader) forwardTo(s string) error { + r.checkInit() + if r.err != nil { + return r.err + } + + if s == "" { + return nil + } + if r.current.name != "" { + if r.current.name >= s { + return nil + } + r.current.name = "" + r.current.metadata = nil + } + // temporary name buffer. + var tmp = make([]byte, 0, 256) + for { + if more, err := r.mr.ReadBool(); !more { + switch err { + case nil: + r.err = io.EOF + return io.EOF + case io.EOF: + r.err = io.ErrUnexpectedEOF + return io.ErrUnexpectedEOF + } + r.err = err + return err + } + // Read name without allocating more than 1 buffer. + sz, err := r.mr.ReadStringHeader() + if err != nil { + r.err = err + return err + } + if cap(tmp) < int(sz) { + tmp = make([]byte, 0, sz+256) + } + tmp = tmp[:sz] + _, err = r.mr.R.ReadFull(tmp) + if err != nil { + r.err = err + return err + } + if string(tmp) >= s { + r.current.name = string(tmp) + r.current.metadata, err = r.mr.ReadBytes(nil) + return err + } + // Skip metadata + err = r.mr.Skip() + if err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return err + } + } +} + +// readN will return all the requested number of entries in order +// or all if n < 0. +// Will return io.EOF if end of stream is reached. +// If requesting 0 objects nil error will always be returned regardless of at end of stream. +// Use peek to determine if at end of stream. 
+func (r *metacacheReader) readN(n int, inclDeleted, inclDirs bool, prefix string) (metaCacheEntriesSorted, error) { + r.checkInit() + if n == 0 { + return metaCacheEntriesSorted{}, nil + } + if r.err != nil { + return metaCacheEntriesSorted{}, r.err + } + + var res metaCacheEntries + if n > 0 { + res = make(metaCacheEntries, 0, n) + } + if prefix != "" { + if err := r.forwardTo(prefix); err != nil { + return metaCacheEntriesSorted{}, err + } + } + next, err := r.peek() + if err != nil { + return metaCacheEntriesSorted{}, err + } + if !next.hasPrefix(prefix) { + return metaCacheEntriesSorted{}, io.EOF + } + + if r.current.name != "" { + if (inclDeleted || !r.current.isLatestDeletemarker()) && r.current.hasPrefix(prefix) && (inclDirs || r.current.isObject()) { + res = append(res, r.current) + } + r.current.name = "" + r.current.metadata = nil + } + + for n < 0 || len(res) < n { + if more, err := r.mr.ReadBool(); !more { + switch err { + case nil: + r.err = io.EOF + return metaCacheEntriesSorted{o: res}, io.EOF + case io.EOF: + r.err = io.ErrUnexpectedEOF + return metaCacheEntriesSorted{o: res}, io.ErrUnexpectedEOF + } + r.err = err + return metaCacheEntriesSorted{o: res}, err + } + var err error + var meta metaCacheEntry + if meta.name, err = r.mr.ReadString(); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return metaCacheEntriesSorted{o: res}, err + } + if !meta.hasPrefix(prefix) { + r.mr.R.Skip(1) + return metaCacheEntriesSorted{o: res}, io.EOF + } + if meta.metadata, err = r.mr.ReadBytes(nil); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return metaCacheEntriesSorted{o: res}, err + } + if !inclDirs && meta.isDir() { + continue + } + if meta.isDir() && !inclDeleted && meta.isLatestDeletemarker() { + continue + } + res = append(res, meta) + } + return metaCacheEntriesSorted{o: res}, nil +} + +// readAll will return all remaining objects on the dst channel and close it when done. 
+// The context allows the operation to be canceled. +func (r *metacacheReader) readAll(ctx context.Context, dst chan<- metaCacheEntry) error { + r.checkInit() + if r.err != nil { + return r.err + } + defer close(dst) + if r.current.name != "" { + select { + case <-ctx.Done(): + r.err = ctx.Err() + return ctx.Err() + case dst <- r.current: + } + r.current.name = "" + r.current.metadata = nil + } + for { + if more, err := r.mr.ReadBool(); !more { + switch err { + case io.EOF: + err = io.ErrUnexpectedEOF + } + r.err = err + return err + } + + var err error + var meta metaCacheEntry + if meta.name, err = r.mr.ReadString(); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return err + } + if meta.metadata, err = r.mr.ReadBytes(nil); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return err + } + select { + case <-ctx.Done(): + r.err = ctx.Err() + return ctx.Err() + case dst <- meta: + } + } +} + +// readFn will return all remaining objects +// and provide a callback for each entry read in order +// as long as true is returned on the callback. +func (r *metacacheReader) readFn(fn func(entry metaCacheEntry) bool) error { + r.checkInit() + if r.err != nil { + return r.err + } + if r.current.name != "" { + fn(r.current) + r.current.name = "" + r.current.metadata = nil + } + for { + if more, err := r.mr.ReadBool(); !more { + switch err { + case io.EOF: + r.err = io.ErrUnexpectedEOF + return io.ErrUnexpectedEOF + case nil: + r.err = io.EOF + return io.EOF + } + return err + } + + var err error + var meta metaCacheEntry + if meta.name, err = r.mr.ReadString(); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return err + } + if meta.metadata, err = r.mr.ReadBytes(nil); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return err + } + // Send it! 
+ if !fn(meta) { + return nil + } + } +} + +// readNames will return all the requested number of names in order +// or all if n < 0. +// Will return io.EOF if end of stream is reached. +func (r *metacacheReader) readNames(n int) ([]string, error) { + r.checkInit() + if r.err != nil { + return nil, r.err + } + if n == 0 { + return nil, nil + } + var res []string + if n > 0 { + res = make([]string, 0, n) + } + if r.current.name != "" { + res = append(res, r.current.name) + r.current.name = "" + r.current.metadata = nil + } + for n < 0 || len(res) < n { + if more, err := r.mr.ReadBool(); !more { + switch err { + case nil: + r.err = io.EOF + return res, io.EOF + case io.EOF: + r.err = io.ErrUnexpectedEOF + return res, io.ErrUnexpectedEOF + } + return res, err + } + + var err error + var name string + if name, err = r.mr.ReadString(); err != nil { + r.err = err + return res, err + } + if err = r.mr.Skip(); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return res, err + } + res = append(res, name) + } + return res, nil +} + +// skip n entries on the input stream. +// If there are less entries left io.EOF is returned. +func (r *metacacheReader) skip(n int) error { + r.checkInit() + if r.err != nil { + return r.err + } + if n <= 0 { + return nil + } + if r.current.name != "" { + n-- + r.current.name = "" + r.current.metadata = nil + } + for n > 0 { + if more, err := r.mr.ReadBool(); !more { + switch err { + case nil: + r.err = io.EOF + return io.EOF + case io.EOF: + r.err = io.ErrUnexpectedEOF + return io.ErrUnexpectedEOF + } + return err + } + + if err := r.mr.Skip(); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return err + } + if err := r.mr.Skip(); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + r.err = err + return err + } + n-- + } + return nil +} + +// Close and release resources. 
+func (r *metacacheReader) Close() error { + if r == nil || r.closer == nil { + return nil + } + r.closer() + r.closer = nil + r.creator = nil + return nil +} + +// metacacheBlockWriter collects blocks and provides a callaback to store them. +type metacacheBlockWriter struct { + wg sync.WaitGroup + streamErr error + blockEntries int +} + +// newMetacacheBlockWriter provides a streaming block writer. +// Each block is the size of the capacity of the input channel. +// The caller should close to indicate the stream has ended. +func newMetacacheBlockWriter(in <-chan metaCacheEntry, nextBlock func(b *metacacheBlock) error) *metacacheBlockWriter { + w := metacacheBlockWriter{blockEntries: cap(in)} + w.wg.Add(1) + go func() { + defer w.wg.Done() + var current metacacheBlock + var n int + var buf bytes.Buffer + block := newMetacacheWriter(&buf, 1<<20) + finishBlock := func() { + err := block.Close() + if err != nil { + w.streamErr = err + return + } + current.data = buf.Bytes() + w.streamErr = nextBlock(¤t) + // Prepare for next + current.n++ + buf.Reset() + block.Reset(&buf) + current.First = "" + } + for o := range in { + if len(o.name) == 0 || w.streamErr != nil { + continue + } + if current.First == "" { + current.First = o.name + } + + if n >= w.blockEntries-1 { + finishBlock() + n = 0 + } + n++ + + w.streamErr = block.write(o) + if w.streamErr != nil { + continue + } + current.Last = o.name + } + if n > 0 || current.n == 0 { + current.EOS = true + finishBlock() + } + }() + return &w +} + +// Close the stream. +// The incoming channel must be closed before calling this. +// Returns the first error the occurred during the writing if any. 
+func (w *metacacheBlockWriter) Close() error { + w.wg.Wait() + return w.streamErr +} + +type metacacheBlock struct { + data []byte + n int + First string `json:"f"` + Last string `json:"l"` + EOS bool `json:"eos,omitempty"` +} + +func (b metacacheBlock) headerKV() map[string]string { + v, err := json.Marshal(b) + if err != nil { + logger.LogIf(context.Background(), err) // Unlikely + return nil + } + return map[string]string{fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, b.n): string(v)} +} + +// pastPrefix returns true if the given prefix is before start of the block. +func (b metacacheBlock) pastPrefix(prefix string) bool { + if prefix == "" || strings.HasPrefix(b.First, prefix) { + return false + } + // We have checked if prefix matches, so we can do direct compare. + return b.First > prefix +} + +// endedPrefix returns true if the given prefix ends within the block. +func (b metacacheBlock) endedPrefix(prefix string) bool { + if prefix == "" || strings.HasPrefix(b.Last, prefix) { + return false + } + + // We have checked if prefix matches, so we can do direct compare. + return b.Last > prefix +} diff --git a/cmd/metacache-stream_test.go b/cmd/metacache-stream_test.go new file mode 100644 index 000000000..58b907835 --- /dev/null +++ b/cmd/metacache-stream_test.go @@ -0,0 +1,437 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package cmd + +import ( + "bytes" + "context" + "io" + "io/ioutil" + "reflect" + "sync" + "testing" +) + +var loadMetacacheSampleNames = []string{"src/compress/bzip2/", "src/compress/bzip2/bit_reader.go", "src/compress/bzip2/bzip2.go", "src/compress/bzip2/bzip2_test.go", "src/compress/bzip2/huffman.go", "src/compress/bzip2/move_to_front.go", "src/compress/bzip2/testdata/", "src/compress/bzip2/testdata/Isaac.Newton-Opticks.txt.bz2", "src/compress/bzip2/testdata/e.txt.bz2", "src/compress/bzip2/testdata/fail-issue5747.bz2", "src/compress/bzip2/testdata/pass-random1.bin", "src/compress/bzip2/testdata/pass-random1.bz2", "src/compress/bzip2/testdata/pass-random2.bin", "src/compress/bzip2/testdata/pass-random2.bz2", "src/compress/bzip2/testdata/pass-sawtooth.bz2", "src/compress/bzip2/testdata/random.data.bz2", "src/compress/flate/", "src/compress/flate/deflate.go", "src/compress/flate/deflate_test.go", "src/compress/flate/deflatefast.go", "src/compress/flate/dict_decoder.go", "src/compress/flate/dict_decoder_test.go", "src/compress/flate/example_test.go", "src/compress/flate/flate_test.go", "src/compress/flate/huffman_bit_writer.go", "src/compress/flate/huffman_bit_writer_test.go", "src/compress/flate/huffman_code.go", "src/compress/flate/inflate.go", "src/compress/flate/inflate_test.go", "src/compress/flate/reader_test.go", "src/compress/flate/testdata/", "src/compress/flate/testdata/huffman-null-max.dyn.expect", "src/compress/flate/testdata/huffman-null-max.dyn.expect-noinput", "src/compress/flate/testdata/huffman-null-max.golden", "src/compress/flate/testdata/huffman-null-max.in", "src/compress/flate/testdata/huffman-null-max.wb.expect", "src/compress/flate/testdata/huffman-null-max.wb.expect-noinput", "src/compress/flate/testdata/huffman-pi.dyn.expect", "src/compress/flate/testdata/huffman-pi.dyn.expect-noinput", "src/compress/flate/testdata/huffman-pi.golden", "src/compress/flate/testdata/huffman-pi.in", "src/compress/flate/testdata/huffman-pi.wb.expect", 
"src/compress/flate/testdata/huffman-pi.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-1k.dyn.expect", "src/compress/flate/testdata/huffman-rand-1k.dyn.expect-noinput", "src/compress/flate/testdata/huffman-rand-1k.golden", "src/compress/flate/testdata/huffman-rand-1k.in", "src/compress/flate/testdata/huffman-rand-1k.wb.expect", "src/compress/flate/testdata/huffman-rand-1k.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-limit.dyn.expect", "src/compress/flate/testdata/huffman-rand-limit.dyn.expect-noinput", "src/compress/flate/testdata/huffman-rand-limit.golden", "src/compress/flate/testdata/huffman-rand-limit.in", "src/compress/flate/testdata/huffman-rand-limit.wb.expect", "src/compress/flate/testdata/huffman-rand-limit.wb.expect-noinput", "src/compress/flate/testdata/huffman-rand-max.golden", "src/compress/flate/testdata/huffman-rand-max.in", "src/compress/flate/testdata/huffman-shifts.dyn.expect", "src/compress/flate/testdata/huffman-shifts.dyn.expect-noinput", "src/compress/flate/testdata/huffman-shifts.golden", "src/compress/flate/testdata/huffman-shifts.in", "src/compress/flate/testdata/huffman-shifts.wb.expect", "src/compress/flate/testdata/huffman-shifts.wb.expect-noinput", "src/compress/flate/testdata/huffman-text-shift.dyn.expect", "src/compress/flate/testdata/huffman-text-shift.dyn.expect-noinput", "src/compress/flate/testdata/huffman-text-shift.golden", "src/compress/flate/testdata/huffman-text-shift.in", "src/compress/flate/testdata/huffman-text-shift.wb.expect", "src/compress/flate/testdata/huffman-text-shift.wb.expect-noinput", "src/compress/flate/testdata/huffman-text.dyn.expect", "src/compress/flate/testdata/huffman-text.dyn.expect-noinput", "src/compress/flate/testdata/huffman-text.golden", "src/compress/flate/testdata/huffman-text.in", "src/compress/flate/testdata/huffman-text.wb.expect", "src/compress/flate/testdata/huffman-text.wb.expect-noinput", "src/compress/flate/testdata/huffman-zero.dyn.expect", 
"src/compress/flate/testdata/huffman-zero.dyn.expect-noinput", "src/compress/flate/testdata/huffman-zero.golden", "src/compress/flate/testdata/huffman-zero.in", "src/compress/flate/testdata/huffman-zero.wb.expect", "src/compress/flate/testdata/huffman-zero.wb.expect-noinput", "src/compress/flate/testdata/null-long-match.dyn.expect-noinput", "src/compress/flate/testdata/null-long-match.wb.expect-noinput", "src/compress/flate/token.go", "src/compress/flate/writer_test.go", "src/compress/gzip/", "src/compress/gzip/example_test.go", "src/compress/gzip/gunzip.go", "src/compress/gzip/gunzip_test.go", "src/compress/gzip/gzip.go", "src/compress/gzip/gzip_test.go", "src/compress/gzip/issue14937_test.go", "src/compress/gzip/testdata/", "src/compress/gzip/testdata/issue6550.gz.base64", "src/compress/lzw/", "src/compress/lzw/reader.go", "src/compress/lzw/reader_test.go", "src/compress/lzw/writer.go", "src/compress/lzw/writer_test.go", "src/compress/testdata/", "src/compress/testdata/e.txt", "src/compress/testdata/gettysburg.txt", "src/compress/testdata/pi.txt", "src/compress/zlib/", "src/compress/zlib/example_test.go", "src/compress/zlib/reader.go", "src/compress/zlib/reader_test.go", "src/compress/zlib/writer.go", "src/compress/zlib/writer_test.go"} + +func loadMetacacheSample(t testing.TB) *metacacheReader { + b, err := ioutil.ReadFile("testdata/metacache.s2") + if err != nil { + t.Fatal(err) + } + r, err := newMetacacheReader(bytes.NewBuffer(b)) + if err != nil { + t.Fatal(err) + } + return r +} + +func loadMetacacheSampleEntries(t testing.TB) metaCacheEntriesSorted { + r := loadMetacacheSample(t) + defer r.Close() + entries, err := r.readN(-1, false, true, "") + if err != io.EOF { + t.Fatal(err) + } + if false { + w := newMetacacheFile("testdata/metacache-new.s2") + w.write(entries.entries()...) 
+ w.Close() + } + + return entries +} + +func Test_metacacheReader_readNames(t *testing.T) { + r := loadMetacacheSample(t) + defer r.Close() + names, err := r.readNames(-1) + if err != io.EOF { + t.Fatal(err) + } + want := loadMetacacheSampleNames + if !reflect.DeepEqual(names, want) { + t.Errorf("got unexpected result: %#v", names) + } +} + +func Test_metacacheReader_readN(t *testing.T) { + r := loadMetacacheSample(t) + defer r.Close() + entries, err := r.readN(-1, false, true, "") + if err != io.EOF { + t.Fatal(err, entries.len()) + } + want := loadMetacacheSampleNames + for i, entry := range entries.entries() { + if entry.name != want[i] { + t.Errorf("entry %d, want %q, got %q", i, want[i], entry.name) + } + } + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + + want = want[:0] + entries, err = r.readN(0, false, true, "") + if err != nil { + t.Fatal(err, entries.len()) + } + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + + // Reload. 
+ r = loadMetacacheSample(t) + defer r.Close() + entries, err = r.readN(0, false, true, "") + if err != nil { + t.Fatal(err, entries.len()) + } + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + + entries, err = r.readN(5, false, true, "") + if err != nil { + t.Fatal(err, entries.len()) + } + want = loadMetacacheSampleNames[:5] + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + + for i, entry := range entries.entries() { + if entry.name != want[i] { + t.Errorf("entry %d, want %q, got %q", i, want[i], entry.name) + } + } +} + +func Test_metacacheReader_readNDirs(t *testing.T) { + r := loadMetacacheSample(t) + defer r.Close() + entries, err := r.readN(-1, false, true, "") + if err != io.EOF { + t.Fatal(err, entries.len()) + } + want := loadMetacacheSampleNames + var noDirs []string + for i, entry := range entries.entries() { + if entry.name != want[i] { + t.Errorf("entry %d, want %q, got %q", i, want[i], entry.name) + } + if !entry.isDir() { + noDirs = append(noDirs, entry.name) + } + } + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + + want = noDirs + r = loadMetacacheSample(t) + defer r.Close() + entries, err = r.readN(-1, false, false, "") + if err != io.EOF { + t.Fatal(err, entries.len()) + } + for i, entry := range entries.entries() { + if entry.name != want[i] { + t.Errorf("entry %d, want %q, got %q", i, want[i], entry.name) + } + } + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + + want = want[:0] + entries, err = r.readN(0, false, false, "") + if err != nil { + t.Fatal(err, entries.len()) + } + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + + // Reload. 
+ r = loadMetacacheSample(t) + defer r.Close() + entries, err = r.readN(0, false, false, "") + if err != nil { + t.Fatal(err, entries.len()) + } + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + + entries, err = r.readN(5, false, false, "") + if err != nil { + t.Fatal(err, entries.len()) + } + want = noDirs[:5] + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + + for i, entry := range entries.entries() { + if entry.name != want[i] { + t.Errorf("entry %d, want %q, got %q", i, want[i], entry.name) + } + } +} + +func Test_metacacheReader_readNPrefix(t *testing.T) { + r := loadMetacacheSample(t) + defer r.Close() + entries, err := r.readN(-1, false, true, "src/compress/bzip2/") + if err != io.EOF { + t.Fatal(err, entries.len()) + } + want := loadMetacacheSampleNames[:16] + for i, entry := range entries.entries() { + if entry.name != want[i] { + t.Errorf("entry %d, want %q, got %q", i, want[i], entry.name) + } + } + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + + r = loadMetacacheSample(t) + defer r.Close() + entries, err = r.readN(-1, false, true, "src/nonexist") + if err != io.EOF { + t.Fatal(err, entries.len()) + } + want = loadMetacacheSampleNames[:0] + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + for i, entry := range entries.entries() { + if entry.name != want[i] { + t.Errorf("entry %d, want %q, got %q", i, want[i], entry.name) + } + } + + r = loadMetacacheSample(t) + defer r.Close() + entries, err = r.readN(-1, false, true, "src/a") + if err != io.EOF { + t.Fatal(err, entries.len()) + } + want = loadMetacacheSampleNames[:0] + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + for i, entry := range entries.entries() { + if entry.name != want[i] { + t.Errorf("entry %d, want %q, got 
%q", i, want[i], entry.name) + } + } + + r = loadMetacacheSample(t) + defer r.Close() + entries, err = r.readN(-1, false, true, "src/compress/zlib/e") + if err != io.EOF { + t.Fatal(err, entries.len()) + } + want = []string{"src/compress/zlib/example_test.go"} + if entries.len() != len(want) { + t.Fatal("unexpected length:", entries.len(), "want:", len(want)) + } + for i, entry := range entries.entries() { + if entry.name != want[i] { + t.Errorf("entry %d, want %q, got %q", i, want[i], entry.name) + } + } +} + +func Test_metacacheReader_readFn(t *testing.T) { + r := loadMetacacheSample(t) + defer r.Close() + i := 0 + err := r.readFn(func(entry metaCacheEntry) bool { + want := loadMetacacheSampleNames[i] + if entry.name != want { + t.Errorf("entry %d, want %q, got %q", i, want, entry.name) + } + i++ + return true + }) + if err != io.EOF { + t.Fatal(err) + } +} + +func Test_metacacheReader_readAll(t *testing.T) { + r := loadMetacacheSample(t) + defer r.Close() + var readErr error + objs := make(chan metaCacheEntry, 1) + var wg sync.WaitGroup + wg.Add(1) + go func() { + readErr = r.readAll(context.Background(), objs) + wg.Done() + }() + want := loadMetacacheSampleNames + i := 0 + for entry := range objs { + if entry.name != want[i] { + t.Errorf("entry %d, want %q, got %q", i, want[i], entry.name) + } + i++ + } + wg.Wait() + if readErr != nil { + t.Fatal(readErr) + } +} + +func Test_metacacheReader_forwardTo(t *testing.T) { + r := loadMetacacheSample(t) + defer r.Close() + err := r.forwardTo("src/compress/zlib/reader_test.go") + if err != nil { + t.Fatal(err) + } + names, err := r.readNames(-1) + if err != io.EOF { + t.Fatal(err) + } + want := []string{"src/compress/zlib/reader_test.go", "src/compress/zlib/writer.go", "src/compress/zlib/writer_test.go"} + if !reflect.DeepEqual(names, want) { + t.Errorf("got unexpected result: %#v", names) + } + + // Try with prefix + r = loadMetacacheSample(t) + err = r.forwardTo("src/compress/zlib/reader_t") + if err != nil { + 
t.Fatal(err) + } + names, err = r.readNames(-1) + if err != io.EOF { + t.Fatal(err) + } + if !reflect.DeepEqual(names, want) { + t.Errorf("got unexpected result: %#v", names) + } +} + +func Test_metacacheReader_next(t *testing.T) { + r := loadMetacacheSample(t) + defer r.Close() + for i, want := range loadMetacacheSampleNames { + gotObj, err := r.next() + if err != nil { + t.Fatal(err) + } + if gotObj.name != want { + t.Errorf("entry %d, want %q, got %q", i, want, gotObj.name) + } + } +} +func Test_metacacheReader_peek(t *testing.T) { + r := loadMetacacheSample(t) + defer r.Close() + for i, want := range loadMetacacheSampleNames { + got, err := r.peek() + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + if got.name != want { + t.Errorf("entry %d, want %q, got %q", i, want, got.name) + } + gotObj, err := r.next() + if err != nil { + t.Fatal(err) + } + if gotObj.name != want { + t.Errorf("entry %d, want %q, got %q", i, want, gotObj.name) + } + } +} + +func Test_newMetacacheStream(t *testing.T) { + r := loadMetacacheSample(t) + var buf bytes.Buffer + w := newMetacacheWriter(&buf, 1<<20) + err := r.readFn(func(object metaCacheEntry) bool { + err := w.write(object) + if err != nil { + t.Fatal(err) + } + return true + }) + r.Close() + if err != io.EOF { + t.Fatal(err) + } + err = w.Close() + if err != nil { + t.Fatal(err) + } + + r, err = newMetacacheReader(&buf) + if err != nil { + t.Fatal(err) + } + defer r.Close() + names, err := r.readNames(-1) + if err != io.EOF { + t.Fatal(err) + } + want := loadMetacacheSampleNames + if !reflect.DeepEqual(names, want) { + t.Errorf("got unexpected result: %#v", names) + } +} + +func Test_metacacheReader_skip(t *testing.T) { + r := loadMetacacheSample(t) + defer r.Close() + names, err := r.readNames(5) + if err != nil { + t.Fatal(err) + } + want := loadMetacacheSampleNames[:5] + if !reflect.DeepEqual(names, want) { + t.Errorf("got unexpected result: %#v", names) + } + err = r.skip(5) + if err != nil { + 
t.Fatal(err) + } + names, err = r.readNames(5) + if err != nil { + t.Fatal(err) + } + want = loadMetacacheSampleNames[10:15] + if !reflect.DeepEqual(names, want) { + t.Errorf("got unexpected result: %#v", names) + } + + err = r.skip(len(loadMetacacheSampleNames)) + if err != io.EOF { + t.Fatal(err) + } +} diff --git a/cmd/metacache-walk.go b/cmd/metacache-walk.go new file mode 100644 index 000000000..f2d4cb779 --- /dev/null +++ b/cmd/metacache-walk.go @@ -0,0 +1,237 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "context" + "io" + "io/ioutil" + "net/http" + "net/url" + "os" + "sort" + "strconv" + "strings" + "sync/atomic" + + "github.com/gorilla/mux" + "github.com/minio/minio/cmd/logger" +) + +// WalkDirOptions provides options for WalkDir operations. +type WalkDirOptions struct { + // Bucket to crawl + Bucket string + + // Directory inside the bucket. + BaseDir string + + // Do a full recursive scan. + Recursive bool +} + +// WalkDir will traverse a directory and return all entries found. +// On success a sorted meta cache stream will be returned. +func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error { + atomic.AddInt32(&s.activeIOCount, 1) + defer func() { + atomic.AddInt32(&s.activeIOCount, -1) + }() + + // Verify if volume is valid and it exists. 
+ volumeDir, err := s.getVolDir(opts.Bucket) + if err != nil { + return err + } + + // Stat a volume entry. + _, err = os.Stat(volumeDir) + if err != nil { + if os.IsNotExist(err) { + return errVolumeNotFound + } else if isSysErrIO(err) { + return errFaultyDisk + } + return err + } + + // Fast exit track to check if we are listing an object with + // a trailing slash, this will avoid to list the object content. + if HasSuffix(opts.BaseDir, SlashSeparator) { + if st, err := os.Stat(pathJoin(volumeDir, opts.BaseDir, xlStorageFormatFile)); err == nil && st.Mode().IsRegular() { + return errFileNotFound + } + } + // Use a small block size to start sending quickly + w := newMetacacheWriter(wr, 16<<10) + defer w.Close() + out, err := w.stream() + if err != nil { + return err + } + defer close(out) + + var scanDir func(path string) error + scanDir = func(current string) error { + entries, err := s.ListDir(ctx, opts.Bucket, current, -1) + if err != nil { + // Folder could have gone away in-between + if err != errVolumeNotFound && err != errFileNotFound { + logger.LogIf(ctx, err) + } + // Forward some errors? + return nil + } + + for i, entry := range entries { + if strings.HasSuffix(entry, slashSeparator) { + // Trim slash, maybe compiler is clever? + entries[i] = entries[i][:len(entry)-1] + continue + } + // Do do not retain the file. + entries[i] = "" + + // If root was an object return it as such. + if HasSuffix(entry, xlStorageFormatFile) { + var meta metaCacheEntry + meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, meta.name, xlStorageFormatFile)) + if err != nil { + logger.LogIf(ctx, err) + continue + } + meta.name = strings.TrimSuffix(meta.name, xlStorageFormatFile) + meta.name = strings.TrimSuffix(meta.name, SlashSeparator) + out <- meta + return nil + } + // Check legacy. 
+ if HasSuffix(entry, xlStorageFormatFileV1) { + var meta metaCacheEntry + meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, meta.name, xlStorageFormatFileV1)) + if err != nil { + logger.LogIf(ctx, err) + continue + } + meta.name = strings.TrimSuffix(meta.name, xlStorageFormatFileV1) + meta.name = strings.TrimSuffix(meta.name, SlashSeparator) + out <- meta + return nil + } + // Skip all other files. + } + + // Process in sort order. + sort.Strings(entries) + dirStack := make([]string, 0, 5) + for _, entry := range entries { + if entry == "" { + continue + } + meta := metaCacheEntry{name: PathJoin(current, entry)} + + // If directory entry on stack before this, pop it now. + for len(dirStack) > 0 && dirStack[len(dirStack)-1] < meta.name { + pop := dirStack[len(dirStack)-1] + out <- metaCacheEntry{name: pop} + if opts.Recursive { + // Scan folder we found. Should be in correct sort order where we are. + err := scanDir(pop) + logger.LogIf(ctx, err) + } + dirStack = dirStack[:len(dirStack)-1] + } + + // All objects will be returned as directories, there has been no object check yet. + // Check it by attempting to read metadata. + meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, meta.name, xlStorageFormatFile)) + switch { + case err == nil: + // It was an object + out <- meta + case os.IsNotExist(err): + meta.metadata, err = ioutil.ReadFile(pathJoin(volumeDir, meta.name, xlStorageFormatFileV1)) + if err == nil { + // Maybe rename? Would make it inconsistent across disks though. + // os.Rename(pathJoin(volumeDir, meta.name, xlStorageFormatFileV1), pathJoin(volumeDir, meta.name, xlStorageFormatFile)) + // It was an object + out <- meta + continue + } + + // NOT an object, append to stack (with slash) + dirStack = append(dirStack, meta.name+slashSeparator) + default: + logger.LogIf(ctx, err) + } + } + // If directory entry left on stack, pop it now. 
+ for len(dirStack) > 0 { + pop := dirStack[len(dirStack)-1] + out <- metaCacheEntry{name: pop} + if opts.Recursive { + // Scan folder we found. Should be in correct sort order where we are. + err := scanDir(pop) + logger.LogIf(ctx, err) + } + dirStack = dirStack[:len(dirStack)-1] + } + return nil + } + + // Stream output. + return scanDir(opts.BaseDir) +} + +func (p *xlStorageDiskIDCheck) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error { + if err := p.checkDiskStale(); err != nil { + return err + } + return p.storage.WalkDir(ctx, opts, wr) +} + +// WalkDir will traverse a directory and return all entries found. +// On success a meta cache stream will be returned, that should be closed when done. +func (client *storageRESTClient) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error { + values := make(url.Values) + values.Set(storageRESTVolume, opts.Bucket) + values.Set(storageRESTDirPath, opts.BaseDir) + values.Set(storageRESTRecursive, strconv.FormatBool(opts.Recursive)) + respBody, err := client.call(ctx, storageRESTMethodWalkDir, values, nil, -1) + if err != nil { + logger.LogIf(ctx, err) + return err + } + return waitForHTTPStream(respBody, wr) +} + +// WalkDirHandler - remote caller to list files and folders in a requested directory path. 
+func (s *storageRESTServer) WalkDirHandler(w http.ResponseWriter, r *http.Request) { + if !s.IsValid(w, r) { + return + } + vars := mux.Vars(r) + volume := vars[storageRESTVolume] + dirPath := vars[storageRESTDirPath] + recursive, err := strconv.ParseBool(vars[storageRESTRecursive]) + if err != nil { + s.writeErrorResponse(w, err) + return + } + writer := streamHTTPResponse(w) + writer.CloseWithError(s.storage.WalkDir(r.Context(), WalkDirOptions{Bucket: volume, BaseDir: dirPath, Recursive: recursive}, writer)) +} diff --git a/cmd/metacache.go b/cmd/metacache.go new file mode 100644 index 000000000..2516cb6b8 --- /dev/null +++ b/cmd/metacache.go @@ -0,0 +1,132 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "path" + "strings" + "time" +) + +type scanStatus uint8 + +const ( + scanStateNone scanStatus = iota + scanStateStarted + scanStateSuccess + scanStateError + + // Time in which the initiator of a scan must have reported back. + metacacheMaxRunningAge = time.Minute + + // metacacheBlockSize is the number of file/directory entries to have in each block. + metacacheBlockSize = 5000 +) + +//go:generate msgp -file $GOFILE -unexported + +// metacache contains a tracked cache entry. 
+type metacache struct { + id string `msg:"id"` + bucket string `msg:"b"` + root string `msg:"root"` + recursive bool `msg:"rec"` + status scanStatus `msg:"stat"` + fileNotFound bool `msg:"fnf"` + error string `msg:"err"` + started time.Time `msg:"st"` + ended time.Time `msg:"end"` + lastUpdate time.Time `msg:"u"` + lastHandout time.Time `msg:"lh"` + startedCycle uint64 `msg:"stc"` + endedCycle uint64 `msg:"endc"` + dataVersion uint8 `msg:"v"` +} + +func (m *metacache) finished() bool { + return !m.ended.IsZero() +} + +// worthKeeping indicates if the cache by itself is worth keeping. +func (m *metacache) worthKeeping(currentCycle uint64) bool { + if m == nil { + return false + } + cache := m + switch { + case !cache.finished() && time.Since(cache.lastUpdate) > metacacheMaxRunningAge: + // Not finished and update for metacacheMaxRunningAge, discard it. + return false + case cache.finished() && cache.startedCycle > currentCycle: + // Cycle is somehow bigger. + return false + case cache.finished() && currentCycle >= dataUsageUpdateDirCycles && cache.startedCycle < currentCycle-dataUsageUpdateDirCycles: + // Cycle is too old to be valuable. + return false + case cache.status == scanStateError || cache.status == scanStateNone: + // Remove failed listings + return false + } + return true +} + +// canBeReplacedBy. +// Both must pass the worthKeeping check. +func (m *metacache) canBeReplacedBy(other *metacache) bool { + // If the other is older it can never replace. + if other.started.Before(m.started) || m.id == other.id { + return false + } + + // Keep it around a bit longer. + if time.Since(m.lastHandout) < time.Hour { + return false + } + + // Go through recursive combinations. + switch { + case !m.recursive && !other.recursive: + // If both not recursive root must match. 
+ return m.root == other.root + case m.recursive && !other.recursive: + // A recursive can never be replaced by a non-recursive + return false + case !m.recursive && other.recursive: + // If other is recursive it must contain this root + return strings.HasPrefix(m.root, other.root) + case m.recursive && other.recursive: + // Similar if both are recursive + return strings.HasPrefix(m.root, other.root) + } + panic("should be unreachable") +} + +// baseDirFromPrefix will return the base directory given an object path. +// For example an object with name prefix/folder/object.ext will return `prefix/folder/`. +func baseDirFromPrefix(prefix string) string { + b := path.Dir(prefix) + if b == "." || b == "./" || b == "/" { + b = "" + } + if !strings.Contains(prefix, slashSeparator) { + b = "" + } + if len(b) > 0 && !strings.HasSuffix(b, slashSeparator) { + b += slashSeparator + } + return b +} diff --git a/cmd/metacache_gen.go b/cmd/metacache_gen.go new file mode 100644 index 000000000..c5d8afda7 --- /dev/null +++ b/cmd/metacache_gen.go @@ -0,0 +1,495 @@ +package cmd + +// Code generated by github.com/tinylib/msgp DO NOT EDIT. 
+ +import ( + "github.com/tinylib/msgp/msgp" +) + +// DecodeMsg implements msgp.Decodable +func (z *metacache) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zb0001 uint32 + zb0001, err = dc.ReadMapHeader() + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "id": + z.id, err = dc.ReadString() + if err != nil { + err = msgp.WrapError(err, "id") + return + } + case "b": + z.bucket, err = dc.ReadString() + if err != nil { + err = msgp.WrapError(err, "bucket") + return + } + case "root": + z.root, err = dc.ReadString() + if err != nil { + err = msgp.WrapError(err, "root") + return + } + case "rec": + z.recursive, err = dc.ReadBool() + if err != nil { + err = msgp.WrapError(err, "recursive") + return + } + case "stat": + { + var zb0002 uint8 + zb0002, err = dc.ReadUint8() + if err != nil { + err = msgp.WrapError(err, "status") + return + } + z.status = scanStatus(zb0002) + } + case "fnf": + z.fileNotFound, err = dc.ReadBool() + if err != nil { + err = msgp.WrapError(err, "fileNotFound") + return + } + case "err": + z.error, err = dc.ReadString() + if err != nil { + err = msgp.WrapError(err, "error") + return + } + case "st": + z.started, err = dc.ReadTime() + if err != nil { + err = msgp.WrapError(err, "started") + return + } + case "end": + z.ended, err = dc.ReadTime() + if err != nil { + err = msgp.WrapError(err, "ended") + return + } + case "u": + z.lastUpdate, err = dc.ReadTime() + if err != nil { + err = msgp.WrapError(err, "lastUpdate") + return + } + case "lh": + z.lastHandout, err = dc.ReadTime() + if err != nil { + err = msgp.WrapError(err, "lastHandout") + return + } + case "stc": + z.startedCycle, err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, "startedCycle") + return + } + case "endc": + z.endedCycle, err = dc.ReadUint64() + if err != 
nil { + err = msgp.WrapError(err, "endedCycle") + return + } + case "v": + z.dataVersion, err = dc.ReadUint8() + if err != nil { + err = msgp.WrapError(err, "dataVersion") + return + } + default: + err = dc.Skip() + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *metacache) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 14 + // write "id" + err = en.Append(0x8e, 0xa2, 0x69, 0x64) + if err != nil { + return + } + err = en.WriteString(z.id) + if err != nil { + err = msgp.WrapError(err, "id") + return + } + // write "b" + err = en.Append(0xa1, 0x62) + if err != nil { + return + } + err = en.WriteString(z.bucket) + if err != nil { + err = msgp.WrapError(err, "bucket") + return + } + // write "root" + err = en.Append(0xa4, 0x72, 0x6f, 0x6f, 0x74) + if err != nil { + return + } + err = en.WriteString(z.root) + if err != nil { + err = msgp.WrapError(err, "root") + return + } + // write "rec" + err = en.Append(0xa3, 0x72, 0x65, 0x63) + if err != nil { + return + } + err = en.WriteBool(z.recursive) + if err != nil { + err = msgp.WrapError(err, "recursive") + return + } + // write "stat" + err = en.Append(0xa4, 0x73, 0x74, 0x61, 0x74) + if err != nil { + return + } + err = en.WriteUint8(uint8(z.status)) + if err != nil { + err = msgp.WrapError(err, "status") + return + } + // write "fnf" + err = en.Append(0xa3, 0x66, 0x6e, 0x66) + if err != nil { + return + } + err = en.WriteBool(z.fileNotFound) + if err != nil { + err = msgp.WrapError(err, "fileNotFound") + return + } + // write "err" + err = en.Append(0xa3, 0x65, 0x72, 0x72) + if err != nil { + return + } + err = en.WriteString(z.error) + if err != nil { + err = msgp.WrapError(err, "error") + return + } + // write "st" + err = en.Append(0xa2, 0x73, 0x74) + if err != nil { + return + } + err = en.WriteTime(z.started) + if err != nil { + err = msgp.WrapError(err, "started") + return + } + // write "end" + err = en.Append(0xa3, 
0x65, 0x6e, 0x64) + if err != nil { + return + } + err = en.WriteTime(z.ended) + if err != nil { + err = msgp.WrapError(err, "ended") + return + } + // write "u" + err = en.Append(0xa1, 0x75) + if err != nil { + return + } + err = en.WriteTime(z.lastUpdate) + if err != nil { + err = msgp.WrapError(err, "lastUpdate") + return + } + // write "lh" + err = en.Append(0xa2, 0x6c, 0x68) + if err != nil { + return + } + err = en.WriteTime(z.lastHandout) + if err != nil { + err = msgp.WrapError(err, "lastHandout") + return + } + // write "stc" + err = en.Append(0xa3, 0x73, 0x74, 0x63) + if err != nil { + return + } + err = en.WriteUint64(z.startedCycle) + if err != nil { + err = msgp.WrapError(err, "startedCycle") + return + } + // write "endc" + err = en.Append(0xa4, 0x65, 0x6e, 0x64, 0x63) + if err != nil { + return + } + err = en.WriteUint64(z.endedCycle) + if err != nil { + err = msgp.WrapError(err, "endedCycle") + return + } + // write "v" + err = en.Append(0xa1, 0x76) + if err != nil { + return + } + err = en.WriteUint8(z.dataVersion) + if err != nil { + err = msgp.WrapError(err, "dataVersion") + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *metacache) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 14 + // string "id" + o = append(o, 0x8e, 0xa2, 0x69, 0x64) + o = msgp.AppendString(o, z.id) + // string "b" + o = append(o, 0xa1, 0x62) + o = msgp.AppendString(o, z.bucket) + // string "root" + o = append(o, 0xa4, 0x72, 0x6f, 0x6f, 0x74) + o = msgp.AppendString(o, z.root) + // string "rec" + o = append(o, 0xa3, 0x72, 0x65, 0x63) + o = msgp.AppendBool(o, z.recursive) + // string "stat" + o = append(o, 0xa4, 0x73, 0x74, 0x61, 0x74) + o = msgp.AppendUint8(o, uint8(z.status)) + // string "fnf" + o = append(o, 0xa3, 0x66, 0x6e, 0x66) + o = msgp.AppendBool(o, z.fileNotFound) + // string "err" + o = append(o, 0xa3, 0x65, 0x72, 0x72) + o = msgp.AppendString(o, z.error) + // string "st" + o = 
append(o, 0xa2, 0x73, 0x74) + o = msgp.AppendTime(o, z.started) + // string "end" + o = append(o, 0xa3, 0x65, 0x6e, 0x64) + o = msgp.AppendTime(o, z.ended) + // string "u" + o = append(o, 0xa1, 0x75) + o = msgp.AppendTime(o, z.lastUpdate) + // string "lh" + o = append(o, 0xa2, 0x6c, 0x68) + o = msgp.AppendTime(o, z.lastHandout) + // string "stc" + o = append(o, 0xa3, 0x73, 0x74, 0x63) + o = msgp.AppendUint64(o, z.startedCycle) + // string "endc" + o = append(o, 0xa4, 0x65, 0x6e, 0x64, 0x63) + o = msgp.AppendUint64(o, z.endedCycle) + // string "v" + o = append(o, 0xa1, 0x76) + o = msgp.AppendUint8(o, z.dataVersion) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *metacache) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zb0001 uint32 + zb0001, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "id": + z.id, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "id") + return + } + case "b": + z.bucket, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "bucket") + return + } + case "root": + z.root, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "root") + return + } + case "rec": + z.recursive, bts, err = msgp.ReadBoolBytes(bts) + if err != nil { + err = msgp.WrapError(err, "recursive") + return + } + case "stat": + { + var zb0002 uint8 + zb0002, bts, err = msgp.ReadUint8Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "status") + return + } + z.status = scanStatus(zb0002) + } + case "fnf": + z.fileNotFound, bts, err = msgp.ReadBoolBytes(bts) + if err != nil { + err = msgp.WrapError(err, "fileNotFound") + return + } + case "err": + z.error, bts, err = msgp.ReadStringBytes(bts) + if err 
!= nil { + err = msgp.WrapError(err, "error") + return + } + case "st": + z.started, bts, err = msgp.ReadTimeBytes(bts) + if err != nil { + err = msgp.WrapError(err, "started") + return + } + case "end": + z.ended, bts, err = msgp.ReadTimeBytes(bts) + if err != nil { + err = msgp.WrapError(err, "ended") + return + } + case "u": + z.lastUpdate, bts, err = msgp.ReadTimeBytes(bts) + if err != nil { + err = msgp.WrapError(err, "lastUpdate") + return + } + case "lh": + z.lastHandout, bts, err = msgp.ReadTimeBytes(bts) + if err != nil { + err = msgp.WrapError(err, "lastHandout") + return + } + case "stc": + z.startedCycle, bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "startedCycle") + return + } + case "endc": + z.endedCycle, bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "endedCycle") + return + } + case "v": + z.dataVersion, bts, err = msgp.ReadUint8Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "dataVersion") + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *metacache) Msgsize() (s int) { + s = 1 + 3 + msgp.StringPrefixSize + len(z.id) + 2 + msgp.StringPrefixSize + len(z.bucket) + 5 + msgp.StringPrefixSize + len(z.root) + 4 + msgp.BoolSize + 5 + msgp.Uint8Size + 4 + msgp.BoolSize + 4 + msgp.StringPrefixSize + len(z.error) + 3 + msgp.TimeSize + 4 + msgp.TimeSize + 2 + msgp.TimeSize + 3 + msgp.TimeSize + 4 + msgp.Uint64Size + 5 + msgp.Uint64Size + 2 + msgp.Uint8Size + return +} + +// DecodeMsg implements msgp.Decodable +func (z *scanStatus) DecodeMsg(dc *msgp.Reader) (err error) { + { + var zb0001 uint8 + zb0001, err = dc.ReadUint8() + if err != nil { + err = msgp.WrapError(err) + return + } + (*z) = scanStatus(zb0001) + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z 
scanStatus) EncodeMsg(en *msgp.Writer) (err error) { + err = en.WriteUint8(uint8(z)) + if err != nil { + err = msgp.WrapError(err) + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z scanStatus) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + o = msgp.AppendUint8(o, uint8(z)) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *scanStatus) UnmarshalMsg(bts []byte) (o []byte, err error) { + { + var zb0001 uint8 + zb0001, bts, err = msgp.ReadUint8Bytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + (*z) = scanStatus(zb0001) + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z scanStatus) Msgsize() (s int) { + s = msgp.Uint8Size + return +} diff --git a/cmd/metacache_gen_test.go b/cmd/metacache_gen_test.go new file mode 100644 index 000000000..1b61d9a1d --- /dev/null +++ b/cmd/metacache_gen_test.go @@ -0,0 +1,123 @@ +package cmd + +// Code generated by github.com/tinylib/msgp DO NOT EDIT. 
+ +import ( + "bytes" + "testing" + + "github.com/tinylib/msgp/msgp" +) + +func TestMarshalUnmarshalmetacache(t *testing.T) { + v := metacache{} + bts, err := v.MarshalMsg(nil) + if err != nil { + t.Fatal(err) + } + left, err := v.UnmarshalMsg(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) + } + + left, err = msgp.Skip(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after Skip(): %q", len(left), left) + } +} + +func BenchmarkMarshalMsgmetacache(b *testing.B) { + v := metacache{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.MarshalMsg(nil) + } +} + +func BenchmarkAppendMsgmetacache(b *testing.B) { + v := metacache{} + bts := make([]byte, 0, v.Msgsize()) + bts, _ = v.MarshalMsg(bts[0:0]) + b.SetBytes(int64(len(bts))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + bts, _ = v.MarshalMsg(bts[0:0]) + } +} + +func BenchmarkUnmarshalmetacache(b *testing.B) { + v := metacache{} + bts, _ := v.MarshalMsg(nil) + b.ReportAllocs() + b.SetBytes(int64(len(bts))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := v.UnmarshalMsg(bts) + if err != nil { + b.Fatal(err) + } + } +} + +func TestEncodeDecodemetacache(t *testing.T) { + v := metacache{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + + m := v.Msgsize() + if buf.Len() > m { + t.Log("WARNING: TestEncodeDecodemetacache Msgsize() is inaccurate") + } + + vn := metacache{} + err := msgp.Decode(&buf, &vn) + if err != nil { + t.Error(err) + } + + buf.Reset() + msgp.Encode(&buf, &v) + err = msgp.NewReader(&buf).Skip() + if err != nil { + t.Error(err) + } +} + +func BenchmarkEncodemetacache(b *testing.B) { + v := metacache{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + b.SetBytes(int64(buf.Len())) + en := msgp.NewWriter(msgp.Nowhere) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.EncodeMsg(en) + } + en.Flush() +} + +func 
BenchmarkDecodemetacache(b *testing.B) { + v := metacache{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + b.SetBytes(int64(buf.Len())) + rd := msgp.NewEndlessReader(buf.Bytes(), b) + dc := msgp.NewReader(rd) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + err := v.DecodeMsg(dc) + if err != nil { + b.Fatal(err) + } + } +} diff --git a/cmd/metacache_test.go b/cmd/metacache_test.go new file mode 100644 index 000000000..63cbd97b1 --- /dev/null +++ b/cmd/metacache_test.go @@ -0,0 +1,284 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package cmd + +import ( + "testing" + "time" +) + +var metaCacheTestsetTimestamp, _ = time.Parse(time.RFC822Z, time.RFC822Z) + +var metaCacheTestset = []metacache{ + 0: { + id: "case-1-normal", + bucket: "bucket", + root: "folder/prefix", + recursive: false, + status: scanStateSuccess, + fileNotFound: false, + error: "", + started: metaCacheTestsetTimestamp, + ended: metaCacheTestsetTimestamp.Add(time.Minute), + lastUpdate: metaCacheTestsetTimestamp.Add(time.Minute), + lastHandout: metaCacheTestsetTimestamp, + startedCycle: 10, + endedCycle: 10, + dataVersion: metacacheStreamVersion, + }, + 1: { + id: "case-2-recursive", + bucket: "bucket", + root: "folder/prefix", + recursive: true, + status: scanStateSuccess, + fileNotFound: false, + error: "", + started: metaCacheTestsetTimestamp, + ended: metaCacheTestsetTimestamp.Add(time.Minute), + lastUpdate: metaCacheTestsetTimestamp.Add(time.Minute), + lastHandout: metaCacheTestsetTimestamp, + startedCycle: 10, + endedCycle: 10, + dataVersion: metacacheStreamVersion, + }, + 2: { + id: "case-3-older", + bucket: "bucket", + root: "folder/prefix", + recursive: false, + status: scanStateSuccess, + fileNotFound: true, + error: "", + started: metaCacheTestsetTimestamp.Add(-time.Minute), + ended: metaCacheTestsetTimestamp, + lastUpdate: metaCacheTestsetTimestamp, + lastHandout: metaCacheTestsetTimestamp, + startedCycle: 10, + endedCycle: 10, + dataVersion: metacacheStreamVersion, + }, + 3: { + id: "case-4-error", + bucket: "bucket", + root: "folder/prefix", + recursive: false, + status: scanStateError, + fileNotFound: false, + error: "an error lol", + started: metaCacheTestsetTimestamp.Add(time.Minute), + ended: metaCacheTestsetTimestamp.Add(2 * time.Minute), + lastUpdate: metaCacheTestsetTimestamp.Add(2 * time.Minute), + lastHandout: metaCacheTestsetTimestamp, + startedCycle: 10, + endedCycle: 10, + dataVersion: metacacheStreamVersion, + }, + 4: { + id: "case-5-noupdate", + bucket: "bucket", + root: "folder/prefix", + 
recursive: false, + status: scanStateStarted, + fileNotFound: false, + error: "", + started: metaCacheTestsetTimestamp.Add(-time.Minute), + ended: time.Time{}, + lastUpdate: metaCacheTestsetTimestamp.Add(-time.Minute), + lastHandout: metaCacheTestsetTimestamp, + startedCycle: 10, + endedCycle: 10, + dataVersion: metacacheStreamVersion, + }, + 5: { + id: "case-6-404notfound", + bucket: "bucket", + root: "folder/notfound", + recursive: true, + status: scanStateSuccess, + fileNotFound: true, + error: "", + started: metaCacheTestsetTimestamp, + ended: metaCacheTestsetTimestamp.Add(time.Minute), + lastUpdate: metaCacheTestsetTimestamp.Add(time.Minute), + lastHandout: metaCacheTestsetTimestamp, + startedCycle: 10, + endedCycle: 10, + dataVersion: metacacheStreamVersion, + }, + 6: { + id: "case-7-oldcycle", + bucket: "bucket", + root: "folder/prefix", + recursive: true, + status: scanStateSuccess, + fileNotFound: false, + error: "", + started: metaCacheTestsetTimestamp.Add(-10 * time.Minute), + ended: metaCacheTestsetTimestamp.Add(-8 * time.Minute), + lastUpdate: metaCacheTestsetTimestamp.Add(-8 * time.Minute), + lastHandout: metaCacheTestsetTimestamp, + startedCycle: 6, + endedCycle: 8, + dataVersion: metacacheStreamVersion, + }, + 7: { + id: "case-8-running", + bucket: "bucket", + root: "folder/running", + recursive: false, + status: scanStateStarted, + fileNotFound: false, + error: "", + started: metaCacheTestsetTimestamp.Add(-1 * time.Minute), + ended: time.Time{}, + lastUpdate: metaCacheTestsetTimestamp.Add(-1 * time.Minute), + lastHandout: metaCacheTestsetTimestamp, + startedCycle: 10, + endedCycle: 0, + dataVersion: metacacheStreamVersion, + }, +} + +func Test_baseDirFromPrefix(t *testing.T) { + tests := []struct { + name string + prefix string + want string + }{ + { + name: "root", + prefix: "object.ext", + want: "", + }, + { + name: "rootdotslash", + prefix: "./object.ext", + want: "", + }, + { + name: "rootslash", + prefix: "/", + want: "", + }, + { + name: 
"folder", + prefix: "prefix/", + want: "prefix/", + }, + { + name: "folderobj", + prefix: "prefix/obj.ext", + want: "prefix/", + }, + { + name: "folderfolderobj", + prefix: "prefix/prefix2/obj.ext", + want: "prefix/prefix2/", + }, + { + name: "folderfolder", + prefix: "prefix/prefix2/", + want: "prefix/prefix2/", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if got := baseDirFromPrefix(tt.prefix); got != tt.want { + t.Errorf("baseDirFromPrefix() = %v, want %v", got, tt.want) + } + }) + } +} + +func Test_metacache_canBeReplacedBy(t *testing.T) { + testAgainst := metacache{ + id: "case-1-modified", + bucket: "bucket", + root: "folder/prefix", + recursive: true, + status: scanStateSuccess, + fileNotFound: false, + error: "", + started: metaCacheTestsetTimestamp.Add(time.Minute), + ended: metaCacheTestsetTimestamp.Add(2 * time.Minute), + lastUpdate: metaCacheTestsetTimestamp.Add(2 * time.Minute), + lastHandout: metaCacheTestsetTimestamp.Add(time.Minute), + startedCycle: 10, + endedCycle: 10, + dataVersion: metacacheStreamVersion, + } + wantResults := []bool{0: true, 1: true, 2: true, 3: true, 4: true, 5: false, 6: true, 7: false} + + for i, tt := range metaCacheTestset { + t.Run(tt.id, func(t *testing.T) { + var want bool + if i >= len(wantResults) { + t.Logf("no expected result for test #%d", i) + } else { + want = wantResults[i] + } + // Add an hour, otherwise it will never be replaced. + // We operated on a copy. 
+ tt.lastHandout.Add(-2 * time.Hour) + got := tt.canBeReplacedBy(&testAgainst) + if got != want { + t.Errorf("#%d: want %v, got %v", i, want, got) + } + }) + } +} + +func Test_metacache_finished(t *testing.T) { + wantResults := []bool{0: true, 1: true, 2: true, 3: true, 4: false, 5: true, 6: true, 7: false} + + for i, tt := range metaCacheTestset { + t.Run(tt.id, func(t *testing.T) { + var want bool + if i >= len(wantResults) { + t.Logf("no expected result for test #%d", i) + } else { + want = wantResults[i] + } + + got := tt.finished() + if got != want { + t.Errorf("#%d: want %v, got %v", i, want, got) + } + }) + } +} + +func Test_metacache_worthKeeping(t *testing.T) { + wantResults := []bool{0: true, 1: true, 2: true, 3: false, 4: false, 5: true, 6: false, 7: false} + + for i, tt := range metaCacheTestset { + t.Run(tt.id, func(t *testing.T) { + var want bool + if i >= len(wantResults) { + t.Logf("no expected result for test #%d", i) + } else { + want = wantResults[i] + } + + got := tt.worthKeeping(7 + dataUsageUpdateDirCycles) + if got != want { + t.Errorf("#%d: want %v, got %v", i, want, got) + } + }) + } +} diff --git a/cmd/naughty-disk_test.go b/cmd/naughty-disk_test.go index 911ccc094..2fd8a4783 100644 --- a/cmd/naughty-disk_test.go +++ b/cmd/naughty-disk_test.go @@ -143,6 +143,13 @@ func (d *naughtyDisk) DeleteVol(ctx context.Context, volume string, forceDelete return d.disk.DeleteVol(ctx, volume, forceDelete) } +func (d *naughtyDisk) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error { + if err := d.calcError(); err != nil { + return err + } + return d.disk.WalkDir(ctx, opts, wr) +} + func (d *naughtyDisk) WalkSplunk(ctx context.Context, volume, dirPath, marker string, endWalkCh <-chan struct{}) (chan FileInfo, error) { if err := d.calcError(); err != nil { return nil, err @@ -227,11 +234,11 @@ func (d *naughtyDisk) CheckFile(ctx context.Context, volume string, path string) return d.disk.CheckFile(ctx, volume, path) } -func (d 
*naughtyDisk) DeleteFile(ctx context.Context, volume string, path string) (err error) { +func (d *naughtyDisk) Delete(ctx context.Context, volume string, path string, recursive bool) (err error) { if err := d.calcError(); err != nil { return err } - return d.disk.DeleteFile(ctx, volume, path) + return d.disk.Delete(ctx, volume, path, recursive) } func (d *naughtyDisk) DeleteVersions(ctx context.Context, volume string, versions []FileInfo) []error { diff --git a/cmd/notification.go b/cmd/notification.go index bb31498ce..01ec27896 100644 --- a/cmd/notification.go +++ b/cmd/notification.go @@ -29,6 +29,7 @@ import ( "sync" "time" + "github.com/cespare/xxhash/v2" "github.com/klauspost/compress/zip" "github.com/minio/minio-go/v7/pkg/set" "github.com/minio/minio/cmd/crypto" @@ -51,6 +52,7 @@ type NotificationSys struct { bucketRulesMap map[string]event.RulesMap bucketRemoteTargetRulesMap map[string]map[event.TargetID]event.RulesMap peerClients []*peerRESTClient + allPeerClients []*peerRESTClient } // GetARNList - returns available ARNs. @@ -451,7 +453,7 @@ func (sys *NotificationSys) updateBloomFilter(ctx context.Context, current uint6 // Load initial state from local... var bf *bloomFilter - bfr, err := intDataUpdateTracker.cycleFilter(ctx, req.Oldest, req.Current) + bfr, err := intDataUpdateTracker.cycleFilter(ctx, req) logger.LogIf(ctx, err) if err == nil && bfr.Complete { nbf := intDataUpdateTracker.newBloomFilter() @@ -507,6 +509,124 @@ func (sys *NotificationSys) updateBloomFilter(ctx context.Context, current uint6 return bf, nil } +// collectBloomFilter will collect bloom filters from all servers from the specified cycle. +func (sys *NotificationSys) collectBloomFilter(ctx context.Context, from uint64) (*bloomFilter, error) { + var req = bloomFilterRequest{ + Current: 0, + Oldest: from, + } + + // Load initial state from local... 
+ var bf *bloomFilter + bfr, err := intDataUpdateTracker.cycleFilter(ctx, req) + logger.LogIf(ctx, err) + if err == nil && bfr.Complete { + nbf := intDataUpdateTracker.newBloomFilter() + bf = &nbf + _, err = bf.ReadFrom(bytes.NewBuffer(bfr.Filter)) + logger.LogIf(ctx, err) + } + if !bfr.Complete { + // If local isn't complete just return early + return nil, nil + } + + var mu sync.Mutex + g := errgroup.WithNErrs(len(sys.peerClients)) + for idx, client := range sys.peerClients { + if client == nil { + continue + } + client := client + g.Go(func() error { + serverBF, err := client.cycleServerBloomFilter(ctx, req) + if false && intDataUpdateTracker.debug { + b, _ := json.MarshalIndent(serverBF, "", " ") + logger.Info("Disk %v, Bloom filter: %v", client.host.Name, string(b)) + } + // Keep lock while checking result. + mu.Lock() + defer mu.Unlock() + + if err != nil || !serverBF.Complete || bf == nil { + logger.LogIf(ctx, err) + bf = nil + return nil + } + + var tmp bloom.BloomFilter + _, err = tmp.ReadFrom(bytes.NewBuffer(serverBF.Filter)) + if err != nil { + logger.LogIf(ctx, err) + bf = nil + return nil + } + if bf.BloomFilter == nil { + bf.BloomFilter = &tmp + } else { + err = bf.Merge(&tmp) + if err != nil { + logger.LogIf(ctx, err) + bf = nil + return nil + } + } + return nil + }, idx) + } + g.Wait() + return bf, nil +} + +// findEarliestCleanBloomFilter will find the earliest bloom filter across the cluster +// where the directory is clean. +// Due to how objects are stored this can include object names. +func (sys *NotificationSys) findEarliestCleanBloomFilter(ctx context.Context, dir string) uint64 { + + // Load initial state from local... + current := intDataUpdateTracker.current() + best := intDataUpdateTracker.latestWithDir(dir) + if best == current { + // If the current is dirty no need to check others. 
+ return current + } + + var req = bloomFilterRequest{ + Current: 0, + Oldest: best, + OldestClean: dir, + } + + var mu sync.Mutex + g := errgroup.WithNErrs(len(sys.peerClients)) + for idx, client := range sys.peerClients { + if client == nil { + continue + } + client := client + g.Go(func() error { + serverBF, err := client.cycleServerBloomFilter(ctx, req) + + // Keep lock while checking result. + mu.Lock() + defer mu.Unlock() + + if err != nil { + // Error, don't assume clean. + best = current + logger.LogIf(ctx, err) + return nil + } + if serverBF.OldestIdx > best { + best = serverBF.OldestIdx + } + return nil + }, idx) + } + g.Wait() + return best +} + // GetLocks - makes GetLocks RPC call on all peers. func (sys *NotificationSys) GetLocks(ctx context.Context, r *http.Request) []*PeerLocks { locksResp := make([]*PeerLocks, len(sys.peerClients)) @@ -1156,15 +1276,27 @@ func (sys *NotificationSys) GetLocalDiskIDs(ctx context.Context) (localDiskIDs [ return localDiskIDs } +// restClientFromHash will return a deterministic peerRESTClient based on s. +// Will return nil if client is local. +func (sys *NotificationSys) restClientFromHash(s string) (client *peerRESTClient) { + if len(sys.peerClients) == 0 { + return nil + } + idx := xxhash.Sum64String(s) % uint64(len(sys.allPeerClients)) + return sys.allPeerClients[idx] +} + // NewNotificationSys - creates new notification system object. 
func NewNotificationSys(endpoints EndpointServerSets) *NotificationSys { // targetList/bucketRulesMap/bucketRemoteTargetRulesMap are populated by NotificationSys.Init() + remote, all := newPeerRestClients(endpoints) return &NotificationSys{ targetList: event.NewTargetList(), targetResCh: make(chan event.TargetIDResult), bucketRulesMap: make(map[string]event.RulesMap), bucketRemoteTargetRulesMap: make(map[string]map[event.TargetID]event.RulesMap), - peerClients: newPeerRestClients(endpoints), + peerClients: remote, + allPeerClients: all, } } diff --git a/cmd/object-api-common.go b/cmd/object-api-common.go index 79aadaa60..a88b0f957 100644 --- a/cmd/object-api-common.go +++ b/cmd/object-api-common.go @@ -89,7 +89,7 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string) delFunc = func(entryPath string) error { if !HasSuffix(entryPath, SlashSeparator) { // Delete the file entry. - err := storage.DeleteFile(ctx, volume, entryPath) + err := storage.Delete(ctx, volume, entryPath, false) if !IsErrIgnored(err, []error{ errDiskNotFound, errUnformattedDisk, @@ -118,7 +118,7 @@ func cleanupDir(ctx context.Context, storage StorageAPI, volume, dirPath string) // Entry path is empty, just delete it. if len(entries) == 0 { - err = storage.DeleteFile(ctx, volume, entryPath) + err = storage.Delete(ctx, volume, entryPath, false) if !IsErrIgnored(err, []error{ errDiskNotFound, errUnformattedDisk, diff --git a/cmd/object-api-errors.go b/cmd/object-api-errors.go index 790daf26e..71dab8f7b 100644 --- a/cmd/object-api-errors.go +++ b/cmd/object-api-errors.go @@ -169,6 +169,11 @@ func (e InsufficientReadQuorum) Error() string { return "Storage resources are insufficient for the read operation." } +// Unwrap the error. +func (e InsufficientReadQuorum) Unwrap() error { + return errErasureReadQuorum +} + // InsufficientWriteQuorum storage cannot satisfy quorum for write operation. 
type InsufficientWriteQuorum struct{} @@ -176,6 +181,11 @@ func (e InsufficientWriteQuorum) Error() string { return "Storage resources are insufficient for the write operation." } +// Unwrap the error. +func (e InsufficientWriteQuorum) Unwrap() error { + return errErasureWriteQuorum +} + // GenericError - generic object layer error. type GenericError struct { Bucket string diff --git a/cmd/object-api-listobjects_test.go b/cmd/object-api-listobjects_test.go index 8e3610a56..3fdedfe9c 100644 --- a/cmd/object-api-listobjects_test.go +++ b/cmd/object-api-listobjects_test.go @@ -578,6 +578,7 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) { for i, testCase := range testCases { testCase := testCase t.Run(fmt.Sprintf("%s-Test%d", instanceType, i+1), func(t *testing.T) { + t.Log("ListObjects, bucket:", testCase.bucketName, "prefix:", testCase.prefix, "marker:", testCase.marker, "delimiter:", testCase.delimiter, "maxkeys:", testCase.maxKeys) result, err := obj.ListObjects(context.Background(), testCase.bucketName, testCase.prefix, testCase.marker, testCase.delimiter, int(testCase.maxKeys)) if err != nil && testCase.shouldPass { @@ -602,9 +603,15 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) { // otherwise it may lead to index out of range error in // assertion following this. 
if len(testCase.result.Objects) != len(result.Objects) { - t.Fatalf("Test %d: %s: Expected number of object in the result to be '%d', but found '%d' objects instead", i+1, instanceType, len(testCase.result.Objects), len(result.Objects)) + t.Logf("want: %v", objInfoNames(testCase.result.Objects)) + t.Logf("got: %v", objInfoNames(result.Objects)) + t.Errorf("Test %d: %s: Expected number of object in the result to be '%d', but found '%d' objects instead", i+1, instanceType, len(testCase.result.Objects), len(result.Objects)) } for j := 0; j < len(testCase.result.Objects); j++ { + if j >= len(result.Objects) { + t.Errorf("Test %d: %s: Expected object name to be \"%s\", but not nothing instead", i+1, instanceType, testCase.result.Objects[j].Name) + continue + } if testCase.result.Objects[j].Name != result.Objects[j].Name { t.Errorf("Test %d: %s: Expected object name to be \"%s\", but found \"%s\" instead", i+1, instanceType, testCase.result.Objects[j].Name, result.Objects[j].Name) } @@ -616,16 +623,25 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) { } if len(testCase.result.Prefixes) != len(result.Prefixes) { - t.Fatalf("Test %d: %s: Expected number of prefixes in the result to be '%d', but found '%d' prefixes instead", i+1, instanceType, len(testCase.result.Prefixes), len(result.Prefixes)) + t.Logf("want: %v", testCase.result.Prefixes) + t.Logf("got: %v", result.Prefixes) + t.Errorf("Test %d: %s: Expected number of prefixes in the result to be '%d', but found '%d' prefixes instead", i+1, instanceType, len(testCase.result.Prefixes), len(result.Prefixes)) } for j := 0; j < len(testCase.result.Prefixes); j++ { + if j >= len(result.Prefixes) { + t.Errorf("Test %d: %s: Expected prefix name to be \"%s\", but found no result", i+1, instanceType, testCase.result.Prefixes[j]) + continue + } if testCase.result.Prefixes[j] != result.Prefixes[j] { t.Errorf("Test %d: %s: Expected prefix name to be \"%s\", but found \"%s\" instead", i+1, 
instanceType, testCase.result.Prefixes[j], result.Prefixes[j]) } } if testCase.result.IsTruncated != result.IsTruncated { - t.Errorf("Test %d: %s: Expected IsTruncated flag to be %v, but instead found it to be %v", i+1, instanceType, testCase.result.IsTruncated, result.IsTruncated) + // Allow an extra continuation token. + if !result.IsTruncated || len(result.Objects) == 0 { + t.Errorf("Test %d: %s: Expected IsTruncated flag to be %v, but instead found it to be %v", i+1, instanceType, testCase.result.IsTruncated, result.IsTruncated) + } } if testCase.result.IsTruncated && result.NextMarker == "" { @@ -633,22 +649,35 @@ func testListObjects(obj ObjectLayer, instanceType string, t1 TestErrHandler) { } if !testCase.result.IsTruncated && result.NextMarker != "" { - t.Errorf("Test %d: %s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", i+1, instanceType, result.NextMarker) + if !result.IsTruncated || len(result.Objects) == 0 { + t.Errorf("Test %d: %s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", i+1, instanceType, result.NextMarker) + } } } // Take ListObject treeWalk go-routine to completion, if available in the treewalk pool. - if result.IsTruncated { - _, err = obj.ListObjects(context.Background(), testCase.bucketName, + for result.IsTruncated { + result, err = obj.ListObjects(context.Background(), testCase.bucketName, testCase.prefix, result.NextMarker, testCase.delimiter, 1000) if err != nil { t.Fatal(err) } + if !testCase.result.IsTruncated && len(result.Objects) > 0 { + t.Errorf("expected to get all objects in the previous call, but got %d more", len(result.Objects)) + } } }) } } +func objInfoNames(o []ObjectInfo) []string { + var res = make([]string, len(o)) + for i := range o { + res[i] = o[i].Name + } + return res +} + // Wrapper for calling ListObjectVersions tests for both Erasure multiple disks and single node setup. 
func TestListObjectVersions(t *testing.T) { ExecObjectLayerTest(t, testListObjectVersions) @@ -1240,7 +1269,7 @@ func testListObjectVersions(obj ObjectLayer, instanceType string, t1 TestErrHand } if len(testCase.result.Prefixes) != len(result.Prefixes) { - fmt.Println(testCase, testCase.result.Prefixes, result.Prefixes) + t.Log(testCase, testCase.result.Prefixes, result.Prefixes) t.Fatalf("%s: Expected number of prefixes in the result to be '%d', but found '%d' prefixes instead", instanceType, len(testCase.result.Prefixes), len(result.Prefixes)) } for j := 0; j < len(testCase.result.Prefixes); j++ { @@ -1250,7 +1279,10 @@ func testListObjectVersions(obj ObjectLayer, instanceType string, t1 TestErrHand } if testCase.result.IsTruncated != result.IsTruncated { - t.Errorf("%s: Expected IsTruncated flag to be %v, but instead found it to be %v", instanceType, testCase.result.IsTruncated, result.IsTruncated) + // Allow an extra continuation token. + if !result.IsTruncated || len(result.Objects) == 0 { + t.Errorf("%s: Expected IsTruncated flag to be %v, but instead found it to be %v", instanceType, testCase.result.IsTruncated, result.IsTruncated) + } } if testCase.result.IsTruncated && result.NextMarker == "" { @@ -1258,17 +1290,22 @@ func testListObjectVersions(obj ObjectLayer, instanceType string, t1 TestErrHand } if !testCase.result.IsTruncated && result.NextMarker != "" { - t.Errorf("%s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", instanceType, result.NextMarker) + if !result.IsTruncated || len(result.Objects) == 0 { + t.Errorf("%s: Expected NextContinuationToken to be empty since listing is not truncated, but instead found `%v`", instanceType, result.NextMarker) + } } } // Take ListObject treeWalk go-routine to completion, if available in the treewalk pool. 
- if result.IsTruncated { - _, err = obj.ListObjectVersions(context.Background(), testCase.bucketName, + for result.IsTruncated { + result, err = obj.ListObjectVersions(context.Background(), testCase.bucketName, testCase.prefix, result.NextMarker, "", testCase.delimiter, 1000) if err != nil { t.Fatal(err) } + if !testCase.result.IsTruncated && len(result.Objects) > 0 { + t.Errorf("expected to get all objects in the previous call, but got %d more", len(result.Objects)) + } } }) } diff --git a/cmd/peer-rest-client.go b/cmd/peer-rest-client.go index 11ce60ad7..0e925ff1f 100644 --- a/cmd/peer-rest-client.go +++ b/cmd/peer-rest-client.go @@ -22,6 +22,7 @@ import ( "crypto/tls" "encoding/gob" "errors" + "fmt" "io" "math" "net/url" @@ -40,7 +41,8 @@ import ( "github.com/minio/minio/pkg/event" "github.com/minio/minio/pkg/madmin" xnet "github.com/minio/minio/pkg/net" - trace "github.com/minio/minio/pkg/trace" + "github.com/minio/minio/pkg/trace" + "github.com/tinylib/msgp/msgp" ) // client to talk to peer Nodes. @@ -657,6 +659,40 @@ func (client *peerRESTClient) GetLocalDiskIDs(ctx context.Context) (diskIDs []st return diskIDs } +// GetMetacacheListing - get a new or existing metacache. +func (client *peerRESTClient) GetMetacacheListing(ctx context.Context, o listPathOptions) (*metacache, error) { + var reader bytes.Buffer + err := gob.NewEncoder(&reader).Encode(o) + if err != nil { + return nil, err + } + respBody, err := client.callWithContext(ctx, peerRESTMethodGetMetacacheListing, nil, &reader, int64(reader.Len())) + if err != nil { + logger.LogIf(ctx, err) + return nil, err + } + var resp metacache + defer http.DrainBody(respBody) + return &resp, msgp.Decode(respBody, &resp) +} + +// UpdateMetacacheListing - update an existing metacache it will unconditionally be updated to the new state. 
+func (client *peerRESTClient) UpdateMetacacheListing(ctx context.Context, m metacache) (metacache, error) { + b, err := m.MarshalMsg(nil) + if err != nil { + return m, err + } + respBody, err := client.callWithContext(ctx, peerRESTMethodUpdateMetacacheListing, nil, bytes.NewBuffer(b), int64(len(b))) + if err != nil { + logger.LogIf(ctx, err) + return m, err + } + defer http.DrainBody(respBody) + var resp metacache + return resp, msgp.Decode(respBody, &resp) + +} + func (client *peerRESTClient) doTrace(traceCh chan interface{}, doneCh <-chan struct{}, trcAll, trcErr bool) { values := make(url.Values) values.Set(peerRESTTraceAll, strconv.FormatBool(trcAll)) @@ -811,30 +847,25 @@ func (client *peerRESTClient) ConsoleLog(logCh chan interface{}, doneCh <-chan s }() } -func getRemoteHosts(endpointServerSets EndpointServerSets) []*xnet.Host { - peers := GetRemotePeers(endpointServerSets) - remoteHosts := make([]*xnet.Host, 0, len(peers)) - for _, hostStr := range peers { - host, err := xnet.ParseHost(hostStr) - if err != nil { - logger.LogIf(GlobalContext, err) +// newPeerRestClients creates new peer clients. +// The two slices will point to the same clients, +// but 'all' will contain nil entry for local client. +// The 'all' slice will be in the same order across the cluster. +func newPeerRestClients(endpoints EndpointServerSets) (remote, all []*peerRESTClient) { + hosts := endpoints.hostsSorted() + remote = make([]*peerRESTClient, 0, len(hosts)) + all = make([]*peerRESTClient, len(hosts)) + for i, host := range hosts { + if host == nil { continue } - remoteHosts = append(remoteHosts, host) + all[i] = newPeerRESTClient(host) + remote = append(remote, all[i]) } - - return remoteHosts -} - -// newPeerRestClients creates new peer clients. 
-func newPeerRestClients(endpoints EndpointServerSets) []*peerRESTClient { - peerHosts := getRemoteHosts(endpoints) - restClients := make([]*peerRESTClient, len(peerHosts)) - for i, host := range peerHosts { - restClients[i] = newPeerRESTClient(host) + if len(all) != len(remote)+1 { + logger.LogIf(context.Background(), fmt.Errorf("WARNING: Expected number of all hosts (%v) to be remote +1 (%v)", len(all), len(remote))) } - - return restClients + return remote, all } // Returns a peer rest client. diff --git a/cmd/peer-rest-common.go b/cmd/peer-rest-common.go index 0210fe445..d62393889 100644 --- a/cmd/peer-rest-common.go +++ b/cmd/peer-rest-common.go @@ -24,39 +24,41 @@ const ( ) const ( - peerRESTMethodHealth = "/health" - peerRESTMethodServerInfo = "/serverinfo" - peerRESTMethodDriveOBDInfo = "/driveobdinfo" - peerRESTMethodNetOBDInfo = "/netobdinfo" - peerRESTMethodCPUOBDInfo = "/cpuobdinfo" - peerRESTMethodDiskHwOBDInfo = "/diskhwobdinfo" - peerRESTMethodOsInfoOBDInfo = "/osinfoobdinfo" - peerRESTMethodMemOBDInfo = "/memobdinfo" - peerRESTMethodProcOBDInfo = "/procobdinfo" - peerRESTMethodDispatchNetOBDInfo = "/dispatchnetobdinfo" - peerRESTMethodDeleteBucketMetadata = "/deletebucketmetadata" - peerRESTMethodLoadBucketMetadata = "/loadbucketmetadata" - peerRESTMethodServerUpdate = "/serverupdate" - peerRESTMethodSignalService = "/signalservice" - peerRESTMethodBackgroundHealStatus = "/backgroundhealstatus" - peerRESTMethodGetLocks = "/getlocks" - peerRESTMethodLoadUser = "/loaduser" - peerRESTMethodLoadServiceAccount = "/loadserviceaccount" - peerRESTMethodDeleteUser = "/deleteuser" - peerRESTMethodDeleteServiceAccount = "/deleteserviceaccount" - peerRESTMethodLoadPolicy = "/loadpolicy" - peerRESTMethodLoadPolicyMapping = "/loadpolicymapping" - peerRESTMethodDeletePolicy = "/deletepolicy" - peerRESTMethodLoadGroup = "/loadgroup" - peerRESTMethodStartProfiling = "/startprofiling" - peerRESTMethodDownloadProfilingData = "/downloadprofilingdata" - 
peerRESTMethodReloadFormat = "/reloadformat" - peerRESTMethodCycleBloom = "/cyclebloom" - peerRESTMethodTrace = "/trace" - peerRESTMethodListen = "/listen" - peerRESTMethodLog = "/log" - peerRESTMethodGetLocalDiskIDs = "/getlocaldiskids" - peerRESTMethodGetBandwidth = "/bandwidth" + peerRESTMethodHealth = "/health" + peerRESTMethodServerInfo = "/serverinfo" + peerRESTMethodDriveOBDInfo = "/driveobdinfo" + peerRESTMethodNetOBDInfo = "/netobdinfo" + peerRESTMethodCPUOBDInfo = "/cpuobdinfo" + peerRESTMethodDiskHwOBDInfo = "/diskhwobdinfo" + peerRESTMethodOsInfoOBDInfo = "/osinfoobdinfo" + peerRESTMethodMemOBDInfo = "/memobdinfo" + peerRESTMethodProcOBDInfo = "/procobdinfo" + peerRESTMethodDispatchNetOBDInfo = "/dispatchnetobdinfo" + peerRESTMethodDeleteBucketMetadata = "/deletebucketmetadata" + peerRESTMethodLoadBucketMetadata = "/loadbucketmetadata" + peerRESTMethodServerUpdate = "/serverupdate" + peerRESTMethodSignalService = "/signalservice" + peerRESTMethodBackgroundHealStatus = "/backgroundhealstatus" + peerRESTMethodGetLocks = "/getlocks" + peerRESTMethodLoadUser = "/loaduser" + peerRESTMethodLoadServiceAccount = "/loadserviceaccount" + peerRESTMethodDeleteUser = "/deleteuser" + peerRESTMethodDeleteServiceAccount = "/deleteserviceaccount" + peerRESTMethodLoadPolicy = "/loadpolicy" + peerRESTMethodLoadPolicyMapping = "/loadpolicymapping" + peerRESTMethodDeletePolicy = "/deletepolicy" + peerRESTMethodLoadGroup = "/loadgroup" + peerRESTMethodStartProfiling = "/startprofiling" + peerRESTMethodDownloadProfilingData = "/downloadprofilingdata" + peerRESTMethodReloadFormat = "/reloadformat" + peerRESTMethodCycleBloom = "/cyclebloom" + peerRESTMethodTrace = "/trace" + peerRESTMethodListen = "/listen" + peerRESTMethodLog = "/log" + peerRESTMethodGetLocalDiskIDs = "/getlocaldiskids" + peerRESTMethodGetBandwidth = "/bandwidth" + peerRESTMethodGetMetacacheListing = "/getmetacache" + peerRESTMethodUpdateMetacacheListing = "/updatemetacache" ) const ( diff --git 
a/cmd/peer-rest-server.go b/cmd/peer-rest-server.go index 2211885b0..16f4fd154 100644 --- a/cmd/peer-rest-server.go +++ b/cmd/peer-rest-server.go @@ -34,6 +34,7 @@ import ( "github.com/minio/minio/pkg/event" "github.com/minio/minio/pkg/madmin" trace "github.com/minio/minio/pkg/trace" + "github.com/tinylib/msgp/msgp" ) // To abstract a node over network. @@ -616,7 +617,7 @@ func (s *peerRESTServer) ReloadFormatHandler(w http.ResponseWriter, r *http.Requ w.(http.Flusher).Flush() } -// CycleServerBloomFilterHandler cycles bllom filter on server. +// CycleServerBloomFilterHandler cycles bloom filter on server. func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r *http.Request) { if !s.IsValid(w, r) { s.writeErrorResponse(w, errors.New("Invalid request")) @@ -631,7 +632,7 @@ func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r s.writeErrorResponse(w, err) return } - bf, err := intDataUpdateTracker.cycleFilter(ctx, req.Oldest, req.Current) + bf, err := intDataUpdateTracker.cycleFilter(ctx, req) if err != nil { s.writeErrorResponse(w, err) return @@ -640,6 +641,51 @@ func (s *peerRESTServer) CycleServerBloomFilterHandler(w http.ResponseWriter, r logger.LogIf(ctx, gob.NewEncoder(w).Encode(bf)) } +func (s *peerRESTServer) GetMetacacheListingHandler(w http.ResponseWriter, r *http.Request) { + if !s.IsValid(w, r) { + s.writeErrorResponse(w, errors.New("Invalid request")) + return + } + ctx := newContext(r, w, "GetMetacacheListing") + + var opts listPathOptions + err := gob.NewDecoder(r.Body).Decode(&opts) + if err != nil && err != io.EOF { + s.writeErrorResponse(w, err) + return + } + resp := localMetacacheMgr.getBucket(ctx, opts.Bucket).findCache(opts) + logger.LogIf(ctx, msgp.Encode(w, &resp)) +} + +func (s *peerRESTServer) UpdateMetacacheListingHandler(w http.ResponseWriter, r *http.Request) { + if !s.IsValid(w, r) { + s.writeErrorResponse(w, errors.New("Invalid request")) + return + } + ctx := newContext(r, w, 
"UpdateMetacacheListing") + + var req metacache + err := msgp.Decode(r.Body, &req) + if err != nil { + s.writeErrorResponse(w, err) + return + } + b := localMetacacheMgr.getBucket(ctx, req.bucket) + if b == nil { + s.writeErrorResponse(w, errServerNotInitialized) + return + } + + cache, err := b.updateCacheEntry(req) + if err != nil { + s.writeErrorResponse(w, err) + return + } + // Return updated metadata. + logger.LogIf(ctx, msgp.Encode(w, &cache)) +} + // PutBucketNotificationHandler - Set bucket policy. func (s *peerRESTServer) PutBucketNotificationHandler(w http.ResponseWriter, r *http.Request) { if !s.IsValid(w, r) { @@ -1054,4 +1100,6 @@ func registerPeerRESTHandlers(router *mux.Router) { subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodLog).HandlerFunc(server.ConsoleLogHandler) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetLocalDiskIDs).HandlerFunc(httpTraceHdrs(server.GetLocalDiskIDs)) subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetBandwidth).HandlerFunc(httpTraceHdrs(server.GetBandwidth)) + subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodGetMetacacheListing).HandlerFunc(httpTraceHdrs(server.GetMetacacheListingHandler)) + subrouter.Methods(http.MethodPost).Path(peerRESTVersionPrefix + peerRESTMethodUpdateMetacacheListing).HandlerFunc(httpTraceHdrs(server.UpdateMetacacheListingHandler)) } diff --git a/cmd/rest/client.go b/cmd/rest/client.go index bb8e3f006..1fdb9960b 100644 --- a/cmd/rest/client.go +++ b/cmd/rest/client.go @@ -27,6 +27,8 @@ import ( "sync/atomic" "time" + "github.com/minio/minio/cmd/logger" + xhttp "github.com/minio/minio/cmd/http" xnet "github.com/minio/minio/pkg/net" ) @@ -113,6 +115,7 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod resp, err := c.httpClient.Do(req) if err != nil { if xnet.IsNetworkOrHostDown(err) { + logger.LogIf(ctx, err, "marking disk offline") 
c.MarkOffline() } return nil, &NetworkError{err} @@ -142,6 +145,7 @@ func (c *Client) Call(ctx context.Context, method string, values url.Values, bod b, err := ioutil.ReadAll(io.LimitReader(resp.Body, c.MaxErrResponseSize)) if err != nil { if xnet.IsNetworkOrHostDown(err) { + logger.LogIf(ctx, err, "marking disk offline") c.MarkOffline() } return nil, err diff --git a/cmd/storage-datatypes.go b/cmd/storage-datatypes.go index 7b316aa82..63d28da4f 100644 --- a/cmd/storage-datatypes.go +++ b/cmd/storage-datatypes.go @@ -59,6 +59,20 @@ type FileInfoVersions struct { Versions []FileInfo } +// forwardPastVersion will truncate the result to only contain versions after 'v'. +// If v is empty or the version isn't found no changes will be made. +func (f *FileInfoVersions) forwardPastVersion(v string) { + if v == "" { + return + } + for i, ver := range f.Versions { + if ver.VersionID == v { + f.Versions = f.Versions[i+1:] + return + } + } +} + // FileInfo - represents file stat information. type FileInfo struct { // Name of the volume. diff --git a/cmd/storage-interface.go b/cmd/storage-interface.go index da3bc1591..3688ed4cb 100644 --- a/cmd/storage-interface.go +++ b/cmd/storage-interface.go @@ -48,6 +48,9 @@ type StorageAPI interface { StatVol(ctx context.Context, volume string) (vol VolInfo, err error) DeleteVol(ctx context.Context, volume string, forceDelete bool) (err error) + // WalkDir will walk a directory on disk and return a metacache stream on wr. + WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error + // WalkVersions in sorted order directly on disk. WalkVersions(ctx context.Context, volume, dirPath, marker string, recursive bool, endWalkCh <-chan struct{}) (chan FileInfoVersions, error) // Walk in sorted order directly on disk. 
@@ -71,7 +74,7 @@ type StorageAPI interface { RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) error CheckParts(ctx context.Context, volume string, path string, fi FileInfo) error CheckFile(ctx context.Context, volume string, path string) (err error) - DeleteFile(ctx context.Context, volume string, path string) (err error) + Delete(ctx context.Context, volume string, path string, recursive bool) (err error) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) error // Write all data, syncs the data to disk. diff --git a/cmd/storage-rest-client.go b/cmd/storage-rest-client.go index 82afc0ad8..61307bdc9 100644 --- a/cmd/storage-rest-client.go +++ b/cmd/storage-rest-client.go @@ -555,10 +555,11 @@ func (client *storageRESTClient) ListDir(ctx context.Context, volume, dirPath st } // DeleteFile - deletes a file. -func (client *storageRESTClient) DeleteFile(ctx context.Context, volume string, path string) error { +func (client *storageRESTClient) Delete(ctx context.Context, volume string, path string, recursive bool) error { values := make(url.Values) values.Set(storageRESTVolume, volume) values.Set(storageRESTFilePath, path) + values.Set(storageRESTRecursive, strconv.FormatBool(recursive)) respBody, err := client.call(ctx, storageRESTMethodDeleteFile, values, nil, -1) defer http.DrainBody(respBody) return err diff --git a/cmd/storage-rest-common.go b/cmd/storage-rest-common.go index 6145eb2c3..0fba8f97d 100644 --- a/cmd/storage-rest-common.go +++ b/cmd/storage-rest-common.go @@ -17,7 +17,7 @@ package cmd const ( - storageRESTVersion = "v21" // Add checkDataDir in ReadVersion API + storageRESTVersion = "v22" // Add dir listing and recursive delete operation. 
storageRESTVersionPrefix = SlashSeparator + storageRESTVersion storageRESTPrefix = minioReservedBucketPath + "/storage" ) @@ -52,6 +52,7 @@ const ( storageRESTMethodDeleteVersions = "/deleteverions" storageRESTMethodRenameFile = "/renamefile" storageRESTMethodVerifyFile = "/verifyfile" + storageRESTMethodWalkDir = "/walkdir" ) const ( diff --git a/cmd/storage-rest-server.go b/cmd/storage-rest-server.go index 20293e6b7..014e8fdac 100644 --- a/cmd/storage-rest-server.go +++ b/cmd/storage-rest-server.go @@ -18,6 +18,8 @@ package cmd import ( "bufio" + "bytes" + "encoding/binary" "encoding/gob" "encoding/hex" "errors" @@ -633,8 +635,13 @@ func (s *storageRESTServer) DeleteFileHandler(w http.ResponseWriter, r *http.Req vars := mux.Vars(r) volume := vars[storageRESTVolume] filePath := vars[storageRESTFilePath] + recursive, err := strconv.ParseBool(vars[storageRESTRecursive]) + if err != nil { + s.writeErrorResponse(w, err) + return + } - err := s.storage.DeleteFile(r.Context(), volume, filePath) + err = s.storage.Delete(r.Context(), volume, filePath, recursive) if err != nil { s.writeErrorResponse(w, err) } @@ -792,6 +799,161 @@ func waitForHTTPResponse(respBody io.Reader) (io.Reader, error) { } } +// drainCloser can be used for wrapping an http response. +// It will drain the body before closing. +type drainCloser struct { + rc io.ReadCloser +} + +// Read forwards the read operation. +func (f drainCloser) Read(p []byte) (n int, err error) { + return f.rc.Read(p) +} + +// Close drains the body and closes the upstream. +func (f drainCloser) Close() error { + xhttp.DrainBody(f.rc) + return nil +} + +// httpStreamResponse allows streaming a response, but still send an error. +type httpStreamResponse struct { + done chan error + block chan []byte + err error +} + +// Write part of the the streaming response. +// Note that upstream errors are currently not forwarded, but may be in the future. 
+func (h *httpStreamResponse) Write(b []byte) (int, error) { + tmp := make([]byte, len(b)) + copy(tmp, b) + h.block <- tmp + return len(b), h.err +} + +// CloseWithError will close the stream and return the specified error. +// This can be done several times, but only the first error will be sent. +// After calling this the stream should not be written to. +func (h *httpStreamResponse) CloseWithError(err error) { + if h.done == nil { + return + } + h.done <- err + h.err = err + // Indicates that the response is done. + <-h.done + h.done = nil +} + +// streamHTTPResponse can be used to avoid timeouts with long storage +// operations, such as bitrot verification or data usage crawling. +// Every 10 seconds a space character is sent. +// The returned function should always be called to release resources. +// An optional error can be sent which will be picked as text only error, +// without its original type by the receiver. +// waitForHTTPStream should be used to the receiving side. +func streamHTTPResponse(w http.ResponseWriter) *httpStreamResponse { + doneCh := make(chan error) + blockCh := make(chan []byte) + h := httpStreamResponse{done: doneCh, block: blockCh} + go func() { + ticker := time.NewTicker(time.Second * 10) + for { + select { + case <-ticker.C: + // Response not ready, write a filler byte. 
+ w.Write([]byte{32}) + w.(http.Flusher).Flush() + case err := <-doneCh: + ticker.Stop() + defer close(doneCh) + if err != nil { + var buf bytes.Buffer + enc := gob.NewEncoder(&buf) + if ee := enc.Encode(err); ee == nil { + w.Write([]byte{3}) + w.Write(buf.Bytes()) + } else { + w.Write([]byte{1}) + w.Write([]byte(err.Error())) + } + } else { + w.Write([]byte{0}) + } + return + case block := <-blockCh: + var tmp [5]byte + tmp[0] = 2 + binary.LittleEndian.PutUint32(tmp[1:], uint32(len(block))) + w.Write(tmp[:]) + w.Write(block) + w.(http.Flusher).Flush() + } + } + }() + return &h +} + +// waitForHTTPStream will wait for responses where +// streamHTTPResponse has been used. +// The returned reader contains the payload and must be closed if no error is returned. +func waitForHTTPStream(respBody io.ReadCloser, w io.Writer) error { + var tmp [1]byte + for { + _, err := io.ReadFull(respBody, tmp[:]) + if err != nil { + return err + } + // Check if we have a response ready or a filler byte. + switch tmp[0] { + case 0: + // 0 is unbuffered, copy the rest. + _, err := io.Copy(w, respBody) + respBody.Close() + if err == io.EOF { + return nil + } + return err + case 1: + errorText, err := ioutil.ReadAll(respBody) + if err != nil { + return err + } + respBody.Close() + return errors.New(string(errorText)) + case 3: + // Typed error + defer respBody.Close() + dec := gob.NewDecoder(respBody) + var err error + if de := dec.Decode(&err); de == nil { + return err + } + return errors.New("rpc error") + case 2: + // Block of data + var tmp [4]byte + _, err := io.ReadFull(respBody, tmp[:]) + if err != nil { + return err + } + + length := binary.LittleEndian.Uint32(tmp[:]) + _, err = io.CopyN(w, respBody, int64(length)) + if err != nil { + return err + } + continue + case 32: + continue + default: + go xhttp.DrainBody(respBody) + return fmt.Errorf("unexpected filler byte: %d", tmp[0]) + } + } +} + // VerifyFileResp - VerifyFile()'s response. 
type VerifyFileResp struct { Err error @@ -960,12 +1122,14 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerSets Endpoint subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodDeleteVersions).HandlerFunc(httpTraceHdrs(server.DeleteVersionsHandler)). Queries(restQueries(storageRESTVolume, storageRESTTotalVersions)...) subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodDeleteFile).HandlerFunc(httpTraceHdrs(server.DeleteFileHandler)). - Queries(restQueries(storageRESTVolume, storageRESTFilePath)...) + Queries(restQueries(storageRESTVolume, storageRESTFilePath, storageRESTRecursive)...) subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodRenameFile).HandlerFunc(httpTraceHdrs(server.RenameFileHandler)). Queries(restQueries(storageRESTSrcVolume, storageRESTSrcPath, storageRESTDstVolume, storageRESTDstPath)...) subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodVerifyFile).HandlerFunc(httpTraceHdrs(server.VerifyFileHandler)). Queries(restQueries(storageRESTVolume, storageRESTFilePath)...) + subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodWalkDir).HandlerFunc(httpTraceHdrs(server.WalkDirHandler)). + Queries(restQueries(storageRESTVolume, storageRESTDirPath, storageRESTRecursive)...) 
} } } diff --git a/cmd/storage-rest_test.go b/cmd/storage-rest_test.go index 361e03cf1..eaba4e68b 100644 --- a/cmd/storage-rest_test.go +++ b/cmd/storage-rest_test.go @@ -362,7 +362,7 @@ func testStorageAPIDeleteFile(t *testing.T, storage StorageAPI) { } for i, testCase := range testCases { - err := storage.DeleteFile(context.Background(), testCase.volumeName, testCase.objectName) + err := storage.Delete(context.Background(), testCase.volumeName, testCase.objectName, false) expectErr := (err != nil) if expectErr != testCase.expectErr { diff --git a/cmd/test-utils_test.go b/cmd/test-utils_test.go index d207c9674..c45ed0be8 100644 --- a/cmd/test-utils_test.go +++ b/cmd/test-utils_test.go @@ -34,6 +34,7 @@ import ( "encoding/pem" "encoding/xml" "errors" + "flag" "fmt" "io" "io/ioutil" @@ -65,8 +66,9 @@ import ( "github.com/minio/minio/pkg/hash" ) -// Tests should initNSLock only once. -func init() { +// TestMain to set up global env. +func TestMain(m *testing.M) { + flag.Parse() globalActiveCred = auth.Credentials{ AccessKey: auth.DefaultAccessKey, SecretKey: auth.DefaultSecretKey, @@ -89,8 +91,13 @@ func init() { // Set as non-distributed. globalIsDistErasure = false - // Disable printing console messages during tests. - color.Output = ioutil.Discard + if !testing.Verbose() { + // Disable printing console messages during tests. + color.Output = ioutil.Discard + logger.Disable = true + } + // Uncomment the following line to see trace logs during unit tests. + // logger.AddTarget(console.New()) // Set system resources to maximum. setMaxResources() @@ -98,18 +105,16 @@ func init() { // Initialize globalConsoleSys system globalConsoleSys = NewConsoleLogger(context.Background()) - logger.Disable = true - globalDNSCache = xhttp.NewDNSCache(3*time.Second, 10*time.Second) initHelp() resetTestGlobals() - // Uncomment the following line to see trace logs during unit tests. 
- // logger.AddTarget(console.New()) + + os.Exit(m.Run()) } -// concurreny level for certain parallel tests. +// concurrency level for certain parallel tests. const testConcurrencyLevel = 10 /// @@ -1874,10 +1879,13 @@ func ExecObjectLayerTest(t TestErrHandler, objTest objTestType) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + defer setObjectLayer(newObjectLayerFn()) + objLayer, fsDir, err := prepareFS() if err != nil { t.Fatalf("Initialization of object layer failed for single node setup: %s", err) } + setObjectLayer(objLayer) newAllSubsystems() @@ -1893,11 +1901,12 @@ func ExecObjectLayerTest(t TestErrHandler, objTest objTestType) { objTest(objLayer, FSTestStr, t) newAllSubsystems() - objLayer, fsDirs, err := prepareErasureSets32(ctx) if err != nil { t.Fatalf("Initialization of object layer failed for Erasure setup: %s", err) } + setObjectLayer(objLayer) + defer objLayer.Shutdown(context.Background()) initAllSubsystems(ctx, objLayer) diff --git a/cmd/testdata/metacache.s2 b/cmd/testdata/metacache.s2 new file mode 100644 index 000000000..d288db9ba Binary files /dev/null and b/cmd/testdata/metacache.s2 differ diff --git a/cmd/xl-storage-disk-id-check.go b/cmd/xl-storage-disk-id-check.go index 248905acd..07ff52001 100644 --- a/cmd/xl-storage-disk-id-check.go +++ b/cmd/xl-storage-disk-id-check.go @@ -237,12 +237,12 @@ func (p *xlStorageDiskIDCheck) CheckFile(ctx context.Context, volume string, pat return p.storage.CheckFile(ctx, volume, path) } -func (p *xlStorageDiskIDCheck) DeleteFile(ctx context.Context, volume string, path string) (err error) { +func (p *xlStorageDiskIDCheck) Delete(ctx context.Context, volume string, path string, recursive bool) (err error) { if err = p.checkDiskStale(); err != nil { return err } - return p.storage.DeleteFile(ctx, volume, path) + return p.storage.Delete(ctx, volume, path, recursive) } func (p *xlStorageDiskIDCheck) DeleteVersions(ctx context.Context, volume string, versions []FileInfo) (errs []error) 
{ diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go index 8df3865da..e72ed03a7 100644 --- a/cmd/xl-storage.go +++ b/cmd/xl-storage.go @@ -1146,7 +1146,7 @@ func (s *xlStorage) DeleteVersions(ctx context.Context, volume string, versions // DeleteVersion - deletes FileInfo metadata for path at `xl.meta` func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi FileInfo) error { if HasSuffix(path, SlashSeparator) { - return s.DeleteFile(ctx, volume, path) + return s.Delete(ctx, volume, path, false) } buf, err := s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile)) @@ -1980,7 +1980,7 @@ func deleteFile(basePath, deletePath string, recursive bool) error { } // DeleteFile - delete a file at path. -func (s *xlStorage) DeleteFile(ctx context.Context, volume string, path string) (err error) { +func (s *xlStorage) Delete(ctx context.Context, volume string, path string, recursive bool) (err error) { atomic.AddInt32(&s.activeIOCount, 1) defer func() { atomic.AddInt32(&s.activeIOCount, -1) @@ -2011,8 +2011,8 @@ func (s *xlStorage) DeleteFile(ctx context.Context, volume string, path string) return err } - // Delete file and delete parent directory as well if its empty. - return deleteFile(volumeDir, filePath, false) + // Delete file and delete parent directory as well if it's empty. 
+ return deleteFile(volumeDir, filePath, recursive) } func (s *xlStorage) DeleteFileBulk(volume string, paths []string) (errs []error, err error) { diff --git a/cmd/xl-storage_test.go b/cmd/xl-storage_test.go index 9bb227aef..937921b66 100644 --- a/cmd/xl-storage_test.go +++ b/cmd/xl-storage_test.go @@ -817,14 +817,14 @@ func TestXLStorageXlStorageListDir(t *testing.T) { t.Fatalf("Unable to initialize xlStorage, %s", err) } - if err = xlStorageNew.DeleteFile(context.Background(), "mybucket", "myobject"); err != errFileAccessDenied { + if err = xlStorageNew.Delete(context.Background(), "mybucket", "myobject", false); err != errFileAccessDenied { t.Errorf("expected: %s, got: %s", errFileAccessDenied, err) } } // TestXLStorage for delete on an removed disk. // should fail with disk not found. - err = xlStorageDeletedStorage.DeleteFile(context.Background(), "del-vol", "my-file") + err = xlStorageDeletedStorage.Delete(context.Background(), "del-vol", "my-file", false) if err != errDiskNotFound { t.Errorf("Expected: \"Disk not found\", got \"%s\"", err) } @@ -878,7 +878,7 @@ func TestXLStorageDeleteFile(t *testing.T) { expectedErr: nil, }, // TestXLStorage case - 2. - // The file was deleted in the last case, so DeleteFile should fail. + // The file was deleted in the last case, so Delete should fail. 
{ srcVol: "success-vol", srcPath: "success-file", @@ -916,7 +916,7 @@ func TestXLStorageDeleteFile(t *testing.T) { } for i, testCase := range testCases { - if err = xlStorage.DeleteFile(context.Background(), testCase.srcVol, testCase.srcPath); err != testCase.expectedErr { + if err = xlStorage.Delete(context.Background(), testCase.srcVol, testCase.srcPath, false); err != testCase.expectedErr { t.Errorf("TestXLStorage case %d: Expected: \"%s\", got: \"%s\"", i+1, testCase.expectedErr, err) } } @@ -941,14 +941,14 @@ func TestXLStorageDeleteFile(t *testing.T) { t.Fatalf("Unable to initialize xlStorage, %s", err) } - if err = xlStorageNew.DeleteFile(context.Background(), "mybucket", "myobject"); err != errFileAccessDenied { + if err = xlStorageNew.Delete(context.Background(), "mybucket", "myobject", false); err != errFileAccessDenied { t.Errorf("expected: %s, got: %s", errFileAccessDenied, err) } } // TestXLStorage for delete on an removed disk. // should fail with disk not found. - err = xlStorageDeletedStorage.DeleteFile(context.Background(), "del-vol", "my-file") + err = xlStorageDeletedStorage.Delete(context.Background(), "del-vol", "my-file", false) if err != errDiskNotFound { t.Errorf("Expected: \"Disk not found\", got \"%s\"", err) } @@ -1671,7 +1671,7 @@ func TestXLStorageVerifyFile(t *testing.T) { t.Fatal("expected to fail bitrot check") } - if err := xlStorage.DeleteFile(context.Background(), volName, fileName); err != nil { + if err := xlStorage.Delete(context.Background(), volName, fileName, false); err != nil { t.Fatal(err) } diff --git a/cmd/xl-storage_windows_test.go b/cmd/xl-storage_windows_test.go index 633ce32fd..cd49e0152 100644 --- a/cmd/xl-storage_windows_test.go +++ b/cmd/xl-storage_windows_test.go @@ -68,7 +68,7 @@ func TestUNCPaths(t *testing.T) { } else if err == nil && !test.pass { t.Error(err) } - fs.DeleteFile(context.Background(), "voldir", test.objName) + fs.Delete(context.Background(), "voldir", test.objName, false) }) } }