From 8d9866263334a644ed8a80a427ed7bad792c1dce Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Thu, 19 Mar 2020 00:19:29 +0100 Subject: [PATCH] re-implement data usage crawler to be more efficient (#9075) Implementation overview: https://gist.github.com/klauspost/1801c858d5e0df391114436fdad6987b --- Makefile | 2 +- browser/staticcheck.conf | 1 + cmd/admin-heal-ops.go | 15 +- cmd/config-current.go | 5 +- cmd/config/errors-utils.go | 2 +- cmd/config/etcd/dns/etcd_dns.go | 6 +- cmd/data-usage-cache.go | 521 +++++++++++++++++++ cmd/data-usage-cache_gen.go | 447 ++++++++++++++++ cmd/data-usage-cache_gen_test.go | 349 +++++++++++++ cmd/data-usage.go | 423 +++++++++++---- cmd/data-usage_test.go | 664 ++++++++++++++++++++++++ cmd/disk-cache.go | 2 +- cmd/fastwalk.go | 179 +------ cmd/format-xl.go | 2 +- cmd/fs-v1-multipart.go | 2 +- cmd/fs-v1-multipart_test.go | 32 +- cmd/fs-v1.go | 41 +- cmd/fs-v1_test.go | 2 - cmd/gateway-unsupported.go | 4 +- cmd/gateway/azure/gateway-azure.go | 3 +- cmd/gateway/s3/gateway-s3-sse.go | 2 +- cmd/generic-handlers.go | 2 +- cmd/global-heal.go | 2 +- cmd/http-stats.go | 14 +- cmd/iam-object-store.go | 5 +- cmd/jwt/parser_test.go | 23 +- cmd/logger/target/http/http.go | 9 +- cmd/naughty-disk_test.go | 5 +- cmd/object-api-datatypes.go | 19 +- cmd/object-api-errors.go | 12 +- cmd/object-api-interface.go | 2 +- cmd/object-api-utils.go | 6 +- cmd/object-handlers.go | 2 +- cmd/posix-diskid-check.go | 5 +- cmd/posix.go | 18 +- cmd/server-main.go | 4 + cmd/server-startup-msg.go | 4 +- cmd/service.go | 16 +- cmd/signals.go | 2 +- cmd/storage-interface.go | 3 +- cmd/storage-rest-client.go | 54 +- cmd/storage-rest-common.go | 2 +- cmd/storage-rest-server.go | 72 ++- cmd/streaming-signature-v4.go | 2 +- cmd/test-utils_test.go | 19 +- cmd/web-handlers_test.go | 2 +- cmd/xl-sets.go | 18 +- cmd/xl-v1-multipart.go | 2 +- cmd/xl-v1-multipart_test.go | 28 +- cmd/xl-v1.go | 190 ++++++- cmd/xl-zones.go | 129 +++-- go.mod | 3 + go.sum | 7 + mint/run/core/aws-sdk-go/quick-tests.go | 21 +- pkg/event/errors.go | 4 +- pkg/event/target/amqp.go | 2 + pkg/handlers/forwarder.go | 3 +- pkg/ioutil/ioutil.go | 2 +- pkg/lock/lock_windows.go | 1 + pkg/madmin/info-commands.go | 18 +- pkg/s3select/sql/utils.go | 2 +- 61 files changed, 2895 insertions(+), 543 deletions(-) create mode 100644 browser/staticcheck.conf create mode 100644 cmd/data-usage-cache.go create mode 100644 cmd/data-usage-cache_gen.go create mode 100644 cmd/data-usage-cache_gen_test.go create mode 100644 cmd/data-usage_test.go diff --git a/Makefile b/Makefile index 9694bbe5d..7f95d336a 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ getdeps: ifeq ($(GOARCH),s390x) @which staticcheck 1>/dev/null || (echo "Installing staticcheck" && GO111MODULE=off go get honnef.co/go/tools/cmd/staticcheck) else - @which staticcheck 1>/dev/null || (echo "Installing staticcheck" && wget --quiet https://github.com/dominikh/go-tools/releases/download/2019.2.3/staticcheck_${GOOS}_${GOARCH}.tar.gz && tar xf staticcheck_${GOOS}_${GOARCH}.tar.gz && mv staticcheck/staticcheck ${GOPATH}/bin/staticcheck && chmod +x ${GOPATH}/bin/staticcheck && rm -f staticcheck_${GOOS}_${GOARCH}.tar.gz && rm -rf staticcheck) + @which staticcheck 1>/dev/null || (echo "Installing staticcheck" && wget --quiet https://github.com/dominikh/go-tools/releases/download/2020.1.3/staticcheck_${GOOS}_${GOARCH}.tar.gz && tar xf staticcheck_${GOOS}_${GOARCH}.tar.gz && mv staticcheck/staticcheck ${GOPATH}/bin/staticcheck && chmod +x ${GOPATH}/bin/staticcheck && rm -f 
staticcheck_${GOOS}_${GOARCH}.tar.gz && rm -rf staticcheck) endif @which misspell 1>/dev/null || (echo "Installing misspell" && GO111MODULE=off go get -u github.com/client9/misspell/cmd/misspell) diff --git a/browser/staticcheck.conf b/browser/staticcheck.conf new file mode 100644 index 000000000..322047da4 --- /dev/null +++ b/browser/staticcheck.conf @@ -0,0 +1 @@ +checks = ["all", "-ST1005", "-ST1000", "-SA4000", "-SA9004", "-SA1019", "-SA1008", "-U1000", "-ST1003", "-ST1018"] diff --git a/cmd/admin-heal-ops.go b/cmd/admin-heal-ops.go index 97acbca27..c426e0a2a 100644 --- a/cmd/admin-heal-ops.go +++ b/cmd/admin-heal-ops.go @@ -106,12 +106,12 @@ func initHealState() *allHealState { healSeqMap: make(map[string]*healSequence), } - go healState.periodicHealSeqsClean() + go healState.periodicHealSeqsClean(GlobalContext) return healState } -func (ahs *allHealState) periodicHealSeqsClean() { +func (ahs *allHealState) periodicHealSeqsClean(ctx context.Context) { // Launch clean-up routine to remove this heal sequence (after // it ends) from the global state after timeout has elapsed. ticker := time.NewTicker(time.Minute * 5) @@ -127,7 +127,7 @@ func (ahs *allHealState) periodicHealSeqsClean() { } } ahs.Unlock() - case <-GlobalServiceDoneCh: + case <-ctx.Done(): // server could be restarting - need // to exit immediately return @@ -369,7 +369,7 @@ func newHealSequence(bucket, objPrefix, clientAddr string, reqInfo := &logger.ReqInfo{RemoteHost: clientAddr, API: "Heal", BucketName: bucket} reqInfo.AppendTags("prefix", objPrefix) - ctx := logger.SetReqInfo(context.Background(), reqInfo) + ctx := logger.SetReqInfo(GlobalContext, reqInfo) return &healSequence{ bucket: bucket, @@ -603,7 +603,7 @@ func (h *healSequence) healItemsFromSourceCh() error { h.lastHealActivity = UTCNow() case <-h.traverseAndHealDoneCh: return nil - case <-GlobalServiceDoneCh: + case <-h.ctx.Done(): return nil } } @@ -630,11 +630,6 @@ func (h *healSequence) healItems(bucketsOnly bool) error { return err } - // Start healing the background ops prefix. 
- if err := h.healMinioSysMeta(backgroundOpsMetaPrefix)(); err != nil { - logger.LogIf(h.ctx, err) - } - // Heal buckets and objects return h.healBuckets(bucketsOnly) } diff --git a/cmd/config-current.go b/cmd/config-current.go index d1229cbd8..1bd9246f9 100644 --- a/cmd/config-current.go +++ b/cmd/config-current.go @@ -26,7 +26,6 @@ import ( "github.com/minio/minio/cmd/config/cache" "github.com/minio/minio/cmd/config/compress" "github.com/minio/minio/cmd/config/etcd" - xetcd "github.com/minio/minio/cmd/config/etcd" "github.com/minio/minio/cmd/config/etcd/dns" xldap "github.com/minio/minio/cmd/config/identity/ldap" "github.com/minio/minio/cmd/config/identity/openid" @@ -304,13 +303,13 @@ func lookupConfigs(s config.Config) { } } - etcdCfg, err := xetcd.LookupConfig(s[config.EtcdSubSys][config.Default], globalRootCAs) + etcdCfg, err := etcd.LookupConfig(s[config.EtcdSubSys][config.Default], globalRootCAs) if err != nil { logger.LogIf(ctx, fmt.Errorf("Unable to initialize etcd config: %w", err)) } if etcdCfg.Enabled { - globalEtcdClient, err = xetcd.New(etcdCfg) + globalEtcdClient, err = etcd.New(etcdCfg) if err != nil { logger.LogIf(ctx, fmt.Errorf("Unable to initialize etcd config: %w", err)) } diff --git a/cmd/config/errors-utils.go b/cmd/config/errors-utils.go index 301619608..7fd52cb1c 100644 --- a/cmd/config/errors-utils.go +++ b/cmd/config/errors-utils.go @@ -46,7 +46,7 @@ func (u Err) Clone() Err { } } -// Return the error message +// Error returns the error message func (u Err) Error() string { if u.detail == "" { if u.msg != "" { diff --git a/cmd/config/etcd/dns/etcd_dns.go b/cmd/config/etcd/dns/etcd_dns.go index fc1195b18..094536008 100644 --- a/cmd/config/etcd/dns/etcd_dns.go +++ b/cmd/config/etcd/dns/etcd_dns.go @@ -60,11 +60,7 @@ func (c *CoreDNS) List() (map[string][]SrvRecord, error) { if record.Key == "" { continue } - if _, ok := srvRecords[record.Key]; ok { - srvRecords[record.Key] = append(srvRecords[record.Key], record) - } else { - srvRecords[record.Key] = []SrvRecord{record} - } + srvRecords[record.Key] = append(srvRecords[record.Key], record) } } return srvRecords, nil diff --git a/cmd/data-usage-cache.go b/cmd/data-usage-cache.go new file mode 100644 index 000000000..129cabf3f --- /dev/null +++ b/cmd/data-usage-cache.go @@ -0,0 +1,521 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "bytes" + "context" + "encoding/binary" + "fmt" + "io" + "path" + "path/filepath" + "strings" + "time" + + "github.com/cespare/xxhash/v2" + "github.com/minio/minio/cmd/logger" + "github.com/minio/minio/pkg/color" + "github.com/minio/minio/pkg/hash" + "github.com/tinylib/msgp/msgp" +) + +const dataUsageHashLen = 8 + +//go:generate msgp -file $GOFILE -unexported + +// dataUsageHash is the hash type used. +type dataUsageHash uint64 + +// sizeHistogram is a size histogram. 
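+// Index i counts objects whose size falls into ObjectsHistogramIntervals[i],
+// so the array mirrors the named intervals returned by asMap.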
+type sizeHistogram [dataUsageBucketLen]uint64 + +//msgp:tuple dataUsageEntry +type dataUsageEntry struct { + // These fields do no include any children. + Size int64 + Objects uint64 + ObjSizes sizeHistogram + + Children dataUsageHashMap +} + +//msgp:ignore dataUsageEntryInfo +type dataUsageEntryInfo struct { + Name string + Parent string + Entry dataUsageEntry +} + +type dataUsageCacheInfo struct { + // Name of the bucket. Also root element. + Name string + LastUpdate time.Time + NextCycle uint8 +} + +// merge other data usage entry into this, excluding children. +func (e *dataUsageEntry) merge(other dataUsageEntry) { + e.Objects += other.Objects + e.Size += other.Size + for i, v := range other.ObjSizes[:] { + e.ObjSizes[i] += v + } +} + +// mod returns true if the hash mod cycles == cycle. +func (h dataUsageHash) mod(cycle uint8, cycles uint8) bool { + return uint8(h)%cycles == cycle%cycles +} + +// addChildString will add a child based on its name. +// If it already exists it will not be added again. +func (e *dataUsageEntry) addChildString(name string) { + e.addChild(hashPath(name)) +} + +// addChild will add a child based on its hash. +// If it already exists it will not be added again. +func (e *dataUsageEntry) addChild(hash dataUsageHash) { + if _, ok := e.Children[hash]; ok { + return + } + if e.Children == nil { + e.Children = make(dataUsageHashMap, 1) + } + e.Children[hash] = struct{}{} +} + +// find a path in the cache. +// Returns nil if not found. +func (d *dataUsageCache) find(path string) *dataUsageEntry { + due, ok := d.Cache[hashPath(path)] + if !ok { + return nil + } + return &due +} + +// dui converts the flattened version of the path to DataUsageInfo. +func (d *dataUsageCache) dui(path string, buckets []BucketInfo) DataUsageInfo { + e := d.find(path) + if e == nil { + return DataUsageInfo{LastUpdate: time.Now()} + } + flat := d.flatten(*e) + return DataUsageInfo{ + LastUpdate: d.Info.LastUpdate, + ObjectsCount: flat.Objects, + ObjectsTotalSize: uint64(flat.Size), + ObjectsSizesHistogram: flat.ObjSizes.asMap(), + BucketsCount: uint64(len(e.Children)), + BucketsSizes: d.pathSizes(buckets), + } +} + +// replace will add or replace an entry in the cache. +// If a parent is specified it will be added to that if not already there. +// If the parent does not exist, it will be added. +func (d *dataUsageCache) replace(path, parent string, e dataUsageEntry) { + hash := hashPath(path) + if d.Cache == nil { + d.Cache = make(map[dataUsageHash]dataUsageEntry, 100) + } + d.Cache[hash] = e + if parent != "" { + phash := hashPath(parent) + p := d.Cache[phash] + p.addChild(hash) + d.Cache[phash] = p + } +} + +// replaceHashed add or replaces an entry to the cache based on its hash. +// If a parent is specified it will be added to that if not already there. +// If the parent does not exist, it will be added. +func (d *dataUsageCache) replaceHashed(hash dataUsageHash, parent *dataUsageHash, e dataUsageEntry) { + if d.Cache == nil { + d.Cache = make(map[dataUsageHash]dataUsageEntry, 100) + } + d.Cache[hash] = e + if parent != nil { + p := d.Cache[*parent] + p.addChild(hash) + d.Cache[*parent] = p + } +} + +// StringAll returns a detailed string representation of all entries in the cache. +func (d *dataUsageCache) StringAll() string { + s := fmt.Sprintf("info:%+v\n", d.Info) + for k, v := range d.Cache { + s += fmt.Sprintf("\t%v: %+v\n", k, v) + } + return strings.TrimSpace(s) +} + +// String returns a human readable representation of the string. 
+func (h dataUsageHash) String() string { + return fmt.Sprintf("%x", uint64(h)) +} + +// flatten all children of the root into the root element and return it. +func (d *dataUsageCache) flatten(root dataUsageEntry) dataUsageEntry { + for id := range root.Children { + e := d.Cache[id] + if len(e.Children) > 0 { + e = d.flatten(e) + } + root.merge(e) + } + root.Children = nil + return root +} + +// add a size to the histogram. +func (h *sizeHistogram) add(size int64) { + // Fetch the histogram interval corresponding + // to the passed object size. + for i, interval := range ObjectsHistogramIntervals { + if size >= interval.start && size <= interval.end { + h[i]++ + break + } + } +} + +// asMap returns the map as a map[string]uint64. +func (h *sizeHistogram) asMap() map[string]uint64 { + res := make(map[string]uint64, 7) + for i, count := range h { + res[ObjectsHistogramIntervals[i].name] = count + } + return res +} + +// pathSizes returns the path sizes as a map. +func (d *dataUsageCache) pathSizes(buckets []BucketInfo) map[string]uint64 { + var dst = make(map[string]uint64, len(buckets)) + for _, bucket := range buckets { + e := d.find(bucket.Name) + if e == nil { + if dataUsageDebug { + logger.Info(color.Green("data-usage:")+" Bucket not found in cache: %v", bucket.Name) + } + continue + } + flat := d.flatten(*e) + dst[bucket.Name] = uint64(flat.Size) + } + return dst +} + +// sizeRecursive returns the path as a flattened entry. +func (d *dataUsageCache) sizeRecursive(path string) *dataUsageEntry { + root := d.find(path) + if root == nil || len(root.Children) == 0 { + return root + } + flat := d.flatten(*root) + return &flat +} + +// dataUsageCache contains a cache of data usage entries. +//msgp:ignore dataUsageCache +type dataUsageCache struct { + Info dataUsageCacheInfo + Cache map[dataUsageHash]dataUsageEntry +} + +// root returns the root of the cache. +func (d *dataUsageCache) root() *dataUsageEntry { + return d.find(d.Info.Name) +} + +// rootHash returns the root of the cache. +func (d *dataUsageCache) rootHash() dataUsageHash { + return hashPath(d.Info.Name) +} + +// clone returns a copy of the cache with no references to the existing. +func (d *dataUsageCache) clone() dataUsageCache { + clone := dataUsageCache{ + Info: d.Info, + Cache: make(map[dataUsageHash]dataUsageEntry, len(d.Cache)), + } + for k, v := range d.Cache { + clone.Cache[k] = v + } + return clone +} + +// merge root of other into d. +// children of root will be flattened before being merged. +// Last update time will be set to the last updated. +func (d *dataUsageCache) merge(other dataUsageCache) { + existingRoot := d.root() + otherRoot := other.root() + if existingRoot == nil && otherRoot == nil { + return + } + if otherRoot == nil { + return + } + if existingRoot == nil { + *d = other.clone() + return + } + if other.Info.LastUpdate.After(d.Info.LastUpdate) { + d.Info.LastUpdate = other.Info.LastUpdate + } + existingRoot.merge(*otherRoot) + eHash := d.rootHash() + for key := range otherRoot.Children { + entry := other.Cache[key] + flat := other.flatten(entry) + existing := d.Cache[key] + // If not found, merging simply adds. + existing.merge(flat) + d.replaceHashed(key, &eHash, existing) + } +} + +// load the cache content with name from minioMetaBackgroundOpsBucket. +// Only backend errors are returned as errors. +// If the object is not found or unable to deserialize d is cleared and nil error is returned. 
+func (d *dataUsageCache) load(ctx context.Context, store ObjectLayer, name string) error { + var buf bytes.Buffer + err := store.GetObject(ctx, dataUsageBucket, name, 0, -1, &buf, "", ObjectOptions{}) + if err != nil { + if !isErrObjectNotFound(err) { + return toObjectErr(err, dataUsageBucket, name) + } + *d = dataUsageCache{} + return nil + } + err = d.deserialize(buf.Bytes()) + if err != nil { + *d = dataUsageCache{} + logger.LogIf(ctx, err) + } + return nil +} + +// save the content of the cache to minioMetaBackgroundOpsBucket with the provided name. +func (d *dataUsageCache) save(ctx context.Context, store ObjectLayer, name string) error { + b := d.serialize() + size := int64(len(b)) + r, err := hash.NewReader(bytes.NewReader(b), size, "", "", size, false) + if err != nil { + return err + } + + _, err = store.PutObject(ctx, + dataUsageBucket, + name, + NewPutObjReader(r, nil, nil), + ObjectOptions{}) + return err +} + +// dataUsageCacheVer indicates the cache version. +// Bumping the cache version will drop data from previous versions +// and write new data with the new version. +const dataUsageCacheVer = 1 + +// serialize the contents of the cache. +func (d *dataUsageCache) serialize() []byte { + // Alloc pessimistically + // dataUsageCacheVer + due := dataUsageEntry{} + msgLen := 1 + msgLen += d.Info.Msgsize() + // len(d.Cache) + msgLen += binary.MaxVarintLen64 + // Hashes (one for key, assume 1 child/node) + msgLen += len(d.Cache) * dataUsageHashLen * 2 + msgLen += len(d.Cache) * due.Msgsize() + + // Create destination buffer... + dst := make([]byte, 0, msgLen) + + var n int + tmp := make([]byte, 1024) + // byte: version. + dst = append(dst, dataUsageCacheVer) + // Info... + dst, err := d.Info.MarshalMsg(dst) + if err != nil { + panic(err) + } + n = binary.PutUvarint(tmp, uint64(len(d.Cache))) + dst = append(dst, tmp[:n]...) + + for k, v := range d.Cache { + // Put key + binary.LittleEndian.PutUint64(tmp[:dataUsageHashLen], uint64(k)) + dst = append(dst, tmp[:8]...) + tmp, err = v.MarshalMsg(tmp[:0]) + if err != nil { + panic(err) + } + // key, value pairs. + dst = append(dst, tmp...) + + } + return dst +} + +// deserialize the supplied byte slice into the cache. +func (d *dataUsageCache) deserialize(b []byte) error { + if len(b) < 1 { + return io.ErrUnexpectedEOF + } + switch b[0] { + case 1: + default: + return fmt.Errorf("dataUsageCache: unknown version: %d", int(b[0])) + } + b = b[1:] + + // Info... + b, err := d.Info.UnmarshalMsg(b) + if err != nil { + return err + } + cacheLen, n := binary.Uvarint(b) + if n <= 0 { + return fmt.Errorf("dataUsageCache: reading cachelen, n <= 0 ") + } + b = b[n:] + d.Cache = make(map[dataUsageHash]dataUsageEntry, cacheLen) + + for i := 0; i < int(cacheLen); i++ { + if len(b) <= dataUsageHashLen { + return io.ErrUnexpectedEOF + } + k := binary.LittleEndian.Uint64(b[:dataUsageHashLen]) + b = b[dataUsageHashLen:] + var v dataUsageEntry + b, err = v.UnmarshalMsg(b) + if err != nil { + return err + } + d.Cache[dataUsageHash(k)] = v + } + return nil +} + +// Trim this from start+end of hashes. +var hashPathCutSet = dataUsageRoot + +func init() { + if dataUsageRoot != string(filepath.Separator) { + hashPathCutSet = dataUsageRoot + string(filepath.Separator) + } +} + +// hashPath calculates a hash of the provided string. 
+func hashPath(data string) dataUsageHash { + if data != dataUsageRoot { + data = strings.Trim(data, hashPathCutSet) + } + data = path.Clean(data) + return dataUsageHash(xxhash.Sum64String(data)) +} + +//msgp:ignore dataUsageEntryInfo +type dataUsageHashMap map[dataUsageHash]struct{} + +// MarshalMsg implements msgp.Marshaler +func (d dataUsageHashMap) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, d.Msgsize()) + + // Write bin header manually + const mbin32 uint8 = 0xc6 + sz := uint32(len(d)) * dataUsageHashLen + o = append(o, mbin32, byte(sz>>24), byte(sz>>16), byte(sz>>8), byte(sz)) + + var tmp [dataUsageHashLen]byte + for k := range d { + binary.LittleEndian.PutUint64(tmp[:], uint64(k)) + o = append(o, tmp[:]...) + } + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (d dataUsageHashMap) Msgsize() (s int) { + s = 5 + len(d)*dataUsageHashLen + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (d *dataUsageHashMap) UnmarshalMsg(bts []byte) (o []byte, err error) { + var hashes []byte + hashes, bts, err = msgp.ReadBytesZC(bts) + if err != nil { + err = msgp.WrapError(err, "dataUsageHashMap") + return + } + + var dst = make(dataUsageHashMap, len(hashes)/dataUsageHashLen) + for len(hashes) >= dataUsageHashLen { + dst[dataUsageHash(binary.LittleEndian.Uint64(hashes[:dataUsageHashLen]))] = struct{}{} + hashes = hashes[dataUsageHashLen:] + } + *d = dst + o = bts + return +} + +func (d *dataUsageHashMap) DecodeMsg(dc *msgp.Reader) (err error) { + var zb0001 uint32 + zb0001, err = dc.ReadBytesHeader() + if err != nil { + err = msgp.WrapError(err) + return + } + var dst = make(dataUsageHashMap, zb0001) + var tmp [8]byte + for i := uint32(0); i < zb0001; i++ { + _, err = io.ReadFull(dc, tmp[:]) + if err != nil { + err = msgp.WrapError(err, "dataUsageHashMap") + return + } + dst[dataUsageHash(binary.LittleEndian.Uint64(tmp[:]))] = struct{}{} + } + return nil +} +func (d dataUsageHashMap) EncodeMsg(en *msgp.Writer) (err error) { + err = en.WriteBytesHeader(uint32(len(d)) * dataUsageHashLen) + if err != nil { + err = msgp.WrapError(err) + return + } + var tmp [dataUsageHashLen]byte + for k := range d { + binary.LittleEndian.PutUint64(tmp[:], uint64(k)) + _, err = en.Write(tmp[:]) + if err != nil { + err = msgp.WrapError(err) + return + } + } + return nil +} diff --git a/cmd/data-usage-cache_gen.go b/cmd/data-usage-cache_gen.go new file mode 100644 index 000000000..0434254a1 --- /dev/null +++ b/cmd/data-usage-cache_gen.go @@ -0,0 +1,447 @@ +package cmd + +// Code generated by github.com/tinylib/msgp DO NOT EDIT. 
+ +import ( + "github.com/tinylib/msgp/msgp" +) + +// DecodeMsg implements msgp.Decodable +func (z *dataUsageCacheInfo) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zb0001 uint32 + zb0001, err = dc.ReadMapHeader() + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "Name": + z.Name, err = dc.ReadString() + if err != nil { + err = msgp.WrapError(err, "Name") + return + } + case "LastUpdate": + z.LastUpdate, err = dc.ReadTime() + if err != nil { + err = msgp.WrapError(err, "LastUpdate") + return + } + case "NextCycle": + z.NextCycle, err = dc.ReadUint8() + if err != nil { + err = msgp.WrapError(err, "NextCycle") + return + } + default: + err = dc.Skip() + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z dataUsageCacheInfo) EncodeMsg(en *msgp.Writer) (err error) { + // map header, size 3 + // write "Name" + err = en.Append(0x83, 0xa4, 0x4e, 0x61, 0x6d, 0x65) + if err != nil { + return + } + err = en.WriteString(z.Name) + if err != nil { + err = msgp.WrapError(err, "Name") + return + } + // write "LastUpdate" + err = en.Append(0xaa, 0x4c, 0x61, 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65) + if err != nil { + return + } + err = en.WriteTime(z.LastUpdate) + if err != nil { + err = msgp.WrapError(err, "LastUpdate") + return + } + // write "NextCycle" + err = en.Append(0xa9, 0x4e, 0x65, 0x78, 0x74, 0x43, 0x79, 0x63, 0x6c, 0x65) + if err != nil { + return + } + err = en.WriteUint8(z.NextCycle) + if err != nil { + err = msgp.WrapError(err, "NextCycle") + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z dataUsageCacheInfo) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // map header, size 3 + // string "Name" + o = append(o, 0x83, 0xa4, 0x4e, 0x61, 0x6d, 0x65) + o = msgp.AppendString(o, z.Name) + // string "LastUpdate" + o = append(o, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65) + o = msgp.AppendTime(o, z.LastUpdate) + // string "NextCycle" + o = append(o, 0xa9, 0x4e, 0x65, 0x78, 0x74, 0x43, 0x79, 0x63, 0x6c, 0x65) + o = msgp.AppendUint8(o, z.NextCycle) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *dataUsageCacheInfo) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zb0001 uint32 + zb0001, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "Name": + z.Name, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "Name") + return + } + case "LastUpdate": + z.LastUpdate, bts, err = msgp.ReadTimeBytes(bts) + if err != nil { + err = msgp.WrapError(err, "LastUpdate") + return + } + case "NextCycle": + z.NextCycle, bts, err = msgp.ReadUint8Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "NextCycle") + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z dataUsageCacheInfo) Msgsize() (s int) { + s = 1 + 5 + msgp.StringPrefixSize + 
len(z.Name) + 11 + msgp.TimeSize + 10 + msgp.Uint8Size + return +} + +// DecodeMsg implements msgp.Decodable +func (z *dataUsageEntry) DecodeMsg(dc *msgp.Reader) (err error) { + var zb0001 uint32 + zb0001, err = dc.ReadArrayHeader() + if err != nil { + err = msgp.WrapError(err) + return + } + if zb0001 != 4 { + err = msgp.ArrayError{Wanted: 4, Got: zb0001} + return + } + z.Size, err = dc.ReadInt64() + if err != nil { + err = msgp.WrapError(err, "Size") + return + } + z.Objects, err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, "Objects") + return + } + var zb0002 uint32 + zb0002, err = dc.ReadArrayHeader() + if err != nil { + err = msgp.WrapError(err, "ObjSizes") + return + } + if zb0002 != uint32(dataUsageBucketLen) { + err = msgp.ArrayError{Wanted: uint32(dataUsageBucketLen), Got: zb0002} + return + } + for za0001 := range z.ObjSizes { + z.ObjSizes[za0001], err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, "ObjSizes", za0001) + return + } + } + err = z.Children.DecodeMsg(dc) + if err != nil { + err = msgp.WrapError(err, "Children") + return + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *dataUsageEntry) EncodeMsg(en *msgp.Writer) (err error) { + // array header, size 4 + err = en.Append(0x94) + if err != nil { + return + } + err = en.WriteInt64(z.Size) + if err != nil { + err = msgp.WrapError(err, "Size") + return + } + err = en.WriteUint64(z.Objects) + if err != nil { + err = msgp.WrapError(err, "Objects") + return + } + err = en.WriteArrayHeader(uint32(dataUsageBucketLen)) + if err != nil { + err = msgp.WrapError(err, "ObjSizes") + return + } + for za0001 := range z.ObjSizes { + err = en.WriteUint64(z.ObjSizes[za0001]) + if err != nil { + err = msgp.WrapError(err, "ObjSizes", za0001) + return + } + } + err = z.Children.EncodeMsg(en) + if err != nil { + err = msgp.WrapError(err, "Children") + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *dataUsageEntry) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + // array header, size 4 + o = append(o, 0x94) + o = msgp.AppendInt64(o, z.Size) + o = msgp.AppendUint64(o, z.Objects) + o = msgp.AppendArrayHeader(o, uint32(dataUsageBucketLen)) + for za0001 := range z.ObjSizes { + o = msgp.AppendUint64(o, z.ObjSizes[za0001]) + } + o, err = z.Children.MarshalMsg(o) + if err != nil { + err = msgp.WrapError(err, "Children") + return + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *dataUsageEntry) UnmarshalMsg(bts []byte) (o []byte, err error) { + var zb0001 uint32 + zb0001, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + if zb0001 != 4 { + err = msgp.ArrayError{Wanted: 4, Got: zb0001} + return + } + z.Size, bts, err = msgp.ReadInt64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "Size") + return + } + z.Objects, bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "Objects") + return + } + var zb0002 uint32 + zb0002, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err, "ObjSizes") + return + } + if zb0002 != uint32(dataUsageBucketLen) { + err = msgp.ArrayError{Wanted: uint32(dataUsageBucketLen), Got: zb0002} + return + } + for za0001 := range z.ObjSizes { + z.ObjSizes[za0001], bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "ObjSizes", za0001) + return + } + } + bts, err = z.Children.UnmarshalMsg(bts) + if err != nil { + err = msgp.WrapError(err, 
"Children") + return + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *dataUsageEntry) Msgsize() (s int) { + s = 1 + msgp.Int64Size + msgp.Uint64Size + msgp.ArrayHeaderSize + (dataUsageBucketLen * (msgp.Uint64Size)) + z.Children.Msgsize() + return +} + +// DecodeMsg implements msgp.Decodable +func (z *dataUsageHash) DecodeMsg(dc *msgp.Reader) (err error) { + { + var zb0001 uint64 + zb0001, err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err) + return + } + (*z) = dataUsageHash(zb0001) + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z dataUsageHash) EncodeMsg(en *msgp.Writer) (err error) { + err = en.WriteUint64(uint64(z)) + if err != nil { + err = msgp.WrapError(err) + return + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z dataUsageHash) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + o = msgp.AppendUint64(o, uint64(z)) + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *dataUsageHash) UnmarshalMsg(bts []byte) (o []byte, err error) { + { + var zb0001 uint64 + zb0001, bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + (*z) = dataUsageHash(zb0001) + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z dataUsageHash) Msgsize() (s int) { + s = msgp.Uint64Size + return +} + +// DecodeMsg implements msgp.Decodable +func (z *sizeHistogram) DecodeMsg(dc *msgp.Reader) (err error) { + var zb0001 uint32 + zb0001, err = dc.ReadArrayHeader() + if err != nil { + err = msgp.WrapError(err) + return + } + if zb0001 != uint32(dataUsageBucketLen) { + err = msgp.ArrayError{Wanted: uint32(dataUsageBucketLen), Got: zb0001} + return + } + for za0001 := range z { + z[za0001], err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, za0001) + return + } + } + return +} + +// EncodeMsg implements msgp.Encodable +func (z *sizeHistogram) EncodeMsg(en *msgp.Writer) (err error) { + err = en.WriteArrayHeader(uint32(dataUsageBucketLen)) + if err != nil { + err = msgp.WrapError(err) + return + } + for za0001 := range z { + err = en.WriteUint64(z[za0001]) + if err != nil { + err = msgp.WrapError(err, za0001) + return + } + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *sizeHistogram) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + o = msgp.AppendArrayHeader(o, uint32(dataUsageBucketLen)) + for za0001 := range z { + o = msgp.AppendUint64(o, z[za0001]) + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *sizeHistogram) UnmarshalMsg(bts []byte) (o []byte, err error) { + var zb0001 uint32 + zb0001, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + if zb0001 != uint32(dataUsageBucketLen) { + err = msgp.ArrayError{Wanted: uint32(dataUsageBucketLen), Got: zb0001} + return + } + for za0001 := range z { + z[za0001], bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, za0001) + return + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *sizeHistogram) Msgsize() (s int) { + s = msgp.ArrayHeaderSize + (dataUsageBucketLen * (msgp.Uint64Size)) + return +} diff --git a/cmd/data-usage-cache_gen_test.go b/cmd/data-usage-cache_gen_test.go new file mode 100644 index 
000000000..9c3edc3af --- /dev/null +++ b/cmd/data-usage-cache_gen_test.go @@ -0,0 +1,349 @@ +package cmd + +// Code generated by github.com/tinylib/msgp DO NOT EDIT. + +import ( + "bytes" + "testing" + + "github.com/tinylib/msgp/msgp" +) + +func TestMarshalUnmarshaldataUsageCacheInfo(t *testing.T) { + v := dataUsageCacheInfo{} + bts, err := v.MarshalMsg(nil) + if err != nil { + t.Fatal(err) + } + left, err := v.UnmarshalMsg(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) + } + + left, err = msgp.Skip(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after Skip(): %q", len(left), left) + } +} + +func BenchmarkMarshalMsgdataUsageCacheInfo(b *testing.B) { + v := dataUsageCacheInfo{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.MarshalMsg(nil) + } +} + +func BenchmarkAppendMsgdataUsageCacheInfo(b *testing.B) { + v := dataUsageCacheInfo{} + bts := make([]byte, 0, v.Msgsize()) + bts, _ = v.MarshalMsg(bts[0:0]) + b.SetBytes(int64(len(bts))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + bts, _ = v.MarshalMsg(bts[0:0]) + } +} + +func BenchmarkUnmarshaldataUsageCacheInfo(b *testing.B) { + v := dataUsageCacheInfo{} + bts, _ := v.MarshalMsg(nil) + b.ReportAllocs() + b.SetBytes(int64(len(bts))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := v.UnmarshalMsg(bts) + if err != nil { + b.Fatal(err) + } + } +} + +func TestEncodeDecodedataUsageCacheInfo(t *testing.T) { + v := dataUsageCacheInfo{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + + m := v.Msgsize() + if buf.Len() > m { + t.Log("WARNING: TestEncodeDecodedataUsageCacheInfo Msgsize() is inaccurate") + } + + vn := dataUsageCacheInfo{} + err := msgp.Decode(&buf, &vn) + if err != nil { + t.Error(err) + } + + buf.Reset() + msgp.Encode(&buf, &v) + err = msgp.NewReader(&buf).Skip() + if err != nil { + t.Error(err) + } +} + +func BenchmarkEncodedataUsageCacheInfo(b *testing.B) { + v := dataUsageCacheInfo{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + b.SetBytes(int64(buf.Len())) + en := msgp.NewWriter(msgp.Nowhere) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.EncodeMsg(en) + } + en.Flush() +} + +func BenchmarkDecodedataUsageCacheInfo(b *testing.B) { + v := dataUsageCacheInfo{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + b.SetBytes(int64(buf.Len())) + rd := msgp.NewEndlessReader(buf.Bytes(), b) + dc := msgp.NewReader(rd) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + err := v.DecodeMsg(dc) + if err != nil { + b.Fatal(err) + } + } +} + +func TestMarshalUnmarshaldataUsageEntry(t *testing.T) { + v := dataUsageEntry{} + bts, err := v.MarshalMsg(nil) + if err != nil { + t.Fatal(err) + } + left, err := v.UnmarshalMsg(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) + } + + left, err = msgp.Skip(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after Skip(): %q", len(left), left) + } +} + +func BenchmarkMarshalMsgdataUsageEntry(b *testing.B) { + v := dataUsageEntry{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.MarshalMsg(nil) + } +} + +func BenchmarkAppendMsgdataUsageEntry(b *testing.B) { + v := dataUsageEntry{} + bts := make([]byte, 0, v.Msgsize()) + bts, _ = v.MarshalMsg(bts[0:0]) + b.SetBytes(int64(len(bts))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; 
i++ { + bts, _ = v.MarshalMsg(bts[0:0]) + } +} + +func BenchmarkUnmarshaldataUsageEntry(b *testing.B) { + v := dataUsageEntry{} + bts, _ := v.MarshalMsg(nil) + b.ReportAllocs() + b.SetBytes(int64(len(bts))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := v.UnmarshalMsg(bts) + if err != nil { + b.Fatal(err) + } + } +} + +func TestEncodeDecodedataUsageEntry(t *testing.T) { + v := dataUsageEntry{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + + m := v.Msgsize() + if buf.Len() > m { + t.Log("WARNING: TestEncodeDecodedataUsageEntry Msgsize() is inaccurate") + } + + vn := dataUsageEntry{} + err := msgp.Decode(&buf, &vn) + if err != nil { + t.Error(err) + } + + buf.Reset() + msgp.Encode(&buf, &v) + err = msgp.NewReader(&buf).Skip() + if err != nil { + t.Error(err) + } +} + +func BenchmarkEncodedataUsageEntry(b *testing.B) { + v := dataUsageEntry{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + b.SetBytes(int64(buf.Len())) + en := msgp.NewWriter(msgp.Nowhere) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.EncodeMsg(en) + } + en.Flush() +} + +func BenchmarkDecodedataUsageEntry(b *testing.B) { + v := dataUsageEntry{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + b.SetBytes(int64(buf.Len())) + rd := msgp.NewEndlessReader(buf.Bytes(), b) + dc := msgp.NewReader(rd) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + err := v.DecodeMsg(dc) + if err != nil { + b.Fatal(err) + } + } +} + +func TestMarshalUnmarshalsizeHistogram(t *testing.T) { + v := sizeHistogram{} + bts, err := v.MarshalMsg(nil) + if err != nil { + t.Fatal(err) + } + left, err := v.UnmarshalMsg(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) + } + + left, err = msgp.Skip(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after Skip(): %q", len(left), left) + } +} + +func BenchmarkMarshalMsgsizeHistogram(b *testing.B) { + v := sizeHistogram{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.MarshalMsg(nil) + } +} + +func BenchmarkAppendMsgsizeHistogram(b *testing.B) { + v := sizeHistogram{} + bts := make([]byte, 0, v.Msgsize()) + bts, _ = v.MarshalMsg(bts[0:0]) + b.SetBytes(int64(len(bts))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + bts, _ = v.MarshalMsg(bts[0:0]) + } +} + +func BenchmarkUnmarshalsizeHistogram(b *testing.B) { + v := sizeHistogram{} + bts, _ := v.MarshalMsg(nil) + b.ReportAllocs() + b.SetBytes(int64(len(bts))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := v.UnmarshalMsg(bts) + if err != nil { + b.Fatal(err) + } + } +} + +func TestEncodeDecodesizeHistogram(t *testing.T) { + v := sizeHistogram{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + + m := v.Msgsize() + if buf.Len() > m { + t.Log("WARNING: TestEncodeDecodesizeHistogram Msgsize() is inaccurate") + } + + vn := sizeHistogram{} + err := msgp.Decode(&buf, &vn) + if err != nil { + t.Error(err) + } + + buf.Reset() + msgp.Encode(&buf, &v) + err = msgp.NewReader(&buf).Skip() + if err != nil { + t.Error(err) + } +} + +func BenchmarkEncodesizeHistogram(b *testing.B) { + v := sizeHistogram{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + b.SetBytes(int64(buf.Len())) + en := msgp.NewWriter(msgp.Nowhere) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.EncodeMsg(en) + } + en.Flush() +} + +func BenchmarkDecodesizeHistogram(b *testing.B) { + v := sizeHistogram{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + 
b.SetBytes(int64(buf.Len())) + rd := msgp.NewEndlessReader(buf.Bytes(), b) + dc := msgp.NewReader(rd) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + err := v.DecodeMsg(dc) + if err != nil { + b.Fatal(err) + } + } +} diff --git a/cmd/data-usage.go b/cmd/data-usage.go index cea9ddeeb..54fe843d7 100644 --- a/cmd/data-usage.go +++ b/cmd/data-usage.go @@ -20,23 +20,36 @@ import ( "bytes" "context" "encoding/json" + "errors" "os" - "path/filepath" + "path" + "strconv" "time" jsoniter "github.com/json-iterator/go" "github.com/minio/minio/cmd/config" "github.com/minio/minio/cmd/logger" + "github.com/minio/minio/pkg/color" "github.com/minio/minio/pkg/env" "github.com/minio/minio/pkg/hash" ) const ( - dataUsageObjName = "data-usage" - dataUsageCrawlInterval = 12 * time.Hour - dataUsageCrawlConf = "MINIO_DISK_USAGE_CRAWL" + dataUsageObjName = "usage.json" + dataUsageCacheName = "usage-cache.bin" + dataUsageBucketCacheDir = "usage-caches" + dataUsageCrawlConf = "MINIO_DISK_USAGE_CRAWL" + dataUsageCrawlDelay = "MINIO_DISK_USAGE_CRAWL_DELAY" + dataUsageDebug = true + dataUsageSleepPerFolder = 1 * time.Millisecond + dataUsageSleepDefMult = 10.0 + dataUsageUpdateDirCycles = 16 + dataUsageRoot = SlashSeparator + dataUsageBucket = minioMetaBucket + SlashSeparator + bucketMetaPrefix + dataUsageStartDelay = 5 * time.Minute // Time to wait on startup and between cycles. ) +// initDataUsageStats will start the crawler unless disabled. func initDataUsageStats() { dataUsageEnabled, err := config.ParseBool(env.Get(dataUsageCrawlConf, config.EnableOn)) if err == nil && !dataUsageEnabled { @@ -45,6 +58,7 @@ func initDataUsageStats() { go runDataUsageInfoUpdateRoutine() } +// runDataUsageInfoUpdateRoutine will contain the main crawler. func runDataUsageInfoUpdateRoutine() { // Wait until the object layer is ready var objAPI ObjectLayer @@ -57,37 +71,13 @@ func runDataUsageInfoUpdateRoutine() { break } - runDataUsageInfo(context.Background(), objAPI, GlobalServiceDoneCh) -} - -// timeToNextCrawl returns the duration until next crawl should occur -// this is validated by verifying the LastUpdate time. -func timeToCrawl(ctx context.Context, objAPI ObjectLayer) time.Duration { - dataUsageInfo, err := loadDataUsageFromBackend(ctx, objAPI) - if err != nil { - // Upon an error wait for like 10 - // seconds to start the crawler. - return 10 * time.Second - } - // File indeed doesn't exist when LastUpdate is zero - // so we have never crawled, start crawl right away. - if dataUsageInfo.LastUpdate.IsZero() { - return 1 * time.Second - } - timeSinceLastUpdate := UTCNow().Sub(dataUsageInfo.LastUpdate) - if timeSinceLastUpdate > dataUsageCrawlInterval { - // Waited long enough start crawl in a 1 second - return 1 * time.Second - } - // No crawling needed, ask the routine to wait until - // the daily interval 12hrs - delta between last update - // with current time. - return dataUsageCrawlInterval - timeSinceLastUpdate + runDataUsageInfo(GlobalContext, objAPI) } var dataUsageLockTimeout = lifecycleLockTimeout -func runDataUsageInfo(ctx context.Context, objAPI ObjectLayer, endCh <-chan struct{}) { +func runDataUsageInfo(ctx context.Context, objAPI ObjectLayer) { + // Make sure only 1 crawler is running on the cluster. locker := objAPI.NewNSLock(ctx, minioMetaBucket, "leader-data-usage-info") for { err := locker.GetLock(dataUsageLockTimeout) @@ -99,47 +89,57 @@ func runDataUsageInfo(ctx context.Context, objAPI ObjectLayer, endCh <-chan stru // data usage calculator role for its lifetime. 
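+ // The lock is deliberately held for as long as the crawler runs;
+ // it is only released when the context is canceled below.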
break } - + if dataUsageDebug { + logger.Info(color.Green("runDataUsageInfo:") + " Starting crawler master") + } for { - wait := timeToCrawl(ctx, objAPI) select { - case <-endCh: + case <-ctx.Done(): locker.Unlock() return - case <-time.NewTimer(wait).C: - // Crawl only when no previous crawl has occurred, - // or its been too long since last crawl. - err := storeDataUsageInBackend(ctx, objAPI, objAPI.CrawlAndGetDataUsage(ctx, endCh)) + // Wait before starting next cycle and wait on startup. + case <-time.NewTimer(dataUsageStartDelay).C: + results := make(chan DataUsageInfo, 1) + go storeDataUsageInBackend(ctx, objAPI, results) + err := objAPI.CrawlAndGetDataUsage(ctx, results) + close(results) logger.LogIf(ctx, err) } } } -func storeDataUsageInBackend(ctx context.Context, objAPI ObjectLayer, dataUsageInfo DataUsageInfo) error { - dataUsageJSON, err := json.Marshal(dataUsageInfo) - if err != nil { - return err - } +// storeDataUsageInBackend will store all objects sent on the gui channel until closed. +func storeDataUsageInBackend(ctx context.Context, objAPI ObjectLayer, gui <-chan DataUsageInfo) { + for dataUsageInfo := range gui { + dataUsageJSON, err := json.MarshalIndent(dataUsageInfo, "", " ") + if err != nil { + logger.LogIf(ctx, err) + continue + } + if dataUsageDebug { + logger.Info(color.Green("data-usage:")+" Received update: %s", string(dataUsageJSON)) + } + size := int64(len(dataUsageJSON)) + r, err := hash.NewReader(bytes.NewReader(dataUsageJSON), size, "", "", size, false) + if err != nil { + logger.LogIf(ctx, err) + continue + } - size := int64(len(dataUsageJSON)) - r, err := hash.NewReader(bytes.NewReader(dataUsageJSON), size, "", "", size, false) - if err != nil { - return err + _, err = objAPI.PutObject(ctx, dataUsageBucket, dataUsageObjName, NewPutObjReader(r, nil, nil), ObjectOptions{}) + logger.LogIf(ctx, err) } - - _, err = objAPI.PutObject(ctx, minioMetaBackgroundOpsBucket, dataUsageObjName, NewPutObjReader(r, nil, nil), ObjectOptions{}) - return err } func loadDataUsageFromBackend(ctx context.Context, objAPI ObjectLayer) (DataUsageInfo, error) { var dataUsageInfoJSON bytes.Buffer - err := objAPI.GetObject(ctx, minioMetaBackgroundOpsBucket, dataUsageObjName, 0, -1, &dataUsageInfoJSON, "", ObjectOptions{}) + err := objAPI.GetObject(ctx, dataUsageBucket, dataUsageObjName, 0, -1, &dataUsageInfoJSON, "", ObjectOptions{}) if err != nil { if isErrObjectNotFound(err) { return DataUsageInfo{}, nil } - return DataUsageInfo{}, toObjectErr(err, minioMetaBackgroundOpsBucket, dataUsageObjName) + return DataUsageInfo{}, toObjectErr(err, dataUsageBucket, dataUsageObjName) } var dataUsageInfo DataUsageInfo @@ -160,52 +160,295 @@ type Item struct { type getSizeFn func(item Item) (int64, error) -func updateUsage(basePath string, doneCh <-chan struct{}, waitForLowActiveIO func(), getSize getSizeFn) DataUsageInfo { - var dataUsageInfo = DataUsageInfo{ - BucketsSizes: make(map[string]uint64), - ObjectsSizesHistogram: make(map[string]uint64), +type cachedFolder struct { + name string + parent *dataUsageHash +} + +type folderScanner struct { + root string + getSize getSizeFn + oldCache dataUsageCache + newCache dataUsageCache + waitForLowActiveIO func() + + newFolders []cachedFolder + existingFolders []cachedFolder +} + +// sleepDuration multiplies the duration d by x and sleeps if is more than 100 micro seconds. +// sleep is limited to max 1 second. 
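+// E.g. with x == 10, an operation measured at 50ms adds a 500ms sleep,
+// while one measured at 10µs or below adds none.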
+func sleepDuration(d time.Duration, x float64) { + // Don't sleep for really small amount of time + if d := time.Duration(float64(d) * x); d > time.Microsecond*100 { + if d > time.Second { + d = time.Second + } + time.Sleep(d) + } +} + +// scanQueuedLevels will scan the provided folders. +// Files found in the folders will be added to f.newCache. +// If final is provided folders will be put into f.newFolders or f.existingFolders. +// If final is not provided the folders found are returned from the function. +func (f *folderScanner) scanQueuedLevels(ctx context.Context, folders []cachedFolder, final bool) ([]cachedFolder, error) { + var nextFolders []cachedFolder + delayMult := dataUsageSleepDefMult + if mult := os.Getenv(dataUsageCrawlDelay); mult != "" { + if d, err := strconv.ParseFloat(mult, 64); err == nil { + delayMult = d + } + } + done := ctx.Done() + for _, folder := range folders { + select { + case <-done: + return nil, ctx.Err() + default: + } + f.waitForLowActiveIO() + sleepDuration(dataUsageSleepPerFolder, delayMult) + + cache := dataUsageEntry{} + thisHash := hashPath(folder.name) + + err := readDirFn(path.Join(f.root, folder.name), func(entName string, typ os.FileMode) error { + // Parse + entName = path.Clean(path.Join(folder.name, entName)) + bucket, _ := path2BucketObjectWithBasePath(f.root, entName) + if bucket == "" { + if dataUsageDebug { + logger.Info(color.Green("data-usage:")+" no bucket (%s,%s)", f.root, entName) + } + return nil + } + + if isReservedOrInvalidBucket(bucket, false) { + if dataUsageDebug { + logger.Info(color.Green("data-usage:")+" invalid bucket: %v, entry: %v", bucket, entName) + } + return nil + } + + select { + case <-done: + return ctx.Err() + default: + } + + if typ&os.ModeDir != 0 { + h := hashPath(entName) + _, exists := f.oldCache.Cache[h] + cache.addChildString(entName) + + this := cachedFolder{name: entName, parent: &thisHash} + cache.addChild(h) + if final { + if exists { + f.existingFolders = append(f.existingFolders, this) + } else { + f.newFolders = append(f.newFolders, this) + } + } else { + nextFolders = append(nextFolders, this) + } + return nil + } + f.waitForLowActiveIO() + // Dynamic time delay. + t := time.Now() + + // Get file size, ignore errors. + size, err := f.getSize(Item{Path: path.Join(f.root, entName), Typ: typ}) + + sleepDuration(time.Since(t), delayMult) + if err == errSkipFile { + return nil + } + logger.LogIf(ctx, err) + cache.Size += size + cache.Objects++ + cache.ObjSizes.add(size) + + return nil + }) + if err != nil { + return nil, err + } + f.newCache.replaceHashed(thisHash, folder.parent, cache) + } + return nextFolders, nil +} + +// deepScanFolder will deep scan a folder and return the size if no error occurs. 
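+// Unlike scanQueuedLevels it creates no child entries: everything found
+// below the folder is accumulated into the single returned entry.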
+func (f *folderScanner) deepScanFolder(ctx context.Context, folder string) (*dataUsageEntry, error) { + var cache dataUsageEntry + delayMult := dataUsageSleepDefMult + if mult := os.Getenv(dataUsageCrawlDelay); mult != "" { + if d, err := strconv.ParseFloat(mult, 64); err == nil { + delayMult = d + } + } + done := ctx.Done() + + var addDir func(entName string, typ os.FileMode) error + var dirStack = []string{f.root, folder} + + addDir = func(entName string, typ os.FileMode) error { + select { + case <-done: + return ctx.Err() + default: + } + + f.waitForLowActiveIO() + if typ&os.ModeDir != 0 { + dirStack = append(dirStack, entName) + err := readDirFn(path.Join(dirStack...), addDir) + dirStack = dirStack[:len(dirStack)-1] + sleepDuration(dataUsageSleepPerFolder, delayMult) + return err + } + // Dynamic time delay. + t := time.Now() + + // Get file size, ignore errors. + dirStack = append(dirStack, entName) + fileName := path.Join(dirStack...) + dirStack = dirStack[:len(dirStack)-1] + + size, err := f.getSize(Item{Path: fileName, Typ: typ}) + + // Don't sleep for really small amount of time + sleepDuration(time.Since(t), delayMult) + + if err == errSkipFile { + return nil + } + logger.LogIf(ctx, err) + cache.Size += size + cache.Objects++ + cache.ObjSizes.add(size) + return nil + } + err := readDirFn(path.Join(dirStack...), addDir) + if err != nil { + return nil, err + } + return &cache, nil +} + +// updateUsage will crawl the basepath+cache.Info.Name and return an updated cache. +// The returned cache will always be valid, but may not be updated from the existing. +// Before each operation waitForLowActiveIO is called which can be used to temporarily halt the crawler. +// If the supplied context is canceled the function will return at the first chance. +func updateUsage(ctx context.Context, basePath string, cache dataUsageCache, waitForLowActiveIO func(), getSize getSizeFn) (dataUsageCache, error) { + if cache.Info.Name == "" { + cache.Info.Name = dataUsageRoot + } + var logPrefix, logSuffix string + if dataUsageDebug { + logPrefix = color.Green("data-usage: ") + logSuffix = color.Blue(" - %v + %v", basePath, cache.Info.Name) + } + s := folderScanner{ + root: basePath, + getSize: getSize, + oldCache: cache, + newCache: dataUsageCache{Info: cache.Info}, + waitForLowActiveIO: waitForLowActiveIO, + newFolders: nil, + existingFolders: nil, } - fastWalk(basePath, 1, doneCh, func(path string, typ os.FileMode) error { - // Wait for I/O to go down. - waitForLowActiveIO() + done := ctx.Done() + var flattenLevels = 3 - bucket, entry := path2BucketObjectWithBasePath(basePath, path) - if bucket == "" { - return nil - } + // If we are scanning inside a bucket reduce depth by 1. + if cache.Info.Name != dataUsageRoot { + flattenLevels-- + } + if dataUsageDebug { + logger.Info(logPrefix+"Cycle: %v"+logSuffix, cache.Info.NextCycle) + } - if isReservedOrInvalidBucket(bucket, false) { - return filepath.SkipDir + // Always scan flattenLevels deep. Cache root is level 0. + todo := []cachedFolder{{name: cache.Info.Name}} + for i := 0; i < flattenLevels; i++ { + if dataUsageDebug { + logger.Info(logPrefix+"Level %v, scanning %v directories."+logSuffix, i, len(todo)) } - - if entry == "" && typ&os.ModeDir != 0 { - dataUsageInfo.BucketsCount++ - dataUsageInfo.BucketsSizes[bucket] = 0 - return nil - } - - if typ&os.ModeDir != 0 { - return nil - } - - t := time.Now() - size, err := getSize(Item{path, typ}) - // Use the response time of the getSize call to guess system load. - // Sleep equivalent time. 
- if d := time.Since(t); d > 100*time.Microsecond { - time.Sleep(d) + select { + case <-done: + return cache, ctx.Err() + default: } + var err error + todo, err = s.scanQueuedLevels(ctx, todo, i == flattenLevels-1) if err != nil { - return errSkipFile + // No useful information... + return cache, err + } + } + + if dataUsageDebug { + logger.Info(logPrefix+"New folders: %v"+logSuffix, s.newFolders) + } + // Add new folders first + for _, folder := range s.newFolders { + select { + case <-done: + return s.newCache, ctx.Err() + default: + } + du, err := s.deepScanFolder(ctx, folder.name) + if err != nil { + logger.LogIf(ctx, err) + continue + } + if du == nil { + logger.LogIf(ctx, errors.New("data-usage: no disk usage provided")) + continue + } + s.newCache.replace(folder.name, "", *du) + // Add to parent manually + if folder.parent != nil { + parent := s.newCache.Cache[*folder.parent] + parent.addChildString(folder.name) + } + } + + if dataUsageDebug { + logger.Info(logPrefix+"Existing folders: %v"+logSuffix, len(s.existingFolders)) + } + // Do selective scanning of existing folders. + for _, folder := range s.existingFolders { + select { + case <-done: + return s.newCache, ctx.Err() + default: + } + h := hashPath(folder.name) + if !h.mod(s.oldCache.Info.NextCycle, dataUsageUpdateDirCycles) { + s.newCache.replaceHashed(h, folder.parent, s.oldCache.Cache[h]) + continue } - dataUsageInfo.ObjectsCount++ - dataUsageInfo.ObjectsTotalSize += uint64(size) - dataUsageInfo.BucketsSizes[bucket] += uint64(size) - dataUsageInfo.ObjectsSizesHistogram[objSizeToHistoInterval(uint64(size))]++ - return nil - }) + // Update on this cycle... + du, err := s.deepScanFolder(ctx, folder.name) + if err != nil { + logger.LogIf(ctx, err) + continue + } + if du == nil { + logger.LogIf(ctx, errors.New("data-usage: no disk usage provided")) + continue + } + s.newCache.replaceHashed(h, folder.parent, *du) + } - return dataUsageInfo + s.newCache.Info.LastUpdate = time.Now() + s.newCache.Info.NextCycle++ + return s.newCache, nil } diff --git a/cmd/data-usage_test.go b/cmd/data-usage_test.go new file mode 100644 index 000000000..53305e7c4 --- /dev/null +++ b/cmd/data-usage_test.go @@ -0,0 +1,664 @@ +/* + * MinIO Cloud Storage, (C) 2020 MinIO, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package cmd + +import ( + "context" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "testing" +) + +type usageTestFile struct { + name string + size int +} + +func Test_updateUsage(t *testing.T) { + base, err := ioutil.TempDir("", "Test_updateUsage") + if err != nil { + t.Skip(err) + } + defer os.RemoveAll(base) + var files = []usageTestFile{ + {name: "rootfile", size: 10000}, + {name: "rootfile2", size: 10000}, + {name: "dir1/d1file", size: 2000}, + {name: "dir2/d2file", size: 300}, + {name: "dir1/dira/dafile", size: 100000}, + {name: "dir1/dira/dbfile", size: 200000}, + {name: "dir1/dira/dirasub/dcfile", size: 1000000}, + {name: "dir1/dira/dirasub/sublevel3/dccccfile", size: 10}, + } + createUsageTestFiles(t, base, files) + + getSize := func(item Item) (i int64, err error) { + if item.Typ&os.ModeDir == 0 { + s, err := os.Stat(item.Path) + if err != nil { + return 0, err + } + return s.Size(), nil + } + return 0, nil + } + got, err := updateUsage(context.Background(), base, dataUsageCache{}, func() {}, getSize) + if err != nil { + t.Fatal(err) + } + + // Test dirs + var want = []struct { + path string + isNil bool + size, objs int + flatten bool + oSizes sizeHistogram + }{ + { + path: "/", + size: 1322310, + flatten: true, + objs: 8, + oSizes: sizeHistogram{0: 2, 1: 6}, + }, + { + path: "/", + size: 20000, + objs: 2, + oSizes: sizeHistogram{1: 2}, + }, + { + path: "/dir1", + size: 2000, + objs: 1, + oSizes: sizeHistogram{1: 1}, + }, + { + path: "/dir1/dira", + flatten: true, + size: 1300010, + objs: 4, + oSizes: sizeHistogram{0: 1, 1: 3}, + }, + { + path: "/dir1/dira/", + flatten: true, + size: 1300010, + objs: 4, + oSizes: sizeHistogram{0: 1, 1: 3}, + }, + { + path: "/dir1/dira", + size: 300000, + objs: 2, + oSizes: sizeHistogram{0: 0, 1: 2}, + }, + { + path: "/dir1/dira/", + size: 300000, + objs: 2, + oSizes: sizeHistogram{0: 0, 1: 2}, + }, + { + path: "/nonexistying", + isNil: true, + }, + } + + for _, w := range want { + t.Run(w.path, func(t *testing.T) { + e := got.find(w.path) + if w.isNil { + if e != nil { + t.Error("want nil, got", e) + } + return + } + if e == nil { + t.Fatal("got nil result") + } + if w.flatten { + *e = got.flatten(*e) + } + if e.Size != int64(w.size) { + t.Error("got size", e.Size, "want", w.size) + } + if e.Objects != uint64(w.objs) { + t.Error("got objects", e.Objects, "want", w.objs) + } + if e.ObjSizes != w.oSizes { + t.Error("got histogram", e.ObjSizes, "want", w.oSizes) + } + }) + } + + files = []usageTestFile{ + { + name: "newfolder/afile", + size: 4, + }, + { + name: "newfolder/anotherone", + size: 1, + }, + { + name: "newfolder/anemptyone", + size: 0, + }, + { + name: "dir1/fileindir1", + size: 20000, + }, + { + name: "dir1/dirc/fileindirc", + size: 20000, + }, + { + name: "rootfile3", + size: 1000, + }, + } + createUsageTestFiles(t, base, files) + got, err = updateUsage(context.Background(), base, got, func() {}, getSize) + if err != nil { + t.Fatal(err) + } + + want = []struct { + path string + isNil bool + size, objs int + flatten bool + oSizes sizeHistogram + }{ + { + path: "/", + size: 1363315, + flatten: true, + objs: 14, + oSizes: sizeHistogram{0: 6, 1: 8}, + }, + { + path: "/", + size: 21000, + objs: 3, + oSizes: sizeHistogram{0: 1, 1: 2}, + }, + { + path: "/newfolder", + size: 5, + objs: 3, + oSizes: sizeHistogram{0: 3}, + }, + { + path: "/dir1/dira", + size: 1300010, + flatten: true, + objs: 4, + oSizes: sizeHistogram{0: 1, 1: 3}, + }, + { + path: "/nonexistying", + isNil: true, + }, + } + + for _, w := range want { + t.Run(w.path, 
func(t *testing.T) { + e := got.find(w.path) + if w.isNil { + if e != nil { + t.Error("want nil, got", e) + } + return + } + if e == nil { + t.Fatal("got nil result") + } + if w.flatten { + *e = got.flatten(*e) + } + if e.Size != int64(w.size) { + t.Error("got size", e.Size, "want", w.size) + } + if e.Objects != uint64(w.objs) { + t.Error("got objects", e.Objects, "want", w.objs) + } + if e.ObjSizes != w.oSizes { + t.Error("got histogram", e.ObjSizes, "want", w.oSizes) + } + }) + } + + files = []usageTestFile{ + { + name: "dir1/dira/dirasub/fileindira2", + size: 200, + }, + } + + createUsageTestFiles(t, base, files) + err = os.RemoveAll(filepath.Join(base, "dir1/dira/dirasub/dcfile")) + if err != nil { + t.Fatal(err) + } + // Changed dir must be picked up in this many cycles. + for i := 0; i < dataUsageUpdateDirCycles; i++ { + got, err = updateUsage(context.Background(), base, got, func() {}, getSize) + if err != nil { + t.Fatal(err) + } + } + + want = []struct { + path string + isNil bool + size, objs int + flatten bool + oSizes sizeHistogram + }{ + { + path: "/", + size: 363515, + flatten: true, + objs: 14, + oSizes: sizeHistogram{0: 7, 1: 7}, + }, + { + path: "/dir1/dira", + size: 300210, + objs: 4, + flatten: true, + oSizes: sizeHistogram{0: 2, 1: 2}, + }, + } + + for _, w := range want { + t.Run(w.path, func(t *testing.T) { + e := got.find(w.path) + if w.isNil { + if e != nil { + t.Error("want nil, got", e) + } + return + } + if e == nil { + t.Fatal("got nil result") + } + if w.flatten { + *e = got.flatten(*e) + } + if e.Size != int64(w.size) { + t.Error("got size", e.Size, "want", w.size) + } + if e.Objects != uint64(w.objs) { + t.Error("got objects", e.Objects, "want", w.objs) + } + if e.ObjSizes != w.oSizes { + t.Error("got histogram", e.ObjSizes, "want", w.oSizes) + } + }) + } + + t.Log(got.StringAll()) + + t.Logf("Root, flat: %+v", got.flatten(*got.root())) + t.Logf("Root: %+v", *got.root()) + t.Logf("/dir1/dira: %+v", *got.find("/dir1/dira")) + +} + +func Test_updateUsagePrefix(t *testing.T) { + base, err := ioutil.TempDir("", "Test_updateUsagePrefix") + if err != nil { + t.Skip(err) + } + base = filepath.Join(base, "bucket") + defer os.RemoveAll(base) + var files = []usageTestFile{ + {name: "bucket/rootfile", size: 10000}, + {name: "bucket/rootfile2", size: 10000}, + {name: "bucket/dir1/d1file", size: 2000}, + {name: "bucket/dir2/d2file", size: 300}, + {name: "bucket/dir1/dira/dafile", size: 100000}, + {name: "bucket/dir1/dira/dbfile", size: 200000}, + {name: "bucket/dir1/dira/dirasub/dcfile", size: 1000000}, + {name: "bucket/dir1/dira/dirasub/sublevel3/dccccfile", size: 10}, + } + createUsageTestFiles(t, base, files) + + getSize := func(item Item) (i int64, err error) { + if item.Typ&os.ModeDir == 0 { + s, err := os.Stat(item.Path) + if err != nil { + return 0, err + } + return s.Size(), nil + } + return 0, nil + } + got, err := updateUsage(context.Background(), base, dataUsageCache{Info: dataUsageCacheInfo{Name: "bucket"}}, func() {}, getSize) + if err != nil { + t.Fatal(err) + } + + // Test dirs + var want = []struct { + path string + isNil bool + size, objs int + oSizes sizeHistogram + }{ + { + path: "flat", + size: 1322310, + objs: 8, + oSizes: sizeHistogram{0: 2, 1: 6}, + }, + { + path: "bucket/", + size: 20000, + objs: 2, + oSizes: sizeHistogram{1: 2}, + }, + { + path: "bucket/dir1", + size: 2000, + objs: 1, + oSizes: sizeHistogram{1: 1}, + }, + { + path: "bucket/dir1/dira", + size: 1300010, + objs: 4, + oSizes: sizeHistogram{0: 1, 1: 3}, + }, + { + path: 
"bucket/dir1/dira/", + size: 1300010, + objs: 4, + oSizes: sizeHistogram{0: 1, 1: 3}, + }, + { + path: "bucket/nonexistying", + isNil: true, + }, + } + + for _, w := range want { + t.Run(w.path, func(t *testing.T) { + e := got.find(w.path) + if w.path == "flat" { + f := got.flatten(*got.root()) + e = &f + } + if w.isNil { + if e != nil { + t.Error("want nil, got", e) + } + return + } + if e == nil { + t.Fatal("got nil result") + } + if e.Size != int64(w.size) { + t.Error("got size", e.Size, "want", w.size) + } + if e.Objects != uint64(w.objs) { + t.Error("got objects", e.Objects, "want", w.objs) + } + if e.ObjSizes != w.oSizes { + t.Error("got histogram", e.ObjSizes, "want", w.oSizes) + } + }) + } + + files = []usageTestFile{ + { + name: "bucket/newfolder/afile", + size: 4, + }, + { + name: "bucket/newfolder/anotherone", + size: 1, + }, + { + name: "bucket/newfolder/anemptyone", + size: 0, + }, + { + name: "bucket/dir1/fileindir1", + size: 20000, + }, + { + name: "bucket/dir1/dirc/fileindirc", + size: 20000, + }, + { + name: "bucket/rootfile3", + size: 1000, + }, + } + createUsageTestFiles(t, base, files) + got, err = updateUsage(context.Background(), base, got, func() {}, getSize) + if err != nil { + t.Fatal(err) + } + + want = []struct { + path string + isNil bool + size, objs int + oSizes sizeHistogram + }{ + { + path: "flat", + size: 1363315, + objs: 14, + oSizes: sizeHistogram{0: 6, 1: 8}, + }, + { + path: "bucket/", + size: 21000, + objs: 3, + oSizes: sizeHistogram{0: 1, 1: 2}, + }, + { + path: "bucket/newfolder", + size: 5, + objs: 3, + oSizes: sizeHistogram{0: 3}, + }, + { + path: "bucket/dir1/dira", + size: 1300010, + objs: 4, + oSizes: sizeHistogram{0: 1, 1: 3}, + }, + { + path: "bucket/nonexistying", + isNil: true, + }, + } + + for _, w := range want { + t.Run(w.path, func(t *testing.T) { + e := got.find(w.path) + if w.path == "flat" { + f := got.flatten(*got.root()) + e = &f + } + if w.isNil { + if e != nil { + t.Error("want nil, got", e) + } + return + } + if e == nil { + t.Fatal("got nil result") + } + if e.Size != int64(w.size) { + t.Error("got size", e.Size, "want", w.size) + } + if e.Objects != uint64(w.objs) { + t.Error("got objects", e.Objects, "want", w.objs) + } + if e.ObjSizes != w.oSizes { + t.Error("got histogram", e.ObjSizes, "want", w.oSizes) + } + }) + } + + files = []usageTestFile{ + { + name: "bucket/dir1/dira/dirasub/fileindira2", + size: 200, + }, + } + + createUsageTestFiles(t, base, files) + err = os.RemoveAll(filepath.Join(base, "bucket/dir1/dira/dirasub/dcfile")) + if err != nil { + t.Fatal(err) + } + // Changed dir must be picked up in this many cycles. 
+	for i := 0; i < dataUsageUpdateDirCycles; i++ {
+		got, err = updateUsage(context.Background(), base, got, func() {}, getSize)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	want = []struct {
+		path       string
+		isNil      bool
+		size, objs int
+		oSizes     sizeHistogram
+	}{
+		{
+			path:   "flat",
+			size:   363515,
+			objs:   14,
+			oSizes: sizeHistogram{0: 7, 1: 7},
+		},
+		{
+			path:   "bucket/dir1/dira",
+			size:   300210,
+			objs:   4,
+			oSizes: sizeHistogram{0: 2, 1: 2},
+		},
+	}
+
+	for _, w := range want {
+		t.Run(w.path, func(t *testing.T) {
+			e := got.find(w.path)
+			if w.path == "flat" {
+				f := got.flatten(*got.root())
+				e = &f
+			}
+			if w.isNil {
+				if e != nil {
+					t.Error("want nil, got", e)
+				}
+				return
+			}
+			if e == nil {
+				t.Fatal("got nil result")
+			}
+			if e.Size != int64(w.size) {
+				t.Error("got size", e.Size, "want", w.size)
+			}
+			if e.Objects != uint64(w.objs) {
+				t.Error("got objects", e.Objects, "want", w.objs)
+			}
+			if e.ObjSizes != w.oSizes {
+				t.Error("got histogram", e.ObjSizes, "want", w.oSizes)
+			}
+		})
+	}
+
+	t.Log(got.StringAll())
+
+	t.Logf("Root, flat: %+v", got.flatten(*got.root()))
+	t.Logf("Root: %+v", *got.root())
+	t.Logf("bucket/dir1/dira: %+v", *got.find("bucket/dir1/dira"))
+}
+
+func createUsageTestFiles(t *testing.T, base string, files []usageTestFile) {
+	for _, f := range files {
+		err := os.MkdirAll(filepath.Dir(filepath.Join(base, f.name)), os.ModePerm)
+		if err != nil {
+			t.Fatal(err)
+		}
+		err = ioutil.WriteFile(filepath.Join(base, f.name), make([]byte, f.size), os.ModePerm)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+func Test_dataUsageCacheSerialize(t *testing.T) {
+	base, err := ioutil.TempDir("", "Test_dataUsageCacheSerialize")
+	if err != nil {
+		t.Skip(err)
+	}
+	defer os.RemoveAll(base)
+	var files = []usageTestFile{
+		{name: "rootfile", size: 10000},
+		{name: "rootfile2", size: 10000},
+		{name: "dir1/d1file", size: 2000},
+		{name: "dir2/d2file", size: 300},
+		{name: "dir1/dira/dafile", size: 100000},
+		{name: "dir1/dira/dbfile", size: 200000},
+		{name: "dir1/dira/dirasub/dcfile", size: 1000000},
+		{name: "dir1/dira/dirasub/sublevel3/dccccfile", size: 10},
+	}
+	createUsageTestFiles(t, base, files)
+
+	getSize := func(item Item) (i int64, err error) {
+		if item.Typ&os.ModeDir == 0 {
+			s, err := os.Stat(item.Path)
+			if err != nil {
+				return 0, err
+			}
+			return s.Size(), nil
+		}
+		return 0, nil
+	}
+	want, err := updateUsage(context.Background(), base, dataUsageCache{}, func() {}, getSize)
+	if err != nil {
+		t.Fatal(err)
+	}
+	b := want.serialize()
+	t.Log("serialize -> ", len(b), "bytes")
+
+	var got dataUsageCache
+	err = got.deserialize(b)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got.Info.LastUpdate.IsZero() {
+		t.Error("lastupdate not set")
+	}
+
+	// Fail only when the round-trip actually differs.
+	if fmt.Sprint(want) != fmt.Sprint(got) {
+		t.Fatalf("deserialize mismatch\nwant: %+v\ngot: %+v", want, got)
+	}
+}
diff --git a/cmd/disk-cache.go b/cmd/disk-cache.go
index e6633f12e..a4084f13c 100644
--- a/cmd/disk-cache.go
+++ b/cmd/disk-cache.go
@@ -693,7 +693,7 @@ func newServerCacheObjects(ctx context.Context, config cache.Config) (CacheObjec
 	return c, nil
 }
 
-func (c *cacheObjects) gc(ctx context.Context, doneCh chan struct{}) {
+func (c *cacheObjects) gc(ctx context.Context, doneCh <-chan struct{}) {
 	ticker := time.NewTicker(cacheGCInterval)
 	defer ticker.Stop()
 
diff --git a/cmd/fastwalk.go b/cmd/fastwalk.go
index cb911f01a..560c5c382 100644
--- a/cmd/fastwalk.go
+++ b/cmd/fastwalk.go
@@ -11,173 +11,12 @@ package cmd
 import (
 	"errors"
 	"os"
-	"path/filepath"
 	"strings"
-	"sync"
 )
 
 var errSkipFile = 
errors.New("fastwalk: skip this file") -// Walk is a faster implementation of filepath.Walk. -// -// filepath.Walk's design necessarily calls os.Lstat on each file, -// even if the caller needs less info. -// Many tools need only the type of each file. -// On some platforms, this information is provided directly by the readdir -// system call, avoiding the need to stat each file individually. -// fastwalk_unix.go contains a fork of the syscall routines. -// -// See golang.org/issue/16399 -// -// Walk walks the file tree rooted at root, calling walkFn for -// each file or directory in the tree, including root. -// -// If fastWalk returns filepath.SkipDir, the directory is skipped. -// -// Unlike filepath.Walk: -// * file stat calls must be done by the user. -// The only provided metadata is the file type, which does not include -// any permission bits. -// * multiple goroutines stat the filesystem concurrently. The provided -// walkFn must be safe for concurrent use. -// * fastWalk can follow symlinks if walkFn returns the TraverseLink -// sentinel error. It is the walkFn's responsibility to prevent -// fastWalk from going into symlink cycles. -func fastWalk(root string, nworkers int, doneCh <-chan struct{}, walkFn func(path string, typ os.FileMode) error) error { - - // Make sure to wait for all workers to finish, otherwise - // walkFn could still be called after returning. This Wait call - // runs after close(e.donec) below. - var wg sync.WaitGroup - defer wg.Wait() - - w := &walker{ - fn: walkFn, - enqueuec: make(chan walkItem, nworkers), // buffered for performance - workc: make(chan walkItem, nworkers), // buffered for performance - donec: make(chan struct{}), - - // buffered for correctness & not leaking goroutines: - resc: make(chan error, nworkers), - } - defer close(w.donec) - - for i := 0; i < nworkers; i++ { - wg.Add(1) - go w.doWork(&wg) - } - - todo := []walkItem{{dir: root}} - out := 0 - for { - workc := w.workc - var workItem walkItem - if len(todo) == 0 { - workc = nil - } else { - workItem = todo[len(todo)-1] - } - select { - case <-doneCh: - return nil - case workc <- workItem: - todo = todo[:len(todo)-1] - out++ - case it := <-w.enqueuec: - todo = append(todo, it) - case err := <-w.resc: - out-- - if err != nil { - return err - } - if out == 0 && len(todo) == 0 { - // It's safe to quit here, as long as the buffered - // enqueue channel isn't also readable, which might - // happen if the worker sends both another unit of - // work and its result before the other select was - // scheduled and both w.resc and w.enqueuec were - // readable. - select { - case it := <-w.enqueuec: - todo = append(todo, it) - default: - return nil - } - } - } - } -} - -// doWork reads directories as instructed (via workc) and runs the -// user's callback function. 
-func (w *walker) doWork(wg *sync.WaitGroup) { - defer wg.Done() - for { - select { - case <-w.donec: - return - case it := <-w.workc: - select { - case <-w.donec: - return - case w.resc <- w.walk(it.dir, !it.callbackDone): - } - } - } -} - -type walker struct { - fn func(path string, typ os.FileMode) error - - donec chan struct{} // closed on fastWalk's return - workc chan walkItem // to workers - enqueuec chan walkItem // from workers - resc chan error // from workers -} - -type walkItem struct { - dir string - callbackDone bool // callback already called; don't do it again -} - -func (w *walker) enqueue(it walkItem) { - select { - case w.enqueuec <- it: - case <-w.donec: - } -} - -var stringsBuilderPool = sync.Pool{ - New: func() interface{} { - return &strings.Builder{} - }, -} - -func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error { - builder := stringsBuilderPool.Get().(*strings.Builder) - defer func() { - builder.Reset() - stringsBuilderPool.Put(builder) - }() - - builder.WriteString(dirName) - if !strings.HasSuffix(dirName, SlashSeparator) { - builder.WriteString(SlashSeparator) - } - builder.WriteString(baseName) - if typ == os.ModeDir { - w.enqueue(walkItem{dir: builder.String()}) - return nil - } - - err := w.fn(builder.String(), typ) - if err == filepath.SkipDir || err == errSkipFile { - return nil - } - return err -} - -func readDirFn(dirName string, fn func(dirName, entName string, typ os.FileMode) error) error { +func readDirFn(dirName string, fn func(entName string, typ os.FileMode) error) error { fis, err := readDir(dirName) if err != nil { return err @@ -188,23 +27,9 @@ func readDirFn(dirName string, fn func(dirName, entName string, typ os.FileMode) mode |= os.ModeDir } - if err = fn(dirName, fi, mode); err != nil { + if err = fn(fi, mode); err != nil { return err } } return nil } - -func (w *walker) walk(root string, runUserCallback bool) error { - if runUserCallback { - err := w.fn(root, os.ModeDir) - if err == filepath.SkipDir || err == errSkipFile { - return nil - } - if err != nil { - return err - } - } - - return readDirFn(root, w.onDirEnt) -} diff --git a/cmd/format-xl.go b/cmd/format-xl.go index 6adbf1213..6ddd1b27c 100644 --- a/cmd/format-xl.go +++ b/cmd/format-xl.go @@ -828,7 +828,7 @@ func ecDrivesNoConfig(drivesPerSet int) int { // Make XL backend meta volumes. func makeFormatXLMetaVolumes(disk StorageAPI) error { // Attempt to create MinIO internal buckets. - return disk.MakeVolBulk(minioMetaBucket, minioMetaTmpBucket, minioMetaMultipartBucket, minioMetaBackgroundOpsBucket) + return disk.MakeVolBulk(minioMetaBucket, minioMetaTmpBucket, minioMetaMultipartBucket, dataUsageBucket) } var initMetaVolIgnoredErrs = append(baseIgnoredErrs, errVolumeExists) diff --git a/cmd/fs-v1-multipart.go b/cmd/fs-v1-multipart.go index 6b47971de..0e96fa422 100644 --- a/cmd/fs-v1-multipart.go +++ b/cmd/fs-v1-multipart.go @@ -757,7 +757,7 @@ func (fs *FSObjects) AbortMultipartUpload(ctx context.Context, bucket, object, u // Removes multipart uploads if any older than `expiry` duration // on all buckets for every `cleanupInterval`, this function is // blocking and should be run in a go-routine. 
-func (fs *FSObjects) cleanupStaleMultipartUploads(ctx context.Context, cleanupInterval, expiry time.Duration, doneCh chan struct{}) { +func (fs *FSObjects) cleanupStaleMultipartUploads(ctx context.Context, cleanupInterval, expiry time.Duration, doneCh <-chan struct{}) { ticker := time.NewTicker(cleanupInterval) defer ticker.Stop() diff --git a/cmd/fs-v1-multipart_test.go b/cmd/fs-v1-multipart_test.go index 826b8a53b..741c33670 100644 --- a/cmd/fs-v1-multipart_test.go +++ b/cmd/fs-v1-multipart_test.go @@ -21,6 +21,7 @@ import ( "context" "os" "path/filepath" + "sync" "testing" "time" ) @@ -34,33 +35,38 @@ func TestFSCleanupMultipartUploadsInRoutine(t *testing.T) { obj := initFSObjects(disk, t) fs := obj.(*FSObjects) - // Close the go-routine, we are going to - // manually start it and test in this test case. - GlobalServiceDoneCh <- struct{}{} - bucketName := "bucket" objectName := "object" - obj.MakeBucketWithLocation(context.Background(), bucketName, "") - uploadID, err := obj.NewMultipartUpload(context.Background(), bucketName, objectName, ObjectOptions{}) + // Create a context we can cancel. + ctx, cancel := context.WithCancel(context.Background()) + obj.MakeBucketWithLocation(ctx, bucketName, "") + + uploadID, err := obj.NewMultipartUpload(ctx, bucketName, objectName, ObjectOptions{}) if err != nil { t.Fatal("Unexpected err: ", err) } - go fs.cleanupStaleMultipartUploads(context.Background(), 20*time.Millisecond, 0, GlobalServiceDoneCh) + var cleanupWg sync.WaitGroup + cleanupWg.Add(1) + go func() { + defer cleanupWg.Done() + fs.cleanupStaleMultipartUploads(context.Background(), time.Millisecond, 0, ctx.Done()) + }() - // Wait for 40ms such that - we have given enough time for - // cleanup routine to kick in. - time.Sleep(40 * time.Millisecond) - - // Close the routine we do not need it anymore. - GlobalServiceDoneCh <- struct{}{} + // Wait for 100ms such that - we have given enough time for + // cleanup routine to kick in. Flaky on slow systems... + time.Sleep(100 * time.Millisecond) + cancel() + cleanupWg.Wait() // Check if upload id was already purged. 
if err = obj.AbortMultipartUpload(context.Background(), bucketName, objectName, uploadID); err != nil { if _, ok := err.(InvalidUploadID); !ok { t.Fatal("Unexpected err: ", err) } + } else { + t.Error("Item was not cleaned up.") } } diff --git a/cmd/fs-v1.go b/cmd/fs-v1.go index d24c3e8d1..b7b42b82a 100644 --- a/cmd/fs-v1.go +++ b/cmd/fs-v1.go @@ -37,12 +37,11 @@ import ( "github.com/minio/minio/cmd/config" xhttp "github.com/minio/minio/cmd/http" "github.com/minio/minio/cmd/logger" - bucketsse "github.com/minio/minio/pkg/bucket/encryption" "github.com/minio/minio/pkg/bucket/lifecycle" "github.com/minio/minio/pkg/bucket/object/tagging" "github.com/minio/minio/pkg/bucket/policy" - + "github.com/minio/minio/pkg/color" "github.com/minio/minio/pkg/lock" "github.com/minio/minio/pkg/madmin" "github.com/minio/minio/pkg/mimedb" @@ -112,7 +111,7 @@ func initMetaVolumeFS(fsPath, fsUUID string) error { return err } - if err := os.MkdirAll(pathJoin(fsPath, minioMetaBackgroundOpsBucket), 0777); err != nil { + if err := os.MkdirAll(pathJoin(fsPath, dataUsageBucket), 0777); err != nil { return err } @@ -235,9 +234,26 @@ func (fs *FSObjects) waitForLowActiveIO() { } // CrawlAndGetDataUsage returns data usage stats of the current FS deployment -func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { - dataUsageInfo := updateUsage(fs.fsPath, endCh, fs.waitForLowActiveIO, func(item Item) (int64, error) { - // Get file size, symlinks which cannot bex +func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, updates chan<- DataUsageInfo) error { + // Load bucket totals + var oldCache dataUsageCache + err := oldCache.load(ctx, fs, dataUsageCacheName) + if err != nil { + return err + } + if oldCache.Info.Name == "" { + oldCache.Info.Name = dataUsageRoot + } + if dataUsageDebug { + logger.Info(color.Green("FSObjects.CrawlAndGetDataUsage:") + " Start crawl cycle") + } + buckets, err := fs.ListBuckets(ctx) + if err != nil { + return err + } + t := time.Now() + cache, err := updateUsage(ctx, fs.fsPath, oldCache, fs.waitForLowActiveIO, func(item Item) (int64, error) { + // Get file size, symlinks which cannot be // followed are automatically filtered by fastwalk. fi, err := os.Stat(item.Path) if err != nil { @@ -245,11 +261,16 @@ func (fs *FSObjects) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan stru } return fi.Size(), nil }) + if dataUsageDebug { + logger.Info(color.Green("FSObjects.CrawlAndGetDataUsage:")+" Crawl time: %v", time.Since(t)) + } + // Even if there was an error, the new cache may have better info. 
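+	// A partially completed crawl may still have refreshed some folders,
+	// so a newer cache is persisted and reported instead of being discarded.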
+ if cache.Info.LastUpdate.After(oldCache.Info.LastUpdate) { + logger.LogIf(ctx, cache.save(ctx, fs, dataUsageCacheName)) + updates <- cache.dui(dataUsageRoot, buckets) + } - dataUsageInfo.LastUpdate = UTCNow() - atomic.StoreUint64(&fs.totalUsed, dataUsageInfo.ObjectsTotalSize) - - return dataUsageInfo + return err } /// Bucket operations diff --git a/cmd/fs-v1_test.go b/cmd/fs-v1_test.go index 18a764239..ddc272cea 100644 --- a/cmd/fs-v1_test.go +++ b/cmd/fs-v1_test.go @@ -356,8 +356,6 @@ func TestFSListBuckets(t *testing.T) { t.Fatal("Unexpected error: ", err) } - GlobalServiceDoneCh <- struct{}{} - // Create a bucket with invalid name if err := os.MkdirAll(pathJoin(fs.fsPath, "vo^"), 0777); err != nil { t.Fatal("Unexpected error: ", err) diff --git a/cmd/gateway-unsupported.go b/cmd/gateway-unsupported.go index 7b52a2d24..dae566bc7 100644 --- a/cmd/gateway-unsupported.go +++ b/cmd/gateway-unsupported.go @@ -50,9 +50,9 @@ func NewGatewayLayerWithLocker(gwLayer ObjectLayer) ObjectLayer { type GatewayUnsupported struct{} // CrawlAndGetDataUsage - crawl is not implemented for gateway -func (a GatewayUnsupported) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { +func (a GatewayUnsupported) CrawlAndGetDataUsage(ctx context.Context, updates chan<- DataUsageInfo) error { logger.CriticalIf(ctx, errors.New("not implemented")) - return DataUsageInfo{} + return NotImplemented{} } // NewNSLock is a dummy stub for gateway. diff --git a/cmd/gateway/azure/gateway-azure.go b/cmd/gateway/azure/gateway-azure.go index 87015be66..578535971 100644 --- a/cmd/gateway/azure/gateway-azure.go +++ b/cmd/gateway/azure/gateway-azure.go @@ -39,7 +39,6 @@ import ( humanize "github.com/dustin/go-humanize" "github.com/minio/cli" miniogopolicy "github.com/minio/minio-go/v6/pkg/policy" - "github.com/minio/minio/cmd" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/auth" "github.com/minio/minio/pkg/bucket/policy" @@ -1196,7 +1195,7 @@ func (a *azureObjects) CompleteMultipartUpload(ctx context.Context, bucket, obje if err != nil { return objInfo, azureToObjectError(err, bucket, object) } - objMetadata["md5sum"] = cmd.ComputeCompleteMultipartMD5(uploadedParts) + objMetadata["md5sum"] = minio.ComputeCompleteMultipartMD5(uploadedParts) _, err = objBlob.CommitBlockList(ctx, allBlocks, objProperties, objMetadata, azblob.BlobAccessConditions{}) if err != nil { diff --git a/cmd/gateway/s3/gateway-s3-sse.go b/cmd/gateway/s3/gateway-s3-sse.go index a2e44f2c0..ee170dd3c 100644 --- a/cmd/gateway/s3/gateway-s3-sse.go +++ b/cmd/gateway/s3/gateway-s3-sse.go @@ -679,7 +679,7 @@ func getGWContentPath(object string) string { } // Clean-up the stale incomplete encrypted multipart uploads. Should be run in a Go routine. -func (l *s3EncObjects) cleanupStaleEncMultipartUploads(ctx context.Context, cleanupInterval, expiry time.Duration, doneCh chan struct{}) { +func (l *s3EncObjects) cleanupStaleEncMultipartUploads(ctx context.Context, cleanupInterval, expiry time.Duration, doneCh <-chan struct{}) { ticker := time.NewTicker(cleanupInterval) defer ticker.Stop() diff --git a/cmd/generic-handlers.go b/cmd/generic-handlers.go index b468dcd20..44f38e608 100644 --- a/cmd/generic-handlers.go +++ b/cmd/generic-handlers.go @@ -353,7 +353,7 @@ func parseAmzDate(amzDateStr string) (amzDate time.Time, apiErr APIErrorCode) { // supported amz date formats. 
 func parseAmzDateHeader(req *http.Request) (time.Time, APIErrorCode) {
 	for _, amzDateHeader := range amzDateHeaders {
-		amzDateStr := req.Header.Get(http.CanonicalHeaderKey(amzDateHeader))
+		amzDateStr := req.Header.Get(amzDateHeader)
 		if amzDateStr != "" {
 			return parseAmzDate(amzDateStr)
 		}
diff --git a/cmd/global-heal.go b/cmd/global-heal.go
index 7af88d897..34e55b068 100644
--- a/cmd/global-heal.go
+++ b/cmd/global-heal.go
@@ -39,7 +39,7 @@ var leaderLockTimeout = newDynamicTimeout(time.Minute, time.Minute)
 func newBgHealSequence(numDisks int) *healSequence {
 
 	reqInfo := &logger.ReqInfo{API: "BackgroundHeal"}
-	ctx := logger.SetReqInfo(context.Background(), reqInfo)
+	ctx := logger.SetReqInfo(GlobalContext, reqInfo)
 
 	hs := madmin.HealOpts{
 		// Remove objects that do not have read-quorum
diff --git a/cmd/http-stats.go b/cmd/http-stats.go
index c431a9e22..de6dcf80b 100644
--- a/cmd/http-stats.go
+++ b/cmd/http-stats.go
@@ -101,28 +101,24 @@ type HTTPAPIStats struct {
 
 // Inc increments the api stats counter.
 func (stats *HTTPAPIStats) Inc(api string) {
-	stats.Lock()
-	defer stats.Unlock()
 	if stats == nil {
 		return
 	}
+	stats.Lock()
+	defer stats.Unlock()
 	if stats.apiStats == nil {
 		stats.apiStats = make(map[string]int)
 	}
-	if _, ok := stats.apiStats[api]; ok {
-		stats.apiStats[api]++
-		return
-	}
-	stats.apiStats[api] = 1
+	stats.apiStats[api]++
 }
 
// Dec decrements the api stats counter.
 func (stats *HTTPAPIStats) Dec(api string) {
-	stats.Lock()
-	defer stats.Unlock()
 	if stats == nil {
 		return
 	}
+	stats.Lock()
+	defer stats.Unlock()
 	if val, ok := stats.apiStats[api]; ok && val > 0 {
 		stats.apiStats[api]--
 	}
diff --git a/cmd/iam-object-store.go b/cmd/iam-object-store.go
index 3411589c9..da9677e5b 100644
--- a/cmd/iam-object-store.go
+++ b/cmd/iam-object-store.go
@@ -664,14 +664,15 @@ func listIAMConfigItems(objectAPI ObjectLayer, pathPrefix string, dirs bool,
 }
 
 func (iamOS *IAMObjectStore) watch(sys *IAMSys) {
+	ctx := GlobalContext
 	watchDisk := func() {
 		for {
 			select {
-			case <-GlobalServiceDoneCh:
+			case <-ctx.Done():
 				return
 			case <-time.NewTimer(globalRefreshIAMInterval).C:
 				err := iamOS.loadAll(sys, nil)
-				logger.LogIf(context.Background(), err)
+				logger.LogIf(ctx, err)
 			}
 		}
 	}
diff --git a/cmd/jwt/parser_test.go b/cmd/jwt/parser_test.go
index 8b689635c..d12bdf40c 100644
--- a/cmd/jwt/parser_test.go
+++ b/cmd/jwt/parser_test.go
@@ -27,7 +27,6 @@ import (
 	"time"
 
 	"github.com/dgrijalva/jwt-go"
-	jwtgo "github.com/dgrijalva/jwt-go"
 )
 
 var (
@@ -49,7 +48,7 @@ var jwtTestData = []struct {
 		"",
 		defaultKeyFunc,
 		&MapClaims{
-			MapClaims: jwtgo.MapClaims{
+			MapClaims: jwt.MapClaims{
 				"foo": "bar",
 			},
 		},
@@ -61,7 +60,7 @@ var jwtTestData = []struct {
 		"", // autogen
 		defaultKeyFunc,
 		&MapClaims{
-			MapClaims: jwtgo.MapClaims{
+			MapClaims: jwt.MapClaims{
 				"foo": "bar",
 				"exp": float64(time.Now().Unix() - 100),
 			},
@@ -74,7 +73,7 @@ var jwtTestData = []struct {
 		"", // autogen
 		defaultKeyFunc,
 		&MapClaims{
-			MapClaims: jwtgo.MapClaims{
+			MapClaims: jwt.MapClaims{
 				"foo": "bar",
 				"nbf": float64(time.Now().Unix() + 100),
 			},
@@ -87,7 +86,7 @@ var jwtTestData = []struct {
 		"", // autogen
 		defaultKeyFunc,
 		&MapClaims{
-			MapClaims: jwtgo.MapClaims{
+			MapClaims: jwt.MapClaims{
 				"foo": "bar",
 				"nbf": float64(time.Now().Unix() + 100),
 				"exp": float64(time.Now().Unix() - 100),
@@ -101,7 +100,7 @@
"eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJmb28iOiJiYXIifQ.EhkiHkoESI_cG3NPigFrxEk9Z60_oXrOT2vGm9Pn6RDgYNovYORQmmA0zs1AoAOf09ly2Nx2YAg6ABqAYga1AcMFkJljwxTT5fYphTuqpWdy4BELeSYJx5Ty2gmr8e7RonuUztrdD5WfPqLKMm1Ozp_T6zALpRmwTIW0QPnaBXaQD90FplAg46Iy1UlDKr-Eupy0i5SLch5Q-p2ZpaL_5fnTIUDlxC3pWhJTyx_71qDI-mAA_5lE_VdroOeflG56sSmDxopPEG3bFlSu1eowyBfxtu0_CuVd-M42RU75Zc4Gsj6uV77MBtbMrf4_7M_NUTSgoIF3fRqxrj0NzihIBg", defaultKeyFunc, &MapClaims{ - MapClaims: jwtgo.MapClaims{ + MapClaims: jwt.MapClaims{ "foo": "bar", }, }, @@ -113,7 +112,7 @@ var jwtTestData = []struct { "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJmb28iOiJiYXIifQ.FhkiHkoESI_cG3NPigFrxEk9Z60_oXrOT2vGm9Pn6RDgYNovYORQmmA0zs1AoAOf09ly2Nx2YAg6ABqAYga1AcMFkJljwxTT5fYphTuqpWdy4BELeSYJx5Ty2gmr8e7RonuUztrdD5WfPqLKMm1Ozp_T6zALpRmwTIW0QPnaBXaQD90FplAg46Iy1UlDKr-Eupy0i5SLch5Q-p2ZpaL_5fnTIUDlxC3pWhJTyx_71qDI-mAA_5lE_VdroOeflG56sSmDxopPEG3bFlSu1eowyBfxtu0_CuVd-M42RU75Zc4Gsj6uV77MBtbMrf4_7M_NUTSgoIF3fRqxrj0NzihIBg", nil, &MapClaims{ - MapClaims: jwtgo.MapClaims{ + MapClaims: jwt.MapClaims{ "foo": "bar", }, }, @@ -125,7 +124,7 @@ var jwtTestData = []struct { "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJmb28iOiJiYXIifQ.FhkiHkoESI_cG3NPigFrxEk9Z60_oXrOT2vGm9Pn6RDgYNovYORQmmA0zs1AoAOf09ly2Nx2YAg6ABqAYga1AcMFkJljwxTT5fYphTuqpWdy4BELeSYJx5Ty2gmr8e7RonuUztrdD5WfPqLKMm1Ozp_T6zALpRmwTIW0QPnaBXaQD90FplAg46Iy1UlDKr-Eupy0i5SLch5Q-p2ZpaL_5fnTIUDlxC3pWhJTyx_71qDI-mAA_5lE_VdroOeflG56sSmDxopPEG3bFlSu1eowyBfxtu0_CuVd-M42RU75Zc4Gsj6uV77MBtbMrf4_7M_NUTSgoIF3fRqxrj0NzihIBg", emptyKeyFunc, &MapClaims{ - MapClaims: jwtgo.MapClaims{ + MapClaims: jwt.MapClaims{ "foo": "bar", }, }, @@ -137,7 +136,7 @@ var jwtTestData = []struct { "eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJmb28iOiJiYXIifQ.FhkiHkoESI_cG3NPigFrxEk9Z60_oXrOT2vGm9Pn6RDgYNovYORQmmA0zs1AoAOf09ly2Nx2YAg6ABqAYga1AcMFkJljwxTT5fYphTuqpWdy4BELeSYJx5Ty2gmr8e7RonuUztrdD5WfPqLKMm1Ozp_T6zALpRmwTIW0QPnaBXaQD90FplAg46Iy1UlDKr-Eupy0i5SLch5Q-p2ZpaL_5fnTIUDlxC3pWhJTyx_71qDI-mAA_5lE_VdroOeflG56sSmDxopPEG3bFlSu1eowyBfxtu0_CuVd-M42RU75Zc4Gsj6uV77MBtbMrf4_7M_NUTSgoIF3fRqxrj0NzihIBg", errorKeyFunc, &MapClaims{ - MapClaims: jwtgo.MapClaims{ + MapClaims: jwt.MapClaims{ "foo": "bar", }, }, @@ -149,7 +148,7 @@ var jwtTestData = []struct { "", defaultKeyFunc, &StandardClaims{ - StandardClaims: jwtgo.StandardClaims{ + StandardClaims: jwt.StandardClaims{ ExpiresAt: time.Now().Add(time.Second * 10).Unix(), }, }, @@ -160,7 +159,7 @@ var jwtTestData = []struct { func mapClaimsToken(claims *MapClaims) string { claims.SetAccessKey("test") - j := jwtgo.NewWithClaims(jwtgo.SigningMethodHS512, claims) + j := jwt.NewWithClaims(jwt.SigningMethodHS512, claims) tk, _ := j.SignedString([]byte("HelloSecret")) return tk } @@ -168,7 +167,7 @@ func mapClaimsToken(claims *MapClaims) string { func standardClaimsToken(claims *StandardClaims) string { claims.AccessKey = "test" claims.Subject = "test" - j := jwtgo.NewWithClaims(jwtgo.SigningMethodHS512, claims) + j := jwt.NewWithClaims(jwt.SigningMethodHS512, claims) tk, _ := j.SignedString([]byte("HelloSecret")) return tk } diff --git a/cmd/logger/target/http/http.go b/cmd/logger/target/http/http.go index 787461a42..7ba8b52eb 100644 --- a/cmd/logger/target/http/http.go +++ b/cmd/logger/target/http/http.go @@ -21,7 +21,6 @@ import ( "encoding/json" "errors" "net/http" - gohttp "net/http" "strings" xhttp "github.com/minio/minio/cmd/http" @@ -41,7 +40,7 @@ type Target struct { // User-Agent to be set on each log request sent to the `endpoint` userAgent string logKind string - client gohttp.Client + client http.Client } func (h 
*Target) startHTTPLogger() { @@ -54,7 +53,7 @@ func (h *Target) startHTTPLogger() { continue } - req, err := gohttp.NewRequest(http.MethodPost, h.endpoint, bytes.NewBuffer(logJSON)) + req, err := http.NewRequest(http.MethodPost, h.endpoint, bytes.NewBuffer(logJSON)) if err != nil { continue } @@ -78,12 +77,12 @@ func (h *Target) startHTTPLogger() { // New initializes a new logger target which // sends log over http to the specified endpoint -func New(endpoint, userAgent, logKind string, transport *gohttp.Transport) *Target { +func New(endpoint, userAgent, logKind string, transport *http.Transport) *Target { h := Target{ endpoint: endpoint, userAgent: userAgent, logKind: strings.ToUpper(logKind), - client: gohttp.Client{ + client: http.Client{ Transport: transport, }, logCh: make(chan interface{}, 10000), diff --git a/cmd/naughty-disk_test.go b/cmd/naughty-disk_test.go index 0c89b8d59..df2724602 100644 --- a/cmd/naughty-disk_test.go +++ b/cmd/naughty-disk_test.go @@ -17,6 +17,7 @@ package cmd import ( + "context" "io" "sync" ) @@ -80,8 +81,8 @@ func (d *naughtyDisk) calcError() (err error) { func (d *naughtyDisk) SetDiskID(id string) { } -func (d *naughtyDisk) CrawlAndGetDataUsage(endCh <-chan struct{}) (info DataUsageInfo, err error) { - return d.disk.CrawlAndGetDataUsage(endCh) +func (d *naughtyDisk) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (info dataUsageCache, err error) { + return d.disk.CrawlAndGetDataUsage(ctx, cache) } func (d *naughtyDisk) DiskInfo() (info DiskInfo, err error) { diff --git a/cmd/object-api-datatypes.go b/cmd/object-api-datatypes.go index a06f3e8ca..14f24ba6d 100644 --- a/cmd/object-api-datatypes.go +++ b/cmd/object-api-datatypes.go @@ -18,6 +18,7 @@ package cmd import ( "io" + "math" "time" "github.com/minio/minio/pkg/hash" @@ -77,6 +78,11 @@ type objectHistogramInterval struct { start, end int64 } +const ( + // dataUsageBucketLen must be length of ObjectsHistogramIntervals + dataUsageBucketLen = 7 +) + // ObjectsHistogramIntervals is the list of all intervals // of object sizes to be included in objects histogram. var ObjectsHistogramIntervals = []objectHistogramInterval{ @@ -86,20 +92,25 @@ var ObjectsHistogramIntervals = []objectHistogramInterval{ {"BETWEEN_10_MB_AND_64_MB", 1024 * 1024 * 10, 1024*1024*64 - 1}, {"BETWEEN_64_MB_AND_128_MB", 1024 * 1024 * 64, 1024*1024*128 - 1}, {"BETWEEN_128_MB_AND_512_MB", 1024 * 1024 * 128, 1024*1024*512 - 1}, - {"GREATER_THAN_512_MB", 1024 * 1024 * 512, -1}, + {"GREATER_THAN_512_MB", 1024 * 1024 * 512, math.MaxInt64}, } // DataUsageInfo represents data usage stats of the underlying Object API type DataUsageInfo struct { - // The timestamp of when the data usage info is generated + // LastUpdate is the timestamp of when the data usage info was last updated. + // This does not indicate a full scan. LastUpdate time.Time `json:"lastUpdate"` ObjectsCount uint64 `json:"objectsCount"` // Objects total size - ObjectsTotalSize uint64 `json:"objectsTotalSize"` + ObjectsTotalSize uint64 `json:"objectsTotalSize"` + + // ObjectsSizesHistogram contains information on objects across all buckets. + // See ObjectsHistogramIntervals. ObjectsSizesHistogram map[string]uint64 `json:"objectsSizesHistogram"` - BucketsCount uint64 `json:"bucketsCount"` + BucketsCount uint64 `json:"bucketsCount"` + // BucketsSizes is "bucket name" -> size. 
	BucketsSizes map[string]uint64 `json:"bucketsSizes"`
 }
 
diff --git a/cmd/object-api-errors.go b/cmd/object-api-errors.go
index 6c92c4190..915483a90 100644
--- a/cmd/object-api-errors.go
+++ b/cmd/object-api-errors.go
@@ -274,7 +274,7 @@ func (e BucketSSEConfigNotFound) Error() string {
 // BucketNameInvalid - bucketname provided is invalid.
 type BucketNameInvalid GenericError
 
-// Return string an error formatted as the given text.
+// Error returns the error formatted as the given text.
 func (e BucketNameInvalid) Error() string {
 	return "Bucket name invalid: " + e.Bucket
 }
@@ -290,17 +290,17 @@ type ObjectNameTooLong GenericError
 // ObjectNamePrefixAsSlash - object name has a slash as prefix.
 type ObjectNamePrefixAsSlash GenericError
 
-// Return string an error formatted as the given text.
+// Error returns the error formatted as the given text.
 func (e ObjectNameInvalid) Error() string {
 	return "Object name invalid: " + e.Bucket + "#" + e.Object
 }
 
-// Return string an error formatted as the given text.
+// Error returns the error formatted as the given text.
 func (e ObjectNameTooLong) Error() string {
 	return "Object name too long: " + e.Bucket + "#" + e.Object
 }
 
-// Return string an error formatted as the given text.
+// Error returns the error formatted as the given text.
 func (e ObjectNamePrefixAsSlash) Error() string {
 	return "Object name contains forward slash as prefix: " + e.Bucket + "#" + e.Object
 }
@@ -308,7 +308,7 @@
 // AllAccessDisabled All access to this object has been disabled
 type AllAccessDisabled GenericError
 
-// Return string an error formatted as the given text.
+// Error returns the error formatted as the given text.
 func (e AllAccessDisabled) Error() string {
 	return "All access to this object has been disabled"
 }
@@ -316,7 +316,7 @@
 // IncompleteBody You did not provide the number of bytes specified by the Content-Length HTTP header.
 type IncompleteBody GenericError
 
-// Return string an error formatted as the given text.
+// Error returns the error formatted as the given text.
 func (e IncompleteBody) Error() string {
 	return e.Bucket + "#" + e.Object + " has incomplete body"
 }
diff --git a/cmd/object-api-interface.go b/cmd/object-api-interface.go
index 9c8501dcd..25312a3f4 100644
--- a/cmd/object-api-interface.go
+++ b/cmd/object-api-interface.go
@@ -59,7 +59,7 @@ type ObjectLayer interface {
 
 	// Storage operations.
 	Shutdown(context.Context) error
-	CrawlAndGetDataUsage(context.Context, <-chan struct{}) DataUsageInfo
+	CrawlAndGetDataUsage(ctx context.Context, updates chan<- DataUsageInfo) error
 	StorageInfo(ctx context.Context, local bool) StorageInfo // local queries only local disks
 
 	// Bucket operations.
diff --git a/cmd/object-api-utils.go b/cmd/object-api-utils.go
index 95264a85c..3d360a702 100644
--- a/cmd/object-api-utils.go
+++ b/cmd/object-api-utils.go
@@ -51,10 +51,6 @@ import (
 const (
 	// MinIO meta bucket.
 	minioMetaBucket = ".minio.sys"
-	// Background ops meta prefix
-	backgroundOpsMetaPrefix = "background-ops"
-	// MinIO Stats meta prefix.
-	minioMetaBackgroundOpsBucket = minioMetaBucket + SlashSeparator + backgroundOpsMetaPrefix
 	// Multipart meta prefix.
 	mpartMetaPrefix = "multipart"
 	// MinIO Multipart meta prefix.
@@ -77,7 +73,7 @@ func isMinioMetaBucketName(bucket string) bool { return bucket == minioMetaBucket || bucket == minioMetaMultipartBucket || bucket == minioMetaTmpBucket || - bucket == minioMetaBackgroundOpsBucket + bucket == dataUsageBucket } // IsValidBucketName verifies that a bucket name is in accordance with diff --git a/cmd/object-handlers.go b/cmd/object-handlers.go index 8219def7e..12fe4d2d7 100644 --- a/cmd/object-handlers.go +++ b/cmd/object-handlers.go @@ -1214,7 +1214,7 @@ func (api objectAPIHandlers) PutObjectHandler(w http.ResponseWriter, r *http.Req return } - if tags := r.Header.Get(http.CanonicalHeaderKey(xhttp.AmzObjectTagging)); tags != "" { + if tags := r.Header.Get(xhttp.AmzObjectTagging); tags != "" { metadata[xhttp.AmzObjectTagging], err = extractTags(ctx, tags) if err != nil { writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL, guessIsBrowserReq(r)) diff --git a/cmd/posix-diskid-check.go b/cmd/posix-diskid-check.go index 2b265f220..fb99c1b87 100644 --- a/cmd/posix-diskid-check.go +++ b/cmd/posix-diskid-check.go @@ -17,6 +17,7 @@ package cmd import ( + "context" "io" ) @@ -38,8 +39,8 @@ func (p *posixDiskIDCheck) IsOnline() bool { return storedDiskID == p.diskID } -func (p *posixDiskIDCheck) CrawlAndGetDataUsage(endCh <-chan struct{}) (DataUsageInfo, error) { - return p.storage.CrawlAndGetDataUsage(endCh) +func (p *posixDiskIDCheck) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) { + return p.storage.CrawlAndGetDataUsage(ctx, cache) } func (p *posixDiskIDCheck) Hostname() string { diff --git a/cmd/posix.go b/cmd/posix.go index 91cafe102..457d0ac80 100644 --- a/cmd/posix.go +++ b/cmd/posix.go @@ -338,8 +338,8 @@ func (s *posix) waitForLowActiveIO() { } } -func (s *posix) CrawlAndGetDataUsage(endCh <-chan struct{}) (DataUsageInfo, error) { - dataUsageInfo := updateUsage(s.diskPath, endCh, s.waitForLowActiveIO, func(item Item) (int64, error) { +func (s *posix) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) { + dataUsageInfo, err := updateUsage(ctx, s.diskPath, cache, s.waitForLowActiveIO, func(item Item) (int64, error) { // Look for `xl.json' at the leaf. if !strings.HasSuffix(item.Path, SlashSeparator+xlMetaJSONFile) { // if no xl.json found, skip the file. 
@@ -353,14 +353,20 @@ func (s *posix) CrawlAndGetDataUsage(endCh <-chan struct{}) (DataUsageInfo, erro meta, err := xlMetaV1UnmarshalJSON(context.Background(), xlMetaBuf) if err != nil { - return 0, errSkipFile + return 0, nil } return meta.Stat.Size, nil }) - - dataUsageInfo.LastUpdate = UTCNow() - atomic.StoreUint64(&s.totalUsed, dataUsageInfo.ObjectsTotalSize) + if err != nil { + return dataUsageInfo, err + } + dataUsageInfo.Info.LastUpdate = time.Now() + total := dataUsageInfo.sizeRecursive(dataUsageInfo.Info.Name) + if total == nil { + total = &dataUsageEntry{} + } + atomic.StoreUint64(&s.totalUsed, uint64(total.Size)) return dataUsageInfo, nil } diff --git a/cmd/server-main.go b/cmd/server-main.go index e3deeb9d9..a5b908103 100644 --- a/cmd/server-main.go +++ b/cmd/server-main.go @@ -20,6 +20,7 @@ import ( "context" "errors" "fmt" + "net" "net/http" "os" "os/signal" @@ -361,6 +362,9 @@ func serverMain(ctx *cli.Context) { } httpServer := xhttp.NewServer([]string{globalMinioAddr}, criticalErrorHandler{handler}, getCert) + httpServer.BaseContext = func(listener net.Listener) context.Context { + return GlobalContext + } go func() { globalHTTPServerErrorCh <- httpServer.Start() }() diff --git a/cmd/server-startup-msg.go b/cmd/server-startup-msg.go index 3c9dd138b..5ca738cf4 100644 --- a/cmd/server-startup-msg.go +++ b/cmd/server-startup-msg.go @@ -92,13 +92,13 @@ func printStartupSafeModeMessage(apiEndpoints []string, err error) { mcMessage := fmt.Sprintf("> mc.exe config host add %s %s %s %s --api s3v4", alias, endPoint, cred.AccessKey, cred.SecretKey) logStartupMessage(fmt.Sprintf(getFormatStr(len(mcMessage), 3), mcMessage)) - mcMessage = fmt.Sprintf("> mc.exe admin config --help") + mcMessage = "> mc.exe admin config --help" logStartupMessage(fmt.Sprintf(getFormatStr(len(mcMessage), 3), mcMessage)) } else { mcMessage := fmt.Sprintf("$ mc config host add %s %s %s %s --api s3v4", alias, endPoint, cred.AccessKey, cred.SecretKey) logStartupMessage(fmt.Sprintf(getFormatStr(len(mcMessage), 3), mcMessage)) - mcMessage = fmt.Sprintf("$ mc admin config --help") + mcMessage = "$ mc admin config --help" logStartupMessage(fmt.Sprintf(getFormatStr(len(mcMessage), 3), mcMessage)) } } diff --git a/cmd/service.go b/cmd/service.go index 85cd716e7..8f10eae16 100644 --- a/cmd/service.go +++ b/cmd/service.go @@ -17,6 +17,7 @@ package cmd import ( + "context" "os" "os/exec" "syscall" @@ -35,11 +36,22 @@ const ( var globalServiceSignalCh chan serviceSignal // GlobalServiceDoneCh - Global service done channel. -var GlobalServiceDoneCh chan struct{} +var GlobalServiceDoneCh <-chan struct{} + +// GlobalContext context that is canceled when server is requested to shut down. +var GlobalContext context.Context + +// cancelGlobalContext can be used to indicate server shutdown. +var cancelGlobalContext context.CancelFunc // Initialize service mutex once. func init() { - GlobalServiceDoneCh = make(chan struct{}) + initGlobalContext() +} + +func initGlobalContext() { + GlobalContext, cancelGlobalContext = context.WithCancel(context.Background()) + GlobalServiceDoneCh = GlobalContext.Done() globalServiceSignalCh = make(chan serviceSignal) } diff --git a/cmd/signals.go b/cmd/signals.go index 8a679094e..965a85cc6 100644 --- a/cmd/signals.go +++ b/cmd/signals.go @@ -57,7 +57,7 @@ func handleSignals() { } // send signal to various go-routines that they need to quit. 
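+	// Canceling the global context closes GlobalContext.Done() (and with it
+	// GlobalServiceDoneCh), so every listener observes shutdown; unlike
+	// closing a raw channel, the cancel func is safe to call more than once.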
- close(GlobalServiceDoneCh) + cancelGlobalContext() if objAPI := newObjectLayerWithoutSafeModeFn(); objAPI != nil { oerr = objAPI.Shutdown(context.Background()) diff --git a/cmd/storage-interface.go b/cmd/storage-interface.go index bcd04e539..824c6d1f6 100644 --- a/cmd/storage-interface.go +++ b/cmd/storage-interface.go @@ -17,6 +17,7 @@ package cmd import ( + "context" "io" ) @@ -32,7 +33,7 @@ type StorageAPI interface { SetDiskID(id string) DiskInfo() (info DiskInfo, err error) - CrawlAndGetDataUsage(endCh <-chan struct{}) (DataUsageInfo, error) + CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) // Volume operations. MakeVol(volume string) (err error) diff --git a/cmd/storage-rest-client.go b/cmd/storage-rest-client.go index 9f5fd138b..492c3d317 100644 --- a/cmd/storage-rest-client.go +++ b/cmd/storage-rest-client.go @@ -17,7 +17,6 @@ package cmd import ( - "bufio" "bytes" "context" "crypto/tls" @@ -152,26 +151,25 @@ func (client *storageRESTClient) Hostname() string { return client.endpoint.Host } -func (client *storageRESTClient) CrawlAndGetDataUsage(endCh <-chan struct{}) (DataUsageInfo, error) { - respBody, err := client.call(storageRESTMethodCrawlAndGetDataUsage, nil, nil, -1) +func (client *storageRESTClient) CrawlAndGetDataUsage(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) { + b := cache.serialize() + respBody, err := client.call(storageRESTMethodCrawlAndGetDataUsage, + url.Values{}, + bytes.NewBuffer(b), int64(len(b))) defer http.DrainBody(respBody) if err != nil { - return DataUsageInfo{}, err + return cache, err } - reader := bufio.NewReader(respBody) - for { - b, err := reader.ReadByte() - if err != nil { - return DataUsageInfo{}, err - } - if b != ' ' { - reader.UnreadByte() - break - } + reader, err := waitForHTTPResponse(respBody) + if err != nil { + return cache, err } - var usageInfo DataUsageInfo - err = gob.NewDecoder(reader).Decode(&usageInfo) - return usageInfo, err + b, err = ioutil.ReadAll(reader) + if err != nil { + return cache, err + } + var newCache dataUsageCache + return newCache, newCache.deserialize(b) } func (client *storageRESTClient) SetDiskID(id string) { @@ -418,7 +416,7 @@ func (client *storageRESTClient) DeleteFileBulk(volume string, paths []string) ( return nil, err } - reader, err := clearLeadingSpaces(respBody) + reader, err := waitForHTTPResponse(respBody) if err != nil { return nil, err } @@ -455,7 +453,7 @@ func (client *storageRESTClient) DeletePrefixes(volume string, paths []string) ( return nil, err } - reader, err := clearLeadingSpaces(respBody) + reader, err := waitForHTTPResponse(respBody) if err != nil { return nil, err } @@ -484,22 +482,6 @@ func (client *storageRESTClient) RenameFile(srcVolume, srcPath, dstVolume, dstPa return err } -// clearLeadingSpaces removes all the first spaces returned from a reader. 
-func clearLeadingSpaces(r io.Reader) (io.Reader, error) { - reader := bufio.NewReader(r) - for { - b, err := reader.ReadByte() - if err != nil { - return nil, err - } - if b != ' ' { - reader.UnreadByte() - break - } - } - return reader, nil -} - func (client *storageRESTClient) VerifyFile(volume, path string, size int64, algo BitrotAlgorithm, sum []byte, shardSize int64) error { values := make(url.Values) values.Set(storageRESTVolume, volume) @@ -514,7 +496,7 @@ func (client *storageRESTClient) VerifyFile(volume, path string, size int64, alg if err != nil { return err } - reader, err := clearLeadingSpaces(respBody) + reader, err := waitForHTTPResponse(respBody) if err != nil { return err } diff --git a/cmd/storage-rest-common.go b/cmd/storage-rest-common.go index ace9dc456..f8da63c88 100644 --- a/cmd/storage-rest-common.go +++ b/cmd/storage-rest-common.go @@ -17,7 +17,7 @@ package cmd const ( - storageRESTVersion = "v15" // Adding DeletePrefixes API + storageRESTVersion = "v16" // CrawlAndGetDataUsageHandler API change storageRESTVersionPrefix = SlashSeparator + storageRESTVersion storageRESTPrefix = minioReservedBucketPath + "/storage" ) diff --git a/cmd/storage-rest-server.go b/cmd/storage-rest-server.go index 790acf1cd..c0370e32f 100644 --- a/cmd/storage-rest-server.go +++ b/cmd/storage-rest-server.go @@ -24,6 +24,7 @@ import ( "errors" "fmt" "io" + "io/ioutil" "net/http" "os/user" "path" @@ -138,16 +139,28 @@ func (s *storageRESTServer) CrawlAndGetDataUsageHandler(w http.ResponseWriter, r } w.Header().Set(xhttp.ContentType, "text/event-stream") - doneCh := sendWhiteSpaceToHTTPResponse(w) - usageInfo, err := s.storage.CrawlAndGetDataUsage(GlobalServiceDoneCh) - <-doneCh + b, err := ioutil.ReadAll(r.Body) + if err != nil { + s.writeErrorResponse(w, err) + return + } + var cache dataUsageCache + err = cache.deserialize(b) + if err != nil { + logger.LogIf(r.Context(), err) + s.writeErrorResponse(w, err) + return + } + + done := keepHTTPResponseAlive(w) + usageInfo, err := s.storage.CrawlAndGetDataUsage(r.Context(), cache) + done() if err != nil { s.writeErrorResponse(w, err) return } - - gob.NewEncoder(w).Encode(usageInfo) + w.Write(usageInfo.serialize()) w.(http.Flusher).Flush() } @@ -510,9 +523,9 @@ func (s *storageRESTServer) DeleteFileBulkHandler(w http.ResponseWriter, r *http w.Header().Set(xhttp.ContentType, "text/event-stream") encoder := gob.NewEncoder(w) - doneCh := sendWhiteSpaceToHTTPResponse(w) + done := keepHTTPResponseAlive(w) errs, err := s.storage.DeleteFileBulk(volume, filePaths) - <-doneCh + done() if err != nil { s.writeErrorResponse(w, err) return @@ -556,9 +569,9 @@ func (s *storageRESTServer) DeletePrefixesHandler(w http.ResponseWriter, r *http w.Header().Set(xhttp.ContentType, "text/event-stream") encoder := gob.NewEncoder(w) - doneCh := sendWhiteSpaceToHTTPResponse(w) + done := keepHTTPResponseAlive(w) errs, err := s.storage.DeletePrefixes(volume, prefixes) - <-doneCh + done() if err != nil { s.writeErrorResponse(w, err) return @@ -590,11 +603,15 @@ func (s *storageRESTServer) RenameFileHandler(w http.ResponseWriter, r *http.Req } } -// Send whitespace to the client to avoid timeouts with long storage +// keepHTTPResponseAlive can be used to avoid timeouts with long storage // operations, such as bitrot verification or data usage crawling. -func sendWhiteSpaceToHTTPResponse(w http.ResponseWriter) <-chan struct{} { +// Every 10 seconds a space character is sent. +// The returned function should always be called to release resources. 
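+// A single zero byte terminates the keep-alive stream just before the
+// payload, letting the receiver distinguish padding from data.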
+// waitForHTTPResponse should be used on the receiving side.
+func keepHTTPResponseAlive(w http.ResponseWriter) func() {
 	doneCh := make(chan struct{})
 	go func() {
+		defer close(doneCh)
 		ticker := time.NewTicker(time.Second * 10)
 		for {
 			select {
@@ -602,13 +619,38 @@
 				w.Write([]byte(" "))
 				w.(http.Flusher).Flush()
 			case doneCh <- struct{}{}:
+				w.Write([]byte{0})
 				ticker.Stop()
 				return
 			}
 		}
-
 	}()
-	return doneCh
+	return func() {
+		// Indicate we are ready to write.
+		<-doneCh
+		// Wait for channel to be closed so we don't race on writes.
+		<-doneCh
+	}
+}
+
+// waitForHTTPResponse will wait for responses where keepHTTPResponseAlive
+// has been used.
+// The returned reader contains the payload.
+func waitForHTTPResponse(respBody io.Reader) (io.Reader, error) {
+	reader := bufio.NewReader(respBody)
+	for {
+		b, err := reader.ReadByte()
+		if err != nil {
+			return nil, err
+		}
+		if b != ' ' {
+			if b != 0 {
+				reader.UnreadByte()
+			}
+			break
+		}
+	}
+	return reader, nil
 }
 
 // VerifyFileResp - VerifyFile()'s response.
@@ -650,9 +692,9 @@ func (s *storageRESTServer) VerifyFile(w http.ResponseWriter, r *http.Request) {
 	}
 	w.Header().Set(xhttp.ContentType, "text/event-stream")
 	encoder := gob.NewEncoder(w)
-	doneCh := sendWhiteSpaceToHTTPResponse(w)
+	done := keepHTTPResponseAlive(w)
 	err = s.storage.VerifyFile(volume, filePath, size, BitrotAlgorithmFromString(algoStr), hash, int64(shardSize))
-	<-doneCh
+	done()
 	vresp := &VerifyFileResp{}
 	if err != nil {
 		vresp.Err = StorageErr(err.Error())
diff --git a/cmd/streaming-signature-v4.go b/cmd/streaming-signature-v4.go
index 65af7f795..0de211593 100644
--- a/cmd/streaming-signature-v4.go
+++ b/cmd/streaming-signature-v4.go
@@ -102,7 +102,7 @@ func calculateSeedSignature(r *http.Request) (cred auth.Credentials, signature s
 
 	// Extract date, if not present throw error.
 	var dateStr string
-	if dateStr = req.Header.Get(http.CanonicalHeaderKey("x-amz-date")); dateStr == "" {
+	if dateStr = req.Header.Get("x-amz-date"); dateStr == "" {
 		if dateStr = r.Header.Get("Date"); dateStr == "" {
 			return cred, "", "", time.Time{}, ErrMissingDateHeader
 		}
diff --git a/cmd/test-utils_test.go b/cmd/test-utils_test.go
index c8461e07a..8ddea40f3 100644
--- a/cmd/test-utils_test.go
+++ b/cmd/test-utils_test.go
@@ -419,22 +419,6 @@ func resetGlobalConfigPath() {
 	globalConfigDir = &ConfigDir{path: ""}
 }
 
-func resetGlobalServiceDoneCh() {
-	// Repeatedly send on the service done channel, so that
-	// listening go-routines will quit. This works better than
-	// closing the channel - closing introduces a new race, as the
-	// current thread writes to the variable, and other threads
-	// listening on it, read from it.
-loop:
-	for {
-		select {
-		case GlobalServiceDoneCh <- struct{}{}:
-		default:
-			break loop
-		}
-	}
-}
-
 // sets globalObjectAPI to `nil`.
 func resetGlobalObjectAPI() {
 	globalObjLayerMutex.Lock()
@@ -499,7 +483,8 @@ func resetGlobalIAMSys() {
 func resetTestGlobals() {
 	// close any indefinitely running go-routines from previous
 	// tests.
-	resetGlobalServiceDoneCh()
+	cancelGlobalContext()
+	initGlobalContext()
 	// set globalObjectAPI to `nil`.
 	resetGlobalObjectAPI()
 	// Reset config path set.
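The keep-alive handshake above is easiest to see end to end in isolation. The sketch below is illustrative only; demoKeepAlive and demoWaitForResponse are not part of this patch. It reproduces the wire format: heartbeat spaces while the operation runs, a single zero byte as terminator, then the payload. The receiver skips the padding and unreads the first non-space byte only when no terminator was sent, which keeps it compatible with older peers.

package main

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"io/ioutil"
)

// demoKeepAlive plays the sending side: heartbeat spaces while a slow
// operation runs, then the zero terminator, then the actual payload.
func demoKeepAlive(w io.Writer, payload []byte) {
	w.Write([]byte("   ")) // spaces keep the connection from idling out
	w.Write([]byte{0})     // terminator: payload follows
	w.Write(payload)
}

// demoWaitForResponse mirrors waitForHTTPResponse: skip padding, consume
// the terminator if present, and return a reader positioned at the payload.
func demoWaitForResponse(r io.Reader) (io.Reader, error) {
	br := bufio.NewReader(r)
	for {
		b, err := br.ReadByte()
		if err != nil {
			return nil, err
		}
		if b != ' ' {
			if b != 0 {
				br.UnreadByte() // peer predates the terminator byte
			}
			return br, nil
		}
	}
}

func main() {
	var buf bytes.Buffer
	demoKeepAlive(&buf, []byte("gob payload"))
	r, err := demoWaitForResponse(&buf)
	if err != nil {
		panic(err)
	}
	payload, _ := ioutil.ReadAll(r)
	fmt.Printf("payload: %q\n", payload) // payload: "gob payload"
}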
diff --git a/cmd/web-handlers_test.go b/cmd/web-handlers_test.go
index 00cbdc2ae..813ee2ce8 100644
--- a/cmd/web-handlers_test.go
+++ b/cmd/web-handlers_test.go
@@ -449,7 +449,7 @@ func testListBucketsWebHandler(obj ObjectLayer, instanceType string, t TestErrHa
 		t.Fatalf("Cannot find the bucket already created by MakeBucket")
 	}
 	if listBucketsReply.Buckets[0].Name != bucketName {
-		t.Fatalf("Found another bucket other than already created by MakeBucket")
+		t.Fatalf("Found another bucket %q other than already created by MakeBucket", listBucketsReply.Buckets[0].Name)
 	}
 }
 
diff --git a/cmd/xl-sets.go b/cmd/xl-sets.go
index 3f7ef52d1..d1f123f70 100644
--- a/cmd/xl-sets.go
+++ b/cmd/xl-sets.go
@@ -235,7 +235,7 @@ func (s *xlSets) connectDisks() {
 // monitorAndConnectEndpoints this is a monitoring loop to keep track of disconnected
 // endpoints by reconnecting them and making sure to place them into right position in
 // the set topology, this monitoring happens at a given monitoring interval.
-func (s *xlSets) monitorAndConnectEndpoints(monitorInterval time.Duration) {
+func (s *xlSets) monitorAndConnectEndpoints(ctx context.Context, monitorInterval time.Duration) {
 
 	ticker := time.NewTicker(monitorInterval)
 	// Stop the timer.
@@ -243,7 +243,7 @@
 
 	for {
 		select {
-		case <-GlobalServiceDoneCh:
+		case <-ctx.Done():
 			return
 		case <-s.disksConnectDoneCh:
 			return
@@ -332,7 +332,7 @@ func newXLSets(endpoints Endpoints, format *formatXLV3, setCount int, drivesPerS
 	s.connectDisksWithQuorum()
 
 	// Start the disk monitoring and connect routine.
-	go s.monitorAndConnectEndpoints(defaultMonitorConnectEndpointInterval)
+	go s.monitorAndConnectEndpoints(GlobalContext, defaultMonitorConnectEndpointInterval)
 
 	go s.maintainMRFList()
 	go s.healMRFRoutine()
@@ -445,8 +445,8 @@ func (s *xlSets) StorageInfo(ctx context.Context, local bool) StorageInfo {
 	return storageInfo
 }
 
-func (s *xlSets) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo {
-	return DataUsageInfo{}
+func (s *xlSets) CrawlAndGetDataUsage(ctx context.Context, updates chan<- DataUsageInfo) error {
+	return NotImplemented{}
 }
 
 // Shutdown shuts down all erasure coded sets in parallel
@@ -1327,7 +1327,7 @@ func (s *xlSets) ReloadFormat(ctx context.Context, dryRun bool) (err error) {
 	s.connectDisks()
 
 	// Restart monitoring loop to monitor reformatted disks again.
-	go s.monitorAndConnectEndpoints(defaultMonitorConnectEndpointInterval)
+	go s.monitorAndConnectEndpoints(GlobalContext, defaultMonitorConnectEndpointInterval)
 
 	return nil
 }
@@ -1516,7 +1516,7 @@ func (s *xlSets) HealFormat(ctx context.Context, dryRun bool) (res madmin.HealRe
 	s.connectDisks()
 
 	// Restart our monitoring loop to start monitoring newly formatted disks.
- go s.monitorAndConnectEndpoints(defaultMonitorConnectEndpointInterval) + go s.monitorAndConnectEndpoints(GlobalContext, defaultMonitorConnectEndpointInterval) } return res, nil @@ -1764,6 +1764,10 @@ func (s *xlSets) healMRFRoutine() { // Wait until background heal state is initialized var bgSeq *healSequence for { + if globalBackgroundHealState == nil { + time.Sleep(time.Second) + continue + } var ok bool bgSeq, ok = globalBackgroundHealState.getHealSequenceByToken(bgHealingUUID) if ok { diff --git a/cmd/xl-v1-multipart.go b/cmd/xl-v1-multipart.go index 0d4dcd0ce..c4d661c1d 100644 --- a/cmd/xl-v1-multipart.go +++ b/cmd/xl-v1-multipart.go @@ -765,7 +765,7 @@ func (xl xlObjects) AbortMultipartUpload(ctx context.Context, bucket, object, up } // Clean-up the old multipart uploads. Should be run in a Go routine. -func (xl xlObjects) cleanupStaleMultipartUploads(ctx context.Context, cleanupInterval, expiry time.Duration, doneCh chan struct{}) { +func (xl xlObjects) cleanupStaleMultipartUploads(ctx context.Context, cleanupInterval, expiry time.Duration, doneCh <-chan struct{}) { ticker := time.NewTicker(cleanupInterval) defer ticker.Stop() diff --git a/cmd/xl-v1-multipart_test.go b/cmd/xl-v1-multipart_test.go index cef56b1e1..402d626a3 100644 --- a/cmd/xl-v1-multipart_test.go +++ b/cmd/xl-v1-multipart_test.go @@ -18,6 +18,7 @@ package cmd import ( "context" + "sync" "testing" "time" ) @@ -35,10 +36,6 @@ func TestXLCleanupStaleMultipartUploads(t *testing.T) { z := obj.(*xlZones) xl := z.zones[0].sets[0] - // Close the go-routine, we are going to - // manually start it and test in this test case. - GlobalServiceDoneCh <- struct{}{} - bucketName := "bucket" objectName := "object" var opts ObjectOptions @@ -49,19 +46,30 @@ func TestXLCleanupStaleMultipartUploads(t *testing.T) { t.Fatal("Unexpected err: ", err) } - go xl.cleanupStaleMultipartUploads(context.Background(), 20*time.Millisecond, 0, GlobalServiceDoneCh) + // Create a context we can cancel. + ctx, cancel := context.WithCancel(context.Background()) - // Wait for 40ms such that - we have given enough time for - // cleanup routine to kick in. - time.Sleep(40 * time.Millisecond) + var cleanupWg sync.WaitGroup + cleanupWg.Add(1) + go func() { + defer cleanupWg.Done() + xl.cleanupStaleMultipartUploads(context.Background(), time.Millisecond, 0, ctx.Done()) + }() - // Close the routine we do not need it anymore. - GlobalServiceDoneCh <- struct{}{} + // Wait for 100ms such that - we have given enough time for cleanup routine to kick in. + // Flaky on slow systems :/ + time.Sleep(100 * time.Millisecond) + + // Exit cleanup.. + cancel() + cleanupWg.Wait() // Check if upload id was already purged. 
if err = obj.AbortMultipartUpload(context.Background(), bucketName, objectName, uploadID); err != nil { if _, ok := err.(InvalidUploadID); !ok { t.Fatal("Unexpected err: ", err) } + } else { + t.Error("Item was not cleaned up.") } } diff --git a/cmd/xl-v1.go b/cmd/xl-v1.go index cb924d06e..d23c04e2b 100644 --- a/cmd/xl-v1.go +++ b/cmd/xl-v1.go @@ -18,11 +18,15 @@ package cmd import ( "context" + "fmt" + "path" "sort" "sync" + "time" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/bpool" + "github.com/minio/minio/pkg/color" "github.com/minio/minio/pkg/dsync" "github.com/minio/minio/pkg/madmin" "github.com/minio/minio/pkg/sync/errgroup" ) @@ -195,45 +199,177 @@ func (xl xlObjects) GetMetrics(ctx context.Context) (*Metrics, error) { return &Metrics{}, NotImplemented{} } -// CrawlAndGetDataUsage picks three random disks to crawl and get data usage -func (xl xlObjects) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { - var randomDisks []StorageAPI +// CrawlAndGetDataUsage will start crawling buckets and send updated totals as they are traversed. +// Updates are sent on a regular basis and the caller *must* consume them. +func (xl xlObjects) CrawlAndGetDataUsage(ctx context.Context, updates chan<- DataUsageInfo) error { + cache := make(chan dataUsageCache, 1) + defer close(cache) + buckets, err := xl.ListBuckets(ctx) + if err != nil { + return err + } + go func() { + for update := range cache { + updates <- update.dui(update.Info.Name, buckets) + } + }() + return xl.crawlAndGetDataUsage(ctx, buckets, cache) +} + +// crawlAndGetDataUsage will crawl the given buckets and send updated totals as they are traversed. +// Updates are sent on a regular basis and the caller *must* consume them. +func (xl xlObjects) crawlAndGetDataUsage(ctx context.Context, buckets []BucketInfo, updates chan<- dataUsageCache) error { + var disks []StorageAPI + for _, d := range xl.getLoadBalancedDisks() { if d == nil || !d.IsOnline() { continue } - randomDisks = append(randomDisks, d) - if len(randomDisks) >= 3 { - break + disks = append(disks, d) + } + if len(disks) == 0 || len(buckets) == 0 { + return nil + } + + // Load bucket totals + oldCache := dataUsageCache{} + err := oldCache.load(ctx, xl, dataUsageCacheName) + if err != nil { + return err + } + + // New cache. + cache := dataUsageCache{ + Info: dataUsageCacheInfo{ + Name: dataUsageRoot, + NextCycle: oldCache.Info.NextCycle, + }, + Cache: make(map[dataUsageHash]dataUsageEntry, len(oldCache.Cache)), + } + + // Put all buckets into channel. + bucketCh := make(chan BucketInfo, len(buckets)) + // Add new buckets first. + for _, b := range buckets { + if oldCache.find(b.Name) == nil { + bucketCh <- b + } + } + // Add existing buckets. + for _, b := range buckets { + e := oldCache.find(b.Name) + if e != nil { + bucketCh <- b + cache.replace(b.Name, dataUsageRoot, *e) } } - var dataUsageResults = make([]DataUsageInfo, len(randomDisks)) + close(bucketCh) + buckets = nil + bucketResults := make(chan dataUsageEntryInfo, len(disks)) - var wg sync.WaitGroup - for i := 0; i < len(randomDisks); i++ { - wg.Add(1) - go func(index int, disk StorageAPI) { - defer wg.Done() - var err error - dataUsageResults[index], err = disk.CrawlAndGetDataUsage(endCh) - if err != nil { - logger.LogIf(ctx, err) + // Start async collector/saver. + // This goroutine owns the cache.
+ var saverWg sync.WaitGroup + saverWg.Add(1) + go func() { + const updateTime = 30 * time.Second + t := time.NewTicker(updateTime) + defer t.Stop() + defer saverWg.Done() + var lastSave time.Time + + saveLoop: + for { + select { + case <-ctx.Done(): + // Return without saving. + return + case <-t.C: + if cache.Info.LastUpdate.Equal(lastSave) { + continue + } + logger.LogIf(ctx, cache.save(ctx, xl, dataUsageCacheName)) + updates <- cache.clone() + lastSave = cache.Info.LastUpdate + case v, ok := <-bucketResults: + if !ok { + break saveLoop + } + cache.replace(v.Name, v.Parent, v.Entry) + cache.Info.LastUpdate = time.Now() } - }(i, randomDisks[i]) + } + // Save final state... + cache.Info.NextCycle++ + cache.Info.LastUpdate = time.Now() + logger.LogIf(ctx, cache.save(ctx, xl, dataUsageCacheName)) + updates <- cache + }() + + // Start one crawler per disk + var wg sync.WaitGroup + wg.Add(len(disks)) + for i := range disks { + go func(i int) { + defer wg.Done() + disk := disks[i] + + for bucket := range bucketCh { + select { + case <-ctx.Done(): + return + default: + } + if dataUsageDebug { + logger.Info(color.Green("crawlAndGetDataUsage:")+" Scanning bucket %v.", bucket.Name) + } + // Load cache for bucket + cacheName := path.Join(dataUsageBucketCacheDir, bucket.Name+".bin") + cache := dataUsageCache{} + logger.LogIf(ctx, cache.load(ctx, xl, cacheName)) + if cache.Info.Name == "" { + cache.Info.Name = bucket.Name + } + if cache.Info.Name != bucket.Name { + logger.LogIf(ctx, fmt.Errorf("cache name mismatch: %s != %s", cache.Info.Name, bucket.Name)) + cache.Info = dataUsageCacheInfo{ + Name: bucket.Name, + LastUpdate: time.Time{}, + NextCycle: 0, + } + } + + // Calc usage + before := cache.Info.LastUpdate + cache, err = disk.CrawlAndGetDataUsage(ctx, cache) + if err != nil { + logger.LogIf(ctx, err) + if cache.Info.LastUpdate.After(before) { + logger.LogIf(ctx, cache.save(ctx, xl, cacheName)) + } + continue + } + + var root dataUsageEntry + if r := cache.root(); r != nil { + root = cache.flatten(*r) + } + bucketResults <- dataUsageEntryInfo{ + Name: cache.Info.Name, + Parent: dataUsageRoot, + Entry: root, + } + // Save cache + logger.LogIf(ctx, cache.save(ctx, xl, cacheName)) + } + }(i) } wg.Wait() + close(bucketResults) + saverWg.Wait() - var dataUsageInfo = dataUsageResults[0] - // Pick the crawling result of the disk which has the most - // number of objects in it. - for i := 1; i < len(dataUsageResults); i++ { - if dataUsageResults[i].ObjectsCount > dataUsageInfo.ObjectsCount { - dataUsageInfo = dataUsageResults[i] - } - } - - return dataUsageInfo + return nil } // IsReady - No Op. 
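The collector/saver goroutine above is the concurrency backbone of the new crawler: exactly one goroutine mutates the cache, the per-disk crawlers only send results over a channel, and a ticker checkpoints progress so an interrupted cycle can resume from disk. Here is a condensed sketch of that shape with simplified types; save and the int results are placeholders, not MinIO types:

```go
// collect owns all mutable state: workers send results, a ticker
// checkpoints progress, and a final save runs when the results
// channel closes. Sketch only, with an int standing in for the cache.
func collect(ctx context.Context, results <-chan int, save func(sum int)) {
	ticker := time.NewTicker(30 * time.Second)
	defer ticker.Stop()
	sum, dirty := 0, false
	for {
		select {
		case <-ctx.Done():
			return // cancelled: exit without a final save
		case <-ticker.C:
			if dirty {
				save(sum) // periodic checkpoint, as in saveLoop above
				dirty = false
			}
		case r, ok := <-results:
			if !ok {
				save(sum) // input closed: persist the final state
				return
			}
			sum += r
			dirty = true
		}
	}
}
```

Because only this goroutine touches the state, no mutex is needed; channel ownership takes the place of locking.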
diff --git a/cmd/xl-zones.go b/cmd/xl-zones.go index 6ef02688a..7c16c067c 100644 --- a/cmd/xl-zones.go +++ b/cmd/xl-zones.go @@ -24,6 +24,7 @@ import ( "net/http" "strings" "sync" + "time" xhttp "github.com/minio/minio/cmd/http" "github.com/minio/minio/cmd/logger" @@ -31,6 +32,7 @@ import ( "github.com/minio/minio/pkg/bucket/lifecycle" "github.com/minio/minio/pkg/bucket/object/tagging" "github.com/minio/minio/pkg/bucket/policy" + "github.com/minio/minio/pkg/color" "github.com/minio/minio/pkg/madmin" "github.com/minio/minio/pkg/sync/errgroup" ) @@ -215,44 +217,111 @@ func (z *xlZones) StorageInfo(ctx context.Context, local bool) StorageInfo { return storageInfo } -func (z *xlZones) CrawlAndGetDataUsage(ctx context.Context, endCh <-chan struct{}) DataUsageInfo { - var aggDataUsageInfo = struct { - sync.Mutex - DataUsageInfo - }{} - - aggDataUsageInfo.ObjectsSizesHistogram = make(map[string]uint64) - aggDataUsageInfo.BucketsSizes = make(map[string]uint64) - +func (z *xlZones) CrawlAndGetDataUsage(ctx context.Context, updates chan<- DataUsageInfo) error { + ctx, cancel := context.WithCancel(ctx) + defer cancel() var wg sync.WaitGroup + var mu sync.Mutex + var results []dataUsageCache + var firstErr error + var knownBuckets = make(map[string]struct{}) // used to deduplicate buckets. + var allBuckets []BucketInfo + + t := time.Now() + if dataUsageDebug { + logger.Info(color.Green("xlZones.CrawlAndGetDataUsage:") + " Start crawl cycle") + } + // Collect for each set in zones. for _, z := range z.zones { for _, xlObj := range z.sets { + // Add new buckets. + buckets, err := xlObj.ListBuckets(ctx) + if err != nil { + return err + } + for _, b := range buckets { + if _, ok := knownBuckets[b.Name]; ok { + continue + } + allBuckets = append(allBuckets, b) + knownBuckets[b.Name] = struct{}{} + } wg.Add(1) - go func(xl *xlObjects) { - defer wg.Done() - info := xl.CrawlAndGetDataUsage(ctx, endCh) - - aggDataUsageInfo.Lock() - aggDataUsageInfo.ObjectsCount += info.ObjectsCount - aggDataUsageInfo.ObjectsTotalSize += info.ObjectsTotalSize - if aggDataUsageInfo.BucketsCount < info.BucketsCount { - aggDataUsageInfo.BucketsCount = info.BucketsCount + results = append(results, dataUsageCache{}) + go func(i int, xl *xlObjects) { + updates := make(chan dataUsageCache, 1) + defer close(updates) + // Start update collector. + go func() { + defer wg.Done() + for info := range updates { + mu.Lock() + results[i] = info + mu.Unlock() + } + }() + // Start crawler. Blocks until done. + err := xl.crawlAndGetDataUsage(ctx, buckets, updates) + if err != nil { + mu.Lock() + if firstErr == nil { + firstErr = err + } + // Cancel remaining... + cancel() + mu.Unlock() + return } - for k, v := range info.ObjectsSizesHistogram { - aggDataUsageInfo.ObjectsSizesHistogram[k] += v - } - for k, v := range info.BucketsSizes { - aggDataUsageInfo.BucketsSizes[k] += v - } - aggDataUsageInfo.Unlock() - - }(xlObj) + }(len(results)-1, xlObj) } } - wg.Wait() + updateCloser := make(chan chan struct{}) + go func() { + updateTicker := time.NewTicker(30 * time.Second) + defer updateTicker.Stop() + var lastUpdate time.Time + update := func() { + mu.Lock() + defer mu.Unlock() - aggDataUsageInfo.LastUpdate = UTCNow() - return aggDataUsageInfo.DataUsageInfo + // We need to merge since we will get the same buckets from each zone. + // Therefore to get the exact bucket sizes we must merge before we can convert. 
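As the comment above notes, every zone reports the same bucket names with partial sizes, so per-bucket totals must be summed across zones before conversion to a DataUsageInfo. A toy illustration of that merge with simplified types; a map of bucket name to size stands in for dataUsageCache:

```go
// mergeZones sums the per-bucket sizes reported by each zone; the same
// bucket appears once per zone. Simplified stand-in for the real
// dataUsageCache.merge, not the patch's implementation.
func mergeZones(zones []map[string]uint64) map[string]uint64 {
	merged := make(map[string]uint64)
	for _, zone := range zones {
		for bucket, size := range zone {
			merged[bucket] += size
		}
	}
	return merged
}
```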
+ allMerged := dataUsageCache{Info: dataUsageCacheInfo{Name: dataUsageRoot}} + for _, info := range results { + if info.Info.LastUpdate.IsZero() { + // Not filled yet. + return + } + allMerged.merge(info) + } + if allMerged.root() != nil && allMerged.Info.LastUpdate.After(lastUpdate) { + updates <- allMerged.dui(allMerged.Info.Name, allBuckets) + lastUpdate = allMerged.Info.LastUpdate + } + } + for { + select { + case <-ctx.Done(): + return + case v := <-updateCloser: + update() + close(v) + return + case <-updateTicker.C: + update() + } + } + }() + + wg.Wait() + if dataUsageDebug { + logger.Info(color.Green("xlZones.CrawlAndGetDataUsage:")+" Cycle scan time: %v", time.Since(t)) + } + ch := make(chan struct{}) + updateCloser <- ch + <-ch + + return firstErr } // This function is used to undo a successful MakeBucket operation. diff --git a/go.mod b/go.mod index e330d0816..b92b6bc50 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,7 @@ require ( github.com/baiyubin/aliyun-sts-go-sdk v0.0.0-20180326062324-cfa1a18b161f // indirect github.com/bcicen/jstream v0.0.0-20190220045926-16c1f8af81c2 github.com/beevik/ntp v0.2.0 + github.com/cespare/xxhash/v2 v2.1.1 github.com/cheggaaa/pb v1.0.28 github.com/coredns/coredns v1.4.0 github.com/coreos/bbolt v1.3.3 // indirect @@ -86,6 +87,7 @@ require ( github.com/nats-io/stan.go v0.4.5 github.com/ncw/directio v1.0.5 github.com/nsqio/go-nsq v1.0.7 + github.com/philhofer/fwd v1.0.0 // indirect github.com/pkg/errors v0.8.1 github.com/prometheus/client_golang v0.9.3-0.20190127221311-3c4408c8b829 github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90 // indirect @@ -100,6 +102,7 @@ require ( github.com/smartystreets/assertions v0.0.0-20190401211740-f487f9de1cd3 // indirect github.com/soheilhy/cmux v0.1.4 // indirect github.com/streadway/amqp v0.0.0-20190404075320-75d898a42a94 + github.com/tinylib/msgp v1.1.1 github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5 // indirect github.com/ugorji/go v1.1.5-pre // indirect github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a diff --git a/go.sum b/go.sum index 5c53f301b..1b6407e8f 100644 --- a/go.sum +++ b/go.sum @@ -45,6 +45,9 @@ github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kB github.com/boltdb/bolt v1.3.1/go.mod h1:clJnj/oiGkjum5o1McbSZDSLxVThjynRyGBgiAx27Ps= github.com/census-instrumentation/opencensus-proto v0.2.0 h1:LzQXZOgg4CQfE6bFvXGM30YZL1WW/M337pXml+GrcZ4= github.com/census-instrumentation/opencensus-proto v0.2.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= +github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= +github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= +github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cheggaaa/pb v1.0.28 h1:kWGpdAcSp3MxMU9CCHOwz/8V0kCHN4+9yQm2MzWuI98= github.com/cheggaaa/pb v1.0.28/go.mod h1:pQciLPpbU0oxA0h+VJYYLxO+XeDQb5pZijXscXHm81s= github.com/circonus-labs/circonus-gometrics v2.3.1+incompatible/go.mod h1:nmEj6Dob7S7YxXgwXpfOuvO54S+tGdZdw9fuRZt25Ag= @@ -341,6 +344,8 @@ github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQ github.com/pascaldekloe/goe v0.1.0 h1:cBOtyMzM9HTpWjXfbbunk26uA6nG3a8n06Wieeh0MwY= github.com/pascaldekloe/goe v0.1.0/go.mod h1:lzWF7FIEvWOWxwDKqyGYQf6ZUaNfKdP144TG7ZOy1lc= github.com/pborman/getopt v0.0.0-20180729010549-6fdd0a2c7117/go.mod h1:85jBQOZwpVEaDAr341tbn15RS4fCAsIst0qp7i8ex1o= +github.com/philhofer/fwd v1.0.0 
h1:UbZqGr5Y38ApvM/V/jEljVxwocdweyH+vmYvRPBnbqQ= +github.com/philhofer/fwd v1.0.0/go.mod h1:gk3iGcWd9+svBvR0sR+KPcfE+RNWozjowpeBVG3ZVNU= github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4 v2.2.6+incompatible h1:6aCX4/YZ9v8q69hTyiR7dNLnTA3fgtKHVVW5BCd5Znw= @@ -413,6 +418,8 @@ github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= github.com/tidwall/sjson v1.0.4 h1:UcdIRXff12Lpnu3OLtZvnc03g4vH2suXDXhBwBqmzYg= github.com/tidwall/sjson v1.0.4/go.mod h1:bURseu1nuBkFpIES5cz6zBtjmYeOQmEESshn7VpF15Y= +github.com/tinylib/msgp v1.1.1 h1:TnCZ3FIuKeaIy+F45+Cnp+caqdXGy4z74HvwXN+570Y= +github.com/tinylib/msgp v1.1.1/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5 h1:LnC5Kc/wtumK+WB441p7ynQJzVuNRJiqddSIE3IlSEQ= github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= diff --git a/mint/run/core/aws-sdk-go/quick-tests.go b/mint/run/core/aws-sdk-go/quick-tests.go index 8c4499ff9..7657b048a 100644 --- a/mint/run/core/aws-sdk-go/quick-tests.go +++ b/mint/run/core/aws-sdk-go/quick-tests.go @@ -234,6 +234,11 @@ func testPresignedPutInvalidHash(s3Client *s3.S3) { } rreq, err := http.NewRequest("PUT", url, bytes.NewReader([]byte(""))) + if err != nil { + failureLog(function, args, startTime, "", "AWS SDK Go presigned PUT request failed", err).Fatal() + return + } + rreq.Header.Add("X-Amz-Content-Sha256", "invalid-sha256") rreq.Header.Add("Content-Type", "application/octet-stream") @@ -390,6 +395,10 @@ func testSelectObject(s3Client *s3.S3) { Key: aws.String(object1), } _, err = s3Client.PutObject(putInput1) + if err != nil { + failureLog(function, args, startTime, "", fmt.Sprintf("AWS SDK Go Select object failed %v", err), err).Fatal() + return + } defer cleanup(s3Client, bucket, object1, function, args, startTime, true) @@ -470,6 +479,10 @@ func testSelectObject(s3Client *s3.S3) { Key: aws.String(object2), } _, err = s3Client.PutObject(putInput2) + if err != nil { + failureLog(function, args, startTime, "", fmt.Sprintf("AWS SDK Go Select object upload failed: %v", err), err).Fatal() + return + } defer cleanup(s3Client, bucket, object2, function, args, startTime, false) @@ -696,7 +709,7 @@ func testObjectTaggingErrors(s3Client *s3.S3) { _, err = s3Client.PutObjectTagging(input) if err == nil { - failureLog(function, args, startTime, "", fmt.Sprintf("AWS SDK Go PUT expected to fail but succeeded"), err).Fatal() + failureLog(function, args, startTime, "", "AWS SDK Go PUT expected to fail but succeeded", err).Fatal() return } @@ -727,7 +740,7 @@ func testObjectTaggingErrors(s3Client *s3.S3) { _, err = s3Client.PutObjectTagging(input) if err == nil { - failureLog(function, args, startTime, "", fmt.Sprintf("AWS SDK Go PUT expected to fail but succeeded"), err).Fatal() + failureLog(function, args, startTime, "", "AWS SDK Go PUT expected to fail but succeeded", err).Fatal() return } @@ -758,7 +771,7 @@ func testObjectTaggingErrors(s3Client *s3.S3) { _, err = s3Client.PutObjectTagging(input) if err == nil { - failureLog(function, args, startTime, "", fmt.Sprintf("AWS SDK Go PUT expected to fail 
but succeeded"), err).Fatal() + failureLog(function, args, startTime, "", "AWS SDK Go PUT expected to fail but succeeded", err).Fatal() return } @@ -789,7 +802,7 @@ func testObjectTaggingErrors(s3Client *s3.S3) { _, err = s3Client.PutObjectTagging(input) if err == nil { - failureLog(function, args, startTime, "", fmt.Sprintf("AWS SDK Go PUT expected to fail but succeeded"), err).Fatal() + failureLog(function, args, startTime, "", "AWS SDK Go PUT expected to fail but succeeded", err).Fatal() return } diff --git a/pkg/event/errors.go b/pkg/event/errors.go index b9024d081..d06acd48c 100644 --- a/pkg/event/errors.go +++ b/pkg/event/errors.go @@ -64,14 +64,14 @@ func (err ErrInvalidFilterName) Error() string { type ErrFilterNamePrefix struct{} func (err ErrFilterNamePrefix) Error() string { - return fmt.Sprintf("more than one prefix in filter rule") + return "more than one prefix in filter rule" } // ErrFilterNameSuffix - more than one suffix usage error. type ErrFilterNameSuffix struct{} func (err ErrFilterNameSuffix) Error() string { - return fmt.Sprintf("more than one suffix in filter rule") + return "more than one suffix in filter rule" } // ErrInvalidFilterValue - invalid filter value error. diff --git a/pkg/event/target/amqp.go b/pkg/event/target/amqp.go index a75dac2b6..ab0259a95 100644 --- a/pkg/event/target/amqp.go +++ b/pkg/event/target/amqp.go @@ -49,6 +49,8 @@ type AMQPArgs struct { QueueLimit uint64 `json:"queueLimit"` } +//lint:file-ignore ST1003 We cannot change these exported names. + // AMQP input constants. const ( AmqpQueueDir = "queue_dir" diff --git a/pkg/handlers/forwarder.go b/pkg/handlers/forwarder.go index 9c0eb2ad5..5c25f4652 100644 --- a/pkg/handlers/forwarder.go +++ b/pkg/handlers/forwarder.go @@ -91,7 +91,8 @@ func (f *Forwarder) getURLFromRequest(req *http.Request) *url.URL { func copyURL(i *url.URL) *url.URL { out := *i if i.User != nil { - out.User = &(*i.User) + u := *i.User + out.User = &u } return &out } diff --git a/pkg/ioutil/ioutil.go b/pkg/ioutil/ioutil.go index 9aa37cb26..ac77e489d 100644 --- a/pkg/ioutil/ioutil.go +++ b/pkg/ioutil/ioutil.go @@ -78,7 +78,7 @@ type LimitWriter struct { wLimit int64 } -// Implements the io.Writer interface limiting upto +// Write implements the io.Writer interface limiting upto // configured length, also skips the first N bytes. func (w *LimitWriter) Write(p []byte) (n int, err error) { n = len(p) diff --git a/pkg/lock/lock_windows.go b/pkg/lock/lock_windows.go index 89d79e727..bd7a6c3c0 100644 --- a/pkg/lock/lock_windows.go +++ b/pkg/lock/lock_windows.go @@ -80,6 +80,7 @@ func TryLockedOpenFile(path string, flag int, perm os.FileMode) (*LockedFile, er switch flag { case syscall.O_RDONLY: // https://docs.microsoft.com/en-us/windows/desktop/api/fileapi/nf-fileapi-lockfileex + //lint:ignore SA4016 Reasons lockType = lockFileFailImmediately | 0 // Set this to enable shared lock and fail immediately. 
} return lockedOpenFile(path, flag, perm, lockType) diff --git a/pkg/madmin/info-commands.go b/pkg/madmin/info-commands.go index 1019d0dd7..4c85a4e85 100644 --- a/pkg/madmin/info-commands.go +++ b/pkg/madmin/info-commands.go @@ -21,6 +21,7 @@ import ( "encoding/json" "errors" "io/ioutil" + "math" "net/http" "net/url" "strconv" @@ -155,17 +156,24 @@ var ObjectsHistogramIntervals = []objectHistogramInterval{ {"BETWEEN_10_MB_AND_64_MB", 1024 * 1024 * 10, 1024*1024*64 - 1}, {"BETWEEN_64_MB_AND_128_MB", 1024 * 1024 * 64, 1024*1024*128 - 1}, {"BETWEEN_128_MB_AND_512_MB", 1024 * 1024 * 128, 1024*1024*512 - 1}, - {"GREATER_THAN_512_MB", 1024 * 1024 * 512, -1}, + {"GREATER_THAN_512_MB", 1024 * 1024 * 512, math.MaxInt64}, } // DataUsageInfo represents data usage of an Object API type DataUsageInfo struct { - LastUpdate time.Time `json:"lastUpdate"` - ObjectsCount uint64 `json:"objectsCount"` - ObjectsTotalSize uint64 `json:"objectsTotalSize"` + // LastUpdate is the timestamp of when the data usage info was last updated. + // This does not indicate a full scan. + LastUpdate time.Time `json:"lastUpdate"` + ObjectsCount uint64 `json:"objectsCount"` + ObjectsTotalSize uint64 `json:"objectsTotalSize"` + + // ObjectsSizesHistogram contains information on objects across all buckets. + // See ObjectsHistogramIntervals. ObjectsSizesHistogram map[string]uint64 `json:"objectsSizesHistogram"` - BucketsCount uint64 `json:"bucketsCount"` + BucketsCount uint64 `json:"bucketsCount"` + + // BucketsSizes is "bucket name" -> size. BucketsSizes map[string]uint64 `json:"bucketsSizes"` } diff --git a/pkg/s3select/sql/utils.go b/pkg/s3select/sql/utils.go index 7d7564dbe..64ab96aa9 100644 --- a/pkg/s3select/sql/utils.go +++ b/pkg/s3select/sql/utils.go @@ -50,7 +50,7 @@ func (e *JSONPathElement) String() string { return "" } -// Removes double quotes in quoted identifiers +// String removes double quotes in quoted identifiers func (i *Identifier) String() string { if i.Unquoted != nil { return *i.Unquoted
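The histogram change above replaces the -1 sentinel for "no upper bound" with math.MaxInt64, which lets interval membership be a plain range check with no special case for the open-ended top bucket. A sketch of such a lookup; it assumes the interval struct exposes name/start/end fields matching the literals above:

```go
// histogramKey returns the name of the interval a size falls into.
// With the top bucket ending at math.MaxInt64, `size <= iv.end` needs
// no -1 special case. Field names are assumed from the literals above.
func histogramKey(size int64) string {
	for _, iv := range ObjectsHistogramIntervals {
		if size >= iv.start && size <= iv.end {
			return iv.name
		}
	}
	return "" // unreachable for non-negative sizes
}
```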