/* * MinIO Cloud Storage, (C) 2020 MinIO, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package cmd import ( "bytes" "context" "encoding/binary" "errors" "fmt" "io" "path" "path/filepath" "strings" "time" "github.com/cespare/xxhash/v2" "github.com/minio/minio/cmd/logger" "github.com/minio/minio/pkg/hash" "github.com/tinylib/msgp/msgp" ) const dataUsageHashLen = 8 //go:generate msgp -file $GOFILE -unexported // dataUsageHash is the hash type used. type dataUsageHash uint64 // sizeHistogram is a size histogram. type sizeHistogram [dataUsageBucketLen]uint64 //msgp:tuple dataUsageEntry type dataUsageEntry struct { // These fields do no include any children. Size int64 Objects uint64 ObjSizes sizeHistogram Children dataUsageHashMap } //msgp:ignore dataUsageEntryInfo type dataUsageEntryInfo struct { Name string Parent string Entry dataUsageEntry } type dataUsageCacheInfo struct { // Name of the bucket. Also root element. Name string LastUpdate time.Time NextCycle uint32 BloomFilter []byte `msg:"BloomFilter,omitempty"` } // merge other data usage entry into this, excluding children. func (e *dataUsageEntry) merge(other dataUsageEntry) { e.Objects += other.Objects e.Size += other.Size for i, v := range other.ObjSizes[:] { e.ObjSizes[i] += v } } // mod returns true if the hash mod cycles == cycle. func (h dataUsageHash) mod(cycle uint32, cycles uint32) bool { return uint32(h)%cycles == cycle%cycles } // addChildString will add a child based on its name. // If it already exists it will not be added again. func (e *dataUsageEntry) addChildString(name string) { e.addChild(hashPath(name)) } // addChild will add a child based on its hash. // If it already exists it will not be added again. func (e *dataUsageEntry) addChild(hash dataUsageHash) { if _, ok := e.Children[hash]; ok { return } if e.Children == nil { e.Children = make(dataUsageHashMap, 1) } e.Children[hash] = struct{}{} } // find a path in the cache. // Returns nil if not found. func (d *dataUsageCache) find(path string) *dataUsageEntry { due, ok := d.Cache[hashPath(path)] if !ok { return nil } return &due } // dui converts the flattened version of the path to DataUsageInfo. // As a side effect d will be flattened, use a clone if this is not ok. func (d *dataUsageCache) dui(path string, buckets []BucketInfo) DataUsageInfo { e := d.find(path) if e == nil { return DataUsageInfo{LastUpdate: UTCNow()} } flat := d.flatten(*e) return DataUsageInfo{ LastUpdate: d.Info.LastUpdate, ObjectsCount: flat.Objects, ObjectsTotalSize: uint64(flat.Size), ObjectsSizesHistogram: flat.ObjSizes.asMap(), BucketsCount: uint64(len(e.Children)), BucketsSizes: d.pathSizes(buckets), } } // replace will add or replace an entry in the cache. // If a parent is specified it will be added to that if not already there. // If the parent does not exist, it will be added. func (d *dataUsageCache) replace(path, parent string, e dataUsageEntry) { hash := hashPath(path) if d.Cache == nil { d.Cache = make(map[dataUsageHash]dataUsageEntry, 100) } d.Cache[hash] = e if parent != "" { phash := hashPath(parent) p := d.Cache[phash] p.addChild(hash) d.Cache[phash] = p } } // replaceHashed add or replaces an entry to the cache based on its hash. // If a parent is specified it will be added to that if not already there. // If the parent does not exist, it will be added. func (d *dataUsageCache) replaceHashed(hash dataUsageHash, parent *dataUsageHash, e dataUsageEntry) { if d.Cache == nil { d.Cache = make(map[dataUsageHash]dataUsageEntry, 100) } d.Cache[hash] = e if parent != nil { p := d.Cache[*parent] p.addChild(hash) d.Cache[*parent] = p } } // copyWithChildren will copy entry with hash from src if it exists along with any children. // If a parent is specified it will be added to that if not already there. // If the parent does not exist, it will be added. func (d *dataUsageCache) copyWithChildren(src *dataUsageCache, hash dataUsageHash, parent *dataUsageHash) { if d.Cache == nil { d.Cache = make(map[dataUsageHash]dataUsageEntry, 100) } e, ok := src.Cache[hash] if !ok { return } d.Cache[hash] = e for ch := range e.Children { if ch == hash { logger.LogIf(GlobalContext, errors.New("dataUsageCache.copyWithChildren: Circular reference")) return } d.copyWithChildren(src, ch, &hash) } if parent != nil { p := d.Cache[*parent] p.addChild(hash) d.Cache[*parent] = p } } // StringAll returns a detailed string representation of all entries in the cache. func (d *dataUsageCache) StringAll() string { s := fmt.Sprintf("info:%+v\n", d.Info) for k, v := range d.Cache { s += fmt.Sprintf("\t%v: %+v\n", k, v) } return strings.TrimSpace(s) } // insert the hash into dst. // dst must be at least dataUsageHashLen bytes long. func (h dataUsageHash) bytes(dst []byte) { binary.LittleEndian.PutUint64(dst, uint64(h)) } // String returns a human readable representation of the string. func (h dataUsageHash) String() string { return fmt.Sprintf("%x", uint64(h)) } // flatten all children of the root into the root element and return it. func (d *dataUsageCache) flatten(root dataUsageEntry) dataUsageEntry { for id := range root.Children { e := d.Cache[id] if len(e.Children) > 0 { e = d.flatten(e) } root.merge(e) } root.Children = nil return root } // add a size to the histogram. func (h *sizeHistogram) add(size int64) { // Fetch the histogram interval corresponding // to the passed object size. for i, interval := range ObjectsHistogramIntervals { if size >= interval.start && size <= interval.end { h[i]++ break } } } // asMap returns the map as a map[string]uint64. func (h *sizeHistogram) asMap() map[string]uint64 { res := make(map[string]uint64, 7) for i, count := range h { res[ObjectsHistogramIntervals[i].name] = count } return res } // pathSizes returns the path sizes as a map. func (d *dataUsageCache) pathSizes(buckets []BucketInfo) map[string]uint64 { var dst = make(map[string]uint64, len(buckets)) for _, bucket := range buckets { e := d.find(bucket.Name) if e == nil { continue } flat := d.flatten(*e) dst[bucket.Name] = uint64(flat.Size) } return dst } // sizeRecursive returns the path as a flattened entry. func (d *dataUsageCache) sizeRecursive(path string) *dataUsageEntry { root := d.find(path) if root == nil || len(root.Children) == 0 { return root } flat := d.flatten(*root) return &flat } // dataUsageCache contains a cache of data usage entries. //msgp:ignore dataUsageCache type dataUsageCache struct { Info dataUsageCacheInfo Cache map[dataUsageHash]dataUsageEntry } // root returns the root of the cache. func (d *dataUsageCache) root() *dataUsageEntry { return d.find(d.Info.Name) } // rootHash returns the root of the cache. func (d *dataUsageCache) rootHash() dataUsageHash { return hashPath(d.Info.Name) } // clone returns a copy of the cache with no references to the existing. func (d *dataUsageCache) clone() dataUsageCache { clone := dataUsageCache{ Info: d.Info, Cache: make(map[dataUsageHash]dataUsageEntry, len(d.Cache)), } for k, v := range d.Cache { clone.Cache[k] = v } return clone } // merge root of other into d. // children of root will be flattened before being merged. // Last update time will be set to the last updated. func (d *dataUsageCache) merge(other dataUsageCache) { existingRoot := d.root() otherRoot := other.root() if existingRoot == nil && otherRoot == nil { return } if otherRoot == nil { return } if existingRoot == nil { *d = other.clone() return } if other.Info.LastUpdate.After(d.Info.LastUpdate) { d.Info.LastUpdate = other.Info.LastUpdate } existingRoot.merge(*otherRoot) eHash := d.rootHash() for key := range otherRoot.Children { entry := other.Cache[key] flat := other.flatten(entry) existing := d.Cache[key] // If not found, merging simply adds. existing.merge(flat) d.replaceHashed(key, &eHash, existing) } } // load the cache content with name from minioMetaBackgroundOpsBucket. // Only backend errors are returned as errors. // If the object is not found or unable to deserialize d is cleared and nil error is returned. func (d *dataUsageCache) load(ctx context.Context, store ObjectLayer, name string) error { var buf bytes.Buffer err := store.GetObject(ctx, dataUsageBucket, name, 0, -1, &buf, "", ObjectOptions{}) if err != nil { if !isErrObjectNotFound(err) && !isErrBucketNotFound(err) { return toObjectErr(err, dataUsageBucket, name) } *d = dataUsageCache{} return nil } err = d.deserialize(buf.Bytes()) if err != nil { *d = dataUsageCache{} logger.LogIf(ctx, err) } return nil } // save the content of the cache to minioMetaBackgroundOpsBucket with the provided name. func (d *dataUsageCache) save(ctx context.Context, store ObjectLayer, name string) error { b := d.serialize() size := int64(len(b)) r, err := hash.NewReader(bytes.NewReader(b), size, "", "", size, false) if err != nil { return err } _, err = store.PutObject(ctx, dataUsageBucket, name, NewPutObjReader(r, nil, nil), ObjectOptions{}) return err } // dataUsageCacheVer indicates the cache version. // Bumping the cache version will drop data from previous versions // and write new data with the new version. const dataUsageCacheVer = 1 // serialize the contents of the cache. func (d *dataUsageCache) serialize() []byte { // Alloc pessimistically // dataUsageCacheVer due := dataUsageEntry{} msgLen := 1 msgLen += d.Info.Msgsize() // len(d.Cache) msgLen += binary.MaxVarintLen64 // Hashes (one for key, assume 1 child/node) msgLen += len(d.Cache) * dataUsageHashLen * 2 msgLen += len(d.Cache) * due.Msgsize() // Create destination buffer... dst := make([]byte, 0, msgLen) var n int tmp := make([]byte, 1024) // byte: version. dst = append(dst, dataUsageCacheVer) // Info... dst, err := d.Info.MarshalMsg(dst) if err != nil { panic(err) } n = binary.PutUvarint(tmp, uint64(len(d.Cache))) dst = append(dst, tmp[:n]...) for k, v := range d.Cache { // Put key binary.LittleEndian.PutUint64(tmp[:dataUsageHashLen], uint64(k)) dst = append(dst, tmp[:8]...) tmp, err = v.MarshalMsg(tmp[:0]) if err != nil { panic(err) } // key, value pairs. dst = append(dst, tmp...) } return dst } // deserialize the supplied byte slice into the cache. func (d *dataUsageCache) deserialize(b []byte) error { if len(b) < 1 { return io.ErrUnexpectedEOF } switch b[0] { case 1: default: return fmt.Errorf("dataUsageCache: unknown version: %d", int(b[0])) } b = b[1:] // Info... b, err := d.Info.UnmarshalMsg(b) if err != nil { return err } cacheLen, n := binary.Uvarint(b) if n <= 0 { return fmt.Errorf("dataUsageCache: reading cachelen, n <= 0 ") } b = b[n:] d.Cache = make(map[dataUsageHash]dataUsageEntry, cacheLen) for i := 0; i < int(cacheLen); i++ { if len(b) <= dataUsageHashLen { return io.ErrUnexpectedEOF } k := binary.LittleEndian.Uint64(b[:dataUsageHashLen]) b = b[dataUsageHashLen:] var v dataUsageEntry b, err = v.UnmarshalMsg(b) if err != nil { return err } d.Cache[dataUsageHash(k)] = v } return nil } // Trim this from start+end of hashes. var hashPathCutSet = dataUsageRoot func init() { if dataUsageRoot != string(filepath.Separator) { hashPathCutSet = dataUsageRoot + string(filepath.Separator) } } // hashPath calculates a hash of the provided string. func hashPath(data string) dataUsageHash { if data != dataUsageRoot { data = strings.Trim(data, hashPathCutSet) } data = path.Clean(data) return dataUsageHash(xxhash.Sum64String(data)) } //msgp:ignore dataUsageEntryInfo type dataUsageHashMap map[dataUsageHash]struct{} // MarshalMsg implements msgp.Marshaler func (d dataUsageHashMap) MarshalMsg(b []byte) (o []byte, err error) { o = msgp.Require(b, d.Msgsize()) // Write bin header manually const mbin32 uint8 = 0xc6 sz := uint32(len(d)) * dataUsageHashLen o = append(o, mbin32, byte(sz>>24), byte(sz>>16), byte(sz>>8), byte(sz)) var tmp [dataUsageHashLen]byte for k := range d { binary.LittleEndian.PutUint64(tmp[:], uint64(k)) o = append(o, tmp[:]...) } return } // Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message func (d dataUsageHashMap) Msgsize() (s int) { s = 5 + len(d)*dataUsageHashLen return } // UnmarshalMsg implements msgp.Unmarshaler func (d *dataUsageHashMap) UnmarshalMsg(bts []byte) (o []byte, err error) { var hashes []byte hashes, bts, err = msgp.ReadBytesZC(bts) if err != nil { err = msgp.WrapError(err, "dataUsageHashMap") return } var dst = make(dataUsageHashMap, len(hashes)/dataUsageHashLen) for len(hashes) >= dataUsageHashLen { dst[dataUsageHash(binary.LittleEndian.Uint64(hashes[:dataUsageHashLen]))] = struct{}{} hashes = hashes[dataUsageHashLen:] } *d = dst o = bts return } func (d *dataUsageHashMap) DecodeMsg(dc *msgp.Reader) (err error) { var zb0001 uint32 zb0001, err = dc.ReadBytesHeader() if err != nil { err = msgp.WrapError(err) return } var dst = make(dataUsageHashMap, zb0001) var tmp [8]byte for i := uint32(0); i < zb0001; i++ { _, err = io.ReadFull(dc, tmp[:]) if err != nil { err = msgp.WrapError(err, "dataUsageHashMap") return } dst[dataUsageHash(binary.LittleEndian.Uint64(tmp[:]))] = struct{}{} } return nil } func (d dataUsageHashMap) EncodeMsg(en *msgp.Writer) (err error) { err = en.WriteBytesHeader(uint32(len(d)) * dataUsageHashLen) if err != nil { err = msgp.WrapError(err) return } var tmp [dataUsageHashLen]byte for k := range d { binary.LittleEndian.PutUint64(tmp[:], uint64(k)) _, err = en.Write(tmp[:]) if err != nil { err = msgp.WrapError(err) return } } return nil }