2020-03-18 19:19:29 -04:00
|
|
|
/*
|
|
|
|
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package cmd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
|
|
|
"context"
|
2020-04-27 13:06:21 -04:00
|
|
|
"errors"
|
2020-03-18 19:19:29 -04:00
|
|
|
"fmt"
|
|
|
|
"io"
|
|
|
|
"path"
|
|
|
|
"path/filepath"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/cespare/xxhash/v2"
|
2020-06-12 13:28:21 -04:00
|
|
|
"github.com/klauspost/compress/zstd"
|
2020-03-18 19:19:29 -04:00
|
|
|
"github.com/minio/minio/cmd/logger"
|
2020-06-12 13:28:21 -04:00
|
|
|
"github.com/minio/minio/pkg/bucket/lifecycle"
|
2020-03-18 19:19:29 -04:00
|
|
|
"github.com/minio/minio/pkg/hash"
|
|
|
|
"github.com/tinylib/msgp/msgp"
|
|
|
|
)
|
|
|
|
|
|
|
|
//go:generate msgp -file $GOFILE -unexported
|
|
|
|
|
|
|
|
// dataUsageHash is the hash type used.
|
2020-06-12 13:28:21 -04:00
|
|
|
type dataUsageHash string
|
2020-03-18 19:19:29 -04:00
|
|
|
|
|
|
|
// sizeHistogram is a size histogram.
|
|
|
|
type sizeHistogram [dataUsageBucketLen]uint64
|
|
|
|
|
|
|
|
//msgp:tuple dataUsageEntry
|
|
|
|
type dataUsageEntry struct {
|
|
|
|
// These fields do no include any children.
|
2020-12-07 16:47:48 -05:00
|
|
|
Size int64
|
|
|
|
ReplicatedSize uint64
|
|
|
|
ReplicationPendingSize uint64
|
|
|
|
ReplicationFailedSize uint64
|
|
|
|
ReplicaSize uint64
|
|
|
|
Objects uint64
|
|
|
|
ObjSizes sizeHistogram
|
|
|
|
Children dataUsageHashMap
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
|
2020-06-12 13:28:21 -04:00
|
|
|
// dataUsageCache contains a cache of data usage entries.
|
|
|
|
type dataUsageCache struct {
|
|
|
|
Info dataUsageCacheInfo
|
2020-12-01 15:07:39 -05:00
|
|
|
Disks []string
|
2020-06-12 13:28:21 -04:00
|
|
|
Cache map[string]dataUsageEntry
|
|
|
|
}
|
|
|
|
|
2020-03-18 19:19:29 -04:00
|
|
|
//msgp:ignore dataUsageEntryInfo
|
|
|
|
type dataUsageEntryInfo struct {
|
|
|
|
Name string
|
|
|
|
Parent string
|
|
|
|
Entry dataUsageEntry
|
|
|
|
}
|
|
|
|
|
|
|
|
type dataUsageCacheInfo struct {
|
|
|
|
// Name of the bucket. Also root element.
|
2020-04-27 13:06:21 -04:00
|
|
|
Name string
|
|
|
|
LastUpdate time.Time
|
|
|
|
NextCycle uint32
|
2020-06-12 13:28:21 -04:00
|
|
|
BloomFilter []byte `msg:"BloomFilter,omitempty"`
|
|
|
|
lifeCycle *lifecycle.Lifecycle `msg:"-"`
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
|
2020-12-07 16:47:48 -05:00
|
|
|
func (e *dataUsageEntry) addSizes(summary sizeSummary) {
|
|
|
|
e.Size += summary.totalSize
|
|
|
|
e.ReplicatedSize += uint64(summary.replicatedSize)
|
|
|
|
e.ReplicationFailedSize += uint64(summary.failedSize)
|
|
|
|
e.ReplicationPendingSize += uint64(summary.pendingSize)
|
|
|
|
e.ReplicaSize += uint64(summary.replicaSize)
|
|
|
|
}
|
|
|
|
|
2020-03-18 19:19:29 -04:00
|
|
|
// merge other data usage entry into this, excluding children.
|
|
|
|
func (e *dataUsageEntry) merge(other dataUsageEntry) {
|
|
|
|
e.Objects += other.Objects
|
|
|
|
e.Size += other.Size
|
2020-12-07 16:47:48 -05:00
|
|
|
e.ReplicationPendingSize += other.ReplicationPendingSize
|
|
|
|
e.ReplicationFailedSize += other.ReplicationFailedSize
|
|
|
|
e.ReplicatedSize += other.ReplicatedSize
|
|
|
|
e.ReplicaSize += other.ReplicaSize
|
|
|
|
|
2020-03-18 19:19:29 -04:00
|
|
|
for i, v := range other.ObjSizes[:] {
|
|
|
|
e.ObjSizes[i] += v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// mod returns true if the hash mod cycles == cycle.
|
2020-08-24 16:47:01 -04:00
|
|
|
// If cycles is 0 false is always returned.
|
|
|
|
// If cycles is 1 true is always returned (as expected).
|
2020-04-27 13:06:21 -04:00
|
|
|
func (h dataUsageHash) mod(cycle uint32, cycles uint32) bool {
|
2020-08-24 16:47:01 -04:00
|
|
|
if cycles <= 1 {
|
|
|
|
return cycles == 1
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
return uint32(xxhash.Sum64String(string(h)))%cycles == cycle%cycles
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// addChildString will add a child based on its name.
|
|
|
|
// If it already exists it will not be added again.
|
|
|
|
func (e *dataUsageEntry) addChildString(name string) {
|
|
|
|
e.addChild(hashPath(name))
|
|
|
|
}
|
|
|
|
|
|
|
|
// addChild will add a child based on its hash.
|
|
|
|
// If it already exists it will not be added again.
|
|
|
|
func (e *dataUsageEntry) addChild(hash dataUsageHash) {
|
2020-06-12 13:28:21 -04:00
|
|
|
if _, ok := e.Children[hash.Key()]; ok {
|
2020-03-18 19:19:29 -04:00
|
|
|
return
|
|
|
|
}
|
|
|
|
if e.Children == nil {
|
|
|
|
e.Children = make(dataUsageHashMap, 1)
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
e.Children[hash.Key()] = struct{}{}
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// find a path in the cache.
|
|
|
|
// Returns nil if not found.
|
|
|
|
func (d *dataUsageCache) find(path string) *dataUsageEntry {
|
2020-06-12 13:28:21 -04:00
|
|
|
due, ok := d.Cache[hashPath(path).Key()]
|
2020-03-18 19:19:29 -04:00
|
|
|
if !ok {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return &due
|
|
|
|
}
|
|
|
|
|
2020-08-24 16:47:01 -04:00
|
|
|
// findChildrenCopy returns a copy of the children of the supplied hash.
|
|
|
|
func (d *dataUsageCache) findChildrenCopy(h dataUsageHash) dataUsageHashMap {
|
|
|
|
ch := d.Cache[h.String()].Children
|
|
|
|
res := make(dataUsageHashMap, len(ch))
|
|
|
|
for k := range ch {
|
|
|
|
res[k] = struct{}{}
|
|
|
|
}
|
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
2020-06-12 13:28:21 -04:00
|
|
|
// Returns nil if not found.
|
|
|
|
func (d *dataUsageCache) subCache(path string) dataUsageCache {
|
|
|
|
dst := dataUsageCache{Info: dataUsageCacheInfo{
|
|
|
|
Name: path,
|
|
|
|
LastUpdate: d.Info.LastUpdate,
|
|
|
|
BloomFilter: d.Info.BloomFilter,
|
|
|
|
}}
|
|
|
|
dst.copyWithChildren(d, dataUsageHash(hashPath(path).Key()), nil)
|
|
|
|
return dst
|
|
|
|
}
|
|
|
|
|
|
|
|
func (d *dataUsageCache) deleteRecursive(h dataUsageHash) {
|
|
|
|
if existing, ok := d.Cache[h.String()]; ok {
|
|
|
|
// Delete first if there should be a loop.
|
|
|
|
delete(d.Cache, h.Key())
|
|
|
|
for child := range existing.Children {
|
|
|
|
d.deleteRecursive(dataUsageHash(child))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// replaceRootChild will replace the child of root in d with the root of 'other'.
|
|
|
|
func (d *dataUsageCache) replaceRootChild(other dataUsageCache) {
|
|
|
|
otherRoot := other.root()
|
|
|
|
if otherRoot == nil {
|
|
|
|
logger.LogIf(GlobalContext, errors.New("replaceRootChild: Source has no root"))
|
|
|
|
return
|
|
|
|
}
|
|
|
|
thisRoot := d.root()
|
|
|
|
if thisRoot == nil {
|
|
|
|
logger.LogIf(GlobalContext, errors.New("replaceRootChild: Root of current not found"))
|
|
|
|
return
|
|
|
|
}
|
|
|
|
thisRootHash := d.rootHash()
|
|
|
|
otherRootHash := other.rootHash()
|
|
|
|
if thisRootHash == otherRootHash {
|
|
|
|
logger.LogIf(GlobalContext, errors.New("replaceRootChild: Root of child matches root of destination"))
|
|
|
|
return
|
|
|
|
}
|
|
|
|
d.deleteRecursive(other.rootHash())
|
|
|
|
d.copyWithChildren(&other, other.rootHash(), &thisRootHash)
|
|
|
|
}
|
|
|
|
|
|
|
|
// keepBuckets will keep only the buckets specified specified by delete all others.
|
|
|
|
func (d *dataUsageCache) keepBuckets(b []BucketInfo) {
|
|
|
|
lu := make(map[dataUsageHash]struct{})
|
|
|
|
for _, v := range b {
|
|
|
|
lu[hashPath(v.Name)] = struct{}{}
|
|
|
|
}
|
|
|
|
d.keepRootChildren(lu)
|
|
|
|
}
|
|
|
|
|
|
|
|
// keepRootChildren will keep the root children specified by delete all others.
|
|
|
|
func (d *dataUsageCache) keepRootChildren(list map[dataUsageHash]struct{}) {
|
|
|
|
if d.root() == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
rh := d.rootHash()
|
|
|
|
for k := range d.Cache {
|
|
|
|
h := dataUsageHash(k)
|
|
|
|
if h == rh {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if _, ok := list[h]; !ok {
|
|
|
|
delete(d.Cache, k)
|
|
|
|
d.deleteRecursive(h)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-18 19:19:29 -04:00
|
|
|
// dui converts the flattened version of the path to DataUsageInfo.
|
2020-04-27 13:06:21 -04:00
|
|
|
// As a side effect d will be flattened, use a clone if this is not ok.
|
2020-03-18 19:19:29 -04:00
|
|
|
func (d *dataUsageCache) dui(path string, buckets []BucketInfo) DataUsageInfo {
|
|
|
|
e := d.find(path)
|
|
|
|
if e == nil {
|
2020-05-27 09:45:43 -04:00
|
|
|
// No entry found, return empty.
|
|
|
|
return DataUsageInfo{}
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
flat := d.flatten(*e)
|
|
|
|
return DataUsageInfo{
|
2020-12-07 16:47:48 -05:00
|
|
|
LastUpdate: d.Info.LastUpdate,
|
|
|
|
ObjectsTotalCount: flat.Objects,
|
|
|
|
ObjectsTotalSize: uint64(flat.Size),
|
|
|
|
ReplicatedSize: flat.ReplicatedSize,
|
|
|
|
ReplicationFailedSize: flat.ReplicationFailedSize,
|
|
|
|
ReplicationPendingSize: flat.ReplicationPendingSize,
|
|
|
|
ReplicaSize: flat.ReplicaSize,
|
|
|
|
BucketsCount: uint64(len(e.Children)),
|
|
|
|
BucketsUsage: d.bucketsUsageInfo(buckets),
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// replace will add or replace an entry in the cache.
|
|
|
|
// If a parent is specified it will be added to that if not already there.
|
|
|
|
// If the parent does not exist, it will be added.
|
|
|
|
func (d *dataUsageCache) replace(path, parent string, e dataUsageEntry) {
|
|
|
|
hash := hashPath(path)
|
|
|
|
if d.Cache == nil {
|
2020-06-12 13:28:21 -04:00
|
|
|
d.Cache = make(map[string]dataUsageEntry, 100)
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
d.Cache[hash.Key()] = e
|
2020-03-18 19:19:29 -04:00
|
|
|
if parent != "" {
|
|
|
|
phash := hashPath(parent)
|
2020-06-12 13:28:21 -04:00
|
|
|
p := d.Cache[phash.Key()]
|
2020-03-18 19:19:29 -04:00
|
|
|
p.addChild(hash)
|
2020-06-12 13:28:21 -04:00
|
|
|
d.Cache[phash.Key()] = p
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// replaceHashed add or replaces an entry to the cache based on its hash.
|
|
|
|
// If a parent is specified it will be added to that if not already there.
|
|
|
|
// If the parent does not exist, it will be added.
|
|
|
|
func (d *dataUsageCache) replaceHashed(hash dataUsageHash, parent *dataUsageHash, e dataUsageEntry) {
|
|
|
|
if d.Cache == nil {
|
2020-06-12 13:28:21 -04:00
|
|
|
d.Cache = make(map[string]dataUsageEntry, 100)
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
d.Cache[hash.Key()] = e
|
2020-03-18 19:19:29 -04:00
|
|
|
if parent != nil {
|
2020-06-12 13:28:21 -04:00
|
|
|
p := d.Cache[parent.Key()]
|
2020-03-18 19:19:29 -04:00
|
|
|
p.addChild(hash)
|
2020-06-12 13:28:21 -04:00
|
|
|
d.Cache[parent.Key()] = p
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-27 13:06:21 -04:00
|
|
|
// copyWithChildren will copy entry with hash from src if it exists along with any children.
|
|
|
|
// If a parent is specified it will be added to that if not already there.
|
|
|
|
// If the parent does not exist, it will be added.
|
|
|
|
func (d *dataUsageCache) copyWithChildren(src *dataUsageCache, hash dataUsageHash, parent *dataUsageHash) {
|
|
|
|
if d.Cache == nil {
|
2020-06-12 13:28:21 -04:00
|
|
|
d.Cache = make(map[string]dataUsageEntry, 100)
|
2020-04-27 13:06:21 -04:00
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
e, ok := src.Cache[hash.String()]
|
2020-04-27 13:06:21 -04:00
|
|
|
if !ok {
|
|
|
|
return
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
d.Cache[hash.Key()] = e
|
2020-04-27 13:06:21 -04:00
|
|
|
for ch := range e.Children {
|
2020-06-12 13:28:21 -04:00
|
|
|
if ch == hash.Key() {
|
2020-04-27 13:06:21 -04:00
|
|
|
logger.LogIf(GlobalContext, errors.New("dataUsageCache.copyWithChildren: Circular reference"))
|
|
|
|
return
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
d.copyWithChildren(src, dataUsageHash(ch), &hash)
|
2020-04-27 13:06:21 -04:00
|
|
|
}
|
|
|
|
if parent != nil {
|
2020-06-12 13:28:21 -04:00
|
|
|
p := d.Cache[parent.Key()]
|
2020-04-27 13:06:21 -04:00
|
|
|
p.addChild(hash)
|
2020-06-12 13:28:21 -04:00
|
|
|
d.Cache[parent.Key()] = p
|
2020-04-27 13:06:21 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-18 19:19:29 -04:00
|
|
|
// StringAll returns a detailed string representation of all entries in the cache.
|
|
|
|
func (d *dataUsageCache) StringAll() string {
|
|
|
|
s := fmt.Sprintf("info:%+v\n", d.Info)
|
|
|
|
for k, v := range d.Cache {
|
|
|
|
s += fmt.Sprintf("\t%v: %+v\n", k, v)
|
|
|
|
}
|
|
|
|
return strings.TrimSpace(s)
|
|
|
|
}
|
|
|
|
|
2020-06-12 13:28:21 -04:00
|
|
|
// String returns a human readable representation of the string.
|
|
|
|
func (h dataUsageHash) String() string {
|
|
|
|
return string(h)
|
2020-04-27 13:06:21 -04:00
|
|
|
}
|
|
|
|
|
2020-03-18 19:19:29 -04:00
|
|
|
// String returns a human readable representation of the string.
|
2020-06-12 13:28:21 -04:00
|
|
|
func (h dataUsageHash) Key() string {
|
|
|
|
return string(h)
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// flatten all children of the root into the root element and return it.
|
|
|
|
func (d *dataUsageCache) flatten(root dataUsageEntry) dataUsageEntry {
|
|
|
|
for id := range root.Children {
|
|
|
|
e := d.Cache[id]
|
|
|
|
if len(e.Children) > 0 {
|
|
|
|
e = d.flatten(e)
|
|
|
|
}
|
|
|
|
root.merge(e)
|
|
|
|
}
|
|
|
|
root.Children = nil
|
|
|
|
return root
|
|
|
|
}
|
|
|
|
|
|
|
|
// add a size to the histogram.
|
|
|
|
func (h *sizeHistogram) add(size int64) {
|
|
|
|
// Fetch the histogram interval corresponding
|
|
|
|
// to the passed object size.
|
|
|
|
for i, interval := range ObjectsHistogramIntervals {
|
|
|
|
if size >= interval.start && size <= interval.end {
|
|
|
|
h[i]++
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-05-27 09:45:43 -04:00
|
|
|
// toMap returns the map to a map[string]uint64.
|
|
|
|
func (h *sizeHistogram) toMap() map[string]uint64 {
|
|
|
|
res := make(map[string]uint64, dataUsageBucketLen)
|
2020-03-18 19:19:29 -04:00
|
|
|
for i, count := range h {
|
|
|
|
res[ObjectsHistogramIntervals[i].name] = count
|
|
|
|
}
|
|
|
|
return res
|
|
|
|
}
|
|
|
|
|
2020-05-27 09:45:43 -04:00
|
|
|
// bucketsUsageInfo returns the buckets usage info as a map, with
|
|
|
|
// key as bucket name
|
|
|
|
func (d *dataUsageCache) bucketsUsageInfo(buckets []BucketInfo) map[string]BucketUsageInfo {
|
|
|
|
var dst = make(map[string]BucketUsageInfo, len(buckets))
|
2020-03-18 19:19:29 -04:00
|
|
|
for _, bucket := range buckets {
|
|
|
|
e := d.find(bucket.Name)
|
|
|
|
if e == nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
flat := d.flatten(*e)
|
2020-05-27 09:45:43 -04:00
|
|
|
dst[bucket.Name] = BucketUsageInfo{
|
2020-12-07 16:47:48 -05:00
|
|
|
Size: uint64(flat.Size),
|
|
|
|
ObjectsCount: flat.Objects,
|
|
|
|
ReplicationPendingSize: flat.ReplicationPendingSize,
|
|
|
|
ReplicatedSize: flat.ReplicatedSize,
|
|
|
|
ReplicationFailedSize: flat.ReplicationFailedSize,
|
|
|
|
ReplicaSize: flat.ReplicaSize,
|
|
|
|
ObjectSizesHistogram: flat.ObjSizes.toMap(),
|
2020-05-27 09:45:43 -04:00
|
|
|
}
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
return dst
|
|
|
|
}
|
|
|
|
|
2020-07-14 21:59:05 -04:00
|
|
|
// bucketUsageInfo returns the buckets usage info.
|
|
|
|
// If not found all values returned are zero values.
|
|
|
|
func (d *dataUsageCache) bucketUsageInfo(bucket string) BucketUsageInfo {
|
|
|
|
e := d.find(bucket)
|
|
|
|
if e == nil {
|
|
|
|
return BucketUsageInfo{}
|
|
|
|
}
|
|
|
|
flat := d.flatten(*e)
|
|
|
|
return BucketUsageInfo{
|
2020-12-07 16:47:48 -05:00
|
|
|
Size: uint64(flat.Size),
|
|
|
|
ObjectsCount: flat.Objects,
|
|
|
|
ReplicationPendingSize: flat.ReplicationPendingSize,
|
|
|
|
ReplicatedSize: flat.ReplicatedSize,
|
|
|
|
ReplicationFailedSize: flat.ReplicationFailedSize,
|
|
|
|
ReplicaSize: flat.ReplicaSize,
|
|
|
|
ObjectSizesHistogram: flat.ObjSizes.toMap(),
|
2020-07-14 21:59:05 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-18 19:19:29 -04:00
|
|
|
// sizeRecursive returns the path as a flattened entry.
|
|
|
|
func (d *dataUsageCache) sizeRecursive(path string) *dataUsageEntry {
|
|
|
|
root := d.find(path)
|
|
|
|
if root == nil || len(root.Children) == 0 {
|
|
|
|
return root
|
|
|
|
}
|
|
|
|
flat := d.flatten(*root)
|
|
|
|
return &flat
|
|
|
|
}
|
|
|
|
|
|
|
|
// root returns the root of the cache.
|
|
|
|
func (d *dataUsageCache) root() *dataUsageEntry {
|
|
|
|
return d.find(d.Info.Name)
|
|
|
|
}
|
|
|
|
|
|
|
|
// rootHash returns the root of the cache.
|
|
|
|
func (d *dataUsageCache) rootHash() dataUsageHash {
|
|
|
|
return hashPath(d.Info.Name)
|
|
|
|
}
|
|
|
|
|
|
|
|
// clone returns a copy of the cache with no references to the existing.
|
|
|
|
func (d *dataUsageCache) clone() dataUsageCache {
|
|
|
|
clone := dataUsageCache{
|
|
|
|
Info: d.Info,
|
2020-06-12 13:28:21 -04:00
|
|
|
Cache: make(map[string]dataUsageEntry, len(d.Cache)),
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
for k, v := range d.Cache {
|
|
|
|
clone.Cache[k] = v
|
|
|
|
}
|
|
|
|
return clone
|
|
|
|
}
|
|
|
|
|
|
|
|
// merge root of other into d.
|
|
|
|
// children of root will be flattened before being merged.
|
|
|
|
// Last update time will be set to the last updated.
|
|
|
|
func (d *dataUsageCache) merge(other dataUsageCache) {
|
|
|
|
existingRoot := d.root()
|
|
|
|
otherRoot := other.root()
|
|
|
|
if existingRoot == nil && otherRoot == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if otherRoot == nil {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if existingRoot == nil {
|
|
|
|
*d = other.clone()
|
|
|
|
return
|
|
|
|
}
|
|
|
|
if other.Info.LastUpdate.After(d.Info.LastUpdate) {
|
|
|
|
d.Info.LastUpdate = other.Info.LastUpdate
|
|
|
|
}
|
|
|
|
existingRoot.merge(*otherRoot)
|
|
|
|
eHash := d.rootHash()
|
|
|
|
for key := range otherRoot.Children {
|
|
|
|
entry := other.Cache[key]
|
|
|
|
flat := other.flatten(entry)
|
|
|
|
existing := d.Cache[key]
|
|
|
|
// If not found, merging simply adds.
|
|
|
|
existing.merge(flat)
|
2020-06-12 13:28:21 -04:00
|
|
|
d.replaceHashed(dataUsageHash(key), &eHash, existing)
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-09-10 12:18:19 -04:00
|
|
|
type objectIO interface {
|
|
|
|
GetObject(ctx context.Context, bucket, object string, startOffset int64, length int64, writer io.Writer, etag string, opts ObjectOptions) (err error)
|
|
|
|
PutObject(ctx context.Context, bucket, object string, data *PutObjReader, opts ObjectOptions) (objInfo ObjectInfo, err error)
|
|
|
|
}
|
|
|
|
|
2020-03-18 19:19:29 -04:00
|
|
|
// load the cache content with name from minioMetaBackgroundOpsBucket.
|
|
|
|
// Only backend errors are returned as errors.
|
|
|
|
// If the object is not found or unable to deserialize d is cleared and nil error is returned.
|
2020-09-10 12:18:19 -04:00
|
|
|
func (d *dataUsageCache) load(ctx context.Context, store objectIO, name string) error {
|
2020-03-18 19:19:29 -04:00
|
|
|
var buf bytes.Buffer
|
|
|
|
err := store.GetObject(ctx, dataUsageBucket, name, 0, -1, &buf, "", ObjectOptions{})
|
|
|
|
if err != nil {
|
2020-11-12 12:28:56 -05:00
|
|
|
switch err.(type) {
|
|
|
|
case ObjectNotFound:
|
|
|
|
case BucketNotFound:
|
|
|
|
case InsufficientReadQuorum:
|
|
|
|
default:
|
2020-03-18 19:19:29 -04:00
|
|
|
return toObjectErr(err, dataUsageBucket, name)
|
|
|
|
}
|
|
|
|
*d = dataUsageCache{}
|
|
|
|
return nil
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
err = d.deserialize(&buf)
|
2020-03-18 19:19:29 -04:00
|
|
|
if err != nil {
|
|
|
|
*d = dataUsageCache{}
|
|
|
|
logger.LogIf(ctx, err)
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// save the content of the cache to minioMetaBackgroundOpsBucket with the provided name.
|
2020-09-10 12:18:19 -04:00
|
|
|
func (d *dataUsageCache) save(ctx context.Context, store objectIO, name string) error {
|
2020-12-10 16:03:22 -05:00
|
|
|
pr, pw := io.Pipe()
|
|
|
|
go func() {
|
|
|
|
pw.CloseWithError(d.serializeTo(pw))
|
|
|
|
}()
|
|
|
|
defer pr.Close()
|
|
|
|
r, err := hash.NewReader(pr, -1, "", "", -1, false)
|
2020-03-18 19:19:29 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
_, err = store.PutObject(ctx,
|
|
|
|
dataUsageBucket,
|
|
|
|
name,
|
|
|
|
NewPutObjReader(r, nil, nil),
|
|
|
|
ObjectOptions{})
|
2020-04-28 04:16:57 -04:00
|
|
|
if isErrBucketNotFound(err) {
|
|
|
|
return nil
|
|
|
|
}
|
2020-03-18 19:19:29 -04:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// dataUsageCacheVer indicates the cache version.
|
|
|
|
// Bumping the cache version will drop data from previous versions
|
|
|
|
// and write new data with the new version.
|
2020-12-07 16:47:48 -05:00
|
|
|
const dataUsageCacheVer = 3
|
2020-03-18 19:19:29 -04:00
|
|
|
|
|
|
|
// serialize the contents of the cache.
|
2020-12-10 16:03:22 -05:00
|
|
|
func (d *dataUsageCache) serializeTo(dst io.Writer) error {
|
|
|
|
// Add version and compress.
|
|
|
|
_, err := dst.Write([]byte{dataUsageCacheVer})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
enc, err := zstd.NewWriter(dst,
|
2020-06-12 13:28:21 -04:00
|
|
|
zstd.WithEncoderLevel(zstd.SpeedFastest),
|
|
|
|
zstd.WithWindowSize(1<<20),
|
|
|
|
zstd.WithEncoderConcurrency(2))
|
2020-03-18 19:19:29 -04:00
|
|
|
if err != nil {
|
2020-12-10 16:03:22 -05:00
|
|
|
return err
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
mEnc := msgp.NewWriter(enc)
|
|
|
|
err = d.EncodeMsg(mEnc)
|
|
|
|
if err != nil {
|
2020-12-10 16:03:22 -05:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
err = mEnc.Flush()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
err = enc.Close()
|
|
|
|
if err != nil {
|
2020-12-10 16:03:22 -05:00
|
|
|
return err
|
2020-06-12 13:28:21 -04:00
|
|
|
}
|
2020-12-10 16:03:22 -05:00
|
|
|
return nil
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// deserialize the supplied byte slice into the cache.
|
2020-06-12 13:28:21 -04:00
|
|
|
func (d *dataUsageCache) deserialize(r io.Reader) error {
|
|
|
|
var b [1]byte
|
|
|
|
n, _ := r.Read(b[:])
|
|
|
|
if n != 1 {
|
2020-03-18 19:19:29 -04:00
|
|
|
return io.ErrUnexpectedEOF
|
|
|
|
}
|
|
|
|
switch b[0] {
|
2020-12-08 13:40:25 -05:00
|
|
|
case 1, 2:
|
2020-06-12 13:28:21 -04:00
|
|
|
return errors.New("cache version deprecated (will autoupdate)")
|
|
|
|
case dataUsageCacheVer:
|
2020-03-18 19:19:29 -04:00
|
|
|
default:
|
|
|
|
return fmt.Errorf("dataUsageCache: unknown version: %d", int(b[0]))
|
|
|
|
}
|
|
|
|
|
2020-06-12 13:28:21 -04:00
|
|
|
// Zstd compressed.
|
|
|
|
dec, err := zstd.NewReader(r, zstd.WithDecoderConcurrency(2))
|
2020-03-18 19:19:29 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
defer dec.Close()
|
2020-03-18 19:19:29 -04:00
|
|
|
|
2020-06-12 13:28:21 -04:00
|
|
|
return d.DecodeMsg(msgp.NewReader(dec))
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Trim this from start+end of hashes.
|
|
|
|
var hashPathCutSet = dataUsageRoot
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
if dataUsageRoot != string(filepath.Separator) {
|
|
|
|
hashPathCutSet = dataUsageRoot + string(filepath.Separator)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// hashPath calculates a hash of the provided string.
|
|
|
|
func hashPath(data string) dataUsageHash {
|
|
|
|
if data != dataUsageRoot {
|
|
|
|
data = strings.Trim(data, hashPathCutSet)
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
return dataUsageHash(path.Clean(data))
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
|
2020-06-12 13:28:21 -04:00
|
|
|
//msgp:ignore dataUsageHashMap
|
|
|
|
type dataUsageHashMap map[string]struct{}
|
2020-03-18 19:19:29 -04:00
|
|
|
|
2020-06-12 13:28:21 -04:00
|
|
|
// DecodeMsg implements msgp.Decodable
|
|
|
|
func (z *dataUsageHashMap) DecodeMsg(dc *msgp.Reader) (err error) {
|
|
|
|
var zb0002 uint32
|
|
|
|
zb0002, err = dc.ReadArrayHeader()
|
2020-03-18 19:19:29 -04:00
|
|
|
if err != nil {
|
2020-06-12 13:28:21 -04:00
|
|
|
err = msgp.WrapError(err)
|
2020-03-18 19:19:29 -04:00
|
|
|
return
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
*z = make(dataUsageHashMap, zb0002)
|
|
|
|
for i := uint32(0); i < zb0002; i++ {
|
|
|
|
{
|
|
|
|
var zb0003 string
|
|
|
|
zb0003, err = dc.ReadString()
|
|
|
|
if err != nil {
|
|
|
|
err = msgp.WrapError(err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
(*z)[zb0003] = struct{}{}
|
|
|
|
}
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2020-06-12 13:28:21 -04:00
|
|
|
// EncodeMsg implements msgp.Encodable
|
|
|
|
func (z dataUsageHashMap) EncodeMsg(en *msgp.Writer) (err error) {
|
|
|
|
err = en.WriteArrayHeader(uint32(len(z)))
|
2020-03-18 19:19:29 -04:00
|
|
|
if err != nil {
|
|
|
|
err = msgp.WrapError(err)
|
|
|
|
return
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
for zb0004 := range z {
|
|
|
|
err = en.WriteString(zb0004)
|
2020-03-18 19:19:29 -04:00
|
|
|
if err != nil {
|
2020-06-12 13:28:21 -04:00
|
|
|
err = msgp.WrapError(err, zb0004)
|
2020-03-18 19:19:29 -04:00
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// MarshalMsg implements msgp.Marshaler
|
|
|
|
func (z dataUsageHashMap) MarshalMsg(b []byte) (o []byte, err error) {
|
|
|
|
o = msgp.Require(b, z.Msgsize())
|
|
|
|
o = msgp.AppendArrayHeader(o, uint32(len(z)))
|
|
|
|
for zb0004 := range z {
|
|
|
|
o = msgp.AppendString(o, zb0004)
|
|
|
|
}
|
|
|
|
return
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
|
|
|
|
// UnmarshalMsg implements msgp.Unmarshaler
|
|
|
|
func (z *dataUsageHashMap) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
|
|
|
var zb0002 uint32
|
|
|
|
zb0002, bts, err = msgp.ReadArrayHeaderBytes(bts)
|
2020-03-18 19:19:29 -04:00
|
|
|
if err != nil {
|
|
|
|
err = msgp.WrapError(err)
|
|
|
|
return
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
*z = make(dataUsageHashMap, zb0002)
|
|
|
|
for i := uint32(0); i < zb0002; i++ {
|
|
|
|
{
|
|
|
|
var zb0003 string
|
|
|
|
zb0003, bts, err = msgp.ReadStringBytes(bts)
|
|
|
|
if err != nil {
|
|
|
|
err = msgp.WrapError(err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
(*z)[zb0003] = struct{}{}
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
o = bts
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
|
|
|
func (z dataUsageHashMap) Msgsize() (s int) {
|
|
|
|
s = msgp.ArrayHeaderSize
|
|
|
|
for zb0004 := range z {
|
|
|
|
s += msgp.StringPrefixSize + len(zb0004)
|
|
|
|
}
|
|
|
|
return
|
2020-03-18 19:19:29 -04:00
|
|
|
}
|