From cba3dd276bef9544532d584474a3834e47bf6e5b Mon Sep 17 00:00:00 2001
From: Krishnan Parthasarathi
Date: Fri, 12 Jan 2024 23:51:08 -0800
Subject: [PATCH] Add more size intervals to obj size histogram (#18772)

New intervals:
[1024B, 64KiB)
[64KiB, 256KiB)
[256KiB, 512KiB)
[512KiB, 1MiB)

The new intervals help us see the object size distribution with higher
resolution for the interval [1024B, 1MiB).
---
 cmd/data-usage-cache.go          |  99 +++++-
 cmd/data-usage-cache_gen.go      | 512 +++++++++++++++++++++++++++++++
 cmd/data-usage-cache_gen_test.go | 113 +++++++
 cmd/data-usage-cache_test.go     |  98 ++++++
 cmd/data-usage_test.go           |  17 +-
 cmd/object-api-datatypes.go      |  23 +-
 6 files changed, 844 insertions(+), 18 deletions(-)
 create mode 100644 cmd/data-usage-cache_test.go

diff --git a/cmd/data-usage-cache.go b/cmd/data-usage-cache.go
index 65fa6eb78..8a40b1a13 100644
--- a/cmd/data-usage-cache.go
+++ b/cmd/data-usage-cache.go
@@ -1,4 +1,4 @@
-// Copyright (c) 2015-2021 MinIO, Inc.
+// Copyright (c) 2015-2023 MinIO, Inc.
 //
 // This file is part of MinIO Object Storage stack
 //
@@ -32,6 +32,7 @@ import (
 	"time"
 
 	"github.com/cespare/xxhash/v2"
+	"github.com/dustin/go-humanize"
 	"github.com/klauspost/compress/zstd"
 	"github.com/minio/madmin-go/v3"
 	"github.com/minio/minio/internal/bucket/lifecycle"
@@ -46,6 +47,10 @@ import (
 // dataUsageHash is the hash type used.
 type dataUsageHash string
 
+// sizeHistogramV1 is size histogram V1, which has fewer intervals, especially
+// between 1024B and 1MiB.
+type sizeHistogramV1 [dataUsageBucketLenV1]uint64
+
 // sizeHistogram is a size histogram.
 type sizeHistogram [dataUsageBucketLen]uint64
 
@@ -204,8 +209,8 @@ func (r *replicationAllStats) clone() *replicationAllStats {
 	return &dst
 }
 
-//msgp:encode ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6
-//msgp:marshal ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6
+//msgp:encode ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6 dataUsageEntryV7
+//msgp:marshal ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6 dataUsageEntryV7
 
 //msgp:tuple dataUsageEntryV2
 type dataUsageEntryV2 struct {
@@ -263,14 +268,28 @@ type dataUsageEntryV6 struct {
 	Compacted bool
 }
 
+type dataUsageEntryV7 struct {
+	Children dataUsageHashMap `msg:"ch"`
+	// These fields do not include any children.
+	Size             int64                `msg:"sz"`
+	Objects          uint64               `msg:"os"`
+	Versions         uint64               `msg:"vs"` // Versions that are not delete markers.
+	DeleteMarkers    uint64               `msg:"dms"`
+	ObjSizes         sizeHistogramV1      `msg:"szs"`
+	ObjVersions      versionsHistogram    `msg:"vh"`
+	ReplicationStats *replicationAllStats `msg:"rs,omitempty"`
+	AllTierStats     *allTierStats        `msg:"ats,omitempty"`
+	Compacted        bool                 `msg:"c"`
+}
+
 // dataUsageCache contains a cache of data usage entries latest version.
 type dataUsageCache struct {
 	Info  dataUsageCacheInfo
 	Cache map[string]dataUsageEntry
 }
 
-//msgp:encode ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6
-//msgp:marshal ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6
+//msgp:encode ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6 dataUsageCacheV7
+//msgp:marshal ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6 dataUsageCacheV7
 
 // dataUsageCacheV2 contains a cache of data usage entries version 2.
type dataUsageCacheV2 struct {
@@ -302,6 +321,12 @@ type dataUsageCacheV6 struct {
 	Cache map[string]dataUsageEntryV6
 }
 
+// dataUsageCacheV7 contains a cache of data usage entries version 7.
+type dataUsageCacheV7 struct {
+	Info  dataUsageCacheInfo
+	Cache map[string]dataUsageEntryV7
+}
+
 //msgp:ignore dataUsageEntryInfo
 type dataUsageEntryInfo struct {
 	Name   string
@@ -750,11 +775,42 @@ func (h *sizeHistogram) add(size int64) {
 	}
 }
 
+// mergeV1 is used to migrate the data usage cache from sizeHistogramV1 to
+// sizeHistogram.
+func (h *sizeHistogram) mergeV1(v sizeHistogramV1) {
+	var oidx, nidx int
+	for oidx < len(v) {
+		intOld, intNew := ObjectsHistogramIntervalsV1[oidx], ObjectsHistogramIntervals[nidx]
+		// skip intervals that aren't common to both histograms
+		if intOld.start != intNew.start || intOld.end != intNew.end {
+			nidx++
+			continue
+		}
+		h[nidx] += v[oidx]
+		oidx++
+		nidx++
+	}
+}
+
 // toMap returns the histogram as a map[string]uint64.
 func (h *sizeHistogram) toMap() map[string]uint64 {
 	res := make(map[string]uint64, dataUsageBucketLen)
+	var splCount uint64
 	for i, count := range h {
-		res[ObjectsHistogramIntervals[i].name] = count
+		szInt := ObjectsHistogramIntervals[i]
+		switch {
+		case humanize.KiByte == szInt.start && szInt.end == humanize.MiByte-1:
+			// special interval: [1024B, 1MiB)
+			res[szInt.name] = splCount
+		case humanize.KiByte <= szInt.start && szInt.end <= humanize.MiByte-1:
+			// intervals that fall within the special interval above; they
+			// appear earlier in this array of intervals, see
+			// ObjectsHistogramIntervals
+			splCount += count
+			fallthrough
+		default:
+			res[szInt.name] = count
+		}
 	}
 	return res
 }
@@ -1027,7 +1083,8 @@ func (d *dataUsageCache) save(ctx context.Context, store objectIO, name string)
 // Bumping the cache version will drop data from previous versions
 // and write new data with the new version.
 const (
-	dataUsageCacheVerCurrent = 7
+	dataUsageCacheVerCurrent = 8
+	dataUsageCacheVerV7      = 7
 	dataUsageCacheVerV6      = 6
 	dataUsageCacheVerV5      = 5
 	dataUsageCacheVerV4      = 4
@@ -1275,6 +1332,34 @@ func (d *dataUsageCache) deserialize(r io.Reader) error {
 			}
 			d.Cache[k] = due
 		}
+
+		return nil
+	case dataUsageCacheVerV7:
+		// Zstd compressed.
+		dec, err := zstd.NewReader(r, zstd.WithDecoderConcurrency(2))
+		if err != nil {
+			return err
+		}
+		defer dec.Close()
+		dold := &dataUsageCacheV7{}
+		if err = dold.DecodeMsg(msgp.NewReader(dec)); err != nil {
+			return err
+		}
+		d.Info = dold.Info
+		d.Cache = make(map[string]dataUsageEntry, len(dold.Cache))
+		for k, v := range dold.Cache {
+			var szHist sizeHistogram
+			szHist.mergeV1(v.ObjSizes)
+			d.Cache[k] = dataUsageEntry{
+				Children:         v.Children,
+				Size:             v.Size,
+				Objects:          v.Objects,
+				Versions:         v.Versions,
+				ObjSizes:         szHist,
+				ReplicationStats: v.ReplicationStats,
+				Compacted:        v.Compacted,
+			}
+		}
+
 		return nil
 	case dataUsageCacheVerCurrent:
 		// Zstd compressed.
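
As a sanity check on the mergeV1 walk above, here is a minimal, self-contained sketch of the index mapping it performs. This is not part of the patch; the names (`interval`, `migrate`, `v1Intervals`, `v2Intervals`) are illustrative stand-ins for `objectHistogramInterval`, `sizeHistogram.mergeV1`, `ObjectsHistogramIntervalsV1`, and `ObjectsHistogramIntervals`. The walk terminates only because every V1 interval appears verbatim, in order, in the new table (true for the tables in this patch); a V1 count lands at the same interval's new index, and the four fine-grained buckets with no V1 counterpart stay zero.

```go
package main

import (
	"fmt"
	"math"
)

// interval mirrors the start/end pair of objectHistogramInterval.
type interval struct{ start, end int64 }

const (
	kib = 1 << 10
	mib = 1 << 20
)

// v1Intervals stands in for ObjectsHistogramIntervalsV1 (7 buckets).
var v1Intervals = []interval{
	{0, kib - 1},
	{kib, mib - 1},
	{mib, 10*mib - 1},
	{10 * mib, 64*mib - 1},
	{64 * mib, 128*mib - 1},
	{128 * mib, 512*mib - 1},
	{512 * mib, math.MaxInt64},
}

// v2Intervals stands in for ObjectsHistogramIntervals (11 buckets); indices
// 1-4 are the new fine-grained sub-intervals of [1024B, 1MiB), and index 5
// keeps the legacy [1024B, 1MiB) interval.
var v2Intervals = []interval{
	{0, kib - 1},
	{kib, 64*kib - 1},
	{64 * kib, 256*kib - 1},
	{256 * kib, 512*kib - 1},
	{512 * kib, mib - 1},
	{kib, mib - 1},
	{mib, 10*mib - 1},
	{10 * mib, 64*mib - 1},
	{64 * mib, 128*mib - 1},
	{128 * mib, 512*mib - 1},
	{512 * mib, math.MaxInt64},
}

// migrate performs the same walk as sizeHistogram.mergeV1: copy a V1 count
// when both tables agree on the interval, otherwise advance only the new
// index to skip a bucket that exists solely in the new histogram.
func migrate(v1 []uint64) []uint64 {
	out := make([]uint64, len(v2Intervals))
	var oidx, nidx int
	for oidx < len(v1) {
		if v1Intervals[oidx] != v2Intervals[nidx] {
			nidx++
			continue
		}
		out[nidx] += v1[oidx]
		oidx++
		nidx++
	}
	return out
}

func main() {
	// V1 bucket 1 ([1024B, 1MiB)) lands at new index 5; all later buckets
	// shift up by four, matching TestMigrateSizeHistogramFromV1 below.
	fmt.Println(migrate([]uint64{10, 20, 3, 4, 5, 6, 7}))
	// Output: [10 0 0 0 0 20 3 4 5 6 7]
}
```

The same mapping is what the `dataUsageCacheVerV7` branch of deserialize relies on when it rebuilds each entry's ObjSizes from the on-disk V7 histogram.
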
diff --git a/cmd/data-usage-cache_gen.go b/cmd/data-usage-cache_gen.go index e66e1edcc..0be4fd0fa 100644 --- a/cmd/data-usage-cache_gen.go +++ b/cmd/data-usage-cache_gen.go @@ -1480,6 +1480,149 @@ func (z *dataUsageCacheV6) Msgsize() (s int) { return } +// DecodeMsg implements msgp.Decodable +func (z *dataUsageCacheV7) DecodeMsg(dc *msgp.Reader) (err error) { + var field []byte + _ = field + var zb0001 uint32 + zb0001, err = dc.ReadMapHeader() + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "Info": + err = z.Info.DecodeMsg(dc) + if err != nil { + err = msgp.WrapError(err, "Info") + return + } + case "Cache": + var zb0002 uint32 + zb0002, err = dc.ReadMapHeader() + if err != nil { + err = msgp.WrapError(err, "Cache") + return + } + if z.Cache == nil { + z.Cache = make(map[string]dataUsageEntryV7, zb0002) + } else if len(z.Cache) > 0 { + for key := range z.Cache { + delete(z.Cache, key) + } + } + for zb0002 > 0 { + zb0002-- + var za0001 string + var za0002 dataUsageEntryV7 + za0001, err = dc.ReadString() + if err != nil { + err = msgp.WrapError(err, "Cache") + return + } + err = za0002.DecodeMsg(dc) + if err != nil { + err = msgp.WrapError(err, "Cache", za0001) + return + } + z.Cache[za0001] = za0002 + } + default: + err = dc.Skip() + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *dataUsageCacheV7) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zb0001 uint32 + zb0001, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "Info": + bts, err = z.Info.UnmarshalMsg(bts) + if err != nil { + err = msgp.WrapError(err, "Info") + return + } + case "Cache": + var zb0002 uint32 + zb0002, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err, "Cache") + return + } + if z.Cache == nil { + z.Cache = make(map[string]dataUsageEntryV7, zb0002) + } else if len(z.Cache) > 0 { + for key := range z.Cache { + delete(z.Cache, key) + } + } + for zb0002 > 0 { + var za0001 string + var za0002 dataUsageEntryV7 + zb0002-- + za0001, bts, err = msgp.ReadStringBytes(bts) + if err != nil { + err = msgp.WrapError(err, "Cache") + return + } + bts, err = za0002.UnmarshalMsg(bts) + if err != nil { + err = msgp.WrapError(err, "Cache", za0001) + return + } + z.Cache[za0001] = za0002 + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *dataUsageCacheV7) Msgsize() (s int) { + s = 1 + 5 + z.Info.Msgsize() + 6 + msgp.MapHeaderSize + if z.Cache != nil { + for za0001, za0002 := range z.Cache { + _ = za0002 + s += msgp.StringPrefixSize + len(za0001) + za0002.Msgsize() + } + } + return +} + // DecodeMsg implements msgp.Decodable func (z *dataUsageEntry) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte @@ -2684,6 +2827,297 @@ func (z *dataUsageEntryV6) Msgsize() (s int) { return } +// DecodeMsg implements msgp.Decodable +func (z *dataUsageEntryV7) DecodeMsg(dc *msgp.Reader) (err error) { + var 
field []byte + _ = field + var zb0001 uint32 + zb0001, err = dc.ReadMapHeader() + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, err = dc.ReadMapKeyPtr() + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "ch": + err = z.Children.DecodeMsg(dc) + if err != nil { + err = msgp.WrapError(err, "Children") + return + } + case "sz": + z.Size, err = dc.ReadInt64() + if err != nil { + err = msgp.WrapError(err, "Size") + return + } + case "os": + z.Objects, err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, "Objects") + return + } + case "vs": + z.Versions, err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, "Versions") + return + } + case "dms": + z.DeleteMarkers, err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, "DeleteMarkers") + return + } + case "szs": + var zb0002 uint32 + zb0002, err = dc.ReadArrayHeader() + if err != nil { + err = msgp.WrapError(err, "ObjSizes") + return + } + if zb0002 != uint32(dataUsageBucketLenV1) { + err = msgp.ArrayError{Wanted: uint32(dataUsageBucketLenV1), Got: zb0002} + return + } + for za0001 := range z.ObjSizes { + z.ObjSizes[za0001], err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, "ObjSizes", za0001) + return + } + } + case "vh": + var zb0003 uint32 + zb0003, err = dc.ReadArrayHeader() + if err != nil { + err = msgp.WrapError(err, "ObjVersions") + return + } + if zb0003 != uint32(dataUsageVersionLen) { + err = msgp.ArrayError{Wanted: uint32(dataUsageVersionLen), Got: zb0003} + return + } + for za0002 := range z.ObjVersions { + z.ObjVersions[za0002], err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, "ObjVersions", za0002) + return + } + } + case "rs": + if dc.IsNil() { + err = dc.ReadNil() + if err != nil { + err = msgp.WrapError(err, "ReplicationStats") + return + } + z.ReplicationStats = nil + } else { + if z.ReplicationStats == nil { + z.ReplicationStats = new(replicationAllStats) + } + err = z.ReplicationStats.DecodeMsg(dc) + if err != nil { + err = msgp.WrapError(err, "ReplicationStats") + return + } + } + case "ats": + if dc.IsNil() { + err = dc.ReadNil() + if err != nil { + err = msgp.WrapError(err, "AllTierStats") + return + } + z.AllTierStats = nil + } else { + if z.AllTierStats == nil { + z.AllTierStats = new(allTierStats) + } + err = z.AllTierStats.DecodeMsg(dc) + if err != nil { + err = msgp.WrapError(err, "AllTierStats") + return + } + } + case "c": + z.Compacted, err = dc.ReadBool() + if err != nil { + err = msgp.WrapError(err, "Compacted") + return + } + default: + err = dc.Skip() + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *dataUsageEntryV7) UnmarshalMsg(bts []byte) (o []byte, err error) { + var field []byte + _ = field + var zb0001 uint32 + zb0001, bts, err = msgp.ReadMapHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + for zb0001 > 0 { + zb0001-- + field, bts, err = msgp.ReadMapKeyZC(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + switch msgp.UnsafeString(field) { + case "ch": + bts, err = z.Children.UnmarshalMsg(bts) + if err != nil { + err = msgp.WrapError(err, "Children") + return + } + case "sz": + z.Size, bts, err = msgp.ReadInt64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "Size") + return + } + case "os": + z.Objects, bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, 
"Objects") + return + } + case "vs": + z.Versions, bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "Versions") + return + } + case "dms": + z.DeleteMarkers, bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "DeleteMarkers") + return + } + case "szs": + var zb0002 uint32 + zb0002, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err, "ObjSizes") + return + } + if zb0002 != uint32(dataUsageBucketLenV1) { + err = msgp.ArrayError{Wanted: uint32(dataUsageBucketLenV1), Got: zb0002} + return + } + for za0001 := range z.ObjSizes { + z.ObjSizes[za0001], bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "ObjSizes", za0001) + return + } + } + case "vh": + var zb0003 uint32 + zb0003, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err, "ObjVersions") + return + } + if zb0003 != uint32(dataUsageVersionLen) { + err = msgp.ArrayError{Wanted: uint32(dataUsageVersionLen), Got: zb0003} + return + } + for za0002 := range z.ObjVersions { + z.ObjVersions[za0002], bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, "ObjVersions", za0002) + return + } + } + case "rs": + if msgp.IsNil(bts) { + bts, err = msgp.ReadNilBytes(bts) + if err != nil { + return + } + z.ReplicationStats = nil + } else { + if z.ReplicationStats == nil { + z.ReplicationStats = new(replicationAllStats) + } + bts, err = z.ReplicationStats.UnmarshalMsg(bts) + if err != nil { + err = msgp.WrapError(err, "ReplicationStats") + return + } + } + case "ats": + if msgp.IsNil(bts) { + bts, err = msgp.ReadNilBytes(bts) + if err != nil { + return + } + z.AllTierStats = nil + } else { + if z.AllTierStats == nil { + z.AllTierStats = new(allTierStats) + } + bts, err = z.AllTierStats.UnmarshalMsg(bts) + if err != nil { + err = msgp.WrapError(err, "AllTierStats") + return + } + } + case "c": + z.Compacted, bts, err = msgp.ReadBoolBytes(bts) + if err != nil { + err = msgp.WrapError(err, "Compacted") + return + } + default: + bts, err = msgp.Skip(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *dataUsageEntryV7) Msgsize() (s int) { + s = 1 + 3 + z.Children.Msgsize() + 3 + msgp.Int64Size + 3 + msgp.Uint64Size + 3 + msgp.Uint64Size + 4 + msgp.Uint64Size + 4 + msgp.ArrayHeaderSize + (dataUsageBucketLenV1 * (msgp.Uint64Size)) + 3 + msgp.ArrayHeaderSize + (dataUsageVersionLen * (msgp.Uint64Size)) + 3 + if z.ReplicationStats == nil { + s += msgp.NilSize + } else { + s += z.ReplicationStats.Msgsize() + } + s += 4 + if z.AllTierStats == nil { + s += msgp.NilSize + } else { + s += z.AllTierStats.Msgsize() + } + s += 2 + msgp.BoolSize + return +} + // DecodeMsg implements msgp.Decodable func (z *dataUsageHash) DecodeMsg(dc *msgp.Reader) (err error) { { @@ -3705,6 +4139,84 @@ func (z *sizeHistogram) Msgsize() (s int) { return } +// DecodeMsg implements msgp.Decodable +func (z *sizeHistogramV1) DecodeMsg(dc *msgp.Reader) (err error) { + var zb0001 uint32 + zb0001, err = dc.ReadArrayHeader() + if err != nil { + err = msgp.WrapError(err) + return + } + if zb0001 != uint32(dataUsageBucketLenV1) { + err = msgp.ArrayError{Wanted: uint32(dataUsageBucketLenV1), Got: zb0001} + return + } + for za0001 := range z { + z[za0001], err = dc.ReadUint64() + if err != nil { + err = msgp.WrapError(err, za0001) + return + } + } + return 
+} + +// EncodeMsg implements msgp.Encodable +func (z *sizeHistogramV1) EncodeMsg(en *msgp.Writer) (err error) { + err = en.WriteArrayHeader(uint32(dataUsageBucketLenV1)) + if err != nil { + err = msgp.WrapError(err) + return + } + for za0001 := range z { + err = en.WriteUint64(z[za0001]) + if err != nil { + err = msgp.WrapError(err, za0001) + return + } + } + return +} + +// MarshalMsg implements msgp.Marshaler +func (z *sizeHistogramV1) MarshalMsg(b []byte) (o []byte, err error) { + o = msgp.Require(b, z.Msgsize()) + o = msgp.AppendArrayHeader(o, uint32(dataUsageBucketLenV1)) + for za0001 := range z { + o = msgp.AppendUint64(o, z[za0001]) + } + return +} + +// UnmarshalMsg implements msgp.Unmarshaler +func (z *sizeHistogramV1) UnmarshalMsg(bts []byte) (o []byte, err error) { + var zb0001 uint32 + zb0001, bts, err = msgp.ReadArrayHeaderBytes(bts) + if err != nil { + err = msgp.WrapError(err) + return + } + if zb0001 != uint32(dataUsageBucketLenV1) { + err = msgp.ArrayError{Wanted: uint32(dataUsageBucketLenV1), Got: zb0001} + return + } + for za0001 := range z { + z[za0001], bts, err = msgp.ReadUint64Bytes(bts) + if err != nil { + err = msgp.WrapError(err, za0001) + return + } + } + o = bts + return +} + +// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message +func (z *sizeHistogramV1) Msgsize() (s int) { + s = msgp.ArrayHeaderSize + (dataUsageBucketLenV1 * (msgp.Uint64Size)) + return +} + // DecodeMsg implements msgp.Decodable func (z *tierStats) DecodeMsg(dc *msgp.Reader) (err error) { var field []byte diff --git a/cmd/data-usage-cache_gen_test.go b/cmd/data-usage-cache_gen_test.go index a8bca8b68..9b726e077 100644 --- a/cmd/data-usage-cache_gen_test.go +++ b/cmd/data-usage-cache_gen_test.go @@ -1084,6 +1084,119 @@ func BenchmarkDecodesizeHistogram(b *testing.B) { } } +func TestMarshalUnmarshalsizeHistogramV1(t *testing.T) { + v := sizeHistogramV1{} + bts, err := v.MarshalMsg(nil) + if err != nil { + t.Fatal(err) + } + left, err := v.UnmarshalMsg(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left) + } + + left, err = msgp.Skip(bts) + if err != nil { + t.Fatal(err) + } + if len(left) > 0 { + t.Errorf("%d bytes left over after Skip(): %q", len(left), left) + } +} + +func BenchmarkMarshalMsgsizeHistogramV1(b *testing.B) { + v := sizeHistogramV1{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.MarshalMsg(nil) + } +} + +func BenchmarkAppendMsgsizeHistogramV1(b *testing.B) { + v := sizeHistogramV1{} + bts := make([]byte, 0, v.Msgsize()) + bts, _ = v.MarshalMsg(bts[0:0]) + b.SetBytes(int64(len(bts))) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + bts, _ = v.MarshalMsg(bts[0:0]) + } +} + +func BenchmarkUnmarshalsizeHistogramV1(b *testing.B) { + v := sizeHistogramV1{} + bts, _ := v.MarshalMsg(nil) + b.ReportAllocs() + b.SetBytes(int64(len(bts))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, err := v.UnmarshalMsg(bts) + if err != nil { + b.Fatal(err) + } + } +} + +func TestEncodeDecodesizeHistogramV1(t *testing.T) { + v := sizeHistogramV1{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + + m := v.Msgsize() + if buf.Len() > m { + t.Log("WARNING: TestEncodeDecodesizeHistogramV1 Msgsize() is inaccurate") + } + + vn := sizeHistogramV1{} + err := msgp.Decode(&buf, &vn) + if err != nil { + t.Error(err) + } + + buf.Reset() + msgp.Encode(&buf, &v) + err = msgp.NewReader(&buf).Skip() + if err != nil { + t.Error(err) + } +} + 
+func BenchmarkEncodesizeHistogramV1(b *testing.B) { + v := sizeHistogramV1{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + b.SetBytes(int64(buf.Len())) + en := msgp.NewWriter(msgp.Nowhere) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + v.EncodeMsg(en) + } + en.Flush() +} + +func BenchmarkDecodesizeHistogramV1(b *testing.B) { + v := sizeHistogramV1{} + var buf bytes.Buffer + msgp.Encode(&buf, &v) + b.SetBytes(int64(buf.Len())) + rd := msgp.NewEndlessReader(buf.Bytes(), b) + dc := msgp.NewReader(rd) + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + err := v.DecodeMsg(dc) + if err != nil { + b.Fatal(err) + } + } +} + func TestMarshalUnmarshaltierStats(t *testing.T) { v := tierStats{} bts, err := v.MarshalMsg(nil) diff --git a/cmd/data-usage-cache_test.go b/cmd/data-usage-cache_test.go new file mode 100644 index 000000000..69fbd9fe9 --- /dev/null +++ b/cmd/data-usage-cache_test.go @@ -0,0 +1,98 @@ +// Copyright (c) 2015-2023 MinIO, Inc. +// +// This file is part of MinIO Object Storage stack +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see . + +package cmd + +import ( + "fmt" + "testing" + + "github.com/dustin/go-humanize" +) + +func TestSizeHistogramToMap(t *testing.T) { + tests := []struct { + sizes []int64 + want map[string]uint64 + }{ + { + sizes: []int64{100, 1000, 72_000, 100_000}, + want: map[string]uint64{ + "LESS_THAN_1024_B": 2, + "BETWEEN_64_KB_AND_256_KB": 2, + "BETWEEN_1024B_AND_1_MB": 2, + }, + }, + { + sizes: []int64{100, 1000, 2000, 100_000, 13 * humanize.MiByte}, + want: map[string]uint64{ + "LESS_THAN_1024_B": 2, + "BETWEEN_1024_B_AND_64_KB": 1, + "BETWEEN_64_KB_AND_256_KB": 1, + "BETWEEN_1024B_AND_1_MB": 2, + "BETWEEN_10_MB_AND_64_MB": 1, + }, + }, + } + for i, test := range tests { + t.Run(fmt.Sprintf("Test-%d", i), func(t *testing.T) { + var h sizeHistogram + for _, sz := range test.sizes { + h.add(sz) + } + got := h.toMap() + exp := test.want + // what is in exp is in got + for k := range exp { + if exp[k] != got[k] { + t.Fatalf("interval %s: Expected %d values but got %d values\n", k, exp[k], got[k]) + } + } + // what is absent in exp is absent in got too + for k := range got { + if _, ok := exp[k]; !ok && got[k] > 0 { + t.Fatalf("Unexpected interval: %s has value %d\n", k, got[k]) + } + } + }) + } +} + +func TestMigrateSizeHistogramFromV1(t *testing.T) { + tests := []struct { + v sizeHistogramV1 + want sizeHistogram + }{ + { + v: sizeHistogramV1{0: 10, 1: 20, 2: 3}, + want: sizeHistogram{0: 10, 5: 20, 6: 3}, + }, + { + v: sizeHistogramV1{0: 10, 1: 20, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7}, + want: sizeHistogram{0: 10, 5: 20, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7}, + }, + } + for i, test := range tests { + t.Run(fmt.Sprintf("test-%d", i), func(t *testing.T) { + var got sizeHistogram + got.mergeV1(test.v) + if got != test.want { + t.Fatalf("Expected %v but got %v", test.want, got) + } + }) + } +} diff --git a/cmd/data-usage_test.go b/cmd/data-usage_test.go 
index ae6cbd700..b8b5a9a7c 100644 --- a/cmd/data-usage_test.go +++ b/cmd/data-usage_test.go @@ -82,7 +82,7 @@ func TestDataUsageUpdate(t *testing.T) { size: 1322310, flatten: true, objs: 8, - oSizes: sizeHistogram{0: 2, 1: 6}, + oSizes: sizeHistogram{0: 2, 1: 3, 2: 2, 4: 1}, }, { path: "/", @@ -94,7 +94,7 @@ func TestDataUsageUpdate(t *testing.T) { path: "/dir1", size: 1302010, objs: 5, - oSizes: sizeHistogram{0: 1, 1: 4}, + oSizes: sizeHistogram{0: 1, 1: 1, 2: 2, 4: 1}, }, { path: "/dir1/dira", @@ -167,7 +167,6 @@ func TestDataUsageUpdate(t *testing.T) { size: 200, }, } - createUsageTestFiles(t, base, bucket, files) err = os.RemoveAll(filepath.Join(base, bucket, "dir1/dira/dirasub/dcfile")) if err != nil { @@ -194,14 +193,14 @@ func TestDataUsageUpdate(t *testing.T) { size: 363515, flatten: true, objs: 14, - oSizes: sizeHistogram{0: 7, 1: 7}, + oSizes: sizeHistogram{0: 7, 1: 5, 2: 2}, }, { path: "/dir1", size: 342210, objs: 7, flatten: false, - oSizes: sizeHistogram{0: 2, 1: 5}, + oSizes: sizeHistogram{0: 2, 1: 3, 2: 2}, }, { path: "/newfolder", @@ -308,7 +307,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) { path: "flat", size: 1322310 + expectSize, objs: 8 + expectSize, - oSizes: sizeHistogram{0: 2 + expectSize, 1: 6}, + oSizes: sizeHistogram{0: 2 + expectSize, 1: 3, 2: 2, 4: 1}, }, { path: "bucket/", @@ -321,7 +320,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) { path: "bucket/dir1", size: 1302010, objs: 5, - oSizes: sizeHistogram{0: 1, 1: 4}, + oSizes: sizeHistogram{0: 1, 1: 1, 2: 2, 4: 1}, }, { // Gets compacted at this level... @@ -436,13 +435,13 @@ func TestDataUsageUpdatePrefix(t *testing.T) { path: "flat", size: 363515 + expectSize, objs: 14 + expectSize, - oSizes: sizeHistogram{0: 7 + expectSize, 1: 7}, + oSizes: sizeHistogram{0: 7 + expectSize, 1: 5, 2: 2}, }, { path: "bucket/dir1", size: 342210, objs: 7, - oSizes: sizeHistogram{0: 2, 1: 5}, + oSizes: sizeHistogram{0: 2, 1: 3, 2: 2}, }, { path: "bucket/", diff --git a/cmd/object-api-datatypes.go b/cmd/object-api-datatypes.go index ec2d8e8c4..53d5f97f9 100644 --- a/cmd/object-api-datatypes.go +++ b/cmd/object-api-datatypes.go @@ -54,16 +54,35 @@ type objectHistogramInterval struct { } const ( + // dataUsageBucketLenV1 must be length of ObjectsHistogramIntervalsV1 + dataUsageBucketLenV1 = 7 // dataUsageBucketLen must be length of ObjectsHistogramIntervals - dataUsageBucketLen = 7 + dataUsageBucketLen = 11 dataUsageVersionLen = 7 ) +// ObjectsHistogramIntervalsV1 is the list of all intervals +// of object sizes to be included in objects histogram(V1). +var ObjectsHistogramIntervalsV1 = [dataUsageBucketLenV1]objectHistogramInterval{ + {"LESS_THAN_1024_B", 0, humanize.KiByte - 1}, + {"BETWEEN_1024B_AND_1_MB", humanize.KiByte, humanize.MiByte - 1}, + {"BETWEEN_1_MB_AND_10_MB", humanize.MiByte, humanize.MiByte*10 - 1}, + {"BETWEEN_10_MB_AND_64_MB", humanize.MiByte * 10, humanize.MiByte*64 - 1}, + {"BETWEEN_64_MB_AND_128_MB", humanize.MiByte * 64, humanize.MiByte*128 - 1}, + {"BETWEEN_128_MB_AND_512_MB", humanize.MiByte * 128, humanize.MiByte*512 - 1}, + {"GREATER_THAN_512_MB", humanize.MiByte * 512, math.MaxInt64}, +} + // ObjectsHistogramIntervals is the list of all intervals // of object sizes to be included in objects histogram. +// Note: this histogram expands 1024B-1MB to incl. 
1024B-64KB, 64KB-256KB, 256KB-512KB and 512KB-1MiB.
 var ObjectsHistogramIntervals = [dataUsageBucketLen]objectHistogramInterval{
 	{"LESS_THAN_1024_B", 0, humanize.KiByte - 1},
-	{"BETWEEN_1024_B_AND_1_MB", humanize.KiByte, humanize.MiByte - 1},
+	{"BETWEEN_1024_B_AND_64_KB", humanize.KiByte, 64*humanize.KiByte - 1},         // not exported, for support use only
+	{"BETWEEN_64_KB_AND_256_KB", 64 * humanize.KiByte, 256*humanize.KiByte - 1},   // not exported, for support use only
+	{"BETWEEN_256_KB_AND_512_KB", 256 * humanize.KiByte, 512*humanize.KiByte - 1}, // not exported, for support use only
+	{"BETWEEN_512_KB_AND_1_MB", 512 * humanize.KiByte, humanize.MiByte - 1},       // not exported, for support use only
+	{"BETWEEN_1024B_AND_1_MB", humanize.KiByte, humanize.MiByte - 1},
 	{"BETWEEN_1_MB_AND_10_MB", humanize.MiByte, humanize.MiByte*10 - 1},
 	{"BETWEEN_10_MB_AND_64_MB", humanize.MiByte * 10, humanize.MiByte*64 - 1},
 	{"BETWEEN_64_MB_AND_128_MB", humanize.MiByte * 64, humanize.MiByte*128 - 1},
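
Note how the toMap change earlier keeps consumers of the coarse histogram working: add() never increments the legacy BETWEEN_1024B_AND_1_MB bucket directly anymore, so toMap reports it as the running sum of the four fine-grained buckets that precede it in the table. Below is a minimal sketch of that aggregation; the bucket names are taken from the patch, but it is simplified in that it matches the special bucket by name rather than by the humanize byte bounds the real code uses, and the larger size buckets are elided.

```go
package main

import "fmt"

func main() {
	// First seven bucket names from ObjectsHistogramIntervals.
	names := []string{
		"LESS_THAN_1024_B",
		"BETWEEN_1024_B_AND_64_KB",
		"BETWEEN_64_KB_AND_256_KB",
		"BETWEEN_256_KB_AND_512_KB",
		"BETWEEN_512_KB_AND_1_MB",
		"BETWEEN_1024B_AND_1_MB", // legacy aggregate of the four buckets above
		"BETWEEN_1_MB_AND_10_MB",
	}
	// Counts as sizeHistogram.add would record them for object sizes
	// 100, 1000, 72000 and 100000 (see TestSizeHistogramToMap).
	counts := []uint64{2, 0, 2, 0, 0, 0, 0}

	res := make(map[string]uint64, len(names))
	var splCount uint64
	for i, count := range counts {
		switch {
		case names[i] == "BETWEEN_1024B_AND_1_MB":
			// add() never increments the aggregate bucket itself, so
			// report the running sum of its sub-intervals instead.
			res[names[i]] = splCount
		case i >= 1 && i <= 4:
			// Fine-grained sub-intervals of [1024B, 1MiB): accumulate
			// into the aggregate, then also report them individually.
			splCount += count
			fallthrough
		default:
			res[names[i]] = count
		}
	}
	fmt.Println(res["BETWEEN_64_KB_AND_256_KB"], res["BETWEEN_1024B_AND_1_MB"])
	// Output: 2 2
}
```

With these counts — two objects under 1024B and two in [64KiB, 256KiB) — the map reports both BETWEEN_64_KB_AND_256_KB = 2 and BETWEEN_1024B_AND_1_MB = 2, which is what TestSizeHistogramToMap in this patch expects for sizes {100, 1000, 72_000, 100_000}.
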