Add more size intervals to obj size histogram (#18772)

New intervals:
[1024B, 64KiB)
[64KiB, 256KiB)
[256KiB, 512KiB)
[512KiB, 1MiB)

The new intervals help us see the object size distribution with higher
resolution for the interval [1024B, 1MiB).
This commit is contained in:
Krishnan Parthasarathi 2024-01-12 23:51:08 -08:00 committed by GitHub
parent a47fc75c26
commit cba3dd276b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 844 additions and 18 deletions

View File

@ -1,4 +1,4 @@
// Copyright (c) 2015-2021 MinIO, Inc.
// Copyright (c) 2015-2023 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
@ -32,6 +32,7 @@ import (
"time"
"github.com/cespare/xxhash/v2"
"github.com/dustin/go-humanize"
"github.com/klauspost/compress/zstd"
"github.com/minio/madmin-go/v3"
"github.com/minio/minio/internal/bucket/lifecycle"
@ -46,6 +47,10 @@ import (
// dataUsageHash is the hash type used. Its underlying type is string.
type dataUsageHash string
// sizeHistogramV1 is size histogram V1, which has fewer intervals than
// sizeHistogram, especially between 1024B and 1MiB. It is an array of
// dataUsageBucketLenV1 uint64 counters.
type sizeHistogramV1 [dataUsageBucketLenV1]uint64
// sizeHistogram is a size histogram: an array of dataUsageBucketLen uint64
// counters. toMap and mergeV1 pair the counter at index i with
// ObjectsHistogramIntervals[i].
type sizeHistogram [dataUsageBucketLen]uint64
@ -204,8 +209,8 @@ func (r *replicationAllStats) clone() *replicationAllStats {
return &dst
}
//msgp:encode ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6
//msgp:marshal ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6
//msgp:encode ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6 dataUsageEntryV7
//msgp:marshal ignore dataUsageEntryV2 dataUsageEntryV3 dataUsageEntryV4 dataUsageEntryV5 dataUsageEntryV6 dataUsageEntryV7
//msgp:tuple dataUsageEntryV2
type dataUsageEntryV2 struct {
@ -263,14 +268,28 @@ type dataUsageEntryV6 struct {
Compacted bool
}
// dataUsageEntryV7 is the version 7 layout of a data usage entry. Its
// ObjSizes field uses the V1 size histogram (sizeHistogramV1).
type dataUsageEntryV7 struct {
Children dataUsageHashMap `msg:"ch"`
// These fields do not include any children.
Size int64 `msg:"sz"`
Objects uint64 `msg:"os"`
Versions uint64 `msg:"vs"` // Versions that are not delete markers.
DeleteMarkers uint64 `msg:"dms"`
ObjSizes sizeHistogramV1 `msg:"szs"`
ObjVersions versionsHistogram `msg:"vh"`
ReplicationStats *replicationAllStats `msg:"rs,omitempty"`
AllTierStats *allTierStats `msg:"ats,omitempty"`
Compacted bool `msg:"c"`
}
// dataUsageCache contains a cache of data usage entries in the latest
// version of the entry layout (dataUsageEntry).
type dataUsageCache struct {
Info dataUsageCacheInfo
Cache map[string]dataUsageEntry
}
//msgp:encode ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6
//msgp:marshal ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6
//msgp:encode ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6 dataUsageCacheV7
//msgp:marshal ignore dataUsageCacheV2 dataUsageCacheV3 dataUsageCacheV4 dataUsageCacheV5 dataUsageCacheV6 dataUsageCacheV7
// dataUsageCacheV2 contains a cache of data usage entries version 2.
type dataUsageCacheV2 struct {
@ -302,6 +321,12 @@ type dataUsageCacheV6 struct {
Cache map[string]dataUsageEntryV6
}
// dataUsageCacheV7 contains a cache of data usage entries version 7.
// deserialize decodes caches stored with version 7 into this type and then
// converts every entry to the current dataUsageEntry layout.
type dataUsageCacheV7 struct {
Info dataUsageCacheInfo
Cache map[string]dataUsageEntryV7
}
//msgp:ignore dataUsageEntryInfo
type dataUsageEntryInfo struct {
Name string
@ -750,11 +775,42 @@ func (h *sizeHistogram) add(size int64) {
}
}
// mergeV1 is used to migrate data usage cache from sizeHistogramV1 to
// sizeHistogram. For every V1 interval it finds the interval with the same
// start and end in ObjectsHistogramIntervals and adds the V1 count to the
// matching slot of h.
//
// Both interval tables are walked front to back, so the V1 intervals must
// appear in ObjectsHistogramIntervals in the same relative order. The walk
// stops as soon as either table is exhausted: a V1 interval that has no
// counterpart is left unmerged instead of indexing past the end of
// ObjectsHistogramIntervals.
func (h *sizeHistogram) mergeV1(v sizeHistogramV1) {
	var oidx, nidx int
	for oidx < len(v) && nidx < len(h) {
		intOld, intNew := ObjectsHistogramIntervalsV1[oidx], ObjectsHistogramIntervals[nidx]
		// skip intervals that aren't common to both histograms
		if intOld.start != intNew.start || intOld.end != intNew.end {
			nidx++
			continue
		}
		h[nidx] += v[oidx]
		oidx++
		nidx++
	}
}
// toMap converts the histogram into a map from interval name to count.
//
// The special interval [1024B, 1MiB) is reported as the sum of the finer
// intervals it contains; every other interval is reported with its own
// counter.
func (h *sizeHistogram) toMap() map[string]uint64 {
	res := make(map[string]uint64, dataUsageBucketLen)
	var splCount uint64
	for i, count := range h {
		szInt := ObjectsHistogramIntervals[i]
		switch {
		case humanize.KiByte == szInt.start && szInt.end == humanize.MiByte-1:
			// spl interval: [1024B, 1MiB)
			// NOTE(review): the counter stored in h for this slot (for
			// example one merged in by mergeV1) is not reported here, only
			// the sum of the finer intervals is. Confirm this is intended.
			res[szInt.name] = splCount
		case humanize.KiByte <= szInt.start && szInt.end <= humanize.MiByte-1:
			// intervals that fall within the spl interval above; they
			// appear earlier in this array of intervals, see
			// ObjectsHistogramIntervals
			splCount += count
			fallthrough
		default:
			res[szInt.name] = count
		}
	}
	return res
}
@ -1027,7 +1083,8 @@ func (d *dataUsageCache) save(ctx context.Context, store objectIO, name string)
// Bumping the cache version will drop data from previous versions
// and write new data with the new version.
const (
dataUsageCacheVerCurrent = 7
dataUsageCacheVerCurrent = 8
dataUsageCacheVerV7 = 7
dataUsageCacheVerV6 = 6
dataUsageCacheVerV5 = 5
dataUsageCacheVerV4 = 4
@ -1275,6 +1332,34 @@ func (d *dataUsageCache) deserialize(r io.Reader) error {
}
d.Cache[k] = due
}
return nil
case dataUsageCacheVerV7:
// Zstd compressed.
dec, err := zstd.NewReader(r, zstd.WithDecoderConcurrency(2))
if err != nil {
return err
}
defer dec.Close()
dold := &dataUsageCacheV7{}
if err = dold.DecodeMsg(msgp.NewReader(dec)); err != nil {
return err
}
d.Info = dold.Info
d.Cache = make(map[string]dataUsageEntry, len(dold.Cache))
for k, v := range dold.Cache {
var szHist sizeHistogram
szHist.mergeV1(v.ObjSizes)
d.Cache[k] = dataUsageEntry{
Children: v.Children,
Size: v.Size,
Objects: v.Objects,
Versions: v.Versions,
ObjSizes: szHist,
ReplicationStats: v.ReplicationStats,
Compacted: v.Compacted,
}
}
return nil
case dataUsageCacheVerCurrent:
// Zstd compressed.

View File

@ -1480,6 +1480,149 @@ func (z *dataUsageCacheV6) Msgsize() (s int) {
return
}
// DecodeMsg implements msgp.Decodable
//
// It reads a map from dc. The "Info" key is decoded into z.Info and the
// "Cache" key into z.Cache; the value of any other key is skipped. Every
// error is returned wrapped with msgp.WrapError.
func (z *dataUsageCacheV7) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
// zb0001 counts the top-level map fields that are still to be read.
var zb0001 uint32
zb0001, err = dc.ReadMapHeader()
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, err = dc.ReadMapKeyPtr()
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "Info":
err = z.Info.DecodeMsg(dc)
if err != nil {
err = msgp.WrapError(err, "Info")
return
}
case "Cache":
// zb0002 counts the cache entries that are still to be read.
var zb0002 uint32
zb0002, err = dc.ReadMapHeader()
if err != nil {
err = msgp.WrapError(err, "Cache")
return
}
// Allocate the map on first use; otherwise keep it but drop the
// entries it already holds.
if z.Cache == nil {
z.Cache = make(map[string]dataUsageEntryV7, zb0002)
} else if len(z.Cache) > 0 {
for key := range z.Cache {
delete(z.Cache, key)
}
}
for zb0002 > 0 {
zb0002--
var za0001 string
var za0002 dataUsageEntryV7
za0001, err = dc.ReadString()
if err != nil {
err = msgp.WrapError(err, "Cache")
return
}
err = za0002.DecodeMsg(dc)
if err != nil {
err = msgp.WrapError(err, "Cache", za0001)
return
}
z.Cache[za0001] = za0002
}
default:
// Unknown field: skip its value.
err = dc.Skip()
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
return
}
// UnmarshalMsg implements msgp.Unmarshaler
//
// It reads a map from bts. The "Info" key is unmarshaled into z.Info and the
// "Cache" key into z.Cache; the value of any other key is skipped. On
// success o holds the bytes that follow the decoded map.
func (z *dataUsageCacheV7) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
// zb0001 counts the top-level map fields that are still to be read.
var zb0001 uint32
zb0001, bts, err = msgp.ReadMapHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, bts, err = msgp.ReadMapKeyZC(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "Info":
bts, err = z.Info.UnmarshalMsg(bts)
if err != nil {
err = msgp.WrapError(err, "Info")
return
}
case "Cache":
// zb0002 counts the cache entries that are still to be read.
var zb0002 uint32
zb0002, bts, err = msgp.ReadMapHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err, "Cache")
return
}
// Allocate the map on first use; otherwise keep it but drop the
// entries it already holds.
if z.Cache == nil {
z.Cache = make(map[string]dataUsageEntryV7, zb0002)
} else if len(z.Cache) > 0 {
for key := range z.Cache {
delete(z.Cache, key)
}
}
for zb0002 > 0 {
var za0001 string
var za0002 dataUsageEntryV7
zb0002--
za0001, bts, err = msgp.ReadStringBytes(bts)
if err != nil {
err = msgp.WrapError(err, "Cache")
return
}
bts, err = za0002.UnmarshalMsg(bts)
if err != nil {
err = msgp.WrapError(err, "Cache", za0001)
return
}
z.Cache[za0001] = za0002
}
default:
// Unknown field: skip its value.
bts, err = msgp.Skip(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
o = bts
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *dataUsageCacheV7) Msgsize() (s int) {
	// Fixed part: map header, the "Info" and "Cache" keys, the Info value
	// and the header of the Cache map.
	s = 1 + 5 + z.Info.Msgsize() + 6 + msgp.MapHeaderSize
	// Ranging over a nil map visits nothing, so no explicit nil check is needed.
	for key, entry := range z.Cache {
		s += msgp.StringPrefixSize + len(key) + entry.Msgsize()
	}
	return s
}
// DecodeMsg implements msgp.Decodable
func (z *dataUsageEntry) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
@ -2684,6 +2827,297 @@ func (z *dataUsageEntryV6) Msgsize() (s int) {
return
}
// DecodeMsg implements msgp.Decodable
//
// It reads a map from dc and fills the field that matches each key ("ch",
// "sz", "os", "vs", "dms", "szs", "vh", "rs", "ats", "c"); the value of any
// other key is skipped. Errors are returned wrapped with msgp.WrapError,
// except for the array length mismatches, which return msgp.ArrayError.
func (z *dataUsageEntryV7) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte
_ = field
// zb0001 counts the map fields that are still to be read.
var zb0001 uint32
zb0001, err = dc.ReadMapHeader()
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, err = dc.ReadMapKeyPtr()
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "ch":
err = z.Children.DecodeMsg(dc)
if err != nil {
err = msgp.WrapError(err, "Children")
return
}
case "sz":
z.Size, err = dc.ReadInt64()
if err != nil {
err = msgp.WrapError(err, "Size")
return
}
case "os":
z.Objects, err = dc.ReadUint64()
if err != nil {
err = msgp.WrapError(err, "Objects")
return
}
case "vs":
z.Versions, err = dc.ReadUint64()
if err != nil {
err = msgp.WrapError(err, "Versions")
return
}
case "dms":
z.DeleteMarkers, err = dc.ReadUint64()
if err != nil {
err = msgp.WrapError(err, "DeleteMarkers")
return
}
case "szs":
// The size histogram must have exactly dataUsageBucketLenV1 elements.
var zb0002 uint32
zb0002, err = dc.ReadArrayHeader()
if err != nil {
err = msgp.WrapError(err, "ObjSizes")
return
}
if zb0002 != uint32(dataUsageBucketLenV1) {
err = msgp.ArrayError{Wanted: uint32(dataUsageBucketLenV1), Got: zb0002}
return
}
for za0001 := range z.ObjSizes {
z.ObjSizes[za0001], err = dc.ReadUint64()
if err != nil {
err = msgp.WrapError(err, "ObjSizes", za0001)
return
}
}
case "vh":
// The versions histogram must have exactly dataUsageVersionLen elements.
var zb0003 uint32
zb0003, err = dc.ReadArrayHeader()
if err != nil {
err = msgp.WrapError(err, "ObjVersions")
return
}
if zb0003 != uint32(dataUsageVersionLen) {
err = msgp.ArrayError{Wanted: uint32(dataUsageVersionLen), Got: zb0003}
return
}
for za0002 := range z.ObjVersions {
z.ObjVersions[za0002], err = dc.ReadUint64()
if err != nil {
err = msgp.WrapError(err, "ObjVersions", za0002)
return
}
}
case "rs":
// A nil value clears the pointer; otherwise decode into the existing
// value, allocating one if needed.
if dc.IsNil() {
err = dc.ReadNil()
if err != nil {
err = msgp.WrapError(err, "ReplicationStats")
return
}
z.ReplicationStats = nil
} else {
if z.ReplicationStats == nil {
z.ReplicationStats = new(replicationAllStats)
}
err = z.ReplicationStats.DecodeMsg(dc)
if err != nil {
err = msgp.WrapError(err, "ReplicationStats")
return
}
}
case "ats":
// Same nil handling as "rs".
if dc.IsNil() {
err = dc.ReadNil()
if err != nil {
err = msgp.WrapError(err, "AllTierStats")
return
}
z.AllTierStats = nil
} else {
if z.AllTierStats == nil {
z.AllTierStats = new(allTierStats)
}
err = z.AllTierStats.DecodeMsg(dc)
if err != nil {
err = msgp.WrapError(err, "AllTierStats")
return
}
}
case "c":
z.Compacted, err = dc.ReadBool()
if err != nil {
err = msgp.WrapError(err, "Compacted")
return
}
default:
// Unknown field: skip its value.
err = dc.Skip()
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
return
}
// UnmarshalMsg implements msgp.Unmarshaler
//
// It reads a map from bts and fills the field that matches each key ("ch",
// "sz", "os", "vs", "dms", "szs", "vh", "rs", "ats", "c"); the value of any
// other key is skipped. On success o holds the bytes that follow the
// decoded map.
func (z *dataUsageEntryV7) UnmarshalMsg(bts []byte) (o []byte, err error) {
var field []byte
_ = field
// zb0001 counts the map fields that are still to be read.
var zb0001 uint32
zb0001, bts, err = msgp.ReadMapHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
for zb0001 > 0 {
zb0001--
field, bts, err = msgp.ReadMapKeyZC(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
switch msgp.UnsafeString(field) {
case "ch":
bts, err = z.Children.UnmarshalMsg(bts)
if err != nil {
err = msgp.WrapError(err, "Children")
return
}
case "sz":
z.Size, bts, err = msgp.ReadInt64Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "Size")
return
}
case "os":
z.Objects, bts, err = msgp.ReadUint64Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "Objects")
return
}
case "vs":
z.Versions, bts, err = msgp.ReadUint64Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "Versions")
return
}
case "dms":
z.DeleteMarkers, bts, err = msgp.ReadUint64Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "DeleteMarkers")
return
}
case "szs":
// The size histogram must have exactly dataUsageBucketLenV1 elements.
var zb0002 uint32
zb0002, bts, err = msgp.ReadArrayHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err, "ObjSizes")
return
}
if zb0002 != uint32(dataUsageBucketLenV1) {
err = msgp.ArrayError{Wanted: uint32(dataUsageBucketLenV1), Got: zb0002}
return
}
for za0001 := range z.ObjSizes {
z.ObjSizes[za0001], bts, err = msgp.ReadUint64Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "ObjSizes", za0001)
return
}
}
case "vh":
// The versions histogram must have exactly dataUsageVersionLen elements.
var zb0003 uint32
zb0003, bts, err = msgp.ReadArrayHeaderBytes(bts)
if err != nil {
err = msgp.WrapError(err, "ObjVersions")
return
}
if zb0003 != uint32(dataUsageVersionLen) {
err = msgp.ArrayError{Wanted: uint32(dataUsageVersionLen), Got: zb0003}
return
}
for za0002 := range z.ObjVersions {
z.ObjVersions[za0002], bts, err = msgp.ReadUint64Bytes(bts)
if err != nil {
err = msgp.WrapError(err, "ObjVersions", za0002)
return
}
}
case "rs":
// A nil value clears the pointer; otherwise unmarshal into the existing
// value, allocating one if needed.
if msgp.IsNil(bts) {
bts, err = msgp.ReadNilBytes(bts)
if err != nil {
// NOTE(review): returned without msgp.WrapError context, unlike
// the other error paths in this function.
return
}
z.ReplicationStats = nil
} else {
if z.ReplicationStats == nil {
z.ReplicationStats = new(replicationAllStats)
}
bts, err = z.ReplicationStats.UnmarshalMsg(bts)
if err != nil {
err = msgp.WrapError(err, "ReplicationStats")
return
}
}
case "ats":
// Same nil handling as "rs".
if msgp.IsNil(bts) {
bts, err = msgp.ReadNilBytes(bts)
if err != nil {
// NOTE(review): returned without msgp.WrapError context, unlike
// the other error paths in this function.
return
}
z.AllTierStats = nil
} else {
if z.AllTierStats == nil {
z.AllTierStats = new(allTierStats)
}
bts, err = z.AllTierStats.UnmarshalMsg(bts)
if err != nil {
err = msgp.WrapError(err, "AllTierStats")
return
}
}
case "c":
z.Compacted, bts, err = msgp.ReadBoolBytes(bts)
if err != nil {
err = msgp.WrapError(err, "Compacted")
return
}
default:
// Unknown field: skip its value.
bts, err = msgp.Skip(bts)
if err != nil {
err = msgp.WrapError(err)
return
}
}
}
o = bts
return
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *dataUsageEntryV7) Msgsize() (s int) {
	// Map header, then one term per field: the encoded key followed by the
	// size of its value.
	s = 1
	s += 3 + z.Children.Msgsize()
	s += 3 + msgp.Int64Size
	s += 3 + msgp.Uint64Size
	s += 3 + msgp.Uint64Size
	s += 4 + msgp.Uint64Size
	s += 4 + msgp.ArrayHeaderSize + (dataUsageBucketLenV1 * (msgp.Uint64Size))
	s += 3 + msgp.ArrayHeaderSize + (dataUsageVersionLen * (msgp.Uint64Size))

	// Optional pointers cost a nil marker when unset.
	s += 3
	if z.ReplicationStats != nil {
		s += z.ReplicationStats.Msgsize()
	} else {
		s += msgp.NilSize
	}

	s += 4
	if z.AllTierStats != nil {
		s += z.AllTierStats.Msgsize()
	} else {
		s += msgp.NilSize
	}

	s += 2 + msgp.BoolSize
	return s
}
// DecodeMsg implements msgp.Decodable
func (z *dataUsageHash) DecodeMsg(dc *msgp.Reader) (err error) {
{
@ -3705,6 +4139,84 @@ func (z *sizeHistogram) Msgsize() (s int) {
return
}
// DecodeMsg implements msgp.Decodable
//
// The encoded array must hold exactly dataUsageBucketLenV1 elements;
// otherwise a msgp.ArrayError is returned.
func (z *sizeHistogramV1) DecodeMsg(dc *msgp.Reader) (err error) {
	var n uint32
	if n, err = dc.ReadArrayHeader(); err != nil {
		return msgp.WrapError(err)
	}
	if n != uint32(dataUsageBucketLenV1) {
		return msgp.ArrayError{Wanted: uint32(dataUsageBucketLenV1), Got: n}
	}
	for i := range z {
		if z[i], err = dc.ReadUint64(); err != nil {
			return msgp.WrapError(err, i)
		}
	}
	return nil
}
// EncodeMsg implements msgp.Encodable
//
// It writes an array header of dataUsageBucketLenV1 followed by every
// counter in index order.
func (z *sizeHistogramV1) EncodeMsg(en *msgp.Writer) (err error) {
	if err = en.WriteArrayHeader(uint32(dataUsageBucketLenV1)); err != nil {
		return msgp.WrapError(err)
	}
	for i := range z {
		if err = en.WriteUint64(z[i]); err != nil {
			return msgp.WrapError(err, i)
		}
	}
	return nil
}
// MarshalMsg implements msgp.Marshaler
//
// It appends the array header and every counter to b and returns the
// extended slice.
func (z *sizeHistogramV1) MarshalMsg(b []byte) (o []byte, err error) {
	o = msgp.AppendArrayHeader(msgp.Require(b, z.Msgsize()), uint32(dataUsageBucketLenV1))
	for _, count := range z {
		o = msgp.AppendUint64(o, count)
	}
	return o, nil
}
// UnmarshalMsg implements msgp.Unmarshaler
//
// The encoded array must hold exactly dataUsageBucketLenV1 elements;
// otherwise a msgp.ArrayError is returned. On success the bytes that
// follow the array are returned.
func (z *sizeHistogramV1) UnmarshalMsg(bts []byte) (o []byte, err error) {
	var n uint32
	if n, bts, err = msgp.ReadArrayHeaderBytes(bts); err != nil {
		return nil, msgp.WrapError(err)
	}
	if n != uint32(dataUsageBucketLenV1) {
		return nil, msgp.ArrayError{Wanted: uint32(dataUsageBucketLenV1), Got: n}
	}
	for i := range z {
		if z[i], bts, err = msgp.ReadUint64Bytes(bts); err != nil {
			return nil, msgp.WrapError(err, i)
		}
	}
	return bts, nil
}
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
func (z *sizeHistogramV1) Msgsize() (s int) {
	return msgp.ArrayHeaderSize + dataUsageBucketLenV1*msgp.Uint64Size
}
// DecodeMsg implements msgp.Decodable
func (z *tierStats) DecodeMsg(dc *msgp.Reader) (err error) {
var field []byte

View File

@ -1084,6 +1084,119 @@ func BenchmarkDecodesizeHistogram(b *testing.B) {
}
}
// TestMarshalUnmarshalsizeHistogramV1 marshals a zero sizeHistogramV1 and
// checks that both UnmarshalMsg and msgp.Skip consume the whole encoding.
func TestMarshalUnmarshalsizeHistogramV1(t *testing.T) {
	var hist sizeHistogramV1
	encoded, err := hist.MarshalMsg(nil)
	if err != nil {
		t.Fatal(err)
	}

	rest, err := hist.UnmarshalMsg(encoded)
	if err != nil {
		t.Fatal(err)
	}
	if len(rest) > 0 {
		t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(rest), rest)
	}

	if rest, err = msgp.Skip(encoded); err != nil {
		t.Fatal(err)
	}
	if len(rest) > 0 {
		t.Errorf("%d bytes left over after Skip(): %q", len(rest), rest)
	}
}
// BenchmarkMarshalMsgsizeHistogramV1 measures MarshalMsg into a fresh buffer.
func BenchmarkMarshalMsgsizeHistogramV1(b *testing.B) {
	var hist sizeHistogramV1
	b.ReportAllocs()
	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		hist.MarshalMsg(nil)
	}
}
// BenchmarkAppendMsgsizeHistogramV1 measures MarshalMsg into a reused,
// pre-sized buffer.
func BenchmarkAppendMsgsizeHistogramV1(b *testing.B) {
	var hist sizeHistogramV1
	scratch := make([]byte, 0, hist.Msgsize())
	scratch, _ = hist.MarshalMsg(scratch[0:0])
	b.SetBytes(int64(len(scratch)))
	b.ReportAllocs()
	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		scratch, _ = hist.MarshalMsg(scratch[0:0])
	}
}
// BenchmarkUnmarshalsizeHistogramV1 measures UnmarshalMsg on a pre-encoded
// zero histogram.
func BenchmarkUnmarshalsizeHistogramV1(b *testing.B) {
	var hist sizeHistogramV1
	encoded, _ := hist.MarshalMsg(nil)
	b.ReportAllocs()
	b.SetBytes(int64(len(encoded)))
	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		if _, err := hist.UnmarshalMsg(encoded); err != nil {
			b.Fatal(err)
		}
	}
}
// TestEncodeDecodesizeHistogramV1 encodes a zero sizeHistogramV1, checks the
// encoding against Msgsize, decodes it again and finally verifies that a
// fresh encoding can be skipped by a msgp reader.
func TestEncodeDecodesizeHistogramV1(t *testing.T) {
	v := sizeHistogramV1{}
	var buf bytes.Buffer
	// Fail right here on an encode error instead of letting it surface later
	// as a confusing decode failure.
	if err := msgp.Encode(&buf, &v); err != nil {
		t.Fatal(err)
	}

	m := v.Msgsize()
	if buf.Len() > m {
		t.Log("WARNING: TestEncodeDecodesizeHistogramV1 Msgsize() is inaccurate")
	}

	vn := sizeHistogramV1{}
	err := msgp.Decode(&buf, &vn)
	if err != nil {
		t.Error(err)
	}

	buf.Reset()
	if err = msgp.Encode(&buf, &v); err != nil {
		t.Fatal(err)
	}
	err = msgp.NewReader(&buf).Skip()
	if err != nil {
		t.Error(err)
	}
}
// BenchmarkEncodesizeHistogramV1 measures EncodeMsg against a writer that
// discards its output.
func BenchmarkEncodesizeHistogramV1(b *testing.B) {
	var hist sizeHistogramV1
	// Encode once up front only to learn the encoded size for SetBytes.
	var sized bytes.Buffer
	msgp.Encode(&sized, &hist)
	b.SetBytes(int64(sized.Len()))

	w := msgp.NewWriter(msgp.Nowhere)
	b.ReportAllocs()
	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		hist.EncodeMsg(w)
	}
	w.Flush()
}
// BenchmarkDecodesizeHistogramV1 measures DecodeMsg reading the same encoded
// histogram over and over from an endless reader.
func BenchmarkDecodesizeHistogramV1(b *testing.B) {
	var hist sizeHistogramV1
	var encoded bytes.Buffer
	msgp.Encode(&encoded, &hist)
	b.SetBytes(int64(encoded.Len()))

	reader := msgp.NewReader(msgp.NewEndlessReader(encoded.Bytes(), b))
	b.ReportAllocs()
	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		if err := hist.DecodeMsg(reader); err != nil {
			b.Fatal(err)
		}
	}
}
func TestMarshalUnmarshaltierStats(t *testing.T) {
v := tierStats{}
bts, err := v.MarshalMsg(nil)

View File

@ -0,0 +1,98 @@
// Copyright (c) 2015-2023 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cmd
import (
"fmt"
"testing"
"github.com/dustin/go-humanize"
)
// TestSizeHistogramToMap adds a list of object sizes to a histogram and
// checks the interval-name to count map produced by toMap: every expected
// interval must carry the expected count, and no other interval may carry a
// non-zero count.
func TestSizeHistogramToMap(t *testing.T) {
	type testCase struct {
		sizes []int64
		want  map[string]uint64
	}
	cases := []testCase{
		{
			sizes: []int64{100, 1000, 72_000, 100_000},
			want: map[string]uint64{
				"LESS_THAN_1024_B":         2,
				"BETWEEN_64_KB_AND_256_KB": 2,
				"BETWEEN_1024B_AND_1_MB":   2,
			},
		},
		{
			sizes: []int64{100, 1000, 2000, 100_000, 13 * humanize.MiByte},
			want: map[string]uint64{
				"LESS_THAN_1024_B":         2,
				"BETWEEN_1024_B_AND_64_KB": 1,
				"BETWEEN_64_KB_AND_256_KB": 1,
				"BETWEEN_1024B_AND_1_MB":   2,
				"BETWEEN_10_MB_AND_64_MB":  1,
			},
		},
	}

	for idx, tc := range cases {
		t.Run(fmt.Sprintf("Test-%d", idx), func(t *testing.T) {
			var hist sizeHistogram
			for _, size := range tc.sizes {
				hist.add(size)
			}
			got := hist.toMap()

			// every expected interval must be reported with the expected count
			for name, wantCount := range tc.want {
				if gotCount := got[name]; gotCount != wantCount {
					t.Fatalf("interval %s: Expected %d values but got %d values\n", name, wantCount, gotCount)
				}
			}
			// intervals that are not expected must not hold any objects
			for name, gotCount := range got {
				if _, expected := tc.want[name]; !expected && gotCount > 0 {
					t.Fatalf("Unexpected interval: %s has value %d\n", name, gotCount)
				}
			}
		})
	}
}
// TestMigrateSizeHistogramFromV1 merges V1 histograms into an empty
// sizeHistogram and compares the result with the expected slot layout.
func TestMigrateSizeHistogramFromV1(t *testing.T) {
	cases := []struct {
		v    sizeHistogramV1
		want sizeHistogram
	}{
		{
			v:    sizeHistogramV1{0: 10, 1: 20, 2: 3},
			want: sizeHistogram{0: 10, 5: 20, 6: 3},
		},
		{
			v:    sizeHistogramV1{0: 10, 1: 20, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7},
			want: sizeHistogram{0: 10, 5: 20, 6: 3, 7: 4, 8: 5, 9: 6, 10: 7},
		},
	}

	for idx := range cases {
		tc := cases[idx]
		t.Run(fmt.Sprintf("test-%d", idx), func(t *testing.T) {
			var migrated sizeHistogram
			migrated.mergeV1(tc.v)
			if migrated != tc.want {
				t.Fatalf("Expected %v but got %v", tc.want, migrated)
			}
		})
	}
}

View File

@ -82,7 +82,7 @@ func TestDataUsageUpdate(t *testing.T) {
size: 1322310,
flatten: true,
objs: 8,
oSizes: sizeHistogram{0: 2, 1: 6},
oSizes: sizeHistogram{0: 2, 1: 3, 2: 2, 4: 1},
},
{
path: "/",
@ -94,7 +94,7 @@ func TestDataUsageUpdate(t *testing.T) {
path: "/dir1",
size: 1302010,
objs: 5,
oSizes: sizeHistogram{0: 1, 1: 4},
oSizes: sizeHistogram{0: 1, 1: 1, 2: 2, 4: 1},
},
{
path: "/dir1/dira",
@ -167,7 +167,6 @@ func TestDataUsageUpdate(t *testing.T) {
size: 200,
},
}
createUsageTestFiles(t, base, bucket, files)
err = os.RemoveAll(filepath.Join(base, bucket, "dir1/dira/dirasub/dcfile"))
if err != nil {
@ -194,14 +193,14 @@ func TestDataUsageUpdate(t *testing.T) {
size: 363515,
flatten: true,
objs: 14,
oSizes: sizeHistogram{0: 7, 1: 7},
oSizes: sizeHistogram{0: 7, 1: 5, 2: 2},
},
{
path: "/dir1",
size: 342210,
objs: 7,
flatten: false,
oSizes: sizeHistogram{0: 2, 1: 5},
oSizes: sizeHistogram{0: 2, 1: 3, 2: 2},
},
{
path: "/newfolder",
@ -308,7 +307,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
path: "flat",
size: 1322310 + expectSize,
objs: 8 + expectSize,
oSizes: sizeHistogram{0: 2 + expectSize, 1: 6},
oSizes: sizeHistogram{0: 2 + expectSize, 1: 3, 2: 2, 4: 1},
},
{
path: "bucket/",
@ -321,7 +320,7 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
path: "bucket/dir1",
size: 1302010,
objs: 5,
oSizes: sizeHistogram{0: 1, 1: 4},
oSizes: sizeHistogram{0: 1, 1: 1, 2: 2, 4: 1},
},
{
// Gets compacted at this level...
@ -436,13 +435,13 @@ func TestDataUsageUpdatePrefix(t *testing.T) {
path: "flat",
size: 363515 + expectSize,
objs: 14 + expectSize,
oSizes: sizeHistogram{0: 7 + expectSize, 1: 7},
oSizes: sizeHistogram{0: 7 + expectSize, 1: 5, 2: 2},
},
{
path: "bucket/dir1",
size: 342210,
objs: 7,
oSizes: sizeHistogram{0: 2, 1: 5},
oSizes: sizeHistogram{0: 2, 1: 3, 2: 2},
},
{
path: "bucket/",

View File

@ -54,16 +54,35 @@ type objectHistogramInterval struct {
}
// Lengths of the fixed-size histogram arrays.
const (
	// dataUsageBucketLenV1 must be length of ObjectsHistogramIntervalsV1
	dataUsageBucketLenV1 = 7
	// dataUsageBucketLen must be length of ObjectsHistogramIntervals
	dataUsageBucketLen  = 11
	dataUsageVersionLen = 7
)
// ObjectsHistogramIntervalsV1 is the list of all intervals
// of object sizes to be included in objects histogram(V1).
// Each entry lists a name followed by two byte sizes; every second size is
// one less than the first size of the next entry, so the bounds appear to be
// inclusive start and end values (TODO confirm against
// objectHistogramInterval).
var ObjectsHistogramIntervalsV1 = [dataUsageBucketLenV1]objectHistogramInterval{
{"LESS_THAN_1024_B", 0, humanize.KiByte - 1},
{"BETWEEN_1024B_AND_1_MB", humanize.KiByte, humanize.MiByte - 1},
{"BETWEEN_1_MB_AND_10_MB", humanize.MiByte, humanize.MiByte*10 - 1},
{"BETWEEN_10_MB_AND_64_MB", humanize.MiByte * 10, humanize.MiByte*64 - 1},
{"BETWEEN_64_MB_AND_128_MB", humanize.MiByte * 64, humanize.MiByte*128 - 1},
{"BETWEEN_128_MB_AND_512_MB", humanize.MiByte * 128, humanize.MiByte*512 - 1},
{"GREATER_THAN_512_MB", humanize.MiByte * 512, math.MaxInt64},
}
// ObjectsHistogramIntervals is the list of all intervals
// of object sizes to be included in objects histogram.
// Note: this histogram expands 1024B-1MB to incl. 1024B-64KB, 64KB-256KB, 256KB-512KB and 512KB-1MiB
var ObjectsHistogramIntervals = [dataUsageBucketLen]objectHistogramInterval{
{"LESS_THAN_1024_B", 0, humanize.KiByte - 1},
{"BETWEEN_1024_B_AND_1_MB", humanize.KiByte, humanize.MiByte - 1},
{"BETWEEN_1024_B_AND_64_KB", humanize.KiByte, 64*humanize.KiByte - 1}, // not exported, for support use only
{"BETWEEN_64_KB_AND_256_KB", 64 * humanize.KiByte, 256*humanize.KiByte - 1}, // not exported, for support use only
{"BETWEEN_256_KB_AND_512_KB", 256 * humanize.KiByte, 512*humanize.KiByte - 1}, // not exported, for support use only
{"BETWEEN_512_KB_AND_1_MB", 512 * humanize.KiByte, humanize.MiByte - 1}, // not exported, for support use only
{"BETWEEN_1024B_AND_1_MB", humanize.KiByte, humanize.MiByte - 1},
{"BETWEEN_1_MB_AND_10_MB", humanize.MiByte, humanize.MiByte*10 - 1},
{"BETWEEN_10_MB_AND_64_MB", humanize.MiByte * 10, humanize.MiByte*64 - 1},
{"BETWEEN_64_MB_AND_128_MB", humanize.MiByte * 64, humanize.MiByte*128 - 1},