mirror of
https://github.com/minio/minio.git
synced 2025-11-09 05:34:56 -05:00
add data update tracking using bloom filter (#9208)
By monitoring PUT/DELETE and heal operations, it is possible to track changed paths and keep a bloom filter for this data. This can help prioritize paths to scan. The bloom filter can identify paths that have not changed, and the few collisions will only result in a marginal extra workload. This can be implemented at a bucket+(1 prefix level) granularity with reasonable performance. The bloom filter is set to have a false positive rate of 1% at 1M entries. A bloom table of this size is about 2500 bytes when serialized. To avoid forcing a full scan of all paths that have changed, `cycle` bloom filters need to be kept, so that we can guarantee that dirty paths have been scanned within `cycle` runs. Until `cycle` bloom filters have been collected, all paths are considered dirty.
This commit is contained in:
@@ -37,11 +37,17 @@ func (z *dataUsageCacheInfo) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
return
|
||||
}
|
||||
case "NextCycle":
|
||||
z.NextCycle, err = dc.ReadUint8()
|
||||
z.NextCycle, err = dc.ReadUint32()
|
||||
if err != nil {
|
||||
err = msgp.WrapError(err, "NextCycle")
|
||||
return
|
||||
}
|
||||
case "BloomFilter":
|
||||
z.BloomFilter, err = dc.ReadBytes(z.BloomFilter)
|
||||
if err != nil {
|
||||
err = msgp.WrapError(err, "BloomFilter")
|
||||
return
|
||||
}
|
||||
default:
|
||||
err = dc.Skip()
|
||||
if err != nil {
|
||||
@@ -54,10 +60,24 @@ func (z *dataUsageCacheInfo) DecodeMsg(dc *msgp.Reader) (err error) {
|
||||
}
|
||||
|
||||
// EncodeMsg implements msgp.Encodable
|
||||
func (z dataUsageCacheInfo) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// map header, size 3
|
||||
func (z *dataUsageCacheInfo) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
// omitempty: check for empty values
|
||||
zb0001Len := uint32(4)
|
||||
var zb0001Mask uint8 /* 4 bits */
|
||||
if z.BloomFilter == nil {
|
||||
zb0001Len--
|
||||
zb0001Mask |= 0x8
|
||||
}
|
||||
// variable map header, size zb0001Len
|
||||
err = en.Append(0x80 | uint8(zb0001Len))
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
if zb0001Len == 0 {
|
||||
return
|
||||
}
|
||||
// write "Name"
|
||||
err = en.Append(0x83, 0xa4, 0x4e, 0x61, 0x6d, 0x65)
|
||||
err = en.Append(0xa4, 0x4e, 0x61, 0x6d, 0x65)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
@@ -81,27 +101,55 @@ func (z dataUsageCacheInfo) EncodeMsg(en *msgp.Writer) (err error) {
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = en.WriteUint8(z.NextCycle)
|
||||
err = en.WriteUint32(z.NextCycle)
|
||||
if err != nil {
|
||||
err = msgp.WrapError(err, "NextCycle")
|
||||
return
|
||||
}
|
||||
if (zb0001Mask & 0x8) == 0 { // if not empty
|
||||
// write "BloomFilter"
|
||||
err = en.Append(0xab, 0x42, 0x6c, 0x6f, 0x6f, 0x6d, 0x46, 0x69, 0x6c, 0x74, 0x65, 0x72)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
err = en.WriteBytes(z.BloomFilter)
|
||||
if err != nil {
|
||||
err = msgp.WrapError(err, "BloomFilter")
|
||||
return
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// MarshalMsg implements msgp.Marshaler
|
||||
func (z dataUsageCacheInfo) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
func (z *dataUsageCacheInfo) MarshalMsg(b []byte) (o []byte, err error) {
|
||||
o = msgp.Require(b, z.Msgsize())
|
||||
// map header, size 3
|
||||
// omitempty: check for empty values
|
||||
zb0001Len := uint32(4)
|
||||
var zb0001Mask uint8 /* 4 bits */
|
||||
if z.BloomFilter == nil {
|
||||
zb0001Len--
|
||||
zb0001Mask |= 0x8
|
||||
}
|
||||
// variable map header, size zb0001Len
|
||||
o = append(o, 0x80|uint8(zb0001Len))
|
||||
if zb0001Len == 0 {
|
||||
return
|
||||
}
|
||||
// string "Name"
|
||||
o = append(o, 0x83, 0xa4, 0x4e, 0x61, 0x6d, 0x65)
|
||||
o = append(o, 0xa4, 0x4e, 0x61, 0x6d, 0x65)
|
||||
o = msgp.AppendString(o, z.Name)
|
||||
// string "LastUpdate"
|
||||
o = append(o, 0xaa, 0x4c, 0x61, 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65)
|
||||
o = msgp.AppendTime(o, z.LastUpdate)
|
||||
// string "NextCycle"
|
||||
o = append(o, 0xa9, 0x4e, 0x65, 0x78, 0x74, 0x43, 0x79, 0x63, 0x6c, 0x65)
|
||||
o = msgp.AppendUint8(o, z.NextCycle)
|
||||
o = msgp.AppendUint32(o, z.NextCycle)
|
||||
if (zb0001Mask & 0x8) == 0 { // if not empty
|
||||
// string "BloomFilter"
|
||||
o = append(o, 0xab, 0x42, 0x6c, 0x6f, 0x6f, 0x6d, 0x46, 0x69, 0x6c, 0x74, 0x65, 0x72)
|
||||
o = msgp.AppendBytes(o, z.BloomFilter)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -136,11 +184,17 @@ func (z *dataUsageCacheInfo) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
return
|
||||
}
|
||||
case "NextCycle":
|
||||
z.NextCycle, bts, err = msgp.ReadUint8Bytes(bts)
|
||||
z.NextCycle, bts, err = msgp.ReadUint32Bytes(bts)
|
||||
if err != nil {
|
||||
err = msgp.WrapError(err, "NextCycle")
|
||||
return
|
||||
}
|
||||
case "BloomFilter":
|
||||
z.BloomFilter, bts, err = msgp.ReadBytesBytes(bts, z.BloomFilter)
|
||||
if err != nil {
|
||||
err = msgp.WrapError(err, "BloomFilter")
|
||||
return
|
||||
}
|
||||
default:
|
||||
bts, err = msgp.Skip(bts)
|
||||
if err != nil {
|
||||
@@ -154,8 +208,8 @@ func (z *dataUsageCacheInfo) UnmarshalMsg(bts []byte) (o []byte, err error) {
|
||||
}
|
||||
|
||||
// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
|
||||
func (z dataUsageCacheInfo) Msgsize() (s int) {
|
||||
s = 1 + 5 + msgp.StringPrefixSize + len(z.Name) + 11 + msgp.TimeSize + 10 + msgp.Uint8Size
|
||||
func (z *dataUsageCacheInfo) Msgsize() (s int) {
|
||||
s = 1 + 5 + msgp.StringPrefixSize + len(z.Name) + 11 + msgp.TimeSize + 10 + msgp.Uint32Size + 12 + msgp.BytesPrefixSize + len(z.BloomFilter)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user