Update probabilities for bloom filter. (#12305)

See https://github.com/minio/minio/discussions/12285

This results in M=958506, K=7, and 119840 bytes per filter when serialized, compared to 26176 bytes before.
This commit is contained in:
Klaus Post 2021-05-17 17:31:04 +02:00 committed by GitHub
parent f1e479d274
commit 55375fa7f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 10 deletions

View File

@ -40,24 +40,20 @@ import (
const (
// Estimate bloom filter size. With this many items
dataUpdateTrackerEstItems = 10000000
dataUpdateTrackerEstItems = 200000
// ... we want this false positive rate:
dataUpdateTrackerFP = 0.99
dataUpdateTrackerFP = 0.1
dataUpdateTrackerQueueSize = 0
dataUpdateTrackerFilename = dataUsageBucket + SlashSeparator + ".tracker.bin"
dataUpdateTrackerVersion = 4
dataUpdateTrackerVersion = 5
dataUpdateTrackerSaveInterval = 5 * time.Minute
)
var (
objectUpdatedCh chan<- string
intDataUpdateTracker *dataUpdateTracker
)
var intDataUpdateTracker *dataUpdateTracker
func init() {
intDataUpdateTracker = newDataUpdateTracker()
objectUpdatedCh = intDataUpdateTracker.input
}
type dataUpdateTracker struct {
@ -404,8 +400,10 @@ func (d *dataUpdateTracker) deserialize(src io.Reader, newerThan time.Time) erro
return err
}
switch tmp[0] {
case 1, 2, 3:
console.Println(color.Green("dataUpdateTracker: ") + "deprecated data version, updating.")
case 1, 2, 3, 4:
if intDataUpdateTracker.debug {
console.Debugln(color.Green("dataUpdateTracker: ") + "deprecated data version, updating.")
}
return nil
case dataUpdateTrackerVersion:
default:

View File

@ -224,6 +224,7 @@ func TestDataUpdateTracker(t *testing.T) {
t.Fatal("wanted oldest index 3, got", bfr2.OldestIdx)
}
t.Logf("Size of filter %d bytes, M: %d, K:%d", len(bfr2.Filter), dut.Current.bf.Cap(), dut.Current.bf.K())
// Rerun test with returned bfr2
bf := dut.newBloomFilter()
_, err = bf.ReadFrom(bytes.NewReader(bfr2.Filter))