Update probabilities for bloom filter. (#12305)

See https://github.com/minio/minio/discussions/12285

This results in M=958506 and K=7, with each filter taking 119840 bytes when serialized, compared to 26176 bytes before.
This commit is contained in:
Klaus Post 2021-05-17 17:31:04 +02:00 committed by GitHub
parent f1e479d274
commit 55375fa7f6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 10 deletions

View File

@ -40,24 +40,20 @@ import (
const ( const (
// Estimate bloom filter size. With this many items // Estimate bloom filter size. With this many items
dataUpdateTrackerEstItems = 10000000 dataUpdateTrackerEstItems = 200000
// ... we want this false positive rate: // ... we want this false positive rate:
dataUpdateTrackerFP = 0.99 dataUpdateTrackerFP = 0.1
dataUpdateTrackerQueueSize = 0 dataUpdateTrackerQueueSize = 0
dataUpdateTrackerFilename = dataUsageBucket + SlashSeparator + ".tracker.bin" dataUpdateTrackerFilename = dataUsageBucket + SlashSeparator + ".tracker.bin"
dataUpdateTrackerVersion = 4 dataUpdateTrackerVersion = 5
dataUpdateTrackerSaveInterval = 5 * time.Minute dataUpdateTrackerSaveInterval = 5 * time.Minute
) )
var ( var intDataUpdateTracker *dataUpdateTracker
objectUpdatedCh chan<- string
intDataUpdateTracker *dataUpdateTracker
)
func init() { func init() {
intDataUpdateTracker = newDataUpdateTracker() intDataUpdateTracker = newDataUpdateTracker()
objectUpdatedCh = intDataUpdateTracker.input
} }
type dataUpdateTracker struct { type dataUpdateTracker struct {
@ -404,8 +400,10 @@ func (d *dataUpdateTracker) deserialize(src io.Reader, newerThan time.Time) erro
return err return err
} }
switch tmp[0] { switch tmp[0] {
case 1, 2, 3: case 1, 2, 3, 4:
console.Println(color.Green("dataUpdateTracker: ") + "deprecated data version, updating.") if intDataUpdateTracker.debug {
console.Debugln(color.Green("dataUpdateTracker: ") + "deprecated data version, updating.")
}
return nil return nil
case dataUpdateTrackerVersion: case dataUpdateTrackerVersion:
default: default:

View File

@ -224,6 +224,7 @@ func TestDataUpdateTracker(t *testing.T) {
t.Fatal("wanted oldest index 3, got", bfr2.OldestIdx) t.Fatal("wanted oldest index 3, got", bfr2.OldestIdx)
} }
t.Logf("Size of filter %d bytes, M: %d, K:%d", len(bfr2.Filter), dut.Current.bf.Cap(), dut.Current.bf.K())
// Rerun test with returned bfr2 // Rerun test with returned bfr2
bf := dut.newBloomFilter() bf := dut.newBloomFilter()
_, err = bf.ReadFrom(bytes.NewReader(bfr2.Filter)) _, err = bf.ReadFrom(bytes.NewReader(bfr2.Filter))