Optimize read locker cleanup (#14200)

When objects hold a lot of read locks cleanup time grows exponentially.

```
BEFORE:

Unable to complete tests.

AFTER:

=== RUN   Test_localLocker_expireOldLocksExpire/100-locks/1-read
    local-locker_test.go:298: Scan Took: 0s. Left: 100/100
    local-locker_test.go:317: Expire 50% took: 0s. Left: 44/44
    local-locker_test.go:331: Expire rest took: 0s. Left: 0/0
=== RUN   Test_localLocker_expireOldLocksExpire/100-locks/100-read
    local-locker_test.go:298: Scan Took: 0s. Left: 10000/100
    local-locker_test.go:317: Expire 50% took: 1ms. Left: 5000/100
    local-locker_test.go:331: Expire rest took: 1ms. Left: 0/0
=== RUN   Test_localLocker_expireOldLocksExpire/100-locks/1000-read
    local-locker_test.go:298: Scan Took: 2ms. Left: 100000/100
    local-locker_test.go:317: Expire 50% took: 55ms. Left: 50038/100
    local-locker_test.go:331: Expire rest took: 29ms. Left: 0/0
=== RUN   Test_localLocker_expireOldLocksExpire/10000-locks/1-read
    local-locker_test.go:298: Scan Took: 1ms. Left: 10000/10000
    local-locker_test.go:317: Expire 50% took: 2ms. Left: 5019/5019
    local-locker_test.go:331: Expire rest took: 2ms. Left: 0/0
=== RUN   Test_localLocker_expireOldLocksExpire/10000-locks/100-read
    local-locker_test.go:298: Scan Took: 23ms. Left: 1000000/10000
    local-locker_test.go:317: Expire 50% took: 160ms. Left: 499798/10000
    local-locker_test.go:331: Expire rest took: 138ms. Left: 0/0
=== RUN   Test_localLocker_expireOldLocksExpire/10000-locks/1000-read
    local-locker_test.go:298: Scan Took: 200ms. Left: 10000000/10000
    local-locker_test.go:317: Expire 50% took: 5.888s. Left: 5000196/10000
    local-locker_test.go:331: Expire rest took: 3.417s. Left: 0/0
=== RUN   Test_localLocker_expireOldLocksExpire/1000000-locks/1-read
    local-locker_test.go:298: Scan Took: 133ms. Left: 1000000/1000000
    local-locker_test.go:317: Expire 50% took: 348ms. Left: 500255/500255
    local-locker_test.go:331: Expire rest took: 307ms. Left: 0/0
```
This commit is contained in:
Klaus Post 2022-01-27 14:10:57 -08:00 committed by GitHub
parent cf407f7176
commit a2a48cc065
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 110 additions and 16 deletions

View File

@ -349,25 +349,29 @@ func (l *localLocker) expireOldLocks(interval time.Duration) {
l.mutex.Lock()
defer l.mutex.Unlock()
for k := range l.lockMap {
found := false
// Since we mutate the value, remove one per loop.
for {
lris, ok := l.lockMap[k]
if !ok {
break
}
for _, lri := range lris {
if time.Since(lri.TimeLastRefresh) > interval {
l.removeEntry(lri.Name, dsync.LockArgs{Owner: lri.Owner, UID: lri.UID}, &lris)
found = true
for k, lris := range l.lockMap {
modified := false
for i := 0; i < len(lris); {
lri := &lris[i]
if time.Since(lri.TimeLastRefresh) > interval {
delete(l.lockUID, formatUUID(lri.UID, lri.idx))
if len(lris) == 1 {
// Remove the write lock.
delete(l.lockMap, lri.Name)
modified = false
break
}
modified = true
// Remove the appropriate lock.
lris = append(lris[:i], lris[i+1:]...)
// Check same i
} else {
// Move to next
i++
}
// We did not find any more to expire.
if !found {
break
}
}
if modified {
l.lockMap[k] = lris
}
}
}

View File

@ -19,9 +19,13 @@ package cmd
import (
"context"
"encoding/hex"
"fmt"
"math/rand"
"testing"
"time"
"github.com/google/uuid"
"github.com/minio/minio/internal/dsync"
)
@ -254,3 +258,89 @@ func TestLocalLockerUnlock(t *testing.T) {
t.Fatalf("lockUID len, got %d, want %d + %d", len(l.lockUID), 0, 0)
}
}
func Test_localLocker_expireOldLocksExpire(t *testing.T) {
rng := rand.New(rand.NewSource(0))
// Numbers of unique locks
for _, locks := range []int{100, 1000, 1e6} {
if testing.Short() && locks > 100 {
continue
}
t.Run(fmt.Sprintf("%d-locks", locks), func(t *testing.T) {
// Number of readers per lock...
for _, readers := range []int{1, 10, 100} {
if locks > 1000 && readers > 1 {
continue
}
if testing.Short() && readers > 10 {
continue
}
t.Run(fmt.Sprintf("%d-read", readers), func(t *testing.T) {
l := newLocker()
for i := 0; i < locks; i++ {
var tmp [16]byte
rng.Read(tmp[:])
res := []string{hex.EncodeToString(tmp[:])}
for i := 0; i < readers; i++ {
rng.Read(tmp[:])
ok, err := l.RLock(context.Background(), dsync.LockArgs{
UID: uuid.NewString(),
Resources: res,
Source: hex.EncodeToString(tmp[:8]),
Owner: hex.EncodeToString(tmp[8:]),
Quorum: 0,
})
if !ok || err != nil {
t.Fatal("failed:", err, ok)
}
}
}
start := time.Now()
l.expireOldLocks(time.Hour)
t.Logf("Scan Took: %v. Left: %d/%d", time.Since(start).Round(time.Millisecond), len(l.lockUID), len(l.lockMap))
if len(l.lockMap) != locks {
t.Fatalf("objects deleted, want %d != got %d", locks, len(l.lockMap))
}
if len(l.lockUID) != locks*readers {
t.Fatalf("objects deleted, want %d != got %d", locks*readers, len(l.lockUID))
}
// Expire 50%
expired := time.Now().Add(-time.Hour * 2)
for _, v := range l.lockMap {
for i := range v {
if rng.Intn(2) == 0 {
v[i].TimeLastRefresh = expired
}
}
}
start = time.Now()
l.expireOldLocks(time.Hour)
t.Logf("Expire 50%% took: %v. Left: %d/%d", time.Since(start).Round(time.Millisecond), len(l.lockUID), len(l.lockMap))
if len(l.lockUID) == locks*readers {
t.Fatalf("objects uids all remain, unlikely")
}
if len(l.lockMap) == 0 {
t.Fatalf("objects all deleted, 0 remains")
}
if len(l.lockUID) == 0 {
t.Fatalf("objects uids all deleted, 0 remains")
}
start = time.Now()
l.expireOldLocks(-time.Minute)
t.Logf("Expire rest took: %v. Left: %d/%d", time.Since(start).Round(time.Millisecond), len(l.lockUID), len(l.lockMap))
if len(l.lockMap) != 0 {
t.Fatalf("objects not deleted, want %d != got %d", 0, len(l.lockMap))
}
if len(l.lockUID) != 0 {
t.Fatalf("objects not deleted, want %d != got %d", 0, len(l.lockUID))
}
})
}
})
}
}