Top Locks command implementation (#7052)

API to list locks used in distributed XL mode
Author: kannappanr, 2019-01-24 07:22:14 -08:00
Committed by: GitHub
Parent: 964e354d06
Commit: ce870466ff
19 changed files with 512 additions and 151 deletions


@@ -5,20 +5,20 @@ A distributed locking and syncing package for Go.
Introduction
------------
`dsync` is a package for doing distributed locks over a network of `n` nodes. It is designed with simplicity in mind and hence offers limited scalability (`n <= 16`). Each node will be connected to all other nodes and lock requests from any node will be broadcast to all connected nodes. A node will succeed in getting the lock if `n/2 + 1` nodes (whether or not including itself) respond positively. If the lock is acquired it can be held for as long as the client desires and needs to be released afterwards. This will cause the release to be broadcast to all nodes after which the lock becomes available again.
`dsync` is a package for doing distributed locks over a network of `n` nodes. It is designed with simplicity in mind and hence offers limited scalability (`n <= 32`). Each node will be connected to all other nodes and lock requests from any node will be broadcast to all connected nodes. A node will succeed in getting the lock if `n/2 + 1` nodes (whether or not including itself) respond positively. If the lock is acquired it can be held for as long as the client desires and needs to be released afterwards. This will cause the release to be broadcast to all nodes after which the lock becomes available again.
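The quorum rule in the paragraph above is easy to capture in code. The sketch below is illustrative only (it is not part of this change); it shows the `n/2 + 1` majority check a node would apply to the collected responses:

```go
// quorumMet reports whether enough of the nodes answered positively for the
// lock to be granted, i.e. at least n/2 + 1 of them (a strict majority).
func quorumMet(responses []bool) bool {
	granted := 0
	for _, ok := range responses {
		if ok {
			granted++
		}
	}
	return granted >= len(responses)/2+1
}
```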
Motivation
----------
This package was developed for the distributed server version of [Minio Object Storage](https://minio.io/). For this we needed a distributed locking mechanism for up to 16 servers that each would be running `minio server`. The locking mechanism itself should be a reader/writer mutual exclusion lock meaning that it can be held by a single writer or an arbitrary number of readers.
This package was developed for the distributed server version of [Minio Object Storage](https://minio.io/). For this we needed a distributed locking mechanism for up to 32 servers that each would be running `minio server`. The locking mechanism itself should be a reader/writer mutual exclusion lock meaning that it can be held by a single writer or an arbitrary number of readers.
For [minio](https://minio.io/) the distributed version is started as follows (for a 6-server system):
```
$ minio server http://server1/disk http://server2/disk http://server3/disk http://server4/disk http://server5/disk http://server6/disk
$ minio server http://server1/disk http://server2/disk http://server3/disk http://server4/disk http://server5/disk http://server6/disk
```
_(note that the same identical command should be run on servers `server1` through to `server6`)_
Design goals
@@ -33,7 +33,7 @@ Design goals
Restrictions
------------
* Limited scalability: up to 16 nodes.
* Limited scalability: up to 32 nodes.
* Fixed configuration: changes in the number and/or network names/IP addresses need a restart of all nodes in order to take effect.
* If a down node comes up, it will not try to (re)acquire any locks that it may have held.
* Not designed for high performance applications such as key/value stores.
@@ -41,10 +41,10 @@ Restrictions
Performance
-----------
* Support up to a total of 7500 locks/second for maximum size of 16 nodes (consuming 10% CPU usage per server) on moderately powerful server hardware.
* Support up to a total of 7500 locks/second for a size of 16 nodes (consuming 10% CPU usage per server) on moderately powerful server hardware.
* Lock requests (successful) should not take longer than 1ms (provided decent network connection of 1 Gbit or more between the nodes).
The tables below show detailed performance numbers.
The tables below show detailed performance numbers.
### Performance with varying number of nodes
@@ -91,7 +91,7 @@ Usage
been changed to `dsync.New([]NetLocker, nodeIndex)` which returns a `*Dsync` object to be used in
every instance of `NewDRWMutex("test", *Dsync)`
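A minimal initialization sketch for the changed API might look as follows. It assumes a `lockers []dsync.NetLocker` slice (one client per node) has already been built, and that `dsync.New` also returns an error; treat the error return as an assumption and check the package documentation:

```go
// Sketch only: wire up dsync for the local node at index myNode.
ds, err := dsync.New(lockers, myNode) // error return is assumed here
if err != nil {
	log.Fatalln("unable to initialize dsync:", err)
}

// Every mutex is then created against the same *Dsync instance.
dm := dsync.NewDRWMutex("test", ds)
```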
### Exclusive lock
### Exclusive lock
Here is a simple example showing how to protect a single resource (drop-in replacement for `sync.Mutex`):
@@ -105,7 +105,7 @@ func lockSameResource() {
// Create distributed mutex to protect resource 'test'
dm := dsync.NewDRWMutex("test", ds)
dm.Lock()
dm.Lock("lock-1", "example.go:505:lockSameResource()")
log.Println("first lock granted")
// Release 1st lock after 5 seconds
@@ -117,7 +117,7 @@ func lockSameResource() {
// Try to acquire lock again, will block until initial lock is released
log.Println("about to lock same resource again...")
dm.Lock()
dm.Lock("lock-1", "example.go:515:lockSameResource()")
log.Println("second lock granted")
time.Sleep(2 * time.Second)
@@ -143,10 +143,10 @@ func twoReadLocksAndSingleWriteLock() {
drwm := dsync.NewDRWMutex("resource", ds)
drwm.RLock()
drwm.RLock("RLock-1", "example.go:416:twoReadLocksAndSingleWriteLock()")
log.Println("1st read lock acquired, waiting...")
drwm.RLock()
drwm.RLock("RLock-2", "example.go:420:twoReadLocksAndSingleWriteLock()")
log.Println("2nd read lock acquired, waiting...")
go func() {
@@ -162,7 +162,7 @@ func twoReadLocksAndSingleWriteLock() {
}()
log.Println("Trying to acquire write lock, waiting...")
drwm.Lock()
drwm.Lock("Lock-1", "example.go:445:twoReadLocksAndSingleWriteLock()")
log.Println("Write lock acquired, waiting...")
time.Sleep(3 * time.Second)
@@ -190,7 +190,7 @@ Basic architecture
The basic steps in the lock process are as follows:
- broadcast lock message to all `n` nodes
- collect all responses within certain time-out window
- if quorum met (minimally `n/2 + 1` responded positively) then grant lock
- if quorum met (minimally `n/2 + 1` responded positively) then grant lock
- otherwise release all underlying locks and try again after a (semi-)random delay
- release any locks that (still) came in after the time-out window
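A rough sketch of the loop those steps describe is shown below; it is illustrative only and not taken from this change. `broadcast` and `releaseAll` are hypothetical stand-ins for the package's RPC layer, the snippet assumes the `math/rand` and `time` imports, and late responses arriving after the time-out window are released separately and not shown:

```go
// acquireSketch illustrates the broadcast / collect / quorum / retry cycle
// described in the steps above. broadcast returns one reply (or a timed-out
// miss) per node; releaseAll undoes a partial acquisition.
func acquireSketch(n int, broadcast func() []bool, releaseAll func([]bool)) {
	for {
		responses := broadcast()
		granted := 0
		for _, ok := range responses {
			if ok {
				granted++
			}
		}
		if granted >= n/2+1 {
			return // quorum met: lock granted
		}
		// Quorum not met: release whatever did come in, then back off for a
		// (semi-)random delay before retrying.
		releaseAll(responses)
		time.Sleep(time.Duration(10+rand.Intn(40)) * time.Millisecond)
	}
}
```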
@@ -236,7 +236,7 @@ This table summarizes the conditions for different configurations during which t
| 16 | 7 | 2 | 9 |
(for more info see `testMultipleServersOverQuorumDownDuringLockKnownError` in [chaos.go](https://github.com/minio/dsync/blob/master/chaos/chaos.go))
### Lock not available anymore
This would be due to too many stale locks and/or too many servers down (total over `n/2 - 1`). The following table shows the maximum tolerable number for different node sizes:
@@ -284,7 +284,7 @@ func (l *lockServer) Unlock(args *LockArgs, reply *bool) error {
defer l.mutex.Unlock()
var locksHeld int64
if locksHeld, *reply = l.lockMap[args.Name]; !*reply { // No lock is held on the given name
return fmt.Errorf("Unlock attempted on an unlocked entity: %s", args.Name)
return fmt.Errorf("Unlock attempted on an unlocked entity: %s", args.Name)
}
if *reply = locksHeld == WriteLock; !*reply { // Unless it is a write lock
return fmt.Errorf("Unlock attempted on a read locked entity: %s (%d read locks active)", args.Name, locksHeld)
@@ -357,11 +357,11 @@ For this case it is possible to reduce the number of nodes to be contacted to fo
You do however want to make sure that you have some sort of 'random' selection of which 12 out of the 16 nodes will participate in every lock. See [here](https://gist.github.com/fwessels/dbbafd537c13ec8f88b360b3a0091ac0) for some sample code that could help with this.
### Scale beyond 16 nodes?
### Scale beyond 32 nodes?
Building on the previous example and depending on how resilient you want to be for outages of nodes, you can also go the other way, namely to increase the total number of nodes while keeping the number of nodes contacted per lock the same.
For instance you could imagine a system of 32 nodes where only a quorom majority of `9` would be needed out of `12` nodes. Again this requires some sort of pseudo-random 'deterministic' selection of 12 nodes out of the total of 32 servers (same [example](https://gist.github.com/fwessels/dbbafd537c13ec8f88b360b3a0091ac0) as above).
For instance you could imagine a system of 64 nodes where only a quorum majority of `17` would be needed out of `28` nodes. Again this requires some sort of pseudo-random 'deterministic' selection of 28 nodes out of the total of 64 servers (same [example](https://gist.github.com/harshavardhana/44614a69650c9111defe3470941cdd16) as above).
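The deterministic pseudo-random selection referred to above (12 of 16, or 28 of 64) can be sketched roughly as below; this is an illustration in the spirit of the linked gist, not code taken from it. Hashing the resource name means every node independently computes the same subset for the same lock:

```go
import (
	"hash/fnv"
	"math/rand"
)

// pickNodes deterministically selects k of the n node indices for a given
// resource name, so that every node derives the same subset for that name.
func pickNodes(resource string, n, k int) []int {
	h := fnv.New64a()
	h.Write([]byte(resource))
	r := rand.New(rand.NewSource(int64(h.Sum64()))) // seed derived from the name
	return r.Perm(n)[:k]                            // first k of a deterministic permutation
}
```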
Other techniques
----------------


@@ -17,10 +17,12 @@
package dsync
import (
cryptorand "crypto/rand"
"fmt"
golog "log"
"math/rand"
"os"
"path"
"runtime"
"sync"
"time"
)
@@ -31,6 +33,7 @@ var dsyncLog bool
func init() {
// Check for DSYNC_LOG env variable, if set logging will be enabled for failed RPC operations.
dsyncLog = os.Getenv("DSYNC_LOG") == "1"
rand.Seed(time.Now().UnixNano())
}
func log(msg ...interface{}) {
@@ -79,10 +82,10 @@ func NewDRWMutex(name string, clnt *Dsync) *DRWMutex {
//
// If the lock is already in use, the calling go routine
// blocks until the mutex is available.
func (dm *DRWMutex) Lock() {
func (dm *DRWMutex) Lock(id, source string) {
isReadLock := false
dm.lockBlocking(drwMutexInfinite, isReadLock)
dm.lockBlocking(drwMutexInfinite, id, source, isReadLock)
}
// GetLock tries to get a write lock on dm before the timeout elapses.
@@ -90,20 +93,20 @@ func (dm *DRWMutex) Lock() {
// If the lock is already in use, the calling go routine
// blocks until either the mutex becomes available and return success or
// more time has passed than the timeout value and return false.
func (dm *DRWMutex) GetLock(timeout time.Duration) (locked bool) {
func (dm *DRWMutex) GetLock(id, source string, timeout time.Duration) (locked bool) {
isReadLock := false
return dm.lockBlocking(timeout, isReadLock)
return dm.lockBlocking(timeout, id, source, isReadLock)
}
// RLock holds a read lock on dm.
//
// If one or more read locks are already in use, it will grant another lock.
// Otherwise the calling go routine blocks until the mutex is available.
func (dm *DRWMutex) RLock() {
func (dm *DRWMutex) RLock(id, source string) {
isReadLock := true
dm.lockBlocking(drwMutexInfinite, isReadLock)
dm.lockBlocking(drwMutexInfinite, id, source, isReadLock)
}
// GetRLock tries to get a read lock on dm before the timeout elapses.
@@ -112,10 +115,10 @@ func (dm *DRWMutex) RLock() {
// Otherwise the calling go routine blocks until either the mutex becomes
// available and return success or more time has passed than the timeout
// value and return false.
func (dm *DRWMutex) GetRLock(timeout time.Duration) (locked bool) {
func (dm *DRWMutex) GetRLock(id, source string, timeout time.Duration) (locked bool) {
isReadLock := true
return dm.lockBlocking(timeout, isReadLock)
return dm.lockBlocking(timeout, id, source, isReadLock)
}
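A caller-side sketch of the timed variants with their new `id`/`source` arguments, given some `dm *DRWMutex` (the UID and source strings here are just examples):

```go
// Try to take the write lock for up to 30 seconds, passing a caller-chosen
// UID and a human-readable source location for the lock listing.
if dm.GetLock("example-uid-1234", "caller.go:42:doWork()", 30*time.Second) {
	// ... critical section ...
	dm.Unlock()
} else {
	log.Println("timed out waiting for the lock")
}
```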
// lockBlocking will try to acquire either a read or a write lock
@@ -123,7 +126,7 @@ func (dm *DRWMutex) GetRLock(timeout time.Duration) (locked bool) {
// The function will loop using a built-in timing randomized back-off
// algorithm until either the lock is acquired successfully or more
// time has elapsed than the timeout value.
func (dm *DRWMutex) lockBlocking(timeout time.Duration, isReadLock bool) (locked bool) {
func (dm *DRWMutex) lockBlocking(timeout time.Duration, id, source string, isReadLock bool) (locked bool) {
doneCh, start := make(chan struct{}), time.Now().UTC()
defer close(doneCh)
@@ -133,7 +136,7 @@ func (dm *DRWMutex) lockBlocking(timeout time.Duration, isReadLock bool) (locked
locks := make([]string, dm.clnt.dNodeCount)
// Try to acquire the lock.
success := lock(dm.clnt, &locks, dm.Name, isReadLock)
success := lock(dm.clnt, &locks, dm.Name, id, source, isReadLock)
if success {
dm.m.Lock()
defer dm.m.Unlock()
@@ -160,7 +163,7 @@ func (dm *DRWMutex) lockBlocking(timeout time.Duration, isReadLock bool) (locked
}
// lock tries to acquire the distributed lock, returning true or false.
func lock(ds *Dsync, locks *[]string, lockName string, isReadLock bool) bool {
func lock(ds *Dsync, locks *[]string, lockName, id, source string, isReadLock bool) bool {
// Create buffered channel of size equal to total number of nodes.
ch := make(chan Granted, ds.dNodeCount)
@@ -174,17 +177,12 @@ func lock(ds *Dsync, locks *[]string, lockName string, isReadLock bool) bool {
go func(index int, isReadLock bool, c NetLocker) {
defer wg.Done()
// All client methods issuing RPCs are thread-safe and goroutine-safe,
// i.e. it is safe to call them from multiple concurrently running go routines.
bytesUID := [16]byte{}
cryptorand.Read(bytesUID[:])
uid := fmt.Sprintf("%X", bytesUID[:])
args := LockArgs{
UID: uid,
UID: id,
Resource: lockName,
ServerAddr: ds.rpcClnts[ds.ownNode].ServerAddr(),
ServiceEndpoint: ds.rpcClnts[ds.ownNode].ServiceEndpoint(),
Source: source,
}
var locked bool
@@ -438,5 +436,29 @@ func (dm *DRWMutex) DRLocker() sync.Locker {
type drlocker DRWMutex
func (dr *drlocker) Lock() { (*DRWMutex)(dr).RLock() }
var letterRunes = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
// randString returns a random string of n letters, used here as a lock UID.
func randString(n int) string {
b := make([]rune, n)
for i := range b {
b[i] = letterRunes[rand.Intn(len(letterRunes))]
}
return string(b)
}
// getSource returns the file name, line number and function name of the
// caller two stack frames up, formatted as "[file:line:func()]".
func getSource() string {
var funcName string
pc, filename, lineNum, ok := runtime.Caller(2)
if ok {
filename = path.Base(filename)
funcName = runtime.FuncForPC(pc).Name()
} else {
filename = "<unknown>"
lineNum = 0
}
return fmt.Sprintf("[%s:%d:%s()]", filename, lineNum, funcName)
}
func (dr *drlocker) Lock() { (*DRWMutex)(dr).RLock(randString(16), getSource()) }
func (dr *drlocker) Unlock() { (*DRWMutex)(dr).RUnlock() }


@@ -29,6 +29,10 @@ type LockArgs struct {
// ServiceEndpoint contains the network path of above server to do lock/unlock.
ServiceEndpoint string
// Source contains the line number, function and file name of the code
// on the client node that requested the lock.
Source string
}
// NetLocker is dsync compatible locker interface.


@@ -22,9 +22,9 @@ import (
)
const (
WRITELOCK = -1 + iota
NOLOCKS
READLOCKS
WRITELOCK = -1 + iota
NOLOCKS
READLOCKS
)
// A LRWMutex is a mutual exclusion lock with timeouts.
@@ -49,7 +49,7 @@ func (lm *LRWMutex) Lock() {
}
// GetLock tries to get a write lock on lm before the timeout occurs.
func (lm *LRWMutex) GetLock(timeout time.Duration) (locked bool) {
func (lm *LRWMutex) GetLock(id, source string, timeout time.Duration) (locked bool) {
isWriteLock := true
return lm.lockLoop(timeout, isWriteLock)
@@ -66,7 +66,7 @@ func (lm *LRWMutex) RLock() {
}
// GetRLock tries to get a read lock on lm before the timeout occurs.
func (lm *LRWMutex) GetRLock(timeout time.Duration) (locked bool) {
func (lm *LRWMutex) GetRLock(id, source string, timeout time.Duration) (locked bool) {
isWriteLock := false
return lm.lockLoop(timeout, isWriteLock)

vendor/vendor.json (vendored, 6 changes)

@@ -579,10 +579,10 @@
"revisionTime": "2017-02-27T07:32:28Z"
},
{
"checksumSHA1": "1AQVDkFvVxn1RMTLyjeDSESBhrc=",
"checksumSHA1": "CwWXALTRt30txpLPfuazncqZnJ0=",
"path": "github.com/minio/dsync",
"revision": "439a0961af700f80db84cc180fe324a89070fa65",
"revisionTime": "2018-01-23T12:12:34Z"
"revision": "61c41ffdeea2cd0b58edea078ba30c9b6d7411e9",
"revisionTime": "2019-01-04T00:30:57Z"
},
{
"checksumSHA1": "CD2MtlgA8h0z6hYJHURS5eOmZ1k=",