mirror of
https://github.com/minio/minio.git
synced 2025-11-07 12:52:58 -05:00
Upgrade to new dsync version incl. stale lock detection (#2708)
This commit is contained in:
@@ -161,3 +161,19 @@ func (authClient *AuthRPCClient) Call(serviceMethod string, args interface {
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Node returns the node (network address) of the connection
|
||||
func (authClient *AuthRPCClient) Node() string {
|
||||
if authClient.rpc != nil {
|
||||
return authClient.rpc.node
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// RPCPath returns the RPC path of the connection
|
||||
func (authClient *AuthRPCClient) RPCPath() string {
|
||||
if authClient.rpc != nil {
|
||||
return authClient.rpc.rpcPath
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ package cmd
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net/rpc"
|
||||
"path"
|
||||
"strings"
|
||||
@@ -28,6 +29,8 @@ import (
|
||||
)
|
||||
|
||||
const (
	// lockRPCPath is the base path that is joined with a disk's export path
	// to form the RPC path of a lock client.
	lockRPCPath = "/minio/lock"

	// lockMaintenanceLoop is the pause between two runs of the stale lock
	// maintenance loop.
	lockMaintenanceLoop = time.Minute

	// lockCheckValidityInterval is the minimum time that has to pass since
	// the last validity check of a lock before it is checked again.
	lockCheckValidityInterval = 2 * time.Minute
)
|
||||
|
||||
// LockArgs, besides the lock name, holds the Token and Timestamp used for session
|
||||
// authentication and for validating server restarts.
|
||||
@@ -35,6 +38,9 @@ type LockArgs struct {
|
||||
Name string
|
||||
Token string
|
||||
Timestamp time.Time
|
||||
Node string
|
||||
RPCPath string
|
||||
UID string
|
||||
}
|
||||
|
||||
// SetToken - sets the token to the supplied value.
|
||||
@@ -47,12 +53,26 @@ func (l *LockArgs) SetTimestamp(tstamp time.Time) {
|
||||
l.Timestamp = tstamp
|
||||
}
|
||||
|
||||
// lockRequesterInfo records, for every lock that is requested, the details
// supplied by the client together with bookkeeping timestamps.
type lockRequesterInfo struct {
	// writer is true for a write lock and false for a read lock.
	writer bool
	// node is the network address of the client claiming the lock.
	node string
	// rpcPath is the RPC path of the client claiming the lock.
	rpcPath string
	// uid uniquely identifies the request of the client.
	uid string
	// timestamp is set at the time of initialization.
	timestamp time.Time
	// timeLastCheck is the time of the last validity check of the lock.
	timeLastCheck time.Time
}
|
||||
|
||||
// isWriteLock returns whether the lock is a write or read lock
|
||||
func isWriteLock(lri []lockRequesterInfo) bool {
|
||||
return len(lri) == 1 && lri[0].writer
|
||||
}
|
||||
|
||||
// lockServer is type for RPC handlers
|
||||
type lockServer struct {
|
||||
rpcPath string
|
||||
mutex sync.Mutex
|
||||
// e.g, when a Lock(name) is held, map[string][]bool{"name" : []bool{true}}
|
||||
// when one or more RLock() is held, map[string][]bool{"name" : []bool{false, false}}
|
||||
lockMap map[string][]bool
|
||||
rpcPath string
|
||||
mutex sync.Mutex
|
||||
lockMap map[string][]lockRequesterInfo
|
||||
timestamp time.Time // Timestamp set at the time of initialization. Resets naturally on minio server restart.
|
||||
}
|
||||
|
||||
@@ -93,15 +113,11 @@ func (l *lockServer) Lock(args *LockArgs, reply *bool) error {
|
||||
if err := l.verifyArgs(args); err != nil {
|
||||
return err
|
||||
}
|
||||
_, ok := l.lockMap[args.Name]
|
||||
// No locks held on the given name.
|
||||
if !ok {
|
||||
*reply = true
|
||||
l.lockMap[args.Name] = []bool{true}
|
||||
} else {
|
||||
// Either a read or write lock is held on the given name.
|
||||
*reply = false
|
||||
_, *reply = l.lockMap[args.Name]
|
||||
if !*reply { // No locks held on the given name, so claim write lock
|
||||
l.lockMap[args.Name] = []lockRequesterInfo{lockRequesterInfo{writer: true, node: args.Node, rpcPath: args.RPCPath, uid: args.UID, timestamp: time.Now(), timeLastCheck: time.Now()}}
|
||||
}
|
||||
*reply = !*reply // Negate *reply to return true when lock is granted or false otherwise
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -112,19 +128,18 @@ func (l *lockServer) Unlock(args *LockArgs, reply *bool) error {
|
||||
if err := l.verifyArgs(args); err != nil {
|
||||
return err
|
||||
}
|
||||
locksHeld, ok := l.lockMap[args.Name]
|
||||
// No lock is held on the given name, there must be some issue at the lock client side.
|
||||
if !ok {
|
||||
*reply = false
|
||||
return fmt.Errorf("Unlock attempted on an un-locked entity: %s", args.Name)
|
||||
} else if len(locksHeld) == 1 && locksHeld[0] == true {
|
||||
*reply = true
|
||||
delete(l.lockMap, args.Name)
|
||||
return nil
|
||||
} else {
|
||||
*reply = false
|
||||
return fmt.Errorf("Unlock attempted on a read locked entity: %s (%d read locks active)", args.Name, len(locksHeld))
|
||||
var lri []lockRequesterInfo
|
||||
lri, *reply = l.lockMap[args.Name]
|
||||
if !*reply { // No lock is held on the given name
|
||||
return fmt.Errorf("Unlock attempted on an unlocked entity: %s", args.Name)
|
||||
}
|
||||
if *reply = isWriteLock(lri); !*reply { // Unless it is a write lock
|
||||
return fmt.Errorf("Unlock attempted on a read locked entity: %s (%d read locks active)", args.Name, len(lri))
|
||||
}
|
||||
if l.removeEntry(args.Name, args.UID, &lri) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("Unlock unable to find corresponding lock for uid: %s", args.UID)
|
||||
}
|
||||
|
||||
// RLock - rpc handler for read lock operation.
|
||||
@@ -134,19 +149,15 @@ func (l *lockServer) RLock(args *LockArgs, reply *bool) error {
|
||||
if err := l.verifyArgs(args); err != nil {
|
||||
return err
|
||||
}
|
||||
locksHeld, ok := l.lockMap[args.Name]
|
||||
// No locks held on the given name.
|
||||
if !ok {
|
||||
// First read-lock to be held on *name.
|
||||
l.lockMap[args.Name] = []bool{false}
|
||||
var lri []lockRequesterInfo
|
||||
lri, *reply = l.lockMap[args.Name]
|
||||
if !*reply { // No locks held on the given name, so claim (first) read lock
|
||||
l.lockMap[args.Name] = []lockRequesterInfo{lockRequesterInfo{writer: false, node: args.Node, rpcPath: args.RPCPath, uid: args.UID, timestamp: time.Now(), timeLastCheck: time.Now()}}
|
||||
*reply = true
|
||||
} else if len(locksHeld) == 1 && locksHeld[0] == true {
|
||||
// A write-lock is held, read lock can't be granted.
|
||||
*reply = false
|
||||
} else {
|
||||
// Add an entry for this read lock.
|
||||
l.lockMap[args.Name] = append(locksHeld, false)
|
||||
*reply = true
|
||||
if *reply = !isWriteLock(lri); *reply { // Unless there is a write lock
|
||||
l.lockMap[args.Name] = append(l.lockMap[args.Name], lockRequesterInfo{writer: false, node: args.Node, rpcPath: args.RPCPath, uid: args.UID, timestamp: time.Now(), timeLastCheck: time.Now()})
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -158,26 +169,132 @@ func (l *lockServer) RUnlock(args *LockArgs, reply *bool) error {
|
||||
if err := l.verifyArgs(args); err != nil {
|
||||
return err
|
||||
}
|
||||
locksHeld, ok := l.lockMap[args.Name]
|
||||
if !ok {
|
||||
*reply = false
|
||||
return fmt.Errorf("RUnlock attempted on an un-locked entity: %s", args.Name)
|
||||
} else if len(locksHeld) == 1 && locksHeld[0] == true {
|
||||
// A write-lock is held, cannot release a read lock
|
||||
*reply = false
|
||||
return fmt.Errorf("RUnlock attempted on a write locked entity: %s", args.Name)
|
||||
} else if len(locksHeld) > 1 {
|
||||
// Remove one of the read locks held.
|
||||
locksHeld = locksHeld[1:]
|
||||
l.lockMap[args.Name] = locksHeld
|
||||
*reply = true
|
||||
} else {
|
||||
// Delete the map entry since this is the last read lock held
|
||||
// on *name.
|
||||
delete(l.lockMap, args.Name)
|
||||
*reply = true
|
||||
var lri []lockRequesterInfo
|
||||
if lri, *reply = l.lockMap[args.Name]; !*reply { // No lock is held on the given name
|
||||
return fmt.Errorf("RUnlock attempted on an unlocked entity: %s", args.Name)
|
||||
}
|
||||
if *reply = !isWriteLock(lri); !*reply { // A write-lock is held, cannot release a read lock
|
||||
return fmt.Errorf("RUnlock attempted on a write locked entity: %s", args.Name)
|
||||
}
|
||||
if l.removeEntry(args.Name, args.UID, &lri) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("RUnlock unable to find corresponding read lock for uid: %s", args.UID)
|
||||
}
|
||||
|
||||
// Active - rpc handler for active lock status.
|
||||
func (l *lockServer) Active(args *LockArgs, reply *bool) error {
|
||||
l.mutex.Lock()
|
||||
defer l.mutex.Unlock()
|
||||
if err := l.verifyArgs(args); err != nil {
|
||||
return err
|
||||
}
|
||||
var lri []lockRequesterInfo
|
||||
if lri, *reply = l.lockMap[args.Name]; !*reply {
|
||||
return nil // No lock is held on the given name so return false
|
||||
}
|
||||
// Check whether uid is still active
|
||||
for _, entry := range lri {
|
||||
if *reply = entry.uid == args.UID; *reply {
|
||||
return nil // When uid found return true
|
||||
}
|
||||
}
|
||||
return nil // None found so return false
|
||||
}
|
||||
|
||||
// removeEntry either, based on the uid of the lock message, removes a single entry from the
|
||||
// lockRequesterInfo array or the whole array from the map (in case of a write lock or last read lock)
|
||||
func (l *lockServer) removeEntry(name, uid string, lri *[]lockRequesterInfo) bool {
|
||||
// Find correct entry to remove based on uid
|
||||
for index, entry := range *lri {
|
||||
if entry.uid == uid {
|
||||
if len(*lri) == 1 {
|
||||
delete(l.lockMap, name) // Remove the (last) lock
|
||||
} else {
|
||||
// Remove the appropriate read lock
|
||||
*lri = append((*lri)[:index], (*lri)[index+1:]...)
|
||||
l.lockMap[name] = *lri
|
||||
}
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// nameLockRequesterInfoPair is a helper type for lock maintenance
|
||||
type nameLockRequesterInfoPair struct {
|
||||
name string
|
||||
lri lockRequesterInfo
|
||||
}
|
||||
|
||||
// getLongLivedLocks returns locks that are older than a certain time and
|
||||
// have not been 'checked' for validity too soon enough
|
||||
func getLongLivedLocks(m map[string][]lockRequesterInfo, interval time.Duration) []nameLockRequesterInfoPair {
|
||||
|
||||
rslt := []nameLockRequesterInfoPair{}
|
||||
|
||||
for name, lriArray := range m {
|
||||
|
||||
for idx := range lriArray {
|
||||
// Check whether enough time has gone by since last check
|
||||
if time.Since(lriArray[idx].timeLastCheck) >= interval {
|
||||
rslt = append(rslt, nameLockRequesterInfoPair{name: name, lri: lriArray[idx]})
|
||||
lriArray[idx].timeLastCheck = time.Now()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rslt
|
||||
}
|
||||
|
||||
// lockMaintenance loops over locks that have been active for some time and checks back
|
||||
// with the original server whether it is still alive or not
|
||||
func (l *lockServer) lockMaintenance(interval time.Duration) {
|
||||
|
||||
l.mutex.Lock()
|
||||
// get list of locks to check
|
||||
nlripLongLived := getLongLivedLocks(l.lockMap, interval)
|
||||
l.mutex.Unlock()
|
||||
|
||||
for _, nlrip := range nlripLongLived {
|
||||
|
||||
c := newClient(nlrip.lri.node, nlrip.lri.rpcPath)
|
||||
|
||||
var active bool
|
||||
|
||||
// Call back to original server verify whether the lock is still active (based on name & uid)
|
||||
if err := c.Call("Dsync.Active", &LockArgs{Name: nlrip.name, UID: nlrip.lri.uid}, &active); err != nil {
|
||||
// We failed to connect back to the server that originated the lock, this can either be due to
|
||||
// - server at client down
|
||||
// - some network error (and server is up normally)
|
||||
//
|
||||
// We will ignore the error, and we will retry later to get resolve on this lock
|
||||
c.Close()
|
||||
} else {
|
||||
c.Close()
|
||||
|
||||
if !active { // The lock is no longer active at server that originated the lock
|
||||
// so remove the lock from the map
|
||||
l.mutex.Lock()
|
||||
// Check if entry is still in map (could have been removed altogether by 'concurrent' (R)Unlock of last entry)
|
||||
if lri, ok := l.lockMap[nlrip.name]; ok {
|
||||
if !l.removeEntry(nlrip.name, nlrip.lri.uid, &lri) {
|
||||
// Remove failed, in case it is a:
|
||||
if nlrip.lri.writer {
|
||||
// Writer: this should never happen as the whole (mapped) entry should have been deleted
|
||||
log.Errorln("Lock maintenance failed to remove entry for write lock (should never happen)", nlrip.name, nlrip.lri, lri)
|
||||
} else {
|
||||
// Reader: this can happen if multiple read locks were active and the one we are looking for
|
||||
// has been released concurrently (so it is fine)
|
||||
}
|
||||
} else {
|
||||
// remove went okay, all is fine
|
||||
}
|
||||
}
|
||||
l.mutex.Unlock()
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Initialize distributed lock.
|
||||
@@ -205,12 +322,26 @@ func newLockServers(serverConfig serverCmdConfig) (lockServers []*lockServer) {
|
||||
if idx := strings.LastIndex(export, ":"); idx != -1 {
|
||||
export = export[idx+1:]
|
||||
}
|
||||
lockServers = append(lockServers, &lockServer{
|
||||
|
||||
// Create handler for lock RPCs
|
||||
locker := &lockServer{
|
||||
rpcPath: export,
|
||||
mutex: sync.Mutex{},
|
||||
lockMap: make(map[string][]bool),
|
||||
lockMap: make(map[string][]lockRequesterInfo),
|
||||
timestamp: time.Now().UTC(),
|
||||
})
|
||||
}
|
||||
|
||||
// Start loop for stale lock maintenance
|
||||
go func() {
|
||||
// Start with random sleep time, so as to avoid "synchronous checks" between servers
|
||||
time.Sleep(time.Duration(rand.Float64() * float64(lockMaintenanceLoop)))
|
||||
for {
|
||||
time.Sleep(lockMaintenanceLoop)
|
||||
locker.lockMaintenance(lockCheckValidityInterval)
|
||||
}
|
||||
}()
|
||||
|
||||
lockServers = append(lockServers, locker)
|
||||
}
|
||||
}
|
||||
return lockServers
|
||||
|
||||
@@ -38,6 +38,7 @@ func initDsyncNodes(disks []string, port int) error {
|
||||
cred := serverConfig.GetCredential()
|
||||
// Initialize rpc lock client information only if this instance is a distributed setup.
|
||||
var clnts []dsync.RPC
|
||||
myNode := -1
|
||||
for _, disk := range disks {
|
||||
if idx := strings.LastIndex(disk, ":"); idx != -1 {
|
||||
clnts = append(clnts, newAuthClient(&authConfig{
|
||||
@@ -49,9 +50,14 @@ func initDsyncNodes(disks []string, port int) error {
|
||||
path: pathutil.Join(lockRPCPath, disk[idx+1:]),
|
||||
loginMethod: "Dsync.LoginHandler",
|
||||
}))
|
||||
|
||||
if isLocalStorage(disk) && myNode == -1 {
|
||||
myNode = len(clnts) - 1
|
||||
}
|
||||
}
|
||||
}
|
||||
return dsync.SetNodesWithClients(clnts)
|
||||
|
||||
return dsync.SetNodesWithClients(clnts, myNode)
|
||||
}
|
||||
|
||||
// initNSLock - initialize name space lock map.
|
||||
@@ -86,9 +92,8 @@ type nsParam struct {
|
||||
|
||||
// nsLock - provides primitives for locking critical namespace regions.
|
||||
type nsLock struct {
|
||||
writer RWLocker
|
||||
readerArray []RWLocker
|
||||
ref uint
|
||||
RWLocker
|
||||
ref uint
|
||||
}
|
||||
|
||||
// nsLockMap - namespace lock map, provides primitives to Lock,
|
||||
@@ -114,7 +119,7 @@ func (n *nsLockMap) lock(volume, path string, lockOrigin, opsID string, readLock
|
||||
nsLk, found := n.lockMap[param]
|
||||
if !found {
|
||||
nsLk = &nsLock{
|
||||
writer: func() RWLocker {
|
||||
RWLocker: func() RWLocker {
|
||||
if n.isDist {
|
||||
return dsync.NewDRWMutex(pathutil.Join(volume, path))
|
||||
}
|
||||
@@ -125,10 +130,6 @@ func (n *nsLockMap) lock(volume, path string, lockOrigin, opsID string, readLock
|
||||
n.lockMap[param] = nsLk
|
||||
}
|
||||
nsLk.ref++ // Update ref count here to avoid multiple races.
|
||||
rwlock := nsLk.writer
|
||||
if readLock && n.isDist {
|
||||
rwlock = dsync.NewDRWMutex(pathutil.Join(volume, path))
|
||||
}
|
||||
|
||||
if globalDebugLock {
|
||||
// change the state of the lock to be blocked for the given pair of <volume, path> and <OperationID> till the lock unblocks.
|
||||
@@ -143,21 +144,9 @@ func (n *nsLockMap) lock(volume, path string, lockOrigin, opsID string, readLock
|
||||
|
||||
// Locking here can block.
|
||||
if readLock {
|
||||
rwlock.RLock()
|
||||
|
||||
if n.isDist {
|
||||
// Only add (for reader case) to array after RLock() succeeds
|
||||
// (so that we know for sure that element in [0] can be RUnlocked())
|
||||
n.lockMapMutex.Lock()
|
||||
if len(nsLk.readerArray) == 0 {
|
||||
nsLk.readerArray = []RWLocker{rwlock}
|
||||
} else {
|
||||
nsLk.readerArray = append(nsLk.readerArray, rwlock)
|
||||
}
|
||||
n.lockMapMutex.Unlock()
|
||||
}
|
||||
nsLk.RLock()
|
||||
} else {
|
||||
rwlock.Lock()
|
||||
nsLk.Lock()
|
||||
}
|
||||
|
||||
// check if lock debugging enabled.
|
||||
@@ -180,19 +169,9 @@ func (n *nsLockMap) unlock(volume, path, opsID string, readLock bool) {
|
||||
param := nsParam{volume, path}
|
||||
if nsLk, found := n.lockMap[param]; found {
|
||||
if readLock {
|
||||
if n.isDist {
|
||||
if len(nsLk.readerArray) == 0 {
|
||||
errorIf(errors.New("Length of reader lock array cannot be 0."), "Invalid reader lock array length detected.")
|
||||
}
|
||||
// Release first lock first (FIFO)
|
||||
nsLk.readerArray[0].RUnlock()
|
||||
// And discard first element
|
||||
nsLk.readerArray = nsLk.readerArray[1:]
|
||||
} else {
|
||||
nsLk.writer.RUnlock()
|
||||
}
|
||||
nsLk.RUnlock()
|
||||
} else {
|
||||
nsLk.writer.Unlock()
|
||||
nsLk.Unlock()
|
||||
}
|
||||
if nsLk.ref == 0 {
|
||||
errorIf(errors.New("Namespace reference count cannot be 0."), "Invalid reference count detected.")
|
||||
@@ -208,10 +187,6 @@ func (n *nsLockMap) unlock(volume, path, opsID string, readLock bool) {
|
||||
}
|
||||
}
|
||||
if nsLk.ref == 0 {
|
||||
if len(nsLk.readerArray) != 0 && n.isDist {
|
||||
errorIf(errors.New("Length of reader lock array should be 0 upon deleting map entry."), "Invalid reader lock array length detected.")
|
||||
}
|
||||
|
||||
// Remove from the map if there are no more references.
|
||||
delete(n.lockMap, param)
|
||||
|
||||
|
||||
@@ -123,3 +123,13 @@ func (rpcClient *RPCClient) Close() error {
|
||||
rpcClient.clearRPCClient()
|
||||
return rpcLocalStack.Close()
|
||||
}
|
||||
|
||||
// Node returns the node (network address) of the connection
|
||||
func (rpcClient *RPCClient) Node() string {
|
||||
return rpcClient.node
|
||||
}
|
||||
|
||||
// RPCPath returns the RPC path of the connection
|
||||
func (rpcClient *RPCClient) RPCPath() string {
|
||||
return rpcClient.rpcPath
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user