Upgrade to new dsync version incl. stale lock detection (#2708)

2025-11-09 05:34:56 -05:00 · 2016-09-16 09:30:55 +02:00
parent 7a549096de
commit df2ef64d20
9 changed files with 735 additions and 240 deletions
--- a/cmd/auth-rpc-client.go
+++ b/cmd/auth-rpc-client.go
@@ -161,3 +161,19 @@ func (authClient *AuthRPCClient) Call(serviceMethod string, args interface {
 	}
 	return err
 }
+
+// Node returns the node (network address) of the connection
+func (authClient *AuthRPCClient) Node() string {
+	if authClient.rpc != nil {
+		return authClient.rpc.node
+	}
+	return ""
+}
+
+// RPCPath returns the RPC path of the connection
+func (authClient *AuthRPCClient) RPCPath() string {
+	if authClient.rpc != nil {
+		return authClient.rpc.rpcPath
+	}
+	return ""
+}
--- a/cmd/lock-rpc-server.go
+++ b/cmd/lock-rpc-server.go
@@ -18,6 +18,7 @@ package cmd

 import (
 	"fmt"
+	"math/rand"
 	"net/rpc"
 	"path"
 	"strings"
@@ -28,6 +29,8 @@ import (
 )

 const lockRPCPath = "/minio/lock"
+const lockMaintenanceLoop = 1 * time.Minute
+const lockCheckValidityInterval = 2 * time.Minute

 // LockArgs besides lock name, holds Token and Timestamp for session
 // authentication and validation server restart.
@@ -35,6 +38,9 @@ type LockArgs struct {
 	Name      string
 	Token     string
 	Timestamp time.Time
+	Node      string
+	RPCPath   string
+	UID       string
 }

 // SetToken - sets the token to the supplied value.
@@ -47,12 +53,26 @@ func (l *LockArgs) SetTimestamp(tstamp time.Time) {
 	l.Timestamp = tstamp
 }

+// lockRequesterInfo stores various info from the client for each lock that is requested
+type lockRequesterInfo struct {
+	writer        bool      // True for a write lock, false for a read lock
+	node          string    // Network address of client claiming lock
+	rpcPath       string    // RPC path of client claiming lock
+	uid           string    // Uid to uniquely identify request of client
+	timestamp     time.Time // Timestamp set at the time of initialization
+	timeLastCheck time.Time // Timestamp for last check of validity of lock
+}
+
+// isWriteLock returns true when lri holds exactly one entry and that entry is a write lock
+func isWriteLock(lri []lockRequesterInfo) bool {
+	return len(lri) == 1 && lri[0].writer
+}
+
+// lockServer is type for RPC handlers
 type lockServer struct {
-	rpcPath string
-	mutex   sync.Mutex
-	// e.g, when a Lock(name) is held, map[string][]bool{"name" : []bool{true}}
-	// when one or more RLock() is held, map[string][]bool{"name" : []bool{false, false}}
-	lockMap   map[string][]bool
+	rpcPath   string
+	mutex     sync.Mutex
+	lockMap   map[string][]lockRequesterInfo
 	timestamp time.Time // Timestamp set at the time of initialization. Resets naturally on minio server restart.
 }

@@ -93,15 +113,11 @@ func (l *lockServer) Lock(args *LockArgs, reply *bool) error {
 	if err := l.verifyArgs(args); err != nil {
 		return err
 	}
-	_, ok := l.lockMap[args.Name]
-	// No locks held on the given name.
-	if !ok {
-		*reply = true
-		l.lockMap[args.Name] = []bool{true}
-	} else {
-		// Either a read or write lock is held on the given name.
-		*reply = false
+	_, *reply = l.lockMap[args.Name]
+	if !*reply { // No locks held on the given name, so claim write lock
+		l.lockMap[args.Name] = []lockRequesterInfo{lockRequesterInfo{writer: true, node: args.Node, rpcPath: args.RPCPath, uid: args.UID, timestamp: time.Now(), timeLastCheck: time.Now()}}
 	}
+	*reply = !*reply // Negate *reply to return true when lock is granted or false otherwise
 	return nil
 }

@@ -112,19 +128,18 @@ func (l *lockServer) Unlock(args *LockArgs, reply *bool) error {
 	if err := l.verifyArgs(args); err != nil {
 		return err
 	}
-	locksHeld, ok := l.lockMap[args.Name]
-	// No lock is held on the given name, there must be some issue at the lock client side.
-	if !ok {
-		*reply = false
-		return fmt.Errorf("Unlock attempted on an un-locked entity: %s", args.Name)
-	} else if len(locksHeld) == 1 && locksHeld[0] == true {
-		*reply = true
-		delete(l.lockMap, args.Name)
-		return nil
-	} else {
-		*reply = false
-		return fmt.Errorf("Unlock attempted on a read locked entity: %s (%d read locks active)", args.Name, len(locksHeld))
+	var lri []lockRequesterInfo
+	lri, *reply = l.lockMap[args.Name]
+	if !*reply { // No lock is held on the given name
+		return fmt.Errorf("Unlock attempted on an unlocked entity: %s", args.Name)
 	}
+	if *reply = isWriteLock(lri); !*reply { // Unless it is a write lock
+		return fmt.Errorf("Unlock attempted on a read locked entity: %s (%d read locks active)", args.Name, len(lri))
+	}
+	if l.removeEntry(args.Name, args.UID, &lri) {
+		return nil
+	}
+	return fmt.Errorf("Unlock unable to find corresponding lock for uid: %s", args.UID)
 }

 // RLock - rpc handler for read lock operation.
@@ -134,19 +149,15 @@ func (l *lockServer) RLock(args *LockArgs, reply *bool) error {
 	if err := l.verifyArgs(args); err != nil {
 		return err
 	}
-	locksHeld, ok := l.lockMap[args.Name]
-	// No locks held on the given name.
-	if !ok {
-		// First read-lock to be held on *name.
-		l.lockMap[args.Name] = []bool{false}
+	var lri []lockRequesterInfo
+	lri, *reply = l.lockMap[args.Name]
+	if !*reply { // No locks held on the given name, so claim (first) read lock
+		l.lockMap[args.Name] = []lockRequesterInfo{lockRequesterInfo{writer: false, node: args.Node, rpcPath: args.RPCPath, uid: args.UID, timestamp: time.Now(), timeLastCheck: time.Now()}}
 		*reply = true
-	} else if len(locksHeld) == 1 && locksHeld[0] == true {
-		// A write-lock is held, read lock can't be granted.
-		*reply = false
 	} else {
-		// Add an entry for this read lock.
-		l.lockMap[args.Name] = append(locksHeld, false)
-		*reply = true
+		if *reply = !isWriteLock(lri); *reply { // Unless there is a write lock
+			l.lockMap[args.Name] = append(l.lockMap[args.Name], lockRequesterInfo{writer: false, node: args.Node, rpcPath: args.RPCPath, uid: args.UID, timestamp: time.Now(), timeLastCheck: time.Now()})
+		}
 	}
 	return nil
 }
@@ -158,26 +169,132 @@ func (l *lockServer) RUnlock(args *LockArgs, reply *bool) error {
 	if err := l.verifyArgs(args); err != nil {
 		return err
 	}
-	locksHeld, ok := l.lockMap[args.Name]
-	if !ok {
-		*reply = false
-		return fmt.Errorf("RUnlock attempted on an un-locked entity: %s", args.Name)
-	} else if len(locksHeld) == 1 && locksHeld[0] == true {
-		// A write-lock is held, cannot release a read lock
-		*reply = false
-		return fmt.Errorf("RUnlock attempted on a write locked entity: %s", args.Name)
-	} else if len(locksHeld) > 1 {
-		// Remove one of the read locks held.
-		locksHeld = locksHeld[1:]
-		l.lockMap[args.Name] = locksHeld
-		*reply = true
-	} else {
-		// Delete the map entry since this is the last read lock held
-		// on *name.
-		delete(l.lockMap, args.Name)
-		*reply = true
+	var lri []lockRequesterInfo
+	if lri, *reply = l.lockMap[args.Name]; !*reply { // No lock is held on the given name
+		return fmt.Errorf("RUnlock attempted on an unlocked entity: %s", args.Name)
+	}
+	if *reply = !isWriteLock(lri); !*reply { // A write-lock is held, cannot release a read lock
+		return fmt.Errorf("RUnlock attempted on a write locked entity: %s", args.Name)
+	}
+	if l.removeEntry(args.Name, args.UID, &lri) {
+		return nil
+	}
+	return fmt.Errorf("RUnlock unable to find corresponding read lock for uid: %s", args.UID)
+}
+
+// Active - rpc handler for active lock status.
+func (l *lockServer) Active(args *LockArgs, reply *bool) error {
+	l.mutex.Lock()
+	defer l.mutex.Unlock()
+	if err := l.verifyArgs(args); err != nil {
+		return err
+	}
+	var lri []lockRequesterInfo
+	if lri, *reply = l.lockMap[args.Name]; !*reply {
+		return nil // No lock is held on the given name so return false
+	}
+	// Check whether uid is still active
+	for _, entry := range lri {
+		if *reply = entry.uid == args.UID; *reply {
+			return nil // When uid found return true
+		}
+	}
+	return nil // None found so return false
+}
+
+// removeEntry removes the entry matching uid from the lockRequesterInfo array for name; when that entry
+// is the only one (a write lock or the last read lock) the whole array is removed from the map
+func (l *lockServer) removeEntry(name, uid string, lri *[]lockRequesterInfo) bool {
+	// Find correct entry to remove based on uid
+	for index, entry := range *lri {
+		if entry.uid == uid {
+			if len(*lri) == 1 {
+				delete(l.lockMap, name) // Remove the (last) lock
+			} else {
+				// Remove the appropriate read lock
+				*lri = append((*lri)[:index], (*lri)[index+1:]...)
+				l.lockMap[name] = *lri
+			}
+			return true
+		}
+	}
+	return false
+}
+
+// nameLockRequesterInfoPair is a helper type for lock maintenance
+type nameLockRequesterInfoPair struct {
+	name string
+	lri  lockRequesterInfo
+}
+
+// getLongLivedLocks returns locks whose last validity check happened at least
+// interval ago, and marks each returned lock as checked at the current time
+func getLongLivedLocks(m map[string][]lockRequesterInfo, interval time.Duration) []nameLockRequesterInfoPair {
+
+	rslt := []nameLockRequesterInfoPair{}
+
+	for name, lriArray := range m {
+
+		for idx := range lriArray {
+			// Check whether enough time has gone by since last check
+			if time.Since(lriArray[idx].timeLastCheck) >= interval {
+				rslt = append(rslt, nameLockRequesterInfoPair{name: name, lri: lriArray[idx]})
+				lriArray[idx].timeLastCheck = time.Now()
+			}
+		}
+	}
+
+	return rslt
+}
+
+// lockMaintenance loops over locks that have been active for some time and checks back
+// with the original server whether it is still alive or not
+func (l *lockServer) lockMaintenance(interval time.Duration) {
+
+	l.mutex.Lock()
+	// get list of locks to check
+	nlripLongLived := getLongLivedLocks(l.lockMap, interval)
+	l.mutex.Unlock()
+
+	for _, nlrip := range nlripLongLived {
+
+		c := newClient(nlrip.lri.node, nlrip.lri.rpcPath)
+
+		var active bool
+
+		// Call back to the original server to verify whether the lock is still active (based on name & uid)
+		if err := c.Call("Dsync.Active", &LockArgs{Name: nlrip.name, UID: nlrip.lri.uid}, &active); err != nil {
+			// We failed to connect back to the server that originated the lock, this can either be due to
+			// - server at client down
+			// - some network error (and server is up normally)
+			//
+			// We ignore the error and retry later to resolve the status of this lock
+			c.Close()
+		} else {
+			c.Close()
+
+			if !active { // The lock is no longer active at server that originated the lock
+				// so remove the lock from the map
+				l.mutex.Lock()
+				// Check if entry is still in map (could have been removed altogether by 'concurrent' (R)Unlock of last entry)
+				if lri, ok := l.lockMap[nlrip.name]; ok {
+					if !l.removeEntry(nlrip.name, nlrip.lri.uid, &lri) {
+						// Remove failed, in case it is a:
+						if nlrip.lri.writer {
+							// Writer: this should never happen as the whole (mapped) entry should have been deleted
+							log.Errorln("Lock maintenance failed to remove entry for write lock (should never happen)", nlrip.name, nlrip.lri, lri)
+						} else {
+							// Reader: this can happen if multiple read locks were active and the one we are looking for
+							// has been released concurrently (so it is fine)
+						}
+					} else {
+						// remove went okay, all is fine
+					}
+				}
+				l.mutex.Unlock()
+			}
+		}
 	}
-	return nil
 }

 // Initialize distributed lock.
@@ -205,12 +322,26 @@ func newLockServers(serverConfig serverCmdConfig) (lockServers []*lockServer) {
 			if idx := strings.LastIndex(export, ":"); idx != -1 {
 				export = export[idx+1:]
 			}
-			lockServers = append(lockServers, &lockServer{
+
+			// Create handler for lock RPCs
+			locker := &lockServer{
 				rpcPath:   export,
 				mutex:     sync.Mutex{},
-				lockMap:   make(map[string][]bool),
+				lockMap:   make(map[string][]lockRequesterInfo),
 				timestamp: time.Now().UTC(),
-			})
+			}
+
+			// Start loop for stale lock maintenance
+			go func() {
+				// Start with random sleep time, so as to avoid "synchronous checks" between servers
+				time.Sleep(time.Duration(rand.Float64() * float64(lockMaintenanceLoop)))
+				for {
+					time.Sleep(lockMaintenanceLoop)
+					locker.lockMaintenance(lockCheckValidityInterval)
+				}
+			}()
+
+			lockServers = append(lockServers, locker)
 		}
 	}
 	return lockServers
--- a/cmd/namespace-lock.go
+++ b/cmd/namespace-lock.go
@@ -38,6 +38,7 @@ func initDsyncNodes(disks []string, port int) error {
 	cred := serverConfig.GetCredential()
 	// Initialize rpc lock client information only if this instance is a distributed setup.
 	var clnts []dsync.RPC
+	myNode := -1
 	for _, disk := range disks {
 		if idx := strings.LastIndex(disk, ":"); idx != -1 {
 			clnts = append(clnts, newAuthClient(&authConfig{
@@ -49,9 +50,14 @@ func initDsyncNodes(disks []string, port int) error {
 				path:        pathutil.Join(lockRPCPath, disk[idx+1:]),
 				loginMethod: "Dsync.LoginHandler",
 			}))
+
+			if isLocalStorage(disk) && myNode == -1 {
+				myNode = len(clnts) - 1
+			}
 		}
 	}
-	return dsync.SetNodesWithClients(clnts)
+
+	return dsync.SetNodesWithClients(clnts, myNode)
 }

 // initNSLock - initialize name space lock map.
@@ -86,9 +92,8 @@ type nsParam struct {

 // nsLock - provides primitives for locking critical namespace regions.
 type nsLock struct {
-	writer      RWLocker
-	readerArray []RWLocker
-	ref         uint
+	RWLocker
+	ref uint
 }

 // nsLockMap - namespace lock map, provides primitives to Lock,
@@ -114,7 +119,7 @@ func (n *nsLockMap) lock(volume, path string, lockOrigin, opsID string, readLock
 	nsLk, found := n.lockMap[param]
 	if !found {
 		nsLk = &nsLock{
-			writer: func() RWLocker {
+			RWLocker: func() RWLocker {
 				if n.isDist {
 					return dsync.NewDRWMutex(pathutil.Join(volume, path))
 				}
@@ -125,10 +130,6 @@ func (n *nsLockMap) lock(volume, path string, lockOrigin, opsID string, readLock
 		n.lockMap[param] = nsLk
 	}
 	nsLk.ref++ // Update ref count here to avoid multiple races.
-	rwlock := nsLk.writer
-	if readLock && n.isDist {
-		rwlock = dsync.NewDRWMutex(pathutil.Join(volume, path))
-	}

 	if globalDebugLock {
 		// change the state of the lock to be  blocked for the given pair of <volume, path> and <OperationID> till the lock unblocks.
@@ -143,21 +144,9 @@ func (n *nsLockMap) lock(volume, path string, lockOrigin, opsID string, readLock

 	// Locking here can block.
 	if readLock {
-		rwlock.RLock()
-
-		if n.isDist {
-			// Only add (for reader case) to array after RLock() succeeds
-			// (so that we know for sure that element in [0] can be RUnlocked())
-			n.lockMapMutex.Lock()
-			if len(nsLk.readerArray) == 0 {
-				nsLk.readerArray = []RWLocker{rwlock}
-			} else {
-				nsLk.readerArray = append(nsLk.readerArray, rwlock)
-			}
-			n.lockMapMutex.Unlock()
-		}
+		nsLk.RLock()
 	} else {
-		rwlock.Lock()
+		nsLk.Lock()
 	}

 	// check if lock debugging enabled.
@@ -180,19 +169,9 @@ func (n *nsLockMap) unlock(volume, path, opsID string, readLock bool) {
 	param := nsParam{volume, path}
 	if nsLk, found := n.lockMap[param]; found {
 		if readLock {
-			if n.isDist {
-				if len(nsLk.readerArray) == 0 {
-					errorIf(errors.New("Length of reader lock array cannot be 0."), "Invalid reader lock array length detected.")
-				}
-				// Release first lock first (FIFO)
-				nsLk.readerArray[0].RUnlock()
-				// And discard first element
-				nsLk.readerArray = nsLk.readerArray[1:]
-			} else {
-				nsLk.writer.RUnlock()
-			}
+			nsLk.RUnlock()
 		} else {
-			nsLk.writer.Unlock()
+			nsLk.Unlock()
 		}
 		if nsLk.ref == 0 {
 			errorIf(errors.New("Namespace reference count cannot be 0."), "Invalid reference count detected.")
@@ -208,10 +187,6 @@ func (n *nsLockMap) unlock(volume, path, opsID string, readLock bool) {
 			}
 		}
 		if nsLk.ref == 0 {
-			if len(nsLk.readerArray) != 0 && n.isDist {
-				errorIf(errors.New("Length of reader lock array should be 0 upon deleting map entry."), "Invalid reader lock array length detected.")
-			}
-
 			// Remove from the map if there are no more references.
 			delete(n.lockMap, param)

--- a/cmd/net-rpc-client.go
+++ b/cmd/net-rpc-client.go
@@ -123,3 +123,13 @@ func (rpcClient *RPCClient) Close() error {
 	rpcClient.clearRPCClient()
 	return rpcLocalStack.Close()
 }
+
+// Node returns the node (network address) of the connection
+func (rpcClient *RPCClient) Node() string {
+	return rpcClient.node
+}
+
+// RPCPath returns the RPC path of the connection
+func (rpcClient *RPCClient) RPCPath() string {
+	return rpcClient.rpcPath
+}