instrumentation: instrumentation for locks. (#2584)

- Instrumentation for locks. - Detailed test coverage. - Adding RPC control handler to fetch lock instrumentation. - RPC control handlers suite tests with a test RPC server.
2025-11-07 21:02:58 -05:00 · 2016-09-01 00:09:08 +05:30
parent de67bca211
commit 07d232c7b4
20 changed files with 2132 additions and 204 deletions
--- a/cmd/namespace-lock.go
+++ b/cmd/namespace-lock.go
@@ -18,7 +18,9 @@ package cmd

 import (
 	"errors"
+	"fmt"
 	pathutil "path"
+	"runtime"
 	"strconv"
 	"strings"
 	"sync"
@@ -58,6 +60,15 @@ func initNSLock(isDist bool) {
 		isDist:  isDist,
 		lockMap: make(map[nsParam]*nsLock),
 	}
+	if globalDebugLock {
+		// Lock debugging enabled; initialize nsLockMap with an entry for debugging information.
+		// entries of <volume,path> -> stateInfo of locks, for instrumentation purpose.
+		nsMutex.debugLockMap = make(map[nsParam]*debugLockInfoPerVolumePath)
+	}
+}
+
+func (n *nsLockMap) initLockInfoForVolumePath(param nsParam) {
+	n.debugLockMap[param] = newDebugLockInfoPerVolumePath()
 }

 // RWLocker - interface that any read-write locking library should implement.
@@ -83,13 +94,19 @@ type nsLock struct {
 // nsLockMap - namespace lock map, provides primitives to Lock,
 // Unlock, RLock and RUnlock.
 type nsLockMap struct {
+	// lock counter used for lock debugging.
+	globalLockCounter  int64                                   // total locks held.
+	blockedCounter     int64                                   // total operations blocked waiting for locks.
+	runningLockCounter int64                                   // total locks held but not released yet.
+	debugLockMap       map[nsParam]*debugLockInfoPerVolumePath // info for instrumentation on locks.
+
 	isDist       bool // indicates whether the locking service is part of a distributed setup or not.
 	lockMap      map[nsParam]*nsLock
 	lockMapMutex sync.Mutex
 }

 // Lock the namespace resource.
-func (n *nsLockMap) lock(volume, path string, readLock bool) {
+func (n *nsLockMap) lock(volume, path string, lockOrigin, opsID string, readLock bool) {
 	var nsLk *nsLock
 	n.lockMapMutex.Lock()

@@ -112,6 +129,15 @@ func (n *nsLockMap) lock(volume, path string, readLock bool) {
 	if readLock && n.isDist {
 		rwlock = dsync.NewDRWMutex(pathutil.Join(volume, path))
 	}
+
+	if globalDebugLock {
+		// Change the state of the lock to blocked for the given pair of <volume, path> and <OperationID> until the lock unblocks.
+		// The lock for accessing `nsMutex` is held inside the function itself.
+		err := n.statusNoneToBlocked(param, lockOrigin, opsID, readLock)
+		if err != nil {
+			errorIf(err, "Failed to set lock state to blocked.")
+		}
+	}
 	// Unlock map before Locking NS which might block.
 	n.lockMapMutex.Unlock()

@@ -133,10 +159,20 @@ func (n *nsLockMap) lock(volume, path string, readLock bool) {
 	} else {
 		rwlock.Lock()
 	}
+
+	// check if lock debugging enabled.
+	if globalDebugLock {
+		// Changing the status of the operation from blocked to running.
+		// Change the state of the lock to running (from blocked) for the given pair of <volume, path> and <OperationID>.
+		err := n.statusBlockedToRunning(param, lockOrigin, opsID, readLock)
+		if err != nil {
+			errorIf(err, "Failed to set the lock state to running.")
+		}
+	}
 }

 // Unlock the namespace resource.
-func (n *nsLockMap) unlock(volume, path string, readLock bool) {
+func (n *nsLockMap) unlock(volume, path, opsID string, readLock bool) {
 	// nsLk.Unlock() will not block, hence locking the map for the entire function is fine.
 	n.lockMapMutex.Lock()
 	defer n.lockMapMutex.Unlock()
@@ -163,6 +199,13 @@ func (n *nsLockMap) unlock(volume, path string, readLock bool) {
 		}
 		if nsLk.ref != 0 {
 			nsLk.ref--
+			// locking debug enabled, delete the lock state entry for given operation ID.
+			if globalDebugLock {
+				err := n.deleteLockInfoEntryForOps(param, opsID)
+				if err != nil {
+					errorIf(err, "Failed to delete lock info entry.")
+				}
+			}
 		}
 		if nsLk.ref == 0 {
 			if len(nsLk.readerArray) != 0 && n.isDist {
@@ -171,31 +214,61 @@ func (n *nsLockMap) unlock(volume, path string, readLock bool) {

 			// Remove from the map if there are no more references.
 			delete(n.lockMap, param)
+
+			// locking debug enabled, delete the lock state entry for given <volume, path> pair.
+			if globalDebugLock {
+				err := n.deleteLockInfoEntryForVolumePath(param)
+				if err != nil {
+					errorIf(err, "Failed to delete lock info entry.")
+				}
+			}
 		}
 	}
 }

 // Lock - locks the given resource for writes, using a previously
 // allocated name space lock or initializing a new one.
-func (n *nsLockMap) Lock(volume, path string) {
+func (n *nsLockMap) Lock(volume, path, opsID string) {
+	var lockOrigin string
+	// Lock debugging enabled. The caller information for the lock has to be obtained here, before calling any other function.
+	if globalDebugLock {
+		// fetching the package, function name and the line number of the caller from the runtime.
+		// here is an example https://play.golang.org/p/perrmNRI9_ .
+		pc, fn, line, success := runtime.Caller(1)
+		if !success {
+			errorIf(errors.New("Couldn't get caller info."), "Fetching caller info form runtime failed.")
+		}
+		lockOrigin = fmt.Sprintf("[lock held] in %s[%s:%d]", runtime.FuncForPC(pc).Name(), fn, line)
+	}
 	readLock := false
-	n.lock(volume, path, readLock)
+	n.lock(volume, path, lockOrigin, opsID, readLock)
 }

 // Unlock - unlocks any previously acquired write locks.
-func (n *nsLockMap) Unlock(volume, path string) {
+func (n *nsLockMap) Unlock(volume, path, opsID string) {
 	readLock := false
-	n.unlock(volume, path, readLock)
+	n.unlock(volume, path, opsID, readLock)
 }

 // RLock - locks any previously acquired read locks.
-func (n *nsLockMap) RLock(volume, path string) {
+func (n *nsLockMap) RLock(volume, path, opsID string) {
+	var lockOrigin string
 	readLock := true
-	n.lock(volume, path, readLock)
+	// Lock debugging enabled. The caller information for the lock has to be obtained here, before calling any other function.
+	if globalDebugLock {
+		// fetching the package, function name and the line number of the caller from the runtime.
+		// here is an example https://play.golang.org/p/perrmNRI9_ .
+		pc, fn, line, success := runtime.Caller(1)
+		if !success {
+			errorIf(errors.New("Couldn't get caller info."), "Fetching caller info form runtime failed.")
+		}
+		lockOrigin = fmt.Sprintf("[lock held] in %s[%s:%d]", runtime.FuncForPC(pc).Name(), fn, line)
+	}
+	n.lock(volume, path, lockOrigin, opsID, readLock)
 }

 // RUnlock - unlocks any previously acquired read locks.
-func (n *nsLockMap) RUnlock(volume, path string) {
+func (n *nsLockMap) RUnlock(volume, path, opsID string) {
 	readLock := true
-	n.unlock(volume, path, readLock)
+	n.unlock(volume, path, opsID, readLock)
 }