From ce9aa2f2b2fbf93592a7ee4ff79b6acda6b42dbf Mon Sep 17 00:00:00 2001 From: Krishnan Parthasarathi Date: Wed, 8 Feb 2017 13:43:02 +0530 Subject: [PATCH] Add uptime to ServiceStatus (#3690) --- cmd/admin-handlers.go | 19 ++++---- cmd/admin-handlers_test.go | 7 ++- cmd/admin-rpc-client.go | 86 ++++++++++++++++++++++++++++++++++ cmd/admin-rpc-server.go | 27 +++++++++++ cmd/globals.go | 4 ++ cmd/server-main.go | 4 ++ pkg/madmin/service-commands.go | 2 + 7 files changed, 139 insertions(+), 10 deletions(-) diff --git a/cmd/admin-handlers.go b/cmd/admin-handlers.go index 2773040e1..4a7bb547b 100644 --- a/cmd/admin-handlers.go +++ b/cmd/admin-handlers.go @@ -19,7 +19,6 @@ package cmd import ( "encoding/json" "encoding/xml" - "fmt" "io/ioutil" "net/http" "net/url" @@ -54,8 +53,8 @@ type ServerVersion struct { // ServerStatus - contains the response of service status API type ServerStatus struct { - StorageInfo StorageInfo `json:"storageInfo"` ServerVersion ServerVersion `json:"serverVersion"` + Uptime time.Duration `json:"uptime"` } // ServiceStatusHandler - GET /?service @@ -70,15 +69,22 @@ func (adminAPI adminAPIHandlers) ServiceStatusHandler(w http.ResponseWriter, r * return } - // Fetch storage backend information - storageInfo := newObjectLayerFn().StorageInfo() // Fetch server version serverVersion := ServerVersion{Version: Version, CommitID: CommitID} + // Fetch uptimes from all peers. This may fail due to lack + // of read-quorum availability. + uptime, err := getPeerUptimes(globalAdminPeers) + if err != nil { + writeErrorResponse(w, toAPIErrorCode(err), r.URL) + errorIf(err, "Possibly failed to get uptime from majority of servers.") + return + } + // Create API response serverStatus := ServerStatus{ - StorageInfo: storageInfo, ServerVersion: serverVersion, + Uptime: uptime, } // Marshal API response @@ -542,7 +548,6 @@ func (adminAPI adminAPIHandlers) HealFormatHandler(w http.ResponseWriter, r *htt // Create a new set of storage instances to heal format.json.
bootstrapDisks, err := initStorageDisks(globalEndpoints) if err != nil { - fmt.Println(traceError(err)) writeErrorResponse(w, toAPIErrorCode(err), r.URL) return } @@ -550,7 +555,6 @@ func (adminAPI adminAPIHandlers) HealFormatHandler(w http.ResponseWriter, r *htt // Heal format.json on available storage. err = healFormatXL(bootstrapDisks) if err != nil { - fmt.Println(traceError(err)) writeErrorResponse(w, toAPIErrorCode(err), r.URL) return } @@ -558,7 +562,6 @@ func (adminAPI adminAPIHandlers) HealFormatHandler(w http.ResponseWriter, r *htt // Instantiate new object layer with newly formatted storage. newObjectAPI, err := newXLObjects(bootstrapDisks) if err != nil { - fmt.Println(traceError(err)) writeErrorResponse(w, toAPIErrorCode(err), r.URL) return } diff --git a/cmd/admin-handlers_test.go b/cmd/admin-handlers_test.go index c6b25055b..e576a5304 100644 --- a/cmd/admin-handlers_test.go +++ b/cmd/admin-handlers_test.go @@ -25,6 +25,7 @@ import ( "net/http/httptest" "net/url" "testing" + "time" router "github.com/gorilla/mux" ) @@ -55,6 +56,9 @@ func prepareAdminXLTestBed() (*adminXLTestBed, error) { return nil, xlErr } + // Initialize boot time + globalBootTime = time.Now().UTC() + // Set globalEndpoints for a single node XL setup. 
for _, xlDir := range xlDirs { globalEndpoints = append(globalEndpoints, &url.URL{ @@ -225,14 +229,13 @@ func testServicesCmdHandler(cmd cmdType, t *testing.T) { if cmd == statusCmd { expectedInfo := ServerStatus{ - StorageInfo: newObjectLayerFn().StorageInfo(), ServerVersion: ServerVersion{Version: Version, CommitID: CommitID}, } receivedInfo := ServerStatus{} if jsonErr := json.Unmarshal(rec.Body.Bytes(), &receivedInfo); jsonErr != nil { t.Errorf("Failed to unmarshal StorageInfo - %v", jsonErr) } - if expectedInfo != receivedInfo { + if expectedInfo.ServerVersion != receivedInfo.ServerVersion { t.Errorf("Expected storage info and received storage info differ, %v %v", expectedInfo, receivedInfo) } } diff --git a/cmd/admin-rpc-client.go b/cmd/admin-rpc-client.go index b963cbd6a..58a83cd0c 100644 --- a/cmd/admin-rpc-client.go +++ b/cmd/admin-rpc-client.go @@ -19,6 +19,7 @@ package cmd import ( "net/url" "path" + "sort" "sync" "time" ) @@ -39,6 +40,7 @@ type adminCmdRunner interface { Restart() error ListLocks(bucket, prefix string, duration time.Duration) ([]VolumeLockInfo, error) ReInitDisks() error + Uptime() (time.Duration, error) } // Restart - Sends a message over channel to the go-routine @@ -88,6 +90,28 @@ func (rc remoteAdminClient) ReInitDisks() error { return rc.Call("Admin.ReInitDisks", &args, &reply) } +// Uptime - Returns the uptime of this server. Timestamp is taken +// after object layer is initialized. +func (lc localAdminClient) Uptime() (time.Duration, error) { + if globalBootTime.IsZero() { + return time.Duration(0), errServerNotInitialized + } + + return time.Now().UTC().Sub(globalBootTime), nil +} + +// Uptime - returns the uptime of the server to which the RPC call is made. 
+func (rc remoteAdminClient) Uptime() (time.Duration, error) { + args := AuthRPCArgs{} + reply := UptimeReply{} + err := rc.Call("Admin.Uptime", &args, &reply) + if err != nil { + return time.Duration(0), err + } + + return reply.Uptime, nil +} + // adminPeer - represents an entity that implements Restart methods. type adminPeer struct { addr string @@ -241,3 +265,65 @@ func reInitPeerDisks(peers adminPeers) error { wg.Wait() return nil } + +// uptimeSlice - used to sort uptimes in chronological order. +type uptimeSlice []struct { + err error + uptime time.Duration +} + +func (ts uptimeSlice) Len() int { + return len(ts) +} + +func (ts uptimeSlice) Less(i, j int) bool { + return ts[i].uptime < ts[j].uptime +} + +func (ts uptimeSlice) Swap(i, j int) { + ts[i], ts[j] = ts[j], ts[i] +} + +// getPeerUptimes - returns the uptime since the last time read quorum +// was established on success. Otherwise returns InsufficientReadQuorum. +func getPeerUptimes(peers adminPeers) (time.Duration, error) { + uptimes := make(uptimeSlice, len(peers)) + + // Get up time of all servers. + wg := sync.WaitGroup{} + for i, peer := range peers { + wg.Add(1) + go func(idx int, peer adminPeer) { + defer wg.Done() + uptimes[idx].uptime, uptimes[idx].err = peer.cmdRunner.Uptime() + }(i, peer) + } + wg.Wait() + + // Sort uptimes in chronological order. + sort.Sort(uptimes) + + // Pick the readQuorum'th uptime in chronological order. i.e, + // the time at which read quorum was (re-)established. + readQuorum := len(uptimes) / 2 + validCount := 0 + latestUptime := time.Duration(0) + for _, uptime := range uptimes { + if uptime.err != nil { + continue + } + + validCount++ + if validCount >= readQuorum { + latestUptime = uptime.uptime + break + } + } + + // This implies there weren't read quorum number of servers up.
+ if latestUptime == time.Duration(0) { + return time.Duration(0), InsufficientReadQuorum{} + } + + return latestUptime, nil +} diff --git a/cmd/admin-rpc-server.go b/cmd/admin-rpc-server.go index fa5d5e781..d5f8178bd 100644 --- a/cmd/admin-rpc-server.go +++ b/cmd/admin-rpc-server.go @@ -48,6 +48,12 @@ type ListLocksReply struct { volLocks []VolumeLockInfo } +// UptimeReply - wraps the uptime response over RPC. +type UptimeReply struct { + AuthRPCReply + Uptime time.Duration +} + // Restart - Restart this instance of minio server. func (s *adminCmd) Restart(args *AuthRPCArgs, reply *AuthRPCReply) error { if err := args.IsAuthenticated(); err != nil { @@ -105,6 +111,27 @@ func (s *adminCmd) ReInitDisks(args *AuthRPCArgs, reply *AuthRPCReply) error { return nil } +// Uptime - returns the duration elapsed since the object layer was initialized on this server. +func (s *adminCmd) Uptime(args *AuthRPCArgs, reply *UptimeReply) error { + if err := args.IsAuthenticated(); err != nil { + return err + } + + if globalBootTime.IsZero() { + return errServerNotInitialized + } + + // N.B. The uptime is computed assuming that the system time is + // monotonic. This is not the case in time pkg in Go, see + // https://github.com/golang/go/issues/12914. This is expected + // to be fixed by go1.9. + *reply = UptimeReply{ + Uptime: time.Now().UTC().Sub(globalBootTime), + } + + return nil +} + // registerAdminRPCRouter - registers RPC methods for service status, // stop and restart commands. func registerAdminRPCRouter(mux *router.Router) error { diff --git a/cmd/globals.go b/cmd/globals.go index 8b9f312cb..2fe707ba0 100644 --- a/cmd/globals.go +++ b/cmd/globals.go @@ -124,9 +124,13 @@ var ( // Global server's network statistics globalConnStats = newConnStats() + // Global HTTP request statisitics globalHTTPStats = newHTTPStats() + // Time when object layer was initialized on start up. + globalBootTime time.Time + // Add new variable global values here.
) diff --git a/cmd/server-main.go b/cmd/server-main.go index 3164c5783..3d6602604 100644 --- a/cmd/server-main.go +++ b/cmd/server-main.go @@ -25,6 +25,7 @@ import ( "sort" "strconv" "strings" + "time" "runtime" @@ -465,6 +466,9 @@ func serverMain(c *cli.Context) { globalObjectAPI = newObject globalObjLayerMutex.Unlock() + // Set startup time + globalBootTime = time.Now().UTC() + // Prints the formatted startup message once object layer is initialized. printStartupMessage(apiEndPoints) diff --git a/pkg/madmin/service-commands.go b/pkg/madmin/service-commands.go index 37869cd6e..2556a8960 100644 --- a/pkg/madmin/service-commands.go +++ b/pkg/madmin/service-commands.go @@ -25,6 +25,7 @@ import ( "io/ioutil" "net/http" "net/url" + "time" ) // BackendType - represents different backend types. @@ -70,6 +71,7 @@ type ServerVersion struct { type ServiceStatusMetadata struct { StorageInfo StorageInfo `json:"storageInfo"` ServerVersion ServerVersion `json:"serverVersion"` + Uptime time.Duration `json:"uptime"` } // ServiceStatus - Connect to a minio server and call Service Status Management API