From 3e78ea8accdd8d3c09b2532374f07ff1d33b8f1c Mon Sep 17 00:00:00 2001 From: Sidhartha Mani Date: Sat, 18 Apr 2020 11:06:11 -0700 Subject: [PATCH] improve obd tests and optimize network (#9378) - keep long running obd network tests alive - fix error - wrong number of parents in process OBD info - ensure that osinfo does not error out when inside containers - remove limit on max number of connections per client transport The generic client transport uses a default limit of 64 conns per transport. This could end up limiting and throttling usage, and artificially slowing down the performance of MinIO even on hardware capable of doing better. --- cmd/obdinfo.go | 19 ++++++------------- cmd/peer-rest-client.go | 6 +++++- cmd/peer-rest-server.go | 3 +++ cmd/utils.go | 1 - 4 files changed, 14 insertions(+), 15 deletions(-) diff --git a/cmd/obdinfo.go b/cmd/obdinfo.go index 3f22c6d4a..cd5000b3c 100644 --- a/cmd/obdinfo.go +++ b/cmd/obdinfo.go @@ -317,16 +317,14 @@ func getLocalProcOBD(ctx context.Context) madmin.ServerProcOBDInfo { sysProc.PageFaults = pageFaults parent, err := proc.ParentWithContext(ctx) - if err != nil { - return errProcInfo(err) + if err == nil { + sysProc.Parent = parent.Pid } - sysProc.Parent = parent.Pid ppid, err := proc.PpidWithContext(ctx) - if err != nil { - return errProcInfo(err) + if err == nil { + sysProc.Ppid = ppid } - sysProc.Ppid = ppid rlimit, err := proc.RlimitWithContext(ctx) if err != nil { @@ -404,13 +402,8 @@ func getLocalOsInfoOBD(ctx context.Context) madmin.ServerOsOBDInfo { } } - users, err := host.UsersWithContext(ctx) - if err != nil { - return madmin.ServerOsOBDInfo{ - Addr: addr, - Error: err.Error(), - } - } + // ignore user err, as it cannot be obtained reliably inside containers + users, _ := host.UsersWithContext(ctx) return madmin.ServerOsOBDInfo{ Addr: addr, diff --git a/cmd/peer-rest-client.go b/cmd/peer-rest-client.go index 7a87c4c86..47db71cb5 100644 --- a/cmd/peer-rest-client.go +++ b/cmd/peer-rest-client.go @@ -374,7 +374,11 @@ func (client *peerRESTClient) DispatchNetOBDInfo(ctx context.Context) (info madm return } defer http.DrainBody(respBody) - err = gob.NewDecoder(respBody).Decode(&info) + waitReader, err := waitForHTTPResponse(respBody) + if err != nil { + return + } + err = gob.NewDecoder(waitReader).Decode(&info) return } diff --git a/cmd/peer-rest-server.go b/cmd/peer-rest-server.go index f971ad8f9..4194a80b3 100644 --- a/cmd/peer-rest-server.go +++ b/cmd/peer-rest-server.go @@ -403,9 +403,12 @@ func (s *peerRESTServer) DispatchNetOBDInfoHandler(w http.ResponseWriter, r *htt return } + done := keepHTTPResponseAlive(w) + ctx := newContext(r, w, "DispatchNetOBDInfo") info := globalNotificationSys.NetOBDInfo(ctx) + done() logger.LogIf(ctx, gob.NewEncoder(w).Encode(info)) w.(http.Flusher).Flush() } diff --git a/cmd/utils.go b/cmd/utils.go index ee83ed391..6e646f622 100644 --- a/cmd/utils.go +++ b/cmd/utils.go @@ -471,7 +471,6 @@ func newCustomHTTPTransport(tlsConfig *tls.Config, dialTimeout time.Duration) fu DialContext: newCustomDialContext(dialTimeout, 15*time.Second), MaxIdleConnsPerHost: 16, MaxIdleConns: 16, - MaxConnsPerHost: 64, // This is used per drive/rpc host. More requests will block until free. IdleConnTimeout: 1 * time.Minute, ResponseHeaderTimeout: 3 * time.Minute, // Set conservative timeouts for MinIO internode. TLSHandshakeTimeout: 10 * time.Second,