From 8e68ff93218427e40ee530a3db2631181b6f9c3d Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Fri, 9 Feb 2024 08:43:38 -0800 Subject: [PATCH] Add extra disconnect safety (#19022) Fix reported races that are actually synchronized by network calls. But this should add some extra safety for untimely disconnects. Race reported: ``` WARNING: DATA RACE Read at 0x00c00171c9c0 by goroutine 214: github.com/minio/minio/internal/grid.(*muxClient).addResponse() e:/gopath/src/github.com/minio/minio/internal/grid/muxclient.go:519 +0x111 github.com/minio/minio/internal/grid.(*muxClient).error() e:/gopath/src/github.com/minio/minio/internal/grid/muxclient.go:470 +0x21d github.com/minio/minio/internal/grid.(*Connection).handleDisconnectClientMux() e:/gopath/src/github.com/minio/minio/internal/grid/connection.go:1391 +0x15b github.com/minio/minio/internal/grid.(*Connection).handleMsg() e:/gopath/src/github.com/minio/minio/internal/grid/connection.go:1190 +0x1ab github.com/minio/minio/internal/grid.(*Connection).handleMessages.func1() e:/gopath/src/github.com/minio/minio/internal/grid/connection.go:981 +0x610 Previous write at 0x00c00171c9c0 by goroutine 1081: github.com/minio/minio/internal/grid.(*muxClient).roundtrip() e:/gopath/src/github.com/minio/minio/internal/grid/muxclient.go:94 +0x324 github.com/minio/minio/internal/grid.(*muxClient).traceRoundtrip() e:/gopath/src/github.com/minio/minio/internal/grid/trace.go:74 +0x10e4 github.com/minio/minio/internal/grid.(*Subroute).Request() e:/gopath/src/github.com/minio/minio/internal/grid/connection.go:366 +0x230 github.com/minio/minio/internal/grid.(*SingleHandler[go.shape.*github.com/minio/minio/cmd.DiskInfoOptions,go.shape.*github.com/minio/minio/cmd.DiskInfo]).Call() e:/gopath/src/github.com/minio/minio/internal/grid/handlers.go:554 +0x3fd github.com/minio/minio/cmd.(*storageRESTClient).DiskInfo() e:/gopath/src/github.com/minio/minio/cmd/storage-rest-client.go:314 +0x270 github.com/minio/minio/cmd.erasureObjects.getOnlineDisksWithHealingAndInfo.func1() e:/gopath/src/github.com/minio/minio/cmd/erasure.go:293 +0x171 ``` This read will always happen after the write, since there is a network call in between. However a disconnect could come in while we are setting up the call, so we protect against that with extra checks. --- internal/grid/debugmsg_string.go | 5 +++-- internal/grid/muxclient.go | 16 ++++++++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/internal/grid/debugmsg_string.go b/internal/grid/debugmsg_string.go index 4c8676e39..a84f811b5 100644 --- a/internal/grid/debugmsg_string.go +++ b/internal/grid/debugmsg_string.go @@ -15,11 +15,12 @@ func _() { _ = x[debugSetConnPingDuration-4] _ = x[debugSetClientPingDuration-5] _ = x[debugAddToDeadline-6] + _ = x[debugIsOutgoingClosed-7] } -const _debugMsg_name = "debugShutdowndebugKillInbounddebugKillOutbounddebugWaitForExitdebugSetConnPingDurationdebugSetClientPingDurationdebugAddToDeadline" +const _debugMsg_name = "debugShutdowndebugKillInbounddebugKillOutbounddebugWaitForExitdebugSetConnPingDurationdebugSetClientPingDurationdebugAddToDeadlinedebugIsOutgoingClosed" -var _debugMsg_index = [...]uint8{0, 13, 29, 46, 62, 86, 112, 130} +var _debugMsg_index = [...]uint8{0, 13, 29, 46, 62, 86, 112, 130, 151} func (i debugMsg) String() string { if i < 0 || i >= debugMsg(len(_debugMsg_index)-1) { diff --git a/internal/grid/muxclient.go b/internal/grid/muxclient.go index 1b380a341..2078dc19a 100644 --- a/internal/grid/muxclient.go +++ b/internal/grid/muxclient.go @@ -91,7 +91,13 @@ func (m *muxClient) roundtrip(h HandlerID, req []byte) ([]byte, error) { msg.Flags |= FlagSubroute } ch := make(chan Response, 1) + m.respMu.Lock() + if m.closed { + m.respMu.Unlock() + return nil, ErrDisconnected + } m.respWait = ch + m.respMu.Unlock() ctx := m.ctx // Add deadline if none. @@ -101,8 +107,8 @@ func (m *muxClient) roundtrip(h HandlerID, req []byte) ([]byte, error) { ctx, cancel = context.WithTimeout(ctx, defaultSingleRequestTimeout) defer cancel() } - // Send... (no need for lock yet) - if err := m.sendLocked(msg); err != nil { + // Send request + if err := m.send(msg); err != nil { return nil, err } if debugReqs { @@ -215,7 +221,13 @@ func (m *muxClient) RequestStream(h HandlerID, payload []byte, requests chan []b return nil, errors.New("RequestStream: responses channel is nil") } m.init = true + m.respMu.Lock() + if m.closed { + m.respMu.Unlock() + return nil, ErrDisconnected + } m.respWait = responses // Route directly to output. + m.respMu.Unlock() // Try to grab an initial block. m.singleResp = false