Add extra disconnect safety (#19022)

Fix reported races that are actually synchronized by network calls.

But this should add some extra safety for untimely disconnects.

Race reported:

```
WARNING: DATA RACE
Read at 0x00c00171c9c0 by goroutine 214:
  github.com/minio/minio/internal/grid.(*muxClient).addResponse()
      e:/gopath/src/github.com/minio/minio/internal/grid/muxclient.go:519 +0x111
  github.com/minio/minio/internal/grid.(*muxClient).error()
      e:/gopath/src/github.com/minio/minio/internal/grid/muxclient.go:470 +0x21d
  github.com/minio/minio/internal/grid.(*Connection).handleDisconnectClientMux()
      e:/gopath/src/github.com/minio/minio/internal/grid/connection.go:1391 +0x15b
  github.com/minio/minio/internal/grid.(*Connection).handleMsg()
      e:/gopath/src/github.com/minio/minio/internal/grid/connection.go:1190 +0x1ab
  github.com/minio/minio/internal/grid.(*Connection).handleMessages.func1()
      e:/gopath/src/github.com/minio/minio/internal/grid/connection.go:981 +0x610

Previous write at 0x00c00171c9c0 by goroutine 1081:
  github.com/minio/minio/internal/grid.(*muxClient).roundtrip()
      e:/gopath/src/github.com/minio/minio/internal/grid/muxclient.go:94 +0x324
  github.com/minio/minio/internal/grid.(*muxClient).traceRoundtrip()
      e:/gopath/src/github.com/minio/minio/internal/grid/trace.go:74 +0x10e4
  github.com/minio/minio/internal/grid.(*Subroute).Request()
      e:/gopath/src/github.com/minio/minio/internal/grid/connection.go:366 +0x230
  github.com/minio/minio/internal/grid.(*SingleHandler[go.shape.*github.com/minio/minio/cmd.DiskInfoOptions,go.shape.*github.com/minio/minio/cmd.DiskInfo]).Call()
      e:/gopath/src/github.com/minio/minio/internal/grid/handlers.go:554 +0x3fd
  github.com/minio/minio/cmd.(*storageRESTClient).DiskInfo()
      e:/gopath/src/github.com/minio/minio/cmd/storage-rest-client.go:314 +0x270
  github.com/minio/minio/cmd.erasureObjects.getOnlineDisksWithHealingAndInfo.func1()
      e:/gopath/src/github.com/minio/minio/cmd/erasure.go:293 +0x171
```

This read will always happen after the write, since there is a network call in between.

However a disconnect could come in while we are setting up the call, so we protect against that with extra checks.
This commit is contained in:
Klaus Post 2024-02-09 08:43:38 -08:00 committed by GitHub
parent 62761a23e6
commit 8e68ff9321
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 4 deletions

View File

@ -15,11 +15,12 @@ func _() {
_ = x[debugSetConnPingDuration-4] _ = x[debugSetConnPingDuration-4]
_ = x[debugSetClientPingDuration-5] _ = x[debugSetClientPingDuration-5]
_ = x[debugAddToDeadline-6] _ = x[debugAddToDeadline-6]
_ = x[debugIsOutgoingClosed-7]
} }
const _debugMsg_name = "debugShutdowndebugKillInbounddebugKillOutbounddebugWaitForExitdebugSetConnPingDurationdebugSetClientPingDurationdebugAddToDeadline" const _debugMsg_name = "debugShutdowndebugKillInbounddebugKillOutbounddebugWaitForExitdebugSetConnPingDurationdebugSetClientPingDurationdebugAddToDeadlinedebugIsOutgoingClosed"
var _debugMsg_index = [...]uint8{0, 13, 29, 46, 62, 86, 112, 130} var _debugMsg_index = [...]uint8{0, 13, 29, 46, 62, 86, 112, 130, 151}
func (i debugMsg) String() string { func (i debugMsg) String() string {
if i < 0 || i >= debugMsg(len(_debugMsg_index)-1) { if i < 0 || i >= debugMsg(len(_debugMsg_index)-1) {

View File

@ -91,7 +91,13 @@ func (m *muxClient) roundtrip(h HandlerID, req []byte) ([]byte, error) {
msg.Flags |= FlagSubroute msg.Flags |= FlagSubroute
} }
ch := make(chan Response, 1) ch := make(chan Response, 1)
m.respMu.Lock()
if m.closed {
m.respMu.Unlock()
return nil, ErrDisconnected
}
m.respWait = ch m.respWait = ch
m.respMu.Unlock()
ctx := m.ctx ctx := m.ctx
// Add deadline if none. // Add deadline if none.
@ -101,8 +107,8 @@ func (m *muxClient) roundtrip(h HandlerID, req []byte) ([]byte, error) {
ctx, cancel = context.WithTimeout(ctx, defaultSingleRequestTimeout) ctx, cancel = context.WithTimeout(ctx, defaultSingleRequestTimeout)
defer cancel() defer cancel()
} }
// Send... (no need for lock yet) // Send request
if err := m.sendLocked(msg); err != nil { if err := m.send(msg); err != nil {
return nil, err return nil, err
} }
if debugReqs { if debugReqs {
@ -215,7 +221,13 @@ func (m *muxClient) RequestStream(h HandlerID, payload []byte, requests chan []b
return nil, errors.New("RequestStream: responses channel is nil") return nil, errors.New("RequestStream: responses channel is nil")
} }
m.init = true m.init = true
m.respMu.Lock()
if m.closed {
m.respMu.Unlock()
return nil, ErrDisconnected
}
m.respWait = responses // Route directly to output. m.respWait = responses // Route directly to output.
m.respMu.Unlock()
// Try to grab an initial block. // Try to grab an initial block.
m.singleResp = false m.singleResp = false