fix: do not os.Exit(1) while writing goroutines during shutdown (#17640)

Also add jitter to the shutdown poll, starting with a short
interval that doubles up to a 500ms cap, so that a shutdown
sequence that finishes before 500ms is detected early; this
reduces the overall time taken during "restart" of the service.

This speeds up `mc admin service restart` during active I/O.
It also ensures that systemd doesn't treat the returned
'error' as a failure: certain systemd configurations can
cause it to 'auto-restart' the process by itself, which can
interfere with `mc admin service restart`.

With this change, restarting the service is observably
much snappier.
This commit is contained in:
Harshavardhana 2023-07-12 07:18:30 -07:00 committed by GitHub
parent a566bcf613
commit 2d1cda2061
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 12 deletions

View File

@ -46,21 +46,17 @@ func handleSignals() {
} }
stopProcess := func() bool { stopProcess := func() bool {
var err, oerr error
// send signal to various go-routines that they need to quit. // send signal to various go-routines that they need to quit.
cancelGlobalContext() cancelGlobalContext()
if httpServer := newHTTPServerFn(); httpServer != nil { if httpServer := newHTTPServerFn(); httpServer != nil {
err = httpServer.Shutdown() if err := httpServer.Shutdown(); err != nil && !errors.Is(err, http.ErrServerClosed) {
if !errors.Is(err, http.ErrServerClosed) {
logger.LogIf(context.Background(), err) logger.LogIf(context.Background(), err)
} }
} }
if objAPI := newObjectLayerFn(); objAPI != nil { if objAPI := newObjectLayerFn(); objAPI != nil {
oerr = objAPI.Shutdown(context.Background()) logger.LogIf(context.Background(), objAPI.Shutdown(context.Background()))
logger.LogIf(context.Background(), oerr)
} }
if srv := newConsoleServerFn(); srv != nil { if srv := newConsoleServerFn(); srv != nil {
@ -71,7 +67,7 @@ func handleSignals() {
globalEventNotifier.RemoveAllBucketTargets() globalEventNotifier.RemoveAllBucketTargets()
} }
return (err == nil && oerr == nil) return true
} }
for { for {

View File

@ -1,4 +1,4 @@
// Copyright (c) 2015-2021 MinIO, Inc. // Copyright (c) 2015-2023 MinIO, Inc.
// //
// This file is part of MinIO Object Storage stack // This file is part of MinIO Object Storage stack
// //
@ -22,6 +22,7 @@ import (
"crypto/tls" "crypto/tls"
"errors" "errors"
"log" "log"
"math/rand"
"net" "net"
"net/http" "net/http"
"os" "os"
@ -42,7 +43,7 @@ var (
) )
const ( const (
serverShutdownPoll = 500 * time.Millisecond shutdownPollIntervalMax = 500 * time.Millisecond
// DefaultShutdownTimeout - default shutdown timeout to gracefully shutdown server. // DefaultShutdownTimeout - default shutdown timeout to gracefully shutdown server.
DefaultShutdownTimeout = 5 * time.Second DefaultShutdownTimeout = 5 * time.Second
@ -161,14 +162,32 @@ func (srv *Server) Shutdown() error {
return err return err
} }
pollIntervalBase := time.Millisecond
nextPollInterval := func() time.Duration {
// Add 10% jitter.
interval := pollIntervalBase + time.Duration(rand.Intn(int(pollIntervalBase/10)))
// Double and clamp for next time.
pollIntervalBase *= 2
if pollIntervalBase > shutdownPollIntervalMax {
pollIntervalBase = shutdownPollIntervalMax
}
return interval
}
// Wait for opened connection to be closed up to Shutdown timeout. // Wait for opened connection to be closed up to Shutdown timeout.
shutdownTimeout := srv.ShutdownTimeout shutdownTimeout := srv.ShutdownTimeout
shutdownTimer := time.NewTimer(shutdownTimeout) shutdownTimer := time.NewTimer(shutdownTimeout)
ticker := time.NewTicker(serverShutdownPoll) defer shutdownTimer.Stop()
defer ticker.Stop()
timer := time.NewTimer(nextPollInterval())
defer timer.Stop()
for { for {
select { select {
case <-shutdownTimer.C: case <-shutdownTimer.C:
if atomic.LoadInt32(&srv.requestCount) <= 0 {
return nil
}
// Write all running goroutines. // Write all running goroutines.
tmp, err := os.CreateTemp("", "minio-goroutines-*.txt") tmp, err := os.CreateTemp("", "minio-goroutines-*.txt")
if err == nil { if err == nil {
@ -177,10 +196,11 @@ func (srv *Server) Shutdown() error {
return errors.New("timed out. some connections are still active. goroutines written to " + tmp.Name()) return errors.New("timed out. some connections are still active. goroutines written to " + tmp.Name())
} }
return errors.New("timed out. some connections are still active") return errors.New("timed out. some connections are still active")
case <-ticker.C: case <-timer.C:
if atomic.LoadInt32(&srv.requestCount) <= 0 { if atomic.LoadInt32(&srv.requestCount) <= 0 {
return nil return nil
} }
timer.Reset(nextPollInterval())
} }
} }
} }