mirror of
https://github.com/minio/minio.git
synced 2025-11-07 04:42:56 -05:00
Retry name lookup for kubernetes and docker swarm environment (#4800)
Wait for remote hosts to resolve instead of failing on the first host resolution error when running in a Kubernetes or Docker environment. Note that: (1) waiting is based on an exponential back-off mechanism; (2) if run as a plain binary, the server still fails if a remote host is not resolvable. This is needed because in orchestration platforms like Kubernetes, remote hosts are started sequentially and not all of the hosts are up initially, though they are expected to come up within a short time frame. It is difficult to identify a cap on the waiting time due to the non-deterministic nature of infrastructure platforms, so the server waits indefinitely for the hosts to come up, while logging the error messages to the console. Fixes: https://github.com/minio/minio/issues/4669
This commit is contained in:
committed by
Harshavardhana
parent
53f84d6084
commit
d4b107adf4
42
cmd/net.go
42
cmd/net.go
@@ -26,7 +26,9 @@ import (
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
humanize "github.com/dustin/go-humanize"
|
||||
"github.com/minio/minio-go/pkg/set"
|
||||
)
|
||||
|
||||
@@ -65,12 +67,44 @@ func mustGetLocalIP4() (ipList set.StringSet) {
|
||||
|
||||
// getHostIP4 returns IPv4 address of given host.
|
||||
func getHostIP4(host string) (ipList set.StringSet, err error) {
|
||||
ipList = set.NewStringSet()
|
||||
ips, err := net.LookupIP(host)
|
||||
if err != nil {
|
||||
return ipList, err
|
||||
var ips []net.IP
|
||||
|
||||
if ips, err = net.LookupIP(host); err != nil {
|
||||
// return err if not Docker or Kubernetes
|
||||
// We use IsDocker() method to check for Docker Swarm environment
|
||||
// as there is no reliable way to clearly identify Swarm from
|
||||
// Docker environment.
|
||||
if !IsDocker() && !IsKubernetes() {
|
||||
return ipList, err
|
||||
}
|
||||
|
||||
// channel to indicate completion of host resolution
|
||||
doneCh := make(chan struct{})
|
||||
// Indicate retry routine to exit cleanly, upon this function return.
|
||||
defer close(doneCh)
|
||||
// Mark the starting time
|
||||
startTime := time.Now()
|
||||
// wait for hosts to resolve in an exponential back-off manner
|
||||
for _ = range newRetryTimerSimple(doneCh) {
|
||||
// Retry infinitely on Kubernetes and Docker swarm.
|
||||
// This is needed as the remote hosts are sometimes
|
||||
// not available immediately.
|
||||
if ips, err = net.LookupIP(host); err == nil {
|
||||
break
|
||||
}
|
||||
// time elapsed
|
||||
timeElapsed := time.Since(startTime)
|
||||
// log error only if more than 1s elapsed
|
||||
if timeElapsed > time.Second {
|
||||
// log the message to console about the host not being
|
||||
// resolvable.
|
||||
errorIf(err, "Unable to resolve host %s (%s)", host,
|
||||
humanize.RelTime(startTime, startTime.Add(timeElapsed), "elapsed", ""))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ipList = set.NewStringSet()
|
||||
for _, ip := range ips {
|
||||
if ip.To4() != nil {
|
||||
ipList.Add(ip.String())
|
||||
|
||||
Reference in New Issue
Block a user