mirror of
https://github.com/minio/minio.git
synced 2025-01-12 15:33:22 -05:00
Retry name lookup for kubernetes and docker swarm environment (#4800)
Wait for remote hosts to resolve instead of failing on the first host resolution error when running in a Kubernetes or Docker environment.

Note that:
- Waiting is based on an exponential back-off mechanism.
- If run as a plain binary, the server still fails if a remote host is not resolvable.

This is needed because in orchestration platforms like Kubernetes, remote hosts are started sequentially, so not all hosts are up initially, though they are expected to come up within a short time frame.

It is difficult to identify a cap on the waiting time due to the non-deterministic nature of infrastructure platforms, so the server waits indefinitely for the hosts to come up, while logging the error messages to the console.

Fixes: https://github.com/minio/minio/issues/4669
This commit is contained in:
parent
53f84d6084
commit
d4b107adf4
@ -22,41 +22,6 @@ if [ "${1}" != "minio" ]; then
|
|||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Wait for all the hosts to come online and have
|
|
||||||
# their DNS entries populated properly.
|
|
||||||
docker_wait_hosts() {
|
|
||||||
hosts="$@"
|
|
||||||
num_hosts=0
|
|
||||||
# Count number of hosts in arguments.
|
|
||||||
for host in $hosts; do
|
|
||||||
[ $(echo "$host" | grep -E "^http") ] || continue
|
|
||||||
num_hosts=$((num_hosts+1))
|
|
||||||
done
|
|
||||||
if [ $num_hosts -gt 0 ]; then
|
|
||||||
echo -n "Waiting for all hosts to resolve..."
|
|
||||||
while true; do
|
|
||||||
x=0
|
|
||||||
for host in $hosts; do
|
|
||||||
[ $(echo "$host" | grep -E "^http") ] || continue
|
|
||||||
# Extract the domain.
|
|
||||||
host=$(echo $host | sed -e 's/^http[s]\?:\/\/\([^\/]\+\).*/\1/')
|
|
||||||
echo -n .
|
|
||||||
val=$(ping -c 1 $host 2>/dev/null)
|
|
||||||
if [ $? != 0 ]; then
|
|
||||||
echo "Failed to lookup $host"
|
|
||||||
continue
|
|
||||||
fi
|
|
||||||
x=$((x+1))
|
|
||||||
done
|
|
||||||
# Provided hosts same as successful hosts, should break out.
|
|
||||||
test $x -eq $num_hosts && break
|
|
||||||
echo "Failed to resolve hosts.. retrying after 1 second."
|
|
||||||
sleep 1
|
|
||||||
done
|
|
||||||
echo "All hosts are resolving proceeding to initialize Minio."
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
## Look for docker secrets in default documented location.
|
## Look for docker secrets in default documented location.
|
||||||
docker_secrets_env() {
|
docker_secrets_env() {
|
||||||
local MINIO_ACCESS_KEY_FILE="/run/secrets/access_key"
|
local MINIO_ACCESS_KEY_FILE="/run/secrets/access_key"
|
||||||
@ -75,7 +40,4 @@ docker_secrets_env() {
|
|||||||
## Set access env from secrets if necessary.
|
## Set access env from secrets if necessary.
|
||||||
docker_secrets_env
|
docker_secrets_env
|
||||||
|
|
||||||
## Wait for all the hosts to come online.
|
|
||||||
docker_wait_hosts "$@"
|
|
||||||
|
|
||||||
exec "$@"
|
exec "$@"
|
||||||
|
40
cmd/net.go
40
cmd/net.go
@ -26,7 +26,9 @@ import (
|
|||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"syscall"
|
"syscall"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
humanize "github.com/dustin/go-humanize"
|
||||||
"github.com/minio/minio-go/pkg/set"
|
"github.com/minio/minio-go/pkg/set"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -65,12 +67,44 @@ func mustGetLocalIP4() (ipList set.StringSet) {
|
|||||||
|
|
||||||
// getHostIP4 returns IPv4 address of given host.
|
// getHostIP4 returns IPv4 address of given host.
|
||||||
func getHostIP4(host string) (ipList set.StringSet, err error) {
|
func getHostIP4(host string) (ipList set.StringSet, err error) {
|
||||||
ipList = set.NewStringSet()
|
var ips []net.IP
|
||||||
ips, err := net.LookupIP(host)
|
|
||||||
if err != nil {
|
if ips, err = net.LookupIP(host); err != nil {
|
||||||
|
// return err if not Docker or Kubernetes
|
||||||
|
// We use IsDocker() method to check for Docker Swarm environment
|
||||||
|
// as there is no reliable way to clearly identify Swarm from
|
||||||
|
// Docker environment.
|
||||||
|
if !IsDocker() && !IsKubernetes() {
|
||||||
return ipList, err
|
return ipList, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// channel to indicate completion of host resolution
|
||||||
|
doneCh := make(chan struct{})
|
||||||
|
// Indicate retry routine to exit cleanly, upon this function return.
|
||||||
|
defer close(doneCh)
|
||||||
|
// Mark the starting time
|
||||||
|
startTime := time.Now()
|
||||||
|
// wait for hosts to resolve in exponentialbackoff manner
|
||||||
|
for _ = range newRetryTimerSimple(doneCh) {
|
||||||
|
// Retry infinitely on Kubernetes and Docker swarm.
|
||||||
|
// This is needed as the remote hosts are sometime
|
||||||
|
// not available immediately.
|
||||||
|
if ips, err = net.LookupIP(host); err == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
// time elapsed
|
||||||
|
timeElapsed := time.Since(startTime)
|
||||||
|
// log error only if more than 1s elapsed
|
||||||
|
if timeElapsed > time.Second {
|
||||||
|
// log the message to console about the host not being
|
||||||
|
// resolveable.
|
||||||
|
errorIf(err, "Unable to resolve host %s (%s)", host,
|
||||||
|
humanize.RelTime(startTime, startTime.Add(timeElapsed), "elapsed", ""))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ipList = set.NewStringSet()
|
||||||
for _, ip := range ips {
|
for _, ip := range ips {
|
||||||
if ip.To4() != nil {
|
if ip.To4() != nil {
|
||||||
ipList.Add(ip.String())
|
ipList.Add(ip.String())
|
||||||
|
Loading…
Reference in New Issue
Block a user