fix: simplify healthcheck code to freeze calls only once (#15082)

- currently subnet health check was freezing and calling
  locks at multiple locations, avoid them.

- throw errors if first attempt itself fails with no results
This commit is contained in:
Harshavardhana 2022-06-14 11:22:07 -07:00 committed by GitHub
parent 14645142db
commit d2a10dbe69
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 45 additions and 27 deletions

View File

@ -1110,7 +1110,6 @@ func (a adminAPIHandlers) ObjectSpeedtestHandler(w http.ResponseWriter, r *http.
}
sufficientCapacity, canAutotune, capacityErrMsg := validateObjPerfOptions(ctx, objectAPI, concurrent, size, autotune)
if !sufficientCapacity {
writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, AdminError{
Code: "XMinioSpeedtestInsufficientCapacity",
@ -1874,6 +1873,12 @@ func (a adminAPIHandlers) HealthInfoHandler(w http.ResponseWriter, r *http.Reque
healthCtx, healthCancel := context.WithTimeout(lkctx.Context(), deadline)
defer healthCancel()
// Freeze all incoming S3 API calls before running speedtest.
globalNotificationSys.ServiceFreeze(ctx, true)
// unfreeze all incoming S3 API calls after speedtest.
defer globalNotificationSys.ServiceFreeze(ctx, false)
hostAnonymizer := createHostAnonymizer()
// anonAddr - Anonymizes hosts in given input string.
anonAddr := func(addr string) string {
@ -2111,11 +2116,6 @@ func (a adminAPIHandlers) HealthInfoHandler(w http.ResponseWriter, r *http.Reque
getAndWriteDrivePerfInfo := func() {
if query.Get(string(madmin.HealthDataTypePerfDrive)) == "true" {
// Freeze all incoming S3 API calls before running speedtest.
globalNotificationSys.ServiceFreeze(ctx, true)
// unfreeze all incoming S3 API calls after speedtest.
defer globalNotificationSys.ServiceFreeze(ctx, false)
opts := madmin.DriveSpeedTestOpts{
Serial: false,
BlockSize: 4 * humanize.MiByte,
@ -2136,6 +2136,10 @@ func (a adminAPIHandlers) HealthInfoHandler(w http.ResponseWriter, r *http.Reque
getAndWriteObjPerfInfo := func() {
if query.Get(string(madmin.HealthDataTypePerfObj)) == "true" {
concurrent := 32
if runtime.GOMAXPROCS(0) < concurrent {
concurrent = runtime.GOMAXPROCS(0)
}
size := 64 * humanize.MiByte
autotune := true
@ -2162,12 +2166,6 @@ func (a adminAPIHandlers) HealthInfoHandler(w http.ResponseWriter, r *http.Reque
defer deleteObjectPerfBucket(objectAPI)
}
// Freeze all incoming S3 API calls before running speedtest.
globalNotificationSys.ServiceFreeze(ctx, true)
// unfreeze all incoming S3 API calls after speedtest.
defer globalNotificationSys.ServiceFreeze(ctx, false)
opts := speedTestOpts{
throughputSize: size,
concurrencyStart: concurrent,
@ -2189,19 +2187,12 @@ func (a adminAPIHandlers) HealthInfoHandler(w http.ResponseWriter, r *http.Reque
return
}
nsLock := objectAPI.NewNSLock(minioMetaBucket, "netperf")
lkctx, err := nsLock.GetLock(ctx, globalOperationTimeout)
if err != nil {
healthInfo.Perf.Error = "Could not acquire lock for netperf: " + err.Error()
} else {
defer nsLock.Unlock(lkctx.Cancel)
netPerf := globalNotificationSys.Netperf(ctx, time.Second*10)
for _, np := range netPerf {
np.Endpoint = anonAddr(np.Endpoint)
healthInfo.Perf.NetPerf = append(healthInfo.Perf.NetPerf, np)
}
netPerf := globalNotificationSys.Netperf(ctx, time.Second*10)
for _, np := range netPerf {
np.Endpoint = anonAddr(np.Endpoint)
healthInfo.Perf.NetPerf = append(healthInfo.Perf.NetPerf, np)
}
partialWrite(healthInfo)
}
}

View File

@ -87,7 +87,7 @@ func selfSpeedtest(ctx context.Context, size, concurrent int, duration time.Dura
uploadsCancel()
}()
objNamePrefix := uuid.New().String() + "/"
objNamePrefix := uuid.New().String() + SlashSeparator
userMetadata := make(map[string]string)
userMetadata[globalObjectPerfUserMetadata] = "true"
@ -98,7 +98,11 @@ func selfSpeedtest(ctx context.Context, size, concurrent int, duration time.Dura
defer wg.Done()
for {
reader := newRandomReader(size)
info, err := client.PutObject(uploadsCtx, globalObjectPerfBucket, fmt.Sprintf("%s%d.%d", objNamePrefix, i, objCountPerThread[i]), reader, int64(size), minio.PutObjectOptions{UserMetadata: userMetadata, DisableMultipart: true}) // Bypass S3 API freeze
tmpObjName := fmt.Sprintf("%s%d.%d", objNamePrefix, i, objCountPerThread[i])
info, err := client.PutObject(uploadsCtx, globalObjectPerfBucket, tmpObjName, reader, int64(size), minio.PutObjectOptions{
UserMetadata: userMetadata,
DisableMultipart: true,
}) // Bypass S3 API freeze
if err != nil {
if !contextCanceled(uploadsCtx) && !errors.Is(err, context.Canceled) {
errOnce.Do(func() {

View File

@ -19,6 +19,8 @@ package cmd
import (
"context"
"fmt"
"net/url"
"sort"
"time"
@ -60,6 +62,17 @@ func objectSpeedTest(ctx context.Context, opts speedTestOpts) chan madmin.SpeedT
if throughputHighestResults[i].Error != "" {
errStr = throughputHighestResults[i].Error
}
// if the default concurrency yields zero results, throw an error.
if throughputHighestResults[i].Downloads == 0 && opts.concurrencyStart == concurrency {
errStr = fmt.Sprintf("no results for downloads upon first attempt, concurrency %d and duration %s", opts.concurrencyStart, opts.duration)
}
// if the default concurrency yields zero results, throw an error.
if throughputHighestResults[i].Uploads == 0 && opts.concurrencyStart == concurrency {
errStr = fmt.Sprintf("no results for uploads upon first attempt, concurrency %d and duration %s", opts.concurrencyStart, opts.duration)
}
result.PUTStats.Servers = append(result.PUTStats.Servers, madmin.SpeedTestStatServer{
Endpoint: throughputHighestResults[i].Endpoint,
ThroughputPerSec: throughputHighestResults[i].Uploads / uint64(durationSecs),
@ -171,9 +184,19 @@ func driveSpeedTest(ctx context.Context, opts madmin.DriveSpeedTestOpts) madmin.
return tmpPaths
}()
scheme := "http"
if globalIsTLS {
scheme = "https"
}
u := &url.URL{
Scheme: scheme,
Host: globalLocalNodeName,
}
perfs, err := perf.Run(ctx, paths...)
return madmin.DriveSpeedTestResult{
Endpoint: globalLocalNodeName,
Endpoint: u.String(),
Version: Version,
DrivePerf: func() (results []madmin.DrivePerf) {
for idx, r := range perfs {