mirror of https://github.com/minio/minio.git
fix: simplify healthcheck code to freeze calls only once (#15082)
- Currently the subnet health check was freezing S3 calls and taking locks at multiple locations; avoid that and freeze only once.
- Throw an error if the first attempt itself fails with no results.
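For context, a minimal, self-contained sketch of the pattern this change adopts: freeze incoming S3 calls once at the top of the handler and undo it with a single deferred call, instead of freezing and locking inside each per-check closure. The freezer type and healthInfo function below are illustrative stand-ins, not MinIO code; only the ServiceFreeze name mirrors the call visible in the diff.

package main

import (
	"fmt"
	"sync"
)

// freezer is an illustrative stand-in for globalNotificationSys.ServiceFreeze.
type freezer struct {
	mu     sync.Mutex
	frozen bool
}

func (f *freezer) ServiceFreeze(freeze bool) {
	f.mu.Lock()
	defer f.mu.Unlock()
	f.frozen = freeze
}

// healthInfo runs every sub-check under a single freeze/unfreeze pair,
// mirroring how the handler now freezes once instead of per check.
func healthInfo(f *freezer, checks ...func()) {
	// Freeze all incoming calls once, before any perf check runs.
	f.ServiceFreeze(true)
	// Unfreeze exactly once when the handler returns, on every return path.
	defer f.ServiceFreeze(false)

	for _, check := range checks {
		check()
	}
}

func main() {
	var f freezer
	healthInfo(&f,
		func() { fmt.Println("drive perf check") },
		func() { fmt.Println("object perf check") },
		func() { fmt.Println("net perf check") },
	)
}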
This commit is contained in:
parent 14645142db
commit d2a10dbe69
@@ -1110,7 +1110,6 @@ func (a adminAPIHandlers) ObjectSpeedtestHandler(w http.ResponseWriter, r *http.
     }
 
     sufficientCapacity, canAutotune, capacityErrMsg := validateObjPerfOptions(ctx, objectAPI, concurrent, size, autotune)
-
     if !sufficientCapacity {
         writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, AdminError{
             Code: "XMinioSpeedtestInsufficientCapacity",
@@ -1874,6 +1873,12 @@ func (a adminAPIHandlers) HealthInfoHandler(w http.ResponseWriter, r *http.Reque
     healthCtx, healthCancel := context.WithTimeout(lkctx.Context(), deadline)
     defer healthCancel()
 
+    // Freeze all incoming S3 API calls before running speedtest.
+    globalNotificationSys.ServiceFreeze(ctx, true)
+
+    // unfreeze all incoming S3 API calls after speedtest.
+    defer globalNotificationSys.ServiceFreeze(ctx, false)
+
     hostAnonymizer := createHostAnonymizer()
     // anonAddr - Anonymizes hosts in given input string.
     anonAddr := func(addr string) string {
@@ -2111,11 +2116,6 @@ func (a adminAPIHandlers) HealthInfoHandler(w http.ResponseWriter, r *http.Reque
 
     getAndWriteDrivePerfInfo := func() {
         if query.Get(string(madmin.HealthDataTypePerfDrive)) == "true" {
-            // Freeze all incoming S3 API calls before running speedtest.
-            globalNotificationSys.ServiceFreeze(ctx, true)
-            // unfreeze all incoming S3 API calls after speedtest.
-            defer globalNotificationSys.ServiceFreeze(ctx, false)
-
             opts := madmin.DriveSpeedTestOpts{
                 Serial: false,
                 BlockSize: 4 * humanize.MiByte,
@@ -2136,6 +2136,10 @@ func (a adminAPIHandlers) HealthInfoHandler(w http.ResponseWriter, r *http.Reque
     getAndWriteObjPerfInfo := func() {
         if query.Get(string(madmin.HealthDataTypePerfObj)) == "true" {
             concurrent := 32
+            if runtime.GOMAXPROCS(0) < concurrent {
+                concurrent = runtime.GOMAXPROCS(0)
+            }
+
             size := 64 * humanize.MiByte
             autotune := true
 
@@ -2162,12 +2166,6 @@ func (a adminAPIHandlers) HealthInfoHandler(w http.ResponseWriter, r *http.Reque
                 defer deleteObjectPerfBucket(objectAPI)
             }
 
-            // Freeze all incoming S3 API calls before running speedtest.
-            globalNotificationSys.ServiceFreeze(ctx, true)
-
-            // unfreeze all incoming S3 API calls after speedtest.
-            defer globalNotificationSys.ServiceFreeze(ctx, false)
-
             opts := speedTestOpts{
                 throughputSize: size,
                 concurrencyStart: concurrent,
@@ -2189,19 +2187,12 @@ func (a adminAPIHandlers) HealthInfoHandler(w http.ResponseWriter, r *http.Reque
                 return
             }
-            nsLock := objectAPI.NewNSLock(minioMetaBucket, "netperf")
-            lkctx, err := nsLock.GetLock(ctx, globalOperationTimeout)
-            if err != nil {
-                healthInfo.Perf.Error = "Could not acquire lock for netperf: " + err.Error()
-            } else {
-                defer nsLock.Unlock(lkctx.Cancel)
-
-                netPerf := globalNotificationSys.Netperf(ctx, time.Second*10)
-                for _, np := range netPerf {
-                    np.Endpoint = anonAddr(np.Endpoint)
-                    healthInfo.Perf.NetPerf = append(healthInfo.Perf.NetPerf, np)
-                }
+
+            netPerf := globalNotificationSys.Netperf(ctx, time.Second*10)
+            for _, np := range netPerf {
+                np.Endpoint = anonAddr(np.Endpoint)
+                healthInfo.Perf.NetPerf = append(healthInfo.Perf.NetPerf, np)
             }
 
             partialWrite(healthInfo)
         }
     }
@@ -87,7 +87,7 @@ func selfSpeedtest(ctx context.Context, size, concurrent int, duration time.Dura
         uploadsCancel()
     }()
 
-    objNamePrefix := uuid.New().String() + "/"
+    objNamePrefix := uuid.New().String() + SlashSeparator
 
     userMetadata := make(map[string]string)
     userMetadata[globalObjectPerfUserMetadata] = "true"
@@ -98,7 +98,11 @@ func selfSpeedtest(ctx context.Context, size, concurrent int, duration time.Dura
             defer wg.Done()
             for {
                 reader := newRandomReader(size)
-                info, err := client.PutObject(uploadsCtx, globalObjectPerfBucket, fmt.Sprintf("%s%d.%d", objNamePrefix, i, objCountPerThread[i]), reader, int64(size), minio.PutObjectOptions{UserMetadata: userMetadata, DisableMultipart: true}) // Bypass S3 API freeze
+                tmpObjName := fmt.Sprintf("%s%d.%d", objNamePrefix, i, objCountPerThread[i])
+                info, err := client.PutObject(uploadsCtx, globalObjectPerfBucket, tmpObjName, reader, int64(size), minio.PutObjectOptions{
+                    UserMetadata: userMetadata,
+                    DisableMultipart: true,
+                }) // Bypass S3 API freeze
                 if err != nil {
                     if !contextCanceled(uploadsCtx) && !errors.Is(err, context.Canceled) {
                         errOnce.Do(func() {
@@ -19,6 +19,8 @@ package cmd
 
 import (
     "context"
+    "fmt"
+    "net/url"
     "sort"
     "time"
 
@@ -60,6 +62,17 @@ func objectSpeedTest(ctx context.Context, opts speedTestOpts) chan madmin.SpeedT
             if throughputHighestResults[i].Error != "" {
                 errStr = throughputHighestResults[i].Error
             }
+
+            // if the default concurrency yields zero results, throw an error.
+            if throughputHighestResults[i].Downloads == 0 && opts.concurrencyStart == concurrency {
+                errStr = fmt.Sprintf("no results for downloads upon first attempt, concurrency %d and duration %s", opts.concurrencyStart, opts.duration)
+            }
+
+            // if the default concurrency yields zero results, throw an error.
+            if throughputHighestResults[i].Uploads == 0 && opts.concurrencyStart == concurrency {
+                errStr = fmt.Sprintf("no results for uploads upon first attempt, concurrency %d and duration %s", opts.concurrencyStart, opts.duration)
+            }
+
             result.PUTStats.Servers = append(result.PUTStats.Servers, madmin.SpeedTestStatServer{
                 Endpoint: throughputHighestResults[i].Endpoint,
                 ThroughputPerSec: throughputHighestResults[i].Uploads / uint64(durationSecs),
@@ -171,9 +184,19 @@ func driveSpeedTest(ctx context.Context, opts madmin.DriveSpeedTestOpts) madmin.
         return tmpPaths
     }()
 
+    scheme := "http"
+    if globalIsTLS {
+        scheme = "https"
+    }
+
+    u := &url.URL{
+        Scheme: scheme,
+        Host: globalLocalNodeName,
+    }
+
     perfs, err := perf.Run(ctx, paths...)
     return madmin.DriveSpeedTestResult{
-        Endpoint: globalLocalNodeName,
+        Endpoint: u.String(),
         Version: Version,
         DrivePerf: func() (results []madmin.DrivePerf) {
             for idx, r := range perfs {
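On the second bullet of the commit message, a small standalone sketch of the first-attempt check added to objectSpeedTest above: when the run at the starting concurrency yields zero uploads or downloads, an error is reported instead of autotuning further. The attemptResult type and checkFirstAttempt function are illustrative names, not part of the MinIO source.

package main

import "fmt"

// attemptResult is an illustrative summary of one speedtest run.
type attemptResult struct {
	Concurrency int
	Uploads     uint64
	Downloads   uint64
}

// checkFirstAttempt mirrors the added checks: an error is raised only when a
// zero result comes from the very first attempt, i.e. the starting concurrency.
func checkFirstAttempt(r attemptResult, startConcurrency int) error {
	if r.Concurrency != startConcurrency {
		return nil // a later, autotuned attempt; let it proceed
	}
	if r.Downloads == 0 {
		return fmt.Errorf("no results for downloads upon first attempt, concurrency %d", startConcurrency)
	}
	if r.Uploads == 0 {
		return fmt.Errorf("no results for uploads upon first attempt, concurrency %d", startConcurrency)
	}
	return nil
}

func main() {
	if err := checkFirstAttempt(attemptResult{Concurrency: 32, Uploads: 0, Downloads: 10}, 32); err != nil {
		fmt.Println("speedtest error:", err)
	}
}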