metrics: Add the number of requests in the waiting queue (#11580)

This metric can be used to check whether too many S3 clients are waiting
in the queue, which could explain why some of those clients are timing out.

```
minio_s3_requests_waiting_total{server="127.0.0.1:9000"} 9981
```

If max_requests is 10000 then there is a strong possibility that clients
are timing out because of the queue deadline.
This commit is contained in:
Anis Elleuch 2021-02-20 09:21:55 +01:00 committed by GitHub
parent 173284903b
commit 98d3f94996
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 27 additions and 4 deletions

View File

@ -259,6 +259,7 @@ type ServerHTTPAPIStats struct {
// ServerHTTPStats holds all type of http operations performed to/from the server // ServerHTTPStats holds all type of http operations performed to/from the server
// including their average execution time. // including their average execution time.
type ServerHTTPStats struct { type ServerHTTPStats struct {
S3RequestsInQueue int32 `json:"s3RequestsInQueue"`
CurrentS3Requests ServerHTTPAPIStats `json:"currentS3Requests"` CurrentS3Requests ServerHTTPAPIStats `json:"currentS3Requests"`
TotalS3Requests ServerHTTPAPIStats `json:"totalS3Requests"` TotalS3Requests ServerHTTPAPIStats `json:"totalS3Requests"`
TotalS3Errors ServerHTTPAPIStats `json:"totalS3Errors"` TotalS3Errors ServerHTTPAPIStats `json:"totalS3Errors"`

View File

@ -136,20 +136,25 @@ func maxClients(f http.HandlerFunc) http.HandlerFunc {
return return
} }
globalHTTPStats.addRequestsInQueue(1)
deadlineTimer := time.NewTimer(deadline) deadlineTimer := time.NewTimer(deadline)
defer deadlineTimer.Stop() defer deadlineTimer.Stop()
select { select {
case pool <- struct{}{}: case pool <- struct{}{}:
defer func() { <-pool }() defer func() { <-pool }()
globalHTTPStats.addRequestsInQueue(-1)
f.ServeHTTP(w, r) f.ServeHTTP(w, r)
case <-deadlineTimer.C: case <-deadlineTimer.C:
// Send a http timeout message // Send a http timeout message
writeErrorResponse(r.Context(), w, writeErrorResponse(r.Context(), w,
errorCodes.ToAPIErr(ErrOperationMaxedOut), errorCodes.ToAPIErr(ErrOperationMaxedOut),
r.URL, guessIsBrowserReq(r)) r.URL, guessIsBrowserReq(r))
globalHTTPStats.addRequestsInQueue(-1)
return return
case <-r.Context().Done(): case <-r.Context().Done():
globalHTTPStats.addRequestsInQueue(-1)
return return
} }
} }

View File

@ -137,23 +137,26 @@ func (stats *HTTPAPIStats) Load() map[string]int {
// HTTPStats holds statistics information about // HTTPStats holds statistics information about
// HTTP requests made by all clients // HTTP requests made by all clients
type HTTPStats struct { type HTTPStats struct {
s3RequestsInQueue int32
currentS3Requests HTTPAPIStats currentS3Requests HTTPAPIStats
totalS3Requests HTTPAPIStats totalS3Requests HTTPAPIStats
totalS3Errors HTTPAPIStats totalS3Errors HTTPAPIStats
} }
// addRequestsInQueue atomically adds i to the number of S3 requests
// waiting in the queue. Pass a negative i to decrement the count.
func (st *HTTPStats) addRequestsInQueue(i int32) {
	queued := &st.s3RequestsInQueue
	atomic.AddInt32(queued, i)
}
// Converts http stats into struct to be sent back to the client. // Converts http stats into struct to be sent back to the client.
func (st *HTTPStats) toServerHTTPStats() ServerHTTPStats { func (st *HTTPStats) toServerHTTPStats() ServerHTTPStats {
serverStats := ServerHTTPStats{} serverStats := ServerHTTPStats{}
serverStats.S3RequestsInQueue = atomic.LoadInt32(&st.s3RequestsInQueue)
serverStats.CurrentS3Requests = ServerHTTPAPIStats{ serverStats.CurrentS3Requests = ServerHTTPAPIStats{
APIStats: st.currentS3Requests.Load(), APIStats: st.currentS3Requests.Load(),
} }
serverStats.TotalS3Requests = ServerHTTPAPIStats{ serverStats.TotalS3Requests = ServerHTTPAPIStats{
APIStats: st.totalS3Requests.Load(), APIStats: st.totalS3Requests.Load(),
} }
serverStats.TotalS3Errors = ServerHTTPAPIStats{ serverStats.TotalS3Errors = ServerHTTPAPIStats{
APIStats: st.totalS3Errors.Load(), APIStats: st.totalS3Errors.Load(),
} }

View File

@ -78,6 +78,7 @@ const (
inflightTotal MetricName = "inflight_total" inflightTotal MetricName = "inflight_total"
limitTotal MetricName = "limit_total" limitTotal MetricName = "limit_total"
missedTotal MetricName = "missed_total" missedTotal MetricName = "missed_total"
waitingTotal MetricName = "waiting_total"
objectTotal MetricName = "object_total" objectTotal MetricName = "object_total"
offlineTotal MetricName = "offline_total" offlineTotal MetricName = "offline_total"
onlineTotal MetricName = "online_total" onlineTotal MetricName = "online_total"
@ -386,10 +387,19 @@ func getS3RequestsInFlightMD() MetricDescription {
Namespace: s3MetricNamespace, Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem, Subsystem: requestsSubsystem,
Name: inflightTotal, Name: inflightTotal,
Help: "Total number of S3 requests currently in flight.", Help: "Total number of S3 requests currently in flight",
Type: gaugeMetric, Type: gaugeMetric,
} }
} }
// getS3RequestsInQueueMD returns the description of the metric that reports
// how many S3 requests are waiting in the queue.
//
// The value is a point-in-time reading that is both incremented and
// decremented as requests enter and leave the queue, so it is exposed as a
// gauge. A counter is expected to only ever increase, and consumers applying
// rate()/increase() to it would misread every decrease as a counter reset.
func getS3RequestsInQueueMD() MetricDescription {
	return MetricDescription{
		Namespace: s3MetricNamespace,
		Subsystem: requestsSubsystem,
		Name:      waitingTotal,
		Help:      "Number of S3 requests in the waiting queue",
		Type:      gaugeMetric,
	}
}
func getS3RequestsTotalMD() MetricDescription { func getS3RequestsTotalMD() MetricDescription {
return MetricDescription{ return MetricDescription{
Namespace: s3MetricNamespace, Namespace: s3MetricNamespace,
@ -939,6 +949,10 @@ func getHTTPMetrics() MetricsGroup {
Metrics: []Metric{}, Metrics: []Metric{},
initialize: func(ctx context.Context, metrics *MetricsGroup) { initialize: func(ctx context.Context, metrics *MetricsGroup) {
httpStats := globalHTTPStats.toServerHTTPStats() httpStats := globalHTTPStats.toServerHTTPStats()
metrics.Metrics = append(metrics.Metrics, Metric{
Description: getS3RequestsInQueueMD(),
Value: float64(httpStats.S3RequestsInQueue),
})
for api, value := range httpStats.CurrentS3Requests.APIStats { for api, value := range httpStats.CurrentS3Requests.APIStats {
metrics.Metrics = append(metrics.Metrics, Metric{ metrics.Metrics = append(metrics.Metrics, Metric{
Description: getS3RequestsInFlightMD(), Description: getS3RequestsInFlightMD(),