diff --git a/cmd/admin-handlers.go b/cmd/admin-handlers.go
index 097939183..2890e9069 100644
--- a/cmd/admin-handlers.go
+++ b/cmd/admin-handlers.go
@@ -291,6 +291,8 @@ type ServerHTTPStats struct {
 	CurrentS3Requests      ServerHTTPAPIStats `json:"currentS3Requests"`
 	TotalS3Requests        ServerHTTPAPIStats `json:"totalS3Requests"`
 	TotalS3Errors          ServerHTTPAPIStats `json:"totalS3Errors"`
+	TotalS35xxErrors       ServerHTTPAPIStats `json:"totalS35xxErrors"`
+	TotalS34xxErrors       ServerHTTPAPIStats `json:"totalS34xxErrors"`
 	TotalS3Canceled        ServerHTTPAPIStats `json:"totalS3Canceled"`
 	TotalS3RejectedAuth    uint64             `json:"totalS3RejectedAuth"`
 	TotalS3RejectedTime    uint64             `json:"totalS3RejectedTime"`
diff --git a/cmd/generic-handlers.go b/cmd/generic-handlers.go
index 80af8c7d5..5479dced4 100644
--- a/cmd/generic-handlers.go
+++ b/cmd/generic-handlers.go
@@ -115,9 +115,11 @@ func setRequestLimitHandler(h http.Handler) http.Handler {
 
 // Reserved bucket.
 const (
-	minioReservedBucket     = "minio"
-	minioReservedBucketPath = SlashSeparator + minioReservedBucket
-	loginPathPrefix         = SlashSeparator + "login"
+	minioReservedBucket              = "minio"
+	minioReservedBucketPath          = SlashSeparator + minioReservedBucket
+	minioReservedBucketPathWithSlash = SlashSeparator + minioReservedBucket + SlashSeparator
+
+	loginPathPrefix = SlashSeparator + "login"
 )
 
 func guessIsBrowserReq(r *http.Request) bool {
diff --git a/cmd/http-stats.go b/cmd/http-stats.go
index 9041d4bc9..7b9361250 100644
--- a/cmd/http-stats.go
+++ b/cmd/http-stats.go
@@ -148,6 +148,8 @@ type HTTPStats struct {
 	currentS3Requests       HTTPAPIStats
 	totalS3Requests         HTTPAPIStats
 	totalS3Errors           HTTPAPIStats
+	totalS34xxErrors        HTTPAPIStats
+	totalS35xxErrors        HTTPAPIStats
 	totalS3Canceled         HTTPAPIStats
 }
 
@@ -178,6 +180,12 @@ func (st *HTTPStats) toServerHTTPStats() ServerHTTPStats {
 	serverStats.TotalS3Errors = ServerHTTPAPIStats{
 		APIStats: st.totalS3Errors.Load(),
 	}
+	serverStats.TotalS34xxErrors = ServerHTTPAPIStats{
+		APIStats: st.totalS34xxErrors.Load(),
+	}
+	serverStats.TotalS35xxErrors = ServerHTTPAPIStats{
+		APIStats: st.totalS35xxErrors.Load(),
+	}
 	serverStats.TotalS3Canceled = ServerHTTPAPIStats{
 		APIStats: st.totalS3Canceled.Load(),
 	}
@@ -186,27 +194,34 @@ func (st *HTTPStats) toServerHTTPStats() ServerHTTPStats {
 
 // Update statistics from http request and response data
 func (st *HTTPStats) updateStats(api string, r *http.Request, w *logger.ResponseWriter) {
-	// A successful request has a 2xx response code or < 4xx response
-	successReq := w.StatusCode >= 200 && w.StatusCode < 400
-
-	if !strings.HasSuffix(r.URL.Path, prometheusMetricsPathLegacy) ||
-		!strings.HasSuffix(r.URL.Path, prometheusMetricsV2ClusterPath) ||
-		!strings.HasSuffix(r.URL.Path, prometheusMetricsV2NodePath) {
-		st.totalS3Requests.Inc(api)
-		if !successReq {
-			switch w.StatusCode {
-			case 0:
-			case 499:
-				// 499 is a good error, shall be counted as canceled.
-				st.totalS3Canceled.Inc(api)
-			default:
-				st.totalS3Errors.Inc(api)
-			}
-		}
+	// Ignore non S3 requests, i.e. paths under the reserved bucket prefix.
+	if strings.HasPrefix(r.URL.Path, minioReservedBucketPathWithSlash) {
+		return
 	}
 
+	// Count the request itself; the counters below only track failures.
+	st.totalS3Requests.Inc(api)
+
 	// Increment the prometheus http request response histogram with appropriate label
 	httpRequestsDuration.With(prometheus.Labels{"api": api}).Observe(w.TimeToFirstByte.Seconds())
+
+	code := w.StatusCode
+
+	switch {
+	case code == 0:
+		// Status code 0 is neither an error nor a cancellation; count nothing.
+	case code == 499:
+		// 499 is a good error, shall be counted as canceled.
+		st.totalS3Canceled.Inc(api)
+	case code >= http.StatusBadRequest:
+		// Every other 4xx/5xx response is an error and is also bucketed by class.
+		st.totalS3Errors.Inc(api)
+		if code >= http.StatusInternalServerError {
+			st.totalS35xxErrors.Inc(api)
+		} else {
+			st.totalS34xxErrors.Inc(api)
+		}
+	}
 }
 
 // Prepare new HTTPStats structure
diff --git a/cmd/metrics-v2.go b/cmd/metrics-v2.go
index 535e272fa..05746a3ba 100644
--- a/cmd/metrics-v2.go
+++ b/cmd/metrics-v2.go
@@ -603,7 +603,27 @@ func getS3RequestsErrorsMD() MetricDescription {
 		Namespace: s3MetricNamespace,
 		Subsystem: requestsSubsystem,
 		Name:      errorsTotal,
-		Help:      "Total number S3 requests with errors",
+		Help:      "Total number S3 requests with (4xx and 5xx) errors",
+		Type:      counterMetric,
+	}
+}
+
+func getS3Requests4xxErrorsMD() MetricDescription {
+	return MetricDescription{
+		Namespace: s3MetricNamespace,
+		Subsystem: requestsSubsystem,
+		Name:      "4xx_" + errorsTotal,
+		Help:      "Total number S3 requests with (4xx) errors",
+		Type:      counterMetric,
+	}
+}
+
+func getS3Requests5xxErrorsMD() MetricDescription {
+	return MetricDescription{
+		Namespace: s3MetricNamespace,
+		Subsystem: requestsSubsystem,
+		Name:      "5xx_" + errorsTotal,
+		Help:      "Total number S3 requests with (5xx) errors",
 		Type:      counterMetric,
 	}
 }
@@ -1488,7 +1508,9 @@ func getHTTPMetrics() *MetricsGroup {
 		metrics = make([]Metric, 0, 3+
 			len(httpStats.CurrentS3Requests.APIStats)+
 			len(httpStats.TotalS3Requests.APIStats)+
-			len(httpStats.TotalS3Errors.APIStats))
+			len(httpStats.TotalS3Errors.APIStats)+
+			len(httpStats.TotalS35xxErrors.APIStats)+
+			len(httpStats.TotalS34xxErrors.APIStats))
 		metrics = append(metrics, Metric{
 			Description: getS3RejectedAuthRequestsTotalMD(),
 			Value:       float64(httpStats.TotalS3RejectedAuth),
@@ -1535,6 +1557,20 @@ func getHTTPMetrics() *MetricsGroup {
 				VariableLabels: map[string]string{"api": api},
 			})
 		}
+		for api, value := range httpStats.TotalS35xxErrors.APIStats {
+			metrics = append(metrics, Metric{
+				Description:    getS3Requests5xxErrorsMD(),
+				Value:          float64(value),
+				VariableLabels: map[string]string{"api": api},
+			})
+		}
+		for api, value := range httpStats.TotalS34xxErrors.APIStats {
+			metrics = append(metrics, Metric{
+				Description:    getS3Requests4xxErrorsMD(),
+				Value:          float64(value),
+				VariableLabels: map[string]string{"api": api},
+			})
+		}
 		for api, value := range httpStats.TotalS3Canceled.APIStats {
 			metrics = append(metrics, Metric{
 				Description: getS3RequestsCanceledMD(),
diff --git a/docs/metrics/prometheus/list.md b/docs/metrics/prometheus/list.md
index 1310df62b..98fe647a0 100644
--- a/docs/metrics/prometheus/list.md
+++ b/docs/metrics/prometheus/list.md
@@ -52,7 +52,9 @@ These metrics can be from any MinIO server once per collection.
 | `minio_node_process_uptime_seconds` | Uptime for MinIO process per node in seconds. |
 | `minio_node_syscall_read_total` | Total read SysCalls to the kernel. /proc/[pid]/io syscr |
 | `minio_node_syscall_write_total` | Total write SysCalls to the kernel. /proc/[pid]/io syscw |
-| `minio_s3_requests_error_total` | Total number S3 requests with errors |
+| `minio_s3_requests_errors_total` | Total number S3 requests with 4xx and 5xx errors |
+| `minio_s3_requests_4xx_errors_total` | Total number S3 requests with 4xx errors |
+| `minio_s3_requests_5xx_errors_total` | Total number S3 requests with 5xx errors |
 | `minio_s3_requests_inflight_total` | Total number of S3 requests currently in flight |
 | `minio_s3_requests_total` | Total number S3 requests |
 | `minio_s3_time_ttfb_seconds_distribution` | Distribution of the time to first byte across API calls. |