add support for bucket level request count per API (#17468)

New metrics added to calculate API request count
per bucket, per API.  Captures errors, including
4xx, 5xx HTTP status codes separately.
This commit is contained in:
Harshavardhana
2023-06-21 09:41:59 -07:00
committed by GitHub
parent ccc5801112
commit 7605d07bb2
9 changed files with 578 additions and 340 deletions

View File

@@ -184,6 +184,7 @@ const (
ErrBucketAlreadyExists
ErrMetadataTooLarge
ErrUnsupportedMetadata
ErrUnsupportedHostHeader
ErrMaximumExpires
ErrSlowDownRead
ErrSlowDownWrite
@@ -1442,6 +1443,11 @@ var errorCodes = errorCodeMap{
Description: "Your metadata headers are not supported.",
HTTPStatusCode: http.StatusBadRequest,
},
ErrUnsupportedHostHeader: {
Code: "InvalidArgument",
Description: "Your Host header is malformed.",
HTTPStatusCode: http.StatusBadRequest,
},
ErrObjectTampered: {
Code: "XMinioObjectTampered",
Description: errObjectTampered.Error(),

File diff suppressed because one or more lines are too long

View File

@@ -245,8 +245,9 @@ var (
// Global HTTP request statisitics
globalHTTPStats = newHTTPStats()
// Global bucket network statistics
// Global bucket network and API statistics
globalBucketConnStats = newBucketConnStats()
globalBucketHTTPStats = newBucketHTTPStats()
// Time when the server is started
globalBootTime = UTCNow()

View File

@@ -269,9 +269,26 @@ func trimAwsChunkedContentEncoding(contentEnc string) (trimmedContentEnc string)
func collectAPIStats(api string, f http.HandlerFunc) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
resource, err := getResource(r.URL.Path, r.Host, globalDomainNames)
if err != nil {
defer logger.AuditLog(r.Context(), w, r, mustGetClaimsFromToken(r))
apiErr := errorCodes.ToAPIErr(ErrUnsupportedHostHeader)
apiErr.Description = fmt.Sprintf("%s: %v", apiErr.Description, err)
writeErrorResponse(r.Context(), w, apiErr, r.URL)
return
}
bucket, _ := path2BucketObject(resource)
globalHTTPStats.currentS3Requests.Inc(api)
defer globalHTTPStats.currentS3Requests.Dec(api)
if bucket != "" && bucket != minioReservedBucket {
globalBucketHTTPStats.updateHTTPStats(bucket, api, nil)
}
f.ServeHTTP(w, r)
tc, ok := r.Context().Value(mcontext.ContextTraceKey).(*mcontext.TraceCtxt)
@@ -299,16 +316,10 @@ func collectAPIStats(api string, f http.HandlerFunc) http.HandlerFunc {
globalConnStats.incS3InputBytes(int64(tc.RequestRecorder.Size()))
globalConnStats.incS3OutputBytes(int64(tc.ResponseRecorder.Size()))
resource, err := getResource(r.URL.Path, r.Host, globalDomainNames)
if err != nil {
logger.LogIf(r.Context(), fmt.Errorf("Unable to get the actual resource in the incoming request: %v", err))
return
}
bucket, _ := path2BucketObject(resource)
if bucket != "" && bucket != minioReservedBucket {
globalBucketConnStats.incS3InputBytes(bucket, int64(tc.RequestRecorder.Size()))
globalBucketConnStats.incS3OutputBytes(bucket, int64(tc.ResponseRecorder.Size()))
globalBucketHTTPStats.updateHTTPStats(bucket, api, tc.ResponseRecorder)
}
}
}

View File

@@ -120,6 +120,105 @@ type bucketS3RXTX struct {
s3OutputBytes uint64
}
type bucketHTTPAPIStats struct {
currentS3Requests *HTTPAPIStats
totalS3Requests *HTTPAPIStats
totalS34xxErrors *HTTPAPIStats
totalS35xxErrors *HTTPAPIStats
totalS3Canceled *HTTPAPIStats
}
type bucketHTTPStats struct {
sync.RWMutex
httpStats map[string]bucketHTTPAPIStats
}
func newBucketHTTPStats() *bucketHTTPStats {
return &bucketHTTPStats{
httpStats: make(map[string]bucketHTTPAPIStats),
}
}
func (bh *bucketHTTPStats) delete(bucket string) {
bh.Lock()
defer bh.Unlock()
delete(bh.httpStats, bucket)
}
func (bh *bucketHTTPStats) updateHTTPStats(bucket, api string, w *xhttp.ResponseRecorder) {
if bh == nil {
return
}
bh.Lock()
defer bh.Unlock()
hstats, ok := bh.httpStats[bucket]
if !ok {
hstats = bucketHTTPAPIStats{
currentS3Requests: &HTTPAPIStats{},
totalS3Requests: &HTTPAPIStats{},
totalS3Canceled: &HTTPAPIStats{},
totalS34xxErrors: &HTTPAPIStats{},
totalS35xxErrors: &HTTPAPIStats{},
}
}
if w == nil { // when response recorder nil, this is an active request
hstats.currentS3Requests.Inc(api)
bh.httpStats[bucket] = hstats
return
} // else {
hstats.currentS3Requests.Dec(api) // decrement this once we have the response recorder.
hstats.totalS3Requests.Inc(api)
code := w.StatusCode
switch {
case code == 0:
case code == 499:
// 499 is a good error, shall be counted as canceled.
hstats.totalS3Canceled.Inc(api)
case code >= http.StatusBadRequest:
if code >= http.StatusInternalServerError {
hstats.totalS35xxErrors.Inc(api)
} else {
hstats.totalS34xxErrors.Inc(api)
}
}
bh.httpStats[bucket] = hstats
}
func (bh *bucketHTTPStats) load(bucket string) bucketHTTPAPIStats {
if bh == nil {
return bucketHTTPAPIStats{
currentS3Requests: &HTTPAPIStats{},
totalS3Requests: &HTTPAPIStats{},
totalS3Canceled: &HTTPAPIStats{},
totalS34xxErrors: &HTTPAPIStats{},
totalS35xxErrors: &HTTPAPIStats{},
}
}
bh.RLock()
defer bh.RUnlock()
val, ok := bh.httpStats[bucket]
if ok {
return val
}
return bucketHTTPAPIStats{
currentS3Requests: &HTTPAPIStats{},
totalS3Requests: &HTTPAPIStats{},
totalS3Canceled: &HTTPAPIStats{},
totalS34xxErrors: &HTTPAPIStats{},
totalS35xxErrors: &HTTPAPIStats{},
}
}
type bucketConnStats struct {
sync.RWMutex
stats map[string]*bucketS3RXTX
@@ -225,10 +324,32 @@ func (stats *HTTPAPIStats) Dec(api string) {
}
}
// Get returns the current counter on input API string
func (stats *HTTPAPIStats) Get(api string) int {
if stats == nil {
return 0
}
stats.RLock()
defer stats.RUnlock()
val, ok := stats.apiStats[api]
if ok {
return val
}
return 0
}
// Load returns the recorded stats.
func (stats *HTTPAPIStats) Load() map[string]int {
stats.Lock()
defer stats.Unlock()
if stats == nil {
return map[string]int{}
}
stats.RLock()
defer stats.RUnlock()
apiStats := make(map[string]int, len(stats.apiStats))
for k, v := range stats.apiStats {
apiStats[k] = v

View File

@@ -483,7 +483,7 @@ func getUsageLastScanActivityMD() MetricDescription {
Namespace: minioMetricNamespace,
Subsystem: usageSubsystem,
Name: lastActivityTime,
Help: "Time elapsed (in nano seconds) since last scan activity. This is set to 0 until first scan cycle",
Help: "Time elapsed (in nano seconds) since last scan activity.",
Type: gaugeMetric,
}
}
@@ -703,7 +703,7 @@ func getS3RequestsInQueueMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: waitingTotal,
Help: "Number of S3 requests in the waiting queue",
Help: "Total number of S3 requests in the waiting queue",
Type: gaugeMetric,
}
}
@@ -713,7 +713,7 @@ func getIncomingS3RequestsMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: incomingTotal,
Help: "Volatile number of total incoming S3 requests",
Help: "Total number of incoming S3 requests",
Type: gaugeMetric,
}
}
@@ -723,7 +723,7 @@ func getS3RequestsTotalMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: total,
Help: "Total number S3 requests",
Help: "Total number of S3 requests",
Type: counterMetric,
}
}
@@ -733,7 +733,7 @@ func getS3RequestsErrorsMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: errorsTotal,
Help: "Total number S3 requests with (4xx and 5xx) errors",
Help: "Total number of S3 requests with (4xx and 5xx) errors",
Type: counterMetric,
}
}
@@ -743,7 +743,7 @@ func getS3Requests4xxErrorsMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: "4xx_" + errorsTotal,
Help: "Total number S3 requests with (4xx) errors",
Help: "Total number of S3 requests with (4xx) errors",
Type: counterMetric,
}
}
@@ -753,7 +753,7 @@ func getS3Requests5xxErrorsMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: "5xx_" + errorsTotal,
Help: "Total number S3 requests with (5xx) errors",
Help: "Total number of S3 requests with (5xx) errors",
Type: counterMetric,
}
}
@@ -763,7 +763,7 @@ func getS3RequestsCanceledMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsSubsystem,
Name: canceledTotal,
Help: "Total number S3 requests that were canceled from the client while processing",
Help: "Total number of S3 requests that were canceled by the client",
Type: counterMetric,
}
}
@@ -773,7 +773,7 @@ func getS3RejectedAuthRequestsTotalMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsRejectedSubsystem,
Name: authTotal,
Help: "Total number S3 requests rejected for auth failure",
Help: "Total number of S3 requests rejected for auth failure",
Type: counterMetric,
}
}
@@ -783,7 +783,7 @@ func getS3RejectedHeaderRequestsTotalMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsRejectedSubsystem,
Name: headerTotal,
Help: "Total number S3 requests rejected for invalid header",
Help: "Total number of S3 requests rejected for invalid header",
Type: counterMetric,
}
}
@@ -793,7 +793,7 @@ func getS3RejectedTimestampRequestsTotalMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsRejectedSubsystem,
Name: timestampTotal,
Help: "Total number S3 requests rejected for invalid timestamp",
Help: "Total number of S3 requests rejected for invalid timestamp",
Type: counterMetric,
}
}
@@ -803,7 +803,7 @@ func getS3RejectedInvalidRequestsTotalMD() MetricDescription {
Namespace: s3MetricNamespace,
Subsystem: requestsRejectedSubsystem,
Name: invalidTotal,
Help: "Total number S3 invalid requests",
Help: "Total number of invalid S3 requests",
Type: counterMetric,
}
}
@@ -913,7 +913,7 @@ func getHealLastActivityTimeMD() MetricDescription {
Namespace: healMetricNamespace,
Subsystem: timeSubsystem,
Name: lastActivityTime,
Help: "Time elapsed (in nano seconds) since last self healing activity. This is set to -1 until initial self heal activity",
Help: "Time elapsed (in nano seconds) since last self healing activity.",
Type: gaugeMetric,
}
}
@@ -1309,6 +1309,56 @@ func getExpiryPendingTasksMD() MetricDescription {
}
}
func getBucketS3RequestsInFlightMD() MetricDescription {
return MetricDescription{
Namespace: bucketMetricNamespace,
Subsystem: requestsSubsystem,
Name: inflightTotal,
Help: "Total number of S3 requests currently in flight on a bucket",
Type: gaugeMetric,
}
}
func getBucketS3RequestsTotalMD() MetricDescription {
return MetricDescription{
Namespace: bucketMetricNamespace,
Subsystem: requestsSubsystem,
Name: total,
Help: "Total number of S3 requests on a bucket",
Type: counterMetric,
}
}
func getBucketS3Requests4xxErrorsMD() MetricDescription {
return MetricDescription{
Namespace: bucketMetricNamespace,
Subsystem: requestsSubsystem,
Name: "4xx_" + errorsTotal,
Help: "Total number of S3 requests with (4xx) errors on a bucket",
Type: counterMetric,
}
}
func getBucketS3Requests5xxErrorsMD() MetricDescription {
return MetricDescription{
Namespace: bucketMetricNamespace,
Subsystem: requestsSubsystem,
Name: "5xx_" + errorsTotal,
Help: "Total number of S3 requests with (5xx) errors on a bucket",
Type: counterMetric,
}
}
func getBucketS3RequestsCanceledMD() MetricDescription {
return MetricDescription{
Namespace: bucketMetricNamespace,
Subsystem: requestsSubsystem,
Name: canceledTotal,
Help: "Total number of S3 requests that were canceled from the client while processing on a bucket",
Type: counterMetric,
}
}
func getILMNodeMetrics() *MetricsGroup {
mg := &MetricsGroup{
cacheInterval: 10 * time.Second,
@@ -1456,7 +1506,7 @@ func getIAMNodeMetrics() *MetricsGroup {
Namespace: nodeMetricNamespace,
Subsystem: iamSubsystem,
Name: "since_last_sync_millis",
Help: "Time (in milliseconds) since last successful IAM data sync. This is set to 0 until the first sync after server start.",
Help: "Time (in milliseconds) since last successful IAM data sync.",
Type: gaugeMetric,
},
Value: float64(sinceLastSyncMillis),
@@ -2072,6 +2122,47 @@ func getBucketUsageMetrics() *MetricsGroup {
})
}
httpStats := globalBucketHTTPStats.load(bucket)
for k, v := range httpStats.currentS3Requests.Load() {
metrics = append(metrics, Metric{
Description: getBucketS3RequestsInFlightMD(),
Value: float64(v),
VariableLabels: map[string]string{"bucket": bucket, "api": k},
})
}
for k, v := range httpStats.totalS3Requests.Load() {
metrics = append(metrics, Metric{
Description: getBucketS3RequestsTotalMD(),
Value: float64(v),
VariableLabels: map[string]string{"bucket": bucket, "api": k},
})
}
for k, v := range httpStats.totalS3Canceled.Load() {
metrics = append(metrics, Metric{
Description: getBucketS3RequestsCanceledMD(),
Value: float64(v),
VariableLabels: map[string]string{"bucket": bucket, "api": k},
})
}
for k, v := range httpStats.totalS34xxErrors.Load() {
metrics = append(metrics, Metric{
Description: getBucketS3Requests4xxErrorsMD(),
Value: float64(v),
VariableLabels: map[string]string{"bucket": bucket, "api": k},
})
}
for k, v := range httpStats.totalS35xxErrors.Load() {
metrics = append(metrics, Metric{
Description: getBucketS3Requests5xxErrorsMD(),
Value: float64(v),
VariableLabels: map[string]string{"bucket": bucket, "api": k},
})
}
if stats.hasReplicationUsage() {
for arn, stat := range stats.Stats {
metrics = append(metrics, Metric{

View File

@@ -453,6 +453,7 @@ func (sys *NotificationSys) DeleteBucketMetadata(ctx context.Context, bucketName
globalBucketTargetSys.Delete(bucketName)
globalEventNotifier.RemoveNotification(bucketName)
globalBucketConnStats.delete(bucketName)
globalBucketHTTPStats.delete(bucketName)
if localMetacacheMgr != nil {
localMetacacheMgr.deleteBucketCache(bucketName)
}

View File

@@ -533,6 +533,7 @@ func (s *peerRESTServer) DeleteBucketMetadataHandler(w http.ResponseWriter, r *h
globalBucketTargetSys.Delete(bucketName)
globalEventNotifier.RemoveNotification(bucketName)
globalBucketConnStats.delete(bucketName)
globalBucketHTTPStats.delete(bucketName)
if localMetacacheMgr != nil {
localMetacacheMgr.deleteBucketCache(bucketName)
}