diff --git a/cmd/admin-handlers.go b/cmd/admin-handlers.go index fdce73262..349a77334 100644 --- a/cmd/admin-handlers.go +++ b/cmd/admin-handlers.go @@ -1031,26 +1031,75 @@ func toAdminAPIErr(ctx context.Context, err error) APIError { // false if certain conditions are not met. // - input entry is not of the type *trace.Info* // - errOnly entries are to be traced, not status code 2xx, 3xx. -// - all entries to be traced, if not trace only S3 API requests. -func mustTrace(entry interface{}, trcAll, errOnly bool) bool { +// - trace.Info type is asked by opts +func mustTrace(entry interface{}, opts madmin.ServiceTraceOpts) (shouldTrace bool) { trcInfo, ok := entry.(trace.Info) if !ok { return false } - // Handle browser requests separately filter them and return. - if HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+"/upload") { - if errOnly { - return trcInfo.RespInfo.StatusCode >= http.StatusBadRequest + // Override shouldTrace decision with errOnly filtering + defer func() { + if shouldTrace && opts.OnlyErrors { + shouldTrace = trcInfo.RespInfo.StatusCode >= http.StatusBadRequest } + }() + + if opts.Threshold > 0 { + var latency time.Duration + switch trcInfo.TraceType { + case trace.OS: + latency = trcInfo.OSStats.Duration + case trace.Storage: + latency = trcInfo.StorageStats.Duration + case trace.HTTP: + latency = trcInfo.CallStats.Latency + } + if latency < opts.Threshold { + return false + } + } + + if opts.Internal && trcInfo.TraceType == trace.HTTP && HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+SlashSeparator) { return true } - trace := trcAll || !HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+SlashSeparator) - if errOnly { - return trace && trcInfo.RespInfo.StatusCode >= http.StatusBadRequest + if opts.S3 && trcInfo.TraceType == trace.HTTP && !HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+SlashSeparator) { + return true } - return trace + + if opts.Storage && trcInfo.TraceType == trace.Storage { + return true + } + + return opts.OS && trcInfo.TraceType == trace.OS +} + +func extractTraceOptions(r *http.Request) (opts madmin.ServiceTraceOpts, err error) { + q := r.URL.Query() + + opts.OnlyErrors = q.Get("err") == "true" + opts.S3 = q.Get("s3") == "true" + opts.Internal = q.Get("internal") == "true" + opts.Storage = q.Get("storage") == "true" + opts.OS = q.Get("os") == "true" + + // Support deprecated 'all' query + if q.Get("all") == "true" { + opts.S3 = true + opts.Internal = true + opts.Storage = true + opts.OS = true + } + + if t := q.Get("threshold"); t != "" { + d, err := time.ParseDuration(t) + if err != nil { + return opts, err + } + opts.Threshold = d + } + return } // TraceHandler - POST /minio/admin/v3/trace @@ -1059,9 +1108,6 @@ func mustTrace(entry interface{}, trcAll, errOnly bool) bool { func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) { ctx := newContext(r, w, "HTTPTrace") - trcAll := r.URL.Query().Get("all") == "true" - trcErr := r.URL.Query().Get("err") == "true" - // Validate request signature. _, adminAPIErr := checkAdminRequestAuth(ctx, r, iampolicy.TraceAdminAction, "") if adminAPIErr != ErrNone { @@ -1069,6 +1115,12 @@ func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) { return } + traceOpts, err := extractTraceOptions(r) + if err != nil { + writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrInvalidRequest), r.URL) + return + } + setEventStreamHeaders(w) // Trace Publisher and peer-trace-client uses nonblocking send and hence does not wait for slow receivers. @@ -1077,15 +1129,15 @@ func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) { peers, _ := newPeerRestClients(globalEndpoints) - globalHTTPTrace.Subscribe(traceCh, ctx.Done(), func(entry interface{}) bool { - return mustTrace(entry, trcAll, trcErr) + globalTrace.Subscribe(traceCh, ctx.Done(), func(entry interface{}) bool { + return mustTrace(entry, traceOpts) }) for _, peer := range peers { if peer == nil { continue } - peer.Trace(traceCh, ctx.Done(), trcAll, trcErr) + peer.Trace(traceCh, ctx.Done(), traceOpts) } keepAliveTicker := time.NewTicker(500 * time.Millisecond) diff --git a/cmd/background-heal-ops.go b/cmd/background-heal-ops.go index 47c3e610f..08697f7de 100644 --- a/cmd/background-heal-ops.go +++ b/cmd/background-heal-ops.go @@ -66,7 +66,7 @@ func waitForLowHTTPReq(maxIO int, maxWait time.Duration) { // Bucket notification and http trace are not costly, it is okay to ignore them // while counting the number of concurrent connections maxIOFn := func() int { - return maxIO + int(globalHTTPListen.NumSubscribers()) + int(globalHTTPTrace.NumSubscribers()) + return maxIO + int(globalHTTPListen.NumSubscribers()) + int(globalTrace.NumSubscribers()) } tmpMaxWait := maxWait diff --git a/cmd/config/constants.go b/cmd/config/constants.go index 0173f6df4..0551519eb 100644 --- a/cmd/config/constants.go +++ b/cmd/config/constants.go @@ -23,23 +23,21 @@ const ( // Top level common ENVs const ( - EnvAccessKey = "MINIO_ACCESS_KEY" - EnvSecretKey = "MINIO_SECRET_KEY" - EnvRootUser = "MINIO_ROOT_USER" - EnvRootPassword = "MINIO_ROOT_PASSWORD" - EnvAccessKeyOld = "MINIO_ACCESS_KEY_OLD" - EnvSecretKeyOld = "MINIO_SECRET_KEY_OLD" - EnvRootUserOld = "MINIO_ROOT_USER_OLD" - EnvRootPasswordOld = "MINIO_ROOT_PASSWORD_OLD" - EnvBrowser = "MINIO_BROWSER" - EnvDomain = "MINIO_DOMAIN" - EnvRegionName = "MINIO_REGION_NAME" - EnvPublicIPs = "MINIO_PUBLIC_IPS" - EnvFSOSync = "MINIO_FS_OSYNC" - EnvArgs = "MINIO_ARGS" - EnvDNSWebhook = "MINIO_DNS_WEBHOOK_ENDPOINT" - EnvLogPosixTimes = "MINIO_LOG_POSIX_TIMES" - EnvLogPosixThresholdInMS = "MINIO_LOG_POSIX_THRESHOLD_MS" + EnvAccessKey = "MINIO_ACCESS_KEY" + EnvSecretKey = "MINIO_SECRET_KEY" + EnvRootUser = "MINIO_ROOT_USER" + EnvRootPassword = "MINIO_ROOT_PASSWORD" + EnvAccessKeyOld = "MINIO_ACCESS_KEY_OLD" + EnvSecretKeyOld = "MINIO_SECRET_KEY_OLD" + EnvRootUserOld = "MINIO_ROOT_USER_OLD" + EnvRootPasswordOld = "MINIO_ROOT_PASSWORD_OLD" + EnvBrowser = "MINIO_BROWSER" + EnvDomain = "MINIO_DOMAIN" + EnvRegionName = "MINIO_REGION_NAME" + EnvPublicIPs = "MINIO_PUBLIC_IPS" + EnvFSOSync = "MINIO_FS_OSYNC" + EnvArgs = "MINIO_ARGS" + EnvDNSWebhook = "MINIO_DNS_WEBHOOK_ENDPOINT" EnvUpdate = "MINIO_UPDATE" diff --git a/cmd/globals.go b/cmd/globals.go index 84d01918a..5615eb548 100644 --- a/cmd/globals.go +++ b/cmd/globals.go @@ -181,9 +181,9 @@ var ( globalHTTPServerErrorCh = make(chan error) globalOSSignalCh = make(chan os.Signal, 1) - // global Trace system to send HTTP request/response logs to - // registered listeners - globalHTTPTrace = pubsub.New() + // global Trace system to send HTTP request/response + // and Storage/OS calls info to registered listeners. + globalTrace = pubsub.New() // global Listen system to send S3 API events to registered listeners globalHTTPListen = pubsub.New() diff --git a/cmd/handler-utils.go b/cmd/handler-utils.go index eb0ed4248..418e48bf2 100644 --- a/cmd/handler-utils.go +++ b/cmd/handler-utils.go @@ -359,24 +359,24 @@ func extractPostPolicyFormValues(ctx context.Context, form *multipart.Form) (fil // Log headers and body. func httpTraceAll(f http.HandlerFunc) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { - if globalHTTPTrace.NumSubscribers() == 0 { + if globalTrace.NumSubscribers() == 0 { f.ServeHTTP(w, r) return } trace := Trace(f, true, w, r) - globalHTTPTrace.Publish(trace) + globalTrace.Publish(trace) } } // Log only the headers. func httpTraceHdrs(f http.HandlerFunc) http.HandlerFunc { return func(w http.ResponseWriter, r *http.Request) { - if globalHTTPTrace.NumSubscribers() == 0 { + if globalTrace.NumSubscribers() == 0 { f.ServeHTTP(w, r) return } trace := Trace(f, false, w, r) - globalHTTPTrace.Publish(trace) + globalTrace.Publish(trace) } } diff --git a/cmd/http-tracer.go b/cmd/http-tracer.go index a962dbc28..574859971 100644 --- a/cmd/http-tracer.go +++ b/cmd/http-tracer.go @@ -124,7 +124,8 @@ func WebTrace(ri *jsonrpc.RequestInfo) trace.Info { reqHeaders.Set("Transfer-Encoding", strings.Join(r.TransferEncoding, ",")) } - t := trace.Info{FuncName: name} + now := time.Now().UTC() + t := trace.Info{TraceType: trace.HTTP, FuncName: name, Time: now} t.NodeName = r.Host if globalIsDistErasure { t.NodeName = globalLocalNodeName @@ -137,7 +138,7 @@ func WebTrace(ri *jsonrpc.RequestInfo) trace.Info { vars := mux.Vars(r) rq := trace.RequestInfo{ - Time: time.Now().UTC(), + Time: now, Proto: r.Proto, Method: r.Method, Path: SlashSeparator + pathJoin(vars["bucket"], vars["object"]), @@ -185,10 +186,12 @@ func Trace(f http.HandlerFunc, logBody bool, w http.ResponseWriter, r *http.Requ reqHeaders.Set("Transfer-Encoding", strings.Join(r.TransferEncoding, ",")) } - var reqBodyRecorder *recordRequest - t := trace.Info{FuncName: name} - reqBodyRecorder = &recordRequest{Reader: r.Body, logBody: logBody, headers: reqHeaders} + reqBodyRecorder := &recordRequest{Reader: r.Body, logBody: logBody, headers: reqHeaders} r.Body = ioutil.NopCloser(reqBodyRecorder) + + now := time.Now().UTC() + t := trace.Info{TraceType: trace.HTTP, FuncName: name, Time: now} + t.NodeName = r.Host if globalIsDistErasure { t.NodeName = globalLocalNodeName @@ -197,8 +200,9 @@ func Trace(f http.HandlerFunc, logBody bool, w http.ResponseWriter, r *http.Requ if host, _, err := net.SplitHostPort(t.NodeName); err == nil { t.NodeName = host } + rq := trace.RequestInfo{ - Time: time.Now().UTC(), + Time: now, Proto: r.Proto, Method: r.Method, Path: r.URL.Path, diff --git a/cmd/os-instrumented.go b/cmd/os-instrumented.go index 043119c5f..8fd770a2e 100644 --- a/cmd/os-instrumented.go +++ b/cmd/os-instrumented.go @@ -17,145 +17,109 @@ package cmd import ( - "fmt" "os" "strings" "time" - "github.com/minio/minio/cmd/config" "github.com/minio/minio/pkg/disk" - "github.com/minio/minio/pkg/env" + trace "github.com/minio/minio/pkg/trace" ) -var ( - logTime bool = false - threshold time.Duration +//go:generate stringer -type=osMetric -trimprefix=osMetric $GOFILE + +type osMetric uint8 + +const ( + osMetricRemoveAll osMetric = iota + osMetricMkdirAll + osMetricRename + osMetricOpenFile + osMetricOpen + osMetricOpenFileDirectIO + osMetricLstat + osMetricRemove + osMetricStat + // .... add more + + osMetricLast ) -func init() { - logTime = env.IsSet(config.EnvLogPosixTimes) - t, _ := env.GetInt( - config.EnvLogPosixThresholdInMS, - 100, - ) - threshold = time.Duration(t) * time.Millisecond +func osTrace(s osMetric, startTime time.Time, duration time.Duration, path string) trace.Info { + return trace.Info{ + TraceType: trace.OS, + Time: startTime, + NodeName: globalLocalNodeName, + FuncName: s.String(), + OSStats: trace.OSStats{ + Duration: duration, + Path: path, + }, + } } -func reportTime(name *strings.Builder, startTime time.Time) { - delta := time.Since(startTime) - if delta > threshold { - name.WriteString(" ") - name.WriteString(delta.String()) - fmt.Println(name.String()) +func updateOSMetrics(s osMetric, paths ...string) func() { + if globalTrace.NumSubscribers() == 0 { + return func() {} + } + + startTime := time.Now() + return func() { + duration := time.Since(startTime) + + globalTrace.Publish(osTrace(s, startTime, duration, strings.Join(paths, " "))) } } // RemoveAll captures time taken to call the underlying os.RemoveAll func RemoveAll(dirPath string) error { - if logTime { - startTime := time.Now() - var s strings.Builder - s.WriteString("os.RemoveAll: ") - s.WriteString(dirPath) - defer reportTime(&s, startTime) - } + defer updateOSMetrics(osMetricRemoveAll, dirPath)() return os.RemoveAll(dirPath) } // MkdirAll captures time taken to call os.MkdirAll func MkdirAll(dirPath string, mode os.FileMode) error { - if logTime { - startTime := time.Now() - var s strings.Builder - s.WriteString("os.MkdirAll: ") - s.WriteString(dirPath) - defer reportTime(&s, startTime) - } + defer updateOSMetrics(osMetricMkdirAll, dirPath)() return os.MkdirAll(dirPath, mode) } // Rename captures time taken to call os.Rename func Rename(src, dst string) error { - if logTime { - startTime := time.Now() - var s strings.Builder - s.WriteString("os.Rename: ") - s.WriteString(src) - s.WriteString(" to ") - s.WriteString(dst) - defer reportTime(&s, startTime) - } + defer updateOSMetrics(osMetricRename, src, dst)() return os.Rename(src, dst) } // OpenFile captures time taken to call os.OpenFile func OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) { - if logTime { - startTime := time.Now() - var s strings.Builder - s.WriteString("os.OpenFile: ") - s.WriteString(name) - defer reportTime(&s, startTime) - } + defer updateOSMetrics(osMetricOpenFile, name)() return os.OpenFile(name, flag, perm) } // Open captures time taken to call os.Open func Open(name string) (*os.File, error) { - if logTime { - startTime := time.Now() - var s strings.Builder - s.WriteString("os.Open: ") - s.WriteString(name) - defer reportTime(&s, startTime) - } + defer updateOSMetrics(osMetricOpen, name)() return os.Open(name) } // OpenFileDirectIO captures time taken to call disk.OpenFileDirectIO func OpenFileDirectIO(name string, flag int, perm os.FileMode) (*os.File, error) { - if logTime { - startTime := time.Now() - var s strings.Builder - s.WriteString("disk.OpenFileDirectIO: ") - s.WriteString(name) - defer reportTime(&s, startTime) - } + defer updateOSMetrics(osMetricOpenFileDirectIO, name)() return disk.OpenFileDirectIO(name, flag, perm) } // Lstat captures time taken to call os.Lstat func Lstat(name string) (os.FileInfo, error) { - if logTime { - startTime := time.Now() - var s strings.Builder - s.WriteString("os.Lstat: ") - s.WriteString(name) - defer reportTime(&s, startTime) - } + defer updateOSMetrics(osMetricLstat, name)() return os.Lstat(name) } // Remove captures time taken to call os.Remove func Remove(deletePath string) error { - if logTime { - startTime := time.Now() - var s strings.Builder - s.WriteString("os.Remove: ") - s.WriteString(deletePath) - defer reportTime(&s, startTime) - } + defer updateOSMetrics(osMetricRemove, deletePath)() return os.Remove(deletePath) } // Stat captures time taken to call os.Stat func Stat(name string) (os.FileInfo, error) { - if logTime { - startTime := time.Now() - var s strings.Builder - s.WriteString("os.Stat: ") - s.WriteString(name) - defer reportTime(&s, startTime) - } + defer updateOSMetrics(osMetricStat, name)() return os.Stat(name) } diff --git a/cmd/osmetric_string.go b/cmd/osmetric_string.go new file mode 100644 index 000000000..773dce2eb --- /dev/null +++ b/cmd/osmetric_string.go @@ -0,0 +1,32 @@ +// Code generated by "stringer -type=osMetric -trimprefix=osMetric os-instrumented.go"; DO NOT EDIT. + +package cmd + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[osMetricRemoveAll-0] + _ = x[osMetricMkdirAll-1] + _ = x[osMetricRename-2] + _ = x[osMetricOpenFile-3] + _ = x[osMetricOpen-4] + _ = x[osMetricOpenFileDirectIO-5] + _ = x[osMetricLstat-6] + _ = x[osMetricRemove-7] + _ = x[osMetricStat-8] + _ = x[osMetricLast-9] +} + +const _osMetric_name = "RemoveAllMkdirAllRenameOpenFileOpenOpenFileDirectIOLstatRemoveStatLast" + +var _osMetric_index = [...]uint8{0, 9, 17, 23, 31, 35, 51, 56, 62, 66, 70} + +func (i osMetric) String() string { + if i >= osMetric(len(_osMetric_index)-1) { + return "osMetric(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _osMetric_name[_osMetric_index[i]:_osMetric_index[i+1]] +} diff --git a/cmd/peer-rest-client.go b/cmd/peer-rest-client.go index fdb836052..3e4ad01c9 100644 --- a/cmd/peer-rest-client.go +++ b/cmd/peer-rest-client.go @@ -682,10 +682,14 @@ func (client *peerRESTClient) UpdateMetacacheListing(ctx context.Context, m meta } -func (client *peerRESTClient) doTrace(traceCh chan interface{}, doneCh <-chan struct{}, trcAll, trcErr bool) { +func (client *peerRESTClient) doTrace(traceCh chan interface{}, doneCh <-chan struct{}, traceOpts madmin.ServiceTraceOpts) { values := make(url.Values) - values.Set(peerRESTTraceAll, strconv.FormatBool(trcAll)) - values.Set(peerRESTTraceErr, strconv.FormatBool(trcErr)) + values.Set(peerRESTTraceErr, strconv.FormatBool(traceOpts.OnlyErrors)) + values.Set(peerRESTTraceS3, strconv.FormatBool(traceOpts.S3)) + values.Set(peerRESTTraceStorage, strconv.FormatBool(traceOpts.Storage)) + values.Set(peerRESTTraceOS, strconv.FormatBool(traceOpts.OS)) + values.Set(peerRESTTraceInternal, strconv.FormatBool(traceOpts.Internal)) + values.Set(peerRESTTraceThreshold, traceOpts.Threshold.String()) // To cancel the REST request in case doneCh gets closed. ctx, cancel := context.WithCancel(GlobalContext) @@ -779,10 +783,10 @@ func (client *peerRESTClient) Listen(listenCh chan interface{}, doneCh <-chan st } // Trace - send http trace request to peer nodes -func (client *peerRESTClient) Trace(traceCh chan interface{}, doneCh <-chan struct{}, trcAll, trcErr bool) { +func (client *peerRESTClient) Trace(traceCh chan interface{}, doneCh <-chan struct{}, traceOpts madmin.ServiceTraceOpts) { go func() { for { - client.doTrace(traceCh, doneCh, trcAll, trcErr) + client.doTrace(traceCh, doneCh, traceOpts) select { case <-doneCh: return diff --git a/cmd/peer-rest-common.go b/cmd/peer-rest-common.go index d1b214067..66cb8c4ad 100644 --- a/cmd/peer-rest-common.go +++ b/cmd/peer-rest-common.go @@ -17,7 +17,7 @@ package cmd const ( - peerRESTVersion = "v12" + peerRESTVersion = "v13" // Add storage tracing peerRESTVersionPrefix = SlashSeparator + peerRESTVersion peerRESTPrefix = minioReservedBucketPath + "/peer" peerRESTPath = peerRESTPrefix + peerRESTVersionPrefix @@ -62,18 +62,22 @@ const ( ) const ( - peerRESTBucket = "bucket" - peerRESTBuckets = "buckets" - peerRESTUser = "user" - peerRESTGroup = "group" - peerRESTUserTemp = "user-temp" - peerRESTPolicy = "policy" - peerRESTUserOrGroup = "user-or-group" - peerRESTIsGroup = "is-group" - peerRESTSignal = "signal" - peerRESTProfiler = "profiler" - peerRESTTraceAll = "all" - peerRESTTraceErr = "err" + peerRESTBucket = "bucket" + peerRESTBuckets = "buckets" + peerRESTUser = "user" + peerRESTGroup = "group" + peerRESTUserTemp = "user-temp" + peerRESTPolicy = "policy" + peerRESTUserOrGroup = "user-or-group" + peerRESTIsGroup = "is-group" + peerRESTSignal = "signal" + peerRESTProfiler = "profiler" + peerRESTTraceErr = "err" + peerRESTTraceInternal = "internal" + peerRESTTraceStorage = "storage" + peerRESTTraceS3 = "s3" + peerRESTTraceOS = "os" + peerRESTTraceThreshold = "threshold" peerRESTListenBucket = "bucket" peerRESTListenPrefix = "prefix" diff --git a/cmd/peer-rest-server.go b/cmd/peer-rest-server.go index a9aebd4e9..19b2e2072 100644 --- a/cmd/peer-rest-server.go +++ b/cmd/peer-rest-server.go @@ -899,14 +899,37 @@ func (s *peerRESTServer) ListenHandler(w http.ResponseWriter, r *http.Request) { } } +func extractTraceOptsFromPeerRequest(r *http.Request) (opts madmin.ServiceTraceOpts, err error) { + + q := r.URL.Query() + opts.OnlyErrors = q.Get(peerRESTTraceErr) == "true" + opts.Storage = q.Get(peerRESTTraceStorage) == "true" + opts.Internal = q.Get(peerRESTTraceInternal) == "true" + opts.S3 = q.Get(peerRESTTraceS3) == "true" + opts.OS = q.Get(peerRESTTraceOS) == "true" + + if t := q.Get(peerRESTTraceThreshold); t != "" { + d, err := time.ParseDuration(t) + if err != nil { + return opts, err + } + opts.Threshold = d + } + return +} + // TraceHandler sends http trace messages back to peer rest client func (s *peerRESTServer) TraceHandler(w http.ResponseWriter, r *http.Request) { if !s.IsValid(w, r) { s.writeErrorResponse(w, errors.New("Invalid request")) return } - trcAll := r.URL.Query().Get(peerRESTTraceAll) == "true" - trcErr := r.URL.Query().Get(peerRESTTraceErr) == "true" + + traceOpts, err := extractTraceOptsFromPeerRequest(r) + if err != nil { + s.writeErrorResponse(w, errors.New("Invalid request")) + return + } w.WriteHeader(http.StatusOK) w.(http.Flusher).Flush() @@ -918,8 +941,8 @@ func (s *peerRESTServer) TraceHandler(w http.ResponseWriter, r *http.Request) { // Use buffered channel to take care of burst sends or slow w.Write() ch := make(chan interface{}, 2000) - globalHTTPTrace.Subscribe(ch, doneCh, func(entry interface{}) bool { - return mustTrace(entry, trcAll, trcErr) + globalTrace.Subscribe(ch, doneCh, func(entry interface{}) bool { + return mustTrace(entry, traceOpts) }) keepAliveTicker := time.NewTicker(500 * time.Millisecond) diff --git a/cmd/storagemetric_string.go b/cmd/storagemetric_string.go index 9b94aaecf..b8702d64d 100644 --- a/cmd/storagemetric_string.go +++ b/cmd/storagemetric_string.go @@ -31,12 +31,12 @@ func _() { _ = x[storageMetricWriteMetadata-20] _ = x[storageMetricReadVersion-21] _ = x[storageMetricReadAll-22] - _ = x[metricLast-23] + _ = x[storageMetricLast-23] } -const _storageMetric_name = "MakeVolBulkMakeVolListVolsStatVolDeleteVolWalkDirListDirReadFileAppendFileCreateFileReadFileStreamRenameFileRenameDataCheckPartsCheckFileDeleteDeleteVersionsVerifyFileWriteAllDeleteVersionWriteMetadataReadVersionReadAllmetricLast" +const _storageMetric_name = "MakeVolBulkMakeVolListVolsStatVolDeleteVolWalkDirListDirReadFileAppendFileCreateFileReadFileStreamRenameFileRenameDataCheckPartsCheckFileDeleteDeleteVersionsVerifyFileWriteAllDeleteVersionWriteMetadataReadVersionReadAllLast" -var _storageMetric_index = [...]uint8{0, 11, 18, 26, 33, 42, 49, 56, 64, 74, 84, 98, 108, 118, 128, 137, 143, 157, 167, 175, 188, 201, 212, 219, 229} +var _storageMetric_index = [...]uint8{0, 11, 18, 26, 33, 42, 49, 56, 64, 74, 84, 98, 108, 118, 128, 137, 143, 157, 167, 175, 188, 201, 212, 219, 223} func (i storageMetric) String() string { if i >= storageMetric(len(_storageMetric_index)-1) { diff --git a/cmd/web-router.go b/cmd/web-router.go index 0ec4fa3d9..b390a5e2c 100644 --- a/cmd/web-router.go +++ b/cmd/web-router.go @@ -77,8 +77,8 @@ func registerWebRouter(router *mux.Router) error { "bucket": bucketName, "object": objectName, }) - if globalHTTPTrace.NumSubscribers() > 0 { - globalHTTPTrace.Publish(WebTrace(ri)) + if globalTrace.NumSubscribers() > 0 { + globalTrace.Publish(WebTrace(ri)) } ctx := newContext(ri.Request, ri.ResponseWriter, ri.Method) logger.AuditLog(ctx, ri.ResponseWriter, ri.Request, claims.Map()) diff --git a/cmd/xl-storage-disk-id-check.go b/cmd/xl-storage-disk-id-check.go index 85c0ee6e6..eaf201552 100644 --- a/cmd/xl-storage-disk-id-check.go +++ b/cmd/xl-storage-disk-id-check.go @@ -19,11 +19,13 @@ package cmd import ( "context" "io" + "strings" "sync" "sync/atomic" "time" ewma "github.com/VividCortex/ewma" + trace "github.com/minio/minio/pkg/trace" ) //go:generate stringer -type=storageMetric -trimprefix=storageMetric $GOFILE @@ -57,7 +59,7 @@ const ( // .... add more - metricLast + storageMetricLast ) // Detects change in underlying disk. @@ -65,8 +67,8 @@ type xlStorageDiskIDCheck struct { storage StorageAPI diskID string - apiCalls [metricLast]uint64 - apiLatencies [metricLast]ewma.MovingAverage + apiCalls [storageMetricLast]uint64 + apiLatencies [storageMetricLast]ewma.MovingAverage } func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics { @@ -222,7 +224,7 @@ func (p *xlStorageDiskIDCheck) DiskInfo(ctx context.Context) (info DiskInfo, err } func (p *xlStorageDiskIDCheck) MakeVolBulk(ctx context.Context, volumes ...string) (err error) { - defer p.updateStorageMetrics(storageMetricMakeVolBulk)() + defer p.updateStorageMetrics(storageMetricMakeVolBulk, volumes...)() select { case <-ctx.Done(): @@ -237,7 +239,7 @@ func (p *xlStorageDiskIDCheck) MakeVolBulk(ctx context.Context, volumes ...strin } func (p *xlStorageDiskIDCheck) MakeVol(ctx context.Context, volume string) (err error) { - defer p.updateStorageMetrics(storageMetricMakeVol)() + defer p.updateStorageMetrics(storageMetricMakeVol, volume)() select { case <-ctx.Done(): @@ -252,7 +254,7 @@ func (p *xlStorageDiskIDCheck) MakeVol(ctx context.Context, volume string) (err } func (p *xlStorageDiskIDCheck) ListVols(ctx context.Context) ([]VolInfo, error) { - defer p.updateStorageMetrics(storageMetricListVols)() + defer p.updateStorageMetrics(storageMetricListVols, "/")() select { case <-ctx.Done(): @@ -267,7 +269,7 @@ func (p *xlStorageDiskIDCheck) ListVols(ctx context.Context) ([]VolInfo, error) } func (p *xlStorageDiskIDCheck) StatVol(ctx context.Context, volume string) (vol VolInfo, err error) { - defer p.updateStorageMetrics(storageMetricStatVol)() + defer p.updateStorageMetrics(storageMetricStatVol, volume)() select { case <-ctx.Done(): @@ -282,7 +284,7 @@ func (p *xlStorageDiskIDCheck) StatVol(ctx context.Context, volume string) (vol } func (p *xlStorageDiskIDCheck) DeleteVol(ctx context.Context, volume string, forceDelete bool) (err error) { - defer p.updateStorageMetrics(storageMetricDeleteVol)() + defer p.updateStorageMetrics(storageMetricDeleteVol, volume)() select { case <-ctx.Done(): @@ -297,7 +299,7 @@ func (p *xlStorageDiskIDCheck) DeleteVol(ctx context.Context, volume string, for } func (p *xlStorageDiskIDCheck) ListDir(ctx context.Context, volume, dirPath string, count int) ([]string, error) { - defer p.updateStorageMetrics(storageMetricListDir)() + defer p.updateStorageMetrics(storageMetricListDir, volume, dirPath)() select { case <-ctx.Done(): @@ -313,7 +315,7 @@ func (p *xlStorageDiskIDCheck) ListDir(ctx context.Context, volume, dirPath stri } func (p *xlStorageDiskIDCheck) ReadFile(ctx context.Context, volume string, path string, offset int64, buf []byte, verifier *BitrotVerifier) (n int64, err error) { - defer p.updateStorageMetrics(storageMetricReadFile)() + defer p.updateStorageMetrics(storageMetricReadFile, volume, path)() select { case <-ctx.Done(): @@ -329,7 +331,7 @@ func (p *xlStorageDiskIDCheck) ReadFile(ctx context.Context, volume string, path } func (p *xlStorageDiskIDCheck) AppendFile(ctx context.Context, volume string, path string, buf []byte) (err error) { - defer p.updateStorageMetrics(storageMetricAppendFile)() + defer p.updateStorageMetrics(storageMetricAppendFile, volume, path)() select { case <-ctx.Done(): @@ -345,7 +347,7 @@ func (p *xlStorageDiskIDCheck) AppendFile(ctx context.Context, volume string, pa } func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, volume, path string, size int64, reader io.Reader) error { - defer p.updateStorageMetrics(storageMetricCreateFile)() + defer p.updateStorageMetrics(storageMetricCreateFile, volume, path)() select { case <-ctx.Done(): @@ -361,7 +363,7 @@ func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, volume, path stri } func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) { - defer p.updateStorageMetrics(storageMetricReadFileStream)() + defer p.updateStorageMetrics(storageMetricReadFileStream, volume, path)() select { case <-ctx.Done(): @@ -377,7 +379,7 @@ func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path } func (p *xlStorageDiskIDCheck) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) error { - defer p.updateStorageMetrics(storageMetricRenameFile)() + defer p.updateStorageMetrics(storageMetricRenameFile, srcVolume, srcPath, dstVolume, dstPath)() select { case <-ctx.Done(): @@ -393,7 +395,7 @@ func (p *xlStorageDiskIDCheck) RenameFile(ctx context.Context, srcVolume, srcPat } func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, dstVolume, dstPath string) error { - defer p.updateStorageMetrics(storageMetricRenameData)() + defer p.updateStorageMetrics(storageMetricRenameData, srcPath, dataDir, dstVolume, dstPath)() select { case <-ctx.Done(): @@ -409,7 +411,7 @@ func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPat } func (p *xlStorageDiskIDCheck) CheckParts(ctx context.Context, volume string, path string, fi FileInfo) (err error) { - defer p.updateStorageMetrics(storageMetricCheckParts)() + defer p.updateStorageMetrics(storageMetricCheckParts, volume, path)() select { case <-ctx.Done(): @@ -425,7 +427,7 @@ func (p *xlStorageDiskIDCheck) CheckParts(ctx context.Context, volume string, pa } func (p *xlStorageDiskIDCheck) CheckFile(ctx context.Context, volume string, path string) (err error) { - defer p.updateStorageMetrics(storageMetricCheckFile)() + defer p.updateStorageMetrics(storageMetricCheckFile, volume, path)() select { case <-ctx.Done(): @@ -441,7 +443,7 @@ func (p *xlStorageDiskIDCheck) CheckFile(ctx context.Context, volume string, pat } func (p *xlStorageDiskIDCheck) Delete(ctx context.Context, volume string, path string, recursive bool) (err error) { - defer p.updateStorageMetrics(storageMetricDelete)() + defer p.updateStorageMetrics(storageMetricDelete, volume, path)() select { case <-ctx.Done(): @@ -456,12 +458,21 @@ func (p *xlStorageDiskIDCheck) Delete(ctx context.Context, volume string, path s return p.storage.Delete(ctx, volume, path, recursive) } +// DeleteVersions deletes slice of versions, it can be same object +// or multiple objects. func (p *xlStorageDiskIDCheck) DeleteVersions(ctx context.Context, volume string, versions []FileInfo) (errs []error) { - defer p.updateStorageMetrics(storageMetricDeleteVersions)() + // Mererly for tracing storage + path := "" + if len(versions) > 0 { + path = versions[0].Name + } + + defer p.updateStorageMetrics(storageMetricDeleteVersions, volume, path)() + + errs = make([]error, len(versions)) select { case <-ctx.Done(): - errs = make([]error, len(versions)) for i := range errs { errs[i] = ctx.Err() } @@ -470,17 +481,17 @@ func (p *xlStorageDiskIDCheck) DeleteVersions(ctx context.Context, volume string } if err := p.checkDiskStale(); err != nil { - errs = make([]error, len(versions)) for i := range errs { errs[i] = err } return errs } + return p.storage.DeleteVersions(ctx, volume, versions) } func (p *xlStorageDiskIDCheck) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) error { - defer p.updateStorageMetrics(storageMetricVerifyFile)() + defer p.updateStorageMetrics(storageMetricVerifyFile, volume, path)() select { case <-ctx.Done(): @@ -496,7 +507,7 @@ func (p *xlStorageDiskIDCheck) VerifyFile(ctx context.Context, volume, path stri } func (p *xlStorageDiskIDCheck) WriteAll(ctx context.Context, volume string, path string, b []byte) (err error) { - defer p.updateStorageMetrics(storageMetricWriteAll)() + defer p.updateStorageMetrics(storageMetricWriteAll, volume, path)() select { case <-ctx.Done(): @@ -512,7 +523,7 @@ func (p *xlStorageDiskIDCheck) WriteAll(ctx context.Context, volume string, path } func (p *xlStorageDiskIDCheck) DeleteVersion(ctx context.Context, volume, path string, fi FileInfo, forceDelMarker bool) (err error) { - defer p.updateStorageMetrics(storageMetricDeleteVersion)() + defer p.updateStorageMetrics(storageMetricDeleteVersion, volume, path)() select { case <-ctx.Done(): @@ -528,7 +539,7 @@ func (p *xlStorageDiskIDCheck) DeleteVersion(ctx context.Context, volume, path s } func (p *xlStorageDiskIDCheck) WriteMetadata(ctx context.Context, volume, path string, fi FileInfo) (err error) { - defer p.updateStorageMetrics(storageMetricWriteMetadata)() + defer p.updateStorageMetrics(storageMetricWriteMetadata, volume, path)() select { case <-ctx.Done(): @@ -544,7 +555,7 @@ func (p *xlStorageDiskIDCheck) WriteMetadata(ctx context.Context, volume, path s } func (p *xlStorageDiskIDCheck) ReadVersion(ctx context.Context, volume, path, versionID string, readData bool) (fi FileInfo, err error) { - defer p.updateStorageMetrics(storageMetricReadVersion)() + defer p.updateStorageMetrics(storageMetricReadVersion, volume, path)() select { case <-ctx.Done(): @@ -560,7 +571,7 @@ func (p *xlStorageDiskIDCheck) ReadVersion(ctx context.Context, volume, path, ve } func (p *xlStorageDiskIDCheck) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) { - defer p.updateStorageMetrics(storageMetricReadAll)() + defer p.updateStorageMetrics(storageMetricReadAll, volume, path)() select { case <-ctx.Done(): @@ -575,11 +586,31 @@ func (p *xlStorageDiskIDCheck) ReadAll(ctx context.Context, volume string, path return p.storage.ReadAll(ctx, volume, path) } -// Update storage metrics -func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric) func() { - startTime := time.Now() - return func() { - atomic.AddUint64(&p.apiCalls[s], 1) - p.apiLatencies[s].Add(float64(time.Since(startTime))) +func storageTrace(s storageMetric, startTime time.Time, duration time.Duration, path string) trace.Info { + return trace.Info{ + TraceType: trace.Storage, + Time: startTime, + NodeName: globalLocalNodeName, + FuncName: s.String(), + StorageStats: trace.StorageStats{ + Duration: duration, + Path: path, + }, + } +} + +// Update storage metrics +func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric, paths ...string) func() { + startTime := time.Now() + trace := globalTrace.NumSubscribers() > 0 + return func() { + duration := time.Since(startTime) + + atomic.AddUint64(&p.apiCalls[s], 1) + p.apiLatencies[s].Add(float64(duration)) + + if trace { + globalTrace.Publish(storageTrace(s, startTime, duration, strings.Join(paths, " "))) + } } } diff --git a/pkg/madmin/examples/service-trace.go b/pkg/madmin/examples/service-trace.go index 99144a133..6380e0bbd 100644 --- a/pkg/madmin/examples/service-trace.go +++ b/pkg/madmin/examples/service-trace.go @@ -37,14 +37,15 @@ func main() { if err != nil { log.Fatalln(err) } - doneCh := make(chan struct{}) - defer close(doneCh) - // Start listening on all http trace activity from all servers - // in the minio cluster. - allTrace := false - errTrace := false - traceCh := madmClnt.ServiceTrace(context.Background(), allTrace, errTrace, doneCh) + // Start listening on all http trace activity from all servers in the minio cluster. + traceCh := madmClnt.ServiceTrace(context.Background(), madmin.ServiceTraceOpts{ + S3: true, + Internal: true, + Storage: true, + OS: true, + Threshold: 0, + }) for traceInfo := range traceCh { if traceInfo.Err != nil { fmt.Println(traceInfo.Err) diff --git a/pkg/madmin/service-commands.go b/pkg/madmin/service-commands.go index a4eec01b1..1c251731f 100644 --- a/pkg/madmin/service-commands.go +++ b/pkg/madmin/service-commands.go @@ -23,6 +23,7 @@ import ( "net/http" "net/url" "strconv" + "time" trace "github.com/minio/minio/pkg/trace" ) @@ -77,16 +78,30 @@ type ServiceTraceInfo struct { Err error `json:"-"` } +// ServiceTraceOpts holds tracing options +type ServiceTraceOpts struct { + S3 bool + Internal bool + Storage bool + OS bool + OnlyErrors bool + Threshold time.Duration +} + // ServiceTrace - listen on http trace notifications. -func (adm AdminClient) ServiceTrace(ctx context.Context, allTrace, errTrace bool) <-chan ServiceTraceInfo { +func (adm AdminClient) ServiceTrace(ctx context.Context, opts ServiceTraceOpts) <-chan ServiceTraceInfo { traceInfoCh := make(chan ServiceTraceInfo) // Only success, start a routine to start reading line by line. go func(traceInfoCh chan<- ServiceTraceInfo) { defer close(traceInfoCh) for { urlValues := make(url.Values) - urlValues.Set("all", strconv.FormatBool(allTrace)) - urlValues.Set("err", strconv.FormatBool(errTrace)) + urlValues.Set("err", strconv.FormatBool(opts.OnlyErrors)) + urlValues.Set("s3", strconv.FormatBool(opts.S3)) + urlValues.Set("internal", strconv.FormatBool(opts.Internal)) + urlValues.Set("storage", strconv.FormatBool(opts.Storage)) + urlValues.Set("os", strconv.FormatBool(opts.OS)) + urlValues.Set("threshold", opts.Threshold.String()) reqData := requestData{ relPath: adminAPIPrefix + "/trace", queryValues: urlValues, diff --git a/pkg/trace/trace.go b/pkg/trace/trace.go index c010bbc03..21ec7edbe 100644 --- a/pkg/trace/trace.go +++ b/pkg/trace/trace.go @@ -21,14 +21,45 @@ import ( "time" ) +// Type indicates the type of the tracing Info +type Type int + +const ( + // OS tracing (Golang os package calls) + OS Type = iota + // Storage tracing (MinIO Storage Layer) + Storage + // HTTP tracing (MinIO S3 & Internode) + HTTP +) + // Info - represents a trace record, additionally // also reports errors if any while listening on trace. type Info struct { - NodeName string `json:"nodename"` - FuncName string `json:"funcname"` + TraceType Type `json:"type"` + + NodeName string `json:"nodename"` + FuncName string `json:"funcname"` + Time time.Time `json:"time"` + ReqInfo RequestInfo `json:"request"` RespInfo ResponseInfo `json:"response"` CallStats CallStats `json:"stats"` + + StorageStats StorageStats `json:"storageStats"` + OSStats OSStats `json:"osStats"` +} + +// StorageStats statistics on MinIO Storage layer calls +type StorageStats struct { + Path string `json:"path"` + Duration time.Duration `json:"duration"` +} + +// OSStats statistics on operating system specific calls. +type OSStats struct { + Path string `json:"path"` + Duration time.Duration `json:"duration"` } // CallStats records request stats