trace: Add storage & OS tracing (#11889)

This commit is contained in:
Anis Elleuch 2021-03-27 07:24:07 +01:00 committed by GitHub
parent 95096e31a7
commit d8b5adfd10
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 362 additions and 203 deletions

View File

@ -1031,26 +1031,75 @@ func toAdminAPIErr(ctx context.Context, err error) APIError {
// false if certain conditions are not met. // false if certain conditions are not met.
// - input entry is not of the type *trace.Info* // - input entry is not of the type *trace.Info*
// - errOnly entries are to be traced, not status code 2xx, 3xx. // - errOnly entries are to be traced, not status code 2xx, 3xx.
// - all entries to be traced, if not trace only S3 API requests. // - trace.Info type is asked by opts
func mustTrace(entry interface{}, trcAll, errOnly bool) bool { func mustTrace(entry interface{}, opts madmin.ServiceTraceOpts) (shouldTrace bool) {
trcInfo, ok := entry.(trace.Info) trcInfo, ok := entry.(trace.Info)
if !ok { if !ok {
return false return false
} }
// Handle browser requests separately filter them and return. // Override shouldTrace decision with errOnly filtering
if HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+"/upload") { defer func() {
if errOnly { if shouldTrace && opts.OnlyErrors {
return trcInfo.RespInfo.StatusCode >= http.StatusBadRequest shouldTrace = trcInfo.RespInfo.StatusCode >= http.StatusBadRequest
} }
}()
if opts.Threshold > 0 {
var latency time.Duration
switch trcInfo.TraceType {
case trace.OS:
latency = trcInfo.OSStats.Duration
case trace.Storage:
latency = trcInfo.StorageStats.Duration
case trace.HTTP:
latency = trcInfo.CallStats.Latency
}
if latency < opts.Threshold {
return false
}
}
if opts.Internal && trcInfo.TraceType == trace.HTTP && HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+SlashSeparator) {
return true return true
} }
trace := trcAll || !HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+SlashSeparator) if opts.S3 && trcInfo.TraceType == trace.HTTP && !HasPrefix(trcInfo.ReqInfo.Path, minioReservedBucketPath+SlashSeparator) {
if errOnly { return true
return trace && trcInfo.RespInfo.StatusCode >= http.StatusBadRequest
} }
return trace
if opts.Storage && trcInfo.TraceType == trace.Storage {
return true
}
return opts.OS && trcInfo.TraceType == trace.OS
}
func extractTraceOptions(r *http.Request) (opts madmin.ServiceTraceOpts, err error) {
q := r.URL.Query()
opts.OnlyErrors = q.Get("err") == "true"
opts.S3 = q.Get("s3") == "true"
opts.Internal = q.Get("internal") == "true"
opts.Storage = q.Get("storage") == "true"
opts.OS = q.Get("os") == "true"
// Support deprecated 'all' query
if q.Get("all") == "true" {
opts.S3 = true
opts.Internal = true
opts.Storage = true
opts.OS = true
}
if t := q.Get("threshold"); t != "" {
d, err := time.ParseDuration(t)
if err != nil {
return opts, err
}
opts.Threshold = d
}
return
} }
// TraceHandler - POST /minio/admin/v3/trace // TraceHandler - POST /minio/admin/v3/trace
@ -1059,9 +1108,6 @@ func mustTrace(entry interface{}, trcAll, errOnly bool) bool {
func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) { func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) {
ctx := newContext(r, w, "HTTPTrace") ctx := newContext(r, w, "HTTPTrace")
trcAll := r.URL.Query().Get("all") == "true"
trcErr := r.URL.Query().Get("err") == "true"
// Validate request signature. // Validate request signature.
_, adminAPIErr := checkAdminRequestAuth(ctx, r, iampolicy.TraceAdminAction, "") _, adminAPIErr := checkAdminRequestAuth(ctx, r, iampolicy.TraceAdminAction, "")
if adminAPIErr != ErrNone { if adminAPIErr != ErrNone {
@ -1069,6 +1115,12 @@ func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) {
return return
} }
traceOpts, err := extractTraceOptions(r)
if err != nil {
writeErrorResponseJSON(ctx, w, errorCodes.ToAPIErr(ErrInvalidRequest), r.URL)
return
}
setEventStreamHeaders(w) setEventStreamHeaders(w)
// Trace Publisher and peer-trace-client uses nonblocking send and hence does not wait for slow receivers. // Trace Publisher and peer-trace-client uses nonblocking send and hence does not wait for slow receivers.
@ -1077,15 +1129,15 @@ func (a adminAPIHandlers) TraceHandler(w http.ResponseWriter, r *http.Request) {
peers, _ := newPeerRestClients(globalEndpoints) peers, _ := newPeerRestClients(globalEndpoints)
globalHTTPTrace.Subscribe(traceCh, ctx.Done(), func(entry interface{}) bool { globalTrace.Subscribe(traceCh, ctx.Done(), func(entry interface{}) bool {
return mustTrace(entry, trcAll, trcErr) return mustTrace(entry, traceOpts)
}) })
for _, peer := range peers { for _, peer := range peers {
if peer == nil { if peer == nil {
continue continue
} }
peer.Trace(traceCh, ctx.Done(), trcAll, trcErr) peer.Trace(traceCh, ctx.Done(), traceOpts)
} }
keepAliveTicker := time.NewTicker(500 * time.Millisecond) keepAliveTicker := time.NewTicker(500 * time.Millisecond)

View File

@ -66,7 +66,7 @@ func waitForLowHTTPReq(maxIO int, maxWait time.Duration) {
// Bucket notification and http trace are not costly, it is okay to ignore them // Bucket notification and http trace are not costly, it is okay to ignore them
// while counting the number of concurrent connections // while counting the number of concurrent connections
maxIOFn := func() int { maxIOFn := func() int {
return maxIO + int(globalHTTPListen.NumSubscribers()) + int(globalHTTPTrace.NumSubscribers()) return maxIO + int(globalHTTPListen.NumSubscribers()) + int(globalTrace.NumSubscribers())
} }
tmpMaxWait := maxWait tmpMaxWait := maxWait

View File

@ -38,8 +38,6 @@ const (
EnvFSOSync = "MINIO_FS_OSYNC" EnvFSOSync = "MINIO_FS_OSYNC"
EnvArgs = "MINIO_ARGS" EnvArgs = "MINIO_ARGS"
EnvDNSWebhook = "MINIO_DNS_WEBHOOK_ENDPOINT" EnvDNSWebhook = "MINIO_DNS_WEBHOOK_ENDPOINT"
EnvLogPosixTimes = "MINIO_LOG_POSIX_TIMES"
EnvLogPosixThresholdInMS = "MINIO_LOG_POSIX_THRESHOLD_MS"
EnvUpdate = "MINIO_UPDATE" EnvUpdate = "MINIO_UPDATE"

View File

@ -181,9 +181,9 @@ var (
globalHTTPServerErrorCh = make(chan error) globalHTTPServerErrorCh = make(chan error)
globalOSSignalCh = make(chan os.Signal, 1) globalOSSignalCh = make(chan os.Signal, 1)
// global Trace system to send HTTP request/response logs to // global Trace system to send HTTP request/response
// registered listeners // and Storage/OS calls info to registered listeners.
globalHTTPTrace = pubsub.New() globalTrace = pubsub.New()
// global Listen system to send S3 API events to registered listeners // global Listen system to send S3 API events to registered listeners
globalHTTPListen = pubsub.New() globalHTTPListen = pubsub.New()

View File

@ -359,24 +359,24 @@ func extractPostPolicyFormValues(ctx context.Context, form *multipart.Form) (fil
// Log headers and body. // Log headers and body.
func httpTraceAll(f http.HandlerFunc) http.HandlerFunc { func httpTraceAll(f http.HandlerFunc) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
if globalHTTPTrace.NumSubscribers() == 0 { if globalTrace.NumSubscribers() == 0 {
f.ServeHTTP(w, r) f.ServeHTTP(w, r)
return return
} }
trace := Trace(f, true, w, r) trace := Trace(f, true, w, r)
globalHTTPTrace.Publish(trace) globalTrace.Publish(trace)
} }
} }
// Log only the headers. // Log only the headers.
func httpTraceHdrs(f http.HandlerFunc) http.HandlerFunc { func httpTraceHdrs(f http.HandlerFunc) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
if globalHTTPTrace.NumSubscribers() == 0 { if globalTrace.NumSubscribers() == 0 {
f.ServeHTTP(w, r) f.ServeHTTP(w, r)
return return
} }
trace := Trace(f, false, w, r) trace := Trace(f, false, w, r)
globalHTTPTrace.Publish(trace) globalTrace.Publish(trace)
} }
} }

View File

@ -124,7 +124,8 @@ func WebTrace(ri *jsonrpc.RequestInfo) trace.Info {
reqHeaders.Set("Transfer-Encoding", strings.Join(r.TransferEncoding, ",")) reqHeaders.Set("Transfer-Encoding", strings.Join(r.TransferEncoding, ","))
} }
t := trace.Info{FuncName: name} now := time.Now().UTC()
t := trace.Info{TraceType: trace.HTTP, FuncName: name, Time: now}
t.NodeName = r.Host t.NodeName = r.Host
if globalIsDistErasure { if globalIsDistErasure {
t.NodeName = globalLocalNodeName t.NodeName = globalLocalNodeName
@ -137,7 +138,7 @@ func WebTrace(ri *jsonrpc.RequestInfo) trace.Info {
vars := mux.Vars(r) vars := mux.Vars(r)
rq := trace.RequestInfo{ rq := trace.RequestInfo{
Time: time.Now().UTC(), Time: now,
Proto: r.Proto, Proto: r.Proto,
Method: r.Method, Method: r.Method,
Path: SlashSeparator + pathJoin(vars["bucket"], vars["object"]), Path: SlashSeparator + pathJoin(vars["bucket"], vars["object"]),
@ -185,10 +186,12 @@ func Trace(f http.HandlerFunc, logBody bool, w http.ResponseWriter, r *http.Requ
reqHeaders.Set("Transfer-Encoding", strings.Join(r.TransferEncoding, ",")) reqHeaders.Set("Transfer-Encoding", strings.Join(r.TransferEncoding, ","))
} }
var reqBodyRecorder *recordRequest reqBodyRecorder := &recordRequest{Reader: r.Body, logBody: logBody, headers: reqHeaders}
t := trace.Info{FuncName: name}
reqBodyRecorder = &recordRequest{Reader: r.Body, logBody: logBody, headers: reqHeaders}
r.Body = ioutil.NopCloser(reqBodyRecorder) r.Body = ioutil.NopCloser(reqBodyRecorder)
now := time.Now().UTC()
t := trace.Info{TraceType: trace.HTTP, FuncName: name, Time: now}
t.NodeName = r.Host t.NodeName = r.Host
if globalIsDistErasure { if globalIsDistErasure {
t.NodeName = globalLocalNodeName t.NodeName = globalLocalNodeName
@ -197,8 +200,9 @@ func Trace(f http.HandlerFunc, logBody bool, w http.ResponseWriter, r *http.Requ
if host, _, err := net.SplitHostPort(t.NodeName); err == nil { if host, _, err := net.SplitHostPort(t.NodeName); err == nil {
t.NodeName = host t.NodeName = host
} }
rq := trace.RequestInfo{ rq := trace.RequestInfo{
Time: time.Now().UTC(), Time: now,
Proto: r.Proto, Proto: r.Proto,
Method: r.Method, Method: r.Method,
Path: r.URL.Path, Path: r.URL.Path,

View File

@ -17,145 +17,109 @@
package cmd package cmd
import ( import (
"fmt"
"os" "os"
"strings" "strings"
"time" "time"
"github.com/minio/minio/cmd/config"
"github.com/minio/minio/pkg/disk" "github.com/minio/minio/pkg/disk"
"github.com/minio/minio/pkg/env" trace "github.com/minio/minio/pkg/trace"
) )
var ( //go:generate stringer -type=osMetric -trimprefix=osMetric $GOFILE
logTime bool = false
threshold time.Duration type osMetric uint8
const (
osMetricRemoveAll osMetric = iota
osMetricMkdirAll
osMetricRename
osMetricOpenFile
osMetricOpen
osMetricOpenFileDirectIO
osMetricLstat
osMetricRemove
osMetricStat
// .... add more
osMetricLast
) )
func init() { func osTrace(s osMetric, startTime time.Time, duration time.Duration, path string) trace.Info {
logTime = env.IsSet(config.EnvLogPosixTimes) return trace.Info{
t, _ := env.GetInt( TraceType: trace.OS,
config.EnvLogPosixThresholdInMS, Time: startTime,
100, NodeName: globalLocalNodeName,
) FuncName: s.String(),
threshold = time.Duration(t) * time.Millisecond OSStats: trace.OSStats{
Duration: duration,
Path: path,
},
}
} }
func reportTime(name *strings.Builder, startTime time.Time) { func updateOSMetrics(s osMetric, paths ...string) func() {
delta := time.Since(startTime) if globalTrace.NumSubscribers() == 0 {
if delta > threshold { return func() {}
name.WriteString(" ") }
name.WriteString(delta.String())
fmt.Println(name.String()) startTime := time.Now()
return func() {
duration := time.Since(startTime)
globalTrace.Publish(osTrace(s, startTime, duration, strings.Join(paths, " ")))
} }
} }
// RemoveAll captures time taken to call the underlying os.RemoveAll // RemoveAll captures time taken to call the underlying os.RemoveAll
func RemoveAll(dirPath string) error { func RemoveAll(dirPath string) error {
if logTime { defer updateOSMetrics(osMetricRemoveAll, dirPath)()
startTime := time.Now()
var s strings.Builder
s.WriteString("os.RemoveAll: ")
s.WriteString(dirPath)
defer reportTime(&s, startTime)
}
return os.RemoveAll(dirPath) return os.RemoveAll(dirPath)
} }
// MkdirAll captures time taken to call os.MkdirAll // MkdirAll captures time taken to call os.MkdirAll
func MkdirAll(dirPath string, mode os.FileMode) error { func MkdirAll(dirPath string, mode os.FileMode) error {
if logTime { defer updateOSMetrics(osMetricMkdirAll, dirPath)()
startTime := time.Now()
var s strings.Builder
s.WriteString("os.MkdirAll: ")
s.WriteString(dirPath)
defer reportTime(&s, startTime)
}
return os.MkdirAll(dirPath, mode) return os.MkdirAll(dirPath, mode)
} }
// Rename captures time taken to call os.Rename // Rename captures time taken to call os.Rename
func Rename(src, dst string) error { func Rename(src, dst string) error {
if logTime { defer updateOSMetrics(osMetricRename, src, dst)()
startTime := time.Now()
var s strings.Builder
s.WriteString("os.Rename: ")
s.WriteString(src)
s.WriteString(" to ")
s.WriteString(dst)
defer reportTime(&s, startTime)
}
return os.Rename(src, dst) return os.Rename(src, dst)
} }
// OpenFile captures time taken to call os.OpenFile // OpenFile captures time taken to call os.OpenFile
func OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) { func OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) {
if logTime { defer updateOSMetrics(osMetricOpenFile, name)()
startTime := time.Now()
var s strings.Builder
s.WriteString("os.OpenFile: ")
s.WriteString(name)
defer reportTime(&s, startTime)
}
return os.OpenFile(name, flag, perm) return os.OpenFile(name, flag, perm)
} }
// Open captures time taken to call os.Open // Open captures time taken to call os.Open
func Open(name string) (*os.File, error) { func Open(name string) (*os.File, error) {
if logTime { defer updateOSMetrics(osMetricOpen, name)()
startTime := time.Now()
var s strings.Builder
s.WriteString("os.Open: ")
s.WriteString(name)
defer reportTime(&s, startTime)
}
return os.Open(name) return os.Open(name)
} }
// OpenFileDirectIO captures time taken to call disk.OpenFileDirectIO // OpenFileDirectIO captures time taken to call disk.OpenFileDirectIO
func OpenFileDirectIO(name string, flag int, perm os.FileMode) (*os.File, error) { func OpenFileDirectIO(name string, flag int, perm os.FileMode) (*os.File, error) {
if logTime { defer updateOSMetrics(osMetricOpenFileDirectIO, name)()
startTime := time.Now()
var s strings.Builder
s.WriteString("disk.OpenFileDirectIO: ")
s.WriteString(name)
defer reportTime(&s, startTime)
}
return disk.OpenFileDirectIO(name, flag, perm) return disk.OpenFileDirectIO(name, flag, perm)
} }
// Lstat captures time taken to call os.Lstat // Lstat captures time taken to call os.Lstat
func Lstat(name string) (os.FileInfo, error) { func Lstat(name string) (os.FileInfo, error) {
if logTime { defer updateOSMetrics(osMetricLstat, name)()
startTime := time.Now()
var s strings.Builder
s.WriteString("os.Lstat: ")
s.WriteString(name)
defer reportTime(&s, startTime)
}
return os.Lstat(name) return os.Lstat(name)
} }
// Remove captures time taken to call os.Remove // Remove captures time taken to call os.Remove
func Remove(deletePath string) error { func Remove(deletePath string) error {
if logTime { defer updateOSMetrics(osMetricRemove, deletePath)()
startTime := time.Now()
var s strings.Builder
s.WriteString("os.Remove: ")
s.WriteString(deletePath)
defer reportTime(&s, startTime)
}
return os.Remove(deletePath) return os.Remove(deletePath)
} }
// Stat captures time taken to call os.Stat // Stat captures time taken to call os.Stat
func Stat(name string) (os.FileInfo, error) { func Stat(name string) (os.FileInfo, error) {
if logTime { defer updateOSMetrics(osMetricStat, name)()
startTime := time.Now()
var s strings.Builder
s.WriteString("os.Stat: ")
s.WriteString(name)
defer reportTime(&s, startTime)
}
return os.Stat(name) return os.Stat(name)
} }

32
cmd/osmetric_string.go Normal file
View File

@ -0,0 +1,32 @@
// Code generated by "stringer -type=osMetric -trimprefix=osMetric os-instrumented.go"; DO NOT EDIT.
package cmd
import "strconv"
func _() {
// An "invalid array index" compiler error signifies that the constant values have changed.
// Re-run the stringer command to generate them again.
var x [1]struct{}
_ = x[osMetricRemoveAll-0]
_ = x[osMetricMkdirAll-1]
_ = x[osMetricRename-2]
_ = x[osMetricOpenFile-3]
_ = x[osMetricOpen-4]
_ = x[osMetricOpenFileDirectIO-5]
_ = x[osMetricLstat-6]
_ = x[osMetricRemove-7]
_ = x[osMetricStat-8]
_ = x[osMetricLast-9]
}
const _osMetric_name = "RemoveAllMkdirAllRenameOpenFileOpenOpenFileDirectIOLstatRemoveStatLast"
var _osMetric_index = [...]uint8{0, 9, 17, 23, 31, 35, 51, 56, 62, 66, 70}
func (i osMetric) String() string {
if i >= osMetric(len(_osMetric_index)-1) {
return "osMetric(" + strconv.FormatInt(int64(i), 10) + ")"
}
return _osMetric_name[_osMetric_index[i]:_osMetric_index[i+1]]
}

View File

@ -682,10 +682,14 @@ func (client *peerRESTClient) UpdateMetacacheListing(ctx context.Context, m meta
} }
func (client *peerRESTClient) doTrace(traceCh chan interface{}, doneCh <-chan struct{}, trcAll, trcErr bool) { func (client *peerRESTClient) doTrace(traceCh chan interface{}, doneCh <-chan struct{}, traceOpts madmin.ServiceTraceOpts) {
values := make(url.Values) values := make(url.Values)
values.Set(peerRESTTraceAll, strconv.FormatBool(trcAll)) values.Set(peerRESTTraceErr, strconv.FormatBool(traceOpts.OnlyErrors))
values.Set(peerRESTTraceErr, strconv.FormatBool(trcErr)) values.Set(peerRESTTraceS3, strconv.FormatBool(traceOpts.S3))
values.Set(peerRESTTraceStorage, strconv.FormatBool(traceOpts.Storage))
values.Set(peerRESTTraceOS, strconv.FormatBool(traceOpts.OS))
values.Set(peerRESTTraceInternal, strconv.FormatBool(traceOpts.Internal))
values.Set(peerRESTTraceThreshold, traceOpts.Threshold.String())
// To cancel the REST request in case doneCh gets closed. // To cancel the REST request in case doneCh gets closed.
ctx, cancel := context.WithCancel(GlobalContext) ctx, cancel := context.WithCancel(GlobalContext)
@ -779,10 +783,10 @@ func (client *peerRESTClient) Listen(listenCh chan interface{}, doneCh <-chan st
} }
// Trace - send http trace request to peer nodes // Trace - send http trace request to peer nodes
func (client *peerRESTClient) Trace(traceCh chan interface{}, doneCh <-chan struct{}, trcAll, trcErr bool) { func (client *peerRESTClient) Trace(traceCh chan interface{}, doneCh <-chan struct{}, traceOpts madmin.ServiceTraceOpts) {
go func() { go func() {
for { for {
client.doTrace(traceCh, doneCh, trcAll, trcErr) client.doTrace(traceCh, doneCh, traceOpts)
select { select {
case <-doneCh: case <-doneCh:
return return

View File

@ -17,7 +17,7 @@
package cmd package cmd
const ( const (
peerRESTVersion = "v12" peerRESTVersion = "v13" // Add storage tracing
peerRESTVersionPrefix = SlashSeparator + peerRESTVersion peerRESTVersionPrefix = SlashSeparator + peerRESTVersion
peerRESTPrefix = minioReservedBucketPath + "/peer" peerRESTPrefix = minioReservedBucketPath + "/peer"
peerRESTPath = peerRESTPrefix + peerRESTVersionPrefix peerRESTPath = peerRESTPrefix + peerRESTVersionPrefix
@ -72,8 +72,12 @@ const (
peerRESTIsGroup = "is-group" peerRESTIsGroup = "is-group"
peerRESTSignal = "signal" peerRESTSignal = "signal"
peerRESTProfiler = "profiler" peerRESTProfiler = "profiler"
peerRESTTraceAll = "all"
peerRESTTraceErr = "err" peerRESTTraceErr = "err"
peerRESTTraceInternal = "internal"
peerRESTTraceStorage = "storage"
peerRESTTraceS3 = "s3"
peerRESTTraceOS = "os"
peerRESTTraceThreshold = "threshold"
peerRESTListenBucket = "bucket" peerRESTListenBucket = "bucket"
peerRESTListenPrefix = "prefix" peerRESTListenPrefix = "prefix"

View File

@ -899,14 +899,37 @@ func (s *peerRESTServer) ListenHandler(w http.ResponseWriter, r *http.Request) {
} }
} }
func extractTraceOptsFromPeerRequest(r *http.Request) (opts madmin.ServiceTraceOpts, err error) {
q := r.URL.Query()
opts.OnlyErrors = q.Get(peerRESTTraceErr) == "true"
opts.Storage = q.Get(peerRESTTraceStorage) == "true"
opts.Internal = q.Get(peerRESTTraceInternal) == "true"
opts.S3 = q.Get(peerRESTTraceS3) == "true"
opts.OS = q.Get(peerRESTTraceOS) == "true"
if t := q.Get(peerRESTTraceThreshold); t != "" {
d, err := time.ParseDuration(t)
if err != nil {
return opts, err
}
opts.Threshold = d
}
return
}
// TraceHandler sends http trace messages back to peer rest client // TraceHandler sends http trace messages back to peer rest client
func (s *peerRESTServer) TraceHandler(w http.ResponseWriter, r *http.Request) { func (s *peerRESTServer) TraceHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) { if !s.IsValid(w, r) {
s.writeErrorResponse(w, errors.New("Invalid request")) s.writeErrorResponse(w, errors.New("Invalid request"))
return return
} }
trcAll := r.URL.Query().Get(peerRESTTraceAll) == "true"
trcErr := r.URL.Query().Get(peerRESTTraceErr) == "true" traceOpts, err := extractTraceOptsFromPeerRequest(r)
if err != nil {
s.writeErrorResponse(w, errors.New("Invalid request"))
return
}
w.WriteHeader(http.StatusOK) w.WriteHeader(http.StatusOK)
w.(http.Flusher).Flush() w.(http.Flusher).Flush()
@ -918,8 +941,8 @@ func (s *peerRESTServer) TraceHandler(w http.ResponseWriter, r *http.Request) {
// Use buffered channel to take care of burst sends or slow w.Write() // Use buffered channel to take care of burst sends or slow w.Write()
ch := make(chan interface{}, 2000) ch := make(chan interface{}, 2000)
globalHTTPTrace.Subscribe(ch, doneCh, func(entry interface{}) bool { globalTrace.Subscribe(ch, doneCh, func(entry interface{}) bool {
return mustTrace(entry, trcAll, trcErr) return mustTrace(entry, traceOpts)
}) })
keepAliveTicker := time.NewTicker(500 * time.Millisecond) keepAliveTicker := time.NewTicker(500 * time.Millisecond)

View File

@ -31,12 +31,12 @@ func _() {
_ = x[storageMetricWriteMetadata-20] _ = x[storageMetricWriteMetadata-20]
_ = x[storageMetricReadVersion-21] _ = x[storageMetricReadVersion-21]
_ = x[storageMetricReadAll-22] _ = x[storageMetricReadAll-22]
_ = x[metricLast-23] _ = x[storageMetricLast-23]
} }
const _storageMetric_name = "MakeVolBulkMakeVolListVolsStatVolDeleteVolWalkDirListDirReadFileAppendFileCreateFileReadFileStreamRenameFileRenameDataCheckPartsCheckFileDeleteDeleteVersionsVerifyFileWriteAllDeleteVersionWriteMetadataReadVersionReadAllmetricLast" const _storageMetric_name = "MakeVolBulkMakeVolListVolsStatVolDeleteVolWalkDirListDirReadFileAppendFileCreateFileReadFileStreamRenameFileRenameDataCheckPartsCheckFileDeleteDeleteVersionsVerifyFileWriteAllDeleteVersionWriteMetadataReadVersionReadAllLast"
var _storageMetric_index = [...]uint8{0, 11, 18, 26, 33, 42, 49, 56, 64, 74, 84, 98, 108, 118, 128, 137, 143, 157, 167, 175, 188, 201, 212, 219, 229} var _storageMetric_index = [...]uint8{0, 11, 18, 26, 33, 42, 49, 56, 64, 74, 84, 98, 108, 118, 128, 137, 143, 157, 167, 175, 188, 201, 212, 219, 223}
func (i storageMetric) String() string { func (i storageMetric) String() string {
if i >= storageMetric(len(_storageMetric_index)-1) { if i >= storageMetric(len(_storageMetric_index)-1) {

View File

@ -77,8 +77,8 @@ func registerWebRouter(router *mux.Router) error {
"bucket": bucketName, "bucket": bucketName,
"object": objectName, "object": objectName,
}) })
if globalHTTPTrace.NumSubscribers() > 0 { if globalTrace.NumSubscribers() > 0 {
globalHTTPTrace.Publish(WebTrace(ri)) globalTrace.Publish(WebTrace(ri))
} }
ctx := newContext(ri.Request, ri.ResponseWriter, ri.Method) ctx := newContext(ri.Request, ri.ResponseWriter, ri.Method)
logger.AuditLog(ctx, ri.ResponseWriter, ri.Request, claims.Map()) logger.AuditLog(ctx, ri.ResponseWriter, ri.Request, claims.Map())

View File

@ -19,11 +19,13 @@ package cmd
import ( import (
"context" "context"
"io" "io"
"strings"
"sync" "sync"
"sync/atomic" "sync/atomic"
"time" "time"
ewma "github.com/VividCortex/ewma" ewma "github.com/VividCortex/ewma"
trace "github.com/minio/minio/pkg/trace"
) )
//go:generate stringer -type=storageMetric -trimprefix=storageMetric $GOFILE //go:generate stringer -type=storageMetric -trimprefix=storageMetric $GOFILE
@ -57,7 +59,7 @@ const (
// .... add more // .... add more
metricLast storageMetricLast
) )
// Detects change in underlying disk. // Detects change in underlying disk.
@ -65,8 +67,8 @@ type xlStorageDiskIDCheck struct {
storage StorageAPI storage StorageAPI
diskID string diskID string
apiCalls [metricLast]uint64 apiCalls [storageMetricLast]uint64
apiLatencies [metricLast]ewma.MovingAverage apiLatencies [storageMetricLast]ewma.MovingAverage
} }
func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics { func (p *xlStorageDiskIDCheck) getMetrics() DiskMetrics {
@ -222,7 +224,7 @@ func (p *xlStorageDiskIDCheck) DiskInfo(ctx context.Context) (info DiskInfo, err
} }
func (p *xlStorageDiskIDCheck) MakeVolBulk(ctx context.Context, volumes ...string) (err error) { func (p *xlStorageDiskIDCheck) MakeVolBulk(ctx context.Context, volumes ...string) (err error) {
defer p.updateStorageMetrics(storageMetricMakeVolBulk)() defer p.updateStorageMetrics(storageMetricMakeVolBulk, volumes...)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -237,7 +239,7 @@ func (p *xlStorageDiskIDCheck) MakeVolBulk(ctx context.Context, volumes ...strin
} }
func (p *xlStorageDiskIDCheck) MakeVol(ctx context.Context, volume string) (err error) { func (p *xlStorageDiskIDCheck) MakeVol(ctx context.Context, volume string) (err error) {
defer p.updateStorageMetrics(storageMetricMakeVol)() defer p.updateStorageMetrics(storageMetricMakeVol, volume)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -252,7 +254,7 @@ func (p *xlStorageDiskIDCheck) MakeVol(ctx context.Context, volume string) (err
} }
func (p *xlStorageDiskIDCheck) ListVols(ctx context.Context) ([]VolInfo, error) { func (p *xlStorageDiskIDCheck) ListVols(ctx context.Context) ([]VolInfo, error) {
defer p.updateStorageMetrics(storageMetricListVols)() defer p.updateStorageMetrics(storageMetricListVols, "/")()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -267,7 +269,7 @@ func (p *xlStorageDiskIDCheck) ListVols(ctx context.Context) ([]VolInfo, error)
} }
func (p *xlStorageDiskIDCheck) StatVol(ctx context.Context, volume string) (vol VolInfo, err error) { func (p *xlStorageDiskIDCheck) StatVol(ctx context.Context, volume string) (vol VolInfo, err error) {
defer p.updateStorageMetrics(storageMetricStatVol)() defer p.updateStorageMetrics(storageMetricStatVol, volume)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -282,7 +284,7 @@ func (p *xlStorageDiskIDCheck) StatVol(ctx context.Context, volume string) (vol
} }
func (p *xlStorageDiskIDCheck) DeleteVol(ctx context.Context, volume string, forceDelete bool) (err error) { func (p *xlStorageDiskIDCheck) DeleteVol(ctx context.Context, volume string, forceDelete bool) (err error) {
defer p.updateStorageMetrics(storageMetricDeleteVol)() defer p.updateStorageMetrics(storageMetricDeleteVol, volume)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -297,7 +299,7 @@ func (p *xlStorageDiskIDCheck) DeleteVol(ctx context.Context, volume string, for
} }
func (p *xlStorageDiskIDCheck) ListDir(ctx context.Context, volume, dirPath string, count int) ([]string, error) { func (p *xlStorageDiskIDCheck) ListDir(ctx context.Context, volume, dirPath string, count int) ([]string, error) {
defer p.updateStorageMetrics(storageMetricListDir)() defer p.updateStorageMetrics(storageMetricListDir, volume, dirPath)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -313,7 +315,7 @@ func (p *xlStorageDiskIDCheck) ListDir(ctx context.Context, volume, dirPath stri
} }
func (p *xlStorageDiskIDCheck) ReadFile(ctx context.Context, volume string, path string, offset int64, buf []byte, verifier *BitrotVerifier) (n int64, err error) { func (p *xlStorageDiskIDCheck) ReadFile(ctx context.Context, volume string, path string, offset int64, buf []byte, verifier *BitrotVerifier) (n int64, err error) {
defer p.updateStorageMetrics(storageMetricReadFile)() defer p.updateStorageMetrics(storageMetricReadFile, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -329,7 +331,7 @@ func (p *xlStorageDiskIDCheck) ReadFile(ctx context.Context, volume string, path
} }
func (p *xlStorageDiskIDCheck) AppendFile(ctx context.Context, volume string, path string, buf []byte) (err error) { func (p *xlStorageDiskIDCheck) AppendFile(ctx context.Context, volume string, path string, buf []byte) (err error) {
defer p.updateStorageMetrics(storageMetricAppendFile)() defer p.updateStorageMetrics(storageMetricAppendFile, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -345,7 +347,7 @@ func (p *xlStorageDiskIDCheck) AppendFile(ctx context.Context, volume string, pa
} }
func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, volume, path string, size int64, reader io.Reader) error { func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, volume, path string, size int64, reader io.Reader) error {
defer p.updateStorageMetrics(storageMetricCreateFile)() defer p.updateStorageMetrics(storageMetricCreateFile, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -361,7 +363,7 @@ func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, volume, path stri
} }
func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) { func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
defer p.updateStorageMetrics(storageMetricReadFileStream)() defer p.updateStorageMetrics(storageMetricReadFileStream, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -377,7 +379,7 @@ func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path
} }
func (p *xlStorageDiskIDCheck) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) error { func (p *xlStorageDiskIDCheck) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) error {
defer p.updateStorageMetrics(storageMetricRenameFile)() defer p.updateStorageMetrics(storageMetricRenameFile, srcVolume, srcPath, dstVolume, dstPath)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -393,7 +395,7 @@ func (p *xlStorageDiskIDCheck) RenameFile(ctx context.Context, srcVolume, srcPat
} }
func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, dstVolume, dstPath string) error { func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, dstVolume, dstPath string) error {
defer p.updateStorageMetrics(storageMetricRenameData)() defer p.updateStorageMetrics(storageMetricRenameData, srcPath, dataDir, dstVolume, dstPath)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -409,7 +411,7 @@ func (p *xlStorageDiskIDCheck) RenameData(ctx context.Context, srcVolume, srcPat
} }
func (p *xlStorageDiskIDCheck) CheckParts(ctx context.Context, volume string, path string, fi FileInfo) (err error) { func (p *xlStorageDiskIDCheck) CheckParts(ctx context.Context, volume string, path string, fi FileInfo) (err error) {
defer p.updateStorageMetrics(storageMetricCheckParts)() defer p.updateStorageMetrics(storageMetricCheckParts, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -425,7 +427,7 @@ func (p *xlStorageDiskIDCheck) CheckParts(ctx context.Context, volume string, pa
} }
func (p *xlStorageDiskIDCheck) CheckFile(ctx context.Context, volume string, path string) (err error) { func (p *xlStorageDiskIDCheck) CheckFile(ctx context.Context, volume string, path string) (err error) {
defer p.updateStorageMetrics(storageMetricCheckFile)() defer p.updateStorageMetrics(storageMetricCheckFile, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -441,7 +443,7 @@ func (p *xlStorageDiskIDCheck) CheckFile(ctx context.Context, volume string, pat
} }
func (p *xlStorageDiskIDCheck) Delete(ctx context.Context, volume string, path string, recursive bool) (err error) { func (p *xlStorageDiskIDCheck) Delete(ctx context.Context, volume string, path string, recursive bool) (err error) {
defer p.updateStorageMetrics(storageMetricDelete)() defer p.updateStorageMetrics(storageMetricDelete, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -456,12 +458,21 @@ func (p *xlStorageDiskIDCheck) Delete(ctx context.Context, volume string, path s
return p.storage.Delete(ctx, volume, path, recursive) return p.storage.Delete(ctx, volume, path, recursive)
} }
// DeleteVersions deletes slice of versions, it can be same object
// or multiple objects.
func (p *xlStorageDiskIDCheck) DeleteVersions(ctx context.Context, volume string, versions []FileInfo) (errs []error) { func (p *xlStorageDiskIDCheck) DeleteVersions(ctx context.Context, volume string, versions []FileInfo) (errs []error) {
defer p.updateStorageMetrics(storageMetricDeleteVersions)() // Mererly for tracing storage
path := ""
if len(versions) > 0 {
path = versions[0].Name
}
defer p.updateStorageMetrics(storageMetricDeleteVersions, volume, path)()
errs = make([]error, len(versions))
select { select {
case <-ctx.Done(): case <-ctx.Done():
errs = make([]error, len(versions))
for i := range errs { for i := range errs {
errs[i] = ctx.Err() errs[i] = ctx.Err()
} }
@ -470,17 +481,17 @@ func (p *xlStorageDiskIDCheck) DeleteVersions(ctx context.Context, volume string
} }
if err := p.checkDiskStale(); err != nil { if err := p.checkDiskStale(); err != nil {
errs = make([]error, len(versions))
for i := range errs { for i := range errs {
errs[i] = err errs[i] = err
} }
return errs return errs
} }
return p.storage.DeleteVersions(ctx, volume, versions) return p.storage.DeleteVersions(ctx, volume, versions)
} }
func (p *xlStorageDiskIDCheck) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) error { func (p *xlStorageDiskIDCheck) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) error {
defer p.updateStorageMetrics(storageMetricVerifyFile)() defer p.updateStorageMetrics(storageMetricVerifyFile, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -496,7 +507,7 @@ func (p *xlStorageDiskIDCheck) VerifyFile(ctx context.Context, volume, path stri
} }
func (p *xlStorageDiskIDCheck) WriteAll(ctx context.Context, volume string, path string, b []byte) (err error) { func (p *xlStorageDiskIDCheck) WriteAll(ctx context.Context, volume string, path string, b []byte) (err error) {
defer p.updateStorageMetrics(storageMetricWriteAll)() defer p.updateStorageMetrics(storageMetricWriteAll, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -512,7 +523,7 @@ func (p *xlStorageDiskIDCheck) WriteAll(ctx context.Context, volume string, path
} }
func (p *xlStorageDiskIDCheck) DeleteVersion(ctx context.Context, volume, path string, fi FileInfo, forceDelMarker bool) (err error) { func (p *xlStorageDiskIDCheck) DeleteVersion(ctx context.Context, volume, path string, fi FileInfo, forceDelMarker bool) (err error) {
defer p.updateStorageMetrics(storageMetricDeleteVersion)() defer p.updateStorageMetrics(storageMetricDeleteVersion, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -528,7 +539,7 @@ func (p *xlStorageDiskIDCheck) DeleteVersion(ctx context.Context, volume, path s
} }
func (p *xlStorageDiskIDCheck) WriteMetadata(ctx context.Context, volume, path string, fi FileInfo) (err error) { func (p *xlStorageDiskIDCheck) WriteMetadata(ctx context.Context, volume, path string, fi FileInfo) (err error) {
defer p.updateStorageMetrics(storageMetricWriteMetadata)() defer p.updateStorageMetrics(storageMetricWriteMetadata, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -544,7 +555,7 @@ func (p *xlStorageDiskIDCheck) WriteMetadata(ctx context.Context, volume, path s
} }
func (p *xlStorageDiskIDCheck) ReadVersion(ctx context.Context, volume, path, versionID string, readData bool) (fi FileInfo, err error) { func (p *xlStorageDiskIDCheck) ReadVersion(ctx context.Context, volume, path, versionID string, readData bool) (fi FileInfo, err error) {
defer p.updateStorageMetrics(storageMetricReadVersion)() defer p.updateStorageMetrics(storageMetricReadVersion, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -560,7 +571,7 @@ func (p *xlStorageDiskIDCheck) ReadVersion(ctx context.Context, volume, path, ve
} }
func (p *xlStorageDiskIDCheck) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) { func (p *xlStorageDiskIDCheck) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) {
defer p.updateStorageMetrics(storageMetricReadAll)() defer p.updateStorageMetrics(storageMetricReadAll, volume, path)()
select { select {
case <-ctx.Done(): case <-ctx.Done():
@ -575,11 +586,31 @@ func (p *xlStorageDiskIDCheck) ReadAll(ctx context.Context, volume string, path
return p.storage.ReadAll(ctx, volume, path) return p.storage.ReadAll(ctx, volume, path)
} }
// Update storage metrics func storageTrace(s storageMetric, startTime time.Time, duration time.Duration, path string) trace.Info {
func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric) func() { return trace.Info{
startTime := time.Now() TraceType: trace.Storage,
return func() { Time: startTime,
atomic.AddUint64(&p.apiCalls[s], 1) NodeName: globalLocalNodeName,
p.apiLatencies[s].Add(float64(time.Since(startTime))) FuncName: s.String(),
StorageStats: trace.StorageStats{
Duration: duration,
Path: path,
},
}
}
// Update storage metrics
func (p *xlStorageDiskIDCheck) updateStorageMetrics(s storageMetric, paths ...string) func() {
startTime := time.Now()
trace := globalTrace.NumSubscribers() > 0
return func() {
duration := time.Since(startTime)
atomic.AddUint64(&p.apiCalls[s], 1)
p.apiLatencies[s].Add(float64(duration))
if trace {
globalTrace.Publish(storageTrace(s, startTime, duration, strings.Join(paths, " ")))
}
} }
} }

View File

@ -37,14 +37,15 @@ func main() {
if err != nil { if err != nil {
log.Fatalln(err) log.Fatalln(err)
} }
doneCh := make(chan struct{})
defer close(doneCh)
// Start listening on all http trace activity from all servers // Start listening on all http trace activity from all servers in the minio cluster.
// in the minio cluster. traceCh := madmClnt.ServiceTrace(context.Background(), madmin.ServiceTraceOpts{
allTrace := false S3: true,
errTrace := false Internal: true,
traceCh := madmClnt.ServiceTrace(context.Background(), allTrace, errTrace, doneCh) Storage: true,
OS: true,
Threshold: 0,
})
for traceInfo := range traceCh { for traceInfo := range traceCh {
if traceInfo.Err != nil { if traceInfo.Err != nil {
fmt.Println(traceInfo.Err) fmt.Println(traceInfo.Err)

View File

@ -23,6 +23,7 @@ import (
"net/http" "net/http"
"net/url" "net/url"
"strconv" "strconv"
"time"
trace "github.com/minio/minio/pkg/trace" trace "github.com/minio/minio/pkg/trace"
) )
@ -77,16 +78,30 @@ type ServiceTraceInfo struct {
Err error `json:"-"` Err error `json:"-"`
} }
// ServiceTraceOpts holds tracing options
type ServiceTraceOpts struct {
S3 bool
Internal bool
Storage bool
OS bool
OnlyErrors bool
Threshold time.Duration
}
// ServiceTrace - listen on http trace notifications. // ServiceTrace - listen on http trace notifications.
func (adm AdminClient) ServiceTrace(ctx context.Context, allTrace, errTrace bool) <-chan ServiceTraceInfo { func (adm AdminClient) ServiceTrace(ctx context.Context, opts ServiceTraceOpts) <-chan ServiceTraceInfo {
traceInfoCh := make(chan ServiceTraceInfo) traceInfoCh := make(chan ServiceTraceInfo)
// Only success, start a routine to start reading line by line. // Only success, start a routine to start reading line by line.
go func(traceInfoCh chan<- ServiceTraceInfo) { go func(traceInfoCh chan<- ServiceTraceInfo) {
defer close(traceInfoCh) defer close(traceInfoCh)
for { for {
urlValues := make(url.Values) urlValues := make(url.Values)
urlValues.Set("all", strconv.FormatBool(allTrace)) urlValues.Set("err", strconv.FormatBool(opts.OnlyErrors))
urlValues.Set("err", strconv.FormatBool(errTrace)) urlValues.Set("s3", strconv.FormatBool(opts.S3))
urlValues.Set("internal", strconv.FormatBool(opts.Internal))
urlValues.Set("storage", strconv.FormatBool(opts.Storage))
urlValues.Set("os", strconv.FormatBool(opts.OS))
urlValues.Set("threshold", opts.Threshold.String())
reqData := requestData{ reqData := requestData{
relPath: adminAPIPrefix + "/trace", relPath: adminAPIPrefix + "/trace",
queryValues: urlValues, queryValues: urlValues,

View File

@ -21,14 +21,45 @@ import (
"time" "time"
) )
// Type indicates the type of the tracing Info
type Type int
const (
// OS tracing (Golang os package calls)
OS Type = iota
// Storage tracing (MinIO Storage Layer)
Storage
// HTTP tracing (MinIO S3 & Internode)
HTTP
)
// Info - represents a trace record, additionally // Info - represents a trace record, additionally
// also reports errors if any while listening on trace. // also reports errors if any while listening on trace.
type Info struct { type Info struct {
TraceType Type `json:"type"`
NodeName string `json:"nodename"` NodeName string `json:"nodename"`
FuncName string `json:"funcname"` FuncName string `json:"funcname"`
Time time.Time `json:"time"`
ReqInfo RequestInfo `json:"request"` ReqInfo RequestInfo `json:"request"`
RespInfo ResponseInfo `json:"response"` RespInfo ResponseInfo `json:"response"`
CallStats CallStats `json:"stats"` CallStats CallStats `json:"stats"`
StorageStats StorageStats `json:"storageStats"`
OSStats OSStats `json:"osStats"`
}
// StorageStats statistics on MinIO Storage layer calls
type StorageStats struct {
Path string `json:"path"`
Duration time.Duration `json:"duration"`
}
// OSStats statistics on operating system specific calls.
type OSStats struct {
Path string `json:"path"`
Duration time.Duration `json:"duration"`
} }
// CallStats records request stats // CallStats records request stats