From ed5ed7e490577457c927156f311de8d8ffa3cbec Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 22 Oct 2024 14:10:34 -0700 Subject: [PATCH] Trace ILM errors (#20576) Some paths would attempt transitions but in case of failures no traces would be emitted. Add traces (with errors) when transition operations fail. --- cmd/bucket-lifecycle.go | 15 ++++++++++----- cmd/data-scanner.go | 9 ++++++--- cmd/erasure-object.go | 2 ++ 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/cmd/bucket-lifecycle.go b/cmd/bucket-lifecycle.go index 29c88c529..eef2ee6e7 100644 --- a/cmd/bucket-lifecycle.go +++ b/cmd/bucket-lifecycle.go @@ -71,7 +71,7 @@ func NewLifecycleSys() *LifecycleSys { return &LifecycleSys{} } -func ilmTrace(startTime time.Time, duration time.Duration, oi ObjectInfo, event string, metadata map[string]string) madmin.TraceInfo { +func ilmTrace(startTime time.Time, duration time.Duration, oi ObjectInfo, event string, metadata map[string]string, err string) madmin.TraceInfo { sz, _ := oi.GetActualSize() return madmin.TraceInfo{ TraceType: madmin.TraceILM, @@ -81,18 +81,22 @@ func ilmTrace(startTime time.Time, duration time.Duration, oi ObjectInfo, event Duration: duration, Path: pathJoin(oi.Bucket, oi.Name), Bytes: sz, - Error: "", + Error: err, Message: getSource(4), Custom: metadata, } } -func (sys *LifecycleSys) trace(oi ObjectInfo) func(event string, metadata map[string]string) { +func (sys *LifecycleSys) trace(oi ObjectInfo) func(event string, metadata map[string]string, err error) { startTime := time.Now() - return func(event string, metadata map[string]string) { + return func(event string, metadata map[string]string, err error) { duration := time.Since(startTime) if globalTrace.NumSubscribers(madmin.TraceILM) > 0 { - globalTrace.Publish(ilmTrace(startTime, duration, oi, event, metadata)) + e := "" + if err != nil { + e = err.Error() + } + globalTrace.Publish(ilmTrace(startTime, duration, oi, event, metadata, e)) } } } @@ -362,6 +366,7 @@ func (es *expiryState) Worker(input <-chan expiryOp) { err := deleteObjectFromRemoteTier(es.ctx, oi.TransitionedObject.Name, oi.TransitionedObject.VersionID, oi.TransitionedObject.Tier) if ignoreNotFoundErr(err) != nil { transitionLogIf(es.ctx, err) + traceFn(ILMFreeVersionDelete, nil, err) return } diff --git a/cmd/data-scanner.go b/cmd/data-scanner.go index 3604d2281..1b9acee09 100644 --- a/cmd/data-scanner.go +++ b/cmd/data-scanner.go @@ -1325,10 +1325,13 @@ func applyExpiryOnNonTransitionedObjects(ctx context.Context, objLayer ObjectLay dobj, err = objLayer.DeleteObject(ctx, obj.Bucket, encodeDirObject(obj.Name), opts) if err != nil { if isErrObjectNotFound(err) || isErrVersionNotFound(err) { + traceFn(ILMExpiry, nil, nil) return false } // Assume it is still there. - ilmLogOnceIf(ctx, fmt.Errorf("DeleteObject(%s, %s): %w", obj.Bucket, obj.Name, err), "non-transition-expiry"+obj.Name) + err := fmt.Errorf("DeleteObject(%s, %s): %w", obj.Bucket, obj.Name, err) + ilmLogOnceIf(ctx, err, "non-transition-expiry"+obj.Name) + traceFn(ILMExpiry, nil, err) return false } if dobj.Name == "" { @@ -1549,7 +1552,7 @@ const ( ILMTransition = " ilm:transition" ) -func auditLogLifecycle(ctx context.Context, oi ObjectInfo, event string, tags map[string]string, traceFn func(event string, metadata map[string]string)) { +func auditLogLifecycle(ctx context.Context, oi ObjectInfo, event string, tags map[string]string, traceFn func(event string, metadata map[string]string, err error)) { var apiName string switch event { case ILMExpiry: @@ -1567,5 +1570,5 @@ func auditLogLifecycle(ctx context.Context, oi ObjectInfo, event string, tags ma VersionID: oi.VersionID, Tags: tags, }) - traceFn(event, tags) + traceFn(event, tags, nil) } diff --git a/cmd/erasure-object.go b/cmd/erasure-object.go index 367f489e9..8ee2ae6dd 100644 --- a/cmd/erasure-object.go +++ b/cmd/erasure-object.go @@ -2365,6 +2365,7 @@ func (er erasureObjects) TransitionObject(ctx context.Context, bucket, object st destObj, err := genTransitionObjName(bucket) if err != nil { + traceFn(ILMTransition, nil, err) return err } @@ -2378,6 +2379,7 @@ func (er erasureObjects) TransitionObject(ctx context.Context, bucket, object st rv, err = tgtClient.Put(ctx, destObj, pr, fi.Size) pr.CloseWithError(err) if err != nil { + traceFn(ILMTransition, nil, err) return err } fi.TransitionStatus = lifecycle.TransitionComplete