Add metrics for custom auth plugin (#16701)

This commit is contained in:
Aditya Manthramurthy 2023-02-27 09:55:18 -08:00 committed by GitHub
parent fe7bf6cbbc
commit 8cde38404d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 256 additions and 8 deletions

View File

@ -243,7 +243,7 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc
logger.LogIf(ctx, fmt.Errorf("Unable to initialize AuthNPlugin: %w", err))
}
setGlobalAuthNPlugin(idplugin.New(authNPluginCfg))
setGlobalAuthNPlugin(idplugin.New(GlobalContext, authNPluginCfg))
authZPluginCfg, err := polplugin.LookupConfig(s, GetDefaultConnSettings(), xhttp.DrainBody)
if err != nil {

View File

@ -1385,6 +1385,7 @@ func getIAMNodeMetrics() *MetricsGroup {
sinceLastSyncMillis = (uint64(time.Now().UnixNano()) - lastSyncTime) / uint64(time.Millisecond)
}
pluginAuthNMetrics := globalAuthNPlugin.Metrics()
metrics = []Metric{
{
Description: MetricDescription{
@ -1426,7 +1427,68 @@ func getIAMNodeMetrics() *MetricsGroup {
},
Value: float64(atomic.LoadUint64(&globalIAMSys.TotalRefreshFailures)),
},
{
Description: MetricDescription{
Namespace: nodeMetricNamespace,
Subsystem: iamSubsystem,
Name: "plugin_authn_service_last_succ_seconds",
Help: "When plugin authentication is configured, returns time (in seconds) since the last successful request to the service",
Type: gaugeMetric,
},
Value: pluginAuthNMetrics.LastReachableSecs,
},
{
Description: MetricDescription{
Namespace: nodeMetricNamespace,
Subsystem: iamSubsystem,
Name: "plugin_authn_service_last_fail_seconds",
Help: "When plugin authentication is configured, returns time (in seconds) since the last failed request to the service",
Type: gaugeMetric,
},
Value: pluginAuthNMetrics.LastUnreachableSecs,
},
{
Description: MetricDescription{
Namespace: nodeMetricNamespace,
Subsystem: iamSubsystem,
Name: "plugin_authn_service_total_requests_minute",
Help: "When plugin authentication is configured, returns total requests count in the last full minute",
Type: gaugeMetric,
},
Value: float64(pluginAuthNMetrics.TotalRequests),
},
{
Description: MetricDescription{
Namespace: nodeMetricNamespace,
Subsystem: iamSubsystem,
Name: "plugin_authn_service_failed_requests_minute",
Help: "When plugin authentication is configured, returns failed requests count in the last full minute",
Type: gaugeMetric,
},
Value: float64(pluginAuthNMetrics.FailedRequests),
},
{
Description: MetricDescription{
Namespace: nodeMetricNamespace,
Subsystem: iamSubsystem,
Name: "plugin_authn_service_succ_avg_rtt_ms_minute",
Help: "When plugin authentication is configured, returns average round-trip-time of successful requests in the last full minute",
Type: gaugeMetric,
},
Value: pluginAuthNMetrics.AvgSuccRTTMs,
},
{
Description: MetricDescription{
Namespace: nodeMetricNamespace,
Subsystem: iamSubsystem,
Name: "plugin_authn_service_succ_max_rtt_ms_minute",
Help: "When plugin authentication is configured, returns maximum round-trip-time of successful requests in the last full minute",
Type: gaugeMetric,
},
Value: pluginAuthNMetrics.MaxSuccRTTMs,
},
}
return metrics
})
return mg

View File

@ -19,6 +19,7 @@ package plugin
import (
"bytes"
"context"
"crypto/sha1"
"encoding/base64"
"encoding/json"
@ -27,9 +28,12 @@ import (
"net/http"
"net/url"
"regexp"
"sync"
"time"
"github.com/minio/minio/internal/arn"
"github.com/minio/minio/internal/config"
"github.com/minio/minio/internal/logger"
"github.com/minio/pkg/env"
xnet "github.com/minio/pkg/net"
)
@ -142,10 +146,92 @@ func (a *Args) Validate() error {
return nil
}
type serviceRTTMinuteStats struct {
statsTime time.Time
rttMsSum, maxRttMs float64
successRequestCount int64
failedRequestCount int64
}
type metrics struct {
sync.Mutex
LastCheckSuccess time.Time
LastCheckFailure time.Time
lastFullMinute serviceRTTMinuteStats
currentMinute serviceRTTMinuteStats
}
func (h *metrics) setConnSuccess(reqStartTime time.Time) {
h.Lock()
defer h.Unlock()
h.LastCheckSuccess = reqStartTime
}
func (h *metrics) setConnFailure(reqStartTime time.Time) {
h.Lock()
defer h.Unlock()
h.LastCheckFailure = reqStartTime
}
func (h *metrics) updateLastFullMinute(currReqMinute time.Time) {
// Assumes the caller has h.Lock()'ed
h.lastFullMinute = h.currentMinute
h.currentMinute = serviceRTTMinuteStats{
statsTime: currReqMinute,
}
}
func (h *metrics) accumRequestRTT(reqStartTime time.Time, rttMs float64, isSuccess bool) {
h.Lock()
defer h.Unlock()
// Update connectivity times
if isSuccess {
if reqStartTime.After(h.LastCheckSuccess) {
h.LastCheckSuccess = reqStartTime
}
} else {
if reqStartTime.After(h.LastCheckFailure) {
h.LastCheckFailure = reqStartTime
}
}
// Round the reqest time *down* to whole minute.
reqTimeMinute := reqStartTime.Truncate(time.Minute)
if reqTimeMinute.After(h.currentMinute.statsTime) {
// Drop the last full minute now, since we got a request for a time we
// are not yet tracking.
h.updateLastFullMinute(reqTimeMinute)
}
var entry *serviceRTTMinuteStats
if reqTimeMinute.Equal(h.currentMinute.statsTime) {
entry = &h.currentMinute
} else if reqTimeMinute.Equal(h.lastFullMinute.statsTime) {
entry = &h.lastFullMinute
} else {
// This request is too old, it should never happen, ignore it as we
// cannot return an error.
return
}
// Update stats
if isSuccess {
if entry.maxRttMs < rttMs {
entry.maxRttMs = rttMs
}
entry.rttMsSum += rttMs
entry.successRequestCount++
} else {
entry.failedRequestCount++
}
}
// AuthNPlugin - implements pluggable authentication via webhook.
type AuthNPlugin struct {
args Args
client *http.Client
args Args
client *http.Client
shutdownCtx context.Context
serviceMetrics *metrics
}
// Enabled returns if AuthNPlugin is enabled.
@ -218,14 +304,24 @@ func LookupConfig(kv config.KVS, transport *http.Transport, closeRespFn func(io.
}
// New - initializes Authorization Management Plugin.
func New(args Args) *AuthNPlugin {
func New(shutdownCtx context.Context, args Args) *AuthNPlugin {
if args.URL == nil || args.URL.Scheme == "" && args.AuthToken == "" {
return nil
}
return &AuthNPlugin{
args: args,
client: &http.Client{Transport: args.Transport},
plugin := AuthNPlugin{
args: args,
client: &http.Client{Transport: args.Transport},
shutdownCtx: shutdownCtx,
serviceMetrics: &metrics{
Mutex: sync.Mutex{},
LastCheckSuccess: time.Unix(0, 0),
LastCheckFailure: time.Unix(0, 0),
lastFullMinute: serviceRTTMinuteStats{},
currentMinute: serviceRTTMinuteStats{},
},
}
go plugin.doPeriodicHealthCheck()
return &plugin
}
// AuthNSuccessResponse - represents the response from the authentication plugin
@ -269,7 +365,9 @@ func (o *AuthNPlugin) Authenticate(roleArn arn.ARN, token string) (AuthNResponse
q.Set("token", token)
u.RawQuery = q.Encode()
req, err := http.NewRequest(http.MethodPost, u.String(), nil)
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
req, err := http.NewRequestWithContext(ctx, http.MethodPost, u.String(), nil)
if err != nil {
return AuthNResponse{}, err
}
@ -278,11 +376,15 @@ func (o *AuthNPlugin) Authenticate(roleArn arn.ARN, token string) (AuthNResponse
req.Header.Set("Authorization", o.args.AuthToken)
}
reqStartTime := time.Now()
resp, err := o.client.Do(req)
if err != nil {
o.serviceMetrics.accumRequestRTT(reqStartTime, 0, false)
return AuthNResponse{}, err
}
defer o.args.CloseRespFn(resp.Body)
reqDurNanos := time.Since(reqStartTime).Nanoseconds()
o.serviceMetrics.accumRequestRTT(reqStartTime, float64(reqDurNanos)/1e6, true)
switch resp.StatusCode {
case 200:
@ -320,3 +422,87 @@ func (o *AuthNPlugin) GetRoleInfo() map[arn.ARN]string {
o.args.RoleARN: o.args.RolePolicy,
}
}
// checkConnectivity returns true if we are able to connect to the plugin
// service.
func (o *AuthNPlugin) checkConnectivity(ctx context.Context) bool {
ctx, cancel := context.WithTimeout(ctx, healthCheckTimeout)
defer cancel()
u := url.URL(*o.args.URL)
req, err := http.NewRequestWithContext(ctx, http.MethodHead, u.String(), nil)
if err != nil {
logger.LogIf(ctx, err)
return false
}
if o.args.AuthToken != "" {
req.Header.Set("Authorization", o.args.AuthToken)
}
resp, err := o.client.Do(req)
if err != nil {
return false
}
defer o.args.CloseRespFn(resp.Body)
return true
}
var (
healthCheckInterval = 1 * time.Minute
healthCheckTimeout = 5 * time.Second
)
func (o *AuthNPlugin) doPeriodicHealthCheck() {
ticker := time.NewTicker(healthCheckInterval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
now := time.Now()
isConnected := o.checkConnectivity(o.shutdownCtx)
if isConnected {
o.serviceMetrics.setConnSuccess(now)
} else {
o.serviceMetrics.setConnFailure(now)
}
case <-o.shutdownCtx.Done():
return
}
}
}
// Metrics contains metrics about the authentication plugin service.
type Metrics struct {
LastReachableSecs, LastUnreachableSecs float64
// Last whole minute stats
TotalRequests, FailedRequests int64
AvgSuccRTTMs float64
MaxSuccRTTMs float64
}
// Metrics reports metrics related to plugin service reachability and stats for the last whole minute
func (o *AuthNPlugin) Metrics() Metrics {
if o == nil {
// Return empty metrics when not configured.
return Metrics{}
}
o.serviceMetrics.Lock()
defer o.serviceMetrics.Unlock()
l := &o.serviceMetrics.lastFullMinute
var avg float64
if l.successRequestCount > 0 {
avg = l.rttMsSum / float64(l.successRequestCount)
}
now := time.Now().UTC()
return Metrics{
LastReachableSecs: now.Sub(o.serviceMetrics.LastCheckSuccess).Seconds(),
LastUnreachableSecs: now.Sub(o.serviceMetrics.LastCheckFailure).Seconds(),
TotalRequests: l.failedRequestCount + l.successRequestCount,
FailedRequests: l.failedRequestCount,
AvgSuccRTTMs: avg,
MaxSuccRTTMs: l.maxRttMs,
}
}