mirror of https://github.com/minio/minio.git
Add metrics for custom auth plugin (#16701)
This commit is contained in:
parent
fe7bf6cbbc
commit
8cde38404d
|
@ -243,7 +243,7 @@ func (sys *IAMSys) Init(ctx context.Context, objAPI ObjectLayer, etcdClient *etc
|
||||||
logger.LogIf(ctx, fmt.Errorf("Unable to initialize AuthNPlugin: %w", err))
|
logger.LogIf(ctx, fmt.Errorf("Unable to initialize AuthNPlugin: %w", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
setGlobalAuthNPlugin(idplugin.New(authNPluginCfg))
|
setGlobalAuthNPlugin(idplugin.New(GlobalContext, authNPluginCfg))
|
||||||
|
|
||||||
authZPluginCfg, err := polplugin.LookupConfig(s, GetDefaultConnSettings(), xhttp.DrainBody)
|
authZPluginCfg, err := polplugin.LookupConfig(s, GetDefaultConnSettings(), xhttp.DrainBody)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
|
@ -1385,6 +1385,7 @@ func getIAMNodeMetrics() *MetricsGroup {
|
||||||
sinceLastSyncMillis = (uint64(time.Now().UnixNano()) - lastSyncTime) / uint64(time.Millisecond)
|
sinceLastSyncMillis = (uint64(time.Now().UnixNano()) - lastSyncTime) / uint64(time.Millisecond)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pluginAuthNMetrics := globalAuthNPlugin.Metrics()
|
||||||
metrics = []Metric{
|
metrics = []Metric{
|
||||||
{
|
{
|
||||||
Description: MetricDescription{
|
Description: MetricDescription{
|
||||||
|
@ -1426,7 +1427,68 @@ func getIAMNodeMetrics() *MetricsGroup {
|
||||||
},
|
},
|
||||||
Value: float64(atomic.LoadUint64(&globalIAMSys.TotalRefreshFailures)),
|
Value: float64(atomic.LoadUint64(&globalIAMSys.TotalRefreshFailures)),
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
Description: MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: iamSubsystem,
|
||||||
|
Name: "plugin_authn_service_last_succ_seconds",
|
||||||
|
Help: "When plugin authentication is configured, returns time (in seconds) since the last successful request to the service",
|
||||||
|
Type: gaugeMetric,
|
||||||
|
},
|
||||||
|
Value: pluginAuthNMetrics.LastReachableSecs,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Description: MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: iamSubsystem,
|
||||||
|
Name: "plugin_authn_service_last_fail_seconds",
|
||||||
|
Help: "When plugin authentication is configured, returns time (in seconds) since the last failed request to the service",
|
||||||
|
Type: gaugeMetric,
|
||||||
|
},
|
||||||
|
Value: pluginAuthNMetrics.LastUnreachableSecs,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Description: MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: iamSubsystem,
|
||||||
|
Name: "plugin_authn_service_total_requests_minute",
|
||||||
|
Help: "When plugin authentication is configured, returns total requests count in the last full minute",
|
||||||
|
Type: gaugeMetric,
|
||||||
|
},
|
||||||
|
Value: float64(pluginAuthNMetrics.TotalRequests),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Description: MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: iamSubsystem,
|
||||||
|
Name: "plugin_authn_service_failed_requests_minute",
|
||||||
|
Help: "When plugin authentication is configured, returns failed requests count in the last full minute",
|
||||||
|
Type: gaugeMetric,
|
||||||
|
},
|
||||||
|
Value: float64(pluginAuthNMetrics.FailedRequests),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Description: MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: iamSubsystem,
|
||||||
|
Name: "plugin_authn_service_succ_avg_rtt_ms_minute",
|
||||||
|
Help: "When plugin authentication is configured, returns average round-trip-time of successful requests in the last full minute",
|
||||||
|
Type: gaugeMetric,
|
||||||
|
},
|
||||||
|
Value: pluginAuthNMetrics.AvgSuccRTTMs,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Description: MetricDescription{
|
||||||
|
Namespace: nodeMetricNamespace,
|
||||||
|
Subsystem: iamSubsystem,
|
||||||
|
Name: "plugin_authn_service_succ_max_rtt_ms_minute",
|
||||||
|
Help: "When plugin authentication is configured, returns maximum round-trip-time of successful requests in the last full minute",
|
||||||
|
Type: gaugeMetric,
|
||||||
|
},
|
||||||
|
Value: pluginAuthNMetrics.MaxSuccRTTMs,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
return metrics
|
return metrics
|
||||||
})
|
})
|
||||||
return mg
|
return mg
|
||||||
|
|
|
@ -19,6 +19,7 @@ package plugin
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"context"
|
||||||
"crypto/sha1"
|
"crypto/sha1"
|
||||||
"encoding/base64"
|
"encoding/base64"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
@ -27,9 +28,12 @@ import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/minio/minio/internal/arn"
|
"github.com/minio/minio/internal/arn"
|
||||||
"github.com/minio/minio/internal/config"
|
"github.com/minio/minio/internal/config"
|
||||||
|
"github.com/minio/minio/internal/logger"
|
||||||
"github.com/minio/pkg/env"
|
"github.com/minio/pkg/env"
|
||||||
xnet "github.com/minio/pkg/net"
|
xnet "github.com/minio/pkg/net"
|
||||||
)
|
)
|
||||||
|
@ -142,10 +146,92 @@ func (a *Args) Validate() error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// serviceRTTMinuteStats accumulates request statistics for one whole minute.
type serviceRTTMinuteStats struct {
	// statsTime is the start of the minute these stats belong to (a request
	// start time truncated to the minute).
	statsTime time.Time
	// rttMsSum and maxRttMs are the sum and the maximum of the round-trip
	// times (in milliseconds) of successful requests only.
	rttMsSum, maxRttMs  float64
	successRequestCount int64
	failedRequestCount  int64
}

// metrics holds the mutex-guarded reachability timestamps and per-minute
// request statistics of the plugin service.
type metrics struct {
	sync.Mutex
	// Start times of the most recent successful and failed request or
	// health check. They only ever move forward.
	LastCheckSuccess time.Time
	LastCheckFailure time.Time
	// lastFullMinute tracks the minute immediately before currentMinute.
	lastFullMinute serviceRTTMinuteStats
	currentMinute  serviceRTTMinuteStats
}

// setConnSuccess records a successful connectivity check that started at
// reqStartTime. An older timestamp never replaces a newer one, so a slow
// health check cannot rewind a time recorded by a later request.
func (h *metrics) setConnSuccess(reqStartTime time.Time) {
	h.Lock()
	defer h.Unlock()
	if reqStartTime.After(h.LastCheckSuccess) {
		h.LastCheckSuccess = reqStartTime
	}
}

// setConnFailure records a failed connectivity check that started at
// reqStartTime. An older timestamp never replaces a newer one.
func (h *metrics) setConnFailure(reqStartTime time.Time) {
	h.Lock()
	defer h.Unlock()
	if reqStartTime.After(h.LastCheckFailure) {
		h.LastCheckFailure = reqStartTime
	}
}

// updateLastFullMinute starts tracking currReqMinute as the current minute.
// The previous current-minute stats are kept as the last full minute only
// when they really belong to the minute right before currReqMinute; after a
// gap without requests the last full minute is reset to an empty entry so it
// never holds stale data.
func (h *metrics) updateLastFullMinute(currReqMinute time.Time) {
	// Assumes the caller has h.Lock()'ed
	prevMinute := currReqMinute.Add(-time.Minute)
	if h.currentMinute.statsTime.Equal(prevMinute) {
		h.lastFullMinute = h.currentMinute
	} else {
		h.lastFullMinute = serviceRTTMinuteStats{statsTime: prevMinute}
	}
	h.currentMinute = serviceRTTMinuteStats{statsTime: currReqMinute}
}

// accumRequestRTT records the outcome of one request that started at
// reqStartTime. rttMs is the round-trip time in milliseconds and is only
// accumulated for successful requests. Requests older than the last full
// minute are not added to the per-minute stats.
func (h *metrics) accumRequestRTT(reqStartTime time.Time, rttMs float64, isSuccess bool) {
	h.Lock()
	defer h.Unlock()

	// Update connectivity times
	switch {
	case isSuccess && reqStartTime.After(h.LastCheckSuccess):
		h.LastCheckSuccess = reqStartTime
	case !isSuccess && reqStartTime.After(h.LastCheckFailure):
		h.LastCheckFailure = reqStartTime
	}

	// Round the request time *down* to whole minute.
	reqTimeMinute := reqStartTime.Truncate(time.Minute)
	if reqTimeMinute.After(h.currentMinute.statsTime) {
		// Rotate the tracked minutes now, since we got a request for a time
		// we are not yet tracking.
		h.updateLastFullMinute(reqTimeMinute)
	}

	var entry *serviceRTTMinuteStats
	switch {
	case reqTimeMinute.Equal(h.currentMinute.statsTime):
		entry = &h.currentMinute
	case reqTimeMinute.Equal(h.lastFullMinute.statsTime):
		entry = &h.lastFullMinute
	default:
		// This request is too old, it should never happen, ignore it as we
		// cannot return an error.
		return
	}

	// Update stats
	if !isSuccess {
		entry.failedRequestCount++
		return
	}
	if entry.maxRttMs < rttMs {
		entry.maxRttMs = rttMs
	}
	entry.rttMsSum += rttMs
	entry.successRequestCount++
}
|
||||||
|
|
||||||
// AuthNPlugin - implements pluggable authentication via webhook.
|
// AuthNPlugin - implements pluggable authentication via webhook.
|
||||||
type AuthNPlugin struct {
|
type AuthNPlugin struct {
|
||||||
args Args
|
args Args
|
||||||
client *http.Client
|
client *http.Client
|
||||||
|
shutdownCtx context.Context
|
||||||
|
serviceMetrics *metrics
|
||||||
}
|
}
|
||||||
|
|
||||||
// Enabled returns if AuthNPlugin is enabled.
|
// Enabled returns if AuthNPlugin is enabled.
|
||||||
|
@ -218,14 +304,24 @@ func LookupConfig(kv config.KVS, transport *http.Transport, closeRespFn func(io.
|
||||||
}
|
}
|
||||||
|
|
||||||
// New - initializes Authentication Plugin.
|
// New - initializes Authentication Plugin.
|
||||||
func New(args Args) *AuthNPlugin {
|
func New(shutdownCtx context.Context, args Args) *AuthNPlugin {
|
||||||
if args.URL == nil || args.URL.Scheme == "" && args.AuthToken == "" {
|
if args.URL == nil || args.URL.Scheme == "" && args.AuthToken == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
return &AuthNPlugin{
|
plugin := AuthNPlugin{
|
||||||
args: args,
|
args: args,
|
||||||
client: &http.Client{Transport: args.Transport},
|
client: &http.Client{Transport: args.Transport},
|
||||||
|
shutdownCtx: shutdownCtx,
|
||||||
|
serviceMetrics: &metrics{
|
||||||
|
Mutex: sync.Mutex{},
|
||||||
|
LastCheckSuccess: time.Unix(0, 0),
|
||||||
|
LastCheckFailure: time.Unix(0, 0),
|
||||||
|
lastFullMinute: serviceRTTMinuteStats{},
|
||||||
|
currentMinute: serviceRTTMinuteStats{},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
go plugin.doPeriodicHealthCheck()
|
||||||
|
return &plugin
|
||||||
}
|
}
|
||||||
|
|
||||||
// AuthNSuccessResponse - represents the response from the authentication plugin
|
// AuthNSuccessResponse - represents the response from the authentication plugin
|
||||||
|
@ -269,7 +365,9 @@ func (o *AuthNPlugin) Authenticate(roleArn arn.ARN, token string) (AuthNResponse
|
||||||
q.Set("token", token)
|
q.Set("token", token)
|
||||||
u.RawQuery = q.Encode()
|
u.RawQuery = q.Encode()
|
||||||
|
|
||||||
req, err := http.NewRequest(http.MethodPost, u.String(), nil)
|
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||||
|
defer cancel()
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, u.String(), nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return AuthNResponse{}, err
|
return AuthNResponse{}, err
|
||||||
}
|
}
|
||||||
|
@ -278,11 +376,15 @@ func (o *AuthNPlugin) Authenticate(roleArn arn.ARN, token string) (AuthNResponse
|
||||||
req.Header.Set("Authorization", o.args.AuthToken)
|
req.Header.Set("Authorization", o.args.AuthToken)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reqStartTime := time.Now()
|
||||||
resp, err := o.client.Do(req)
|
resp, err := o.client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
o.serviceMetrics.accumRequestRTT(reqStartTime, 0, false)
|
||||||
return AuthNResponse{}, err
|
return AuthNResponse{}, err
|
||||||
}
|
}
|
||||||
defer o.args.CloseRespFn(resp.Body)
|
defer o.args.CloseRespFn(resp.Body)
|
||||||
|
reqDurNanos := time.Since(reqStartTime).Nanoseconds()
|
||||||
|
o.serviceMetrics.accumRequestRTT(reqStartTime, float64(reqDurNanos)/1e6, true)
|
||||||
|
|
||||||
switch resp.StatusCode {
|
switch resp.StatusCode {
|
||||||
case 200:
|
case 200:
|
||||||
|
@ -320,3 +422,87 @@ func (o *AuthNPlugin) GetRoleInfo() map[arn.ARN]string {
|
||||||
o.args.RoleARN: o.args.RolePolicy,
|
o.args.RoleARN: o.args.RolePolicy,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// checkConnectivity returns true if we are able to connect to the plugin
|
||||||
|
// service.
|
||||||
|
func (o *AuthNPlugin) checkConnectivity(ctx context.Context) bool {
|
||||||
|
ctx, cancel := context.WithTimeout(ctx, healthCheckTimeout)
|
||||||
|
defer cancel()
|
||||||
|
u := url.URL(*o.args.URL)
|
||||||
|
|
||||||
|
req, err := http.NewRequestWithContext(ctx, http.MethodHead, u.String(), nil)
|
||||||
|
if err != nil {
|
||||||
|
logger.LogIf(ctx, err)
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
if o.args.AuthToken != "" {
|
||||||
|
req.Header.Set("Authorization", o.args.AuthToken)
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := o.client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
defer o.args.CloseRespFn(resp.Body)
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
var (
|
||||||
|
healthCheckInterval = 1 * time.Minute
|
||||||
|
healthCheckTimeout = 5 * time.Second
|
||||||
|
)
|
||||||
|
|
||||||
|
func (o *AuthNPlugin) doPeriodicHealthCheck() {
|
||||||
|
ticker := time.NewTicker(healthCheckInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ticker.C:
|
||||||
|
now := time.Now()
|
||||||
|
isConnected := o.checkConnectivity(o.shutdownCtx)
|
||||||
|
if isConnected {
|
||||||
|
o.serviceMetrics.setConnSuccess(now)
|
||||||
|
} else {
|
||||||
|
o.serviceMetrics.setConnFailure(now)
|
||||||
|
}
|
||||||
|
case <-o.shutdownCtx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Metrics contains metrics about the authentication plugin service.
|
||||||
|
type Metrics struct {
|
||||||
|
LastReachableSecs, LastUnreachableSecs float64
|
||||||
|
|
||||||
|
// Last whole minute stats
|
||||||
|
TotalRequests, FailedRequests int64
|
||||||
|
AvgSuccRTTMs float64
|
||||||
|
MaxSuccRTTMs float64
|
||||||
|
}
|
||||||
|
|
||||||
|
// Metrics reports metrics related to plugin service reachability and stats for the last whole minute
|
||||||
|
func (o *AuthNPlugin) Metrics() Metrics {
|
||||||
|
if o == nil {
|
||||||
|
// Return empty metrics when not configured.
|
||||||
|
return Metrics{}
|
||||||
|
}
|
||||||
|
o.serviceMetrics.Lock()
|
||||||
|
defer o.serviceMetrics.Unlock()
|
||||||
|
l := &o.serviceMetrics.lastFullMinute
|
||||||
|
var avg float64
|
||||||
|
if l.successRequestCount > 0 {
|
||||||
|
avg = l.rttMsSum / float64(l.successRequestCount)
|
||||||
|
}
|
||||||
|
now := time.Now().UTC()
|
||||||
|
return Metrics{
|
||||||
|
LastReachableSecs: now.Sub(o.serviceMetrics.LastCheckSuccess).Seconds(),
|
||||||
|
LastUnreachableSecs: now.Sub(o.serviceMetrics.LastCheckFailure).Seconds(),
|
||||||
|
TotalRequests: l.failedRequestCount + l.successRequestCount,
|
||||||
|
FailedRequests: l.failedRequestCount,
|
||||||
|
AvgSuccRTTMs: avg,
|
||||||
|
MaxSuccRTTMs: l.maxRttMs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue