Export bucket usage counts as part of bucket metrics (#9710)

Bonus fixes in quota enforcement to use the
new data structure, and to use timedValue to
cache and automatically reload the value,
which removes one global variable.
Author: Harshavardhana, 2020-05-27 06:45:43 -07:00
Committed by: GitHub
parent cccf2de129
commit 53aaa5d2a5
9 changed files with 281 additions and 115 deletions
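The commit message refers to timedValue. From how the bucket-quota diff below uses it (Once, TTL, Update, Get), it is a small TTL-based cache. The following is a minimal sketch of that pattern, assuming only the interface visible in the diff; the internals here are illustrative, not MinIO's exact implementation.

package main

import (
	"fmt"
	"sync"
	"time"
)

// timedValue caches a value and reloads it via Update once the TTL expires.
// Field names mirror the usage in the commit below; internals are assumed.
type timedValue struct {
	Once   sync.Once                   // guards one-time TTL/Update setup
	TTL    time.Duration               // how long a cached value stays fresh
	Update func() (interface{}, error) // recomputes the value when stale

	mu         sync.Mutex
	value      interface{}
	lastUpdate time.Time
}

// Get returns the cached value while it is fresh, otherwise calls Update
// and caches the result.
func (t *timedValue) Get() (interface{}, error) {
	t.mu.Lock()
	defer t.mu.Unlock()
	if t.value != nil && time.Since(t.lastUpdate) < t.TTL {
		return t.value, nil
	}
	v, err := t.Update()
	if err != nil {
		return nil, err
	}
	t.value, t.lastUpdate = v, time.Now()
	return v, nil
}

func main() {
	var tv timedValue
	tv.Once.Do(func() {
		tv.TTL = 10 * time.Second
		tv.Update = func() (interface{}, error) { return time.Now(), nil }
	})
	v, _ := tv.Get() // loads once; calls within 10s reuse the cached value
	fmt.Println(v)
}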


@@ -654,8 +654,8 @@ func (a adminAPIHandlers) AccountUsageInfoHandler(w http.ResponseWriter, r *http
 		if rd || wr {
 			var size uint64
 			// Fetch the data usage of the current bucket
-			if !dataUsageInfo.LastUpdate.IsZero() && len(dataUsageInfo.BucketsSizes) > 0 {
-				size = dataUsageInfo.BucketsSizes[bucket.Name]
+			if !dataUsageInfo.LastUpdate.IsZero() {
+				size = dataUsageInfo.BucketsUsage[bucket.Name].Size
 			}
 			acctInfo.Buckets = append(acctInfo.Buckets, madmin.BucketUsageInfo{
 				Name: bucket.Name,


@@ -1329,7 +1329,7 @@ func (a adminAPIHandlers) ServerInfoHandler(w http.ResponseWriter, r *http.Reque
 	dataUsageInfo, err := loadDataUsageFromBackend(ctx, objectAPI)
 	if err == nil {
 		buckets = madmin.Buckets{Count: dataUsageInfo.BucketsCount}
-		objects = madmin.Objects{Count: dataUsageInfo.ObjectsCount}
+		objects = madmin.Objects{Count: dataUsageInfo.ObjectsTotalCount}
 		usage = madmin.Usage{Size: dataUsageInfo.ObjectsTotalSize}
 	}


@@ -20,7 +20,6 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
-	"sync"
 	"time"

 	"github.com/minio/minio/cmd/config"

@@ -31,7 +30,9 @@ import (
 )

 // BucketQuotaSys - map of bucket and quota configuration.
-type BucketQuotaSys struct{}
+type BucketQuotaSys struct {
+	bucketStorageCache timedValue
+}

 // Get - Get quota configuration.
 func (sys *BucketQuotaSys) Get(bucketName string) (*madmin.BucketQuota, error) {
@@ -63,40 +64,13 @@ func parseBucketQuota(bucket string, data []byte) (quotaCfg *madmin.BucketQuota,
 	return
 }

-type bucketStorageCache struct {
-	bucketsSizes map[string]uint64
-	lastUpdate   time.Time
-	mu           sync.Mutex
-}
-
-func (b *bucketStorageCache) check(ctx context.Context, q *madmin.BucketQuota, bucket string, size int64) error {
-	if q.Quota == 0 {
-		// No quota set return quickly.
-		return nil
+func (sys *BucketQuotaSys) check(ctx context.Context, bucket string, size int64) error {
+	objAPI := newObjectLayerWithoutSafeModeFn()
+	if objAPI == nil {
+		return errServerNotInitialized
 	}
-	b.mu.Lock()
-	defer b.mu.Unlock()
-	if time.Since(b.lastUpdate) > 10*time.Second {
-		dui, err := loadDataUsageFromBackend(ctx, newObjectLayerWithoutSafeModeFn())
-		if err != nil {
-			return err
-		}
-		b.lastUpdate = time.Now()
-		b.bucketsSizes = dui.BucketsSizes
-	}
-	currUsage := b.bucketsSizes[bucket]
-	if (currUsage + uint64(size)) > q.Quota {
-		return BucketQuotaExceeded{Bucket: bucket}
-	}
-	return nil
-}
-
-func enforceBucketQuota(ctx context.Context, bucket string, size int64) error {
-	if size < 0 {
-		return nil
-	}
-
-	q, err := globalBucketQuotaSys.Get(bucket)
+	q, err := sys.Get(bucket)
 	if err != nil {
 		return nil
 	}
@@ -105,7 +79,45 @@ func enforceBucketQuota(ctx context.Context, bucket string, size int64) error {
 		return nil
 	}

-	return globalBucketStorageCache.check(ctx, q, bucket, size)
+	if q.Quota == 0 {
+		// No quota set return quickly.
+		return nil
+	}
+
+	sys.bucketStorageCache.Once.Do(func() {
+		sys.bucketStorageCache.TTL = 10 * time.Second
+		sys.bucketStorageCache.Update = func() (interface{}, error) {
+			return loadDataUsageFromBackend(ctx, objAPI)
+		}
+	})
+
+	v, err := sys.bucketStorageCache.Get()
+	if err != nil {
+		return err
+	}
+
+	dui := v.(DataUsageInfo)
+
+	bui, ok := dui.BucketsUsage[bucket]
+	if !ok {
+		// bucket not found, cannot enforce quota
+		// call will fail anyways later.
+		return nil
+	}
+
+	if (bui.Size + uint64(size)) > q.Quota {
+		return BucketQuotaExceeded{Bucket: bucket}
+	}
+
+	return nil
+}
+
+func enforceBucketQuota(ctx context.Context, bucket string, size int64) error {
+	if size < 0 {
+		return nil
+	}
+
+	return globalBucketQuotaSys.check(ctx, bucket, size)
 }
 const (

@@ -139,29 +151,40 @@ func enforceFIFOQuota(ctx context.Context, objectAPI ObjectLayer) error {
 	if env.Get(envDataUsageCrawlConf, config.EnableOn) == config.EnableOff {
 		return nil
 	}

 	buckets, err := objectAPI.ListBuckets(ctx)
 	if err != nil {
 		return err
 	}

+	dataUsageInfo, err := loadDataUsageFromBackend(ctx, objectAPI)
+	if err != nil {
+		return err
+	}
+
 	for _, binfo := range buckets {
 		bucket := binfo.Name

+		bui, ok := dataUsageInfo.BucketsUsage[bucket]
+		if !ok {
+			// bucket doesn't exist anymore, or we
+			// do not have any information to proceed.
+			continue
+		}
+
 		// Check if the current bucket has quota restrictions, if not skip it
 		cfg, err := globalBucketQuotaSys.Get(bucket)
 		if err != nil {
 			continue
 		}

 		if cfg.Type != madmin.FIFOQuota {
 			continue
 		}

-		dataUsageInfo, err := loadDataUsageFromBackend(ctx, objectAPI)
-		if err != nil {
-			return err
-		}
-
 		var toFree uint64
-		if dataUsageInfo.BucketsSizes[bucket] > cfg.Quota && cfg.Quota > 0 {
-			toFree = dataUsageInfo.BucketsSizes[bucket] - cfg.Quota
+		if bui.Size > cfg.Quota && cfg.Quota > 0 {
+			toFree = bui.Size - cfg.Quota
 		}

 		if toFree == 0 {

@@ -175,6 +198,7 @@ func enforceFIFOQuota(ctx context.Context, objectAPI ObjectLayer) error {
 		if err := objectAPI.Walk(ctx, bucket, "", objInfoCh); err != nil {
 			return err
 		}

 		// reuse the fileScorer used by disk cache to score entries by
 		// ModTime to find the oldest objects in bucket to delete. In
 		// the context of bucket quota enforcement - number of hits are

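The (truncated) comment above refers to reusing the disk cache's fileScorer to pick deletion candidates. Conceptually, FIFO quota enforcement walks the bucket, orders objects by ModTime, and deletes the oldest until roughly toFree bytes are reclaimed. Below is a rough, self-contained sketch of that oldest-first selection; objEntry and oldestToFree are hypothetical names, and the real fileScorer in MinIO's disk-cache code also weighs other factors such as hit counts.

package main

import (
	"fmt"
	"sort"
	"time"
)

// objEntry is a hypothetical stand-in for an object listing entry.
type objEntry struct {
	Name    string
	Size    uint64
	ModTime time.Time
}

// oldestToFree sorts entries by ModTime and picks the oldest ones until
// their cumulative size reaches at least toFree bytes.
func oldestToFree(objs []objEntry, toFree uint64) []objEntry {
	sort.Slice(objs, func(i, j int) bool {
		return objs[i].ModTime.Before(objs[j].ModTime)
	})
	var freed uint64
	var victims []objEntry
	for _, o := range objs {
		if freed >= toFree {
			break
		}
		victims = append(victims, o)
		freed += o.Size
	}
	return victims
}

func main() {
	now := time.Now()
	objs := []objEntry{
		{Name: "new.bin", Size: 100, ModTime: now},
		{Name: "old.bin", Size: 100, ModTime: now.Add(-48 * time.Hour)},
	}
	// old.bin is selected first because it is the oldest.
	fmt.Println(oldestToFree(objs, 100))
}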

@@ -116,16 +116,16 @@ func (d *dataUsageCache) find(path string) *dataUsageEntry {
 func (d *dataUsageCache) dui(path string, buckets []BucketInfo) DataUsageInfo {
 	e := d.find(path)
 	if e == nil {
-		return DataUsageInfo{LastUpdate: UTCNow()}
+		// No entry found, return empty.
+		return DataUsageInfo{}
 	}
 	flat := d.flatten(*e)
 	return DataUsageInfo{
-		LastUpdate:            d.Info.LastUpdate,
-		ObjectsCount:          flat.Objects,
-		ObjectsTotalSize:      uint64(flat.Size),
-		ObjectsSizesHistogram: flat.ObjSizes.asMap(),
-		BucketsCount:          uint64(len(e.Children)),
-		BucketsSizes:          d.pathSizes(buckets),
+		LastUpdate:        d.Info.LastUpdate,
+		ObjectsTotalCount: flat.Objects,
+		ObjectsTotalSize:  uint64(flat.Size),
+		BucketsCount:      uint64(len(e.Children)),
+		BucketsUsage:      d.bucketsUsageInfo(buckets),
 	}
 }

@@ -232,25 +232,30 @@ func (h *sizeHistogram) add(size int64) {
 	}
 }

-// asMap returns the map as a map[string]uint64.
-func (h *sizeHistogram) asMap() map[string]uint64 {
-	res := make(map[string]uint64, 7)
+// toMap returns the map to a map[string]uint64.
+func (h *sizeHistogram) toMap() map[string]uint64 {
+	res := make(map[string]uint64, dataUsageBucketLen)
 	for i, count := range h {
 		res[ObjectsHistogramIntervals[i].name] = count
 	}
 	return res
 }

-// pathSizes returns the path sizes as a map.
-func (d *dataUsageCache) pathSizes(buckets []BucketInfo) map[string]uint64 {
-	var dst = make(map[string]uint64, len(buckets))
+// bucketsUsageInfo returns the buckets usage info as a map, with
+// key as bucket name
+func (d *dataUsageCache) bucketsUsageInfo(buckets []BucketInfo) map[string]BucketUsageInfo {
+	var dst = make(map[string]BucketUsageInfo, len(buckets))
 	for _, bucket := range buckets {
 		e := d.find(bucket.Name)
 		if e == nil {
 			continue
 		}
 		flat := d.flatten(*e)
-		dst[bucket.Name] = uint64(flat.Size)
+		dst[bucket.Name] = BucketUsageInfo{
+			Size:                 uint64(flat.Size),
+			ObjectsCount:         uint64(flat.Objects),
+			ObjectSizesHistogram: flat.ObjSizes.toMap(),
+		}
 	}
 	return dst
 }


@@ -154,6 +154,22 @@ func loadDataUsageFromBackend(ctx context.Context, objAPI ObjectLayer) (DataUsag
 		return DataUsageInfo{}, err
 	}

+	// For forward compatibility reasons, we need to add this code.
+	if len(dataUsageInfo.BucketsUsage) == 0 {
+		dataUsageInfo.BucketsUsage = make(map[string]BucketUsageInfo, len(dataUsageInfo.BucketSizes))
+		for bucket, size := range dataUsageInfo.BucketSizes {
+			dataUsageInfo.BucketsUsage[bucket] = BucketUsageInfo{Size: size}
+		}
+	}
+
+	// For backward compatibility reasons, we need to add this code.
+	if len(dataUsageInfo.BucketSizes) == 0 {
+		dataUsageInfo.BucketSizes = make(map[string]uint64, len(dataUsageInfo.BucketsUsage))
+		for bucket, bui := range dataUsageInfo.BucketsUsage {
+			dataUsageInfo.BucketSizes[bucket] = bui.Size
+		}
+	}
+
 	return dataUsageInfo, nil
 }


@@ -215,7 +215,6 @@ var (
 	globalBucketObjectLockSys *BucketObjectLockSys
 	globalBucketQuotaSys      *BucketQuotaSys
-	globalBucketStorageCache  bucketStorageCache

 	// Disk cache drives
 	globalCacheConfig cache.Config


@@ -1,5 +1,5 @@
 /*
- * MinIO Cloud Storage, (C) 2018 MinIO, Inc.
+ * MinIO Cloud Storage, (C) 2018-2020 MinIO, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.

@@ -22,7 +22,9 @@ import (
 	"sync/atomic"
 	"time"

+	"github.com/minio/minio/cmd/config"
 	"github.com/minio/minio/cmd/logger"
+	"github.com/minio/minio/pkg/env"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/prometheus/client_golang/prometheus/promhttp"
 )

@@ -84,6 +86,7 @@ func (c *minioCollector) Collect(ch chan<- prometheus.Metric) {
 	minioVersionInfo.WithLabelValues(Version, CommitID).Set(float64(1.0))

 	storageMetricsPrometheus(ch)
+	bucketUsageMetricsPrometheus(ch)
 	networkMetricsPrometheus(ch)
 	httpMetricsPrometheus(ch)
 	cacheMetricsPrometheus(ch)
@@ -345,6 +348,65 @@ func networkMetricsPrometheus(ch chan<- prometheus.Metric) {
 	)
 }

+// Populates prometheus with bucket usage metrics, this metrics
+// is only enabled if crawler is enabled.
+func bucketUsageMetricsPrometheus(ch chan<- prometheus.Metric) {
+	objLayer := newObjectLayerWithoutSafeModeFn()
+	// Service not initialized yet
+	if objLayer == nil {
+		return
+	}
+
+	// Crawler disabled, nothing to do.
+	if env.Get(envDataUsageCrawlConf, config.EnableOn) != config.EnableOn {
+		return
+	}
+
+	dataUsageInfo, err := loadDataUsageFromBackend(GlobalContext, objLayer)
+	if err != nil {
+		return
+	}
+
+	// data usage has not captured any data yet.
+	if dataUsageInfo.LastUpdate.IsZero() {
+		return
+	}
+
+	for bucket, usageInfo := range dataUsageInfo.BucketsUsage {
+		// Total space used by bucket
+		ch <- prometheus.MustNewConstMetric(
+			prometheus.NewDesc(
+				prometheus.BuildFQName("bucket", "usage", "size"),
+				"Total bucket size",
+				[]string{"bucket"}, nil),
+			prometheus.GaugeValue,
+			float64(usageInfo.Size),
+			bucket,
+		)
+		ch <- prometheus.MustNewConstMetric(
+			prometheus.NewDesc(
+				prometheus.BuildFQName("bucket", "objects", "count"),
+				"Total number of objects in a bucket",
+				[]string{"bucket"}, nil),
+			prometheus.GaugeValue,
+			float64(usageInfo.ObjectsCount),
+			bucket,
+		)
+		for k, v := range usageInfo.ObjectSizesHistogram {
+			ch <- prometheus.MustNewConstMetric(
+				prometheus.NewDesc(
+					prometheus.BuildFQName("bucket", "objects", "histogram"),
+					"Total number of objects of different sizes in a bucket",
+					[]string{"bucket", "object_size"}, nil),
+				prometheus.GaugeValue,
+				float64(v),
+				bucket,
+				k,
+			)
+		}
+	}
+}
+
 // collects storage metrics for MinIO server in Prometheus specific format
 // and sends to given channel
 func storageMetricsPrometheus(ch chan<- prometheus.Metric) {

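For reference, given the BuildFQName calls above, the new metrics surface on the Prometheus endpoint under the names below; the bucket label and values are illustrative:

bucket_usage_size{bucket="mybucket"} 5.36870912e+08
bucket_objects_count{bucket="mybucket"} 26
bucket_objects_histogram{bucket="mybucket",object_size="LESS_THAN_1024_B"} 3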

@@ -21,6 +21,7 @@ import (
 	"math"
 	"time"

+	humanize "github.com/dustin/go-humanize"
 	"github.com/minio/minio/pkg/hash"
 	"github.com/minio/minio/pkg/madmin"
 )

@@ -86,13 +87,23 @@ const (
 // ObjectsHistogramIntervals is the list of all intervals
 // of object sizes to be included in objects histogram.
 var ObjectsHistogramIntervals = []objectHistogramInterval{
-	{"LESS_THAN_1024_B", -1, 1024 - 1},
-	{"BETWEEN_1024_B_AND_1_MB", 1024, 1024*1024 - 1},
-	{"BETWEEN_1_MB_AND_10_MB", 1024 * 1024, 1024*1024*10 - 1},
-	{"BETWEEN_10_MB_AND_64_MB", 1024 * 1024 * 10, 1024*1024*64 - 1},
-	{"BETWEEN_64_MB_AND_128_MB", 1024 * 1024 * 64, 1024*1024*128 - 1},
-	{"BETWEEN_128_MB_AND_512_MB", 1024 * 1024 * 128, 1024*1024*512 - 1},
-	{"GREATER_THAN_512_MB", 1024 * 1024 * 512, math.MaxInt64},
+	{"LESS_THAN_1024_B", 0, humanize.KiByte - 1},
+	{"BETWEEN_1024_B_AND_1_MB", humanize.KiByte, humanize.MiByte - 1},
+	{"BETWEEN_1_MB_AND_10_MB", humanize.MiByte, humanize.MiByte*10 - 1},
+	{"BETWEEN_10_MB_AND_64_MB", humanize.MiByte * 10, humanize.MiByte*64 - 1},
+	{"BETWEEN_64_MB_AND_128_MB", humanize.MiByte * 64, humanize.MiByte*128 - 1},
+	{"BETWEEN_128_MB_AND_512_MB", humanize.MiByte * 128, humanize.MiByte*512 - 1},
+	{"GREATER_THAN_512_MB", humanize.MiByte * 512, math.MaxInt64},
 }

+// BucketUsageInfo - bucket usage info provides
+// - total size of the bucket
+// - total objects in a bucket
+// - object size histogram per bucket
+type BucketUsageInfo struct {
+	Size                 uint64            `json:"size"`
+	ObjectsCount         uint64            `json:"objectsCount"`
+	ObjectSizesHistogram map[string]uint64 `json:"objectsSizesHistogram"`
+}
+
 // DataUsageInfo represents data usage stats of the underlying Object API

@@ -101,17 +112,23 @@ type DataUsageInfo struct {
 	// This does not indicate a full scan.
 	LastUpdate time.Time `json:"lastUpdate"`

-	ObjectsCount uint64 `json:"objectsCount"`
-	// Objects total size
+	// Objects total count across all buckets
+	ObjectsTotalCount uint64 `json:"objectsCount"`

+	// Objects total size across all buckets
 	ObjectsTotalSize uint64 `json:"objectsTotalSize"`

 	// ObjectsSizesHistogram contains information on objects across all buckets.
 	// See ObjectsHistogramIntervals.
 	ObjectsSizesHistogram map[string]uint64 `json:"objectsSizesHistogram"`

 	// Total number of buckets in this cluster
 	BucketsCount uint64 `json:"bucketsCount"`

-	// BucketsSizes is "bucket name" -> size.
-	BucketsSizes map[string]uint64 `json:"bucketsSizes"`
+	// Buckets usage info provides following information across all buckets
+	// - total size of the bucket
+	// - total objects in a bucket
+	// - object size histogram per bucket
+	BucketsUsage map[string]BucketUsageInfo `json:"bucketsUsageInfo"`
+
+	// Deprecated kept here for backward compatibility reasons.
+	BucketSizes map[string]uint64 `json:"bucketsSizes"`
 }

 // BucketInfo - represents bucket metadata.
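Given the json tags above, a serialized DataUsageInfo now looks roughly as follows; the bucket name and numbers are illustrative. Note that the deprecated bucketsSizes map is still emitted alongside the new bucketsUsageInfo map for backward compatibility:

{
  "lastUpdate": "2020-05-27T13:45:43Z",
  "objectsCount": 26,
  "objectsTotalSize": 536870912,
  "objectsSizesHistogram": { "BETWEEN_10_MB_AND_64_MB": 26 },
  "bucketsCount": 1,
  "bucketsUsageInfo": {
    "mybucket": {
      "size": 536870912,
      "objectsCount": 26,
      "objectsSizesHistogram": { "BETWEEN_10_MB_AND_64_MB": 26 }
    }
  },
  "bucketsSizes": { "mybucket": 536870912 }
}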