re-implement data usage crawler to be more efficient (#9075)

Implementation overview: 

https://gist.github.com/klauspost/1801c858d5e0df391114436fdad6987b
This commit is contained in:
Klaus Post
2020-03-19 00:19:29 +01:00
committed by GitHub
parent 7fdeb44372
commit 8d98662633
61 changed files with 2895 additions and 543 deletions

View File

@@ -64,14 +64,14 @@ func (err ErrInvalidFilterName) Error() string {
type ErrFilterNamePrefix struct{}
func (err ErrFilterNamePrefix) Error() string {
return fmt.Sprintf("more than one prefix in filter rule")
return "more than one prefix in filter rule"
}
// ErrFilterNameSuffix - more than one suffix usage error.
type ErrFilterNameSuffix struct{}
func (err ErrFilterNameSuffix) Error() string {
return fmt.Sprintf("more than one suffix in filter rule")
return "more than one suffix in filter rule"
}
// ErrInvalidFilterValue - invalid filter value error.

View File

@@ -49,6 +49,8 @@ type AMQPArgs struct {
QueueLimit uint64 `json:"queueLimit"`
}
//lint:file-ignore ST1003 We cannot change these exported names.
// AMQP input constants.
const (
AmqpQueueDir = "queue_dir"

View File

@@ -91,7 +91,8 @@ func (f *Forwarder) getURLFromRequest(req *http.Request) *url.URL {
func copyURL(i *url.URL) *url.URL {
out := *i
if i.User != nil {
out.User = &(*i.User)
u := *i.User
out.User = &u
}
return &out
}

View File

@@ -78,7 +78,7 @@ type LimitWriter struct {
wLimit int64
}
// Implements the io.Writer interface limiting up to
// Write implements the io.Writer interface limiting up to
// configured length, also skips the first N bytes.
func (w *LimitWriter) Write(p []byte) (n int, err error) {
n = len(p)

View File

@@ -80,6 +80,7 @@ func TryLockedOpenFile(path string, flag int, perm os.FileMode) (*LockedFile, er
switch flag {
case syscall.O_RDONLY:
// https://docs.microsoft.com/en-us/windows/desktop/api/fileapi/nf-fileapi-lockfileex
//lint:ignore SA4016 The no-op "| 0" is intentional; it spells out the shared-lock flag value.
lockType = lockFileFailImmediately | 0 // Set this to enable shared lock and fail immediately.
}
return lockedOpenFile(path, flag, perm, lockType)

View File

@@ -21,6 +21,7 @@ import (
"encoding/json"
"errors"
"io/ioutil"
"math"
"net/http"
"net/url"
"strconv"
@@ -155,17 +156,24 @@ var ObjectsHistogramIntervals = []objectHistogramInterval{
{"BETWEEN_10_MB_AND_64_MB", 1024 * 1024 * 10, 1024*1024*64 - 1},
{"BETWEEN_64_MB_AND_128_MB", 1024 * 1024 * 64, 1024*1024*128 - 1},
{"BETWEEN_128_MB_AND_512_MB", 1024 * 1024 * 128, 1024*1024*512 - 1},
{"GREATER_THAN_512_MB", 1024 * 1024 * 512, -1},
{"GREATER_THAN_512_MB", 1024 * 1024 * 512, math.MaxInt64},
}
// DataUsageInfo represents data usage of an Object API
type DataUsageInfo struct {
LastUpdate time.Time `json:"lastUpdate"`
ObjectsCount uint64 `json:"objectsCount"`
ObjectsTotalSize uint64 `json:"objectsTotalSize"`
// LastUpdate is the timestamp of when the data usage info was last updated.
// This does not indicate a full scan.
LastUpdate time.Time `json:"lastUpdate"`
ObjectsCount uint64 `json:"objectsCount"`
ObjectsTotalSize uint64 `json:"objectsTotalSize"`
// ObjectsSizesHistogram contains information on objects across all buckets.
// See ObjectsHistogramIntervals.
ObjectsSizesHistogram map[string]uint64 `json:"objectsSizesHistogram"`
BucketsCount uint64 `json:"bucketsCount"`
BucketsCount uint64 `json:"bucketsCount"`
// BucketsSizes is "bucket name" -> size.
BucketsSizes map[string]uint64 `json:"bucketsSizes"`
}

View File

@@ -50,7 +50,7 @@ func (e *JSONPathElement) String() string {
return ""
}
// Removes double quotes in quoted identifiers
// String removes double quotes in quoted identifiers
func (i *Identifier) String() string {
if i.Unquoted != nil {
return *i.Unquoted