mirror of https://github.com/minio/minio.git, synced 2025-03-30 17:23:42 -04:00
add backups for usage-caches to rely on upon error (#18029)
This allows the scanner to avoid lengthy scans, skip work appropriately, and not lose metrics. It also reduces the overly long deadlines for usage-cache loads/saves to match the disk timeout, which is now 2 minutes per IOP.
This commit is contained in:
parent 822cbd4b43
commit a2aabfabd9
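
Note on the overall shape of the change: every usage cache is now written twice, once under its primary name and once as a ".bkp" object, and reads fall back to the backup when the primary read fails. A minimal sketch of that pattern, assuming hypothetical writeObj/readObj helpers in place of MinIO's actual ObjectLayer calls:

package usagebackup

import (
	"context"
	"fmt"
	"time"
)

// writeObj and readObj are hypothetical stand-ins for the object store calls
// used by the scanner (PutObject / GetObjectNInfo with NoLock).
func writeObj(ctx context.Context, name string, data []byte) error { return nil }
func readObj(ctx context.Context, name string) ([]byte, error)     { return nil, nil }

// saveWithBackup keeps a best-effort ".bkp" copy next to the primary object.
// Both timeouts stay under the 2-minute per-IOP drive timeout mentioned above.
func saveWithBackup(ctx context.Context, name string, data []byte) error {
	bctx, bcancel := context.WithTimeout(ctx, 30*time.Second)
	defer bcancel()
	_ = writeObj(bctx, name+".bkp", data) // backup failures are ignored

	pctx, pcancel := context.WithTimeout(ctx, time.Minute)
	defer pcancel()
	return writeObj(pctx, name, data)
}

// loadWithFallback reads the primary object and falls back to the backup.
func loadWithFallback(ctx context.Context, name string) ([]byte, error) {
	if data, err := readObj(ctx, name); err == nil {
		return data, nil
	}
	data, err := readObj(ctx, name+".bkp")
	if err != nil {
		return nil, fmt.Errorf("primary and backup reads failed: %w", err)
	}
	return data, nil
}

The diff below applies this idea to dataUsageCache.load/save, storeDataUsageInBackend, and loadDataUsageFromBackend, and additionally tightens retry loops in the replication path.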
@@ -1501,9 +1501,18 @@ func replicateObjectWithMultipart(ctx context.Context, c *minio.Core, bucket, ob
     var uploadedParts []minio.CompletePart
     // new multipart must not set mtime as it may lead to erroneous cleanups at various intervals.
     opts.Internal.SourceMTime = time.Time{} // this value is saved properly in CompleteMultipartUpload()
-    nctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
-    defer cancel()
-    uploadID, err := c.NewMultipartUpload(nctx, bucket, object, opts)
+    var uploadID string
+    attempts := 1
+    for attempts <= 3 {
+        nctx, cancel := context.WithTimeout(ctx, time.Minute)
+        uploadID, err = c.NewMultipartUpload(nctx, bucket, object, opts)
+        cancel()
+        if err == nil {
+            break
+        }
+        attempts++
+        time.Sleep(time.Duration(rand.Int63n(int64(time.Second))))
+    }
     if err != nil {
         return err
     }
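
For reference, the retry loop introduced above, distilled into a standalone helper; doUpload is a hypothetical stand-in for c.NewMultipartUpload, and the jittered sleep is the same rand.Int63n pattern used elsewhere in the diff:

package retrysketch

import (
	"context"
	"math/rand"
	"time"
)

// retryWithJitter makes up to three attempts, each bounded by a one-minute
// context, sleeping up to a second between attempts to spread out retries.
func retryWithJitter(ctx context.Context, doUpload func(context.Context) error) error {
	var err error
	for attempts := 1; attempts <= 3; attempts++ {
		nctx, cancel := context.WithTimeout(ctx, time.Minute)
		err = doUpload(nctx)
		cancel() // release the timer right away; defer would pile up inside the loop
		if err == nil {
			break
		}
		time.Sleep(time.Duration(rand.Int63n(int64(time.Second))))
	}
	return err
}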
@@ -1524,7 +1533,7 @@ func replicateObjectWithMultipart(ctx context.Context, c *minio.Core, bucket, ob
                     fmt.Errorf("trying %s: Unable to cleanup failed multipart replication %s on remote %s/%s: %w - this may consume space on remote cluster",
                         humanize.Ordinal(attempts), uploadID, bucket, object, aerr))
                 attempts++
-                time.Sleep(time.Second)
+                time.Sleep(time.Duration(rand.Int63n(int64(time.Second))))
             }
         }
     }()
@@ -38,6 +38,7 @@ import (
     "github.com/minio/minio/internal/hash"
     "github.com/minio/minio/internal/logger"
     "github.com/tinylib/msgp/msgp"
+    "github.com/valyala/bytebufferpool"
 )

 //go:generate msgp -file $GOFILE -unexported
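
The new github.com/valyala/bytebufferpool import is used by dataUsageCache.save below to avoid allocating a fresh buffer on every scanner save. A generic sketch of the pool's Get/Reset/Put lifecycle (not MinIO code):

package poolsketch

import "github.com/valyala/bytebufferpool"

// encodeWithPool borrows a pooled buffer, lets the caller stream into it, and
// copies the bytes out before the buffer is returned to the pool.
func encodeWithPool(serialize func(buf *bytebufferpool.ByteBuffer) error) ([]byte, error) {
	buf := bytebufferpool.Get()
	defer func() {
		buf.Reset() // clear contents before recycling
		bytebufferpool.Put(buf)
	}()

	if err := serialize(buf); err != nil {
		return nil, err
	}
	out := make([]byte, buf.Len())
	copy(out, buf.Bytes()) // the pooled buffer must not escape this function
	return out, nil
}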
@@ -927,34 +928,42 @@ type objectIO interface {
 // The loader is optimistic and has no locking, but tries 5 times before giving up.
 // If the object is not found or unable to deserialize d is cleared and nil error is returned.
 func (d *dataUsageCache) load(ctx context.Context, store objectIO, name string) error {
-    // Abandon if more than 5 minutes, so we don't hold up scanner.
-    ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
-    defer cancel()
-
-    // Caches are read+written without locks,
-    retries := 0
-    for retries < 5 {
+    load := func(name string, timeout time.Duration) (bool, error) {
+        // Abandon if more than time.Minute, so we don't hold up scanner.
+        // drive timeout by default is 2 minutes, we do not need to wait longer.
+        ctx, cancel := context.WithTimeout(ctx, timeout)
+        defer cancel()
+
         r, err := store.GetObjectNInfo(ctx, dataUsageBucket, name, nil, http.Header{}, ObjectOptions{NoLock: true})
         if err != nil {
             switch err.(type) {
             case ObjectNotFound, BucketNotFound:
             case InsufficientReadQuorum, StorageErr:
-                retries++
-                time.Sleep(time.Duration(rand.Int63n(int64(time.Second))))
-                continue
-            default:
-                return toObjectErr(err, dataUsageBucket, name)
+                return true, nil
             }
-            *d = dataUsageCache{}
-            return nil
+            return false, toObjectErr(err, dataUsageBucket, name)
         }
-        if err := d.deserialize(r); err != nil {
-            r.Close()
+        err = d.deserialize(r)
+        r.Close()
+        return err != nil, nil
+    }
+
+    // Caches are read+written without locks,
+    retries := 0
+    for retries < 5 {
+        retry, err := load(name, time.Minute)
+        if err != nil {
+            return err
+        }
+        if retry {
+            retry, _ = load(name+".bkp", 30*time.Second)
+            if !retry {
+                break
+            }
             retries++
             time.Sleep(time.Duration(rand.Int63n(int64(time.Second))))
             continue
         }
-        r.Close()
         return nil
     }
     *d = dataUsageCache{}
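
The refactor above folds the read, the error classification, and the deserialize step into a load(name, timeout) closure that reports whether a failure is worth retrying; only quorum/storage errors and deserialize failures trigger the .bkp fallback. A condensed, hypothetical version of that classification idea (not a line-for-line copy of the function):

package loadsketch

import (
	"context"
	"errors"
	"math/rand"
	"time"
)

// errTransient marks failures (e.g. read quorum or storage errors) that are
// worth retrying; anything else is surfaced to the caller immediately.
var errTransient = errors.New("transient failure")

// fetch is a hypothetical stand-in for "GetObjectNInfo + deserialize".
func loadWithFallback(ctx context.Context, name string, fetch func(ctx context.Context, name string, timeout time.Duration) error) error {
	for retries := 0; retries < 5; retries++ {
		err := fetch(ctx, name, time.Minute)
		if err == nil {
			return nil
		}
		if !errors.Is(err, errTransient) {
			return err // fatal: do not retry
		}
		// Primary read failed transiently; the shorter-deadline backup read may still work.
		if fetch(ctx, name+".bkp", 30*time.Second) == nil {
			return nil
		}
		time.Sleep(time.Duration(rand.Int63n(int64(time.Second))))
	}
	return nil // callers treat a missing cache as empty rather than as an error
}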
@@ -967,47 +976,52 @@ var maxConcurrentScannerSaves = make(chan struct{}, 4)
 // save the content of the cache to minioMetaBackgroundOpsBucket with the provided name.
 // Note that no locking is done when saving.
 func (d *dataUsageCache) save(ctx context.Context, store objectIO, name string) error {
-    var r io.Reader
-    maxConcurrentScannerSaves <- struct{}{}
+    select {
+    case <-ctx.Done():
+        return ctx.Err()
+    case maxConcurrentScannerSaves <- struct{}{}:
+    }
     defer func() {
-        <-maxConcurrentScannerSaves
+        select {
+        case <-ctx.Done():
+        case <-maxConcurrentScannerSaves:
+        }
     }()
-    // If big, do streaming...
-    size := int64(-1)
-    if len(d.Cache) > 10000 {
-        pr, pw := io.Pipe()
-        go func() {
-            pw.CloseWithError(d.serializeTo(pw))
-        }()
-        defer pr.Close()
-        r = pr
-    } else {
-        var buf bytes.Buffer
-        err := d.serializeTo(&buf)
-        if err != nil {
-            return err
-        }
-        r = &buf
-        size = int64(buf.Len())
+
+    buf := bytebufferpool.Get()
+    defer func() {
+        buf.Reset()
+        bytebufferpool.Put(buf)
+    }()
+
+    if err := d.serializeTo(buf); err != nil {
+        return err
     }

-    hr, err := hash.NewReader(r, size, "", "", size)
+    hr, err := hash.NewReader(bytes.NewReader(buf.Bytes()), int64(buf.Len()), "", "", int64(buf.Len()))
     if err != nil {
         return err
     }

-    // Abandon if more than 5 minutes, so we don't hold up scanner.
-    ctx, cancel := context.WithTimeout(ctx, 5*time.Minute)
-    defer cancel()
-    _, err = store.PutObject(ctx,
-        dataUsageBucket,
-        name,
-        NewPutObjReader(hr),
-        ObjectOptions{NoLock: true})
-    if isErrBucketNotFound(err) {
-        return nil
+    save := func(name string, timeout time.Duration) error {
+        // Abandon if more than a minute, so we don't hold up scanner.
+        ctx, cancel := context.WithTimeout(ctx, timeout)
+        defer cancel()
+
+        _, err = store.PutObject(ctx,
+            dataUsageBucket,
+            name,
+            NewPutObjReader(hr),
+            ObjectOptions{NoLock: true})
+        if isErrBucketNotFound(err) {
+            return nil
+        }
+        return err
     }
-    return err
+    defer save(name+".bkp", 30*time.Second) // Keep a backup as well
+
+    // drive timeout by default is 2 minutes, we do not need to wait longer.
+    return save(name, time.Minute)
 }

 // dataUsageCacheVer indicates the cache version.
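
Besides the backup write, the rewritten save changes how the maxConcurrentScannerSaves slot is acquired: it now selects on the context instead of blocking unconditionally, so a cancelled scanner does not queue behind other saves. The acquisition pattern in isolation (a sketch; the slot is always released here, whereas the diff above also selects on ctx in the release path):

package semsketch

import "context"

// sem limits concurrent saves; the diff above uses a capacity of 4.
var sem = make(chan struct{}, 4)

// withSaveSlot runs fn while holding a slot, or gives up early if ctx is
// cancelled before a slot frees up.
func withSaveSlot(ctx context.Context, fn func() error) error {
	select {
	case <-ctx.Done():
		return ctx.Err() // don't wait for a slot once the scanner is shutting down
	case sem <- struct{}{}:
	}
	defer func() { <-sem }() // release the slot when done
	return fn()
}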
@@ -42,6 +42,7 @@ const (

 // storeDataUsageInBackend will store all objects sent on the gui channel until closed.
 func storeDataUsageInBackend(ctx context.Context, objAPI ObjectLayer, dui <-chan DataUsageInfo) {
+    attempts := 1
     for dataUsageInfo := range dui {
         json := jsoniter.ConfigCompatibleWithStandardLibrary
         dataUsageJSON, err := json.Marshal(dataUsageInfo)
@@ -49,9 +50,14 @@ func storeDataUsageInBackend(ctx context.Context, objAPI ObjectLayer, dui <-chan
             logger.LogIf(ctx, err)
             continue
         }
+        if attempts > 10 {
+            saveConfig(ctx, objAPI, dataUsageObjNamePath+".bkp", dataUsageJSON) // Save a backup every 10th update.
+            attempts = 1
+        }
         if err = saveConfig(ctx, objAPI, dataUsageObjNamePath, dataUsageJSON); err != nil {
             logger.LogIf(ctx, err)
         }
+        attempts++
     }
 }

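
The attempts counter above refreshes the .bkp copy of the data usage JSON roughly every tenth update instead of on every pass through the channel. The same idea in isolation, with a hypothetical persist helper standing in for saveConfig:

package periodicsketch

import "context"

// persistEveryNth writes every update to the primary key and refreshes a
// ".bkp" copy every nth update, mirroring the counter logic above.
func persistEveryNth(ctx context.Context, n int, key string, updates <-chan []byte,
	persist func(ctx context.Context, key string, data []byte) error,
) {
	attempts := 1
	for data := range updates {
		if attempts > n {
			_ = persist(ctx, key+".bkp", data) // periodic, best-effort backup
			attempts = 1
		}
		_ = persist(ctx, key, data)
		attempts++
	}
}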
@@ -94,10 +100,13 @@ func loadPrefixUsageFromBackend(ctx context.Context, objAPI ObjectLayer, bucket
 func loadDataUsageFromBackend(ctx context.Context, objAPI ObjectLayer) (DataUsageInfo, error) {
     buf, err := readConfig(ctx, objAPI, dataUsageObjNamePath)
     if err != nil {
-        if errors.Is(err, errConfigNotFound) {
-            return DataUsageInfo{}, nil
+        buf, err = readConfig(ctx, objAPI, dataUsageObjNamePath+".bkp")
+        if err != nil {
+            if errors.Is(err, errConfigNotFound) {
+                return DataUsageInfo{}, nil
+            }
+            return DataUsageInfo{}, toObjectErr(err, minioMetaBucket, dataUsageObjNamePath)
         }
-        return DataUsageInfo{}, toObjectErr(err, minioMetaBucket, dataUsageObjNamePath)
     }

     var dataUsageInfo DataUsageInfo