mirror of https://github.com/minio/minio.git
Revert "fix: remove persistence layer for metacache store in memory (#11538)"
This reverts commit b23659927c.
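The revert restores the on-disk persistence of listing metadata: each bucket's metacache index is saved as an s2-compressed msgpack object at buckets/<bucket>/.metacache/index.s2 inside the .minio.sys metadata bucket (see save and loadBucketMetaCache below), and listing results are written as numbered block-<n>.s2 objects (see objectPath). A minimal, self-contained sketch of that storage shape follows, assuming only the github.com/klauspost/compress/s2 package; the block path shown is an assumption pieced together from objectPath and the index path (metacachePrefixForID's body is outside this diff), and the real index is additionally msgp-encoded.

package main

import (
	"bytes"
	"fmt"
	"io"
	"path"

	"github.com/klauspost/compress/s2"
)

func main() {
	// Object names used by the restored persistence layer. The index path
	// matches loadBucketMetaCache below; the block path is an assumption
	// based on objectPath + metacachePrefixForID.
	bucket := "mybucket"
	fmt.Println(path.Join("buckets", bucket, ".metacache", "index.s2"))
	fmt.Println(path.Join("buckets", bucket, ".metacache", "some-list-id", "block-0.s2"))

	// s2 round trip: the index is msgp-encoded and then compressed like this.
	payload := []byte("msgpack-encoded bucketMetacache would go here")
	var buf bytes.Buffer
	w := s2.NewWriter(&buf)
	if _, err := w.Write(payload); err != nil {
		panic(err)
	}
	if err := w.Close(); err != nil {
		panic(err)
	}

	r := s2.NewReader(&buf)
	out, err := io.ReadAll(r)
	if err != nil {
		panic(err)
	}
	fmt.Printf("round trip ok: %v\n", bytes.Equal(out, payload))
}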
@@ -857,6 +857,7 @@ func (z *erasureServerPools) ListObjectVersions(ctx context.Context, bucket, pre
	ri := logger.GetReqInfo(ctx)
	if ri != nil && strings.Contains(ri.UserAgent, `1.0 Veeam/1.0 Backup`) && strings.HasSuffix(prefix, ".blk") {
		opts.discardResult = true
		opts.Transient = true
	}

	merged, err := z.listPath(ctx, opts)

@@ -1214,6 +1215,31 @@ func (z *erasureServerPools) DeleteBucket(ctx context.Context, bucket string, fo
	return nil
}

// deleteAll will delete a bucket+prefix unconditionally across all disks.
// Note that set distribution is ignored so it should only be used in cases where
// data is not distributed across sets.
// Errors are logged but individual disk failures are not returned.
func (z *erasureServerPools) deleteAll(ctx context.Context, bucket, prefix string) {
	for _, servers := range z.serverPools {
		for _, set := range servers.sets {
			set.deleteAll(ctx, bucket, prefix)
		}
	}
}

// renameAll will rename bucket+prefix unconditionally across all disks to
// minioMetaTmpBucket + unique uuid.
// Note that set distribution is ignored so it should only be used in cases where
// data is not distributed across sets. Errors are logged but individual
// disk failures are not returned.
func (z *erasureServerPools) renameAll(ctx context.Context, bucket, prefix string) {
	for _, servers := range z.serverPools {
		for _, set := range servers.sets {
			set.renameAll(ctx, bucket, prefix)
		}
	}
}

// This function is used to undo a successful DeleteBucket operation.
func undoDeleteBucketServerPools(ctx context.Context, bucket string, serverPools []*erasureSets, errs []error) {
	g := errgroup.WithNErrs(len(serverPools))
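renameAll amounts to delete-by-rename: rather than removing listing caches in place, the whole prefix is moved under minioMetaTmpBucket with a unique uuid, presumably so the actual removal can happen later without blocking the caller. A standalone sketch of that pattern on a local filesystem (an illustrative stand-in, not MinIO's storage API):

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// deleteByRename moves prefix into trashDir under a unique name so the
// caller returns quickly; the moved tree can be removed later by whoever
// owns the trash directory. This mirrors the idea behind renameAll above.
func deleteByRename(prefix, trashDir string) (string, error) {
	if err := os.MkdirAll(trashDir, 0o755); err != nil {
		return "", err
	}
	dst, err := os.MkdirTemp(trashDir, "trash-*")
	if err != nil {
		return "", err
	}
	dst = filepath.Join(dst, filepath.Base(prefix))
	return dst, os.Rename(prefix, dst)
}

func main() {
	root, _ := os.MkdirTemp("", "demo-*")
	defer os.RemoveAll(root)

	cacheDir := filepath.Join(root, "buckets", "mybucket", ".metacache")
	os.MkdirAll(cacheDir, 0o755)
	os.WriteFile(filepath.Join(cacheDir, "index.s2"), []byte("x"), 0o644)

	moved, err := deleteByRename(cacheDir, filepath.Join(root, "tmp"))
	fmt.Println(moved, err)
	os.RemoveAll(moved) // the deferred / asynchronous cleanup
}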
@@ -29,7 +29,6 @@ import (
	"time"

	"github.com/dchest/siphash"
	"github.com/dustin/go-humanize"
	"github.com/google/uuid"
	"github.com/minio/minio-go/v7/pkg/set"
	"github.com/minio/minio-go/v7/pkg/tags"
@@ -38,7 +37,6 @@ import (
	"github.com/minio/minio/pkg/console"
	"github.com/minio/minio/pkg/dsync"
	"github.com/minio/minio/pkg/madmin"
	"github.com/minio/minio/pkg/objcache"
	"github.com/minio/minio/pkg/sync/errgroup"
)

@@ -368,11 +366,6 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
	// setCount * setDriveCount with each memory upto blockSizeV1.
	bp := bpool.NewBytePoolCap(n, blockSizeV1, blockSizeV1*2)

	mcache, err := objcache.New(1*humanize.GiByte, objcache.DefaultExpiry)
	if err != nil {
		return nil, err
	}

	for i := 0; i < setCount; i++ {
		s.erasureDisks[i] = make([]StorageAPI, setDriveCount)
	}
@@ -419,7 +412,6 @@ func newErasureSets(ctx context.Context, endpoints Endpoints, storageDisks []Sto
			getEndpoints: s.GetEndpoints(i),
			nsMutex:      mutex,
			bp:           bp,
			metaCache:    mcache,
			mrfOpCh:      make(chan partialOperation, 10000),
		}
	}

@@ -30,7 +30,6 @@ import (
	"github.com/minio/minio/pkg/color"
	"github.com/minio/minio/pkg/dsync"
	"github.com/minio/minio/pkg/madmin"
	"github.com/minio/minio/pkg/objcache"
	"github.com/minio/minio/pkg/sync/errgroup"
)

@@ -71,9 +70,6 @@ type erasureObjects struct {
	// Byte pools used for temporary i/o buffers.
	bp *bpool.BytePoolCap

	// holds current list cache.
	metaCache *objcache.Cache

	mrfOpCh chan partialOperation
}

@@ -17,6 +17,11 @@
package cmd

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"net/http"
	"path"
	"runtime/debug"
	"sort"
@@ -24,8 +29,11 @@ import (
	"sync"
	"time"

	"github.com/klauspost/compress/s2"
	"github.com/minio/minio/cmd/logger"
	"github.com/minio/minio/pkg/console"
	"github.com/minio/minio/pkg/hash"
	"github.com/tinylib/msgp/msgp"
)

//go:generate msgp -file $GOFILE -unexported

@@ -49,7 +57,16 @@ type bucketMetacache struct {

// newBucketMetacache creates a new bucketMetacache.
// Optionally remove all existing caches.
func newBucketMetacache(bucket string) *bucketMetacache {
func newBucketMetacache(bucket string, cleanup bool) *bucketMetacache {
	if cleanup {
		// Recursively delete all caches.
		objAPI := newObjectLayerFn()
		ez, ok := objAPI.(*erasureServerPools)
		if ok {
			ctx := context.Background()
			ez.renameAll(ctx, minioMetaBucket, metacachePrefixForID(bucket, slashSeparator))
		}
	}
	return &bucketMetacache{
		bucket: bucket,
		caches: make(map[string]metacache, 10),
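The new cleanup parameter covers the recovery paths in loadBucketMetaCache below: when the stored index cannot be trusted (quorum loss, decode failure, bucket-name mismatch), newBucketMetacache(bucket, true) first renames the whole cache prefix away and only then returns a fresh, empty index. A small stand-in sketch of that constructor shape (index, newIndex and wipe are hypothetical names, not the real types):

package main

import "fmt"

// index is a stand-in for bucketMetacache; wipe is a stand-in for the
// renameAll call that clears the on-disk prefix.
type index struct {
	bucket string
	caches map[string]string
}

func newIndex(bucket string, cleanup bool, wipe func(bucket string)) *index {
	if cleanup {
		// The stored caches are not trusted anymore: clear them before
		// handing out a fresh, empty index (mirrors the cleanup branch above).
		wipe(bucket)
	}
	return &index{bucket: bucket, caches: make(map[string]string, 10)}
}

func main() {
	wiped := 0
	wipe := func(string) { wiped++ }

	fresh := newIndex("mybucket", false, wipe) // normal path: keep disk state
	broken := newIndex("mybucket", true, wipe) // recovery path: wipe first
	fmt.Println(len(fresh.caches), len(broken.caches), "wipes:", wiped)
}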
@@ -63,6 +80,111 @@ func (b *bucketMetacache) debugf(format string, data ...interface{}) {
	}
}

// loadBucketMetaCache will load the cache from the object layer.
// If the cache cannot be found a new one is created.
func loadBucketMetaCache(ctx context.Context, bucket string) (*bucketMetacache, error) {
	objAPI := newObjectLayerFn()
	for objAPI == nil {
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		default:
			time.Sleep(250 * time.Millisecond)
		}
		objAPI = newObjectLayerFn()
		if objAPI == nil {
			logger.LogIf(ctx, fmt.Errorf("loadBucketMetaCache: object layer not ready. bucket: %q", bucket))
		}
	}

	var meta bucketMetacache
	var decErr error
	// Use global context for this.
	r, err := objAPI.GetObjectNInfo(GlobalContext, minioMetaBucket, pathJoin("buckets", bucket, ".metacache", "index.s2"), nil, http.Header{}, readLock, ObjectOptions{})
	if err == nil {
		dec := s2DecPool.Get().(*s2.Reader)
		dec.Reset(r)
		decErr = meta.DecodeMsg(msgp.NewReader(dec))
		dec.Reset(nil)
		r.Close()
		s2DecPool.Put(dec)
	}
	if err != nil {
		switch err.(type) {
		case ObjectNotFound:
			err = nil
		case InsufficientReadQuorum:
			// Cache is likely lost. Clean up and return new.
			return newBucketMetacache(bucket, true), nil
		default:
			logger.LogIf(ctx, err)
		}
		return newBucketMetacache(bucket, false), err
	}
	if decErr != nil {
		if errors.Is(err, context.Canceled) {
			return newBucketMetacache(bucket, false), err
		}
		// Log the error, but assume the data is lost and return a fresh bucket.
		// Otherwise a broken cache will never recover.
		logger.LogIf(ctx, decErr)
		return newBucketMetacache(bucket, true), nil
	}
	// Sanity check...
	if meta.bucket != bucket {
		logger.Info("loadBucketMetaCache: loaded cache name mismatch, want %s, got %s. Discarding.", bucket, meta.bucket)
		return newBucketMetacache(bucket, true), nil
	}
	meta.cachesRoot = make(map[string][]string, len(meta.caches)/10)
	// Index roots
	for id, cache := range meta.caches {
		meta.cachesRoot[cache.root] = append(meta.cachesRoot[cache.root], id)
	}
	return &meta, nil
}
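loadBucketMetaCache may run before the object layer is wired up, so it polls newObjectLayerFn every 250ms until it returns non-nil or the context ends. The same wait-for-dependency loop in isolation (a generic sketch, not the MinIO function):

package main

import (
	"context"
	"fmt"
	"sync/atomic"
	"time"
)

// waitForLayer polls get until it returns a non-nil value or ctx ends,
// mirroring the loop at the top of loadBucketMetaCache above.
func waitForLayer(ctx context.Context, every time.Duration, get func() any) (any, error) {
	for {
		if v := get(); v != nil {
			return v, nil
		}
		select {
		case <-ctx.Done():
			return nil, ctx.Err()
		case <-time.After(every):
		}
	}
}

func main() {
	var layer atomic.Value
	go func() {
		time.Sleep(300 * time.Millisecond)
		layer.Store("object layer") // becomes available a bit later
	}()

	ctx, cancel := context.WithTimeout(context.Background(), time.Second)
	defer cancel()
	v, err := waitForLayer(ctx, 50*time.Millisecond, layer.Load)
	fmt.Println(v, err)
}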
// save the bucket cache to the object storage.
func (b *bucketMetacache) save(ctx context.Context) error {
	if b.transient {
		return nil
	}
	objAPI := newObjectLayerFn()
	if objAPI == nil {
		return errServerNotInitialized
	}

	// Keep lock while we marshal.
	// We need a write lock since we update 'updated'
	b.mu.Lock()
	if !b.updated {
		b.mu.Unlock()
		return nil
	}
	// Save as s2 compressed msgpack
	tmp := bytes.NewBuffer(make([]byte, 0, b.Msgsize()))
	enc := s2.NewWriter(tmp)
	err := msgp.Encode(enc, b)
	if err != nil {
		b.mu.Unlock()
		return err
	}
	err = enc.Close()
	if err != nil {
		b.mu.Unlock()
		return err
	}
	b.updated = false
	b.mu.Unlock()

	hr, err := hash.NewReader(tmp, int64(tmp.Len()), "", "", int64(tmp.Len()))
	if err != nil {
		return err
	}
	_, err = objAPI.PutObject(ctx, minioMetaBucket, pathJoin("buckets", b.bucket, ".metacache", "index.s2"), NewPutObjReader(hr), ObjectOptions{})
	logger.LogIf(ctx, err)
	return err
}

// findCache will attempt to find a matching cache for the provided options.
// If a cache with the same ID exists already it will be returned.
// If none can be found a new is created with the provided ID.
@@ -326,6 +448,41 @@ func (b *bucketMetacache) getCache(id string) *metacache {
	return &c
}

// deleteAll will delete all on disk data for ALL caches.
// Deletes are performed concurrently.
func (b *bucketMetacache) deleteAll() {
	ctx := context.Background()
	ez, ok := newObjectLayerFn().(*erasureServerPools)
	if !ok {
		logger.LogIf(ctx, errors.New("bucketMetacache: expected objAPI to be *erasurePools"))
		return
	}

	b.mu.Lock()
	defer b.mu.Unlock()

	b.updated = true
	if !b.transient {
		// Delete all.
		ez.renameAll(ctx, minioMetaBucket, metacachePrefixForID(b.bucket, slashSeparator))
		b.caches = make(map[string]metacache, 10)
		b.cachesRoot = make(map[string][]string, 10)
		return
	}

	// Transient are in different buckets.
	var wg sync.WaitGroup
	for id := range b.caches {
		wg.Add(1)
		go func(cache metacache) {
			defer wg.Done()
			ez.renameAll(ctx, minioMetaBucket, metacachePrefixForID(cache.bucket, cache.id))
		}(b.caches[id])
	}
	wg.Wait()
	b.caches = make(map[string]metacache, 10)
}
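For transient caches the entries live under per-cache prefixes, so deleteAll above fans the renames out with a sync.WaitGroup, one goroutine per cache, passing the cache by value to avoid capturing the loop variable. The same fan-out reduced to a runnable sketch:

package main

import (
	"fmt"
	"sync"
)

func main() {
	caches := map[string]string{"a": "prefix/a", "b": "prefix/b", "c": "prefix/c"}

	var mu sync.Mutex
	deleted := make([]string, 0, len(caches))

	// One goroutine per cache, like the transient branch of
	// bucketMetacache.deleteAll; the prefix is passed as an argument so the
	// goroutine does not capture the loop variable.
	var wg sync.WaitGroup
	for id := range caches {
		wg.Add(1)
		go func(prefix string) {
			defer wg.Done()
			mu.Lock()
			deleted = append(deleted, prefix) // stand-in for ez.renameAll(...)
			mu.Unlock()
		}(caches[id])
	}
	wg.Wait()
	fmt.Println(len(deleted), "prefixes removed")
}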
// deleteCache will delete a specific cache and all files related to it across the cluster.
func (b *bucketMetacache) deleteCache(id string) {
	b.mu.Lock()
@@ -344,4 +501,7 @@ func (b *bucketMetacache) deleteCache(id string) {
		b.updated = true
	}
	b.mu.Unlock()
	if ok {
		c.delete(context.Background())
	}
}

@@ -6,7 +6,7 @@ import (
)

func Benchmark_bucketMetacache_findCache(b *testing.B) {
	bm := newBucketMetacache("")
	bm := newBucketMetacache("", false)
	const elements = 50000
	const paths = 100
	if elements%paths != 0 {

@@ -18,6 +18,7 @@ package cmd

import (
	"context"
	"fmt"
	"runtime/debug"
	"sync"
	"time"
@@ -41,13 +42,25 @@ type metacacheManager struct {
	trash map[string]metacache // Recently deleted lists.
}

const metacacheManagerTransientBucket = "**transient**"
const metacacheMaxEntries = 5000

// initManager will start async saving the cache.
func (m *metacacheManager) initManager() {
	// Add a transient bucket.
	tb := newBucketMetacache(metacacheManagerTransientBucket, false)
	tb.transient = true
	m.buckets[metacacheManagerTransientBucket] = tb

	// Start saver when object layer is ready.
	go func() {
		objAPI := newObjectLayerFn()
		for objAPI == nil {
			time.Sleep(time.Second)
			objAPI = newObjectLayerFn()
		}
		if !globalIsErasure {
			logger.Info("metacacheManager was initialized in non-erasure mode, skipping save")
			return
		}

@@ -55,6 +68,7 @@ func (m *metacacheManager) initManager() {
		defer t.Stop()

		var exit bool
		bg := context.Background()
		for !exit {
			select {
			case <-t.C:
@@ -66,11 +80,13 @@ func (m *metacacheManager) initManager() {
				if !exit {
					v.cleanup()
				}
				logger.LogIf(bg, v.save(bg))
			}
			m.mu.RUnlock()
			m.mu.Lock()
			for k, v := range m.trash {
				if time.Since(v.lastUpdate) > metacacheMaxRunningAge {
					v.delete(context.Background())
					delete(m.trash, k)
				}
			}
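initManager starts a background saver: once the object layer is ready it ticks on an interval, flushes every bucket index via save, and prunes trash entries older than metacacheMaxRunningAge. A reduced sketch of that periodic-flush loop (interval and callback are placeholders):

package main

import (
	"context"
	"fmt"
	"time"
)

// runSaver flushes dirty state on every tick until ctx is cancelled,
// in the spirit of the loop inside initManager above.
func runSaver(ctx context.Context, every time.Duration, flush func() error) {
	t := time.NewTicker(every)
	defer t.Stop()
	for {
		select {
		case <-ctx.Done():
			flush() // final flush on shutdown, like the exit path above
			return
		case <-t.C:
			if err := flush(); err != nil {
				fmt.Println("save failed:", err)
			}
		}
	}
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 350*time.Millisecond)
	defer cancel()
	n := 0
	runSaver(ctx, 100*time.Millisecond, func() error { n++; return nil })
	fmt.Println("flushes:", n)
}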
@@ -81,6 +97,9 @@ func (m *metacacheManager) initManager() {

// findCache will get a metacache.
func (m *metacacheManager) findCache(ctx context.Context, o listPathOptions) metacache {
	if o.Transient || isReservedOrInvalidBucket(o.Bucket, false) {
		return m.getTransient().findCache(o)
	}
	m.mu.RLock()
	b, ok := m.buckets[o.Bucket]
	if ok {
@@ -117,6 +136,10 @@ func (m *metacacheManager) updateCacheEntry(update metacache) (metacache, error)
func (m *metacacheManager) getBucket(ctx context.Context, bucket string) *bucketMetacache {
	m.init.Do(m.initManager)

	// Return a transient bucket for invalid or system buckets.
	if isReservedOrInvalidBucket(bucket, false) {
		return m.getTransient()
	}
	m.mu.RLock()
	b, ok := m.buckets[bucket]
	m.mu.RUnlock()
@@ -140,7 +163,16 @@ func (m *metacacheManager) getBucket(ctx context.Context, bucket string) *bucket
		return b
	}

	b = newBucketMetacache(bucket)
	// Load bucket. If we fail return the transient bucket.
	b, err := loadBucketMetaCache(ctx, bucket)
	if err != nil {
		m.mu.Unlock()
		logger.LogIf(ctx, err)
		return m.getTransient()
	}
	if b.bucket != bucket {
		logger.LogIf(ctx, fmt.Errorf("getBucket: loaded bucket %s does not match this bucket %s", b.bucket, bucket))
	}
	m.buckets[bucket] = b
	m.mu.Unlock()
	return b
@@ -163,6 +195,10 @@ func (m *metacacheManager) deleteBucketCache(bucket string) {
	b.mu.Lock()
	defer b.mu.Unlock()
	for k, v := range b.caches {
		if time.Since(v.lastUpdate) > metacacheMaxRunningAge {
			v.delete(context.Background())
			continue
		}
		v.error = "Bucket deleted"
		v.status = scanStateError
		m.mu.Lock()
@@ -176,7 +212,59 @@ func (m *metacacheManager) deleteAll() {
	m.init.Do(m.initManager)
	m.mu.Lock()
	defer m.mu.Unlock()
	for bucket := range m.buckets {
		delete(m.buckets, bucket)
	for bucket, b := range m.buckets {
		b.deleteAll()
		if !b.transient {
			delete(m.buckets, bucket)
		}
	}
}

// getTransient will return a transient bucket.
func (m *metacacheManager) getTransient() *bucketMetacache {
	m.init.Do(m.initManager)
	m.mu.RLock()
	bmc := m.buckets[metacacheManagerTransientBucket]
	m.mu.RUnlock()
	return bmc
}

// checkMetacacheState should be used if data is not updating.
// Should only be called if a failure occurred.
func (o listPathOptions) checkMetacacheState(ctx context.Context, rpc *peerRESTClient) error {
	// We operate on a copy...
	o.Create = false
	var cache metacache
	if rpc == nil || o.Transient {
		cache = localMetacacheMgr.findCache(ctx, o)
	} else {
		c, err := rpc.GetMetacacheListing(ctx, o)
		if err != nil {
			return err
		}
		cache = *c
	}

	if cache.status == scanStateNone || cache.fileNotFound {
		return errFileNotFound
	}
	if cache.status == scanStateSuccess || cache.status == scanStateStarted {
		if time.Since(cache.lastUpdate) > metacacheMaxRunningAge {
			// We got a stale entry, mark error on handling server.
			err := fmt.Errorf("timeout: list %s not updated", cache.id)
			cache.error = err.Error()
			cache.status = scanStateError
			if rpc == nil || o.Transient {
				localMetacacheMgr.updateCacheEntry(cache)
			} else {
				rpc.UpdateMetacacheListing(ctx, cache)
			}
			return err
		}
		return nil
	}
	if cache.error != "" {
		return fmt.Errorf("async cache listing failed with: %s", cache.error)
	}
	return nil
}
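checkMetacacheState is the caller-side health check: a listing that claims to be running but has not been updated within metacacheMaxRunningAge is marked failed, locally or over the peer RPC, so a fresh listing can be started. The age test in isolation (maxRunningAge is a placeholder value):

package main

import (
	"errors"
	"fmt"
	"time"
)

// maxRunningAge stands in for metacacheMaxRunningAge in the diff.
const maxRunningAge = 5 * time.Minute

var errStale = errors.New("timeout: list not updated")

// checkListing mimics the started/success branch of checkMetacacheState:
// a running listing is healthy as long as it keeps updating.
func checkListing(lastUpdate time.Time) error {
	if time.Since(lastUpdate) > maxRunningAge {
		return errStale
	}
	return nil
}

func main() {
	fmt.Println(checkListing(time.Now()))                        // <nil>
	fmt.Println(checkListing(time.Now().Add(-10 * time.Minute))) // stale
}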
@@ -116,6 +116,7 @@ func (z *erasureServerPools) listPath(ctx context.Context, o listPathOptions) (e
	// will be generated due to the marker without ID and this check failing.
	if o.Limit < 10 && o.Marker == "" && o.Create && o.Recursive {
		o.discardResult = true
		o.Transient = true
	}

	var cache metacache
@@ -126,13 +127,12 @@ func (z *erasureServerPools) listPath(ctx context.Context, o listPathOptions) (e
	var cache metacache
	rpc := globalNotificationSys.restClientFromHash(o.Bucket)
	if isReservedOrInvalidBucket(o.Bucket, false) {
		// discard all list caches for reserved buckets.
		o.discardResult = true
		rpc = nil
		o.Transient = true
	}
	// Apply prefix filter if enabled.
	o.SetFilter()
	if rpc == nil {
	if rpc == nil || o.Transient {
		// Local
		cache = localMetacacheMgr.findCache(ctx, o)
	} else {
@@ -148,6 +148,7 @@ func (z *erasureServerPools) listPath(ctx context.Context, o listPathOptions) (e
		if !errors.Is(err, context.DeadlineExceeded) {
			logger.LogIf(ctx, err)
		}
		o.Transient = true
		cache = localMetacacheMgr.findCache(ctx, o)
	} else {
		cache = *c

@@ -17,18 +17,23 @@
package cmd

import (
	"bytes"
	"context"
	"encoding/gob"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"strconv"
	"strings"
	"sync"
	"time"

	jsoniter "github.com/json-iterator/go"
	"github.com/minio/minio/cmd/logger"
	"github.com/minio/minio/pkg/color"
	"github.com/minio/minio/pkg/console"
	"github.com/minio/minio/pkg/hash"
)

type listPathOptions struct {
@@ -86,6 +91,11 @@ type listPathOptions struct {
	// Include pure directories.
	IncludeDirectories bool

	// Transient is set if the cache is transient due to an error or being a reserved bucket.
	// This means the cache metadata will not be persisted on disk.
	// A transient result will never be returned from the cache so knowing the list id is required.
	Transient bool

	// discardResult will not persist the cache to storage.
	// When the initial results are returned listing will be canceled.
	discardResult bool
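Two related flags return with this revert: Transient keeps a listing's metadata out of the persisted per-bucket index, while discardResult additionally skips writing the listing blocks to storage. listPath sets both for reserved buckets, for the Veeam special case in ListObjectVersions, and for tiny marker-less one-shot listings. A sketch of that decision, using only the conditions visible in the hunks above:

package main

import "fmt"

// decide mirrors the checks shown in listPath / ListObjectVersions above:
// reserved buckets and small marker-less recursive listings are served
// transiently and their results are not persisted.
func decide(limit int, marker string, create, recursive, reservedBucket bool) (transient, discard bool) {
	if reservedBucket {
		return true, true
	}
	if limit < 10 && marker == "" && create && recursive {
		return true, true
	}
	return false, false
}

func main() {
	fmt.Println(decide(1, "", true, true, false))    // single-object lookup: true true
	fmt.Println(decide(1000, "", true, true, false)) // normal listing: false false
	fmt.Println(decide(1000, "", true, true, true))  // reserved bucket: true true
}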
@@ -186,14 +196,74 @@ func (o *listPathOptions) gatherResults(in <-chan metaCacheEntry) func() (metaCa
	}
}

// findFirstPart will find the part with 0 being the first that corresponds to the marker in the options.
// io.ErrUnexpectedEOF is returned if the place containing the marker hasn't been scanned yet.
// io.EOF indicates the marker is beyond the end of the stream and does not exist.
func (o *listPathOptions) findFirstPart(fi FileInfo) (int, error) {
	search := o.Marker
	if search == "" {
		search = o.Prefix
	}
	if search == "" {
		return 0, nil
	}
	o.debugln("searching for ", search)
	var tmp metacacheBlock
	var json = jsoniter.ConfigCompatibleWithStandardLibrary
	i := 0
	for {
		partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, i)
		v, ok := fi.Metadata[partKey]
		if !ok {
			o.debugln("no match in metadata, waiting")
			return -1, io.ErrUnexpectedEOF
		}
		err := json.Unmarshal([]byte(v), &tmp)
		if err != nil {
			logger.LogIf(context.Background(), err)
			return -1, err
		}
		if tmp.First == "" && tmp.Last == "" && tmp.EOS {
			return 0, errFileNotFound
		}
		if tmp.First >= search {
			o.debugln("First >= search", v)
			return i, nil
		}
		if tmp.Last >= search {
			o.debugln("Last >= search", v)
			return i, nil
		}
		if tmp.EOS {
			o.debugln("no match, at EOS", v)
			return -3, io.EOF
		}
		o.debugln("First ", tmp.First, "<", search, " search", i)
		i++
	}
}
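findFirstPart depends on the block-0 object carrying one metadata key per written block, <reserved-prefix>-metacache-part-<n>, whose JSON value records the first and last entry of that block plus an end-of-stream flag; scanning those keys locates the block containing the marker without reading any block data. A self-contained sketch of that scan (the key prefix and JSON field names are simplified stand-ins, not the real ones):

package main

import (
	"encoding/json"
	"fmt"
)

type blockInfo struct {
	First string `json:"f"`
	Last  string `json:"l"`
	EOS   bool   `json:"eos"`
}

// findFirstBlock scans part keys 0, 1, 2, ... in meta until it finds the
// block whose range covers search, mirroring findFirstPart above. It
// returns -1 when no matching block is available yet (not written, broken,
// or past the end of the stream).
func findFirstBlock(meta map[string]string, search string) int {
	for i := 0; ; i++ {
		v, ok := meta[fmt.Sprintf("x-internal-metacache-part-%d", i)]
		if !ok {
			return -1 // not scanned this far yet
		}
		var bi blockInfo
		if err := json.Unmarshal([]byte(v), &bi); err != nil {
			return -1
		}
		if bi.First >= search || bi.Last >= search {
			return i
		}
		if bi.EOS {
			return -1 // past the end of the stream
		}
	}
}

func main() {
	mustJSON := func(b blockInfo) string { j, _ := json.Marshal(b); return string(j) }
	meta := map[string]string{
		"x-internal-metacache-part-0": mustJSON(blockInfo{First: "a", Last: "f"}),
		"x-internal-metacache-part-1": mustJSON(blockInfo{First: "g", Last: "p", EOS: true}),
	}
	fmt.Println(findFirstBlock(meta, "k")) // 1
	fmt.Println(findFirstBlock(meta, "b")) // 0
}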
// updateMetacacheListing will update the metacache listing.
func (o *listPathOptions) updateMetacacheListing(m metacache, rpc *peerRESTClient) (metacache, error) {
	if o.Transient {
		return localMetacacheMgr.getTransient().updateCacheEntry(m)
	}
	if rpc == nil {
		return localMetacacheMgr.updateCacheEntry(m)
	}
	return rpc.UpdateMetacacheListing(context.Background(), m)
}

func getMetacacheBlockInfo(fi FileInfo, block int) (*metacacheBlock, error) {
	var tmp metacacheBlock
	partKey := fmt.Sprintf("%s-metacache-part-%d", ReservedMetadataPrefixLower, block)
	v, ok := fi.Metadata[partKey]
	if !ok {
		return nil, io.ErrUnexpectedEOF
	}
	return &tmp, json.Unmarshal([]byte(v), &tmp)
}

const metacachePrefix = ".metacache"

func metacachePrefixForID(bucket, id string) string {
@@ -201,8 +271,8 @@ func metacachePrefixForID(bucket, id string) string {
}

// objectPath returns the object path of the cache.
func (o *listPathOptions) objectPath() string {
	return pathJoin(metacachePrefixForID(o.Bucket, o.ID), "block.s2")
func (o *listPathOptions) objectPath(block int) string {
	return pathJoin(metacachePrefixForID(o.Bucket, o.ID), "block-"+strconv.Itoa(block)+".s2")
}

func (o *listPathOptions) SetFilter() {
@@ -287,33 +357,187 @@ func (r *metacacheReader) filter(o listPathOptions) (entries metaCacheEntriesSor
}

func (er *erasureObjects) streamMetadataParts(ctx context.Context, o listPathOptions) (entries metaCacheEntriesSorted, err error) {
	r, err := er.metaCache.Open(pathJoin(minioMetaBucket, o.objectPath()), time.Now().Add(-time.Hour))
	if err != nil {
		return entries, io.EOF
	}
	retries := 0
	rpc := globalNotificationSys.restClientFromHash(o.Bucket)

	tmp, err := newMetacacheReader(r)
	if err != nil {
		return entries, err
	}
	for {
		select {
		case <-ctx.Done():
			return entries, ctx.Err()
		default:
		}

		e, err := tmp.filter(o)
		entries.o = append(entries.o, e.o...)
		if o.Limit > 0 && entries.len() > o.Limit {
			entries.truncate(o.Limit)
			return entries, nil
		}
		// If many failures, check the cache state.
		if retries > 10 {
			err := o.checkMetacacheState(ctx, rpc)
			if err != nil {
				return entries, fmt.Errorf("remote listing canceled: %w", err)
			}
			retries = 1
		}

		if err == nil {
			// We stopped within the listing, we are done for now...
			return entries, nil
		}
		const retryDelay = 500 * time.Millisecond
		// Load first part metadata...
		// All operations are performed without locks, so we must be careful and allow for failures.
		// Read metadata associated with the object from a disk.
		if retries > 0 {
			disks := er.getOnlineDisks()
			if len(disks) == 0 {
				time.Sleep(retryDelay)
				retries++
				continue
			}

			if !errors.Is(err, io.EOF) {
				logger.LogIf(ctx, err)
			}
			_, err := disks[0].ReadVersion(ctx, minioMetaBucket, o.objectPath(0), "", false)
			if err != nil {
				time.Sleep(retryDelay)
				retries++
				continue
			}
		}

		return entries, err
		// Read metadata associated with the object from all disks.
		fi, metaArr, onlineDisks, err := er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(0), ObjectOptions{}, true)
		if err != nil {
			switch toObjectErr(err, minioMetaBucket, o.objectPath(0)).(type) {
			case ObjectNotFound:
				retries++
				time.Sleep(retryDelay)
				continue
			case InsufficientReadQuorum:
				retries++
				time.Sleep(retryDelay)
				continue
			default:
				return entries, fmt.Errorf("reading first part metadata: %w", err)
			}
		}

		partN, err := o.findFirstPart(fi)
		switch {
		case err == nil:
		case errors.Is(err, io.ErrUnexpectedEOF):
			if retries == 10 {
				err := o.checkMetacacheState(ctx, rpc)
				if err != nil {
					return entries, fmt.Errorf("remote listing canceled: %w", err)
				}
				retries = -1
			}
			retries++
			time.Sleep(retryDelay)
			continue
		case errors.Is(err, io.EOF):
			return entries, io.EOF
		}

		// We got a stream to start at.
		loadedPart := 0
		buf := bufferPool.Get().(*bytes.Buffer)
		defer func() {
			buf.Reset()
			bufferPool.Put(buf)
		}()
		for {
			select {
			case <-ctx.Done():
				return entries, ctx.Err()
			default:
			}

			if partN != loadedPart {
				if retries > 10 {
					err := o.checkMetacacheState(ctx, rpc)
					if err != nil {
						return entries, fmt.Errorf("waiting for next part %d: %w", partN, err)
					}
					retries = 1
				}

				if retries > 0 {
					// Load from one disk only
					disks := er.getOnlineDisks()
					if len(disks) == 0 {
						time.Sleep(retryDelay)
						retries++
						continue
					}

					_, err := disks[0].ReadVersion(ctx, minioMetaBucket, o.objectPath(partN), "", false)
					if err != nil {
						time.Sleep(retryDelay)
						retries++
						continue
					}
				}
				// Load first part metadata...
				fi, metaArr, onlineDisks, err = er.getObjectFileInfo(ctx, minioMetaBucket, o.objectPath(partN), ObjectOptions{}, true)
				if err != nil {
					time.Sleep(retryDelay)
					retries++
					continue
				}
				loadedPart = partN
				bi, err := getMetacacheBlockInfo(fi, partN)
				logger.LogIf(ctx, err)
				if err == nil {
					if bi.pastPrefix(o.Prefix) {
						return entries, io.EOF
					}
				}
			}
			buf.Reset()
			err := er.getObjectWithFileInfo(ctx, minioMetaBucket, o.objectPath(partN), 0, fi.Size, buf, fi, metaArr, onlineDisks)
			if err != nil {
				switch toObjectErr(err, minioMetaBucket, o.objectPath(partN)).(type) {
				case ObjectNotFound:
					retries++
					time.Sleep(retryDelay)
					continue
				case InsufficientReadQuorum:
					retries++
					time.Sleep(retryDelay)
					continue
				default:
					logger.LogIf(ctx, err)
					return entries, err
				}
			}
			tmp, err := newMetacacheReader(buf)
			if err != nil {
				return entries, err
			}
			e, err := tmp.filter(o)
			entries.o = append(entries.o, e.o...)
			if o.Limit > 0 && entries.len() > o.Limit {
				entries.truncate(o.Limit)
				return entries, nil
			}
			if err == nil {
				// We stopped within the listing, we are done for now...
				return entries, nil
			}
			if !errors.Is(err, io.EOF) {
				logger.LogIf(ctx, err)
				return entries, err
			}

			// We finished at the end of the block.
			// And should not expect any more results.
			bi, err := getMetacacheBlockInfo(fi, partN)
			logger.LogIf(ctx, err)
			if err != nil || bi.EOS {
				// We are done and there are no more parts.
				return entries, io.EOF
			}
			if bi.endedPrefix(o.Prefix) {
				// Nothing more for prefix.
				return entries, io.EOF
			}
			partN++
			retries = 0
		}
	}
}
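streamMetadataParts reads listing blocks that may still be in the process of being written, so every read failure is retried after a fixed delay, and after repeated failures it asks checkMetacacheState whether the producing listing is still alive before continuing. The retry shape reduced to a runnable sketch:

package main

import (
	"errors"
	"fmt"
	"time"
)

var errNotReady = errors.New("not ready")

// readWithRetry keeps retrying read with a fixed delay; every checkEvery
// failures it consults healthy() and aborts if the producer is gone,
// mirroring the retries / checkMetacacheState interplay above.
func readWithRetry(read func() error, healthy func() error, delay time.Duration, checkEvery int) error {
	retries := 0
	for {
		err := read()
		if err == nil {
			return nil
		}
		retries++
		if retries%checkEvery == 0 {
			if herr := healthy(); herr != nil {
				return fmt.Errorf("listing canceled: %w", herr)
			}
		}
		time.Sleep(delay)
	}
}

func main() {
	attempts := 0
	read := func() error {
		attempts++
		if attempts < 4 {
			return errNotReady // data not written yet
		}
		return nil
	}
	err := readWithRetry(read, func() error { return nil }, 10*time.Millisecond, 10)
	fmt.Println("attempts:", attempts, "err:", err)
}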
// Will return io.EOF if continuing would not yield more results.
@@ -434,14 +658,28 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
		}
	}()

	wc := er.metaCache.Create(pathJoin(minioMetaBucket, o.objectPath()))
	const retryDelay = 200 * time.Millisecond
	const maxTries = 5

	var bw *metacacheBlockWriter
	// Don't save single object listings.
	if !o.discardResult {
		// Write results to disk.
		bw = newMetacacheBlockWriter(cacheCh, func(b *metacacheBlock) error {
			n, err := wc.Write(b.data)
			// if the block is 0 bytes and its a first block skip it.
			// skip only this for Transient caches.
			if len(b.data) == 0 && b.n == 0 && o.Transient {
				return nil
			}
			o.debugln(color.Green("listPath:")+" saving block", b.n, "to", o.objectPath(b.n))
			r, err := hash.NewReader(bytes.NewReader(b.data), int64(len(b.data)), "", "", int64(len(b.data)))
			logger.LogIf(ctx, err)
			custom := b.headerKV()
			_, err = er.putObject(ctx, minioMetaBucket, o.objectPath(b.n), NewPutObjReader(r), ObjectOptions{
				UserDefined:    custom,
				NoLock:         true, // No need to hold namespace lock, each prefix caches uniquely.
				ParentIsObject: nil,
			})
			if err != nil {
				metaMu.Lock()
				if meta.error != "" {
@@ -452,17 +690,29 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
				cancel()
				return err
			}
			if n != len(b.data) {
				metaMu.Lock()
				if meta.error != "" {
					meta.status = scanStateError
					meta.error = io.ErrShortWrite.Error()
				}
				metaMu.Unlock()
				cancel()
				return io.ErrShortWrite
			if b.n == 0 {
				return nil
			}
			// Update block 0 metadata.
			var retries int
			for {
				err := er.updateObjectMeta(ctx, minioMetaBucket, o.objectPath(0), b.headerKV(), ObjectOptions{})
				if err == nil {
					break
				}
				switch err.(type) {
				case ObjectNotFound:
					return err
				case InsufficientReadQuorum:
				default:
					logger.LogIf(ctx, err)
				}
				if retries >= maxTries {
					return err
				}
				retries++
				time.Sleep(retryDelay)
			}
			o.debugln(color.Green("listPath:")+" saving block to", o.objectPath())
			return nil
		})
	}
@@ -522,13 +772,6 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions) (entr
			meta, _ = o.updateMetacacheListing(meta, rpc)
			metaMu.Unlock()
		}
		if err := wc.Close(); err != nil {
			metaMu.Lock()
			meta.error = err.Error()
			meta.status = scanStateError
			meta, _ = o.updateMetacacheListing(meta, rpc)
			metaMu.Unlock()
		}
		}
	}()

@@ -17,9 +17,14 @@
package cmd

import (
	"context"
	"errors"
	"fmt"
	"path"
	"strings"
	"time"

	"github.com/minio/minio/cmd/logger"
)

type scanStatus uint8
@@ -226,3 +231,21 @@ func (m *metacache) update(update metacache) {
	}
	m.fileNotFound = m.fileNotFound || update.fileNotFound
}

// delete all cache data on disks.
func (m *metacache) delete(ctx context.Context) {
	if m.bucket == "" || m.id == "" {
		logger.LogIf(ctx, fmt.Errorf("metacache.delete: bucket (%s) or id (%s) empty", m.bucket, m.id))
	}
	objAPI := newObjectLayerFn()
	if objAPI == nil {
		logger.LogIf(ctx, errors.New("metacache.delete: no object layer"))
		return
	}
	ez, ok := objAPI.(*erasureServerPools)
	if !ok {
		logger.LogIf(ctx, errors.New("metacache.delete: expected objAPI to be *erasureServerPools"))
		return
	}
	ez.deleteAll(ctx, minioMetaBucket, metacachePrefixForID(m.bucket, m.id))
}