mirror of
https://github.com/minio/minio.git
synced 2025-11-20 18:06:10 -05:00
Various improvements in replication (#11949)
- collect real time replication metrics for prometheus.
- add pending_count, failed_count metric for total pending/failed replication operations.
- add API to get replication metrics
- add MRF worker to handle spill-over replication operations
- multiple issues found with replication
- fixes an issue when client sends a bucket
name with `/` at the end from SetRemoteTarget
API call make sure to trim the bucket name to
avoid any extra `/`.
- hold write locks in GetObjectNInfo during replication
to ensure that object version stack is not overwritten
while reading the content.
- add additional protection during WriteMetadata() to
ensure that we always write a valid FileInfo{} and avoid
ever writing empty FileInfo{} to the lowest layers.
Co-authored-by: Poorna Krishnamoorthy <poorna@minio.io>
Co-authored-by: Harshavardhana <harsha@minio.io>
This commit is contained in:
committed by
GitHub
parent
dca7cf7200
commit
47c09a1e6f
@@ -32,6 +32,7 @@ import (
|
||||
"github.com/minio/minio/cmd/logger"
|
||||
"github.com/minio/minio/pkg/bucket/lifecycle"
|
||||
"github.com/minio/minio/pkg/hash"
|
||||
"github.com/minio/minio/pkg/madmin"
|
||||
"github.com/tinylib/msgp/msgp"
|
||||
)
|
||||
|
||||
@@ -45,15 +46,26 @@ type sizeHistogram [dataUsageBucketLen]uint64
|
||||
|
||||
//msgp:tuple dataUsageEntry
|
||||
type dataUsageEntry struct {
|
||||
Children dataUsageHashMap
|
||||
// These fields do no include any children.
|
||||
Size int64
|
||||
ReplicatedSize uint64
|
||||
ReplicationPendingSize uint64
|
||||
ReplicationFailedSize uint64
|
||||
ReplicaSize uint64
|
||||
Objects uint64
|
||||
ObjSizes sizeHistogram
|
||||
Children dataUsageHashMap
|
||||
Size int64
|
||||
Objects uint64
|
||||
ObjSizes sizeHistogram
|
||||
ReplicationStats replicationStats
|
||||
}
|
||||
|
||||
//msgp:tuple replicationStats
|
||||
type replicationStats struct {
|
||||
PendingSize uint64
|
||||
ReplicatedSize uint64
|
||||
FailedSize uint64
|
||||
ReplicaSize uint64
|
||||
FailedCount uint64
|
||||
PendingCount uint64
|
||||
MissedThresholdSize uint64
|
||||
AfterThresholdSize uint64
|
||||
MissedThresholdCount uint64
|
||||
AfterThresholdCount uint64
|
||||
}
|
||||
|
||||
//msgp:tuple dataUsageEntryV2
|
||||
@@ -65,20 +77,40 @@ type dataUsageEntryV2 struct {
|
||||
Children dataUsageHashMap
|
||||
}
|
||||
|
||||
// dataUsageCache contains a cache of data usage entries latest version 3.
|
||||
type dataUsageCache struct {
|
||||
Info dataUsageCacheInfo
|
||||
Disks []string
|
||||
Cache map[string]dataUsageEntry
|
||||
//msgp:tuple dataUsageEntryV3
|
||||
type dataUsageEntryV3 struct {
|
||||
// These fields do no include any children.
|
||||
Size int64
|
||||
ReplicatedSize uint64
|
||||
ReplicationPendingSize uint64
|
||||
ReplicationFailedSize uint64
|
||||
ReplicaSize uint64
|
||||
Objects uint64
|
||||
ObjSizes sizeHistogram
|
||||
Children dataUsageHashMap
|
||||
}
|
||||
|
||||
// dataUsageCache contains a cache of data usage entries version 2.
|
||||
// dataUsageCache contains a cache of data usage entries latest version 4.
|
||||
type dataUsageCache struct {
|
||||
Info dataUsageCacheInfo
|
||||
Cache map[string]dataUsageEntry
|
||||
Disks []string
|
||||
}
|
||||
|
||||
// dataUsageCacheV2 contains a cache of data usage entries version 2.
|
||||
type dataUsageCacheV2 struct {
|
||||
Info dataUsageCacheInfo
|
||||
Disks []string
|
||||
Cache map[string]dataUsageEntryV2
|
||||
}
|
||||
|
||||
// dataUsageCache contains a cache of data usage entries version 3.
|
||||
type dataUsageCacheV3 struct {
|
||||
Info dataUsageCacheInfo
|
||||
Disks []string
|
||||
Cache map[string]dataUsageEntryV3
|
||||
}
|
||||
|
||||
//msgp:ignore dataUsageEntryInfo
|
||||
type dataUsageEntryInfo struct {
|
||||
Name string
|
||||
@@ -89,8 +121,8 @@ type dataUsageEntryInfo struct {
|
||||
type dataUsageCacheInfo struct {
|
||||
// Name of the bucket. Also root element.
|
||||
Name string
|
||||
LastUpdate time.Time
|
||||
NextCycle uint32
|
||||
LastUpdate time.Time
|
||||
// indicates if the disk is being healed and scanner
|
||||
// should skip healing the disk
|
||||
SkipHealing bool
|
||||
@@ -100,20 +132,25 @@ type dataUsageCacheInfo struct {
|
||||
|
||||
func (e *dataUsageEntry) addSizes(summary sizeSummary) {
|
||||
e.Size += summary.totalSize
|
||||
e.ReplicatedSize += uint64(summary.replicatedSize)
|
||||
e.ReplicationFailedSize += uint64(summary.failedSize)
|
||||
e.ReplicationPendingSize += uint64(summary.pendingSize)
|
||||
e.ReplicaSize += uint64(summary.replicaSize)
|
||||
e.ReplicationStats.ReplicatedSize += uint64(summary.replicatedSize)
|
||||
e.ReplicationStats.FailedSize += uint64(summary.failedSize)
|
||||
e.ReplicationStats.PendingSize += uint64(summary.pendingSize)
|
||||
e.ReplicationStats.ReplicaSize += uint64(summary.replicaSize)
|
||||
e.ReplicationStats.PendingCount += uint64(summary.pendingCount)
|
||||
e.ReplicationStats.FailedCount += uint64(summary.failedCount)
|
||||
|
||||
}
|
||||
|
||||
// merge other data usage entry into this, excluding children.
|
||||
func (e *dataUsageEntry) merge(other dataUsageEntry) {
|
||||
e.Objects += other.Objects
|
||||
e.Size += other.Size
|
||||
e.ReplicationPendingSize += other.ReplicationPendingSize
|
||||
e.ReplicationFailedSize += other.ReplicationFailedSize
|
||||
e.ReplicatedSize += other.ReplicatedSize
|
||||
e.ReplicaSize += other.ReplicaSize
|
||||
e.ReplicationStats.PendingSize += other.ReplicationStats.PendingSize
|
||||
e.ReplicationStats.FailedSize += other.ReplicationStats.FailedSize
|
||||
e.ReplicationStats.ReplicatedSize += other.ReplicationStats.ReplicatedSize
|
||||
e.ReplicationStats.ReplicaSize += other.ReplicationStats.ReplicaSize
|
||||
e.ReplicationStats.PendingCount += other.ReplicationStats.PendingCount
|
||||
e.ReplicationStats.FailedCount += other.ReplicationStats.FailedCount
|
||||
|
||||
for i, v := range other.ObjSizes[:] {
|
||||
e.ObjSizes[i] += v
|
||||
@@ -238,25 +275,27 @@ func (d *dataUsageCache) keepRootChildren(list map[dataUsageHash]struct{}) {
|
||||
}
|
||||
}
|
||||
|
||||
// dui converts the flattened version of the path to DataUsageInfo.
|
||||
// dui converts the flattened version of the path to madmin.DataUsageInfo.
|
||||
// As a side effect d will be flattened, use a clone if this is not ok.
|
||||
func (d *dataUsageCache) dui(path string, buckets []BucketInfo) DataUsageInfo {
|
||||
func (d *dataUsageCache) dui(path string, buckets []BucketInfo) madmin.DataUsageInfo {
|
||||
e := d.find(path)
|
||||
if e == nil {
|
||||
// No entry found, return empty.
|
||||
return DataUsageInfo{}
|
||||
return madmin.DataUsageInfo{}
|
||||
}
|
||||
flat := d.flatten(*e)
|
||||
return DataUsageInfo{
|
||||
LastUpdate: d.Info.LastUpdate,
|
||||
ObjectsTotalCount: flat.Objects,
|
||||
ObjectsTotalSize: uint64(flat.Size),
|
||||
ReplicatedSize: flat.ReplicatedSize,
|
||||
ReplicationFailedSize: flat.ReplicationFailedSize,
|
||||
ReplicationPendingSize: flat.ReplicationPendingSize,
|
||||
ReplicaSize: flat.ReplicaSize,
|
||||
BucketsCount: uint64(len(e.Children)),
|
||||
BucketsUsage: d.bucketsUsageInfo(buckets),
|
||||
return madmin.DataUsageInfo{
|
||||
LastUpdate: d.Info.LastUpdate,
|
||||
ObjectsTotalCount: flat.Objects,
|
||||
ObjectsTotalSize: uint64(flat.Size),
|
||||
ReplicatedSize: flat.ReplicationStats.ReplicatedSize,
|
||||
ReplicationFailedSize: flat.ReplicationStats.FailedSize,
|
||||
ReplicationPendingSize: flat.ReplicationStats.PendingSize,
|
||||
ReplicaSize: flat.ReplicationStats.ReplicaSize,
|
||||
ReplicationPendingCount: flat.ReplicationStats.PendingCount,
|
||||
ReplicationFailedCount: flat.ReplicationStats.FailedCount,
|
||||
BucketsCount: uint64(len(e.Children)),
|
||||
BucketsUsage: d.bucketsUsageInfo(buckets),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -373,22 +412,24 @@ func (h *sizeHistogram) toMap() map[string]uint64 {
|
||||
|
||||
// bucketsUsageInfo returns the buckets usage info as a map, with
|
||||
// key as bucket name
|
||||
func (d *dataUsageCache) bucketsUsageInfo(buckets []BucketInfo) map[string]BucketUsageInfo {
|
||||
var dst = make(map[string]BucketUsageInfo, len(buckets))
|
||||
func (d *dataUsageCache) bucketsUsageInfo(buckets []BucketInfo) map[string]madmin.BucketUsageInfo {
|
||||
var dst = make(map[string]madmin.BucketUsageInfo, len(buckets))
|
||||
for _, bucket := range buckets {
|
||||
e := d.find(bucket.Name)
|
||||
if e == nil {
|
||||
continue
|
||||
}
|
||||
flat := d.flatten(*e)
|
||||
dst[bucket.Name] = BucketUsageInfo{
|
||||
Size: uint64(flat.Size),
|
||||
ObjectsCount: flat.Objects,
|
||||
ReplicationPendingSize: flat.ReplicationPendingSize,
|
||||
ReplicatedSize: flat.ReplicatedSize,
|
||||
ReplicationFailedSize: flat.ReplicationFailedSize,
|
||||
ReplicaSize: flat.ReplicaSize,
|
||||
ObjectSizesHistogram: flat.ObjSizes.toMap(),
|
||||
dst[bucket.Name] = madmin.BucketUsageInfo{
|
||||
Size: uint64(flat.Size),
|
||||
ObjectsCount: flat.Objects,
|
||||
ReplicationPendingSize: flat.ReplicationStats.PendingSize,
|
||||
ReplicatedSize: flat.ReplicationStats.ReplicatedSize,
|
||||
ReplicationFailedSize: flat.ReplicationStats.FailedSize,
|
||||
ReplicationPendingCount: flat.ReplicationStats.PendingCount,
|
||||
ReplicationFailedCount: flat.ReplicationStats.FailedCount,
|
||||
ReplicaSize: flat.ReplicationStats.ReplicaSize,
|
||||
ObjectSizesHistogram: flat.ObjSizes.toMap(),
|
||||
}
|
||||
}
|
||||
return dst
|
||||
@@ -396,20 +437,22 @@ func (d *dataUsageCache) bucketsUsageInfo(buckets []BucketInfo) map[string]Bucke
|
||||
|
||||
// bucketUsageInfo returns the buckets usage info.
|
||||
// If not found all values returned are zero values.
|
||||
func (d *dataUsageCache) bucketUsageInfo(bucket string) BucketUsageInfo {
|
||||
func (d *dataUsageCache) bucketUsageInfo(bucket string) madmin.BucketUsageInfo {
|
||||
e := d.find(bucket)
|
||||
if e == nil {
|
||||
return BucketUsageInfo{}
|
||||
return madmin.BucketUsageInfo{}
|
||||
}
|
||||
flat := d.flatten(*e)
|
||||
return BucketUsageInfo{
|
||||
Size: uint64(flat.Size),
|
||||
ObjectsCount: flat.Objects,
|
||||
ReplicationPendingSize: flat.ReplicationPendingSize,
|
||||
ReplicatedSize: flat.ReplicatedSize,
|
||||
ReplicationFailedSize: flat.ReplicationFailedSize,
|
||||
ReplicaSize: flat.ReplicaSize,
|
||||
ObjectSizesHistogram: flat.ObjSizes.toMap(),
|
||||
return madmin.BucketUsageInfo{
|
||||
Size: uint64(flat.Size),
|
||||
ObjectsCount: flat.Objects,
|
||||
ReplicationPendingSize: flat.ReplicationStats.PendingSize,
|
||||
ReplicationPendingCount: flat.ReplicationStats.PendingCount,
|
||||
ReplicatedSize: flat.ReplicationStats.ReplicatedSize,
|
||||
ReplicationFailedSize: flat.ReplicationStats.FailedSize,
|
||||
ReplicationFailedCount: flat.ReplicationStats.FailedCount,
|
||||
ReplicaSize: flat.ReplicationStats.ReplicaSize,
|
||||
ObjectSizesHistogram: flat.ObjSizes.toMap(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -533,6 +576,7 @@ func (d *dataUsageCache) save(ctx context.Context, store objectIO, name string)
|
||||
// Bumping the cache version will drop data from previous versions
|
||||
// and write new data with the new version.
|
||||
const (
|
||||
dataUsageCacheVerV4 = 4
|
||||
dataUsageCacheVerV3 = 3
|
||||
dataUsageCacheVerV2 = 2
|
||||
dataUsageCacheVerV1 = 1
|
||||
@@ -541,7 +585,7 @@ const (
|
||||
// serialize the contents of the cache.
|
||||
func (d *dataUsageCache) serializeTo(dst io.Writer) error {
|
||||
// Add version and compress.
|
||||
_, err := dst.Write([]byte{dataUsageCacheVerV3})
|
||||
_, err := dst.Write([]byte{dataUsageCacheVerV4})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -609,6 +653,35 @@ func (d *dataUsageCache) deserialize(r io.Reader) error {
|
||||
return err
|
||||
}
|
||||
defer dec.Close()
|
||||
dold := &dataUsageCacheV3{}
|
||||
if err = dold.DecodeMsg(msgp.NewReader(dec)); err != nil {
|
||||
return err
|
||||
}
|
||||
d.Info = dold.Info
|
||||
d.Disks = dold.Disks
|
||||
d.Cache = make(map[string]dataUsageEntry, len(dold.Cache))
|
||||
for k, v := range dold.Cache {
|
||||
d.Cache[k] = dataUsageEntry{
|
||||
Size: v.Size,
|
||||
Objects: v.Objects,
|
||||
ObjSizes: v.ObjSizes,
|
||||
Children: v.Children,
|
||||
ReplicationStats: replicationStats{
|
||||
ReplicatedSize: v.ReplicatedSize,
|
||||
ReplicaSize: v.ReplicaSize,
|
||||
FailedSize: v.ReplicationFailedSize,
|
||||
PendingSize: v.ReplicationPendingSize,
|
||||
},
|
||||
}
|
||||
}
|
||||
return nil
|
||||
case dataUsageCacheVerV4:
|
||||
// Zstd compressed.
|
||||
dec, err := zstd.NewReader(r, zstd.WithDecoderConcurrency(2))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer dec.Close()
|
||||
|
||||
return d.DecodeMsg(msgp.NewReader(dec))
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user