Various improvements in replication (#11949)

- collect real time replication metrics for prometheus.
- add pending_count, failed_count metric for total pending/failed replication operations.

- add API to get replication metrics

- add MRF worker to handle spill-over replication operations

- multiple issues found with replication
- fixes an issue when client sends a bucket
 name with `/` at the end from SetRemoteTarget
 API call make sure to trim the bucket name to 
 avoid any extra `/`.

- hold write locks in GetObjectNInfo during replication
  to ensure that object version stack is not overwritten
  while reading the content.

- add additional protection during WriteMetadata() to
  ensure that we always write a valid FileInfo{} and avoid
  ever writing empty FileInfo{} to the lowest layers.

Co-authored-by: Poorna Krishnamoorthy <poorna@minio.io>
Co-authored-by: Harshavardhana <harsha@minio.io>
This commit is contained in:
Poorna Krishnamoorthy
2021-04-03 09:03:42 -07:00
committed by GitHub
parent dca7cf7200
commit 47c09a1e6f
36 changed files with 1914 additions and 496 deletions

View File

@@ -32,6 +32,7 @@ import (
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/bucket/lifecycle"
"github.com/minio/minio/pkg/hash"
"github.com/minio/minio/pkg/madmin"
"github.com/tinylib/msgp/msgp"
)
@@ -45,15 +46,26 @@ type sizeHistogram [dataUsageBucketLen]uint64
//msgp:tuple dataUsageEntry
type dataUsageEntry struct {
Children dataUsageHashMap
// These fields do no include any children.
Size int64
ReplicatedSize uint64
ReplicationPendingSize uint64
ReplicationFailedSize uint64
ReplicaSize uint64
Objects uint64
ObjSizes sizeHistogram
Children dataUsageHashMap
Size int64
Objects uint64
ObjSizes sizeHistogram
ReplicationStats replicationStats
}
//msgp:tuple replicationStats
type replicationStats struct {
PendingSize uint64
ReplicatedSize uint64
FailedSize uint64
ReplicaSize uint64
FailedCount uint64
PendingCount uint64
MissedThresholdSize uint64
AfterThresholdSize uint64
MissedThresholdCount uint64
AfterThresholdCount uint64
}
//msgp:tuple dataUsageEntryV2
@@ -65,20 +77,40 @@ type dataUsageEntryV2 struct {
Children dataUsageHashMap
}
// dataUsageCache contains a cache of data usage entries latest version 3.
type dataUsageCache struct {
Info dataUsageCacheInfo
Disks []string
Cache map[string]dataUsageEntry
//msgp:tuple dataUsageEntryV3
type dataUsageEntryV3 struct {
// These fields do no include any children.
Size int64
ReplicatedSize uint64
ReplicationPendingSize uint64
ReplicationFailedSize uint64
ReplicaSize uint64
Objects uint64
ObjSizes sizeHistogram
Children dataUsageHashMap
}
// dataUsageCache contains a cache of data usage entries version 2.
// dataUsageCache contains a cache of data usage entries latest version 4.
type dataUsageCache struct {
Info dataUsageCacheInfo
Cache map[string]dataUsageEntry
Disks []string
}
// dataUsageCacheV2 contains a cache of data usage entries version 2.
type dataUsageCacheV2 struct {
Info dataUsageCacheInfo
Disks []string
Cache map[string]dataUsageEntryV2
}
// dataUsageCache contains a cache of data usage entries version 3.
type dataUsageCacheV3 struct {
Info dataUsageCacheInfo
Disks []string
Cache map[string]dataUsageEntryV3
}
//msgp:ignore dataUsageEntryInfo
type dataUsageEntryInfo struct {
Name string
@@ -89,8 +121,8 @@ type dataUsageEntryInfo struct {
type dataUsageCacheInfo struct {
// Name of the bucket. Also root element.
Name string
LastUpdate time.Time
NextCycle uint32
LastUpdate time.Time
// indicates if the disk is being healed and scanner
// should skip healing the disk
SkipHealing bool
@@ -100,20 +132,25 @@ type dataUsageCacheInfo struct {
func (e *dataUsageEntry) addSizes(summary sizeSummary) {
e.Size += summary.totalSize
e.ReplicatedSize += uint64(summary.replicatedSize)
e.ReplicationFailedSize += uint64(summary.failedSize)
e.ReplicationPendingSize += uint64(summary.pendingSize)
e.ReplicaSize += uint64(summary.replicaSize)
e.ReplicationStats.ReplicatedSize += uint64(summary.replicatedSize)
e.ReplicationStats.FailedSize += uint64(summary.failedSize)
e.ReplicationStats.PendingSize += uint64(summary.pendingSize)
e.ReplicationStats.ReplicaSize += uint64(summary.replicaSize)
e.ReplicationStats.PendingCount += uint64(summary.pendingCount)
e.ReplicationStats.FailedCount += uint64(summary.failedCount)
}
// merge other data usage entry into this, excluding children.
func (e *dataUsageEntry) merge(other dataUsageEntry) {
e.Objects += other.Objects
e.Size += other.Size
e.ReplicationPendingSize += other.ReplicationPendingSize
e.ReplicationFailedSize += other.ReplicationFailedSize
e.ReplicatedSize += other.ReplicatedSize
e.ReplicaSize += other.ReplicaSize
e.ReplicationStats.PendingSize += other.ReplicationStats.PendingSize
e.ReplicationStats.FailedSize += other.ReplicationStats.FailedSize
e.ReplicationStats.ReplicatedSize += other.ReplicationStats.ReplicatedSize
e.ReplicationStats.ReplicaSize += other.ReplicationStats.ReplicaSize
e.ReplicationStats.PendingCount += other.ReplicationStats.PendingCount
e.ReplicationStats.FailedCount += other.ReplicationStats.FailedCount
for i, v := range other.ObjSizes[:] {
e.ObjSizes[i] += v
@@ -238,25 +275,27 @@ func (d *dataUsageCache) keepRootChildren(list map[dataUsageHash]struct{}) {
}
}
// dui converts the flattened version of the path to DataUsageInfo.
// dui converts the flattened version of the path to madmin.DataUsageInfo.
// As a side effect d will be flattened, use a clone if this is not ok.
func (d *dataUsageCache) dui(path string, buckets []BucketInfo) DataUsageInfo {
func (d *dataUsageCache) dui(path string, buckets []BucketInfo) madmin.DataUsageInfo {
e := d.find(path)
if e == nil {
// No entry found, return empty.
return DataUsageInfo{}
return madmin.DataUsageInfo{}
}
flat := d.flatten(*e)
return DataUsageInfo{
LastUpdate: d.Info.LastUpdate,
ObjectsTotalCount: flat.Objects,
ObjectsTotalSize: uint64(flat.Size),
ReplicatedSize: flat.ReplicatedSize,
ReplicationFailedSize: flat.ReplicationFailedSize,
ReplicationPendingSize: flat.ReplicationPendingSize,
ReplicaSize: flat.ReplicaSize,
BucketsCount: uint64(len(e.Children)),
BucketsUsage: d.bucketsUsageInfo(buckets),
return madmin.DataUsageInfo{
LastUpdate: d.Info.LastUpdate,
ObjectsTotalCount: flat.Objects,
ObjectsTotalSize: uint64(flat.Size),
ReplicatedSize: flat.ReplicationStats.ReplicatedSize,
ReplicationFailedSize: flat.ReplicationStats.FailedSize,
ReplicationPendingSize: flat.ReplicationStats.PendingSize,
ReplicaSize: flat.ReplicationStats.ReplicaSize,
ReplicationPendingCount: flat.ReplicationStats.PendingCount,
ReplicationFailedCount: flat.ReplicationStats.FailedCount,
BucketsCount: uint64(len(e.Children)),
BucketsUsage: d.bucketsUsageInfo(buckets),
}
}
@@ -373,22 +412,24 @@ func (h *sizeHistogram) toMap() map[string]uint64 {
// bucketsUsageInfo returns the buckets usage info as a map, with
// key as bucket name
func (d *dataUsageCache) bucketsUsageInfo(buckets []BucketInfo) map[string]BucketUsageInfo {
var dst = make(map[string]BucketUsageInfo, len(buckets))
func (d *dataUsageCache) bucketsUsageInfo(buckets []BucketInfo) map[string]madmin.BucketUsageInfo {
var dst = make(map[string]madmin.BucketUsageInfo, len(buckets))
for _, bucket := range buckets {
e := d.find(bucket.Name)
if e == nil {
continue
}
flat := d.flatten(*e)
dst[bucket.Name] = BucketUsageInfo{
Size: uint64(flat.Size),
ObjectsCount: flat.Objects,
ReplicationPendingSize: flat.ReplicationPendingSize,
ReplicatedSize: flat.ReplicatedSize,
ReplicationFailedSize: flat.ReplicationFailedSize,
ReplicaSize: flat.ReplicaSize,
ObjectSizesHistogram: flat.ObjSizes.toMap(),
dst[bucket.Name] = madmin.BucketUsageInfo{
Size: uint64(flat.Size),
ObjectsCount: flat.Objects,
ReplicationPendingSize: flat.ReplicationStats.PendingSize,
ReplicatedSize: flat.ReplicationStats.ReplicatedSize,
ReplicationFailedSize: flat.ReplicationStats.FailedSize,
ReplicationPendingCount: flat.ReplicationStats.PendingCount,
ReplicationFailedCount: flat.ReplicationStats.FailedCount,
ReplicaSize: flat.ReplicationStats.ReplicaSize,
ObjectSizesHistogram: flat.ObjSizes.toMap(),
}
}
return dst
@@ -396,20 +437,22 @@ func (d *dataUsageCache) bucketsUsageInfo(buckets []BucketInfo) map[string]Bucke
// bucketUsageInfo returns the buckets usage info.
// If not found all values returned are zero values.
func (d *dataUsageCache) bucketUsageInfo(bucket string) BucketUsageInfo {
func (d *dataUsageCache) bucketUsageInfo(bucket string) madmin.BucketUsageInfo {
e := d.find(bucket)
if e == nil {
return BucketUsageInfo{}
return madmin.BucketUsageInfo{}
}
flat := d.flatten(*e)
return BucketUsageInfo{
Size: uint64(flat.Size),
ObjectsCount: flat.Objects,
ReplicationPendingSize: flat.ReplicationPendingSize,
ReplicatedSize: flat.ReplicatedSize,
ReplicationFailedSize: flat.ReplicationFailedSize,
ReplicaSize: flat.ReplicaSize,
ObjectSizesHistogram: flat.ObjSizes.toMap(),
return madmin.BucketUsageInfo{
Size: uint64(flat.Size),
ObjectsCount: flat.Objects,
ReplicationPendingSize: flat.ReplicationStats.PendingSize,
ReplicationPendingCount: flat.ReplicationStats.PendingCount,
ReplicatedSize: flat.ReplicationStats.ReplicatedSize,
ReplicationFailedSize: flat.ReplicationStats.FailedSize,
ReplicationFailedCount: flat.ReplicationStats.FailedCount,
ReplicaSize: flat.ReplicationStats.ReplicaSize,
ObjectSizesHistogram: flat.ObjSizes.toMap(),
}
}
@@ -533,6 +576,7 @@ func (d *dataUsageCache) save(ctx context.Context, store objectIO, name string)
// Bumping the cache version will drop data from previous versions
// and write new data with the new version.
const (
dataUsageCacheVerV4 = 4
dataUsageCacheVerV3 = 3
dataUsageCacheVerV2 = 2
dataUsageCacheVerV1 = 1
@@ -541,7 +585,7 @@ const (
// serialize the contents of the cache.
func (d *dataUsageCache) serializeTo(dst io.Writer) error {
// Add version and compress.
_, err := dst.Write([]byte{dataUsageCacheVerV3})
_, err := dst.Write([]byte{dataUsageCacheVerV4})
if err != nil {
return err
}
@@ -609,6 +653,35 @@ func (d *dataUsageCache) deserialize(r io.Reader) error {
return err
}
defer dec.Close()
dold := &dataUsageCacheV3{}
if err = dold.DecodeMsg(msgp.NewReader(dec)); err != nil {
return err
}
d.Info = dold.Info
d.Disks = dold.Disks
d.Cache = make(map[string]dataUsageEntry, len(dold.Cache))
for k, v := range dold.Cache {
d.Cache[k] = dataUsageEntry{
Size: v.Size,
Objects: v.Objects,
ObjSizes: v.ObjSizes,
Children: v.Children,
ReplicationStats: replicationStats{
ReplicatedSize: v.ReplicatedSize,
ReplicaSize: v.ReplicaSize,
FailedSize: v.ReplicationFailedSize,
PendingSize: v.ReplicationPendingSize,
},
}
}
return nil
case dataUsageCacheVerV4:
// Zstd compressed.
dec, err := zstd.NewReader(r, zstd.WithDecoderConcurrency(2))
if err != nil {
return err
}
defer dec.Close()
return d.DecodeMsg(msgp.NewReader(dec))
}