mirror of
https://github.com/minio/minio.git
synced 2024-12-25 06:35:56 -05:00
82dc6aff1c
just like replication workers, allow failed replication workers to be configurable in situations like DR failures etc to catch up on replication sooner when DR is back online. Signed-off-by: Harshavardhana <harsha@minio.io>
194 lines
5.4 KiB
Go
194 lines
5.4 KiB
Go
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
//
|
|
// This file is part of MinIO Object Storage stack
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package cmd
|
|
|
|
import (
|
|
"context"
|
|
"sync"
|
|
"sync/atomic"
|
|
|
|
"github.com/minio/minio/pkg/bucket/replication"
|
|
)
|
|
|
|
func (b *BucketReplicationStats) hasReplicationUsage() bool {
|
|
return b.PendingSize > 0 ||
|
|
b.FailedSize > 0 ||
|
|
b.ReplicatedSize > 0 ||
|
|
b.ReplicaSize > 0 ||
|
|
b.PendingCount > 0 ||
|
|
b.FailedCount > 0
|
|
}
|
|
|
|
// ReplicationStats holds the global in-memory replication stats
|
|
type ReplicationStats struct {
|
|
Cache map[string]*BucketReplicationStats
|
|
UsageCache map[string]*BucketReplicationStats
|
|
sync.RWMutex
|
|
}
|
|
|
|
// Delete deletes in-memory replication statistics for a bucket.
|
|
func (r *ReplicationStats) Delete(bucket string) {
|
|
if r == nil {
|
|
return
|
|
}
|
|
|
|
r.Lock()
|
|
defer r.Unlock()
|
|
delete(r.Cache, bucket)
|
|
delete(r.UsageCache, bucket)
|
|
|
|
}
|
|
|
|
// Update updates in-memory replication statistics with new values.
|
|
func (r *ReplicationStats) Update(bucket string, n int64, status, prevStatus replication.StatusType, opType replication.Type) {
|
|
if r == nil {
|
|
return
|
|
}
|
|
|
|
r.RLock()
|
|
b, ok := r.Cache[bucket]
|
|
if !ok {
|
|
b = &BucketReplicationStats{}
|
|
}
|
|
r.RUnlock()
|
|
switch status {
|
|
case replication.Pending:
|
|
if opType == replication.ObjectReplicationType {
|
|
atomic.AddUint64(&b.PendingSize, uint64(n))
|
|
}
|
|
atomic.AddUint64(&b.PendingCount, 1)
|
|
case replication.Completed:
|
|
switch prevStatus { // adjust counters based on previous state
|
|
case replication.Pending:
|
|
atomic.AddUint64(&b.PendingCount, ^uint64(0))
|
|
case replication.Failed:
|
|
atomic.AddUint64(&b.FailedCount, ^uint64(0))
|
|
}
|
|
if opType == replication.ObjectReplicationType {
|
|
atomic.AddUint64(&b.ReplicatedSize, uint64(n))
|
|
switch prevStatus {
|
|
case replication.Pending:
|
|
atomic.AddUint64(&b.PendingSize, ^uint64(n-1))
|
|
case replication.Failed:
|
|
atomic.AddUint64(&b.FailedSize, ^uint64(n-1))
|
|
}
|
|
}
|
|
case replication.Failed:
|
|
// count failures only once - not on every retry
|
|
switch prevStatus { // adjust counters based on previous state
|
|
case replication.Pending:
|
|
atomic.AddUint64(&b.PendingCount, ^uint64(0))
|
|
}
|
|
if opType == replication.ObjectReplicationType {
|
|
if prevStatus == replication.Pending {
|
|
atomic.AddUint64(&b.FailedSize, uint64(n))
|
|
atomic.AddUint64(&b.FailedCount, 1)
|
|
atomic.AddUint64(&b.PendingSize, ^uint64(n-1))
|
|
}
|
|
}
|
|
case replication.Replica:
|
|
if opType == replication.ObjectReplicationType {
|
|
atomic.AddUint64(&b.ReplicaSize, uint64(n))
|
|
}
|
|
}
|
|
r.Lock()
|
|
r.Cache[bucket] = b
|
|
r.Unlock()
|
|
}
|
|
|
|
// GetInitialUsage get replication metrics available at the time of cluster initialization
|
|
func (r *ReplicationStats) GetInitialUsage(bucket string) BucketReplicationStats {
|
|
if r == nil {
|
|
return BucketReplicationStats{}
|
|
}
|
|
|
|
r.RLock()
|
|
defer r.RUnlock()
|
|
|
|
st, ok := r.UsageCache[bucket]
|
|
if !ok {
|
|
return BucketReplicationStats{}
|
|
}
|
|
return BucketReplicationStats{
|
|
PendingSize: atomic.LoadUint64(&st.PendingSize),
|
|
FailedSize: atomic.LoadUint64(&st.FailedSize),
|
|
ReplicatedSize: atomic.LoadUint64(&st.ReplicatedSize),
|
|
ReplicaSize: atomic.LoadUint64(&st.ReplicaSize),
|
|
PendingCount: atomic.LoadUint64(&st.PendingCount),
|
|
FailedCount: atomic.LoadUint64(&st.FailedCount),
|
|
}
|
|
}
|
|
|
|
// Get replication metrics for a bucket from this node since this node came up.
|
|
func (r *ReplicationStats) Get(bucket string) BucketReplicationStats {
|
|
if r == nil {
|
|
return BucketReplicationStats{}
|
|
}
|
|
|
|
r.RLock()
|
|
defer r.RUnlock()
|
|
|
|
st, ok := r.Cache[bucket]
|
|
if !ok {
|
|
return BucketReplicationStats{}
|
|
}
|
|
|
|
return BucketReplicationStats{
|
|
PendingSize: atomic.LoadUint64(&st.PendingSize),
|
|
FailedSize: atomic.LoadUint64(&st.FailedSize),
|
|
ReplicatedSize: atomic.LoadUint64(&st.ReplicatedSize),
|
|
ReplicaSize: atomic.LoadUint64(&st.ReplicaSize),
|
|
PendingCount: atomic.LoadUint64(&st.PendingCount),
|
|
FailedCount: atomic.LoadUint64(&st.FailedCount),
|
|
}
|
|
}
|
|
|
|
// NewReplicationStats initialize in-memory replication statistics
|
|
func NewReplicationStats(ctx context.Context, objectAPI ObjectLayer) *ReplicationStats {
|
|
st := &ReplicationStats{
|
|
Cache: make(map[string]*BucketReplicationStats),
|
|
UsageCache: make(map[string]*BucketReplicationStats),
|
|
}
|
|
|
|
dataUsageInfo, err := loadDataUsageFromBackend(ctx, objectAPI)
|
|
if err != nil {
|
|
return st
|
|
}
|
|
|
|
// data usage has not captured any data yet.
|
|
if dataUsageInfo.LastUpdate.IsZero() {
|
|
return st
|
|
}
|
|
|
|
for bucket, usage := range dataUsageInfo.BucketsUsage {
|
|
b := &BucketReplicationStats{
|
|
PendingSize: usage.ReplicationPendingSize,
|
|
FailedSize: usage.ReplicationFailedSize,
|
|
ReplicatedSize: usage.ReplicatedSize,
|
|
ReplicaSize: usage.ReplicaSize,
|
|
PendingCount: usage.ReplicationPendingCount,
|
|
FailedCount: usage.ReplicationFailedCount,
|
|
}
|
|
if b.hasReplicationUsage() {
|
|
st.UsageCache[bucket] = b
|
|
}
|
|
}
|
|
|
|
return st
|
|
}
|