mirror of
https://github.com/minio/minio.git
synced 2025-11-07 12:52:58 -05:00
heal: Persist MRF queue in the disk during shutdown (#19410)
This commit is contained in:
205
cmd/mrf.go
205
cmd/mrf.go
@@ -1,4 +1,4 @@
|
||||
// Copyright (c) 2015-2021 MinIO, Inc.
|
||||
// Copyright (c) 2015-2024 MinIO, Inc.
|
||||
//
|
||||
// This file is part of MinIO Object Storage stack
|
||||
//
|
||||
@@ -15,51 +15,203 @@
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
//go:generate msgp -file=$GOFILE
|
||||
|
||||
package cmd
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/minio/madmin-go/v3"
|
||||
"github.com/minio/pkg/v3/wildcard"
|
||||
"github.com/tinylib/msgp/msgp"
|
||||
)
|
||||
|
||||
const (
|
||||
// mrfOpsQueueSize is the buffer capacity of the MRF operations channel
// created by newMRFState. addPartialOp never blocks, so entries offered
// while the buffer is full are dropped.
mrfOpsQueueSize = 100000
|
||||
)
|
||||
|
||||
// partialOperation is a successful upload/delete of an object
|
||||
const (
|
||||
// healDir is the directory name that forms part of healMRFDir.
healDir = ".heal"
|
||||
// healMRFDir is the path, relative to minioMetaBucket, of the directory
// in which shutdown stores the persisted MRF queue ("list.bin").
healMRFDir = bucketMetaPrefix + SlashSeparator + healDir + SlashSeparator + "mrf"
|
||||
// healMRFMetaFormat is written as the first little-endian uint16 of the
// persisted file header and checked again when the file is loaded.
healMRFMetaFormat = 1
|
||||
// healMRFMetaVersionV1 is written as the second little-endian uint16 of
// the persisted file header and checked again when the file is loaded.
healMRFMetaVersionV1 = 1
|
||||
)
|
||||
|
||||
// PartialOperation is a successful upload/delete of an object
|
||||
// but not written in all disks (having quorum)
|
||||
type partialOperation struct {
|
||||
bucket string
|
||||
object string
|
||||
versionID string
|
||||
versions []byte
|
||||
setIndex, poolIndex int
|
||||
queued time.Time
|
||||
scanMode madmin.HealScanMode
|
||||
// PartialOperation is one entry of the MRF queue. Entries are serialized
// with EncodeMsg by shutdown and read back with DecodeMsg by
// startMRFPersistence, so the field set defines the on-disk format.
type PartialOperation struct {
|
||||
// Bucket is the bucket to heal; when Object is empty the bucket itself
// is healed.
Bucket string
|
||||
// Object is the object to heal inside Bucket.
Object string
|
||||
// VersionID is the single version to heal; it is used only when
// Versions is empty.
VersionID string
|
||||
// Versions holds raw version IDs packed back to back, 16 bytes each;
// the heal routine heals every complete 16-byte entry separately.
Versions []byte
|
||||
// NOTE(review): presumably the erasure set and pool the operation
// belongs to; not used by the code visible here, confirm against callers.
SetIndex, PoolIndex int
|
||||
// Queued is compared with the current time by the heal routine, which
// treats entries younger than one second as too recent to heal.
Queued time.Time
|
||||
// BitrotScan selects madmin.HealDeepScan instead of the default
// madmin.HealNormalScan when healing this entry.
BitrotScan bool
|
||||
}
|
||||
|
||||
// mrfState encapsulates all the information
|
||||
// related to the global background MRF.
|
||||
type mrfState struct {
|
||||
opCh chan partialOperation
|
||||
// opCh is the buffered queue of partial operations waiting to be healed.
// addPartialOp sends to it without blocking; shutdown closes it.
opCh chan PartialOperation
|
||||
|
||||
// closed is set to 1 by shutdown right after opCh has been closed;
// addPartialOp returns immediately once it is set.
closed int32
|
||||
// closing is set to 1 at the very start of shutdown so that no further
// entries are sent to opCh.
closing int32
|
||||
// wg counts addPartialOp calls in flight; shutdown waits for it to
// drain before closing opCh.
wg sync.WaitGroup
|
||||
}
|
||||
|
||||
func newMRFState() mrfState {
|
||||
return mrfState{
|
||||
opCh: make(chan PartialOperation, mrfOpsQueueSize),
|
||||
}
|
||||
}
|
||||
|
||||
// Add a partial S3 operation (put/delete) when one or more disks are offline.
|
||||
func (m *mrfState) addPartialOp(op partialOperation) {
|
||||
func (m *mrfState) addPartialOp(op PartialOperation) {
|
||||
if m == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if atomic.LoadInt32(&m.closed) == 1 {
|
||||
return
|
||||
}
|
||||
|
||||
m.wg.Add(1)
|
||||
defer m.wg.Done()
|
||||
|
||||
if atomic.LoadInt32(&m.closing) == 1 {
|
||||
return
|
||||
}
|
||||
|
||||
select {
|
||||
case m.opCh <- op:
|
||||
default:
|
||||
}
|
||||
}
|
||||
|
||||
// Do not accept new MRF operations anymore and start to save
|
||||
// the current heal status in one available disk
|
||||
func (m *mrfState) shutdown() {
|
||||
atomic.StoreInt32(&m.closing, 1)
|
||||
m.wg.Wait()
|
||||
close(m.opCh)
|
||||
atomic.StoreInt32(&m.closed, 1)
|
||||
|
||||
if len(m.opCh) > 0 {
|
||||
healingLogEvent(context.Background(), "Saving MRF healing data (%d entries)", len(m.opCh))
|
||||
}
|
||||
|
||||
newReader := func() io.ReadCloser {
|
||||
r, w := io.Pipe()
|
||||
go func() {
|
||||
// Initialize MRF meta header.
|
||||
var data [4]byte
|
||||
binary.LittleEndian.PutUint16(data[0:2], healMRFMetaFormat)
|
||||
binary.LittleEndian.PutUint16(data[2:4], healMRFMetaVersionV1)
|
||||
mw := msgp.NewWriter(w)
|
||||
n, err := mw.Write(data[:])
|
||||
if err != nil {
|
||||
w.CloseWithError(err)
|
||||
return
|
||||
}
|
||||
if n != len(data) {
|
||||
w.CloseWithError(io.ErrShortWrite)
|
||||
return
|
||||
}
|
||||
for item := range m.opCh {
|
||||
err = item.EncodeMsg(mw)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
mw.Flush()
|
||||
w.CloseWithError(err)
|
||||
}()
|
||||
return r
|
||||
}
|
||||
|
||||
globalLocalDrivesMu.RLock()
|
||||
localDrives := cloneDrives(globalLocalDrivesMap)
|
||||
globalLocalDrivesMu.RUnlock()
|
||||
|
||||
for _, localDrive := range localDrives {
|
||||
r := newReader()
|
||||
err := localDrive.CreateFile(context.Background(), "", minioMetaBucket, pathJoin(healMRFDir, "list.bin"), -1, r)
|
||||
r.Close()
|
||||
if err == nil {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (m *mrfState) startMRFPersistence() {
|
||||
loadMRF := func(rc io.ReadCloser, opCh chan PartialOperation) error {
|
||||
defer rc.Close()
|
||||
var data [4]byte
|
||||
n, err := rc.Read(data[:])
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if n != len(data) {
|
||||
return errors.New("heal mrf: no data")
|
||||
}
|
||||
// Read resync meta header
|
||||
switch binary.LittleEndian.Uint16(data[0:2]) {
|
||||
case healMRFMetaFormat:
|
||||
default:
|
||||
return fmt.Errorf("heal mrf: unknown format: %d", binary.LittleEndian.Uint16(data[0:2]))
|
||||
}
|
||||
switch binary.LittleEndian.Uint16(data[2:4]) {
|
||||
case healMRFMetaVersionV1:
|
||||
default:
|
||||
return fmt.Errorf("heal mrf: unknown version: %d", binary.LittleEndian.Uint16(data[2:4]))
|
||||
}
|
||||
|
||||
mr := msgp.NewReader(rc)
|
||||
for {
|
||||
op := PartialOperation{}
|
||||
err = op.DecodeMsg(mr)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
opCh <- op
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
globalLocalDrivesMu.RLock()
|
||||
localDrives := cloneDrives(globalLocalDrivesMap)
|
||||
globalLocalDrivesMu.RUnlock()
|
||||
|
||||
for _, localDrive := range localDrives {
|
||||
if localDrive == nil {
|
||||
continue
|
||||
}
|
||||
rc, err := localDrive.ReadFileStream(context.Background(), minioMetaBucket, pathJoin(healMRFDir, "list.bin"), 0, -1)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
err = loadMRF(rc, m.opCh)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
// finally delete the file after processing mrf entries
|
||||
localDrive.Delete(GlobalContext, minioMetaBucket, pathJoin(healMRFDir, "list.bin"), DeleteOptions{})
|
||||
break
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
var healSleeper = newDynamicSleeper(5, time.Second, false)
|
||||
|
||||
// healRoutine listens to new disks reconnection events and
|
||||
@@ -78,24 +230,24 @@ func (m *mrfState) healRoutine(z *erasureServerPools) {
|
||||
// We might land at .metacache, .trash, .multipart
|
||||
// no need to heal them skip, only when bucket
|
||||
// is '.minio.sys'
|
||||
if u.bucket == minioMetaBucket {
|
||||
if u.Bucket == minioMetaBucket {
|
||||
// No MRF needed for temporary objects
|
||||
if wildcard.Match("buckets/*/.metacache/*", u.object) {
|
||||
if wildcard.Match("buckets/*/.metacache/*", u.Object) {
|
||||
continue
|
||||
}
|
||||
if wildcard.Match("tmp/*", u.object) {
|
||||
if wildcard.Match("tmp/*", u.Object) {
|
||||
continue
|
||||
}
|
||||
if wildcard.Match("multipart/*", u.object) {
|
||||
if wildcard.Match("multipart/*", u.Object) {
|
||||
continue
|
||||
}
|
||||
if wildcard.Match("tmp-old/*", u.object) {
|
||||
if wildcard.Match("tmp-old/*", u.Object) {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
now := time.Now()
|
||||
if now.Sub(u.queued) < time.Second {
|
||||
if now.Sub(u.Queued) < time.Second {
|
||||
// let recently failed networks to reconnect
|
||||
// making MRF wait for 1s before retrying,
|
||||
// i.e 4 reconnect attempts.
|
||||
@@ -106,21 +258,22 @@ func (m *mrfState) healRoutine(z *erasureServerPools) {
|
||||
wait := healSleeper.Timer(context.Background())
|
||||
|
||||
scan := madmin.HealNormalScan
|
||||
if u.scanMode != 0 {
|
||||
scan = u.scanMode
|
||||
if u.BitrotScan {
|
||||
scan = madmin.HealDeepScan
|
||||
}
|
||||
if u.object == "" {
|
||||
healBucket(u.bucket, scan)
|
||||
|
||||
if u.Object == "" {
|
||||
healBucket(u.Bucket, scan)
|
||||
} else {
|
||||
if len(u.versions) > 0 {
|
||||
vers := len(u.versions) / 16
|
||||
if len(u.Versions) > 0 {
|
||||
vers := len(u.Versions) / 16
|
||||
if vers > 0 {
|
||||
for i := 0; i < vers; i++ {
|
||||
healObject(u.bucket, u.object, uuid.UUID(u.versions[16*i:]).String(), scan)
|
||||
healObject(u.Bucket, u.Object, uuid.UUID(u.Versions[16*i:]).String(), scan)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
healObject(u.bucket, u.object, u.versionID, scan)
|
||||
healObject(u.Bucket, u.Object, u.VersionID, scan)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user