mirror of
https://github.com/minio/minio.git
synced 2025-04-04 11:50:36 -04:00
xl: Prevent multi-disk nodes from exiting when one disk fails (#12423)
It makes sense for a node with multiple disks to still start when one of its disks fails (for example, by returning an I/O error). This commit makes that fault tolerance available in this specific use case.
This commit is contained in:
parent
e448dbbabf
commit
810af07529
@ -257,18 +257,17 @@ func (l EndpointServerPools) Localhost() string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// FirstLocalDiskPath returns the disk path of first (in cmdline args order)
|
// LocalDisksPaths returns the disk paths of the local disks
|
||||||
// local endpoint.
|
func (l EndpointServerPools) LocalDisksPaths() []string {
|
||||||
func (l EndpointServerPools) FirstLocalDiskPath() string {
|
var disks []string
|
||||||
var diskPath string
|
|
||||||
for _, ep := range l {
|
for _, ep := range l {
|
||||||
for _, endpoint := range ep.Endpoints {
|
for _, endpoint := range ep.Endpoints {
|
||||||
if endpoint.IsLocal {
|
if endpoint.IsLocal {
|
||||||
return endpoint.Path
|
disks = append(disks, endpoint.Path)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return diskPath
|
return disks
|
||||||
}
|
}
|
||||||
|
|
||||||
// FirstLocal returns true if the first endpoint is local.
|
// FirstLocal returns true if the first endpoint is local.
|
||||||
|
@ -20,18 +20,15 @@ package cmd
|
|||||||
import (
|
import (
|
||||||
"context"
|
"context"
|
||||||
"crypto/tls"
|
"crypto/tls"
|
||||||
"errors"
|
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"os"
|
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/dustin/go-humanize"
|
"github.com/dustin/go-humanize"
|
||||||
xhttp "github.com/minio/minio/internal/http"
|
xhttp "github.com/minio/minio/internal/http"
|
||||||
"github.com/minio/minio/internal/logger"
|
"github.com/minio/minio/internal/logger"
|
||||||
"github.com/minio/minio/internal/sync/errgroup"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var printEndpointError = func() func(Endpoint, error, bool) {
|
var printEndpointError = func() func(Endpoint, error, bool) {
|
||||||
@ -72,78 +69,37 @@ var printEndpointError = func() func(Endpoint, error, bool) {
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Migrates backend format of local disks.
|
// Cleans up tmp directory of the local disk.
|
||||||
func formatErasureMigrateLocalEndpoints(endpoints Endpoints) error {
|
func formatErasureCleanupTmp(diskPath string) error {
|
||||||
g := errgroup.WithNErrs(len(endpoints))
|
// Need to move temporary objects left behind from previous run of minio
|
||||||
for index, endpoint := range endpoints {
|
// server to a unique directory under `minioMetaTmpBucket-old` to clean
|
||||||
if !endpoint.IsLocal {
|
// up `minioMetaTmpBucket` for the current run.
|
||||||
continue
|
//
|
||||||
}
|
// /disk1/.minio.sys/tmp-old/
|
||||||
index := index
|
// |__ 33a58b40-aecc-4c9f-a22f-ff17bfa33b62
|
||||||
g.Go(func() error {
|
// |__ e870a2c1-d09c-450c-a69c-6eaa54a89b3e
|
||||||
epPath := endpoints[index].Path
|
//
|
||||||
err := formatErasureMigrate(epPath)
|
// In this example, `33a58b40-aecc-4c9f-a22f-ff17bfa33b62` directory contains
|
||||||
if err != nil && !errors.Is(err, os.ErrNotExist) {
|
// temporary objects from one of the previous runs of minio server.
|
||||||
return err
|
tmpOld := pathJoin(diskPath, minioMetaTmpBucket+"-old", mustGetUUID())
|
||||||
}
|
if err := renameAll(pathJoin(diskPath, minioMetaTmpBucket),
|
||||||
return nil
|
tmpOld); err != nil && err != errFileNotFound {
|
||||||
}, index)
|
logger.LogIf(GlobalContext, fmt.Errorf("unable to rename (%s -> %s) %w, drive may be faulty please investigate",
|
||||||
|
pathJoin(diskPath, minioMetaTmpBucket),
|
||||||
|
tmpOld,
|
||||||
|
osErrToFileErr(err)))
|
||||||
}
|
}
|
||||||
for _, err := range g.Wait() {
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Cleans up tmp directory of local disks.
|
// Renames and schedules for purging all bucket metacache.
|
||||||
func formatErasureCleanupTmpLocalEndpoints(endpoints Endpoints) error {
|
renameAllBucketMetacache(diskPath)
|
||||||
g := errgroup.WithNErrs(len(endpoints))
|
|
||||||
for index, endpoint := range endpoints {
|
|
||||||
if !endpoint.IsLocal {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
index := index
|
|
||||||
g.Go(func() error {
|
|
||||||
epPath := endpoints[index].Path
|
|
||||||
// Need to move temporary objects left behind from previous run of minio
|
|
||||||
// server to a unique directory under `minioMetaTmpBucket-old` to clean
|
|
||||||
// up `minioMetaTmpBucket` for the current run.
|
|
||||||
//
|
|
||||||
// /disk1/.minio.sys/tmp-old/
|
|
||||||
// |__ 33a58b40-aecc-4c9f-a22f-ff17bfa33b62
|
|
||||||
// |__ e870a2c1-d09c-450c-a69c-6eaa54a89b3e
|
|
||||||
//
|
|
||||||
// In this example, `33a58b40-aecc-4c9f-a22f-ff17bfa33b62` directory contains
|
|
||||||
// temporary objects from one of the previous runs of minio server.
|
|
||||||
tmpOld := pathJoin(epPath, minioMetaTmpBucket+"-old", mustGetUUID())
|
|
||||||
if err := renameAll(pathJoin(epPath, minioMetaTmpBucket),
|
|
||||||
tmpOld); err != nil && err != errFileNotFound {
|
|
||||||
logger.LogIf(GlobalContext, fmt.Errorf("unable to rename (%s -> %s) %w, drive may be faulty please investigate",
|
|
||||||
pathJoin(epPath, minioMetaTmpBucket),
|
|
||||||
tmpOld,
|
|
||||||
osErrToFileErr(err)))
|
|
||||||
}
|
|
||||||
|
|
||||||
// Renames and schedules for puring all bucket metacache.
|
// Removal of tmp-old folder is backgrounded completely.
|
||||||
renameAllBucketMetacache(epPath)
|
go removeAll(pathJoin(diskPath, minioMetaTmpBucket+"-old"))
|
||||||
|
|
||||||
// Removal of tmp-old folder is backgrounded completely.
|
if err := mkdirAll(pathJoin(diskPath, minioMetaTmpBucket), 0777); err != nil {
|
||||||
go removeAll(pathJoin(epPath, minioMetaTmpBucket+"-old"))
|
logger.LogIf(GlobalContext, fmt.Errorf("unable to create (%s) %w, drive may be faulty please investigate",
|
||||||
|
pathJoin(diskPath, minioMetaTmpBucket),
|
||||||
if err := mkdirAll(pathJoin(epPath, minioMetaTmpBucket), 0777); err != nil {
|
err))
|
||||||
logger.LogIf(GlobalContext, fmt.Errorf("unable to create (%s) %w, drive may be faulty please investigate",
|
|
||||||
pathJoin(epPath, minioMetaTmpBucket),
|
|
||||||
err))
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}, index)
|
|
||||||
}
|
|
||||||
for _, err := range g.Wait() {
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -337,14 +293,6 @@ func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCou
|
|||||||
return nil, nil, errInvalidArgument
|
return nil, nil, errInvalidArgument
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := formatErasureMigrateLocalEndpoints(endpoints); err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := formatErasureCleanupTmpLocalEndpoints(endpoints); err != nil {
|
|
||||||
return nil, nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// prepare getElapsedTime() to calculate elapsed time since we started trying formatting disks.
|
// prepare getElapsedTime() to calculate elapsed time since we started trying formatting disks.
|
||||||
// All times are rounded to avoid showing milli, micro and nano seconds
|
// All times are rounded to avoid showing milli, micro and nano seconds
|
||||||
formatStartTime := time.Now().Round(time.Second)
|
formatStartTime := time.Now().Round(time.Second)
|
||||||
|
@ -555,7 +555,7 @@ func serverMain(ctx *cli.Context) {
|
|||||||
|
|
||||||
if globalIsErasure { // to be done after config init
|
if globalIsErasure { // to be done after config init
|
||||||
initBackgroundReplication(GlobalContext, newObject)
|
initBackgroundReplication(GlobalContext, newObject)
|
||||||
globalTierJournal, err = initTierDeletionJournal(GlobalContext.Done())
|
globalTierJournal, err = initTierDeletionJournal(GlobalContext)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.FatalIf(err, "Unable to initialize remote tier pending deletes journal")
|
logger.FatalIf(err, "Unable to initialize remote tier pending deletes journal")
|
||||||
}
|
}
|
||||||
|
@ -56,23 +56,28 @@ var (
|
|||||||
errUnsupportedJournalVersion = errors.New("unsupported pending deletes journal version")
|
errUnsupportedJournalVersion = errors.New("unsupported pending deletes journal version")
|
||||||
)
|
)
|
||||||
|
|
||||||
func initTierDeletionJournal(done <-chan struct{}) (*tierJournal, error) {
|
func initTierDeletionJournal(ctx context.Context) (*tierJournal, error) {
|
||||||
diskPath := globalEndpoints.FirstLocalDiskPath()
|
for _, diskPath := range globalEndpoints.LocalDisksPaths() {
|
||||||
j := &tierJournal{
|
j := &tierJournal{
|
||||||
diskPath: diskPath,
|
diskPath: diskPath,
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := os.MkdirAll(filepath.Dir(j.JournalPath()), os.FileMode(0700)); err != nil {
|
||||||
|
logger.LogIf(ctx, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
err := j.Open()
|
||||||
|
if err != nil {
|
||||||
|
logger.LogIf(ctx, err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
go j.deletePending(ctx.Done())
|
||||||
|
return j, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := os.MkdirAll(filepath.Dir(j.JournalPath()), os.FileMode(0700)); err != nil {
|
return nil, errors.New("no local disk found")
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
err := j.Open()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
go j.deletePending(done)
|
|
||||||
return j, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// rotate rotates the journal. If a read-only journal already exists it does
|
// rotate rotates the journal. If a read-only journal already exists it does
|
||||||
|
@ -300,7 +300,15 @@ func newXLStorage(ep Endpoint) (*xlStorage, error) {
|
|||||||
return p, err
|
return p, err
|
||||||
}
|
}
|
||||||
w.Close()
|
w.Close()
|
||||||
defer Remove(filePath)
|
Remove(filePath)
|
||||||
|
|
||||||
|
if err := formatErasureMigrate(p.diskPath); err != nil && !errors.Is(err, os.ErrNotExist) {
|
||||||
|
return p, err
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := formatErasureCleanupTmp(p.diskPath); err != nil {
|
||||||
|
return p, err
|
||||||
|
}
|
||||||
|
|
||||||
// Success.
|
// Success.
|
||||||
return p, nil
|
return p, nil
|
||||||
|
Loading…
x
Reference in New Issue
Block a user