implement a safer completeMultipart (#20227)

- optimize writing part.N.meta by writing both part.N and its meta
  in sequence, without a separate network round trip for the meta
  (see the first sketch after this list).

- remove part.N.meta and part.N that were only partially successful,
  in quorum-loss situations during renamePart().

- allow a strict read-quorum check, arbitrated via the ETag for the
  given part number; this makes the final commit doubly safe
  (see the second sketch after this list).

- return an appropriate error when read quorum is missing, instead
  of returning InvalidPart{}, which is a non-retryable error. This
  situation can arise when many nodes go offline in rotation; one
  example of such restart() behavior is StatefulSet updates in k8s.
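
A minimal sketch of the first two bullets, for illustration only: the
Disk interface, the WritePart()/DeletePart() helpers, and errWriteQuorum
below are assumptions, not MinIO's actual storage API. Each drive
persists part.N and then part.N.meta back to back in one call, and a
quorum loss during renamePart() triggers cleanup of the partially
written copies.

// Hedged sketch only; types and helpers are illustrative assumptions.
package sketch

import (
    "context"
    "errors"
    "fmt"
)

var errWriteQuorum = errors.New("write quorum not reached") // retryable

// Disk stands in for a per-drive storage endpoint (assumed interface).
type Disk interface {
    // WritePart persists part.N and then part.N.meta in sequence on the
    // drive, so the meta does not cost an extra network round trip.
    WritePart(ctx context.Context, bucket, uploadDir string, partNum int, data, meta []byte) error
    // DeletePart removes part.N and part.N.meta; used for cleanup.
    DeletePart(ctx context.Context, bucket, uploadDir string, partNum int) error
}

// renamePartSketch writes a part to all drives and, when write quorum is
// not reached, removes the partially successful copies before returning,
// so no half-committed part.N/part.N.meta pair is left behind.
func renamePartSketch(ctx context.Context, disks []Disk, bucket, uploadDir string, partNum int, data, meta []byte, writeQuorum int) error {
    errs := make([]error, len(disks))
    for i, d := range disks {
        if d == nil {
            errs[i] = errors.New("drive offline")
            continue
        }
        errs[i] = d.WritePart(ctx, bucket, uploadDir, partNum, data, meta)
    }

    ok := 0
    for _, err := range errs {
        if err == nil {
            ok++
        }
    }
    if ok >= writeQuorum {
        return nil
    }

    // Quorum loss: undo the writes that did succeed so a retry never
    // observes a partially committed part.
    for i, d := range disks {
        if d == nil || errs[i] != nil {
            continue
        }
        _ = d.DeletePart(ctx, bucket, uploadDir, partNum)
    }
    return fmt.Errorf("renamePart: %w", errWriteQuorum)
}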
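
A companion sketch for the last two bullets, again with assumed names
(partMeta, checkPartQuorumSketch) rather than the real
completeMultipartUpload path: the commit proceeds only when enough
drives agree on the client-supplied ETag for the part, and too few
responding drives surfaces as a retryable quorum error instead of the
terminal InvalidPart.

// Hedged sketch only; illustrative types, not MinIO's metadata layout.
package sketch

import "errors"

var (
    errErasureReadQuorum = errors.New("read quorum not reached") // retryable
    errInvalidPart       = errors.New("invalid part")            // non-retryable
)

// partMeta is what each drive reports for a part.N.meta it holds (assumed).
type partMeta struct {
    Number int
    ETag   string
    Err    error
}

// checkPartQuorumSketch admits a part only when at least readQuorum drives
// agree on the exact ETag the client sent. Too few answering drives (e.g.
// nodes restarting in rotation during a StatefulSet update) is reported as
// a quorum error the client can retry, never as InvalidPart.
func checkPartQuorumSketch(replies []partMeta, partNum int, wantETag string, readQuorum int) error {
    agree, answered := 0, 0
    for _, r := range replies {
        if r.Err != nil {
            continue
        }
        answered++
        if r.Number == partNum && r.ETag == wantETag {
            agree++
        }
    }
    switch {
    case agree >= readQuorum:
        return nil // strict, ETag-arbitrated quorum reached: safe to commit
    case answered < readQuorum:
        return errErasureReadQuorum // not enough drives responded: retryable
    default:
        return errInvalidPart // drives responded but disagree with the ETag
    }
}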

fixes #20091
Harshavardhana
2024-08-12 01:38:15 -07:00
committed by GitHub
parent 909b169593
commit 2e0fd2cba9
19 changed files with 1487 additions and 275 deletions


@@ -19,13 +19,9 @@ package cmd
import (
    "context"
    "fmt"
    "io"
    "math/rand"
    "sync"
    "time"

    "github.com/minio/pkg/v3/sync/errgroup"
)

func (er erasureObjects) getOnlineDisks() (newDisks []StorageAPI) {
@@ -87,89 +83,3 @@ func (er erasureObjects) getLocalDisks() (newDisks []StorageAPI) {
    }
    return newDisks
}

// readMultipleFiles Reads raw data from all specified files from all disks.
func readMultipleFiles(ctx context.Context, disks []StorageAPI, req ReadMultipleReq, readQuorum int) ([]ReadMultipleResp, error) {
    resps := make([]chan ReadMultipleResp, len(disks))
    for i := range resps {
        resps[i] = make(chan ReadMultipleResp, len(req.Files))
    }
    g := errgroup.WithNErrs(len(disks))
    // Read files in parallel across disks.
    for index := range disks {
        index := index
        g.Go(func() (err error) {
            if disks[index] == nil {
                return errDiskNotFound
            }
            return disks[index].ReadMultiple(ctx, req, resps[index])
        }, index)
    }

    dataArray := make([]ReadMultipleResp, 0, len(req.Files))
    // Merge results. They should come in order from each.
    for _, wantFile := range req.Files {
        quorum := 0
        toAdd := ReadMultipleResp{
            Bucket: req.Bucket,
            Prefix: req.Prefix,
            File:   wantFile,
        }
        for i := range resps {
            if disks[i] == nil {
                continue
            }
            select {
            case <-ctx.Done():
            case gotFile, ok := <-resps[i]:
                if !ok {
                    continue
                }
                if gotFile.Error != "" || !gotFile.Exists {
                    continue
                }
                if gotFile.File != wantFile || gotFile.Bucket != req.Bucket || gotFile.Prefix != req.Prefix {
                    continue
                }
                quorum++
                if toAdd.Modtime.After(gotFile.Modtime) || len(gotFile.Data) < len(toAdd.Data) {
                    // Pick latest, or largest to avoid possible truncated entries.
                    continue
                }
                toAdd = gotFile
            }
        }
        if quorum < readQuorum {
            toAdd.Exists = false
            toAdd.Error = errErasureReadQuorum.Error()
            toAdd.Data = nil
        }
        dataArray = append(dataArray, toAdd)
    }

    ignoredErrs := []error{
        errFileNotFound,
        errVolumeNotFound,
        errFileVersionNotFound,
        io.ErrUnexpectedEOF, // some times we would read without locks, ignore these errors
        io.EOF,              // some times we would read without locks, ignore these errors
        context.DeadlineExceeded,
        context.Canceled,
    }
    ignoredErrs = append(ignoredErrs, objectOpIgnoredErrs...)

    errs := g.Wait()
    for index, err := range errs {
        if err == nil {
            continue
        }
        if !IsErr(err, ignoredErrs...) {
            storageLogOnceIf(ctx, fmt.Errorf("Drive %s, path (%s/%s) returned an error (%w)",
                disks[index], req.Bucket, req.Prefix, err),
                disks[index].String())
        }
    }

    // Return all the metadata.
    return dataArray, nil
}
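
For context, a hedged usage sketch of the helper above (the caller
name, the part.%d.meta naming, and the use of minioMetaMultipartBucket
are assumptions for illustration; only the ReadMultipleReq and
ReadMultipleResp fields visible in the function are relied on):

// Hedged usage sketch only; not code from this commit.
func readPartMetaQuorumSketch(ctx context.Context, disks []StorageAPI, uploadIDDir string, partNums []int, readQuorum int) ([][]byte, error) {
    req := ReadMultipleReq{
        Bucket: minioMetaMultipartBucket, // assumption: multipart staging bucket
        Prefix: uploadIDDir,
    }
    for _, n := range partNums {
        req.Files = append(req.Files, fmt.Sprintf("part.%d.meta", n))
    }

    resps, err := readMultipleFiles(ctx, disks, req, readQuorum)
    if err != nil {
        return nil, err
    }

    out := make([][]byte, 0, len(resps))
    for _, r := range resps {
        if !r.Exists || r.Error != "" {
            // readMultipleFiles clears Data and records errErasureReadQuorum
            // when a file does not reach read quorum.
            return nil, fmt.Errorf("%s: %s", r.File, r.Error)
        }
        out = append(out, r.Data)
    }
    return out, nil
}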