implement a safer completeMultipart flow (#20227)

- optimize writing part.N.meta by writing part.N and its
  metadata in sequence on the same disk, avoiding an extra
  network round trip per part (a sketch follows this list).

- remove part.N.meta and part.N files that were only
  partially successful when renamePart() runs into quorum
  loss, so no stale partial files are left behind.

- allow a strict read quorum check, arbitrated via the ETag
  for the given part number; this adds an extra layer of
  safety at the final commit.

- return an appropriate, retryable error when read quorum is
  missing, instead of InvalidPart{}, which is a non-retryable
  error. This situation can arise when many nodes go offline
  in rotation, for example the rolling-restart behavior of
  StatefulSet updates in k8s (both this and the previous
  point are sketched after this list).
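
A minimal sketch of the first point: write a part and its sidecar metadata back to back in one local call, so the caller does not pay a second network round trip per part. The on-disk layout (part.N next to part.N.meta) and the helper writePartWithMeta below are illustrative assumptions, not the server's actual storage API.

// Hedged sketch, not MinIO's storage layer: persist part.N and part.N.meta
// in sequence inside a single call, so only one request crosses the network.
package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// writePartWithMeta is a hypothetical helper: it writes the part data and
// then its metadata on the same disk, within the same invocation.
func writePartWithMeta(dir string, partNum int, data, meta []byte) error {
	partPath := filepath.Join(dir, fmt.Sprintf("part.%d", partNum))
	if err := os.WriteFile(partPath, data, 0o644); err != nil {
		return err
	}
	// The meta write follows immediately on the same disk; no extra RPC.
	return os.WriteFile(partPath+".meta", meta, 0o644)
}

func main() {
	dir, err := os.MkdirTemp("", "parts")
	if err != nil {
		panic(err)
	}
	defer os.RemoveAll(dir)
	if err := writePartWithMeta(dir, 1, []byte("part-bytes"), []byte(`{"etag":"abc"}`)); err != nil {
		panic(err)
	}
	fmt.Println("wrote part.1 and part.1.meta in one call")
}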
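
The last two points can be sketched together: before the final commit, the ETag reported by a read quorum of disks arbitrates whether the part is valid, and a missing quorum has to surface as a retryable error rather than InvalidPart{}. Everything here (partMeta, checkPartETagQuorum, the error values) is a hypothetical stand-in for the server's internals, shown only to make the retry semantics concrete.

// Hedged sketch, not MinIO's code: distinguish "not enough disks answered"
// (retryable) from "the disks agree, but not with the client's ETag"
// (a genuinely invalid part).
package main

import (
	"errors"
	"fmt"
)

var (
	errReadQuorum  = errors.New("read quorum unavailable") // transient, retry
	errInvalidPart = errors.New("invalid part")            // permanent, give up
)

// partMeta is a stand-in for whatever per-disk metadata the server reads
// back for part number N before the final commit.
type partMeta struct {
	ETag string
	Err  error
}

// checkPartETagQuorum succeeds only when at least readQuorum disks agree on
// the ETag the client sent for this part number.
func checkPartETagQuorum(metas []partMeta, clientETag string, readQuorum int) error {
	online, matching := 0, 0
	for _, m := range metas {
		if m.Err != nil {
			continue // disk offline or part missing on this disk
		}
		online++
		if m.ETag == clientETag {
			matching++
		}
	}
	switch {
	case matching >= readQuorum:
		return nil
	case online < readQuorum:
		// Too few disks answered at all, e.g. nodes restarting in rotation
		// during a StatefulSet update: report a retryable quorum error.
		return errReadQuorum
	default:
		// Enough disks answered, but they do not back the client's ETag.
		return errInvalidPart
	}
}

func main() {
	metas := []partMeta{
		{ETag: "abc"}, {ETag: "abc"},
		{Err: errors.New("disk offline")}, {Err: errors.New("disk offline")},
	}
	fmt.Println(checkPartETagQuorum(metas, "abc", 3)) // read quorum unavailable -> retry
	fmt.Println(checkPartETagQuorum(metas, "abc", 2)) // <nil> -> safe to commit
}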

fixes #20091
Author: Harshavardhana
Date: 2024-08-12 01:38:15 -07:00
Committed by: GitHub
Parent: 909b169593
Commit: 2e0fd2cba9
19 changed files with 1487 additions and 275 deletions


@@ -390,8 +390,7 @@ func pickValidFileInfo(ctx context.Context, metaArr []FileInfo, modTime time.Tim
 	return findFileInfoInQuorum(ctx, metaArr, modTime, etag, quorum)
 }
-// writeUniqueFileInfo - writes unique `xl.meta` content for each disk concurrently.
-func writeUniqueFileInfo(ctx context.Context, disks []StorageAPI, origbucket, bucket, prefix string, files []FileInfo, quorum int) ([]StorageAPI, error) {
+func writeAllMetadataWithRevert(ctx context.Context, disks []StorageAPI, origbucket, bucket, prefix string, files []FileInfo, quorum int, revert bool) ([]StorageAPI, error) {
 	g := errgroup.WithNErrs(len(disks))
 	// Start writing `xl.meta` to all disks in parallel.
@@ -415,9 +414,37 @@ func writeUniqueFileInfo(ctx context.Context, disks []StorageAPI, origbucket, bu
 	mErrs := g.Wait()
 	err := reduceWriteQuorumErrs(ctx, mErrs, objectOpIgnoredErrs, quorum)
+	if err != nil && revert {
+		ng := errgroup.WithNErrs(len(disks))
+		for index := range disks {
+			if mErrs[index] != nil {
+				continue
+			}
+			index := index
+			ng.Go(func() error {
+				if disks[index] == nil {
+					return errDiskNotFound
+				}
+				return disks[index].Delete(ctx, bucket, pathJoin(prefix, xlStorageFormatFile), DeleteOptions{
+					Recursive: true,
+				})
+			}, index)
+		}
+		ng.Wait()
+	}
 	return evalDisks(disks, mErrs), err
 }
+func writeAllMetadata(ctx context.Context, disks []StorageAPI, origbucket, bucket, prefix string, files []FileInfo, quorum int) ([]StorageAPI, error) {
+	return writeAllMetadataWithRevert(ctx, disks, origbucket, bucket, prefix, files, quorum, true)
+}
+// writeUniqueFileInfo - writes unique `xl.meta` content for each disk concurrently.
+func writeUniqueFileInfo(ctx context.Context, disks []StorageAPI, origbucket, bucket, prefix string, files []FileInfo, quorum int) ([]StorageAPI, error) {
+	return writeAllMetadataWithRevert(ctx, disks, origbucket, bucket, prefix, files, quorum, false)
+}
 func commonParity(parities []int, defaultParityCount int) int {
 	N := len(parities)
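
For context, the revert branch above only fires after reduceWriteQuorumErrs has collapsed the per-disk write errors into a single quorum verdict. A much simplified, hypothetical reducer (not MinIO's reduceWriteQuorumErrs, which additionally ignores a configurable set of errors) could look like this:

// Hedged sketch of quorum error reduction, not the server's implementation:
// nil errors count as successful writes; if fewer than `quorum` disks
// succeeded, return the most common error so the caller can react to it.
package main

import (
	"errors"
	"fmt"
)

var errWriteQuorum = errors.New("write quorum unavailable")

func reduceQuorumErrs(errs []error, quorum int) error {
	success := 0
	counts := map[string]int{}
	var top error
	for _, err := range errs {
		if err == nil {
			success++
			continue
		}
		counts[err.Error()]++
		if top == nil || counts[err.Error()] > counts[top.Error()] {
			top = err
		}
	}
	if success >= quorum {
		return nil
	}
	if top == nil {
		return errWriteQuorum
	}
	return top
}

func main() {
	errs := []error{nil, nil, errors.New("disk not found"), errors.New("disk not found")}
	fmt.Println(reduceQuorumErrs(errs, 3)) // disk not found: quorum missed, revert would kick in
	fmt.Println(reduceQuorumErrs(errs, 2)) // <nil>: enough disks succeeded, keep the writes
}

In the hunk above, when this kind of reduction reports a quorum miss and revert is true, the xl.meta files that did get written are deleted again, so a quorum loss cannot leave partially committed metadata behind.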