fix: make sure to avoid calling RenameData() on disconnected disks. (#14094)
Large clusters with multiple sets, or multi-pool setups, can at times fail and report unexpected "file not found" errors. This becomes a problem during the startup sequence, when some files need to be created at multiple locations.

- This PR ensures that we nil the erasure writers so that they are skipped in the RenameData() call.
- RenameData() doesn't need to make Access() calls for `.minio.sys` folders; they always exist.
- Make sure PutObject() never returns ObjectNotFound{} for any error; it always returns "WriteQuorum" when renameData() fails with ObjectNotFound{}, and appropriate errors for all other cases.
Commit: 38ccc4f672
Parent: 04e669a6be
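Before the diff, a minimal, self-contained sketch of the convention the hunks below rely on: a writer that failed, or was never created because its disk is offline, stays nil, and the matching disk slot is nil'ed so the later rename phase never touches a disconnected disk. This is illustration only, not MinIO code; fakeDisk, encodeTo, and the printed strings are invented for the example.

	package main

	import (
		"fmt"
		"io"
	)

	// fakeDisk stands in for a storage endpoint; online models connectivity.
	type fakeDisk struct {
		name   string
		online bool
	}

	// encodeTo sketches the write phase: writers for offline disks stay nil,
	// loosely mirroring how parallelWriter.Write() nils a writer once it fails.
	func encodeTo(disks []*fakeDisk) []io.Writer {
		writers := make([]io.Writer, len(disks))
		for i, d := range disks {
			if d == nil || !d.online {
				continue // leave writers[i] nil
			}
			writers[i] = io.Discard
		}
		return writers
	}

	func main() {
		disks := []*fakeDisk{
			{name: "disk-1", online: true},
			{name: "disk-2", online: false}, // disconnected
			{name: "disk-3", online: true},
		}

		writers := encodeTo(disks)

		// Same convention as in the diff: a nil writer means the matching disk
		// slot is nil'ed, so the rename phase skips that disk entirely.
		for i, w := range writers {
			if w == nil {
				disks[i] = nil
			}
		}

		for _, d := range disks {
			if d == nil {
				fmt.Println("skip RenameData(): disk offline or write failed")
				continue
			}
			fmt.Println("RenameData() on", d.name)
		}
	}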
@@ -119,19 +119,23 @@ func newBitrotReader(disk StorageAPI, data []byte, bucket string, filePath strin
// Close all the readers.
func closeBitrotReaders(rs []io.ReaderAt) {
	for _, r := range rs {
		if r != nil {
			if br, ok := r.(io.Closer); ok {
				br.Close()
			}
		}
	}
}

// Close all the writers.
func closeBitrotWriters(ws []io.Writer) {
	for _, w := range ws {
		if w != nil {
			if bw, ok := w.(io.Closer); ok {
				bw.Close()
			}
		}
	}
}

// Returns hash sum for whole-bitrot, nil for streaming-bitrot.
@@ -322,8 +322,8 @@ func (e Erasure) Heal(ctx context.Context, writers []io.Writer, readers []io.Rea
		errs: make([]error, len(writers)),
	}

-	err = w.Write(ctx, bufs)
-	if err != nil {
+	if err = w.Write(ctx, bufs); err != nil {
		logger.LogIf(ctx, err)
		return err
	}
	}
@@ -52,7 +52,10 @@ func (p *parallelWriter) Write(ctx context.Context, blocks [][]byte) error {
			if p.errs[i] == nil {
				if n != len(blocks[i]) {
					p.errs[i] = io.ErrShortWrite
					p.writers[i] = nil
				}
			} else {
				p.writers[i] = nil
			}
		}(i)
	}
@@ -93,6 +96,7 @@ func (e *Erasure) Encode(ctx context.Context, src io.Reader, writers []io.Writer
		blocks, err = e.EncodeData(ctx, buf[:n])
		if err != nil {
			logger.LogIf(ctx, err)

			return 0, err
		}

@@ -540,7 +540,7 @@ func (er erasureObjects) healObject(ctx context.Context, bucket string, object s

	// If all disks are having errors, we give up.
	if disksToHealCount == 0 {
-		return result, fmt.Errorf("all disks had write errors, unable to heal")
+		return result, fmt.Errorf("all disks had write errors, unable to heal %s/%s", bucket, object)
	}

}
@@ -701,10 +701,11 @@ func (er erasureObjects) putMetacacheObject(ctx context.Context, key string, r *
		if disk == nil {
			continue
		}

		if disk.IsOnline() {
			inlineBuffers[i] = bytes.NewBuffer(make([]byte, 0, shardFileSize))
			writers[i] = newStreamingBitrotWriterBuffer(inlineBuffers[i], DefaultBitrotAlgorithm, erasure.ShardSize())
		}
	}

	n, erasureErr := erasure.Encode(ctx, data, writers, buffer, writeQuorum)
	closeBitrotWriters(writers)
@@ -720,6 +721,7 @@ func (er erasureObjects) putMetacacheObject(ctx context.Context, key string, r *

	for i, w := range writers {
		if w == nil {
			// Make sure to avoid writing to disks which we couldn't complete in erasure.Encode()
			onlineDisks[i] = nil
			continue
		}
@@ -930,6 +932,10 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
			continue
		}

		if !disk.IsOnline() {
			continue
		}

		if len(inlineBuffers) > 0 {
			sz := shardFileSize
			if sz < 0 {
@@ -939,6 +945,7 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st
			writers[i] = newStreamingBitrotWriterBuffer(inlineBuffers[i], DefaultBitrotAlgorithm, erasure.ShardSize())
			continue
		}

		writers[i] = newBitrotWriter(disk, minioMetaTmpBucket, tempErasureObj, shardFileSize, DefaultBitrotAlgorithm, erasure.ShardSize())
	}

@@ -1012,6 +1019,9 @@ func (er erasureObjects) putObject(ctx context.Context, bucket string, object st

	// Rename the successfully written temporary object to final location.
	if onlineDisks, err = renameData(ctx, onlineDisks, minioMetaTmpBucket, tempObj, partsMetadata, bucket, object, writeQuorum); err != nil {
		if errors.Is(err, errFileNotFound) {
			return ObjectInfo{}, toObjectErr(errErasureWriteQuorum, bucket, object)
		}
		logger.LogIf(ctx, err)
		return ObjectInfo{}, toObjectErr(err, bucket, object)
	}
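(Editorial note: the errors.Is(err, errFileNotFound) branch above is the behavior the commit message describes — a missing temporary object during the final rename means too few disks completed the write, so the caller sees a retriable write-quorum error rather than a misleading ObjectNotFound.)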
@@ -291,6 +291,7 @@ func configRetriableErrors(err error) bool {
		errors.Is(err, io.ErrUnexpectedEOF) ||
		errors.As(err, &rquorum) ||
		errors.As(err, &wquorum) ||
		isErrObjectNotFound(err) ||
		isErrBucketNotFound(err) ||
		errors.Is(err, os.ErrDeadlineExceeded)
}
@@ -1984,6 +1984,14 @@ func (s *xlStorage) Delete(ctx context.Context, volume string, path string, recu
	return s.deleteFile(volumeDir, filePath, recursive)
}

func skipAccessChecks(volume string) bool {
	switch volume {
	case minioMetaTmpBucket, minioMetaBucket, minioMetaMultipartBucket:
		return true
	}
	return false
}

// RenameData - rename source path to destination path atomically, metadata and data directory.
func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string) (err error) {
	defer func() {
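(Editorial note: skipAccessChecks() covers MinIO's internal volumes — minioMetaBucket is `.minio.sys`, with its tmp and multipart sub-buckets — which are created at startup and always exist, which is why the commit message says the per-rename Access() stat can be skipped for them.)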
@@ -2010,6 +2018,7 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
		return err
	}

	if !skipAccessChecks(srcVolume) {
		// Stat a volume entry.
		if err = Access(srcVolumeDir); err != nil {
			if osIsNotExist(err) {
@@ -2019,7 +2028,9 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
			}
			return err
		}
	}

	if !skipAccessChecks(dstVolume) {
		if err = Access(dstVolumeDir); err != nil {
			if osIsNotExist(err) {
				return errVolumeNotFound
@@ -2028,6 +2039,7 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
			}
			return err
		}
	}

	srcFilePath := pathutil.Join(srcVolumeDir, pathJoin(srcPath, xlStorageFormatFile))
	dstFilePath := pathutil.Join(dstVolumeDir, pathJoin(dstPath, xlStorageFormatFile))
@@ -2237,7 +2249,8 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
			s.deleteFile(dstVolumeDir, legacyDataPath, true)
		}
		s.deleteFile(dstVolumeDir, dstDataPath, false)
-		if err != errFileNotFound {
+		// Looks like srcFilePath is missing usually at .minio.sys/ ignore it.
+		if !errors.Is(err, errFileNotFound) {
			logger.LogIf(ctx, err)
		}
		return osErrToFileErr(err)
@@ -2252,9 +2265,11 @@ func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath string, f
		}
		s.deleteFile(dstVolumeDir, dstFilePath, false)

-		if err != errFileNotFound {
+		// Looks like srcFilePath is missing usually at .minio.sys/ ignore it.
+		if !errors.Is(err, errFileNotFound) {
			logger.LogIf(ctx, err)
		}

		return osErrToFileErr(err)
	}
