Different read quorum for tiered objects (#20115)

For a non-tiered object, MinIO requires that EcM (the number of data
blocks) copies of xl.meta agree, corresponding to the number of data
blocks needed to read the object.

On the other hand, tiered objects have metadata in the hot tier and data
in the warm tier. The data and its integrity are offloaded to the warm tier.
This allows us to reduce the read quorum from EcM (typically > N/2, where N
is the erasure stripe width) to N/2 + 1. The simple majority of metadata
ensures consensus on what the object is and where it is
located.
This commit is contained in:
Krishnan Parthasarathi
2024-07-25 14:02:50 -07:00
committed by GitHub
parent b7f319b62a
commit 4a1edfd9aa
2 changed files with 130 additions and 1 deletions

View File

@@ -26,6 +26,7 @@ import (
"time"
"github.com/minio/minio/internal/amztime"
"github.com/minio/minio/internal/bucket/lifecycle"
"github.com/minio/minio/internal/bucket/replication"
"github.com/minio/minio/internal/crypto"
"github.com/minio/minio/internal/hash/sha256"
@@ -456,6 +457,7 @@ func commonParity(parities []int, defaultParityCount int) int {
}
func listObjectParities(partsMetadata []FileInfo, errs []error) (parities []int) {
totalShards := len(partsMetadata)
parities = make([]int, len(partsMetadata))
for index, metadata := range partsMetadata {
if errs[index] != nil {
@@ -466,9 +468,13 @@ func listObjectParities(partsMetadata []FileInfo, errs []error) (parities []int)
parities[index] = -1
continue
}
//nolint:gocritic
// Delete marker or zero byte objects take highest parity.
if metadata.Deleted || metadata.Size == 0 {
parities[index] = len(partsMetadata) / 2
parities[index] = totalShards / 2
} else if metadata.TransitionStatus == lifecycle.TransitionComplete {
// For tiered objects, read quorum is N/2+1 to ensure simple majority on xl.meta. It is not equal to EcM because the data integrity is entrusted to the warm tier.
parities[index] = totalShards - (totalShards/2 + 1)
} else {
parities[index] = metadata.Erasure.ParityBlocks
}