mirror of
https://github.com/minio/minio.git
synced 2025-01-25 21:53:16 -05:00
fallback on etags if they match when mtime is not same (#17424)
on "unversioned" buckets there are situations when successive concurrent I/O can lead to an inconsistent state() with mtime while the etag might be the same for the object on disk. in such a scenario it is possible for us to allow reading of the object since etag matches and if etag matches we are guaranteed that we have enough copies the object will be readable and same. This PR allows fallback in such scenarios.
This commit is contained in:
parent
22b7c8cd8a
commit
64de61d15d
@ -25,6 +25,38 @@ import (
|
|||||||
"github.com/minio/madmin-go/v2"
|
"github.com/minio/madmin-go/v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func commonETags(etags []string) (etag string, maxima int) {
|
||||||
|
etagOccurrenceMap := make(map[string]int, len(etags))
|
||||||
|
|
||||||
|
// Ignore the uuid sentinel and count the rest.
|
||||||
|
for _, etag := range etags {
|
||||||
|
if etag == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
etagOccurrenceMap[etag]++
|
||||||
|
}
|
||||||
|
|
||||||
|
maxima = 0 // Counter for remembering max occurrence of elements.
|
||||||
|
latest := ""
|
||||||
|
|
||||||
|
// Find the common cardinality from previously collected
|
||||||
|
// occurrences of elements.
|
||||||
|
for etag, count := range etagOccurrenceMap {
|
||||||
|
if count < maxima {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// We are at or above maxima
|
||||||
|
if count > maxima {
|
||||||
|
maxima = count
|
||||||
|
latest = etag
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the collected common max time, with maxima
|
||||||
|
return latest, maxima
|
||||||
|
}
|
||||||
|
|
||||||
// commonTime returns a maximally occurring time from a list of time.
|
// commonTime returns a maximally occurring time from a list of time.
|
||||||
func commonTimeAndOccurence(times []time.Time, group time.Duration) (maxTime time.Time, maxima int) {
|
func commonTimeAndOccurence(times []time.Time, group time.Duration) (maxTime time.Time, maxima int) {
|
||||||
timeOccurenceMap := make(map[int64]int, len(times))
|
timeOccurenceMap := make(map[int64]int, len(times))
|
||||||
@ -86,6 +118,13 @@ func commonTime(modTimes []time.Time, quorum int) time.Time {
|
|||||||
return timeSentinel
|
return timeSentinel
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func commonETag(etags []string, quorum int) string {
|
||||||
|
if etag, count := commonETags(etags); count >= quorum {
|
||||||
|
return etag
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
// Beginning of unix time is treated as sentinel value here.
|
// Beginning of unix time is treated as sentinel value here.
|
||||||
var (
|
var (
|
||||||
timeSentinel = time.Unix(0, 0).UTC()
|
timeSentinel = time.Unix(0, 0).UTC()
|
||||||
@ -102,6 +141,33 @@ func bootModtimes(diskCount int) []time.Time {
|
|||||||
return modTimes
|
return modTimes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func listObjectETags(partsMetadata []FileInfo, errs []error, quorum int) (etags []string) {
|
||||||
|
etags = make([]string, len(partsMetadata))
|
||||||
|
vidMap := map[string]int{}
|
||||||
|
for index, metadata := range partsMetadata {
|
||||||
|
if errs[index] != nil {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
vid := metadata.VersionID
|
||||||
|
if metadata.VersionID == "" {
|
||||||
|
vid = nullVersionID
|
||||||
|
}
|
||||||
|
vidMap[vid]++
|
||||||
|
etags[index] = metadata.Metadata["etag"]
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, count := range vidMap {
|
||||||
|
// do we have enough common versions
|
||||||
|
// that have enough quorum to satisfy
|
||||||
|
// the etag.
|
||||||
|
if count >= quorum {
|
||||||
|
return etags
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return make([]string, len(partsMetadata))
|
||||||
|
}
|
||||||
|
|
||||||
// Extracts list of times from FileInfo slice and returns, skips
|
// Extracts list of times from FileInfo slice and returns, skips
|
||||||
// slice elements which have errors.
|
// slice elements which have errors.
|
||||||
func listObjectModtimes(partsMetadata []FileInfo, errs []error) (modTimes []time.Time) {
|
func listObjectModtimes(partsMetadata []FileInfo, errs []error) (modTimes []time.Time) {
|
||||||
@ -162,7 +228,7 @@ func listObjectDiskMtimes(partsMetadata []FileInfo) (diskMTimes []time.Time) {
|
|||||||
// - a slice of disks where disk having 'older' xl.meta (or nothing)
|
// - a slice of disks where disk having 'older' xl.meta (or nothing)
|
||||||
// are set to nil.
|
// are set to nil.
|
||||||
// - latest (in time) of the maximally occurring modTime(s), which has at least quorum occurrences.
|
// - latest (in time) of the maximally occurring modTime(s), which has at least quorum occurrences.
|
||||||
func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error, quorum int) (onlineDisks []StorageAPI, modTime time.Time) {
|
func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error, quorum int) (onlineDisks []StorageAPI, modTime time.Time, etag string) {
|
||||||
onlineDisks = make([]StorageAPI, len(disks))
|
onlineDisks = make([]StorageAPI, len(disks))
|
||||||
|
|
||||||
// List all the file commit ids from parts metadata.
|
// List all the file commit ids from parts metadata.
|
||||||
@ -171,6 +237,23 @@ func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error,
|
|||||||
// Reduce list of UUIDs to a single common value.
|
// Reduce list of UUIDs to a single common value.
|
||||||
modTime = commonTime(modTimes, quorum)
|
modTime = commonTime(modTimes, quorum)
|
||||||
|
|
||||||
|
if modTime.IsZero() || modTime.Equal(timeSentinel) {
|
||||||
|
etags := listObjectETags(partsMetadata, errs, quorum)
|
||||||
|
|
||||||
|
etag = commonETag(etags, quorum)
|
||||||
|
|
||||||
|
if etag != "" { // allow this fallback only if a non-empty etag is found.
|
||||||
|
for index, e := range etags {
|
||||||
|
if partsMetadata[index].IsValid() && e == etag {
|
||||||
|
onlineDisks[index] = disks[index]
|
||||||
|
} else {
|
||||||
|
onlineDisks[index] = nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return onlineDisks, modTime, etag
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Create a new online disks slice, which have common uuid.
|
// Create a new online disks slice, which have common uuid.
|
||||||
for index, t := range modTimes {
|
for index, t := range modTimes {
|
||||||
if partsMetadata[index].IsValid() && t.Equal(modTime) {
|
if partsMetadata[index].IsValid() && t.Equal(modTime) {
|
||||||
@ -180,7 +263,7 @@ func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return onlineDisks, modTime
|
return onlineDisks, modTime, ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// disksWithAllParts - This function needs to be called with
|
// disksWithAllParts - This function needs to be called with
|
||||||
|
@ -299,7 +299,7 @@ func TestListOnlineDisks(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
rQuorum := len(errs) - z.serverPools[0].sets[0].defaultParityCount
|
rQuorum := len(errs) - z.serverPools[0].sets[0].defaultParityCount
|
||||||
onlineDisks, modTime := listOnlineDisks(erasureDisks, partsMetadata, test.errs, rQuorum)
|
onlineDisks, modTime, _ := listOnlineDisks(erasureDisks, partsMetadata, test.errs, rQuorum)
|
||||||
if !modTime.Equal(test.expectedTime) {
|
if !modTime.Equal(test.expectedTime) {
|
||||||
t.Fatalf("Expected modTime to be equal to %v but was found to be %v",
|
t.Fatalf("Expected modTime to be equal to %v but was found to be %v",
|
||||||
test.expectedTime, modTime)
|
test.expectedTime, modTime)
|
||||||
@ -481,7 +481,7 @@ func TestListOnlineDisksSmallObjects(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
rQuorum := len(errs) - z.serverPools[0].sets[0].defaultParityCount
|
rQuorum := len(errs) - z.serverPools[0].sets[0].defaultParityCount
|
||||||
onlineDisks, modTime := listOnlineDisks(erasureDisks, partsMetadata, test.errs, rQuorum)
|
onlineDisks, modTime, _ := listOnlineDisks(erasureDisks, partsMetadata, test.errs, rQuorum)
|
||||||
if !modTime.Equal(test.expectedTime) {
|
if !modTime.Equal(test.expectedTime) {
|
||||||
t.Fatalf("Expected modTime to be equal to %v but was found to be %v",
|
t.Fatalf("Expected modTime to be equal to %v but was found to be %v",
|
||||||
test.expectedTime, modTime)
|
test.expectedTime, modTime)
|
||||||
@ -548,7 +548,7 @@ func TestDisksWithAllParts(t *testing.T) {
|
|||||||
t.Fatalf("Failed to get quorum consistent fileInfo %v", err)
|
t.Fatalf("Failed to get quorum consistent fileInfo %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
erasureDisks, _ = listOnlineDisks(erasureDisks, partsMetadata, errs, readQuorum)
|
erasureDisks, _, _ = listOnlineDisks(erasureDisks, partsMetadata, errs, readQuorum)
|
||||||
|
|
||||||
filteredDisks, errs, _ := disksWithAllParts(ctx, erasureDisks, partsMetadata,
|
filteredDisks, errs, _ := disksWithAllParts(ctx, erasureDisks, partsMetadata,
|
||||||
errs, fi, bucket, object, madmin.HealDeepScan)
|
errs, fi, bucket, object, madmin.HealDeepScan)
|
||||||
|
@ -386,11 +386,11 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object
|
|||||||
|
|
||||||
// List of disks having latest version of the object xl.meta
|
// List of disks having latest version of the object xl.meta
|
||||||
// (by modtime).
|
// (by modtime).
|
||||||
onlineDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs, readQuorum)
|
onlineDisks, modTime, etag := listOnlineDisks(storageDisks, partsMetadata, errs, readQuorum)
|
||||||
|
|
||||||
// Latest FileInfo for reference. If a valid metadata is not
|
// Latest FileInfo for reference. If a valid metadata is not
|
||||||
// present, it is as good as object not found.
|
// present, it is as good as object not found.
|
||||||
latestMeta, err := pickValidFileInfo(ctx, partsMetadata, modTime, readQuorum)
|
latestMeta, err := pickValidFileInfo(ctx, partsMetadata, modTime, etag, readQuorum)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return result, err
|
return result, err
|
||||||
}
|
}
|
||||||
|
@ -328,7 +328,7 @@ func (fi FileInfo) ObjectToPartOffset(ctx context.Context, offset int64) (partIn
|
|||||||
return 0, 0, InvalidRange{}
|
return 0, 0, InvalidRange{}
|
||||||
}
|
}
|
||||||
|
|
||||||
func findFileInfoInQuorum(ctx context.Context, metaArr []FileInfo, modTime time.Time, quorum int) (FileInfo, error) {
|
func findFileInfoInQuorum(ctx context.Context, metaArr []FileInfo, modTime time.Time, etag string, quorum int) (FileInfo, error) {
|
||||||
// with less quorum return error.
|
// with less quorum return error.
|
||||||
if quorum < 1 {
|
if quorum < 1 {
|
||||||
return FileInfo{}, errErasureReadQuorum
|
return FileInfo{}, errErasureReadQuorum
|
||||||
@ -336,9 +336,17 @@ func findFileInfoInQuorum(ctx context.Context, metaArr []FileInfo, modTime time.
|
|||||||
metaHashes := make([]string, len(metaArr))
|
metaHashes := make([]string, len(metaArr))
|
||||||
h := sha256.New()
|
h := sha256.New()
|
||||||
for i, meta := range metaArr {
|
for i, meta := range metaArr {
|
||||||
if meta.IsValid() && meta.ModTime.Equal(modTime) {
|
if !meta.IsValid() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
etagOnly := modTime.Equal(timeSentinel) && (etag != "" && etag == meta.Metadata["etag"])
|
||||||
|
mtimeValid := meta.ModTime.Equal(modTime)
|
||||||
|
if mtimeValid || etagOnly {
|
||||||
fmt.Fprintf(h, "%v", meta.XLV1)
|
fmt.Fprintf(h, "%v", meta.XLV1)
|
||||||
|
if !etagOnly {
|
||||||
|
// Verify dataDir is same only when mtime is valid and etag is not considered.
|
||||||
fmt.Fprintf(h, "%v", meta.GetDataDir())
|
fmt.Fprintf(h, "%v", meta.GetDataDir())
|
||||||
|
}
|
||||||
for _, part := range meta.Parts {
|
for _, part := range meta.Parts {
|
||||||
fmt.Fprintf(h, "part.%d", part.Number)
|
fmt.Fprintf(h, "part.%d", part.Number)
|
||||||
}
|
}
|
||||||
@ -408,8 +416,8 @@ func pickValidDiskTimeWithQuorum(metaArr []FileInfo, quorum int) time.Time {
|
|||||||
|
|
||||||
// pickValidFileInfo - picks one valid FileInfo content and returns from a
|
// pickValidFileInfo - picks one valid FileInfo content and returns from a
|
||||||
// slice of FileInfo.
|
// slice of FileInfo.
|
||||||
func pickValidFileInfo(ctx context.Context, metaArr []FileInfo, modTime time.Time, quorum int) (FileInfo, error) {
|
func pickValidFileInfo(ctx context.Context, metaArr []FileInfo, modTime time.Time, etag string, quorum int) (FileInfo, error) {
|
||||||
return findFileInfoInQuorum(ctx, metaArr, modTime, quorum)
|
return findFileInfoInQuorum(ctx, metaArr, modTime, etag, quorum)
|
||||||
}
|
}
|
||||||
|
|
||||||
// writeUniqueFileInfo - writes unique `xl.meta` content for each disk concurrently.
|
// writeUniqueFileInfo - writes unique `xl.meta` content for each disk concurrently.
|
||||||
|
@ -204,7 +204,7 @@ func TestFindFileInfoInQuorum(t *testing.T) {
|
|||||||
for _, test := range tests {
|
for _, test := range tests {
|
||||||
test := test
|
test := test
|
||||||
t.Run("", func(t *testing.T) {
|
t.Run("", func(t *testing.T) {
|
||||||
_, err := findFileInfoInQuorum(context.Background(), test.fis, test.modTime, test.expectedQuorum)
|
_, err := findFileInfoInQuorum(context.Background(), test.fis, test.modTime, "", test.expectedQuorum)
|
||||||
if err != test.expectedErr {
|
if err != test.expectedErr {
|
||||||
t.Errorf("Expected %s, got %s", test.expectedErr, err)
|
t.Errorf("Expected %s, got %s", test.expectedErr, err)
|
||||||
}
|
}
|
||||||
|
@ -83,8 +83,9 @@ func (er erasureObjects) checkUploadIDExists(ctx context.Context, bucket, object
|
|||||||
if write {
|
if write {
|
||||||
quorum = writeQuorum
|
quorum = writeQuorum
|
||||||
}
|
}
|
||||||
|
|
||||||
// List all online disks.
|
// List all online disks.
|
||||||
_, modTime := listOnlineDisks(storageDisks, partsMetadata, errs, quorum)
|
_, modTime, etag := listOnlineDisks(storageDisks, partsMetadata, errs, quorum)
|
||||||
|
|
||||||
if write {
|
if write {
|
||||||
reducedErr := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
|
reducedErr := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
|
||||||
@ -98,7 +99,7 @@ func (er erasureObjects) checkUploadIDExists(ctx context.Context, bucket, object
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Pick one from the first valid metadata.
|
// Pick one from the first valid metadata.
|
||||||
fi, err = pickValidFileInfo(ctx, partsMetadata, modTime, quorum)
|
fi, err = pickValidFileInfo(ctx, partsMetadata, modTime, etag, quorum)
|
||||||
|
|
||||||
return fi, partsMetadata, err
|
return fi, partsMetadata, err
|
||||||
}
|
}
|
||||||
|
@ -108,10 +108,10 @@ func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, d
|
|||||||
}
|
}
|
||||||
|
|
||||||
// List all online disks.
|
// List all online disks.
|
||||||
onlineDisks, modTime := listOnlineDisks(storageDisks, metaArr, errs, readQuorum)
|
onlineDisks, modTime, etag := listOnlineDisks(storageDisks, metaArr, errs, readQuorum)
|
||||||
|
|
||||||
// Pick latest valid metadata.
|
// Pick latest valid metadata.
|
||||||
fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum)
|
fi, err := pickValidFileInfo(ctx, metaArr, modTime, etag, readQuorum)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return oi, toObjectErr(err, srcBucket, srcObject)
|
return oi, toObjectErr(err, srcBucket, srcObject)
|
||||||
}
|
}
|
||||||
@ -347,11 +347,6 @@ func (er erasureObjects) getObjectWithFileInfo(ctx context.Context, bucket, obje
|
|||||||
|
|
||||||
var healOnce sync.Once
|
var healOnce sync.Once
|
||||||
|
|
||||||
// once we have obtained a common FileInfo i.e latest, we should stick
|
|
||||||
// to single dataDir to read the content to avoid reading from some other
|
|
||||||
// dataDir that has stale FileInfo{} to ensure that we fail appropriately
|
|
||||||
// during reads and expect the same dataDir everywhere.
|
|
||||||
dataDir := fi.DataDir
|
|
||||||
for ; partIndex <= lastPartIndex; partIndex++ {
|
for ; partIndex <= lastPartIndex; partIndex++ {
|
||||||
if length == totalBytesRead {
|
if length == totalBytesRead {
|
||||||
break
|
break
|
||||||
@ -380,7 +375,7 @@ func (er erasureObjects) getObjectWithFileInfo(ctx context.Context, bucket, obje
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
checksumInfo := metaArr[index].Erasure.GetChecksumInfo(partNumber)
|
checksumInfo := metaArr[index].Erasure.GetChecksumInfo(partNumber)
|
||||||
partPath := pathJoin(object, dataDir, fmt.Sprintf("part.%d", partNumber))
|
partPath := pathJoin(object, metaArr[index].DataDir, fmt.Sprintf("part.%d", partNumber))
|
||||||
readers[index] = newBitrotReader(disk, metaArr[index].Data, bucket, partPath, tillOffset,
|
readers[index] = newBitrotReader(disk, metaArr[index].Data, bucket, partPath, tillOffset,
|
||||||
checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
|
checksumInfo.Algorithm, checksumInfo.Hash, erasure.ShardSize())
|
||||||
|
|
||||||
@ -664,10 +659,10 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s
|
|||||||
}
|
}
|
||||||
|
|
||||||
// List all online disks.
|
// List all online disks.
|
||||||
onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs, readQuorum)
|
onlineDisks, modTime, etag := listOnlineDisks(disks, metaArr, errs, readQuorum)
|
||||||
|
|
||||||
// Pick latest valid metadata.
|
// Pick latest valid metadata.
|
||||||
fi, err = pickValidFileInfo(ctx, metaArr, modTime, readQuorum)
|
fi, err = pickValidFileInfo(ctx, metaArr, modTime, etag, readQuorum)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fi, nil, nil, err
|
return fi, nil, nil, err
|
||||||
}
|
}
|
||||||
@ -693,7 +688,7 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s
|
|||||||
missingBlocks++
|
missingBlocks++
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if metaArr[i].IsValid() && metaArr[i].ModTime.Equal(fi.ModTime) {
|
if metaArr[i].IsValid() && metaArr[i].ModTime.Equal(modTime) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
metaArr[i] = FileInfo{}
|
metaArr[i] = FileInfo{}
|
||||||
@ -1794,10 +1789,10 @@ func (er erasureObjects) PutObjectMetadata(ctx context.Context, bucket, object s
|
|||||||
}
|
}
|
||||||
|
|
||||||
// List all online disks.
|
// List all online disks.
|
||||||
onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs, readQuorum)
|
onlineDisks, modTime, etag := listOnlineDisks(disks, metaArr, errs, readQuorum)
|
||||||
|
|
||||||
// Pick latest valid metadata.
|
// Pick latest valid metadata.
|
||||||
fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum)
|
fi, err := pickValidFileInfo(ctx, metaArr, modTime, etag, readQuorum)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return ObjectInfo{}, toObjectErr(err, bucket, object)
|
return ObjectInfo{}, toObjectErr(err, bucket, object)
|
||||||
}
|
}
|
||||||
@ -1867,10 +1862,10 @@ func (er erasureObjects) PutObjectTags(ctx context.Context, bucket, object strin
|
|||||||
}
|
}
|
||||||
|
|
||||||
// List all online disks.
|
// List all online disks.
|
||||||
onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs, readQuorum)
|
onlineDisks, modTime, etag := listOnlineDisks(disks, metaArr, errs, readQuorum)
|
||||||
|
|
||||||
// Pick latest valid metadata.
|
// Pick latest valid metadata.
|
||||||
fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum)
|
fi, err := pickValidFileInfo(ctx, metaArr, modTime, etag, readQuorum)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return ObjectInfo{}, toObjectErr(err, bucket, object)
|
return ObjectInfo{}, toObjectErr(err, bucket, object)
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user