heal: Pick maximally occurring modTime in quorum (#17071)

This commit is contained in:
Krishnan Parthasarathi 2023-04-25 10:13:57 -07:00 committed by GitHub
parent 8fd07bcd51
commit fae9000304
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 95 additions and 92 deletions

View File

@ -76,10 +76,14 @@ func commonTimeAndOccurence(times []time.Time, group time.Duration) (maxTime tim
return time.Unix(0, latest).UTC(), maxima return time.Unix(0, latest).UTC(), maxima
} }
// commonTime returns a maximally occurring time from a list of time. // commonTime returns a maximally occurring time from a list of time if it
func commonTime(modTimes []time.Time) (modTime time.Time) { // occurs >= quorum, else return timeSentinel
modTime, _ = commonTimeAndOccurence(modTimes, 0) func commonTime(modTimes []time.Time, quorum int) time.Time {
return modTime if modTime, count := commonTimeAndOccurence(modTimes, 0); count >= quorum {
return modTime
}
return timeSentinel
} }
// Beginning of unix time is treated as sentinel value here. // Beginning of unix time is treated as sentinel value here.
@ -157,15 +161,15 @@ func listObjectDiskMtimes(partsMetadata []FileInfo) (diskMTimes []time.Time) {
// listOnlineDisks - returns // listOnlineDisks - returns
// - a slice of disks where disk having 'older' xl.meta (or nothing) // - a slice of disks where disk having 'older' xl.meta (or nothing)
// are set to nil. // are set to nil.
// - latest (in time) of the maximally occurring modTime(s). // - latest (in time) of the maximally occurring modTime(s), which has at least quorum occurrences.
func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error) (onlineDisks []StorageAPI, modTime time.Time) { func listOnlineDisks(disks []StorageAPI, partsMetadata []FileInfo, errs []error, quorum int) (onlineDisks []StorageAPI, modTime time.Time) {
onlineDisks = make([]StorageAPI, len(disks)) onlineDisks = make([]StorageAPI, len(disks))
// List all the file commit ids from parts metadata. // List all the file commit ids from parts metadata.
modTimes := listObjectModtimes(partsMetadata, errs) modTimes := listObjectModtimes(partsMetadata, errs)
// Reduce list of UUIDs to a single common value. // Reduce list of UUIDs to a single common value.
modTime = commonTime(modTimes) modTime = commonTime(modTimes, quorum)
// Create a new online disks slice, which have common uuid. // Create a new online disks slice, which have common uuid.
for index, t := range modTimes { for index, t := range modTimes {

View File

@ -52,7 +52,7 @@ func getLatestFileInfo(ctx context.Context, partsMetadata []FileInfo, defaultPar
var latestFileInfo FileInfo var latestFileInfo FileInfo
// Reduce list of UUIDs to a single common value - i.e. the last updated Time // Reduce list of UUIDs to a single common value - i.e. the last updated Time
modTime := commonTime(modTimes) modTime := commonTime(modTimes, expectedRQuorum)
if modTime.IsZero() || modTime.Equal(timeSentinel) { if modTime.IsZero() || modTime.Equal(timeSentinel) {
return FileInfo{}, errErasureReadQuorum return FileInfo{}, errErasureReadQuorum
@ -82,8 +82,9 @@ func getLatestFileInfo(ctx context.Context, partsMetadata []FileInfo, defaultPar
func TestCommonTime(t *testing.T) { func TestCommonTime(t *testing.T) {
// List of test cases for common modTime. // List of test cases for common modTime.
testCases := []struct { testCases := []struct {
times []time.Time times []time.Time
time time.Time time time.Time
quorum int
}{ }{
{ {
// 1. Tests common times when slice has varying time elements. // 1. Tests common times when slice has varying time elements.
@ -97,6 +98,7 @@ func TestCommonTime(t *testing.T) {
time.Unix(0, 1).UTC(), time.Unix(0, 1).UTC(),
}, },
time.Unix(0, 3).UTC(), time.Unix(0, 3).UTC(),
3,
}, },
{ {
// 2. Tests common time obtained when all elements are equal. // 2. Tests common time obtained when all elements are equal.
@ -110,10 +112,11 @@ func TestCommonTime(t *testing.T) {
time.Unix(0, 3).UTC(), time.Unix(0, 3).UTC(),
}, },
time.Unix(0, 3).UTC(), time.Unix(0, 3).UTC(),
4,
}, },
{ {
// 3. Tests common time obtained when elements have a mixture // 3. Tests common time obtained when elements have a mixture of
// of sentinel values. // sentinel values and don't have read quorum on any of the values.
[]time.Time{ []time.Time{
time.Unix(0, 3).UTC(), time.Unix(0, 3).UTC(),
time.Unix(0, 3).UTC(), time.Unix(0, 3).UTC(),
@ -126,7 +129,8 @@ func TestCommonTime(t *testing.T) {
timeSentinel, timeSentinel,
timeSentinel, timeSentinel,
}, },
time.Unix(0, 3).UTC(), timeSentinel,
5,
}, },
} }
@ -134,7 +138,7 @@ func TestCommonTime(t *testing.T) {
// common modtime. Tests fail if modtime does not match. // common modtime. Tests fail if modtime does not match.
for i, testCase := range testCases { for i, testCase := range testCases {
// Obtain a common mod time from modTimes slice. // Obtain a common mod time from modTimes slice.
ctime := commonTime(testCase.times) ctime := commonTime(testCase.times, testCase.quorum)
if !testCase.time.Equal(ctime) { if !testCase.time.Equal(ctime) {
t.Errorf("Test case %d, expect to pass but failed. Wanted modTime: %s, got modTime: %s\n", i+1, testCase.time, ctime) t.Errorf("Test case %d, expect to pass but failed. Wanted modTime: %s, got modTime: %s\n", i+1, testCase.time, ctime)
} }
@ -151,30 +155,34 @@ func TestListOnlineDisks(t *testing.T) {
if err != nil { if err != nil {
t.Fatalf("Prepare Erasure backend failed - %v", err) t.Fatalf("Prepare Erasure backend failed - %v", err)
} }
setObjectLayer(obj)
defer obj.Shutdown(context.Background()) defer obj.Shutdown(context.Background())
defer removeRoots(disks) defer removeRoots(disks)
type tamperKind int type tamperKind int
const ( const (
noTamper tamperKind = iota noTamper tamperKind = iota
deletePart tamperKind = iota deletePart
corruptPart tamperKind = iota corruptPart
) )
threeNanoSecs := time.Unix(0, 3).UTC()
fourNanoSecs := time.Unix(0, 4).UTC() timeSentinel := time.Unix(1, 0).UTC()
modTimesThreeNone := []time.Time{ threeNanoSecs := time.Unix(3, 0).UTC()
threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs, fourNanoSecs := time.Unix(4, 0).UTC()
threeNanoSecs, threeNanoSecs, threeNanoSecs, modTimesThreeNone := make([]time.Time, 16)
timeSentinel, timeSentinel, timeSentinel, timeSentinel, modTimesThreeFour := make([]time.Time, 16)
timeSentinel, timeSentinel, timeSentinel, timeSentinel, for i := 0; i < 16; i++ {
timeSentinel, // Have 13 good xl.meta, 12 for default parity count = 4 (EC:4) and one
} // to be tampered with.
modTimesThreeFour := []time.Time{ if i > 12 {
threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs, modTimesThreeFour[i] = fourNanoSecs
threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs, modTimesThreeNone[i] = timeSentinel
fourNanoSecs, fourNanoSecs, fourNanoSecs, fourNanoSecs, continue
fourNanoSecs, fourNanoSecs, fourNanoSecs, fourNanoSecs, }
modTimesThreeFour[i] = threeNanoSecs
modTimesThreeNone[i] = threeNanoSecs
} }
testCases := []struct { testCases := []struct {
modTimes []time.Time modTimes []time.Time
expectedTime time.Time expectedTime time.Time
@ -183,10 +191,10 @@ func TestListOnlineDisks(t *testing.T) {
}{ }{
{ {
modTimes: modTimesThreeFour, modTimes: modTimesThreeFour,
expectedTime: fourNanoSecs, expectedTime: threeNanoSecs,
errs: []error{ errs: []error{
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
}, },
_tamperBackend: noTamper, _tamperBackend: noTamper,
}, },
@ -195,13 +203,10 @@ func TestListOnlineDisks(t *testing.T) {
expectedTime: threeNanoSecs, expectedTime: threeNanoSecs,
errs: []error{ errs: []error{
// Disks that have a valid xl.meta. // Disks that have a valid xl.meta.
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
// Majority of disks don't have xl.meta. nil, nil, nil, nil, nil,
errFileNotFound, errFileNotFound, // Some disks can't access xl.meta.
errFileNotFound, errFileNotFound, errFileNotFound, errDiskAccessDenied, errDiskNotFound,
errFileNotFound, errDiskAccessDenied,
errDiskNotFound, errFileNotFound,
errFileNotFound,
}, },
_tamperBackend: deletePart, _tamperBackend: deletePart,
}, },
@ -210,13 +215,10 @@ func TestListOnlineDisks(t *testing.T) {
expectedTime: threeNanoSecs, expectedTime: threeNanoSecs,
errs: []error{ errs: []error{
// Disks that have a valid xl.meta. // Disks that have a valid xl.meta.
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
// Majority of disks don't have xl.meta. nil, nil, nil, nil, nil,
errFileNotFound, errFileNotFound, // Some disks don't have xl.meta.
errFileNotFound, errFileNotFound, errDiskNotFound, errFileNotFound, errFileNotFound,
errFileNotFound, errDiskAccessDenied,
errDiskNotFound, errFileNotFound,
errFileNotFound,
}, },
_tamperBackend: corruptPart, _tamperBackend: corruptPart,
}, },
@ -296,7 +298,8 @@ func TestListOnlineDisks(t *testing.T) {
} }
onlineDisks, modTime := listOnlineDisks(erasureDisks, partsMetadata, test.errs) rQuorum := len(errs) - z.serverPools[0].sets[0].defaultParityCount
onlineDisks, modTime := listOnlineDisks(erasureDisks, partsMetadata, test.errs, rQuorum)
if !modTime.Equal(test.expectedTime) { if !modTime.Equal(test.expectedTime) {
t.Fatalf("Expected modTime to be equal to %v but was found to be %v", t.Fatalf("Expected modTime to be equal to %v but was found to be %v",
test.expectedTime, modTime) test.expectedTime, modTime)
@ -325,6 +328,7 @@ func TestListOnlineDisksSmallObjects(t *testing.T) {
if err != nil { if err != nil {
t.Fatalf("Prepare Erasure backend failed - %v", err) t.Fatalf("Prepare Erasure backend failed - %v", err)
} }
setObjectLayer(obj)
defer obj.Shutdown(context.Background()) defer obj.Shutdown(context.Background())
defer removeRoots(disks) defer removeRoots(disks)
@ -337,19 +341,20 @@ func TestListOnlineDisksSmallObjects(t *testing.T) {
timeSentinel := time.Unix(1, 0).UTC() timeSentinel := time.Unix(1, 0).UTC()
threeNanoSecs := time.Unix(3, 0).UTC() threeNanoSecs := time.Unix(3, 0).UTC()
fourNanoSecs := time.Unix(4, 0).UTC() fourNanoSecs := time.Unix(4, 0).UTC()
modTimesThreeNone := []time.Time{ modTimesThreeNone := make([]time.Time, 16)
threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs, modTimesThreeFour := make([]time.Time, 16)
threeNanoSecs, threeNanoSecs, threeNanoSecs, for i := 0; i < 16; i++ {
timeSentinel, timeSentinel, timeSentinel, timeSentinel, // Have 13 good xl.meta, 12 for default parity count = 4 (EC:4) and one
timeSentinel, timeSentinel, timeSentinel, timeSentinel, // to be tampered with.
timeSentinel, if i > 12 {
} modTimesThreeFour[i] = fourNanoSecs
modTimesThreeFour := []time.Time{ modTimesThreeNone[i] = timeSentinel
threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs, continue
threeNanoSecs, threeNanoSecs, threeNanoSecs, threeNanoSecs, }
fourNanoSecs, fourNanoSecs, fourNanoSecs, fourNanoSecs, modTimesThreeFour[i] = threeNanoSecs
fourNanoSecs, fourNanoSecs, fourNanoSecs, fourNanoSecs, modTimesThreeNone[i] = threeNanoSecs
} }
testCases := []struct { testCases := []struct {
modTimes []time.Time modTimes []time.Time
expectedTime time.Time expectedTime time.Time
@ -358,10 +363,10 @@ func TestListOnlineDisksSmallObjects(t *testing.T) {
}{ }{
{ {
modTimes: modTimesThreeFour, modTimes: modTimesThreeFour,
expectedTime: fourNanoSecs, expectedTime: threeNanoSecs,
errs: []error{ errs: []error{
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
}, },
_tamperBackend: noTamper, _tamperBackend: noTamper,
}, },
@ -370,13 +375,10 @@ func TestListOnlineDisksSmallObjects(t *testing.T) {
expectedTime: threeNanoSecs, expectedTime: threeNanoSecs,
errs: []error{ errs: []error{
// Disks that have a valid xl.meta. // Disks that have a valid xl.meta.
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
// Majority of disks don't have xl.meta. nil, nil, nil, nil, nil,
errFileNotFound, errFileNotFound, // Some disks can't access xl.meta.
errFileNotFound, errFileNotFound, errFileNotFound, errDiskAccessDenied, errDiskNotFound,
errFileNotFound, errDiskAccessDenied,
errDiskNotFound, errFileNotFound,
errFileNotFound,
}, },
_tamperBackend: deletePart, _tamperBackend: deletePart,
}, },
@ -385,13 +387,10 @@ func TestListOnlineDisksSmallObjects(t *testing.T) {
expectedTime: threeNanoSecs, expectedTime: threeNanoSecs,
errs: []error{ errs: []error{
// Disks that have a valid xl.meta. // Disks that have a valid xl.meta.
nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
// Majority of disks don't have xl.meta. nil, nil, nil, nil, nil,
errFileNotFound, errFileNotFound, // Some disks don't have xl.meta.
errFileNotFound, errFileNotFound, errDiskNotFound, errFileNotFound, errFileNotFound,
errFileNotFound, errDiskAccessDenied,
errDiskNotFound, errFileNotFound,
errFileNotFound,
}, },
_tamperBackend: corruptPart, _tamperBackend: corruptPart,
}, },
@ -481,7 +480,8 @@ func TestListOnlineDisksSmallObjects(t *testing.T) {
t.Fatalf("Failed to getLatestFileInfo, expected %v, got %v", errErasureReadQuorum, err) t.Fatalf("Failed to getLatestFileInfo, expected %v, got %v", errErasureReadQuorum, err)
} }
onlineDisks, modTime := listOnlineDisks(erasureDisks, partsMetadata, test.errs) rQuorum := len(errs) - z.serverPools[0].sets[0].defaultParityCount
onlineDisks, modTime := listOnlineDisks(erasureDisks, partsMetadata, test.errs, rQuorum)
if !modTime.Equal(test.expectedTime) { if !modTime.Equal(test.expectedTime) {
t.Fatalf("Expected modTime to be equal to %v but was found to be %v", t.Fatalf("Expected modTime to be equal to %v but was found to be %v",
test.expectedTime, modTime) test.expectedTime, modTime)
@ -508,6 +508,7 @@ func TestDisksWithAllParts(t *testing.T) {
if err != nil { if err != nil {
t.Fatalf("Prepare Erasure backend failed - %v", err) t.Fatalf("Prepare Erasure backend failed - %v", err)
} }
setObjectLayer(obj)
defer obj.Shutdown(context.Background()) defer obj.Shutdown(context.Background())
defer removeRoots(disks) defer removeRoots(disks)
@ -547,7 +548,7 @@ func TestDisksWithAllParts(t *testing.T) {
t.Fatalf("Failed to get quorum consistent fileInfo %v", err) t.Fatalf("Failed to get quorum consistent fileInfo %v", err)
} }
erasureDisks, _ = listOnlineDisks(erasureDisks, partsMetadata, errs) erasureDisks, _ = listOnlineDisks(erasureDisks, partsMetadata, errs, readQuorum)
filteredDisks, errs, _ := disksWithAllParts(ctx, erasureDisks, partsMetadata, filteredDisks, errs, _ := disksWithAllParts(ctx, erasureDisks, partsMetadata,
errs, fi, bucket, object, madmin.HealDeepScan) errs, fi, bucket, object, madmin.HealDeepScan)

View File

@ -386,7 +386,7 @@ func (er *erasureObjects) healObject(ctx context.Context, bucket string, object
// List of disks having latest version of the object xl.meta // List of disks having latest version of the object xl.meta
// (by modtime). // (by modtime).
onlineDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs) onlineDisks, modTime := listOnlineDisks(storageDisks, partsMetadata, errs, readQuorum)
// Latest FileInfo for reference. If a valid metadata is not // Latest FileInfo for reference. If a valid metadata is not
// present, it is as good as object not found. // present, it is as good as object not found.

View File

@ -78,24 +78,22 @@ func (er erasureObjects) checkUploadIDExists(ctx context.Context, bucket, object
return fi, nil, err return fi, nil, err
} }
quorum := readQuorum
if write {
quorum = writeQuorum
}
// List all online disks. // List all online disks.
_, modTime := listOnlineDisks(storageDisks, partsMetadata, errs) _, modTime := listOnlineDisks(storageDisks, partsMetadata, errs, quorum)
var quorum int
if write { if write {
reducedErr := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum) reducedErr := reduceWriteQuorumErrs(ctx, errs, objectOpIgnoredErrs, writeQuorum)
if reducedErr == errErasureWriteQuorum { if reducedErr == errErasureWriteQuorum {
return fi, nil, reducedErr return fi, nil, reducedErr
} }
quorum = writeQuorum
} else { } else {
if reducedErr := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, readQuorum); reducedErr != nil { if reducedErr := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, readQuorum); reducedErr != nil {
return fi, nil, reducedErr return fi, nil, reducedErr
} }
// Pick one from the first valid metadata.
quorum = readQuorum
} }
// Pick one from the first valid metadata. // Pick one from the first valid metadata.

View File

@ -107,7 +107,7 @@ func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, d
} }
// List all online disks. // List all online disks.
onlineDisks, modTime := listOnlineDisks(storageDisks, metaArr, errs) onlineDisks, modTime := listOnlineDisks(storageDisks, metaArr, errs, readQuorum)
// Pick latest valid metadata. // Pick latest valid metadata.
fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum) fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum)
@ -660,7 +660,7 @@ func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object s
} }
// List all online disks. // List all online disks.
onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs) onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs, readQuorum)
// Pick latest valid metadata. // Pick latest valid metadata.
fi, err = pickValidFileInfo(ctx, metaArr, modTime, readQuorum) fi, err = pickValidFileInfo(ctx, metaArr, modTime, readQuorum)
@ -1770,7 +1770,7 @@ func (er erasureObjects) PutObjectMetadata(ctx context.Context, bucket, object s
} }
// List all online disks. // List all online disks.
onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs) onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs, readQuorum)
// Pick latest valid metadata. // Pick latest valid metadata.
fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum) fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum)
@ -1843,7 +1843,7 @@ func (er erasureObjects) PutObjectTags(ctx context.Context, bucket, object strin
} }
// List all online disks. // List all online disks.
onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs) onlineDisks, modTime := listOnlineDisks(disks, metaArr, errs, readQuorum)
// Pick latest valid metadata. // Pick latest valid metadata.
fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum) fi, err := pickValidFileInfo(ctx, metaArr, modTime, readQuorum)