Add disksUnavailable healStatus const (#3990)

The `disksUnavailable` healStatus constant (named `canPartiallyHeal` in the
code and exported as `madmin.CanPartiallyHeal`) indicates that a given object
needs healing but one or more of the disks requiring heal are offline. Admin
heal API consumers can use this to distinguish between a successful heal and
a no-op that occurred because the outdated disks were offline.
This commit is contained in:
Krishnan Parthasarathi 2017-04-01 06:25:15 +05:30 committed by Harshavardhana
parent a2a8d54bb6
commit 2bd694dbc8
16 changed files with 168 additions and 69 deletions

View File

@ -604,6 +604,11 @@ func isDryRun(qval url.Values) bool {
return false
}
// healObjectResult - response body of the heal-object and heal-upload
// admin handlers. It is marshalled to JSON and written on success.
type healObjectResult struct {
// HealedCount - populated from the numHealedDisks value returned by
// ObjectLayer.HealObject.
HealedCount int
// OfflineCount - populated from the numOfflineDisks value returned by
// ObjectLayer.HealObject.
OfflineCount int
}
// HealObjectHandler - POST /?heal&bucket=mybucket&object=myobject&dry-run
// - x-minio-operation = object
// - bucket and object are both mandatory query parameters
@ -646,14 +651,23 @@ func (adminAPI adminAPIHandlers) HealObjectHandler(w http.ResponseWriter, r *htt
return
}
err := objLayer.HealObject(bucket, object)
numOfflineDisks, numHealedDisks, err := objLayer.HealObject(bucket, object)
if err != nil {
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
return
}
jsonBytes, err := json.Marshal(healObjectResult{
HealedCount: numHealedDisks,
OfflineCount: numOfflineDisks,
})
if err != nil {
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
return
}
// Return 200 on success.
writeSuccessResponseHeadersOnly(w)
writeSuccessResponseJSON(w, jsonBytes)
}
// HealUploadHandler - POST /?heal&bucket=mybucket&object=myobject&upload-id=myuploadID&dry-run
@ -715,14 +729,23 @@ func (adminAPI adminAPIHandlers) HealUploadHandler(w http.ResponseWriter, r *htt
//object. The 'object' corresponding to a given bucket,
//object and uploadID is
//.minio.sys/multipart/bucket/object/uploadID.
err := objLayer.HealObject(minioMetaMultipartBucket, uploadObj)
numOfflineDisks, numHealedDisks, err := objLayer.HealObject(minioMetaMultipartBucket, uploadObj)
if err != nil {
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
return
}
jsonBytes, err := json.Marshal(healObjectResult{
HealedCount: numHealedDisks,
OfflineCount: numOfflineDisks,
})
if err != nil {
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
return
}
// Return 200 on success.
writeSuccessResponseHeadersOnly(w)
writeSuccessResponseJSON(w, jsonBytes)
}
// HealFormatHandler - POST /?heal&dry-run

View File

@ -27,8 +27,8 @@ func (a AzureObjects) ListBucketsHeal() (buckets []BucketInfo, err error) {
}
// HealObject - Not relevant.
func (a AzureObjects) HealObject(bucket, object string) error {
return traceError(NotImplemented{})
func (a AzureObjects) HealObject(bucket, object string) (int, int, error) {
return 0, 0, traceError(NotImplemented{})
}
// ListObjectsHeal - Not relevant.

View File

@ -809,8 +809,8 @@ func (fs fsObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKey
}
// HealObject - no-op for fs. Valid only for XL.
func (fs fsObjects) HealObject(bucket, object string) error {
return traceError(NotImplemented{})
func (fs fsObjects) HealObject(bucket, object string) (int, int, error) {
return 0, 0, traceError(NotImplemented{})
}
// HealBucket - no-op for fs, Valid only for XL.

View File

@ -303,7 +303,7 @@ func TestFSHealObject(t *testing.T) {
defer removeAll(disk)
obj := initFSObjects(disk, t)
err := obj.HealObject("bucket", "object")
_, _, err := obj.HealObject("bucket", "object")
if err == nil || !isSameType(errorCause(err), NotImplemented{}) {
t.Fatalf("Heal Object should return NotImplemented error ")
}

View File

@ -57,6 +57,7 @@ const (
canHeal // Object can be healed
corrupted // Object can't be healed
quorumUnavailable // Object can't be healed until read quorum is available
canPartiallyHeal // Object can't be healed completely until outdated disk(s) are online.
)
// HealBucketInfo - represents healing related information of a bucket.
@ -80,7 +81,7 @@ type BucketInfo struct {
type HealObjectInfo struct {
Status healStatus
MissingDataCount int
MissingPartityCount int
MissingParityCount int
}
// ObjectInfo - represents object metadata.

View File

@ -50,7 +50,7 @@ type ObjectLayer interface {
// Healing operations.
HealBucket(bucket string) error
ListBucketsHeal() (buckets []BucketInfo, err error)
HealObject(bucket, object string) error
HealObject(bucket, object string) (int, int, error)
ListObjectsHeal(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error)
ListUploadsHeal(bucket, prefix, marker, uploadIDMarker,
delimiter string, maxUploads int) (ListMultipartsInfo, error)

View File

@ -187,7 +187,7 @@ func xlHealStat(xl xlObjects, partsMetadata []xlMetaV1, errs []error) HealObject
return HealObjectInfo{
Status: quorumUnavailable,
MissingDataCount: 0,
MissingPartityCount: 0,
MissingParityCount: 0,
}
}
@ -197,7 +197,7 @@ func xlHealStat(xl xlObjects, partsMetadata []xlMetaV1, errs []error) HealObject
return HealObjectInfo{
Status: corrupted,
MissingDataCount: 0,
MissingPartityCount: 0,
MissingParityCount: 0,
}
}
@ -206,11 +206,16 @@ func xlHealStat(xl xlObjects, partsMetadata []xlMetaV1, errs []error) HealObject
missingDataCount := 0
missingParityCount := 0
disksMissing := false
for i, err := range errs {
// xl.json is not found, which implies the erasure
// coded blocks are unavailable in the corresponding disk.
// First half of the disks are data and the rest are parity.
if realErr := errorCause(err); realErr == errFileNotFound || realErr == errDiskNotFound {
switch realErr := errorCause(err); realErr {
case errDiskNotFound:
disksMissing = true
fallthrough
case errFileNotFound:
if xlMeta.Erasure.Distribution[i]-1 < xl.dataBlocks {
missingDataCount++
} else {
@ -219,12 +224,22 @@ func xlHealStat(xl xlObjects, partsMetadata []xlMetaV1, errs []error) HealObject
}
}
// The object may not be healed completely, since some of the
// disks needing healing are unavailable.
if disksMissing {
return HealObjectInfo{
Status: canPartiallyHeal,
MissingDataCount: missingDataCount,
MissingParityCount: missingParityCount,
}
}
// This object can be healed. We have enough object metadata
// to reconstruct missing erasure coded blocks.
return HealObjectInfo{
Status: canHeal,
MissingDataCount: missingDataCount,
MissingPartityCount: missingParityCount,
MissingParityCount: missingParityCount,
}
}

View File

@ -125,7 +125,7 @@ func healBucketMetadata(storageDisks []StorageAPI, bucket string, readQuorum int
metaLock.RLock()
defer metaLock.RUnlock()
// Heals the given file at metaPath.
if err := healObject(storageDisks, minioMetaBucket, metaPath, readQuorum); err != nil && !isErrObjectNotFound(err) {
if _, _, err := healObject(storageDisks, minioMetaBucket, metaPath, readQuorum); err != nil && !isErrObjectNotFound(err) {
return err
} // Success.
return nil
@ -313,18 +313,18 @@ func quickHeal(storageDisks []StorageAPI, writeQuorum int, readQuorum int) error
}
// Heals an object only the corrupted/missing erasure blocks.
func healObject(storageDisks []StorageAPI, bucket string, object string, quorum int) error {
func healObject(storageDisks []StorageAPI, bucket string, object string, quorum int) (int, int, error) {
partsMetadata, errs := readAllXLMetadata(storageDisks, bucket, object)
// readQuorum suffices for xl.json since we use monotonic
// system time to break the tie when a split-brain situation
// arises.
if reducedErr := reduceReadQuorumErrs(errs, nil, quorum); reducedErr != nil {
return toObjectErr(reducedErr, bucket, object)
return 0, 0, toObjectErr(reducedErr, bucket, object)
}
if !xlShouldHeal(storageDisks, partsMetadata, errs, bucket, object) {
// There is nothing to heal.
return nil
return 0, 0, nil
}
// List of disks having latest version of the object.
@ -333,12 +333,16 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
// List of disks having all parts as per latest xl.json.
availableDisks, errs, aErr := disksWithAllParts(latestDisks, partsMetadata, errs, bucket, object)
if aErr != nil {
return toObjectErr(aErr, bucket, object)
return 0, 0, toObjectErr(aErr, bucket, object)
}
numAvailableDisks := 0
for _, disk := range availableDisks {
if disk != nil {
numOfflineDisks := 0
for index, disk := range availableDisks {
switch {
case disk == nil, errs[index] == errDiskNotFound:
numOfflineDisks++
case disk != nil:
numAvailableDisks++
}
}
@ -346,18 +350,25 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
// If less than read quorum number of disks have all the parts
// of the data, we can't reconstruct the erasure-coded data.
if numAvailableDisks < quorum {
return toObjectErr(errXLReadQuorum, bucket, object)
return 0, 0, toObjectErr(errXLReadQuorum, bucket, object)
}
// List of disks having outdated version of the object or missing object.
outDatedDisks := outDatedDisks(storageDisks, availableDisks, errs, partsMetadata,
bucket, object)
numHealedDisks := 0
for _, disk := range outDatedDisks {
if disk != nil {
numHealedDisks++
}
}
// Latest xlMetaV1 for reference. If a valid metadata is not
// present, it is as good as object not found.
latestMeta, pErr := pickValidXLMeta(partsMetadata, modTime)
if pErr != nil {
return toObjectErr(pErr, bucket, object)
return 0, 0, toObjectErr(pErr, bucket, object)
}
for index, disk := range outDatedDisks {
@ -389,14 +400,14 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
for _, part := range outDatedMeta.Parts {
dErr := disk.DeleteFile(bucket, pathJoin(object, part.Name))
if dErr != nil && !isErr(dErr, errFileNotFound) {
return toObjectErr(traceError(dErr), bucket, object)
return 0, 0, toObjectErr(traceError(dErr), bucket, object)
}
}
// Delete xl.json file. Ignore if xl.json not found.
dErr := disk.DeleteFile(bucket, pathJoin(object, xlMetaJSONFile))
if dErr != nil && !isErr(dErr, errFileNotFound) {
return toObjectErr(traceError(dErr), bucket, object)
return 0, 0, toObjectErr(traceError(dErr), bucket, object)
}
}
@ -425,7 +436,7 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
minioMetaTmpBucket, pathJoin(tmpID, partName),
partSize, erasure.BlockSize, erasure.DataBlocks, erasure.ParityBlocks, sumInfo.Algorithm)
if hErr != nil {
return toObjectErr(hErr, bucket, object)
return 0, 0, toObjectErr(hErr, bucket, object)
}
for index, sum := range checkSums {
if outDatedDisks[index] != nil {
@ -450,7 +461,7 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
// Generate and write `xl.json` generated from other disks.
aErr = writeUniqueXLMetadata(outDatedDisks, minioMetaTmpBucket, tmpID, partsMetadata, diskCount(outDatedDisks))
if aErr != nil {
return toObjectErr(aErr, bucket, object)
return 0, 0, toObjectErr(aErr, bucket, object)
}
// Rename from tmp location to the actual location.
@ -461,22 +472,22 @@ func healObject(storageDisks []StorageAPI, bucket string, object string, quorum
// Remove any lingering partial data from current namespace.
aErr = disk.DeleteFile(bucket, retainSlash(object))
if aErr != nil && aErr != errFileNotFound {
return toObjectErr(traceError(aErr), bucket, object)
return 0, 0, toObjectErr(traceError(aErr), bucket, object)
}
// Attempt a rename now from healed data to final location.
aErr = disk.RenameFile(minioMetaTmpBucket, retainSlash(tmpID), bucket, retainSlash(object))
if aErr != nil {
return toObjectErr(traceError(aErr), bucket, object)
return 0, 0, toObjectErr(traceError(aErr), bucket, object)
}
}
return nil
return numOfflineDisks, numHealedDisks, nil
}
// HealObject heals a given object for all its missing entries.
// FIXME: If an object was deleted and one disk was down,
// and later the disk comes back up again, heal on the object
// should delete it.
func (xl xlObjects) HealObject(bucket, object string) error {
func (xl xlObjects) HealObject(bucket, object string) (int, int, error) {
// Lock the object before healing.
objectLock := globalNSMutex.NewNSLock(bucket, object)
objectLock.RLock()

View File

@ -558,7 +558,7 @@ func TestHealObjectXL(t *testing.T) {
t.Fatalf("Failed to delete a file - %v", err)
}
err = obj.HealObject(bucket, object)
_, _, err = obj.HealObject(bucket, object)
if err != nil {
t.Fatalf("Failed to heal object - %v", err)
}
@ -574,7 +574,7 @@ func TestHealObjectXL(t *testing.T) {
}
// Try healing now, expect to receive errDiskNotFound.
err = obj.HealObject(bucket, object)
_, _, err = obj.HealObject(bucket, object)
if errorCause(err) != errDiskNotFound {
t.Errorf("Expected %v but received %v", errDiskNotFound, err)
}

View File

@ -312,7 +312,7 @@ func TestHealing(t *testing.T) {
t.Fatal(err)
}
err = xl.HealObject(bucket, object)
_, _, err = xl.HealObject(bucket, object)
if err != nil {
t.Fatal(err)
}
@ -336,7 +336,7 @@ func TestHealing(t *testing.T) {
t.Fatal(err)
}
err = xl.HealObject(bucket, object)
_, _, err = xl.HealObject(bucket, object)
if err != nil {
t.Fatal(err)
}

View File

@ -242,18 +242,19 @@ __Example__
```
<a name="HealObject"></a>
### HealObject(bucket, object string, isDryRun bool) error
### HealObject(bucket, object string, isDryRun bool) (HealObjectResult, error)
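If the object is successfully healed, returns a `HealObjectResult` (the number of disks that were healed and the number of disks that needed healing but were offline) and a nil error; otherwise returns an error indicating the reason for failure. If isDryRun is true, the object is not healed, but the heal object request is validated by the server (e.g., whether the object exists, whether the object name is valid, etc.) and the returned `HealObjectResult` is empty.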
__Example__
``` go
isDryRun := false
err := madmClnt.HealObject("mybucket", "myobject", isDryRun)
isDryRun = false
healResult, err := madmClnt.HealObject("mybucket", "myobject", isDryRun)
if err != nil {
log.Fatalln(err)
}
log.Println("successfully healed mybucket/myobject")
log.Println("Heal-object result: ", healResult)
```
@ -323,17 +324,17 @@ __Example__
```
<a name="HealUpload"></a>
### HealUpload(bucket, object, uploadID string, isDryRun bool) error
### HealUpload(bucket, object, uploadID string, isDryRun bool) (HealObjectResult, error)
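If the upload is successfully healed, returns a `HealObjectResult` (the number of disks that were healed and the number of disks that needed healing but were offline) and a nil error; otherwise returns an error indicating the reason for failure. If isDryRun is true, the upload is not healed, but the heal upload request is validated by the server (e.g., whether the upload exists, whether the upload name is valid, etc.) and the returned `HealObjectResult` is empty.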
``` go
isDryRun = false
err = madmClnt.HealUpload("mybucket", "myobject", "myuploadID", isDryRun)
healResult, err := madmClnt.HealUpload("mybucket", "myobject", "myUploadID", isDryRun)
if err != nil {
log.Fatalln(err)
}
log.Println("successfully healed mybucket/myobject/myuploadID")
log.Println("Heal-upload result: ", healResult)
```
## 5. Config operations

View File

@ -41,17 +41,17 @@ func main() {
// Heal object mybucket/myobject - dry run.
isDryRun := true
err = madmClnt.HealObject("mybucket", "myobject", isDryRun)
_, err = madmClnt.HealObject("mybucket", "myobject", isDryRun)
if err != nil {
log.Fatalln(err)
}
// Heal object mybucket/myobject - this time for real.
isDryRun = false
err = madmClnt.HealObject("mybucket", "myobject", isDryRun)
healResult, err := madmClnt.HealObject("mybucket", "myobject", isDryRun)
if err != nil {
log.Fatalln(err)
}
log.Println("successfully healed mybucket/myobject")
log.Printf("heal result: %#v\n", healResult)
}

View File

@ -65,6 +65,8 @@ func main() {
switch healInfo := *object.HealObjectInfo; healInfo.Status {
case madmin.CanHeal:
fmt.Println(object.Key, " can be healed.")
case madmin.CanPartiallyHeal:
fmt.Println(object.Key, " can't be healed completely, some disks are offline.")
case madmin.QuorumUnavailable:
fmt.Println(object.Key, " can't be healed until quorum is available.")
case madmin.Corrupted:

View File

@ -41,17 +41,17 @@ func main() {
// Heal upload mybucket/myobject/uploadID - dry run.
isDryRun := true
err = madmClnt.HealUpload("mybucket", "myobject", "myuploadID", isDryRun)
_, err = madmClnt.HealUpload("mybucket", "myobject", "myUploadID", isDryRun)
if err != nil {
log.Fatalln(err)
}
// Heal upload mybucket/myobject/uploadID - this time for real.
isDryRun = false
err = madmClnt.HealUpload("mybucket", "myobject", "myuploadID", isDryRun)
healResult, err := madmClnt.HealUpload("mybucket", "myobject", "myUploadID", isDryRun)
if err != nil {
log.Fatalln(err)
}
log.Println("successfully healed mybucket/myobject/myuploadID")
log.Printf("Heal result for mybucket/myobject/myUploadID: %#v\n", healResult)
}

View File

@ -64,6 +64,8 @@ func main() {
switch healInfo := *upload.HealUploadInfo; healInfo.Status {
case madmin.CanHeal:
fmt.Println(upload.Key, " can be healed.")
case madmin.CanPartiallyHeal:
fmt.Println(upload.Key, " can be healed partially. Some disks may be offline.")
case madmin.QuorumUnavailable:
fmt.Println(upload.Key, " can't be healed until quorum is available.")
case madmin.Corrupted:

View File

@ -20,8 +20,10 @@
package madmin
import (
"encoding/json"
"encoding/xml"
"fmt"
"io/ioutil"
"net/http"
"net/url"
"time"
@ -104,8 +106,12 @@ const (
CanHeal
// Corrupted - Object can't be healed
Corrupted
// QuorumUnavailable - Object can't be healed until read quorum is available
// QuorumUnavailable - Object can't be healed until read
// quorum is available
QuorumUnavailable
// CanPartiallyHeal - Object can't be healed completely until
// disks with missing parts come online
CanPartiallyHeal
)
// HealBucketInfo - represents healing related information of a bucket.
@ -129,7 +135,7 @@ type BucketInfo struct {
type HealObjectInfo struct {
Status HealStatus
MissingDataCount int
MissingPartityCount int
MissingParityCount int
}
// ObjectInfo container for object metadata.
@ -434,7 +440,7 @@ func (adm *AdminClient) HealBucket(bucket string, dryrun bool) error {
}
// HealUpload - Heal the given upload.
func (adm *AdminClient) HealUpload(bucket, object, uploadID string, dryrun bool) error {
func (adm *AdminClient) HealUpload(bucket, object, uploadID string, dryrun bool) (HealObjectResult, error) {
// Construct query params.
queryVal := url.Values{}
queryVal.Set("heal", "")
@ -460,18 +466,40 @@ func (adm *AdminClient) HealUpload(bucket, object, uploadID string, dryrun bool)
defer closeResponse(resp)
if err != nil {
return err
return HealObjectResult{}, err
}
if resp.StatusCode != http.StatusOK {
return httpRespToErrorResponse(resp)
return HealObjectResult{}, httpRespToErrorResponse(resp)
}
return nil
// Healing is not performed so heal object result is empty.
if dryrun {
return HealObjectResult{}, nil
}
jsonBytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
return HealObjectResult{}, err
}
healResult := HealObjectResult{}
err = json.Unmarshal(jsonBytes, &healResult)
if err != nil {
return HealObjectResult{}, err
}
return healResult, nil
}
// HealObjectResult - represents result of heal-object admin API.
// It is also the result type of HealUpload. Both HealObject and
// HealUpload return an empty HealObjectResult for dry-run requests,
// since no healing is performed in that case.
type HealObjectResult struct {
HealedCount int // number of disks that were healed.
OfflineCount int // number of disks that needed healing but were offline.
}
// HealObject - Heal the given object.
func (adm *AdminClient) HealObject(bucket, object string, dryrun bool) error {
func (adm *AdminClient) HealObject(bucket, object string, dryrun bool) (HealObjectResult, error) {
// Construct query params.
queryVal := url.Values{}
queryVal.Set("heal", "")
@ -494,14 +522,30 @@ func (adm *AdminClient) HealObject(bucket, object string, dryrun bool) error {
defer closeResponse(resp)
if err != nil {
return err
return HealObjectResult{}, err
}
if resp.StatusCode != http.StatusOK {
return httpRespToErrorResponse(resp)
return HealObjectResult{}, httpRespToErrorResponse(resp)
}
return nil
// Healing is not performed so heal object result is empty.
if dryrun {
return HealObjectResult{}, nil
}
jsonBytes, err := ioutil.ReadAll(resp.Body)
if err != nil {
return HealObjectResult{}, err
}
healResult := HealObjectResult{}
err = json.Unmarshal(jsonBytes, &healResult)
if err != nil {
return HealObjectResult{}, err
}
return healResult, nil
}
// HealFormat - heal storage format on available disks.