minio/cmd/xl-v1-healing_test.go
Aditya Manthramurthy a337ea4d11 Move admin APIs to new path and add redesigned heal APIs (#5351)
- Changes related to moving admin APIs
   - admin APIs now have an endpoint under /minio/admin
   - admin APIs are now versioned - a new API to serve the version is
     added at "GET /minio/admin/version" and all API operations have the
     path prefix /minio/admin/v1/<operation>
   - new service stop API added
   - credentials change API is moved to /minio/admin/v1/config/credential
   - credentials change API and configuration get/set API now require TLS
     so that credentials are protected
   - all API requests now receive JSON
   - heal APIs are disabled as they will be changed substantially

- Heal API changes
   Heal API is now provided at a single endpoint with the ability for a
   client to start a heal sequence on all the data in the server, a
   single bucket, or under a prefix within a bucket.

   When a heal sequence is started, the server returns a unique token
   that needs to be used for subsequent 'status' requests to fetch heal
   results.

   On each status request from the client, the server returns heal result
   records that it has accumulated since the previous status request. The
   server accumulates up to 1000 records and pauses healing further
   objects until the client requests status. If the client does not
   request any further records for a long time, the server aborts the
   heal sequence automatically.

   A heal result record is returned for each entity healed on the server,
   such as system metadata, object metadata, buckets and objects, and has
   information about the before and after states on each disk.

   A client may request to force restart a heal sequence - this causes
   the running heal sequence to be aborted at the next safe spot and
   starts a new heal sequence.
2018-01-22 14:54:55 -08:00

550 lines
14 KiB
Go

/*
* Minio Cloud Storage, (C) 2016, 2017 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"bytes"
"fmt"
"os"
"path/filepath"
"testing"
"github.com/minio/minio-go/pkg/set"
"github.com/minio/minio/pkg/errors"
)
// TestHealFormatXL exercises healFormatXL against a series of freshly
// initialized 16-disk XL setups, each of which is put into a different state
// (healthy, offline disks, faulty disks, missing or corrupted format.json)
// before healing is attempted.
func TestHealFormatXL(t *testing.T) {
	root, err := newTestConfig(globalMinioDefaultRegion)
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(root)

	nDisks := 16

	// Every scenario runs on its own set of disks. Their directories are
	// collected here and removed by one deferred call, so they are cleaned
	// up even when a scenario aborts the test through t.Fatal.
	var allDirs []string
	defer func() { removeRoots(allDirs) }()

	// newXL creates nDisks fresh disks, initializes an object layer on top
	// of them and returns it as *xlObjects.
	newXL := func() *xlObjects {
		fsDirs, dErr := getRandomDisks(nDisks)
		if dErr != nil {
			t.Fatal(dErr)
		}
		allDirs = append(allDirs, fsDirs...)
		obj, _, iErr := initObjectLayer(mustGetNewEndpointList(fsDirs...))
		if iErr != nil {
			t.Fatal(iErr)
		}
		return obj.(*xlObjects)
	}

	// removeFormat deletes format.json from the first n disks of xl.
	removeFormat := func(xl *xlObjects, n int) {
		for _, disk := range xl.storageDisks[:n] {
			if dErr := disk.DeleteFile(minioMetaBucket, formatConfigFile); dErr != nil {
				t.Fatal(dErr)
			}
		}
	}

	// makeNaughty replaces disk i of xl with a naughty disk constructed
	// with diskErr.
	makeNaughty := func(xl *xlObjects, i int, diskErr error) {
		posixDisk, ok := xl.storageDisks[i].(*retryStorage)
		if !ok {
			t.Fatal("storage disk is not *retryStorage type")
		}
		xl.storageDisks[i] = newNaughtyDisk(posixDisk, nil, diskErr)
	}

	// Everything is fine, should return nil.
	xl := newXL()
	if _, err = healFormatXL(xl.storageDisks, false); err != nil {
		t.Fatal("Got an unexpected error: ", err)
	}

	// All disks are nil, read quorum error is expected.
	xl = newXL()
	for i := range xl.storageDisks {
		xl.storageDisks[i] = nil
	}
	if _, err = healFormatXL(xl.storageDisks, false); err != errXLReadQuorum {
		t.Fatal("Got an unexpected error: ", err)
	}

	// All disks are naughty disks built with errDiskFull, read quorum error
	// is expected.
	xl = newXL()
	for i := range xl.storageDisks {
		makeNaughty(xl, i, errDiskFull)
	}
	if _, err = healFormatXL(xl.storageDisks, false); err != errXLReadQuorum {
		t.Fatal("Got an unexpected error: ", err)
	}

	// The first disk is nil. Healing may report that some disks are
	// offline; note that a nil error is accepted by this check as well.
	xl = newXL()
	xl.storageDisks[0] = nil
	if _, err = healFormatXL(xl.storageDisks, false); err != nil && err.Error() != "cannot proceed with heal as some disks are offline" {
		t.Fatal("Got an unexpected error: ", err)
	}

	// Remove format.json of all disks, should return nil.
	xl = newXL()
	removeFormat(xl, len(xl.storageDisks))
	if _, err = healFormatXL(xl.storageDisks, false); err != nil {
		t.Fatal("Got an unexpected error: ", err)
	}

	// Append garbage to format.json on all disks, healing must fail.
	xl = newXL()
	for _, disk := range xl.storageDisks {
		if err = disk.AppendFile(minioMetaBucket, formatConfigFile, []byte("corrupted data")); err != nil {
			t.Fatal(err)
		}
	}
	if _, err = healFormatXL(xl.storageDisks, false); err == nil {
		t.Fatal("Should get a json parsing error, ")
	}

	// Remove format.json on 3 disks, should return nil.
	xl = newXL()
	removeFormat(xl, 3)
	if _, err = healFormatXL(xl.storageDisks, false); err != nil {
		t.Fatal("Got an unexpected error: ", err)
	}

	// format.json removed on 3 disks and a fourth disk built with
	// errDiskNotFound: any error returned must be the "some disks offline"
	// error (a nil error is accepted as well).
	xl = newXL()
	removeFormat(xl, 3)
	makeNaughty(xl, 3, errDiskNotFound)
	expectedErr := fmt.Errorf("cannot proceed with heal as %s", errSomeDiskOffline)
	if _, err = healFormatXL(xl.storageDisks, false); err != nil {
		if err.Error() != expectedErr.Error() {
			t.Fatal("Got an unexpected error: ", err)
		}
	}

	// format.json removed on 3 disks and a fourth disk built with
	// errDiskAccessDenied: any error returned must be the "unhandled
	// errors" error (a nil error is accepted as well).
	xl = newXL()
	removeFormat(xl, 3)
	makeNaughty(xl, 3, errDiskAccessDenied)
	expectedErr = fmt.Errorf("cannot proceed with heal as some disks had unhandled errors")
	if _, err = healFormatXL(xl.storageDisks, false); err != nil {
		if err.Error() != expectedErr.Error() {
			t.Fatal("Got an unexpected error: ", err)
		}
	}

	// A bucket exists and format.json is removed on 3 disks, should
	// return nil.
	xl = newXL()
	if err = xl.MakeBucketWithLocation(getRandomBucketName(), ""); err != nil {
		t.Fatal(err)
	}
	removeFormat(xl, 3)
	if _, err = healFormatXL(xl.storageDisks, false); err != nil {
		t.Fatal("Got an unexpected error: ", err)
	}
}
// TestUndoMakeBucket creates a bucket, undoes the creation with
// undoMakeBucket and validates that the bucket can no longer be found.
func TestUndoMakeBucket(t *testing.T) {
	root, err := newTestConfig(globalMinioDefaultRegion)
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(root)

	nDisks := 16
	fsDirs, err := getRandomDisks(nDisks)
	if err != nil {
		t.Fatal(err)
	}
	defer removeRoots(fsDirs)

	// Initialize the object layer on the fresh disks.
	obj, _, err := initObjectLayer(mustGetNewEndpointList(fsDirs...))
	if err != nil {
		t.Fatal(err)
	}

	bucketName := getRandomBucketName()
	if err = obj.MakeBucketWithLocation(bucketName, ""); err != nil {
		t.Fatal(err)
	}

	xl := obj.(*xlObjects)
	undoMakeBucket(xl.storageDisks, bucketName)

	// Validate if bucket was deleted properly: looking it up must fail,
	// and it must fail with BucketNotFound. A nil error means the bucket
	// is still there, i.e. the undo did not work.
	_, err = obj.GetBucketInfo(bucketName)
	if err == nil {
		t.Fatalf("Expected bucket %s to be removed, but it still exists", bucketName)
	}
	err = errors.Cause(err)
	if _, ok := err.(BucketNotFound); !ok {
		t.Fatal(err)
	}
}
// TestQuickHeal checks that quickHeal restores a bucket missing on some
// disks, and how it behaves when the first disk is unformatted, nil or
// reported as not found.
func TestQuickHeal(t *testing.T) {
	configDir, err := newTestConfig(globalMinioDefaultRegion)
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(configDir)

	const diskCount = 16
	diskDirs, err := getRandomDisks(diskCount)
	if err != nil {
		t.Fatal(err)
	}
	defer removeRoots(diskDirs)

	objLayer, _, err := initObjectLayer(mustGetNewEndpointList(diskDirs...))
	if err != nil {
		t.Fatal(err)
	}

	bucket := getRandomBucketName()
	if err = objLayer.MakeBucketWithLocation(bucket, ""); err != nil {
		t.Fatal(err)
	}

	// Drop the bucket volume from the first three disks.
	xlLayer := objLayer.(*xlObjects)
	for _, disk := range xlLayer.storageDisks[:3] {
		if err = disk.DeleteVol(bucket); err != nil {
			t.Fatal(err)
		}
	}

	// Read quorum is half of the disks, write quorum one more than that.
	half := len(xlLayer.storageDisks) / 2
	readQuorum, writeQuorum := half, half+1

	// Heal the missing buckets.
	err = quickHeal(*xlLayer, writeQuorum, readQuorum)
	if err != nil {
		t.Fatal(err)
	}

	// The bucket volume must be back on each of the three disks.
	for _, disk := range xlLayer.storageDisks[:3] {
		if _, err = disk.StatVol(bucket); err != nil {
			t.Fatal(err)
		}
	}

	// Make the first disk report itself as unformatted; quickHeal is
	// expected to surface exactly that error.
	wrapped, isRetry := xlLayer.storageDisks[0].(*retryStorage)
	if !isRetry {
		t.Fatal("storage disk is not *retryStorage type")
	}
	xlLayer.storageDisks[0] = newNaughtyDisk(wrapped, nil, errUnformattedDisk)
	err = quickHeal(*xlLayer, writeQuorum, readQuorum)
	if err != errUnformattedDisk {
		t.Fatal(err)
	}

	// Fresh setup with the first disk set to nil: quickHeal should
	// return nil.
	diskDirs, err = getRandomDisks(diskCount)
	if err != nil {
		t.Fatal(err)
	}
	defer removeRoots(diskDirs)

	objLayer, _, err = initObjectLayer(mustGetNewEndpointList(diskDirs...))
	if err != nil {
		t.Fatal(err)
	}
	xlLayer = objLayer.(*xlObjects)
	xlLayer.storageDisks[0] = nil
	err = quickHeal(*xlLayer, writeQuorum, readQuorum)
	if err != nil {
		t.Fatal("Got an unexpected error: ", err)
	}

	// Fresh setup with the first disk reporting errDiskNotFound: quickHeal
	// should return nil here as well.
	diskDirs, err = getRandomDisks(diskCount)
	if err != nil {
		t.Fatal(err)
	}
	defer removeRoots(diskDirs)

	objLayer, _, err = initObjectLayer(mustGetNewEndpointList(diskDirs...))
	if err != nil {
		t.Fatal(err)
	}
	xlLayer = objLayer.(*xlObjects)
	wrapped, isRetry = xlLayer.storageDisks[0].(*retryStorage)
	if !isRetry {
		t.Fatal("storage disk is not *retryStorage type")
	}
	xlLayer.storageDisks[0] = newNaughtyDisk(wrapped, nil, errDiskNotFound)
	err = quickHeal(*xlLayer, writeQuorum, readQuorum)
	if err != nil {
		t.Fatal("Got an unexpected error: ", err)
	}
}
// TestListBucketsHeal creates one bucket that stays intact and one that is
// deleted from three disks, then checks that ListBucketsHeal returns exactly
// those two buckets.
func TestListBucketsHeal(t *testing.T) {
	configDir, err := newTestConfig("us-east-1")
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(configDir)

	const diskCount = 16
	diskDirs, err := getRandomDisks(diskCount)
	if err != nil {
		t.Fatal(err)
	}
	defer removeRoots(diskDirs)

	objLayer, _, err := initObjectLayer(mustGetNewEndpointList(diskDirs...))
	if err != nil {
		t.Fatal(err)
	}

	// This bucket is left untouched on every disk.
	const intactBucket = "sanebucket"
	err = objLayer.MakeBucketWithLocation(intactBucket, "")
	if err != nil {
		t.Fatal(err)
	}

	// This bucket is removed from some of the disks below.
	damagedBucket := getRandomBucketName()
	err = objLayer.MakeBucketWithLocation(damagedBucket, "")
	if err != nil {
		t.Fatal(err)
	}

	// Delete the second bucket's volume on disks 0, 1 and 2.
	xlLayer := objLayer.(*xlObjects)
	for _, disk := range xlLayer.storageDisks[:3] {
		if err = disk.DeleteVol(damagedBucket); err != nil {
			t.Fatal(err)
		}
	}

	healList, err := xlLayer.ListBucketsHeal()
	if err != nil {
		t.Fatal(err)
	}

	// The result must have as many entries as there are expected names ...
	wanted := set.CreateStringSet(intactBucket, damagedBucket)
	if len(healList) != len(wanted) {
		t.Fatalf("Length of missing buckets is incorrect, expected: 2, found: %d", len(healList))
	}

	// ... and every returned name must be one of the expected ones.
	for _, entry := range healList {
		if wanted.Contains(entry.Name) {
			continue
		}
		t.Errorf("Bucket %v is missing from bucket set", entry.Name)
	}
}
// TestHealObjectXL uploads a two-part multipart object, deletes its xl.json
// from the first disk and verifies that HealObject restores it. It then sets
// more than half of the disks to nil and verifies that healing fails with
// errXLReadQuorum.
func TestHealObjectXL(t *testing.T) {
	root, err := newTestConfig(globalMinioDefaultRegion)
	if err != nil {
		t.Fatal(err)
	}
	defer os.RemoveAll(root)

	nDisks := 16
	fsDirs, err := getRandomDisks(nDisks)
	if err != nil {
		t.Fatal(err)
	}
	defer removeRoots(fsDirs)

	// Initialize the object layer on the fresh disks.
	obj, _, err := initObjectLayer(mustGetNewEndpointList(fsDirs...))
	if err != nil {
		t.Fatal(err)
	}

	bucket := "bucket"
	object := "object"
	// Payload of a single part: 5 MiB of the byte 'a'.
	data := bytes.Repeat([]byte("a"), 5*1024*1024)

	err = obj.MakeBucketWithLocation(bucket, "")
	if err != nil {
		t.Fatalf("Failed to make a bucket - %v", err)
	}

	// Create an object with multiple parts uploaded in decreasing
	// part number.
	uploadID, err := obj.NewMultipartUpload(bucket, object, nil)
	if err != nil {
		t.Fatalf("Failed to create a multipart upload - %v", err)
	}

	var uploadedParts []CompletePart
	for _, partID := range []int{2, 1} {
		pInfo, err1 := obj.PutObjectPart(bucket, object, uploadID, partID, mustGetHashReader(t, bytes.NewReader(data), int64(len(data)), "", ""))
		if err1 != nil {
			t.Fatalf("Failed to upload a part - %v", err1)
		}
		uploadedParts = append(uploadedParts, CompletePart{
			PartNumber: pInfo.PartNumber,
			ETag:       pInfo.ETag,
		})
	}

	_, err = obj.CompleteMultipartUpload(bucket, object, uploadID, uploadedParts)
	if err != nil {
		t.Fatalf("Failed to complete multipart upload - %v", err)
	}

	// Remove the object's xl.json from the first disk.
	xl := obj.(*xlObjects)
	firstDisk := xl.storageDisks[0]
	err = firstDisk.DeleteFile(bucket, filepath.Join(object, xlMetaJSONFile))
	if err != nil {
		t.Fatalf("Failed to delete a file - %v", err)
	}

	_, err = obj.HealObject(bucket, object, false)
	if err != nil {
		t.Fatalf("Failed to heal object - %v", err)
	}

	// Healing must have put xl.json back on the first disk.
	_, err = firstDisk.StatFile(bucket, filepath.Join(object, xlMetaJSONFile))
	if err != nil {
		t.Errorf("Expected xl.json file to be present but stat failed - %v", err)
	}

	// Nil more than half the disks, to remove quorum.
	for i := 0; i <= len(xl.storageDisks)/2; i++ {
		xl.storageDisks[i] = nil
	}

	// Try healing now. Since the majority of xl.json files are not
	// available, object quorum can't be read properly and the error is
	// expected to be errXLReadQuorum.
	_, err = obj.HealObject(bucket, object, false)
	if errors.Cause(err) != errXLReadQuorum {
		t.Errorf("Expected %v but received %v", errXLReadQuorum, err)
	}
}