minio/xl-objects.go

582 lines
17 KiB
Go
Raw Normal View History

/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"crypto/md5"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"path"
"path/filepath"
"strings"
"sync"
"github.com/minio/minio/pkg/mimedb"
)
// File names used by the XL object layer for on-disk metadata.
const (
// multipartSuffix - suffix appended to "00000" to form multipartMetaFile.
multipartSuffix = ".minio.multipart"
// multipartMetaFile - name of the JSON file kept under an object's path;
// its presence is what isMultipartObject checks for, and its content is
// decoded into MultipartObjectInfo by getMultipartObjectInfo.
multipartMetaFile = "00000" + multipartSuffix
// formatConfigFile - NOTE(review): not referenced in this part of the file;
// presumably the name of the saved XL format file - confirm.
formatConfigFile = "format.json"
)
// xlObjects - Implements the XL object layer; newXLObjects returns it
// as an ObjectLayer.
type xlObjects struct {
// storage - StorageAPI obtained from newXL, used by the bucket and
// object methods of this type.
storage StorageAPI
// listObjectMap - tree walkers keyed by listing parameters.
// NOTE(review): not read or written in the visible code; presumably
// used by the shared listing code - confirm.
listObjectMap map[listParams][]*treeWalker
// listObjectMapMutex - NOTE(review): presumably guards listObjectMap - confirm.
listObjectMapMutex *sync.Mutex
}
// Limits on the number of disks accepted by the XL backend.
const (
	// minErasureBlocks - smallest number of disks accepted.
	minErasureBlocks = 8
	// maxErasureBlocks - largest number of disks accepted.
	maxErasureBlocks = 16
)

// Errors reported by checkSufficientDisks.
var (
	// errMaxDisks - more disks than maxErasureBlocks were given.
	errMaxDisks = errors.New("Number of disks are higher than supported maximum count '16'")
	// errMinDisks - fewer disks than minErasureBlocks were given.
	errMinDisks = errors.New("Number of disks are smaller than supported minimum count '8'")
	// errNumDisks - an odd number of disks was given.
	errNumDisks = errors.New("Number of disks should be multiples of '2'")
)

// checkSufficientDisks - validates the number of disks. Only an even
// count between minErasureBlocks and maxErasureBlocks (8, 10, 12, 14
// or 16) is accepted. The upper bound is checked first, then the lower
// bound, then evenness.
func checkSufficientDisks(disks []string) error {
	switch total := len(disks); {
	case total > maxErasureBlocks:
		return errMaxDisks
	case total < minErasureBlocks:
		return errMinDisks
	case total%2 != 0:
		return errNumDisks
	}
	return nil
}
// newStorageLayer - initializes the storage backend for one disk
// argument. A disk string that contains ':' and for which
// filepath.VolumeName reports no volume is handed to the rpc client;
// anything else is initialized as a posix filesystem disk.
func newStorageLayer(disk string) (storage StorageAPI, err error) {
	isNetworkDisk := strings.ContainsRune(disk, ':') && filepath.VolumeName(disk) == ""
	if isNetworkDisk {
		// Initialize rpc client storage API.
		return newRPCClient(disk)
	}
	// Initialize filesystem storage API.
	return newPosix(disk)
}
// bootstrapDisks - initializes a storage layer for every disk, in order.
// errDiskNotFound is deliberately tolerated here so that bootstrapping
// still succeeds; the entry for such a disk is whatever newStorageLayer
// returned alongside that error. Any other error aborts the bootstrap.
func bootstrapDisks(disks []string) ([]StorageAPI, error) {
	storageDisks := make([]StorageAPI, 0, len(disks))
	for _, disk := range disks {
		storage, err := newStorageLayer(disk)
		if err != nil && err != errDiskNotFound {
			return nil, err
		}
		// Subsequent calls to XL/Erasure are expected to manage
		// errors related to disks that were not found.
		storageDisks = append(storageDisks, storage)
	}
	return storageDisks, nil
}
// newXLObjects - initializes a new XL object layer over the given disks.
// The disk count is validated, every disk is bootstrapped, the saved
// format is loaded (or freshly written when the disks are unformatted)
// and the resulting disks are wrapped by newXL.
func newXLObjects(disks []string) (ObjectLayer, error) {
	err := checkSufficientDisks(disks)
	if err != nil {
		return nil, err
	}

	storageDisks, err := bootstrapDisks(disks)
	if err != nil {
		return nil, err
	}

	// Initialize object layer - like creating minioMetaBucket, cleaning up tmp files etc.
	initObjectLayer(storageDisks...)

	// Load saved XL format.json and validate.
	newDisks, err := loadFormatXL(storageDisks)
	if err == errUnformattedDisk {
		// Disks carry no format yet, save a new XL format.
		if errSave := initFormatXL(storageDisks); errSave != nil {
			return nil, errSave
		}
		newDisks = storageDisks
	} else if err != nil {
		// errCorruptedDisk - error.
		return nil, fmt.Errorf("Unable to recognize backend format, %s", err)
	}

	// FIXME: healFormatXL(newDisks)

	storage, err := newXL(newDisks)
	if err != nil {
		return nil, err
	}

	// Successfully initialized object layer.
	layer := xlObjects{
		storage:            storage,
		listObjectMap:      make(map[listParams][]*treeWalker),
		listObjectMapMutex: &sync.Mutex{},
	}
	return layer, nil
}
/// Bucket operations
// MakeBucket - creates a bucket while holding the write lock on the
// bucket's namespace entry.
func (xl xlObjects) MakeBucket(bucket string) error {
	// Bucket level locks are taken with an empty object name.
	const noObject = ""
	nsMutex.Lock(bucket, noObject)
	defer nsMutex.Unlock(bucket, noObject)

	return makeBucket(xl.storage, bucket)
}
// GetBucketInfo - returns the bucket's info while holding the read lock
// on the bucket's namespace entry.
func (xl xlObjects) GetBucketInfo(bucket string) (BucketInfo, error) {
	// Bucket level locks are taken with an empty object name.
	const noObject = ""
	nsMutex.RLock(bucket, noObject)
	defer nsMutex.RUnlock(bucket, noObject)

	return getBucketInfo(xl.storage, bucket)
}
// ListBuckets - returns the buckets reported by the storage layer.
// No namespace lock is taken.
func (xl xlObjects) ListBuckets() ([]BucketInfo, error) {
	buckets, err := listBuckets(xl.storage)
	return buckets, err
}
// DeleteBucket - deletes a bucket while holding the write lock on the
// bucket's namespace entry.
func (xl xlObjects) DeleteBucket(bucket string) error {
	nsMutex.Lock(bucket, "")
	// The unlock must be deferred: releasing the lock before calling
	// deleteBucket would let the deletion run unprotected.
	defer nsMutex.Unlock(bucket, "")
	return deleteBucket(xl.storage, bucket)
}
/// Object Operations
// GetObject - returns a reader over the object's content starting at
// startOffset.
//
// A regular object is served straight from the storage layer. A
// multipart object is served through a pipe: a go-routine reads the
// parts one after the other and copies them into the pipe writer, and
// the pipe reader is handed to the caller.
func (xl xlObjects) GetObject(bucket, object string, startOffset int64) (io.ReadCloser, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return nil, BucketNameInvalid{Bucket: bucket}
	}
	// Verify if object is valid.
	if !IsValidObjectName(object) {
		return nil, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	nsMutex.RLock(bucket, object)
	defer nsMutex.RUnlock(bucket, object)

	// Regular (non multipart) object.
	if !isMultipartObject(xl.storage, bucket, object) {
		if _, err := xl.storage.StatFile(bucket, object); err != nil {
			return nil, toObjectErr(err, bucket, object)
		}
		reader, err := xl.storage.ReadFile(bucket, object, startOffset)
		if err != nil {
			return nil, toObjectErr(err, bucket, object)
		}
		return reader, nil
	}

	// Multipart object, find the part and the offset within it where reading starts.
	info, err := getMultipartObjectInfo(xl.storage, bucket, object)
	if err != nil {
		return nil, toObjectErr(err, bucket, object)
	}
	partIndex, offset, err := info.GetPartNumberOffset(startOffset)
	if err != nil {
		return nil, toObjectErr(err, bucket, object)
	}

	fileReader, fileWriter := io.Pipe()

	// Take the read lock a second time on behalf of the go-routine below.
	// This function returns (and its deferred RUnlock runs) before the
	// go-routine is done, so the go-routine releases its own lock when it ends.
	nsMutex.RLock(bucket, object)
	go func() {
		defer nsMutex.RUnlock(bucket, object)

		for ; partIndex < len(info.Parts); partIndex++ {
			partPath := pathJoin(object, partNumToPartFileName(info.Parts[partIndex].PartNumber))
			partReader, readErr := xl.storage.ReadFile(bucket, partPath, offset)
			if readErr != nil {
				fileWriter.CloseWithError(readErr)
				return
			}
			// Only the first part read can start at a non-zero offset.
			offset = 0

			if _, copyErr := io.Copy(fileWriter, partReader); copyErr != nil {
				// Propagate the failure to a pipe backed reader, plainly close any other.
				if pipeReader, isPipe := partReader.(*io.PipeReader); isPipe {
					pipeReader.CloseWithError(copyErr)
				} else {
					partReader.Close()
				}
				fileWriter.CloseWithError(copyErr)
				return
			}

			// Close the reader of this part, not closing leaks
			// underlying file descriptors.
			partReader.Close()
		}
		fileWriter.Close()
	}()

	return fileReader, nil
}
// getMultipartObjectInfo - reads the multipart metadata file stored
// under the object's path and decodes its JSON content into a
// MultipartObjectInfo.
//
// Returns the zero MultipartObjectInfo along with the error when the
// metadata file cannot be read or decoded.
func getMultipartObjectInfo(storage StorageAPI, bucket, object string) (info MultipartObjectInfo, err error) {
	// Read the metadata file from its beginning.
	offset := int64(0)
	r, err := storage.ReadFile(bucket, pathJoin(object, multipartMetaFile), offset)
	if err != nil {
		return MultipartObjectInfo{}, err
	}
	// Release the reader once decoding is over, on success as well as on
	// failure; leaving it open leaks the underlying resource.
	defer r.Close()

	if err = json.NewDecoder(r).Decode(&info); err != nil {
		return MultipartObjectInfo{}, err
	}
	return info, nil
}
// getObjectInfo - returns the ObjectInfo of an object, without taking
// any namespace lock.
//
// The object is first looked up as a regular upload: size, mod time and
// directory flag come from StatFile, while md5sum, content type and
// content encoding come from the object's "meta.json". When "meta.json"
// has no content type it is guessed from the object's extension using
// mimedb, falling back to "application/octet-stream".
//
// When StatFile reports errFileNotFound the object is looked up as a
// multipart upload and the info comes from its multipart metadata file.
// Errors on that path, and any other StatFile error, are returned as is.
func (xl xlObjects) getObjectInfo(bucket, object string) (objInfo ObjectInfo, err error) {
	objInfo.Bucket = bucket
	objInfo.Name = object

	// First see if the object was a simple-PUT upload.
	fi, err := xl.storage.StatFile(bucket, object)
	if err != nil {
		if err != errFileNotFound {
			return ObjectInfo{}, err
		}
		// Check if the object was multipart upload.
		var info MultipartObjectInfo
		info, err = getMultipartObjectInfo(xl.storage, bucket, object)
		if err != nil {
			return ObjectInfo{}, err
		}
		objInfo.Size = info.Size
		objInfo.ModTime = info.ModTime
		objInfo.MD5Sum = info.MD5Sum
		objInfo.ContentType = info.ContentType
		objInfo.ContentEncoding = info.ContentEncoding
		return objInfo, nil
	}

	// Regular object, load its metadata.
	offset := int64(0) // To read entire content
	r, err := xl.storage.ReadFile(bucket, pathJoin(object, "meta.json"), offset)
	if err != nil {
		return ObjectInfo{}, toObjectErr(err, bucket, object)
	}
	// Release the reader once decoding is over, on success as well as on
	// failure; leaving it open leaks the underlying resource.
	defer r.Close()

	metadata := make(map[string]string)
	if err = json.NewDecoder(r).Decode(&metadata); err != nil {
		return ObjectInfo{}, toObjectErr(err, bucket, object)
	}

	contentType := metadata["content-type"]
	if len(contentType) == 0 {
		contentType = "application/octet-stream"
		// Guess the content type from the extension, if it is a known one.
		if objectExt := filepath.Ext(object); objectExt != "" {
			content, ok := mimedb.DB[strings.ToLower(strings.TrimPrefix(objectExt, "."))]
			if ok {
				contentType = content.ContentType
			}
		}
	}

	objInfo.Size = fi.Size
	objInfo.IsDir = fi.Mode.IsDir()
	objInfo.ModTime = fi.ModTime
	objInfo.MD5Sum = metadata["md5Sum"]
	objInfo.ContentType = contentType
	objInfo.ContentEncoding = metadata["content-encoding"]
	return objInfo, nil
}
// GetObjectInfo - validates the bucket and object names, then returns
// the object's info while holding the read lock on the object. Lookup
// errors are converted with toObjectErr.
func (xl xlObjects) GetObjectInfo(bucket, object string) (ObjectInfo, error) {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return ObjectInfo{}, BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return ObjectInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	nsMutex.RLock(bucket, object)
	defer nsMutex.RUnlock(bucket, object)

	objInfo, err := xl.getObjectInfo(bucket, object)
	if err == nil {
		return objInfo, nil
	}
	return ObjectInfo{}, toObjectErr(err, bucket, object)
}
// PutObject - creates an object and returns the hex encoded md5sum of
// the data written.
//
// The data is first streamed into a temporary file under
// minioMetaBucket while its md5sum is computed. When size is greater
// than zero exactly size bytes are copied, otherwise data is read until
// EOF. If metadata carries a non-empty "md5Sum" it must match the
// computed one, else BadDigest is returned; if it carries none, the
// computed md5sum is stored into metadata (a nil metadata is replaced
// by a fresh map). Any object already present at the destination,
// multipart or regular, is deleted, then the temporary file is renamed
// into place and metadata is written as "meta.json" next to it.
//
// The write lock on the object is held for the whole operation.
func (xl xlObjects) PutObject(bucket string, object string, size int64, data io.Reader, metadata map[string]string) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify bucket exists.
	if !isBucketExist(xl.storage, bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	// No metadata is set, allocate a new one.
	if metadata == nil {
		metadata = make(map[string]string)
	}

	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	tempObj := path.Join(tmpMetaPrefix, bucket, object)
	fileWriter, err := xl.storage.CreateFile(minioMetaBucket, tempObj)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Everything written to the temporary file is also fed to the md5 hasher.
	md5Writer := md5.New()
	multiWriter := io.MultiWriter(md5Writer, fileWriter)

	if size > 0 {
		_, err = io.CopyN(multiWriter, data, size)
	} else {
		_, err = io.Copy(multiWriter, data)
	}
	if err != nil {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))
	// Update the md5sum if not set with the newly calculated one.
	if len(metadata["md5Sum"]) == 0 {
		metadata["md5Sum"] = newMD5Hex
	}
	// A md5sum supplied by the caller must match what was written.
	if md5Hex := metadata["md5Sum"]; newMD5Hex != md5Hex {
		if err = safeCloseAndRemove(fileWriter); err != nil {
			return "", toObjectErr(err, bucket, object)
		}
		return "", BadDigest{md5Hex, newMD5Hex}
	}

	err = fileWriter.Close()
	if err != nil {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Check if an object is present as one of the parent dir.
	if err = xl.parentDirIsObject(bucket, path.Dir(object)); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Delete if an object already exists.
	// FIXME: rename it to tmp file and delete only after
	// the newly uploaded file is renamed from tmp location to
	// the original location.
	//
	// Both branches must fall through to the rename below; returning
	// right after the deletion would report success while the new data
	// is still sitting in the temporary location.
	if isMultipartObject(xl.storage, bucket, object) {
		err = xl.deleteMultipartObject(bucket, object)
	} else {
		err = xl.deleteObject(bucket, object)
	}
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	err = xl.storage.RenameFile(minioMetaBucket, tempObj, bucket, object)
	if err != nil {
		if dErr := xl.storage.DeleteFile(minioMetaBucket, tempObj); dErr != nil {
			return "", toObjectErr(dErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Write the metadata to a temporary "meta.json" first.
	tempMetaJSONFile := path.Join(tmpMetaPrefix, bucket, object, "meta.json")
	metaWriter, err := xl.storage.CreateFile(minioMetaBucket, tempMetaJSONFile)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	encoder := json.NewEncoder(metaWriter)
	err = encoder.Encode(&metadata)
	if err != nil {
		if clErr := safeCloseAndRemove(metaWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}
	if err = metaWriter.Close(); err != nil {
		// Keep the close error in err; the cleanup error gets its own
		// variable so that a successful cleanup cannot hide the failure.
		if clErr := safeCloseAndRemove(metaWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Move "meta.json" next to the object.
	metaJSONFile := path.Join(object, "meta.json")
	err = xl.storage.RenameFile(minioMetaBucket, tempMetaJSONFile, bucket, metaJSONFile)
	if err != nil {
		if derr := xl.storage.DeleteFile(minioMetaBucket, tempMetaJSONFile); derr != nil {
			return "", toObjectErr(derr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Return md5sum, successfully wrote object.
	return newMD5Hex, nil
}
// isMultipartObject - reports whether the object is a special multipart
// object, that is whether the multipart metadata file can be stat'ed
// under the object's path.
//
// Any stat failure yields false; failures other than errFileNotFound
// are additionally logged.
func isMultipartObject(storage StorageAPI, bucket, object string) bool {
	metaPath := pathJoin(object, multipartMetaFile)
	_, err := storage.StatFile(bucket, metaPath)
	if err == nil {
		return true
	}
	if err != errFileNotFound {
		// Separate the bucket from the path so that the logged location is readable.
		errorIf(err, "Failed to stat file "+bucket+"/"+metaPath)
	}
	return false
}
// deleteMultipartObject - deletes a multipart object: every part listed
// in the multipart metadata file is deleted concurrently, and once all
// of them are gone the metadata file itself is deleted.
func (xl xlObjects) deleteMultipartObject(bucket, object string) error {
	// Get parts info.
	info, err := getMultipartObjectInfo(xl.storage, bucket, object)
	if err != nil {
		return err
	}

	// One error slot per part, each go-routine writes only to its own slot.
	partErrs := make([]error, len(info.Parts))
	var wg sync.WaitGroup
	for i, p := range info.Parts {
		wg.Add(1)
		// Start deleting parts in routine.
		go func(slot int, part MultipartPartInfo) {
			defer wg.Done()
			partPath := pathJoin(object, partNumToPartFileName(part.PartNumber))
			partErrs[slot] = xl.storage.DeleteFile(bucket, partPath)
		}(i, p)
	}
	// Wait for all the deletes to finish.
	wg.Wait()

	// If any part could not be removed report an "unexpected" error, as
	// returning the underlying error might be misleading. For ex. an
	// errFileNotFound from DeleteFile() would be seen by the client as
	// ObjectNotFound.
	for _, partErr := range partErrs {
		if partErr != nil {
			return errUnexpected
		}
	}

	// All parts are gone, remove the multipart metadata file.
	return xl.storage.DeleteFile(bucket, pathJoin(object, multipartMetaFile))
}
// deleteObject - deletes a regular object: first its "meta.json", then
// the object file itself. A file that does not exist is ignored, any
// other failure stops the deletion and is returned.
func (xl xlObjects) deleteObject(bucket, object string) error {
	targets := []string{
		path.Join(object, "meta.json"),
		object,
	}
	for _, target := range targets {
		err := xl.storage.DeleteFile(bucket, target)
		if err != nil && err != errFileNotFound {
			return err
		}
	}
	return nil
}
// DeleteObject - validates the bucket and object names, then deletes
// the object while holding the write lock on it. Multipart and regular
// objects are removed through their respective helpers, and a failure
// is converted with toObjectErr.
func (xl xlObjects) DeleteObject(bucket, object string) error {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	var err error
	if isMultipartObject(xl.storage, bucket, object) {
		err = xl.deleteMultipartObject(bucket, object)
	} else {
		err = xl.deleteObject(bucket, object)
	}
	if err != nil {
		return toObjectErr(err, bucket, object)
	}
	return nil
}
// ListObjects - lists the objects of a bucket by delegating to
// listObjectsCommon with this object layer and the caller's prefix,
// marker, delimiter and maxKeys.
func (xl xlObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
	result, err := listObjectsCommon(xl, bucket, prefix, marker, delimiter, maxKeys)
	return result, err
}