XL/FS: Rewrite in new format.

This commit is contained in:
Harshavardhana 2016-05-20 20:48:47 -07:00
parent 63c65b4635
commit 293d246f95
36 changed files with 3560 additions and 3076 deletions

View File

@ -0,0 +1,4 @@
{
"format": "fs",
"version": "1"
}

View File

@ -0,0 +1,14 @@
{
"version": "1",
"format": "fs",
"minio": {
"release": "DEVELOPMENT.GOGET"
},
"parts": [
{
"name": "object1",
"size": 29,
"eTag": ""
}
]
}

View File

@ -0,0 +1,10 @@
{
"version": "1",
"format": "fs",
"uploadIds": [
{
"uploadID": "id",
"startTime": "time"
}
]
}

View File

@ -0,0 +1,20 @@
{
"xl": {
"jbod": [
"8aa2b1bc-0e5a-49e0-8221-05228336b040",
"3467a69b-0266-478a-9e10-e819447e4545",
"d4a4505b-4e4f-4864-befd-4f36adb0bc66",
"592b6583-ca26-47af-b991-ba6d097e34e8",
"c7ef69f0-dbf5-4c0e-b167-d30a441bad7e",
"f0b36ea3-fe96-4f2b-bced-22c7f33e0e0c",
"b83abf39-e39d-4e7b-8e16-6f9953455a48",
"7d63dfc9-5441-4243-bd36-de8db0691982",
"c1bbffc5-81f9-4251-9398-33a959b3ce37",
"64408f94-26e0-4277-9593-2d703f4d5a91"
],
"disk": "8aa2b1bc-0e5a-49e0-8221-05228336b040",
"version": "1"
},
"format": "xl",
"version": "1"
}

View File

@ -0,0 +1,10 @@
{
"version": "1",
"format": "xl",
"uploadIds": [
{
"uploadID": "id",
"startTime": "time"
}
]
}

View File

@ -0,0 +1,44 @@
{
"parts": [
{
"size": 5242880,
"etag": "3565c6e741e69a007a5ac7db893a62b5",
"name": "object1"
},
{
"size": 5242880,
"etag": "d416712335c280ab1e39498552937764",
"name": "object2"
},
{
"size": 4338324,
"etag": "8a98c5c54d81c6c95ed9bdcaeb941aaf",
"name": "object3"
}
],
"meta": {
"md5Sum": "97586a5290d4f5a41328062d6a7da593-3",
"content-type": "application\/octet-stream",
"content-encoding": ""
},
"minio": {
"release": "DEVELOPMENT.GOGET"
},
"erasure": {
"index": 2,
"distribution": [ 1, 3, 4, 2, 5, 8, 7, 6, 9 ],
"blockSize": 4194304,
"parity": 5,
"data": 5
},
"checksum": {
"enable": false
},
"stat": {
"version": 0,
"modTime": "2016-05-24T00:09:40.122390255Z",
"size": 14824084
},
"format": "xl",
"version": "1"
}

172
erasure-createfile.go Normal file
View File

@ -0,0 +1,172 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"io"
"sync"
)
// cleanupCreateFileOps - cleans up all the temporary files and other
// temporary data upon any failure.
//
// Every non-nil writer is handed to safeCloseAndRemove, then the file at
// volume/path is deleted from each configured storage disk. Cleanup is
// best-effort: individual failures are reported through errorIf and never
// stop the remaining work.
func (e erasure) cleanupCreateFileOps(volume, path string, writers []io.WriteCloser) {
	// Close and remove temporary writers.
	for _, writer := range writers {
		// A slot stays nil when writer initialization failed part-way
		// through; there is nothing to close for that disk.
		if writer == nil {
			continue
		}
		if err := safeCloseAndRemove(writer); err != nil {
			errorIf(err, "Failed to close writer.")
		}
	}
	// Remove any temporary written data.
	for _, disk := range e.storageDisks {
		if err := disk.DeleteFile(volume, path); err != nil {
			errorIf(err, "Unable to delete file.")
		}
	}
}
// writeErasure reads blocks of up to erasureBlockSize bytes from reader,
// erasure codes each block and writes the resulting shards to all configured
// storage disks.
//
// On any failure the temporary writers and partially written files are
// cleaned up and the pipe reader is closed with the error, so the writing
// side of the pipe observes it. wcloser is released when the function
// returns, whatever the outcome.
func (e erasure) writeErasure(volume, path string, reader *io.PipeReader, wcloser *waitCloser) {
	// Release the block writer upon function return.
	defer wcloser.release()

	writers := make([]io.WriteCloser, len(e.storageDisks))

	// abort undoes all partial work and propagates cause to the pipe.
	abort := func(cause error) {
		e.cleanupCreateFileOps(volume, path, writers)
		reader.CloseWithError(cause)
	}

	// Open one writer per disk; a single failure aborts the whole create.
	for i, disk := range e.storageDisks {
		w, err := disk.CreateFile(volume, path)
		if err != nil {
			abort(err)
			return
		}
		writers[i] = w
	}

	// Reusable read buffer holding one erasure block.
	block := make([]byte, erasureBlockSize)
	for {
		// Read up to one full block.
		n, err := io.ReadFull(reader, block)
		if err == io.EOF {
			// Nothing left to read.
			break
		}
		// io.ErrUnexpectedEOF only marks a short (final) block and is
		// not a failure; anything else is.
		if err != nil && err != io.ErrUnexpectedEOF {
			abort(err)
			return
		}
		if n <= 0 {
			continue
		}

		// Split the bytes just read into shards.
		shards, err := e.ReedSolomon.Split(block[:n])
		if err != nil {
			abort(err)
			return
		}
		// Fill in the parity shards from the data shards.
		if err = e.ReedSolomon.Encode(shards); err != nil {
			abort(err)
			return
		}

		// Write every shard to its disk in parallel. Each goroutine only
		// touches its own slot of writeErrs, so no locking is needed.
		var wg sync.WaitGroup
		writeErrs := make([]error, len(writers))
		for i, w := range writers {
			if w == nil {
				continue
			}
			wg.Add(1)
			go func(i int, w io.Writer) {
				defer wg.Done()
				_, writeErrs[i] = w.Write(shards[i])
			}(i, w)
		}
		wg.Wait()

		// Cleanup and return on first non-nil error.
		for _, wErr := range writeErrs {
			if wErr != nil {
				abort(wErr)
				return
			}
		}
	}

	// All blocks written; close every writer, aborting on the first failure.
	for _, w := range writers {
		if w == nil {
			continue
		}
		if err := w.Close(); err != nil {
			abort(err)
			return
		}
	}

	// Close the pipe reader and return.
	reader.Close()
}
// CreateFile - returns a writer for volume/path whose contents are erasure
// coded across the storage disks.
//
// The returned writer is the write end of an in-memory pipe; a background
// goroutine running writeErasure consumes the read end block by block.
// errInvalidArgument is returned when volume or path fails validation.
func (e erasure) CreateFile(volume, path string) (writeCloser io.WriteCloser, err error) {
	// Reject malformed input before starting any work.
	if !isValidVolname(volume) || !isValidPath(path) {
		return nil, errInvalidArgument
	}

	// Pipe connecting the caller (writer side) to the encoder (reader side).
	pr, pw := io.Pipe()

	// The wait closer implements both Write and Close on top of the pipe.
	wcloser := newWaitCloser(pw)

	// Encode in the background while the caller writes.
	go e.writeErasure(volume, path, pr, wcloser)

	// Return the writer, caller should start writing to this.
	return wcloser, nil
}

View File

@ -18,14 +18,12 @@ package main
import ( import (
"errors" "errors"
"fmt"
"io" "io"
slashpath "path"
"sync" "sync"
) )
// ReadFile - read file // ReadFile - decoded erasure coded file.
func (xl XL) ReadFile(volume, path string, startOffset int64) (io.ReadCloser, error) { func (e erasure) ReadFile(volume, path string, startOffset int64) (io.ReadCloser, error) {
// Input validation. // Input validation.
if !isValidVolname(volume) { if !isValidVolname(volume) {
return nil, errInvalidArgument return nil, errInvalidArgument
@ -34,52 +32,34 @@ func (xl XL) ReadFile(volume, path string, startOffset int64) (io.ReadCloser, er
return nil, errInvalidArgument return nil, errInvalidArgument
} }
onlineDisks, metadata, heal, err := xl.listOnlineDisks(volume, path) var wg = &sync.WaitGroup{}
if err != nil {
return nil, err readers := make([]io.ReadCloser, len(e.storageDisks))
for index, disk := range e.storageDisks {
wg.Add(1)
go func(index int, disk StorageAPI) {
defer wg.Done()
// If disk.ReadFile returns error and we don't have read
// quorum it will be taken care as ReedSolomon.Reconstruct()
// will fail later.
offset := int64(0)
if reader, err := disk.ReadFile(volume, path, offset); err == nil {
readers[index] = reader
}
}(index, disk)
} }
if heal { wg.Wait()
// Heal in background safely, since we already have read
// quorum disks. Let the reads continue.
go func() {
hErr := xl.healFile(volume, path)
errorIf(hErr, "Unable to heal file "+volume+"/"+path+".")
}()
}
readers := make([]io.ReadCloser, len(xl.storageDisks))
for index, disk := range onlineDisks {
if disk == nil {
continue
}
erasurePart := slashpath.Join(path, fmt.Sprintf("file.%d", index))
// If disk.ReadFile returns error and we don't have read quorum it will be taken care as
// ReedSolomon.Reconstruct() will fail later.
var reader io.ReadCloser
offset := int64(0)
if reader, err = disk.ReadFile(volume, erasurePart, offset); err == nil {
readers[index] = reader
}
}
// Initialize pipe. // Initialize pipe.
pipeReader, pipeWriter := io.Pipe() pipeReader, pipeWriter := io.Pipe()
go func() { go func() {
var totalLeft = metadata.Stat.Size // Read until EOF.
// Read until the totalLeft. for {
for totalLeft > 0 {
// Figure out the right blockSize as it was encoded before.
var curBlockSize int64
if metadata.Erasure.BlockSize < totalLeft {
curBlockSize = metadata.Erasure.BlockSize
} else {
curBlockSize = totalLeft
}
// Calculate the current encoded block size. // Calculate the current encoded block size.
curEncBlockSize := getEncodedBlockLen(curBlockSize, metadata.Erasure.DataBlocks) curEncBlockSize := getEncodedBlockLen(erasureBlockSize, e.DataBlocks)
enBlocks := make([][]byte, len(xl.storageDisks)) enBlocks := make([][]byte, len(e.storageDisks))
var wg = &sync.WaitGroup{}
// Loop through all readers and read. // Loop through all readers and read.
for index, reader := range readers { for index, reader := range readers {
// Initialize shard slice and fill the data from each parts. // Initialize shard slice and fill the data from each parts.
@ -87,19 +67,28 @@ func (xl XL) ReadFile(volume, path string, startOffset int64) (io.ReadCloser, er
if reader == nil { if reader == nil {
continue continue
} }
// Parallelize reading. // Read the necessary blocks.
wg.Add(1) n, rErr := io.ReadFull(reader, enBlocks[index])
go func(index int, reader io.Reader) { if rErr == io.EOF {
defer wg.Done() // Close the pipe.
// Read the necessary blocks. pipeWriter.Close()
_, rErr := io.ReadFull(reader, enBlocks[index])
if rErr != nil && rErr != io.ErrUnexpectedEOF { // Cleanly close all the underlying data readers.
readers[index] = nil for _, reader := range readers {
if reader == nil {
continue
}
reader.Close()
} }
}(index, reader) return
}
if rErr != nil && rErr != io.ErrUnexpectedEOF {
readers[index].Close()
readers[index] = nil
continue
}
enBlocks[index] = enBlocks[index][:n]
} }
// Wait for the read routines to finish.
wg.Wait()
// Check blocks if they are all zero in length. // Check blocks if they are all zero in length.
if checkBlockSize(enBlocks) == 0 { if checkBlockSize(enBlocks) == 0 {
@ -108,8 +97,7 @@ func (xl XL) ReadFile(volume, path string, startOffset int64) (io.ReadCloser, er
} }
// Verify the blocks. // Verify the blocks.
var ok bool ok, err := e.ReedSolomon.Verify(enBlocks)
ok, err = xl.ReedSolomon.Verify(enBlocks)
if err != nil { if err != nil {
pipeWriter.CloseWithError(err) pipeWriter.CloseWithError(err)
return return
@ -123,13 +111,13 @@ func (xl XL) ReadFile(volume, path string, startOffset int64) (io.ReadCloser, er
enBlocks[index] = nil enBlocks[index] = nil
} }
} }
err = xl.ReedSolomon.Reconstruct(enBlocks) err = e.ReedSolomon.Reconstruct(enBlocks)
if err != nil { if err != nil {
pipeWriter.CloseWithError(err) pipeWriter.CloseWithError(err)
return return
} }
// Verify reconstructed blocks again. // Verify reconstructed blocks again.
ok, err = xl.ReedSolomon.Verify(enBlocks) ok, err = e.ReedSolomon.Verify(enBlocks)
if err != nil { if err != nil {
pipeWriter.CloseWithError(err) pipeWriter.CloseWithError(err)
return return
@ -143,16 +131,14 @@ func (xl XL) ReadFile(volume, path string, startOffset int64) (io.ReadCloser, er
} }
// Get all the data blocks. // Get all the data blocks.
dataBlocks := getDataBlocks(enBlocks, metadata.Erasure.DataBlocks, int(curBlockSize)) dataBlocks := getDataBlocks(enBlocks, e.DataBlocks)
// Verify if the offset is right for the block, if not move to // Verify if the offset is right for the block, if not move to
// the next block. // the next block.
if startOffset > 0 { if startOffset > 0 {
startOffset = startOffset - int64(len(dataBlocks)) startOffset = startOffset - int64(len(dataBlocks))
// Start offset is greater than or equal to zero, skip the dataBlocks. // Start offset is greater than or equal to zero, skip the dataBlocks.
if startOffset >= 0 { if startOffset >= 0 {
totalLeft = totalLeft - metadata.Erasure.BlockSize
continue continue
} }
// Now get back the remaining offset if startOffset is negative. // Now get back the remaining offset if startOffset is negative.
@ -168,20 +154,6 @@ func (xl XL) ReadFile(volume, path string, startOffset int64) (io.ReadCloser, er
// Reset offset to '0' to read rest of the blocks. // Reset offset to '0' to read rest of the blocks.
startOffset = int64(0) startOffset = int64(0)
// Save what's left after reading erasureBlockSize.
totalLeft = totalLeft - metadata.Erasure.BlockSize
}
// Cleanly end the pipe after a successful decoding.
pipeWriter.Close()
// Cleanly close all the underlying data readers.
for _, reader := range readers {
if reader == nil {
continue
}
reader.Close()
} }
}() }()

View File

@ -17,12 +17,19 @@
package main package main
// getDataBlocks - fetches the data block only part of the input encoded blocks. // getDataBlocks - fetches the data block only part of the input encoded blocks.
func getDataBlocks(enBlocks [][]byte, dataBlocks int, curBlockSize int) []byte { func getDataBlocks(enBlocks [][]byte, dataBlocks int) []byte {
var data []byte var data []byte
for _, block := range enBlocks[:dataBlocks] { for _, block := range enBlocks[:dataBlocks] {
data = append(data, block...) var newBlock []byte
// FIXME: Find a better way to skip the padding zeros.
for _, b := range block {
if b == 0 {
continue
}
newBlock = append(newBlock, b)
}
data = append(data, newBlock...)
} }
data = data[:curBlockSize]
return data return data
} }

60
erasure.go Normal file
View File

@ -0,0 +1,60 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"errors"
"github.com/klauspost/reedsolomon"
)
// erasure storage layer: bundles a Reed-Solomon codec with the set of
// storage disks that the encoded shards are written to and read from.
type erasure struct {
ReedSolomon reedsolomon.Encoder // Erasure encoder/decoder.
// Number of data shards the codec was created with (see newErasure).
DataBlocks int
// Number of parity shards the codec was created with (see newErasure).
ParityBlocks int
// Disks used for every file operation; ranged over by index in the
// create and cleanup paths.
storageDisks []StorageAPI
}
// errUnexpected - returned for any unexpected error; its message asks the
// user to report the problem on the project's issue tracker.
var errUnexpected = errors.New("Unexpected error - please report at https://github.com/minio/minio/issues")
// newErasure instantiates a new erasure layer over disks.
//
// Half of the disks hold data shards and the other half parity shards. The
// disk count must therefore be even: with an odd count the codec would
// produce one shard fewer than there are disks, and the per-disk shard
// lookup in writeErasure would index out of range. An odd count is rejected
// with errInvalidArgument; any error from reedsolomon.New (for example for
// an empty disk list) is returned unchanged.
func newErasure(disks []StorageAPI) (*erasure, error) {
	// Every disk must receive exactly one shard.
	if len(disks)%2 != 0 {
		return nil, errInvalidArgument
	}

	// Calculate data and parity blocks.
	dataBlocks, parityBlocks := len(disks)/2, len(disks)/2

	// Initialize reed solomon encoding.
	rs, err := reedsolomon.New(dataBlocks, parityBlocks)
	if err != nil {
		return nil, err
	}

	// Return successfully initialized.
	return &erasure{
		ReedSolomon:  rs,
		DataBlocks:   dataBlocks,
		ParityBlocks: parityBlocks,
		storageDisks: disks,
	}, nil
}

View File

@ -1,150 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"fmt"
"io"
"path"
)
// ListMultipartUploads - list multipart uploads. Delegates unchanged to
// listMultipartUploadsCommon, passing the FS object layer itself.
func (fs fsObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
	result, err := listMultipartUploadsCommon(fs, bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads)
	return result, err
}
// NewMultipartUpload - initialize a new multipart upload, returns a unique id.
//
// The caller supplied meta is deliberately discarded: headers are not saved
// for the FS backend, so an empty map is passed on instead.
func (fs fsObjects) NewMultipartUpload(bucket, object string, meta map[string]string) (string, error) {
	return newMultipartUploadCommon(fs.storage, bucket, object, make(map[string]string))
}
// PutObjectPart - writes the multipart upload chunks. Delegates unchanged to
// putObjectPartCommon using the FS storage backend.
func (fs fsObjects) PutObjectPart(bucket, object, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
	partMD5, err := putObjectPartCommon(fs.storage, bucket, object, uploadID, partID, size, data, md5Hex)
	return partMD5, err
}
// ListObjectParts - lists the parts of a multipart upload. Delegates
// unchanged to listObjectPartsCommon using the FS storage backend.
func (fs fsObjects) ListObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
	partsInfo, err := listObjectPartsCommon(fs.storage, bucket, object, uploadID, partNumberMarker, maxParts)
	return partsInfo, err
}
// CompleteMultipartUpload - concatenates the uploaded parts, in the order
// given by parts, into a temporary object, renames it to bucket/object and
// removes the uploaded parts. Returns the S3 compatible md5sum computed by
// completeMultipartMD5.
//
// Errors: BucketNameInvalid, BucketNotFound, ObjectNameInvalid and
// InvalidUploadID for failed validation; InvalidPart when a listed part is
// missing; PartTooSmall when any part other than the last fails
// isMinAllowedPartSize; storage errors otherwise. Whenever assembling the
// temporary object fails, the temporary writer is closed and removed before
// returning so no partial object is left behind.
func (fs fsObjects) CompleteMultipartUpload(bucket string, object string, uploadID string, parts []completePart) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !isBucketExist(fs.storage, bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	if !isUploadIDExists(fs.storage, bucket, object, uploadID) {
		return "", InvalidUploadID{UploadID: uploadID}
	}

	// Calculate s3 compatible md5sum for complete multipart.
	s3MD5, err := completeMultipartMD5(parts...)
	if err != nil {
		return "", err
	}

	tempObj := path.Join(tmpMetaPrefix, bucket, object, uploadID, incompleteFile)
	fileWriter, err := fs.storage.CreateFile(minioMetaBucket, tempObj)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// abort discards the temporary object and reports cause, unless the
	// discard itself fails, in which case that error takes precedence.
	abort := func(cause error) (string, error) {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", clErr
		}
		return "", cause
	}

	// Loop through all parts, validate them and then commit to disk.
	for i, part := range parts {
		// Construct part suffix.
		partSuffix := fmt.Sprintf("%.5d.%s", part.PartNumber, part.ETag)
		multipartPartFile := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)

		var fi FileInfo
		fi, err = fs.storage.StatFile(minioMetaBucket, multipartPartFile)
		if err != nil {
			if err == errFileNotFound {
				return abort(InvalidPart{})
			}
			return abort(err)
		}
		// All parts except the last part must satisfy the minimum
		// allowed part size.
		if (i < len(parts)-1) && !isMinAllowedPartSize(fi.Size) {
			return abort(PartTooSmall{})
		}

		var fileReader io.ReadCloser
		fileReader, err = fs.storage.ReadFile(minioMetaBucket, multipartPartFile, 0)
		if err != nil {
			if err == errFileNotFound {
				return abort(InvalidPart{})
			}
			return abort(err)
		}
		if _, err = io.Copy(fileWriter, fileReader); err != nil {
			// The copy error is the one worth reporting; closing the
			// reader here only releases the handle.
			_ = fileReader.Close()
			return abort(err)
		}
		if err = fileReader.Close(); err != nil {
			return abort(err)
		}
	}

	if err = fileWriter.Close(); err != nil {
		return abort(err)
	}

	// Rename the file back to original location, if not delete the
	// temporary object.
	err = fs.storage.RenameFile(minioMetaBucket, tempObj, bucket, object)
	if err != nil {
		if derr := fs.storage.DeleteFile(minioMetaBucket, tempObj); derr != nil {
			return "", toObjectErr(derr, minioMetaBucket, tempObj)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Cleanup all the parts if everything else has been safely committed.
	if err = cleanupUploadedParts(fs.storage, bucket, object, uploadID); err != nil {
		return "", err
	}

	// Return md5sum.
	return s3MD5, nil
}
// AbortMultipartUpload - aborts a multipart upload. Delegates unchanged to
// abortMultipartUploadCommon using the FS storage backend.
func (fs fsObjects) AbortMultipartUpload(bucket, object, uploadID string) error {
	err := abortMultipartUploadCommon(fs.storage, bucket, object, uploadID)
	return err
}

106
fs-v1-metadata.go Normal file
View File

@ -0,0 +1,106 @@
package main
import (
"bytes"
"encoding/json"
"io"
"path"
"sort"
)
// fsMetaV1 is the in-memory form of the `fs.json` metadata file. It is
// serialized to and from JSON by WriteTo and ReadFrom.
type fsMetaV1 struct {
// Metadata version string, serialized as "version".
Version string `json:"version"`
// Metadata format identifier, serialized as "format".
Format string `json:"format"`
// Release of the server that wrote the file; writeFSMetadata sets it to
// minioReleaseTag before writing.
Minio struct {
Release string `json:"release"`
} `json:"minio"`
// NOTE(review): presumably toggles checksum verification; nothing in this
// file reads or sets it - confirm against callers.
Checksum struct {
Enable bool `json:"enable"`
} `json:"checksum"`
// Object parts recorded via AddObjectPart; omitted from the JSON when empty.
Parts []objectPartInfo `json:"parts,omitempty"`
}
// ReadFrom - implements io.ReaderFrom for unmarshalling fsMetaV1.
//
// The reader is drained completely and the bytes are decoded as JSON into
// m. n is the number of bytes read; it is reported as 0 when reading fails,
// and as the full byte count even when the JSON decoding fails.
func (m *fsMetaV1) ReadFrom(reader io.Reader) (n int64, err error) {
	var raw bytes.Buffer
	if n, err = raw.ReadFrom(reader); err != nil {
		return 0, err
	}
	return n, json.Unmarshal(raw.Bytes(), m)
}
// WriteTo - implements io.WriterTo for marshalling fsMetaV1.
//
// m is encoded as JSON and written to writer with a single Write call. n is
// the number of bytes that Write reported; it is 0 when encoding fails.
func (m fsMetaV1) WriteTo(writer io.Writer) (n int64, err error) {
	var encoded []byte
	if encoded, err = json.Marshal(m); err != nil {
		return 0, err
	}
	var written int
	written, err = writer.Write(encoded)
	return int64(written), err
}
// SearchObjectPart - returns the index of the first part whose name and
// etag both match, or -1 when there is no such part.
func (m fsMetaV1) SearchObjectPart(name string, etag string) int {
	for idx := range m.Parts {
		candidate := &m.Parts[idx]
		if candidate.Name == name && candidate.ETag == etag {
			return idx
		}
	}
	return -1
}
// AddObjectPart - appends a new part and re-sorts all parts using the
// byPartName ordering, so m.Parts stays ordered after every insertion.
func (m *fsMetaV1) AddObjectPart(name string, etag string, size int64) {
	part := objectPartInfo{
		Name: name,
		ETag: etag,
		Size: size,
	}
	m.Parts = append(m.Parts, part)
	sort.Sort(byPartName(m.Parts))
}
const (
// fsMetaJSONFile is the file name of the FS metadata document; it is
// joined onto an object or prefix path by readFSMetadata and
// writeFSMetadata.
fsMetaJSONFile = "fs.json"
)
// readFSMetadata - reads and decodes `fs.json` stored under object in
// bucket. On any failure a zero fsMetaV1 is returned together with the
// error, never a partially decoded value.
func (fs fsObjects) readFSMetadata(bucket, object string) (fsMeta fsMetaV1, err error) {
	metaPath := path.Join(object, fsMetaJSONFile)
	reader, err := fs.storage.ReadFile(bucket, metaPath, int64(0))
	if err != nil {
		return fsMetaV1{}, err
	}
	defer reader.Close()

	if _, err = fsMeta.ReadFrom(reader); err != nil {
		return fsMetaV1{}, err
	}
	return fsMeta, nil
}
// writeFSMetadata - writes fsMeta as `fs.json` under prefix in bucket.
//
// The release field is stamped with minioReleaseTag before writing. If
// either the write or the final close fails, the file is closed and removed
// via safeCloseAndRemove; a failure of that cleanup is returned in place of
// the original error.
func (fs fsObjects) writeFSMetadata(bucket, prefix string, fsMeta fsMetaV1) error {
	// Record which release produced this metadata.
	fsMeta.Minio.Release = minioReleaseTag

	w, err := fs.storage.CreateFile(bucket, path.Join(prefix, fsMetaJSONFile))
	if err != nil {
		return err
	}

	// Write the document, then close; Close only runs after a clean write.
	if _, err = fsMeta.WriteTo(w); err == nil {
		err = w.Close()
	}
	if err != nil {
		if mErr := safeCloseAndRemove(w); mErr != nil {
			return mErr
		}
		return err
	}
	return nil
}

View File

@ -19,66 +19,39 @@ package main
import ( import (
"crypto/md5" "crypto/md5"
"encoding/hex" "encoding/hex"
"encoding/json"
"fmt" "fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"path" "path"
"sort"
"strconv" "strconv"
"strings" "strings"
"time"
"github.com/skyrings/skyring-common/tools/uuid" "github.com/skyrings/skyring-common/tools/uuid"
) )
const ( // Checks whether bucket exists.
incompleteFile = "00000.incomplete" func (fs fsObjects) isBucketExist(bucket string) bool {
uploadsJSONFile = "uploads.json" // Check whether bucket exists.
) _, err := fs.storage.StatVol(bucket)
// createUploadsJSON - create uploads.json placeholder file.
func createUploadsJSON(storage StorageAPI, bucket, object, uploadID string) error {
// Place holder uploads.json
uploadsPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
uploadsJSONSuffix := fmt.Sprintf("%s.%s", uploadID, uploadsJSONFile)
tmpUploadsPath := path.Join(tmpMetaPrefix, bucket, object, uploadsJSONSuffix)
w, err := storage.CreateFile(minioMetaBucket, uploadsPath)
if err != nil { if err != nil {
return err if err == errVolumeNotFound {
} return false
if err = w.Close(); err != nil {
if clErr := safeCloseAndRemove(w); clErr != nil {
return clErr
} }
return err errorIf(err, "Stat failed on bucket "+bucket+".")
return false
} }
_, err = storage.StatFile(minioMetaBucket, uploadsPath) return true
if err != nil {
if err == errFileNotFound {
err = storage.RenameFile(minioMetaBucket, tmpUploadsPath, minioMetaBucket, uploadsPath)
if err == nil {
return nil
}
}
if derr := storage.DeleteFile(minioMetaBucket, tmpUploadsPath); derr != nil {
return derr
}
return err
}
return nil
} }
/// Common multipart object layer functions. // newMultipartUploadCommon - initialize a new multipart, is a common function for both object layers.
func (fs fsObjects) newMultipartUploadCommon(bucket string, object string, meta map[string]string) (uploadID string, err error) {
// newMultipartUploadCommon - initialize a new multipart, is a common
// function for both object layers.
func newMultipartUploadCommon(storage StorageAPI, bucket string, object string, meta map[string]string) (uploadID string, err error) {
// Verify if bucket name is valid. // Verify if bucket name is valid.
if !IsValidBucketName(bucket) { if !IsValidBucketName(bucket) {
return "", BucketNameInvalid{Bucket: bucket} return "", BucketNameInvalid{Bucket: bucket}
} }
// Verify whether the bucket exists. // Verify whether the bucket exists.
if !isBucketExist(storage, bucket) { if !fs.isBucketExist(bucket) {
return "", BucketNotFound{Bucket: bucket} return "", BucketNotFound{Bucket: bucket}
} }
// Verify if object name is valid. // Verify if object name is valid.
@ -89,266 +62,68 @@ func newMultipartUploadCommon(storage StorageAPI, bucket string, object string,
if meta == nil { if meta == nil {
meta = make(map[string]string) meta = make(map[string]string)
} }
fsMeta := fsMetaV1{}
fsMeta.Format = "fs"
fsMeta.Version = "1"
// This lock needs to be held for any changes to the directory contents of ".minio/multipart/object/" // This lock needs to be held for any changes to the directory contents of ".minio/multipart/object/"
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object)) nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object)) defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))
// Loops through until successfully generates a new unique upload id.
for {
uuid, err := uuid.New()
if err != nil {
return "", err
}
uploadID := uuid.String()
// Create placeholder file 'uploads.json'
err = createUploadsJSON(storage, bucket, object, uploadID)
if err != nil {
return "", err
}
uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID, incompleteFile)
incompleteSuffix := fmt.Sprintf("%s.%s", uploadID, incompleteFile)
tempUploadIDPath := path.Join(tmpMetaPrefix, bucket, object, incompleteSuffix)
if _, err = storage.StatFile(minioMetaBucket, uploadIDPath); err != nil {
if err != errFileNotFound {
return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
}
// uploadIDPath doesn't exist, so create empty file to reserve the name
var w io.WriteCloser
if w, err = storage.CreateFile(minioMetaBucket, tempUploadIDPath); err != nil {
return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
}
// Encode the uploaded metadata into incomplete file. uploadID = getUUID()
encoder := json.NewEncoder(w) initiated := time.Now().UTC()
err = encoder.Encode(&meta) // Create 'uploads.json'
if err != nil { if err = writeUploadJSON(bucket, object, uploadID, initiated, fs.storage); err != nil {
if clErr := safeCloseAndRemove(w); clErr != nil {
return "", toObjectErr(clErr, minioMetaBucket, tempUploadIDPath)
}
return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
}
// Close the writer.
if err = w.Close(); err != nil {
if clErr := safeCloseAndRemove(w); clErr != nil {
return "", toObjectErr(clErr, minioMetaBucket, tempUploadIDPath)
}
return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
}
// Rename the file to the actual location from temporary path.
err = storage.RenameFile(minioMetaBucket, tempUploadIDPath, minioMetaBucket, uploadIDPath)
if err != nil {
if derr := storage.DeleteFile(minioMetaBucket, tempUploadIDPath); derr != nil {
return "", toObjectErr(derr, minioMetaBucket, tempUploadIDPath)
}
return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
}
return uploadID, nil
}
// uploadIDPath already exists.
// loop again to try with different uuid generated.
}
}
// putObjectPartCommon - put object part.
func putObjectPartCommon(storage StorageAPI, bucket string, object string, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
// Verify if bucket is valid.
if !IsValidBucketName(bucket) {
return "", BucketNameInvalid{Bucket: bucket}
}
// Verify whether the bucket exists.
if !isBucketExist(storage, bucket) {
return "", BucketNotFound{Bucket: bucket}
}
if !IsValidObjectName(object) {
return "", ObjectNameInvalid{Bucket: bucket, Object: object}
}
if !isUploadIDExists(storage, bucket, object, uploadID) {
return "", InvalidUploadID{UploadID: uploadID}
}
// Hold read lock on the uploadID so that no one aborts it.
nsMutex.RLock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
defer nsMutex.RUnlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
// Hold write lock on the part so that there is no parallel upload on the part.
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID, strconv.Itoa(partID)))
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID, strconv.Itoa(partID)))
partSuffix := fmt.Sprintf("%s.%.5d", uploadID, partID)
partSuffixPath := path.Join(tmpMetaPrefix, bucket, object, partSuffix)
fileWriter, err := storage.CreateFile(minioMetaBucket, partSuffixPath)
if err != nil {
return "", toObjectErr(err, bucket, object)
}
// Initialize md5 writer.
md5Writer := md5.New()
// Instantiate a new multi writer.
multiWriter := io.MultiWriter(md5Writer, fileWriter)
// Instantiate checksum hashers and create a multiwriter.
if size > 0 {
if _, err = io.CopyN(multiWriter, data, size); err != nil {
if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
return "", toObjectErr(clErr, bucket, object)
}
return "", toObjectErr(err, bucket, object)
}
// Reader shouldn't have more data what mentioned in size argument.
// reading one more byte from the reader to validate it.
// expected to fail, success validates existence of more data in the reader.
if _, err = io.CopyN(ioutil.Discard, data, 1); err == nil {
if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
return "", toObjectErr(clErr, bucket, object)
}
return "", UnExpectedDataSize{Size: int(size)}
}
} else {
if _, err = io.Copy(multiWriter, data); err != nil {
if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
return "", toObjectErr(clErr, bucket, object)
}
return "", toObjectErr(err, bucket, object)
}
}
newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))
if md5Hex != "" {
if newMD5Hex != md5Hex {
if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
return "", toObjectErr(clErr, bucket, object)
}
return "", BadDigest{md5Hex, newMD5Hex}
}
}
err = fileWriter.Close()
if err != nil {
if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
return "", toObjectErr(clErr, bucket, object)
}
return "", err return "", err
} }
uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
partSuffixMD5 := fmt.Sprintf("%.5d.%s", partID, newMD5Hex) tempUploadIDPath := path.Join(tmpMetaPrefix, bucket, object, uploadID)
partSuffixMD5Path := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffixMD5) if err = fs.writeFSMetadata(minioMetaBucket, tempUploadIDPath, fsMeta); err != nil {
if _, err = storage.StatFile(minioMetaBucket, partSuffixMD5Path); err == nil { return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
// Part already uploaded as md5sum matches with the previous part.
// Just delete the temporary file.
if err = storage.DeleteFile(minioMetaBucket, partSuffixPath); err != nil {
return "", toObjectErr(err, minioMetaBucket, partSuffixPath)
}
return newMD5Hex, nil
} }
err = storage.RenameFile(minioMetaBucket, partSuffixPath, minioMetaBucket, partSuffixMD5Path) err = fs.storage.RenameFile(minioMetaBucket, path.Join(tempUploadIDPath, fsMetaJSONFile), minioMetaBucket, path.Join(uploadIDPath, fsMetaJSONFile))
if err != nil { if err != nil {
if derr := storage.DeleteFile(minioMetaBucket, partSuffixPath); derr != nil { if dErr := fs.storage.DeleteFile(minioMetaBucket, path.Join(tempUploadIDPath, fsMetaJSONFile)); dErr != nil {
return "", toObjectErr(derr, minioMetaBucket, partSuffixPath) return "", toObjectErr(dErr, minioMetaBucket, tempUploadIDPath)
} }
return "", toObjectErr(err, minioMetaBucket, partSuffixMD5Path) return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
} }
return newMD5Hex, nil // Return success.
return uploadID, nil
} }
// Wrapper to which removes all the uploaded parts after a successful func isMultipartObject(storage StorageAPI, bucket, prefix string) bool {
// complete multipart upload. _, err := storage.StatFile(bucket, path.Join(prefix, fsMetaJSONFile))
// cleanupUploadedParts - hands the directory of the given upload ID
// (located under mpartMetaPrefix inside minioMetaBucket) to cleanupDir
// and returns cleanupDir's result unchanged.
func cleanupUploadedParts(storage StorageAPI, bucket, object, uploadID string) error {
	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	return cleanupDir(storage, minioMetaBucket, uploadIDPath)
}
// abortMultipartUploadCommon - aborts the multipart upload identified by
// uploadID; shared by both object layers.
//
// The bucket name, bucket existence, object name and upload ID are
// validated in that order and the first failure is returned as
// BucketNameInvalid, BucketNotFound, ObjectNameInvalid or
// InvalidUploadID. The upload's parts are then cleaned up while holding
// the lock on the upload ID path. 'uploads.json' is deleted afterwards
// unless the object's multipart directory still lists more than one
// entry.
func abortMultipartUploadCommon(storage StorageAPI, bucket, object, uploadID string) error {
	switch {
	case !IsValidBucketName(bucket):
		return BucketNameInvalid{Bucket: bucket}
	case !isBucketExist(storage, bucket):
		return BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	case !isUploadIDExists(storage, bucket, object, uploadID):
		return InvalidUploadID{UploadID: uploadID}
	}

	// Keep competing complete-multipart-upload and put-object-part calls
	// out while the parts are being removed.
	lockPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
	nsMutex.Lock(minioMetaBucket, lockPath)
	defer nsMutex.Unlock(minioMetaBucket, lockPath)

	if cleanupErr := cleanupUploadedParts(storage, bucket, object, uploadID); cleanupErr != nil {
		return cleanupErr
	}

	// More than one remaining entry means other incomplete upload IDs are
	// still present for this object, so 'uploads.json' must stay. A
	// listing failure is not reported; the delete below is attempted.
	remaining, listErr := storage.ListDir(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object))
	if listErr == nil && len(remaining) > 1 {
		return nil
	}

	return storage.DeleteFile(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile))
}
// isIncompleteMultipart - is object incomplete multipart.
func isIncompleteMultipart(storage StorageAPI, objectPath string) (bool, error) {
_, err := storage.StatFile(minioMetaBucket, path.Join(objectPath, uploadsJSONFile))
if err != nil { if err != nil {
if err == errFileNotFound { if err == errFileNotFound {
return false, nil return false
} }
return false, err errorIf(err, "Unable to access "+path.Join(prefix, fsMetaJSONFile))
return false
} }
return true, nil return true
} }
// listLeafEntries - lists all entries if a given prefixPath is a leaf // listUploadsInfo - list all uploads info.
// directory, returns error if any - returns empty list if prefixPath func (fs fsObjects) listUploadsInfo(prefixPath string) (uploads []uploadInfo, err error) {
// is not a leaf directory. splitPrefixes := strings.SplitN(prefixPath, "/", 3)
func listLeafEntries(storage StorageAPI, prefixPath string) (entries []string, err error) { uploadIDs, err := getUploadIDs(splitPrefixes[1], splitPrefixes[2], fs.storage)
var ok bool
if ok, err = isIncompleteMultipart(storage, prefixPath); err != nil {
return nil, err
} else if !ok {
return nil, nil
}
entries, err = storage.ListDir(minioMetaBucket, prefixPath)
if err != nil { if err != nil {
if err == errFileNotFound {
return []uploadInfo{}, nil
}
return nil, err return nil, err
} }
var newEntries []string uploads = uploadIDs.Uploads
for _, entry := range entries { return uploads, nil
if strings.HasSuffix(entry, slashSeparator) {
newEntries = append(newEntries, entry)
}
}
return newEntries, nil
} }
// listMetaBucketMultipartFiles - list all files at a given prefix inside minioMetaBucket. // listMetaBucketMultipart - list all objects at a given prefix inside minioMetaBucket.
func listMetaBucketMultipartFiles(layer ObjectLayer, prefixPath string, markerPath string, recursive bool, maxKeys int) (fileInfos []FileInfo, eof bool, err error) { func (fs fsObjects) listMetaBucketMultipart(prefixPath string, markerPath string, recursive bool, maxKeys int) (fileInfos []FileInfo, eof bool, err error) {
var storage StorageAPI walker := fs.lookupTreeWalk(listParams{minioMetaBucket, recursive, markerPath, prefixPath})
switch l := layer.(type) {
case fsObjects:
storage = l.storage
case xlObjects:
storage = l.storage
}
if recursive && markerPath != "" {
markerPath = pathJoin(markerPath, incompleteFile)
}
walker := lookupTreeWalk(layer, listParams{minioMetaBucket, recursive, markerPath, prefixPath})
if walker == nil { if walker == nil {
walker = startTreeWalk(layer, minioMetaBucket, prefixPath, markerPath, recursive) walker = fs.startTreeWalk(minioMetaBucket, prefixPath, markerPath, recursive)
} }
// newMaxKeys tracks the size of entries which are going to be // newMaxKeys tracks the size of entries which are going to be
@ -357,7 +132,6 @@ func listMetaBucketMultipartFiles(layer ObjectLayer, prefixPath string, markerPa
// Following loop gathers and filters out special files inside // Following loop gathers and filters out special files inside
// minio meta volume. // minio meta volume.
outerLoop:
for { for {
walkResult, ok := <-walker.ch walkResult, ok := <-walker.ch
if !ok { if !ok {
@ -373,47 +147,41 @@ outerLoop:
} }
return nil, false, toObjectErr(walkResult.err, minioMetaBucket, prefixPath) return nil, false, toObjectErr(walkResult.err, minioMetaBucket, prefixPath)
} }
fi := walkResult.fileInfo fileInfo := walkResult.fileInfo
var entries []string var uploads []uploadInfo
if fi.Mode.IsDir() { if fileInfo.Mode.IsDir() {
// List all the entries if fi.Name is a leaf directory, if // List all the entries if fi.Name is a leaf directory, if
// fi.Name is not a leaf directory then the resulting // fi.Name is not a leaf directory then the resulting
// entries are empty. // entries are empty.
entries, err = listLeafEntries(storage, fi.Name) uploads, err = fs.listUploadsInfo(fileInfo.Name)
if err != nil { if err != nil {
return nil, false, err return nil, false, err
} }
} }
if len(entries) > 0 { if len(uploads) > 0 {
// We reach here for non-recursive case and a leaf entry. for _, upload := range uploads {
sort.Strings(entries) fileInfos = append(fileInfos, FileInfo{
for _, entry := range entries { Name: path.Join(fileInfo.Name, upload.UploadID),
var fileInfo FileInfo ModTime: upload.Initiated,
incompleteUploadFile := path.Join(fi.Name, entry, incompleteFile) })
fileInfo, err = storage.StatFile(minioMetaBucket, incompleteUploadFile)
if err != nil {
return nil, false, err
}
fileInfo.Name = path.Join(fi.Name, entry)
fileInfos = append(fileInfos, fileInfo)
newMaxKeys++ newMaxKeys++
// If we have reached the maxKeys, it means we have listed // If we have reached the maxKeys, it means we have listed
// everything that was requested. // everything that was requested.
if newMaxKeys == maxKeys { if newMaxKeys == maxKeys {
break outerLoop break
} }
} }
} else { } else {
// We reach here for a non-recursive case non-leaf entry // We reach here for a non-recursive case non-leaf entry
// OR recursive case with fi.Name. // OR recursive case with fi.Name.
if !fi.Mode.IsDir() { // Do not skip non-recursive case directory entries. if !fileInfo.Mode.IsDir() { // Do not skip non-recursive case directory entries.
// Validate if 'fi.Name' is incomplete multipart. // Validate if 'fi.Name' is incomplete multipart.
if !strings.HasSuffix(fi.Name, incompleteFile) { if !strings.HasSuffix(fileInfo.Name, fsMetaJSONFile) {
continue continue
} }
fi.Name = path.Dir(fi.Name) fileInfo.Name = path.Dir(fileInfo.Name)
} }
fileInfos = append(fileInfos, fi) fileInfos = append(fileInfos, fileInfo)
newMaxKeys++ newMaxKeys++
// If we have reached the maxKeys, it means we have listed // If we have reached the maxKeys, it means we have listed
// everything that was requested. // everything that was requested.
@ -428,34 +196,27 @@ outerLoop:
// can continue from where it left off for the next list request. // can continue from where it left off for the next list request.
lastFileInfo := fileInfos[len(fileInfos)-1] lastFileInfo := fileInfos[len(fileInfos)-1]
markerPath = lastFileInfo.Name markerPath = lastFileInfo.Name
saveTreeWalk(layer, listParams{minioMetaBucket, recursive, markerPath, prefixPath}, walker) fs.saveTreeWalk(listParams{minioMetaBucket, recursive, markerPath, prefixPath}, walker)
} }
// Return entries here. // Return entries here.
return fileInfos, eof, nil return fileInfos, eof, nil
} }
// FIXME: Currently the code sorts based on keyName/upload-id which is // FIXME: Currently the code sorts based on keyName/upload-id which is
// in correct based on the S3 specs. According to s3 specs we are // not correct based on the S3 specs. According to s3 specs we are
// supposed to only lexically sort keyNames and then for keyNames with // supposed to only lexically sort keyNames and then for keyNames with
// multiple upload ids should be sorted based on the initiated time. // multiple upload ids should be sorted based on the initiated time.
// Currently this case is not handled. // Currently this case is not handled.
// listMultipartUploadsCommon - lists all multipart uploads, common // listMultipartUploadsCommon - lists all multipart uploads, common function for both object layers.
// function for both object layers. func (fs fsObjects) listMultipartUploadsCommon(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
func listMultipartUploadsCommon(layer ObjectLayer, bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
var storage StorageAPI
switch l := layer.(type) {
case xlObjects:
storage = l.storage
case fsObjects:
storage = l.storage
}
result := ListMultipartsInfo{} result := ListMultipartsInfo{}
// Verify if bucket is valid. // Verify if bucket is valid.
if !IsValidBucketName(bucket) { if !IsValidBucketName(bucket) {
return ListMultipartsInfo{}, BucketNameInvalid{Bucket: bucket} return ListMultipartsInfo{}, BucketNameInvalid{Bucket: bucket}
} }
if !isBucketExist(storage, bucket) { if !fs.isBucketExist(bucket) {
return ListMultipartsInfo{}, BucketNotFound{Bucket: bucket} return ListMultipartsInfo{}, BucketNotFound{Bucket: bucket}
} }
if !IsValidObjectPrefix(prefix) { if !IsValidObjectPrefix(prefix) {
@ -514,27 +275,27 @@ func listMultipartUploadsCommon(layer ObjectLayer, bucket, prefix, keyMarker, up
} }
// List all the multipart files at prefixPath, starting with marker keyMarkerPath. // List all the multipart files at prefixPath, starting with marker keyMarkerPath.
fileInfos, eof, err := listMetaBucketMultipartFiles(layer, multipartPrefixPath, multipartMarkerPath, recursive, maxUploads) fileInfos, eof, err := fs.listMetaBucketMultipart(multipartPrefixPath, multipartMarkerPath, recursive, maxUploads)
if err != nil { if err != nil {
return ListMultipartsInfo{}, err return ListMultipartsInfo{}, err
} }
// Loop through all the received files fill in the multiparts result. // Loop through all the received files fill in the multiparts result.
for _, fi := range fileInfos { for _, fileInfo := range fileInfos {
var objectName string var objectName string
var uploadID string var uploadID string
if fi.Mode.IsDir() { if fileInfo.Mode.IsDir() {
// All directory entries are common prefixes. // All directory entries are common prefixes.
uploadID = "" // Upload ids are empty for CommonPrefixes. uploadID = "" // Upload ids are empty for CommonPrefixes.
objectName = strings.TrimPrefix(fi.Name, retainSlash(pathJoin(mpartMetaPrefix, bucket))) objectName = strings.TrimPrefix(fileInfo.Name, retainSlash(pathJoin(mpartMetaPrefix, bucket)))
result.CommonPrefixes = append(result.CommonPrefixes, objectName) result.CommonPrefixes = append(result.CommonPrefixes, objectName)
} else { } else {
uploadID = path.Base(fi.Name) uploadID = path.Base(fileInfo.Name)
objectName = strings.TrimPrefix(path.Dir(fi.Name), retainSlash(pathJoin(mpartMetaPrefix, bucket))) objectName = strings.TrimPrefix(path.Dir(fileInfo.Name), retainSlash(pathJoin(mpartMetaPrefix, bucket)))
result.Uploads = append(result.Uploads, uploadMetadata{ result.Uploads = append(result.Uploads, uploadMetadata{
Object: objectName, Object: objectName,
UploadID: uploadID, UploadID: uploadID,
Initiated: fi.ModTime, Initiated: fileInfo.ModTime,
}) })
} }
result.NextKeyMarker = objectName result.NextKeyMarker = objectName
@ -548,51 +309,165 @@ func listMultipartUploadsCommon(layer ObjectLayer, bucket, prefix, keyMarker, up
return result, nil return result, nil
} }
// ListObjectParts - list object parts, common function across both object layers. // ListMultipartUploads - list multipart uploads.
func listObjectPartsCommon(storage StorageAPI, bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) { func (fs fsObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
return fs.listMultipartUploadsCommon(bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads)
}
// NewMultipartUpload - starts a new multipart upload by delegating to
// newMultipartUploadCommon and returns its result (the upload ID and an
// error). The caller-supplied meta is not forwarded: headers are not
// saved for fs, so a fresh empty map is passed instead.
func (fs fsObjects) NewMultipartUpload(bucket, object string, meta map[string]string) (string, error) {
	emptyMeta := make(map[string]string)
	return fs.newMultipartUploadCommon(bucket, object, emptyMeta)
}
// putObjectPartCommon - stores one part of a multipart upload.
//
// The data is streamed into a temporary file under tmpMetaPrefix while
// its MD5 is computed. When size > 0 exactly size bytes are copied and
// any additional byte in the reader is rejected with UnExpectedDataSize;
// otherwise the reader is drained and the number of bytes copied becomes
// the recorded part size. If md5Hex is non-empty it must equal the
// computed digest, else BadDigest is returned. On success the temporary
// file is renamed to "object<partID>" inside the upload ID directory and
// the part is recorded in the upload's fs metadata.
//
// Returns the hex encoded MD5 of the part. Validation failures are
// reported as BucketNameInvalid, BucketNotFound, ObjectNameInvalid or
// InvalidUploadID. Whenever the part cannot be committed the temporary
// file is removed before returning.
//
// NOTE(review): the metadata read-modify-write below runs under a read
// lock on the upload ID plus a write lock that is specific to partID, so
// uploads of different parts appear able to interleave and overwrite
// each other's metadata update — confirm against nsMutex semantics.
func (fs fsObjects) putObjectPartCommon(bucket string, object string, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !fs.isBucketExist(bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	if !fs.isUploadIDExists(bucket, object, uploadID) {
		return "", InvalidUploadID{UploadID: uploadID}
	}

	// Hold read lock on the uploadID so that no one aborts it.
	nsMutex.RLock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
	defer nsMutex.RUnlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))

	// Hold write lock on the part so that there is no parallel upload on the part.
	nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID, strconv.Itoa(partID)))
	defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID, strconv.Itoa(partID)))

	partSuffix := fmt.Sprintf("object%d", partID)
	tmpPartPath := path.Join(tmpMetaPrefix, bucket, object, uploadID, partSuffix)
	fileWriter, err := fs.storage.CreateFile(minioMetaBucket, tmpPartPath)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Initialize md5 writer.
	md5Writer := md5.New()

	// Every byte written to the temporary file is hashed as well.
	multiWriter := io.MultiWriter(md5Writer, fileWriter)

	if size > 0 {
		if _, err = io.CopyN(multiWriter, data, size); err != nil {
			if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
				return "", toObjectErr(clErr, bucket, object)
			}
			return "", toObjectErr(err, bucket, object)
		}
		// The reader must not hold more data than the size argument
		// announced. Try to read one more byte: this is expected to
		// fail, success proves there is surplus data in the reader.
		if _, err = io.CopyN(ioutil.Discard, data, 1); err == nil {
			if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
				return "", toObjectErr(clErr, bucket, object)
			}
			return "", UnExpectedDataSize{Size: int(size)}
		}
	} else {
		var n int64
		if n, err = io.Copy(multiWriter, data); err != nil {
			if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
				return "", toObjectErr(clErr, bucket, object)
			}
			return "", toObjectErr(err, bucket, object)
		}
		// Size was not announced up front, record what was actually read.
		size = n
	}

	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))
	if md5Hex != "" && newMD5Hex != md5Hex {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", BadDigest{md5Hex, newMD5Hex}
	}

	err = fileWriter.Close()
	if err != nil {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", err
	}

	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	fsMeta, err := fs.readFSMetadata(minioMetaBucket, uploadIDPath)
	if err != nil {
		// The part cannot be recorded, so it will never be committed:
		// remove the temporary file instead of leaving it behind.
		if dErr := fs.storage.DeleteFile(minioMetaBucket, tmpPartPath); dErr != nil {
			return "", toObjectErr(dErr, minioMetaBucket, tmpPartPath)
		}
		return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
	}
	fsMeta.AddObjectPart(partSuffix, newMD5Hex, size)

	// Move the part from the temporary area into the upload ID directory.
	partPath := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)
	err = fs.storage.RenameFile(minioMetaBucket, tmpPartPath, minioMetaBucket, partPath)
	if err != nil {
		if dErr := fs.storage.DeleteFile(minioMetaBucket, tmpPartPath); dErr != nil {
			return "", toObjectErr(dErr, minioMetaBucket, tmpPartPath)
		}
		return "", toObjectErr(err, minioMetaBucket, partPath)
	}

	// Persist the metadata that now includes this part.
	if err = fs.writeFSMetadata(minioMetaBucket, uploadIDPath, fsMeta); err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
	}
	return newMD5Hex, nil
}
// PutObjectPart - stores one chunk of a multipart upload. All work is
// delegated to putObjectPartCommon, whose string result and error are
// returned as they are.
func (fs fsObjects) PutObjectPart(bucket, object, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
	partMD5Hex, err := fs.putObjectPartCommon(bucket, object, uploadID, partID, size, data, md5Hex)
	return partMD5Hex, err
}
func (fs fsObjects) listObjectPartsCommon(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
// Verify if bucket is valid. // Verify if bucket is valid.
if !IsValidBucketName(bucket) { if !IsValidBucketName(bucket) {
return ListPartsInfo{}, BucketNameInvalid{Bucket: bucket} return ListPartsInfo{}, BucketNameInvalid{Bucket: bucket}
} }
// Verify whether the bucket exists. // Verify whether the bucket exists.
if !isBucketExist(storage, bucket) { if !fs.isBucketExist(bucket) {
return ListPartsInfo{}, BucketNotFound{Bucket: bucket} return ListPartsInfo{}, BucketNotFound{Bucket: bucket}
} }
if !IsValidObjectName(object) { if !IsValidObjectName(object) {
return ListPartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object} return ListPartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
} }
if !isUploadIDExists(storage, bucket, object, uploadID) { if !fs.isUploadIDExists(bucket, object, uploadID) {
return ListPartsInfo{}, InvalidUploadID{UploadID: uploadID} return ListPartsInfo{}, InvalidUploadID{UploadID: uploadID}
} }
// Hold lock so that there is no competing abort-multipart-upload or complete-multipart-upload. // Hold lock so that there is no competing abort-multipart-upload or complete-multipart-upload.
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID)) defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
result := ListPartsInfo{} result := ListPartsInfo{}
entries, err := storage.ListDir(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadID))
uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
fsMeta, err := fs.readFSMetadata(minioMetaBucket, uploadIDPath)
if err != nil { if err != nil {
return result, err return ListPartsInfo{}, toObjectErr(err, minioMetaBucket, uploadIDPath)
} }
sort.Strings(entries) // Only parts with higher part numbers will be listed.
var newEntries []string parts := fsMeta.Parts[partNumberMarker:]
for _, entry := range entries {
newEntries = append(newEntries, path.Base(entry))
}
idx := sort.SearchStrings(newEntries, fmt.Sprintf("%.5d.", partNumberMarker+1))
newEntries = newEntries[idx:]
count := maxParts count := maxParts
for _, entry := range newEntries { for i, part := range parts {
fi, err := storage.StatFile(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadID, entry)) var fi FileInfo
splitEntry := strings.SplitN(entry, ".", 2) partNamePath := path.Join(mpartMetaPrefix, bucket, object, uploadID, part.Name)
partStr := splitEntry[0] fi, err = fs.storage.StatFile(minioMetaBucket, partNamePath)
etagStr := splitEntry[1]
partNum, err := strconv.Atoi(partStr)
if err != nil { if err != nil {
return ListPartsInfo{}, err return ListPartsInfo{}, toObjectErr(err, minioMetaBucket, partNamePath)
} }
partNum := i + partNumberMarker + 1
result.Parts = append(result.Parts, partInfo{ result.Parts = append(result.Parts, partInfo{
PartNumber: partNum, PartNumber: partNum,
ETag: part.ETag,
LastModified: fi.ModTime, LastModified: fi.ModTime,
ETag: etagStr,
Size: fi.Size, Size: fi.Size,
}) })
count-- count--
@ -601,7 +476,7 @@ func listObjectPartsCommon(storage StorageAPI, bucket, object, uploadID string,
} }
} }
// If listed entries are more than maxParts, we set IsTruncated as true. // If listed entries are more than maxParts, we set IsTruncated as true.
if len(newEntries) > len(result.Parts) { if len(parts) > len(result.Parts) {
result.IsTruncated = true result.IsTruncated = true
// Make sure to fill next part number marker if IsTruncated is // Make sure to fill next part number marker if IsTruncated is
// true for subsequent listing. // true for subsequent listing.
@ -615,16 +490,170 @@ func listObjectPartsCommon(storage StorageAPI, bucket, object, uploadID string,
return result, nil return result, nil
} }
// ListObjectParts - lists the parts of the multipart upload identified
// by uploadID. All work is delegated to listObjectPartsCommon, whose
// ListPartsInfo result and error are returned as they are.
func (fs fsObjects) ListObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
	partsInfo, err := fs.listObjectPartsCommon(bucket, object, uploadID, partNumberMarker, maxParts)
	return partsInfo, err
}
// isUploadIDExists - verify if a given uploadID exists and is valid. // isUploadIDExists - verify if a given uploadID exists and is valid.
func isUploadIDExists(storage StorageAPI, bucket, object, uploadID string) bool { func (fs fsObjects) isUploadIDExists(bucket, object, uploadID string) bool {
uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID, incompleteFile) uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
st, err := storage.StatFile(minioMetaBucket, uploadIDPath) _, err := fs.storage.StatFile(minioMetaBucket, path.Join(uploadIDPath, fsMetaJSONFile))
if err != nil { if err != nil {
if err == errFileNotFound { if err == errFileNotFound {
return false return false
} }
errorIf(err, "Stat failed on "+minioMetaBucket+"/"+uploadIDPath+".") errorIf(err, "Unable to access upload id"+uploadIDPath)
return false return false
} }
return st.Mode.IsRegular() return true
}
// CompleteMultipartUpload - assembles the uploaded parts into the final
// object.
//
// The parts listed in `parts` are concatenated, in the order given, into
// a temporary file under tmpMetaPrefix. That file is then renamed to
// bucket/object and the upload's parts are cleaned up.
//
// Returns the value computed by completeMultipartMD5 over `parts`.
// Validation failures are reported as BucketNameInvalid, BucketNotFound,
// ObjectNameInvalid or InvalidUploadID. InvalidPart is returned when a
// listed part does not exist, PartTooSmall when any part other than the
// last one fails isMinAllowedPartSize. On every failure after the
// temporary file has been created, that file is closed and removed; if
// this cleanup itself fails, the cleanup error is returned instead.
func (fs fsObjects) CompleteMultipartUpload(bucket string, object string, uploadID string, parts []completePart) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !fs.isBucketExist(bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	if !fs.isUploadIDExists(bucket, object, uploadID) {
		return "", InvalidUploadID{UploadID: uploadID}
	}

	// Calculate s3 compatible md5sum for complete multipart.
	s3MD5, err := completeMultipartMD5(parts...)
	if err != nil {
		return "", err
	}

	tempObj := path.Join(tmpMetaPrefix, bucket, object, uploadID, "object1")
	fileWriter, err := fs.storage.CreateFile(minioMetaBucket, tempObj)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// abort discards the partially written temporary object and reports
	// cause, unless the cleanup fails, in which case that error wins.
	abort := func(cause error) (string, error) {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", clErr
		}
		return "", cause
	}

	// Loop through all parts, validate them and then commit to disk.
	for i, part := range parts {
		// Construct part suffix.
		partSuffix := fmt.Sprintf("object%d", part.PartNumber)
		multipartPartFile := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)
		var fi FileInfo
		fi, err = fs.storage.StatFile(minioMetaBucket, multipartPartFile)
		if err != nil {
			if err == errFileNotFound {
				return abort(InvalidPart{})
			}
			return abort(err)
		}
		// Every part except the last one has to satisfy the minimum
		// allowed part size.
		if (i < len(parts)-1) && !isMinAllowedPartSize(fi.Size) {
			return abort(PartTooSmall{})
		}
		var fileReader io.ReadCloser
		fileReader, err = fs.storage.ReadFile(minioMetaBucket, multipartPartFile, 0)
		if err != nil {
			if err == errFileNotFound {
				return abort(InvalidPart{})
			}
			return abort(err)
		}
		_, err = io.Copy(fileWriter, fileReader)
		if err != nil {
			// Best effort: the copy failure is the error worth
			// reporting, the reader only needs to be released.
			_ = fileReader.Close()
			return abort(err)
		}
		if err = fileReader.Close(); err != nil {
			return abort(err)
		}
	}

	err = fileWriter.Close()
	if err != nil {
		return abort(err)
	}

	// Rename the file back to original location, if not delete the temporary object.
	err = fs.storage.RenameFile(minioMetaBucket, tempObj, bucket, object)
	if err != nil {
		if dErr := fs.storage.DeleteFile(minioMetaBucket, tempObj); dErr != nil {
			return "", toObjectErr(dErr, minioMetaBucket, tempObj)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Cleanup all the parts if everything else has been safely committed.
	if err = cleanupUploadedParts(bucket, object, uploadID, fs.storage); err != nil {
		return "", err
	}

	// Return md5sum.
	return s3MD5, nil
}
// abortMultipartUploadCommon - aborts the multipart upload identified by
// uploadID.
//
// The bucket name, bucket existence, object name and upload ID are
// validated in that order and the first failure is returned as
// BucketNameInvalid, BucketNotFound, ObjectNameInvalid or
// InvalidUploadID. The upload's parts are then cleaned up while holding
// the lock on the upload ID path. 'uploads.json' is deleted afterwards
// unless getUploadIDs succeeds and still reports uploads other than the
// aborted one. The trimmed upload list is only used for that check; it
// is not written back inside this function.
func (fs fsObjects) abortMultipartUploadCommon(bucket, object, uploadID string) error {
	switch {
	case !IsValidBucketName(bucket):
		return BucketNameInvalid{Bucket: bucket}
	case !fs.isBucketExist(bucket):
		return BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	case !fs.isUploadIDExists(bucket, object, uploadID):
		return InvalidUploadID{UploadID: uploadID}
	}

	// Keep competing complete-multipart-upload and put-object-part calls
	// out while the parts are being removed.
	lockPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
	nsMutex.Lock(minioMetaBucket, lockPath)
	defer nsMutex.Unlock(minioMetaBucket, lockPath)

	// Remove every uploaded part.
	if cleanupErr := cleanupUploadedParts(bucket, object, uploadID, fs.storage); cleanupErr != nil {
		return cleanupErr
	}

	// If other incomplete upload IDs remain for the object, leave
	// 'uploads.json' alone. A failure to read the list is not reported;
	// the delete below is attempted in that case.
	if uploadIDs, readErr := getUploadIDs(bucket, object, fs.storage); readErr == nil {
		if idx := uploadIDs.SearchUploadID(uploadID); idx != -1 {
			uploadIDs.Uploads = append(uploadIDs.Uploads[:idx], uploadIDs.Uploads[idx+1:]...)
		}
		if len(uploadIDs.Uploads) > 0 {
			return nil
		}
	}

	delErr := fs.storage.DeleteFile(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile))
	if delErr == nil {
		return nil
	}
	return toObjectErr(delErr, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object))
}
// AbortMultipartUpload - aborts a multipart upload.
func (fs fsObjects) AbortMultipartUpload(bucket, object, uploadID string) error {
return fs.abortMultipartUploadCommon(bucket, object, uploadID)
} }

View File

@ -21,6 +21,7 @@ import (
"encoding/hex" "encoding/hex"
"io" "io"
"path/filepath" "path/filepath"
"sort"
"strings" "strings"
"sync" "sync"
@ -30,7 +31,7 @@ import (
// fsObjects - Implements fs object layer. // fsObjects - Implements fs object layer.
type fsObjects struct { type fsObjects struct {
storage StorageAPI storage StorageAPI
listObjectMap map[listParams][]*treeWalker listObjectMap map[listParams][]*treeWalkerFS
listObjectMapMutex *sync.Mutex listObjectMapMutex *sync.Mutex
} }
@ -59,7 +60,7 @@ func newFSObjects(exportPath string) (ObjectLayer, error) {
// Return successfully initialized object layer. // Return successfully initialized object layer.
return fsObjects{ return fsObjects{
storage: storage, storage: storage,
listObjectMap: make(map[listParams][]*treeWalker), listObjectMap: make(map[listParams][]*treeWalkerFS),
listObjectMapMutex: &sync.Mutex{}, listObjectMapMutex: &sync.Mutex{},
}, nil }, nil
} }
@ -68,22 +69,68 @@ func newFSObjects(exportPath string) (ObjectLayer, error) {
// MakeBucket - make a bucket. // MakeBucket - make a bucket.
func (fs fsObjects) MakeBucket(bucket string) error { func (fs fsObjects) MakeBucket(bucket string) error {
return makeBucket(fs.storage, bucket) // Verify if bucket is valid.
if !IsValidBucketName(bucket) {
return BucketNameInvalid{Bucket: bucket}
}
if err := fs.storage.MakeVol(bucket); err != nil {
return toObjectErr(err, bucket)
}
return nil
} }
// GetBucketInfo - get bucket info. // GetBucketInfo - get bucket info.
func (fs fsObjects) GetBucketInfo(bucket string) (BucketInfo, error) { func (fs fsObjects) GetBucketInfo(bucket string) (BucketInfo, error) {
return getBucketInfo(fs.storage, bucket) // Verify if bucket is valid.
if !IsValidBucketName(bucket) {
return BucketInfo{}, BucketNameInvalid{Bucket: bucket}
}
vi, err := fs.storage.StatVol(bucket)
if err != nil {
return BucketInfo{}, toObjectErr(err, bucket)
}
return BucketInfo{
Name: bucket,
Created: vi.Created,
Total: vi.Total,
Free: vi.Free,
}, nil
} }
// ListBuckets - list buckets. // ListBuckets - list buckets.
func (fs fsObjects) ListBuckets() ([]BucketInfo, error) { func (fs fsObjects) ListBuckets() ([]BucketInfo, error) {
return listBuckets(fs.storage) var bucketInfos []BucketInfo
vols, err := fs.storage.ListVols()
if err != nil {
return nil, toObjectErr(err)
}
for _, vol := range vols {
// StorageAPI can send volume names which are incompatible
// with buckets, handle it and skip them.
if !IsValidBucketName(vol.Name) {
continue
}
bucketInfos = append(bucketInfos, BucketInfo{
Name: vol.Name,
Created: vol.Created,
Total: vol.Total,
Free: vol.Free,
})
}
sort.Sort(byBucketName(bucketInfos))
return bucketInfos, nil
} }
// DeleteBucket - delete a bucket. // DeleteBucket - delete a bucket.
func (fs fsObjects) DeleteBucket(bucket string) error { func (fs fsObjects) DeleteBucket(bucket string) error {
return deleteBucket(fs.storage, bucket) // Verify if bucket is valid.
if !IsValidBucketName(bucket) {
return BucketNameInvalid{Bucket: bucket}
}
if err := fs.storage.DeleteVol(bucket); err != nil {
return toObjectErr(err, bucket)
}
return nil
} }
/// Object Operations /// Object Operations
@ -218,7 +265,121 @@ func (fs fsObjects) DeleteObject(bucket, object string) error {
return nil return nil
} }
// isBucketExist - reports whether StatVol succeeds for bucketName.
// errVolumeNotFound yields false silently; any other StatVol error is
// passed to errorIf and also yields false.
func isBucketExist(storage StorageAPI, bucketName string) bool {
	_, statErr := storage.StatVol(bucketName)
	switch statErr {
	case nil:
		return true
	case errVolumeNotFound:
		return false
	}
	errorIf(statErr, "Stat failed on bucket "+bucketName+".")
	return false
}
// listObjectsFS lists at most maxKeys entries of bucket whose names start
// with prefix, resuming from marker when one is given.
//
// Only "" and "/" are accepted as delimiter: with "/" the walk is
// non-recursive and directories are reported through Prefixes, otherwise the
// walk is recursive. A maxKeys of zero returns an empty result right away; a
// negative value or one above maxObjectList is reset to maxObjectList.
//
// When the walk did not reach its end, the walker is saved under the
// parameters a follow-up request is expected to carry (same bucket, prefix and
// recursion mode, marker set to the last returned name) so that the next call
// can pick it up via lookupTreeWalk instead of starting a new walk.
func (fs fsObjects) listObjectsFS(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return ListObjectsInfo{}, BucketNameInvalid{Bucket: bucket}
	}
	// Verify if bucket exists.
	if !isBucketExist(fs.storage, bucket) {
		return ListObjectsInfo{}, BucketNotFound{Bucket: bucket}
	}
	// Verify if the prefix is a valid object prefix.
	if !IsValidObjectPrefix(prefix) {
		return ListObjectsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: prefix}
	}
	// Verify if delimiter is anything other than '/', which we do not support.
	if delimiter != "" && delimiter != slashSeparator {
		return ListObjectsInfo{}, UnsupportedDelimiter{
			Delimiter: delimiter,
		}
	}
	// Verify that a non-empty marker starts with the prefix.
	if marker != "" {
		if !strings.HasPrefix(marker, prefix) {
			return ListObjectsInfo{}, InvalidMarkerPrefixCombination{
				Marker: marker,
				Prefix: prefix,
			}
		}
	}
	// With max keys of zero we have reached eof, return right here.
	if maxKeys == 0 {
		return ListObjectsInfo{}, nil
	}
	// Overflowing count - reset to maxObjectList.
	if maxKeys < 0 || maxKeys > maxObjectList {
		maxKeys = maxObjectList
	}
	// Default is recursive, if delimiter is set then list non recursive.
	recursive := true
	if delimiter == slashSeparator {
		recursive = false
	}
	// Reuse a walker saved by a previous call with the same parameters,
	// otherwise start a fresh walk.
	walker := fs.lookupTreeWalk(listParams{bucket, recursive, marker, prefix})
	if walker == nil {
		walker = fs.startTreeWalk(bucket, prefix, marker, recursive)
	}
	var fileInfos []FileInfo
	var eof bool
	var nextMarker string
	// i is advanced at the bottom of the body, only after an entry has been
	// accepted and the walk has not signalled its end.
	for i := 0; i < maxKeys; {
		walkResult, ok := <-walker.ch
		if !ok {
			// Closed channel.
			eof = true
			break
		}
		// For any walk error return right away.
		if walkResult.err != nil {
			// File not found is a valid case.
			if walkResult.err == errFileNotFound {
				return ListObjectsInfo{}, nil
			}
			return ListObjectsInfo{}, toObjectErr(walkResult.err, bucket, prefix)
		}
		fileInfo := walkResult.fileInfo
		nextMarker = fileInfo.Name
		fileInfos = append(fileInfos, fileInfo)
		// The walker flags its last result with end.
		if walkResult.end {
			eof = true
			break
		}
		i++
	}
	// Key under which the unfinished walker is stored: the marker is the
	// name of the last entry collected above.
	params := listParams{bucket, recursive, nextMarker, prefix}
	if !eof {
		fs.saveTreeWalk(params, walker)
	}

	result := ListObjectsInfo{IsTruncated: !eof}
	for _, fileInfo := range fileInfos {
		// With delimiter set we fill in NextMarker and Prefixes.
		if delimiter == slashSeparator {
			result.NextMarker = fileInfo.Name
			if fileInfo.Mode.IsDir() {
				result.Prefixes = append(result.Prefixes, fileInfo.Name)
				continue
			}
		}
		result.Objects = append(result.Objects, ObjectInfo{
			Name:    fileInfo.Name,
			ModTime: fileInfo.ModTime,
			Size:    fileInfo.Size,
			IsDir:   false,
		})
	}
	return result, nil
}
// ListObjects - list all objects. // ListObjects - list all objects.
func (fs fsObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) { func (fs fsObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
return listObjectsCommon(fs, bucket, prefix, marker, delimiter, maxKeys) return fs.listObjectsFS(bucket, prefix, marker, delimiter, maxKeys)
} }

View File

@ -16,10 +16,7 @@
package main package main
import ( import "strings"
"sort"
"strings"
)
// Common initialization needed for both object layers. // Common initialization needed for both object layers.
func initObjectLayer(storageDisks ...StorageAPI) error { func initObjectLayer(storageDisks ...StorageAPI) error {
@ -69,192 +66,3 @@ func cleanupDir(storage StorageAPI, volume, dirPath string) error {
} }
return delFunc(retainSlash(pathJoin(dirPath))) return delFunc(retainSlash(pathJoin(dirPath)))
} }
/// Common object layer functions.
// makeBucket creates the volume backing bucket on storage. It is shared by
// both object layers. An invalid bucket name is rejected with
// BucketNameInvalid; storage failures are translated with toObjectErr.
func makeBucket(storage StorageAPI, bucket string) error {
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	mkErr := storage.MakeVol(bucket)
	if mkErr == nil {
		return nil
	}
	return toObjectErr(mkErr, bucket)
}
// getBucketInfo stats the volume backing bucket and reports its creation
// time together with the total and free space figures. It is shared by both
// object layers. On failure a zero BucketInfo is returned with the error.
func getBucketInfo(storage StorageAPI, bucket string) (BucketInfo, error) {
	var info BucketInfo
	if !IsValidBucketName(bucket) {
		return info, BucketNameInvalid{Bucket: bucket}
	}
	volInfo, statErr := storage.StatVol(bucket)
	if statErr != nil {
		return info, toObjectErr(statErr, bucket)
	}
	info.Name = bucket
	info.Created = volInfo.Created
	info.Total = volInfo.Total
	info.Free = volInfo.Free
	return info, nil
}
// listBuckets returns every volume of storage whose name is a valid bucket
// name, sorted with byBucketName. It is shared by both object layers.
func listBuckets(storage StorageAPI) ([]BucketInfo, error) {
	volumes, listErr := storage.ListVols()
	if listErr != nil {
		return nil, toObjectErr(listErr)
	}
	var buckets []BucketInfo
	for _, v := range volumes {
		// The storage layer may report volumes whose names are not
		// acceptable as bucket names; only the valid ones are kept.
		if IsValidBucketName(v.Name) {
			buckets = append(buckets, BucketInfo{
				Name:    v.Name,
				Created: v.Created,
				Total:   v.Total,
				Free:    v.Free,
			})
		}
	}
	sort.Sort(byBucketName(buckets))
	return buckets, nil
}
// deleteBucket removes the volume backing bucket from storage. It is shared
// by both object layers. An invalid bucket name is rejected with
// BucketNameInvalid; storage failures are translated with toObjectErr.
func deleteBucket(storage StorageAPI, bucket string) error {
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	delErr := storage.DeleteVol(bucket)
	if delErr == nil {
		return nil
	}
	return toObjectErr(delErr, bucket)
}
// listObjectsCommon lists at most maxKeys entries of bucket whose names start
// with prefix, resuming from marker when one is given. It serves both the
// xlObjects and the fsObjects layer.
//
// Only "" and "/" are accepted as delimiter: with "/" the walk is
// non-recursive and directories are reported through Prefixes, otherwise the
// walk is recursive. A maxKeys of zero returns an empty result right away; a
// negative value or one above maxObjectList is reset to maxObjectList.
//
// When the walk did not reach its end, the walker is saved under the
// parameters a follow-up request is expected to carry so that the next call
// can pick it up via lookupTreeWalk instead of starting a new walk.
func listObjectsCommon(layer ObjectLayer, bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
	// Pick the storage backend of the concrete layer.
	// NOTE(review): there is no default case, so for any other ObjectLayer
	// implementation storage keeps its zero value - confirm no such caller exists.
	var storage StorageAPI
	switch l := layer.(type) {
	case xlObjects:
		storage = l.storage
	case fsObjects:
		storage = l.storage
	}

	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return ListObjectsInfo{}, BucketNameInvalid{Bucket: bucket}
	}
	// Verify if bucket exists.
	if !isBucketExist(storage, bucket) {
		return ListObjectsInfo{}, BucketNotFound{Bucket: bucket}
	}
	// Verify if the prefix is a valid object prefix.
	if !IsValidObjectPrefix(prefix) {
		return ListObjectsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: prefix}
	}
	// Verify if delimiter is anything other than '/', which we do not support.
	if delimiter != "" && delimiter != slashSeparator {
		return ListObjectsInfo{}, UnsupportedDelimiter{
			Delimiter: delimiter,
		}
	}
	// Verify that a non-empty marker starts with the prefix.
	if marker != "" {
		if !strings.HasPrefix(marker, prefix) {
			return ListObjectsInfo{}, InvalidMarkerPrefixCombination{
				Marker: marker,
				Prefix: prefix,
			}
		}
	}
	// With max keys of zero we have reached eof, return right here.
	if maxKeys == 0 {
		return ListObjectsInfo{}, nil
	}
	// Overflowing count - reset to maxObjectList.
	if maxKeys < 0 || maxKeys > maxObjectList {
		maxKeys = maxObjectList
	}
	// Default is recursive, if delimiter is set then list non recursive.
	recursive := true
	if delimiter == slashSeparator {
		recursive = false
	}
	// Reuse a walker saved by a previous call with the same parameters,
	// otherwise start a fresh walk.
	walker := lookupTreeWalk(layer, listParams{bucket, recursive, marker, prefix})
	if walker == nil {
		walker = startTreeWalk(layer, bucket, prefix, marker, recursive)
	}
	var fileInfos []FileInfo
	var eof bool
	var nextMarker string
	// i is advanced at the bottom of the body, only after an entry has been
	// accepted and the walk has not signalled its end.
	for i := 0; i < maxKeys; {
		walkResult, ok := <-walker.ch
		if !ok {
			// Closed channel.
			eof = true
			break
		}
		// For any walk error return right away.
		if walkResult.err != nil {
			// File not found is a valid case.
			if walkResult.err == errFileNotFound {
				return ListObjectsInfo{}, nil
			}
			return ListObjectsInfo{}, toObjectErr(walkResult.err, bucket, prefix)
		}
		fileInfo := walkResult.fileInfo
		nextMarker = fileInfo.Name
		fileInfos = append(fileInfos, fileInfo)
		// The walker flags its last result with end.
		if walkResult.end {
			eof = true
			break
		}
		i++
	}
	// Key under which the unfinished walker is stored: the marker is the
	// name of the last entry collected above.
	params := listParams{bucket, recursive, nextMarker, prefix}
	if !eof {
		saveTreeWalk(layer, params, walker)
	}

	result := ListObjectsInfo{IsTruncated: !eof}
	for _, fileInfo := range fileInfos {
		// With delimiter set we fill in NextMarker and Prefixes.
		if delimiter == slashSeparator {
			result.NextMarker = fileInfo.Name
			if fileInfo.Mode.IsDir() {
				result.Prefixes = append(result.Prefixes, fileInfo.Name)
				continue
			}
		}
		result.Objects = append(result.Objects, ObjectInfo{
			Name:    fileInfo.Name,
			ModTime: fileInfo.ModTime,
			Size:    fileInfo.Size,
			IsDir:   false,
		})
	}
	return result, nil
}
// isBucketExist reports whether bucketName can be stat'ed on storage.
// A missing volume is an expected outcome and yields false silently; any
// other stat failure is logged through errorIf and also yields false.
func isBucketExist(storage StorageAPI, bucketName string) bool {
	_, statErr := storage.StatVol(bucketName)
	switch statErr {
	case nil:
		return true
	case errVolumeNotFound:
		// Bucket is simply absent, nothing worth logging.
		return false
	default:
		errorIf(statErr, "Stat failed on bucket "+bucketName+".")
		return false
	}
}

View File

@ -28,6 +28,7 @@ import (
"unicode/utf8" "unicode/utf8"
"github.com/minio/minio/pkg/safe" "github.com/minio/minio/pkg/safe"
"github.com/skyrings/skyring-common/tools/uuid"
) )
const ( const (
@ -123,6 +124,20 @@ func pathJoin(elem ...string) string {
return path.Join(elem...) + trailingSlash return path.Join(elem...) + trailingSlash
} }
// getUUID returns a freshly generated UUID in its string form. Generation is
// retried until it succeeds; every failed attempt is reported via errorIf.
func getUUID() (uuidStr string) {
	for {
		// Named id rather than uuid so the package is not shadowed.
		id, genErr := uuid.New()
		if genErr == nil {
			return id.String()
		}
		errorIf(genErr, "Unable to initialize uuid")
	}
}
// Create an s3 compatible MD5sum for complete multipart transaction. // Create an s3 compatible MD5sum for complete multipart transaction.
func completeMultipartMD5(parts ...completePart) (string, error) { func completeMultipartMD5(parts ...completePart) (string, error) {
var finalMD5Bytes []byte var finalMD5Bytes []byte

View File

@ -27,8 +27,6 @@ import (
"gopkg.in/check.v1" "gopkg.in/check.v1"
) )
// TODO - enable all the commented tests.
// APITestSuite - collection of API tests. // APITestSuite - collection of API tests.
func APITestSuite(c *check.C, create func() ObjectLayer) { func APITestSuite(c *check.C, create func() ObjectLayer) {
testMakeBucket(c, create) testMakeBucket(c, create)

View File

@ -333,6 +333,8 @@ func (s fsStorage) ReadFile(volume string, path string, offset int64) (readClose
return nil, errFileNotFound return nil, errFileNotFound
} else if os.IsPermission(err) { } else if os.IsPermission(err) {
return nil, errFileAccessDenied return nil, errFileAccessDenied
} else if strings.Contains(err.Error(), "not a directory") {
return nil, errFileNotFound
} }
return nil, err return nil, err
} }
@ -425,7 +427,6 @@ func (s fsStorage) StatFile(volume, path string) (file FileInfo, err error) {
// Return all errors here. // Return all errors here.
return FileInfo{}, err return FileInfo{}, err
} }
// If its a directory its not a regular file. // If its a directory its not a regular file.
if st.Mode().IsDir() { if st.Mode().IsDir() {
return FileInfo{}, errFileNotFound return FileInfo{}, errFileNotFound

View File

@ -44,6 +44,10 @@ func ExecObjectLayerTest(t *testing.T, objTest func(obj ObjectLayer, instanceTyp
} }
erasureDisks = append(erasureDisks, path) erasureDisks = append(erasureDisks, path)
} }
// Initialize name space lock.
initNSLock()
objLayer, err := newXLObjects(erasureDisks) objLayer, err := newXLObjects(erasureDisks)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
@ -59,6 +63,9 @@ func ExecObjectLayerTest(t *testing.T, objTest func(obj ObjectLayer, instanceTyp
return nil, "", err return nil, "", err
} }
// Initialize name space lock.
initNSLock()
// Create the obj. // Create the obj.
objLayer, err := newFSObjects(fsDir) objLayer, err := newFSObjects(fsDir)
if err != nil { if err != nil {
@ -80,7 +87,7 @@ func ExecObjectLayerTest(t *testing.T, objTest func(obj ObjectLayer, instanceTyp
} }
// Executing the object layer tests for single node setup. // Executing the object layer tests for single node setup.
objTest(objLayer, singleNodeTestStr, t) objTest(objLayer, singleNodeTestStr, t)
initNSLock()
objLayer, fsDirs, err := getXLObjectLayer() objLayer, fsDirs, err := getXLObjectLayer()
if err != nil { if err != nil {
t.Fatalf("Initialization of object layer failed for XL setup: %s", err.Error()) t.Fatalf("Initialization of object layer failed for XL setup: %s", err.Error())

View File

@ -21,49 +21,30 @@ import (
"path" "path"
"sort" "sort"
"strings" "strings"
"sync"
"time" "time"
) )
// listParams - list object params used for list object map // Tree walk notify carries a channel which notifies tree walk
type listParams struct { // results, additionally it also carries information if treeWalk
bucket string // should be timedOut.
recursive bool type treeWalkerFS struct {
marker string ch <-chan treeWalkResultFS
prefix string timedOut bool
} }
// Tree walk result carries results of tree walking. // Tree walk result carries results of tree walking.
type treeWalkResult struct { type treeWalkResultFS struct {
fileInfo FileInfo fileInfo FileInfo
err error err error
end bool end bool
} }
// Tree walk notify carries a channel which notifies tree walk
// results, additionally it also carries information if treeWalk
// should be timedOut.
type treeWalker struct {
ch <-chan treeWalkResult
timedOut bool
}
// treeWalk walks FS directory tree recursively pushing fileInfo into the channel as and when it encounters files. // treeWalk walks FS directory tree recursively pushing fileInfo into the channel as and when it encounters files.
func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker string, recursive bool, send func(treeWalkResult) bool, count *int) bool { func (fs fsObjects) treeWalk(bucket, prefixDir, entryPrefixMatch, marker string, recursive bool, send func(treeWalkResultFS) bool, count *int) bool {
// Example: // Example:
// if prefixDir="one/two/three/" and marker="four/five.txt" treeWalk is recursively // if prefixDir="one/two/three/" and marker="four/five.txt" treeWalk is recursively
// called with prefixDir="one/two/three/four/" and marker="five.txt" // called with prefixDir="one/two/three/four/" and marker="five.txt"
var isXL bool
var disk StorageAPI
switch l := layer.(type) {
case xlObjects:
isXL = true
disk = l.storage
case fsObjects:
disk = l.storage
}
// Convert entry to FileInfo // Convert entry to FileInfo
entryToFileInfo := func(entry string) (fileInfo FileInfo, err error) { entryToFileInfo := func(entry string) (fileInfo FileInfo, err error) {
if strings.HasSuffix(entry, slashSeparator) { if strings.HasSuffix(entry, slashSeparator) {
@ -73,26 +54,7 @@ func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker str
fileInfo.Mode = os.ModeDir fileInfo.Mode = os.ModeDir
return return
} }
if isXL && strings.HasSuffix(entry, multipartSuffix) { if fileInfo, err = fs.storage.StatFile(bucket, path.Join(prefixDir, entry)); err != nil {
// If the entry was detected as a multipart file we use
// getMultipartObjectInfo() to fill the FileInfo structure.
entry = strings.TrimSuffix(entry, multipartSuffix)
var info MultipartObjectInfo
info, err = getMultipartObjectInfo(disk, bucket, path.Join(prefixDir, entry))
if err != nil {
return
}
// Set the Mode to a "regular" file.
fileInfo.Mode = 0
// Trim the suffix that was temporarily added to indicate that this
// is a multipart file.
fileInfo.Name = path.Join(prefixDir, entry)
fileInfo.Size = info.Size
fileInfo.MD5Sum = info.MD5Sum
fileInfo.ModTime = info.ModTime
return
}
if fileInfo, err = disk.StatFile(bucket, path.Join(prefixDir, entry)); err != nil {
return return
} }
// Object name needs to be full path. // Object name needs to be full path.
@ -110,9 +72,9 @@ func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker str
markerBase = markerSplit[1] markerBase = markerSplit[1]
} }
} }
entries, err := disk.ListDir(bucket, prefixDir) entries, err := fs.storage.ListDir(bucket, prefixDir)
if err != nil { if err != nil {
send(treeWalkResult{err: err}) send(treeWalkResultFS{err: err})
return false return false
} }
@ -123,16 +85,7 @@ func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker str
} }
} }
} }
// For XL multipart files strip the trailing "/" and append ".minio.multipart" to the entry so that sort.Strings(entries)
// entryToFileInfo() can call StatFile for regular files or getMultipartObjectInfo() for multipart files.
for i, entry := range entries {
if isXL && strings.HasSuffix(entry, slashSeparator) {
if isMultipartObject(disk, bucket, path.Join(prefixDir, entry)) {
entries[i] = strings.TrimSuffix(entry, slashSeparator) + multipartSuffix
}
}
}
sort.Sort(byMultipartFiles(entries))
// Skip the empty strings // Skip the empty strings
for len(entries) > 0 && entries[0] == "" { for len(entries) > 0 && entries[0] == "" {
entries = entries[1:] entries = entries[1:]
@ -144,7 +97,7 @@ func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker str
// If markerDir="four/" Search() returns the index of "four/" in the sorted // If markerDir="four/" Search() returns the index of "four/" in the sorted
// entries list so we skip all the entries till "four/" // entries list so we skip all the entries till "four/"
idx := sort.Search(len(entries), func(i int) bool { idx := sort.Search(len(entries), func(i int) bool {
return strings.TrimSuffix(entries[i], multipartSuffix) >= markerDir return entries[i] >= markerDir
}) })
entries = entries[idx:] entries = entries[idx:]
*count += len(entries) *count += len(entries)
@ -176,7 +129,7 @@ func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker str
} }
*count-- *count--
prefixMatch := "" // Valid only for first level treeWalk and empty for subdirectories. prefixMatch := "" // Valid only for first level treeWalk and empty for subdirectories.
if !treeWalk(layer, bucket, path.Join(prefixDir, entry), prefixMatch, markerArg, recursive, send, count) { if !fs.treeWalk(bucket, path.Join(prefixDir, entry), prefixMatch, markerArg, recursive, send, count) {
return false return false
} }
continue continue
@ -188,7 +141,7 @@ func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker str
// Ignore error and continue. // Ignore error and continue.
continue continue
} }
if !send(treeWalkResult{fileInfo: fileInfo}) { if !send(treeWalkResultFS{fileInfo: fileInfo}) {
return false return false
} }
} }
@ -196,7 +149,7 @@ func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker str
} }
// Initiate a new treeWalk in a goroutine. // Initiate a new treeWalk in a goroutine.
func startTreeWalk(layer ObjectLayer, bucket, prefix, marker string, recursive bool) *treeWalker { func (fs fsObjects) startTreeWalk(bucket, prefix, marker string, recursive bool) *treeWalkerFS {
// Example 1 // Example 1
// If prefix is "one/two/three/" and marker is "one/two/three/four/five.txt" // If prefix is "one/two/three/" and marker is "one/two/three/four/five.txt"
// treeWalk is called with prefixDir="one/two/three/" and marker="four/five.txt" // treeWalk is called with prefixDir="one/two/three/" and marker="four/five.txt"
@ -207,8 +160,8 @@ func startTreeWalk(layer ObjectLayer, bucket, prefix, marker string, recursive b
// treeWalk is called with prefixDir="one/two/" and marker="three/four/five.txt" // treeWalk is called with prefixDir="one/two/" and marker="three/four/five.txt"
// and entryPrefixMatch="th" // and entryPrefixMatch="th"
ch := make(chan treeWalkResult, maxObjectList) ch := make(chan treeWalkResultFS, maxObjectList)
walkNotify := treeWalker{ch: ch} walkNotify := treeWalkerFS{ch: ch}
entryPrefixMatch := prefix entryPrefixMatch := prefix
prefixDir := "" prefixDir := ""
lastIndex := strings.LastIndex(prefix, slashSeparator) lastIndex := strings.LastIndex(prefix, slashSeparator)
@ -220,7 +173,7 @@ func startTreeWalk(layer ObjectLayer, bucket, prefix, marker string, recursive b
marker = strings.TrimPrefix(marker, prefixDir) marker = strings.TrimPrefix(marker, prefixDir)
go func() { go func() {
defer close(ch) defer close(ch)
send := func(walkResult treeWalkResult) bool { send := func(walkResult treeWalkResultFS) bool {
if count == 0 { if count == 0 {
walkResult.end = true walkResult.end = true
} }
@ -233,61 +186,41 @@ func startTreeWalk(layer ObjectLayer, bucket, prefix, marker string, recursive b
return false return false
} }
} }
treeWalk(layer, bucket, prefixDir, entryPrefixMatch, marker, recursive, send, &count) fs.treeWalk(bucket, prefixDir, entryPrefixMatch, marker, recursive, send, &count)
}() }()
return &walkNotify return &walkNotify
} }
// Save the goroutine reference in the map // Save the goroutine reference in the map
func saveTreeWalk(layer ObjectLayer, params listParams, walker *treeWalker) { func (fs fsObjects) saveTreeWalk(params listParams, walker *treeWalkerFS) {
var listObjectMap map[listParams][]*treeWalker fs.listObjectMapMutex.Lock()
var listObjectMapMutex *sync.Mutex defer fs.listObjectMapMutex.Unlock()
switch l := layer.(type) {
case xlObjects:
listObjectMap = l.listObjectMap
listObjectMapMutex = l.listObjectMapMutex
case fsObjects:
listObjectMap = l.listObjectMap
listObjectMapMutex = l.listObjectMapMutex
}
listObjectMapMutex.Lock()
defer listObjectMapMutex.Unlock()
walkers, _ := listObjectMap[params] walkers, _ := fs.listObjectMap[params]
walkers = append(walkers, walker) walkers = append(walkers, walker)
listObjectMap[params] = walkers fs.listObjectMap[params] = walkers
} }
// Lookup the goroutine reference from map // Lookup the goroutine reference from map
func lookupTreeWalk(layer ObjectLayer, params listParams) *treeWalker { func (fs fsObjects) lookupTreeWalk(params listParams) *treeWalkerFS {
var listObjectMap map[listParams][]*treeWalker fs.listObjectMapMutex.Lock()
var listObjectMapMutex *sync.Mutex defer fs.listObjectMapMutex.Unlock()
switch l := layer.(type) {
case xlObjects:
listObjectMap = l.listObjectMap
listObjectMapMutex = l.listObjectMapMutex
case fsObjects:
listObjectMap = l.listObjectMap
listObjectMapMutex = l.listObjectMapMutex
}
listObjectMapMutex.Lock()
defer listObjectMapMutex.Unlock()
if walkChs, ok := listObjectMap[params]; ok { if walkChs, ok := fs.listObjectMap[params]; ok {
for i, walkCh := range walkChs { for i, walkCh := range walkChs {
if !walkCh.timedOut { if !walkCh.timedOut {
newWalkChs := walkChs[i+1:] newWalkChs := walkChs[i+1:]
if len(newWalkChs) > 0 { if len(newWalkChs) > 0 {
listObjectMap[params] = newWalkChs fs.listObjectMap[params] = newWalkChs
} else { } else {
delete(listObjectMap, params) delete(fs.listObjectMap, params)
} }
return walkCh return walkCh
} }
} }
// As all channels are timed out, delete the map entry // As all channels are timed out, delete the map entry
delete(listObjectMap, params) delete(fs.listObjectMap, params)
} }
return nil return nil
} }

265
tree-walk-xl.go Normal file
View File

@ -0,0 +1,265 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"math/rand"
"path"
"sort"
"strings"
"time"
)
// listParams holds the parameters of a list-objects request. It is used as
// the key of the map in which unfinished tree walkers are stored, so that a
// follow-up request carrying the same parameters can resume the walk.
type listParams struct {
	bucket    string // bucket being listed
	recursive bool   // whether the walk descends into sub-directories
	marker    string // name the listing resumes from
	prefix    string // prefix entries have to start with
}
// treeWalkResult is a single message sent by a tree walk over its channel.
type treeWalkResult struct {
	objInfo ObjectInfo // entry found by the walk
	err     error      // set when the walk failed
	end     bool       // set by the sender when no further entries are pending
}
// treeWalker is the handle to a running tree walk: ch delivers the walk
// results, and timedOut is set by the walk goroutine when it gave up waiting
// for a receiver.
type treeWalker struct {
	ch       <-chan treeWalkResult
	timedOut bool
}
// listDir lists the entries of prefixDir inside bucket using one randomly
// chosen disk. A failed attempt is retried with a new random pick until
// len(xl.storageDisks) attempts have failed.
//
// filter is an exclusion predicate: entries for which it returns true are
// dropped from the result. A surviving entry with a trailing slash that
// xl.isObject reports as an object has the slash stripped, so that it is
// listed as a file rather than as a directory. The result is sorted.
//
// When every attempt failed, the error of the last attempt is returned.
func (xl xlObjects) listDir(bucket, prefixDir string, filter func(entry string) bool) (entries []string, err error) {
	// Number of failed listing attempts so far.
	listErrCount := 0
	for listErrCount < len(xl.storageDisks) {
		// Choose a random disk on each attempt, do not hit the same disk all
		// the time. rand.Intn(n) yields a value in [0, n), so the full disk
		// count has to be passed: with "len - 1" the last disk was never
		// picked and a single disk setup panicked on Intn(0).
		randIndex := rand.Intn(len(xl.storageDisks))
		disk := xl.storageDisks[randIndex]
		if entries, err = disk.ListDir(bucket, prefixDir); err == nil {
			for i, entry := range entries {
				// Blank out the entries rejected by the filter; they sort to
				// the front and are trimmed below.
				if filter(entry) {
					entries[i] = ""
					continue
				}
				// A directory that is really an object is listed without
				// its trailing slash.
				if strings.HasSuffix(entry, slashSeparator) && xl.isObject(bucket, path.Join(prefixDir, entry)) {
					entries[i] = strings.TrimSuffix(entry, slashSeparator)
				}
			}
			sort.Strings(entries)
			// Skip the empty strings.
			for len(entries) > 0 && entries[0] == "" {
				entries = entries[1:]
			}
			return entries, nil
		}
		listErrCount++ // Update list error count.
	}
	// Return error at the end.
	return nil, err
}
// getRandomDisk returns a randomly chosen disk from xl.storageDisks, every
// disk being eligible. It returns nil when there are no disks at all.
func (xl xlObjects) getRandomDisk() (disk StorageAPI) {
	if len(xl.storageDisks) == 0 {
		// rand.Intn panics for a non-positive argument.
		return nil
	}
	// rand.Intn(n) yields a value in [0, n), so the full disk count has to be
	// passed: with "len - 1" the last disk was never picked and a single disk
	// setup panicked on Intn(0).
	randIndex := rand.Intn(len(xl.storageDisks))
	return xl.storageDisks[randIndex]
}
// treeWalkXL walks the directory tree below prefixDir of bucket and hands
// every entry it finds to send, recursing into sub-directories when recursive
// is set.
//
// entryPrefixMatch restricts the entries of this level to those starting with
// it. marker, relative to prefixDir, names the entry the walk resumes from.
// count is shared across the whole recursion: it is raised by the number of
// entries listed at each level and lowered as each one is handled.
//
// The return value is false when the walk has to stop, either because
// listing failed (the error is passed to send) or because send returned
// false, and true otherwise.
func (xl xlObjects) treeWalkXL(bucket, prefixDir, entryPrefixMatch, marker string, recursive bool, send func(treeWalkResult) bool, count *int) bool {
	// Example:
	// if prefixDir="one/two/three/" and marker="four/five.txt" treeWalk is recursively
	// called with prefixDir="one/two/three/four/" and marker="five.txt"

	// Convert entry to ObjectInfo.
	entryToObjectInfo := func(entry string) (objInfo ObjectInfo, err error) {
		if strings.HasSuffix(entry, slashSeparator) {
			// Directory entry: object name needs to be the full path, with
			// the trailing slash that path.Join strips put back.
			objInfo.Bucket = bucket
			objInfo.Name = path.Join(prefixDir, entry)
			objInfo.Name += slashSeparator
			objInfo.IsDir = true
			return objInfo, nil
		}
		// Any other entry: look up its object info.
		return xl.getObjectInfo(bucket, path.Join(prefixDir, entry))
	}

	var markerBase, markerDir string
	if marker != "" {
		// Ex: if marker="four/five.txt", markerDir="four/" markerBase="five.txt"
		markerSplit := strings.SplitN(marker, slashSeparator, 2)
		markerDir = markerSplit[0]
		if len(markerSplit) == 2 {
			markerDir += slashSeparator
			markerBase = markerSplit[1]
		}
	}
	// The filter drops every entry that does not start with entryPrefixMatch.
	entries, err := xl.listDir(bucket, prefixDir, func(entry string) bool {
		return !strings.HasPrefix(entry, entryPrefixMatch)
	})
	if err != nil {
		send(treeWalkResult{err: err})
		return false
	}
	if len(entries) == 0 {
		return true
	}

	// example:
	// If markerDir="four/" Search() returns the index of "four/" in the sorted
	// entries list so we skip all the entries till "four/"
	idx := sort.Search(len(entries), func(i int) bool {
		return entries[i] >= markerDir
	})
	entries = entries[idx:]
	*count += len(entries)
	for i, entry := range entries {
		if i == 0 && markerDir == entry {
			if !recursive {
				// Skip as the marker would already be listed in the previous listing.
				*count--
				continue
			}
			if recursive && !strings.HasSuffix(entry, slashSeparator) {
				// We should not skip for recursive listing and if markerDir is a directory
				// for ex. if marker is "four/five.txt" markerDir will be "four/" which
				// should not be skipped, instead it will need to be treeWalkXL()'ed into.
				// Skip if it is a file though as it would be listed in previous listing.
				*count--
				continue
			}
		}
		if recursive && strings.HasSuffix(entry, slashSeparator) {
			// If the entry is a directory, we will need to recurse into it.
			markerArg := ""
			if entry == markerDir {
				// We need to pass "five.txt" as marker only if we are
				// recursing into "four/"
				markerArg = markerBase
			}
			*count--
			prefixMatch := "" // Valid only for first level treeWalk and empty for subdirectories.
			if !xl.treeWalkXL(bucket, path.Join(prefixDir, entry), prefixMatch, markerArg, recursive, send, count) {
				return false
			}
			continue
		}
		*count--
		objInfo, err := entryToObjectInfo(entry)
		if err != nil {
			// The lookup can fail when the entry got deleted in the interim
			// since listDir(). Ignore error and continue.
			continue
		}
		if !send(treeWalkResult{objInfo: objInfo}) {
			return false
		}
	}
	return true
}
// startTreeWalkXL launches treeWalkXL in its own goroutine and returns the
// walker handle whose channel delivers the results.
//
// prefix is split at its last separator into the directory to walk and the
// name fragment entries of that directory have to start with; marker is made
// relative to that directory:
//
//	prefix "one/two/three/", marker "one/two/three/four/five.txt"
//	  -> prefixDir "one/two/three/", entryPrefixMatch "", marker "four/five.txt"
//	prefix "one/two/th", marker "one/two/three/four/five.txt"
//	  -> prefixDir "one/two/", entryPrefixMatch "th", marker "three/four/five.txt"
//
// A result that cannot be delivered within 60 seconds marks the walker as
// timed out and stops the walk. The channel is closed when the goroutine
// returns.
func (xl xlObjects) startTreeWalkXL(bucket, prefix, marker string, recursive bool) *treeWalker {
	resultCh := make(chan treeWalkResult, maxObjectList)
	walker := &treeWalker{ch: resultCh}

	var prefixDir string
	entryPrefixMatch := prefix
	if sep := strings.LastIndex(prefix, slashSeparator); sep >= 0 {
		prefixDir, entryPrefixMatch = prefix[:sep+1], prefix[sep+1:]
	}
	marker = strings.TrimPrefix(marker, prefixDir)

	// Number of listed entries the walk has not handled yet.
	pending := 0
	go func() {
		defer close(resultCh)
		deliver := func(res treeWalkResult) bool {
			// Nothing left pending: flag this result as the last one.
			if pending == 0 {
				res.end = true
			}
			select {
			case resultCh <- res:
				return true
			case <-time.After(60 * time.Second):
				walker.timedOut = true
				return false
			}
		}
		xl.treeWalkXL(bucket, prefixDir, entryPrefixMatch, marker, recursive, deliver, &pending)
	}()
	return walker
}
// saveTreeWalkXL stores walker under params, appended to any walkers already
// kept for the same parameters, while holding the list-object map mutex.
func (xl xlObjects) saveTreeWalkXL(params listParams, walker *treeWalker) {
	xl.listObjectMapMutex.Lock()
	defer xl.listObjectMapMutex.Unlock()

	// A missing key reads as a nil slice, which append handles.
	xl.listObjectMap[params] = append(xl.listObjectMap[params], walker)
}
// lookupTreeWalkXL hands out the first walker saved under params that has not
// timed out, or nil when there is none. The returned walker and every walker
// saved before it are removed from the map; the map entry is deleted once no
// walker is left for params.
func (xl xlObjects) lookupTreeWalkXL(params listParams) *treeWalker {
	xl.listObjectMapMutex.Lock()
	defer xl.listObjectMapMutex.Unlock()

	saved, found := xl.listObjectMap[params]
	if !found {
		return nil
	}
	for idx := range saved {
		candidate := saved[idx]
		if candidate.timedOut {
			continue
		}
		// Keep only the walkers saved after the one handed out.
		if rest := saved[idx+1:]; len(rest) > 0 {
			xl.listObjectMap[params] = rest
		} else {
			delete(xl.listObjectMap, params)
		}
		return candidate
	}
	// Every saved walker has timed out, drop the map entry.
	delete(xl.listObjectMap, params)
	return nil
}

View File

@ -1,204 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"errors"
slashpath "path"
"sync"
)
// highestInt returns the largest value found in intSlice. The search starts
// from zero, so the result is zero for an empty slice and for a slice holding
// only negative values.
func highestInt(intSlice []int64) (highestInteger int64) {
	for i := range intSlice {
		if v := intSlice[i]; v > highestInteger {
			highestInteger = v
		}
	}
	return highestInteger
}
// listFileVersions returns, for every entry of partsMetadata, the version
// recorded in its Stat field, or -1 when the error at the same index of errs
// is non-nil. errs is indexed in step with partsMetadata.
func listFileVersions(partsMetadata []xlMetaV1, errs []error) (versions []int64) {
	versions = make([]int64, len(partsMetadata))
	for i := range partsMetadata {
		if errs[i] != nil {
			// Metadata could not be read, mark the slot as having no version.
			versions[i] = -1
			continue
		}
		versions[i] = partsMetadata[i].Stat.Version
	}
	return versions
}
// reduceError collapses the per-disk errors in errs into a single error.
//
// With tolerance = len(xl.storageDisks) - xl.readQuorum, the rules are applied
// in this order and the first match wins:
//   - more than tolerance "file not found" errors   -> errFileNotFound
//   - more than tolerance "volume not found" errors -> errVolumeNotFound
//   - "disk not found" on every disk                -> errDiskNotFound
//   - more than tolerance "disk not found" errors   -> errFileNotFound
//   - "volume access denied" on every disk          -> errVolumeAccessDenied
//
// nil is returned when none of the rules applies.
func (xl XL) reduceError(errs []error) error {
	var fileNotFound, diskNotFound, volumeNotFound, accessDenied int
	for _, e := range errs {
		switch e {
		case errFileNotFound:
			fileNotFound++
		case errDiskNotFound:
			diskNotFound++
		case errVolumeAccessDenied:
			accessDenied++
		case errVolumeNotFound:
			volumeNotFound++
		}
	}

	totalDisks := len(xl.storageDisks)
	// Number of failing disks that can be tolerated while keeping read quorum.
	tolerance := totalDisks - xl.readQuorum

	switch {
	case fileNotFound > tolerance:
		return errFileNotFound
	case volumeNotFound > tolerance:
		return errVolumeNotFound
	case diskNotFound == totalDisks:
		return errDiskNotFound
	case diskNotFound > tolerance:
		return errFileNotFound
	case accessDenied == totalDisks:
		return errVolumeAccessDenied
	}
	return nil
}
// listOnlineDisks reads the metadata of path in volume from all disks and
// works out which disks carry the highest version of it.
//
// It returns:
//   - onlineDisks: as long as xl.storageDisks, holding the disk at every index
//     whose metadata is at the highest version and nil elsewhere;
//   - mdata: the metadata of the last disk found at the highest version;
//   - heal: true when fewer disks than configured are at the highest version;
//   - err: the reduced metadata read error if any, or errReadQuorum when the
//     disks at the highest version are both fewer than configured and fewer
//     than xl.readQuorum.
func (xl XL) listOnlineDisks(volume, path string) (onlineDisks []StorageAPI, mdata xlMetaV1, heal bool, err error) {
	partsMetadata, errs := xl.getPartsMetadata(volume, path)
	if err = xl.reduceError(errs); err != nil {
		return nil, xlMetaV1{}, false, err
	}

	// Version of every disk and the highest one among them.
	versions := listFileVersions(partsMetadata, errs)
	latest := highestInt(versions)

	totalDisks := len(xl.storageDisks)
	onlineDisks = make([]StorageAPI, totalDisks)
	online := 0
	for i, v := range versions {
		if v != latest {
			// Slot stays nil for a disk that is behind or unreadable.
			continue
		}
		mdata = partsMetadata[i]
		onlineDisks[i] = xl.storageDisks[i]
		online++
	}

	// Fewer up-to-date disks than configured: the file most probably needs
	// healing, unless there are not even enough of them for a read quorum.
	if online < totalDisks {
		if online < xl.readQuorum {
			return nil, xlMetaV1{}, false, errReadQuorum
		}
		heal = true
	}
	return onlineDisks, mdata, heal, nil
}
// getPartsMetadata reads and decodes the `file.json` metadata of the
// given file from every storage disk concurrently.
//
// Both returned slices are index-aligned with xl.storageDisks: a failed
// read or decode leaves a zero xlMetaV1 in the first slice and records
// the error at the same index in the second.
// Read lockNS() should be done by caller.
func (xl XL) getPartsMetadata(volume, path string) ([]xlMetaV1, []error) {
	diskCount := len(xl.storageDisks)
	metadataArray := make([]xlMetaV1, diskCount)
	errs := make([]error, diskCount)
	xlMetaV1FilePath := slashpath.Join(path, xlMetaV1File)

	// readOne loads and decodes the metadata from a single disk.
	readOne := func(disk StorageAPI) (xlMetaV1, error) {
		metadataReader, err := disk.ReadFile(volume, xlMetaV1FilePath, int64(0))
		if err != nil {
			return xlMetaV1{}, err
		}
		defer metadataReader.Close()
		// A decode failure means file.json could not be parsed.
		return xlMetaV1Decode(metadataReader)
	}

	var wg sync.WaitGroup
	for index, disk := range xl.storageDisks {
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			// Each goroutine only touches its own slot in both slices.
			metadataArray[index], errs[index] = readOne(disk)
		}(index, disk)
	}
	wg.Wait()
	return metadataArray, errs
}
// updatePartsMetadata writes/updates `file.json` for the given file.
// updateParts is indexed like xl.storageDisks and marks the disks on
// which `file.json` needs to be (re)written.
//
// Returns a slice of errors sized to xl.storageDisks. For every index
// covered by updateParts the entry is nil only when the metadata was
// created, written and closed successfully on that disk; indexes that
// were not selected for update keep a "Metadata not updated" error.
// updateParts must not be longer than xl.storageDisks.
// Write lockNS() should be done by caller.
func (xl XL) updatePartsMetadata(volume, path string, metadata xlMetaV1, updateParts []bool) []error {
	xlMetaV1FilePath := pathJoin(path, xlMetaV1File)
	errs := make([]error, len(xl.storageDisks))
	for index := range updateParts {
		errs[index] = errors.New("Metadata not updated")
	}
	for index, shouldUpdate := range updateParts {
		if !shouldUpdate {
			continue
		}
		writer, err := xl.storageDisks[index].CreateFile(volume, xlMetaV1FilePath)
		if err != nil {
			errs[index] = err
			continue
		}
		if err = metadata.Write(writer); err != nil {
			errs[index] = err
			// Discard the partially written metadata file.
			safeCloseAndRemove(writer)
			continue
		}
		// Close finalizes the file, so a failing Close means the
		// metadata was not updated on this disk and must be reported.
		errs[index] = writer.Close()
	}
	return errs
}

View File

@ -1,287 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"fmt"
"io"
slashpath "path"
"sync"
"time"
)
// erasureBlockSize is the number of bytes read from the incoming
// stream and erasure coded in one step: 4MiB.
const erasureBlockSize = 4 << 20
// cleanupCreateFileOps - undoes a failed create: closes and removes the
// given temporary writers, then deletes volume/path from every storage
// disk. Delete failures are only logged.
func (xl XL) cleanupCreateFileOps(volume, path string, writers ...io.WriteCloser) {
	closeAndRemoveWriters(writers...)
	for i := range xl.storageDisks {
		err := xl.storageDisks[i].DeleteFile(volume, path)
		if err == nil {
			continue
		}
		errorIf(err, "Unable to delete file.")
	}
}
// closeAndRemoveWriters passes every writer to safeCloseAndRemove and
// logs, but does not return, any error it reports.
func closeAndRemoveWriters(writers ...io.WriteCloser) {
	for i := range writers {
		err := safeCloseAndRemove(writers[i])
		if err == nil {
			continue
		}
		errorIf(err, "Failed to close writer.")
	}
}
// writeErasure reads the incoming stream from reader in blocks of
// erasureBlockSize, erasure codes every block and writes the resulting
// shards to the configured storage disks (`file.<index>` per disk),
// followed by the `file.json` metadata.
//
// Failures are reported to the writing side of the pipe through
// reader.CloseWithError; on failure all temporary writers are discarded
// and volume/path is deleted from every disk. wcloser is released when
// the function returns.
//
// CreateFile failures are tolerated on up to
// len(xl.storageDisks)-xl.writeQuorum disks. When at least one disk was
// skipped this way the metadata carries a file version one higher than
// the highest version found on disk, so that the skipped disks can later
// be recognized as stale.
func (xl XL) writeErasure(volume, path string, reader *io.PipeReader, wcloser *waitCloser) {
	// Release the block writer upon function return.
	defer wcloser.release()

	partsMetadata, errs := xl.getPartsMetadata(volume, path)

	// Convert errs into meaningful err to be sent upwards if possible
	// based on total number of errors and read quorum. A missing file
	// is fine, it is about to be created.
	err := xl.reduceError(errs)
	if err != nil && err != errFileNotFound {
		reader.CloseWithError(err)
		return
	}

	// List all the file versions on existing files and pick the next
	// higher version.
	versions := listFileVersions(partsMetadata, errs)
	higherVersion := highestInt(versions) + 1

	writers := make([]io.WriteCloser, len(xl.storageDisks))
	metadataWriters := make([]io.WriteCloser, len(xl.storageDisks))
	xlMetaV1FilePath := slashpath.Join(path, xlMetaV1File)

	// abort removes everything written so far and fails the pipe with cause.
	abort := func(cause error) {
		xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...)
		reader.CloseWithError(cause)
	}

	// Save additional erasureMetadata.
	modTime := time.Now().UTC()

	// We can safely allow CreateFile errors up to
	// len(xl.storageDisks) - xl.writeQuorum, otherwise return failure.
	maxCreateFileErrors := len(xl.storageDisks) - xl.writeQuorum
	createFileError := 0
	for index, disk := range xl.storageDisks {
		erasurePart := slashpath.Join(path, fmt.Sprintf("file.%d", index))
		var writer io.WriteCloser
		writer, err = disk.CreateFile(volume, erasurePart)
		if err != nil {
			// Treat errFileNameTooLong specially, it fails on every disk.
			if err == errFileNameTooLong {
				abort(err)
				return
			}
			createFileError++
			if createFileError <= maxCreateFileErrors {
				continue
			}
			abort(errWriteQuorum)
			return
		}

		// Create meta data file.
		var metadataWriter io.WriteCloser
		metadataWriter, err = disk.CreateFile(volume, xlMetaV1FilePath)
		if err != nil {
			// This disk is skipped entirely; discard the part writer
			// created above, it is not tracked in writers and would
			// otherwise never be closed.
			closeAndRemoveWriters(writer)
			createFileError++
			if createFileError <= maxCreateFileErrors {
				continue
			}
			abort(errWriteQuorum)
			return
		}

		writers[index] = writer
		metadataWriters[index] = metadataWriter
	}

	// Allocate 4MiB block size buffer for reading.
	dataBuffer := make([]byte, erasureBlockSize)
	var totalSize int64 // Saves total incoming stream size.
	for {
		// Read up to allocated block size.
		var n int
		n, err = io.ReadFull(reader, dataBuffer)
		if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF {
			// Any unexpected errors, close the pipe reader with error.
			abort(err)
			return
		}
		// At EOF break out.
		if err == io.EOF {
			break
		}
		if n == 0 {
			continue
		}

		// Split the input buffer into data and parity blocks.
		var dataBlocks [][]byte
		dataBlocks, err = xl.ReedSolomon.Split(dataBuffer[0:n])
		if err != nil {
			abort(err)
			return
		}

		// Encode parity blocks using data blocks.
		err = xl.ReedSolomon.Encode(dataBlocks)
		if err != nil {
			abort(err)
			return
		}

		// Write the encoded blocks to all available disks in parallel.
		var wg = &sync.WaitGroup{}
		var wErrs = make([]error, len(writers))
		for index, writer := range writers {
			if writer == nil {
				continue
			}
			wg.Add(1)
			go func(index int, writer io.Writer) {
				defer wg.Done()
				_, wErrs[index] = writer.Write(dataBlocks[index])
			}(index, writer)
		}
		wg.Wait()
		for _, wErr := range wErrs {
			if wErr == nil {
				continue
			}
			// Any write failure fails the whole create.
			abort(wErr)
			return
		}

		// Update total written.
		totalSize += int64(n)
	}

	// Count the disks that are actually being written to.
	activeWriters := 0
	for _, writer := range writers {
		if writer != nil {
			activeWriters++
		}
	}

	// Initialize metadata, save all erasure related metadata.
	metadata := xlMetaV1{}
	metadata.Version = "1"
	metadata.Stat.Size = totalSize
	metadata.Stat.ModTime = modTime
	metadata.Minio.Release = minioReleaseTag
	if activeWriters < len(xl.storageDisks) {
		// Save file.version only if we wrote to less disks than all
		// storage disks. (writers always has one slot per disk, so the
		// number of non-nil writers has to be compared, not its length.)
		metadata.Stat.Version = higherVersion
	}
	metadata.Erasure.DataBlocks = xl.DataBlocks
	metadata.Erasure.ParityBlocks = xl.ParityBlocks
	metadata.Erasure.BlockSize = erasureBlockSize

	// Write all the metadata.
	// below case is not handled here
	// Case: when storageDisks is 16 and write quorumDisks is 13,
	//       meta data write failure up to 2 can be considered.
	//       currently we fail for any meta data writes
	for _, metadataWriter := range metadataWriters {
		if metadataWriter == nil {
			continue
		}
		if err = metadata.Write(metadataWriter); err != nil {
			abort(err)
			return
		}
	}

	// Close all writers and metadata writers.
	for index, writer := range writers {
		if writer == nil {
			continue
		}
		// Safely wrote, now rename to its actual location.
		if err = writer.Close(); err != nil {
			abort(err)
			return
		}

		if metadataWriters[index] == nil {
			continue
		}
		// Safely wrote, now rename to its actual location.
		if err = metadataWriters[index].Close(); err != nil {
			abort(err)
			return
		}
	}

	// Close the pipe reader and return.
	reader.Close()
}
// CreateFile - starts creating the file volume/path and returns the
// writer the caller should stream the file content into.
//
// The data is consumed from a pipe and erasure coded by writeErasure in
// a separate goroutine. Returns errInvalidArgument when volume or path
// is not valid.
func (xl XL) CreateFile(volume, path string) (writeCloser io.WriteCloser, err error) {
	if !(isValidVolname(volume) && isValidPath(path)) {
		return nil, errInvalidArgument
	}

	// Pipe connecting the caller (writer side) with the encoder (reader side).
	pipeReader, pipeWriter := io.Pipe()

	// Wait closer wraps the pipe writer, implements both Write and Close.
	wcloser := newWaitCloser(pipeWriter)

	// Erasure encode in the background, block by block from pipeReader.
	go xl.writeErasure(volume, path, pipeReader, wcloser)

	return wcloser, nil
}

View File

@ -1,185 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"errors"
"fmt"
"io"
slashpath "path"
)
// healFile - heals the file at path.
//
// Disks that do not hold the highest version of the file (as reported
// by listOnlineDisks) get their erasure part rebuilt from the parts on
// the remaining disks and, once the part is committed, their `file.json`
// rewritten.
//
// Healing is best effort per disk: a disk whose part cannot be created,
// written or committed is dropped from the heal set so that its metadata
// is not updated, healing continues for the other disks and the first
// such write/commit error is returned at the end. Read, verification
// and reconstruction failures abort healing and discard all temporary
// part files.
func (xl XL) healFile(volume string, path string) error {
	totalBlocks := xl.DataBlocks + xl.ParityBlocks
	needsHeal := make([]bool, totalBlocks)
	readers := make([]io.Reader, totalBlocks)
	writers := make([]io.WriteCloser, totalBlocks)

	// List all online disks to verify if we need to heal.
	onlineDisks, metadata, heal, err := xl.listOnlineDisks(volume, path)
	if err != nil {
		return err
	}
	if !heal {
		return nil
	}

	for index, disk := range onlineDisks {
		if disk == nil {
			needsHeal[index] = true
			continue
		}
		erasurePart := slashpath.Join(path, fmt.Sprintf("file.%d", index))
		// If disk.ReadFile returns error and we don't have read quorum it will be taken care as
		// ReedSolomon.Reconstruct() will fail later.
		var reader io.ReadCloser
		offset := int64(0)
		if reader, err = xl.storageDisks[index].ReadFile(volume, erasurePart, offset); err == nil {
			readers[index] = reader
			// Readers are consumed until the end of the function,
			// hence closed on return rather than per iteration.
			defer reader.Close()
		}
	}

	// Create writers for parts where healing is needed.
	for index, healNeeded := range needsHeal {
		if !healNeeded {
			continue
		}
		erasurePart := slashpath.Join(path, fmt.Sprintf("file.%d", index))
		writers[index], err = xl.storageDisks[index].CreateFile(volume, erasurePart)
		if err != nil {
			// Cannot heal this disk now, leave it for a later attempt.
			needsHeal[index] = false
			safeCloseAndRemove(writers[index])
			writers[index] = nil
			continue
		}
	}

	// Check if there is atleast one part that needs to be healed.
	atleastOneHeal := false
	for _, healNeeded := range needsHeal {
		if healNeeded {
			atleastOneHeal = true
			break
		}
	}
	if !atleastOneHeal {
		// Return if healing not needed anywhere.
		return nil
	}

	// First per-disk write/commit failure, reported once healing of
	// the remaining disks is done.
	var healErr error

	var totalLeft = metadata.Stat.Size
	for totalLeft > 0 {
		// Figure out the right blockSize.
		var curBlockSize int64
		if metadata.Erasure.BlockSize < totalLeft {
			curBlockSize = metadata.Erasure.BlockSize
		} else {
			curBlockSize = totalLeft
		}
		// Calculate the current encoded block size.
		curBlockSize = getEncodedBlockLen(curBlockSize, metadata.Erasure.DataBlocks)
		enBlocks := make([][]byte, totalBlocks)
		// Loop through all readers and read.
		for index, reader := range readers {
			// Initialize block slice and fill the data from each parts.
			// ReedSolomon.Verify() expects that slice is not nil even if the particular
			// part needs healing.
			enBlocks[index] = make([]byte, curBlockSize)
			if needsHeal[index] {
				// Skip reading if the part needs healing.
				continue
			}
			if reader == nil {
				// If ReadFile() had returned error, do not read from this disk.
				continue
			}
			_, rErr := io.ReadFull(reader, enBlocks[index])
			if rErr != nil && rErr != io.ErrUnexpectedEOF {
				enBlocks[index] = nil
			}
		}

		// Check blocks if they are all zero in length.
		if checkBlockSize(enBlocks) == 0 {
			// Do not leave temporary part files behind.
			closeAndRemoveWriters(writers...)
			return errDataCorrupt
		}

		// Verify the blocks.
		ok, err := xl.ReedSolomon.Verify(enBlocks)
		if err != nil {
			closeAndRemoveWriters(writers...)
			return err
		}

		// Verification failed, blocks require reconstruction.
		if !ok {
			for index, healNeeded := range needsHeal {
				if healNeeded {
					// Reconstructs() reconstructs the parts if the array is nil.
					enBlocks[index] = nil
				}
			}
			err = xl.ReedSolomon.Reconstruct(enBlocks)
			if err != nil {
				closeAndRemoveWriters(writers...)
				return err
			}
			// Verify reconstructed blocks again.
			ok, err = xl.ReedSolomon.Verify(enBlocks)
			if err != nil {
				closeAndRemoveWriters(writers...)
				return err
			}
			if !ok {
				// Blocks cannot be reconstructed, corrupted data.
				err = errors.New("Verification failed after reconstruction, data likely corrupted.")
				closeAndRemoveWriters(writers...)
				return err
			}
		}

		for index, healNeeded := range needsHeal {
			if !healNeeded {
				continue
			}
			if _, wErr := writers[index].Write(enBlocks[index]); wErr != nil {
				// The part on this disk is incomplete: discard it and
				// stop treating the disk as healed, otherwise later
				// blocks would be written to a removed writer and its
				// metadata would wrongly be marked up to date.
				safeCloseAndRemove(writers[index])
				writers[index] = nil
				needsHeal[index] = false
				if healErr == nil {
					healErr = wErr
				}
			}
		}
		totalLeft = totalLeft - metadata.Erasure.BlockSize
	}

	// After successful healing Close() the writer so that the temp
	// files are committed to their location.
	for index, writer := range writers {
		if writer == nil {
			continue
		}
		if cErr := writer.Close(); cErr != nil {
			// The part was not committed, do not update its metadata.
			needsHeal[index] = false
			if healErr == nil {
				healErr = cErr
			}
		}
	}

	// Update the quorum metadata after heal.
	errs := xl.updatePartsMetadata(volume, path, metadata, needsHeal)
	for index, healNeeded := range needsHeal {
		if healNeeded && errs[index] != nil {
			return errs[index]
		}
	}
	return healErr
}

View File

@ -1,61 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"encoding/json"
"io"
"time"
)
// xlMetaV1 is the version 1 metadata record kept for an erasure coded
// file. It is encoded to and decoded from JSON using the field tags
// below, so renaming or reordering fields changes the stored format.
type xlMetaV1 struct {
// Version of the metadata format itself.
Version string `json:"version"`
// Stat describes the stored file.
Stat struct {
// Size is the total size of the file content in bytes.
Size int64 `json:"size"`
// ModTime is the modification time recorded for the file.
ModTime time.Time `json:"modTime"`
// Version is the file version; the disks holding the highest
// value are the ones considered up to date.
Version int64 `json:"version"`
} `json:"stat"`
// Erasure holds the coding parameters the file was written with.
Erasure struct {
// DataBlocks is the number of data blocks.
DataBlocks int `json:"data"`
// ParityBlocks is the number of parity blocks.
ParityBlocks int `json:"parity"`
// BlockSize is the size in bytes of one block before encoding.
BlockSize int64 `json:"blockSize"`
} `json:"erasure"`
// Minio identifies the server release that wrote the metadata.
Minio struct {
Release string `json:"release"`
} `json:"minio"`
}
// Write encodes the metadata as JSON and hands the encoded bytes to
// writer in a single Write call. It returns the encoding error, if any,
// otherwise the error reported by writer.
func (m xlMetaV1) Write(writer io.Writer) error {
	encoded, err := json.Marshal(m)
	if err == nil {
		_, err = writer.Write(encoded)
	}
	return err
}
// xlMetaV1Decode - decodes one JSON encoded xlMetaV1 from reader.
// On a decode error (file possibly corrupted) the zero xlMetaV1 is
// returned together with the error, never a partially filled value.
func xlMetaV1Decode(reader io.Reader) (metadata xlMetaV1, err error) {
	var decoded xlMetaV1
	err = json.NewDecoder(reader).Decode(&decoded)
	if err != nil {
		return xlMetaV1{}, err
	}
	return decoded, nil
}

View File

@ -1,546 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"errors"
"fmt"
"math/rand"
"os"
slashpath "path"
"strings"
"path"
"sync"
"github.com/klauspost/reedsolomon"
)
// xlMetaV1File is the name of the XL erasure metadata file stored
// inside every file directory.
const xlMetaV1File = "file.json"
// XL layer structure. It spreads erasure coded files over a set of
// storage disks.
type XL struct {
ReedSolomon reedsolomon.Encoder // Erasure encoder/decoder.
// DataBlocks and ParityBlocks are the number of data and parity
// blocks ReedSolomon was created with.
DataBlocks int
ParityBlocks int
// storageDisks are the underlying disks; per-disk results are
// indexed like this slice throughout the layer.
storageDisks []StorageAPI
// readQuorum is the minimum number of consistent disks needed to
// serve a read.
readQuorum int
// writeQuorum is the minimum number of disks a write has to
// succeed on.
writeQuorum int
}
// errUnexpected - returned for any unexpected error, i.e. a condition
// that has no more specific error value; the message asks the user to
// report it upstream.
var errUnexpected = errors.New("Unexpected error - please report at https://github.com/minio/minio/issues")
// newXL instantiates a new XL layer on top of the given disks.
//
// Half of the disks (rounded down) are used as data blocks and the same
// number as parity blocks. The read quorum is N/2 + 1 and the write
// quorum is N/2 + 3 capped at N, where N is the number of disks.
// Returns the error reported by reedsolomon.New when the encoder cannot
// be created for that layout.
func newXL(disks []StorageAPI) (StorageAPI, error) {
	diskCount := len(disks)

	// Calculate data and parity blocks.
	dataBlocks := diskCount / 2
	parityBlocks := diskCount / 2

	// Initialize reed solomon encoding.
	rs, err := reedsolomon.New(dataBlocks, parityBlocks)
	if err != nil {
		return nil, err
	}

	// Write quorum can never exceed the number of disks.
	// (Need to discuss this again)
	writeQuorum := diskCount/2 + 3
	if writeQuorum > diskCount {
		writeQuorum = diskCount
	}

	xl := &XL{
		ReedSolomon:  rs,
		DataBlocks:   dataBlocks,
		ParityBlocks: parityBlocks,
		storageDisks: disks,
		// Read quorum should be always N/2 + 1 (due to Vandermonde
		// matrix erasure requirements)
		readQuorum:  diskCount/2 + 1,
		writeQuorum: writeQuorum,
	}

	// Return successfully initialized.
	return xl, nil
}
// MakeVol - creates the volume on all storage disks in parallel.
//
// Returns errInvalidArgument for an invalid volume name,
// errVolumeExists when every disk reports the volume as existing and
// errWriteQuorum when more disks failed for other reasons than the
// write quorum allows. Nil disks are skipped and count as success.
func (xl XL) MakeVol(volume string) error {
	if !isValidVolname(volume) {
		return errInvalidArgument
	}

	// One error slot per disk, filled by the goroutines below.
	dErrs := make([]error, len(xl.storageDisks))
	var wg sync.WaitGroup
	for index, disk := range xl.storageDisks {
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			if disk != nil {
				dErrs[index] = disk.MakeVol(volume)
			}
		}(index, disk)
	}

	// Wait for all make vol to finish.
	wg.Wait()

	// Classify the collected errors.
	var createVolErr, volumeExistsErrCnt int
	for _, err := range dErrs {
		switch err {
		case nil:
			// Success on this disk.
		case errVolumeExists:
			volumeExistsErrCnt++
		default:
			createVolErr++
		}
	}

	totalDisks := len(xl.storageDisks)
	if volumeExistsErrCnt == totalDisks {
		return errVolumeExists
	}
	if createVolErr > totalDisks-xl.writeQuorum {
		// Errors were more than allowed by the write quorum.
		return errWriteQuorum
	}
	return nil
}
// DeleteVol - deletes the volume from all storage disks in parallel.
//
// errVolumeNotFound and errDiskNotFound from individual disks are
// tolerated; the first error of any other kind is returned as is.
// Returns errVolumeNotFound when every disk reported one of the two
// tolerated errors, errInvalidArgument for an invalid volume name.
func (xl XL) DeleteVol(volume string) error {
	if !isValidVolname(volume) {
		return errInvalidArgument
	}

	dErrs := make([]error, len(xl.storageDisks))
	var wg sync.WaitGroup
	for index, disk := range xl.storageDisks {
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			dErrs[index] = disk.DeleteVol(volume)
		}(index, disk)
	}

	// Wait for all the delete vols to finish.
	wg.Wait()

	// Count the disks on which there was nothing to delete.
	volumeNotFoundErrCnt := 0
	for _, err := range dErrs {
		switch err {
		case nil:
			continue
		case errVolumeNotFound, errDiskNotFound:
			volumeNotFoundErrCnt++
		default:
			// Anything unusual is returned right away.
			return err
		}
	}

	if volumeNotFoundErrCnt == len(xl.storageDisks) {
		return errVolumeNotFound
	}
	return nil
}
// ListVols - lists the volumes, with usage values aggregated over the
// disks.
//
// All disks are listed in parallel; listing errors of individual disks
// are ignored (best effort), such disks simply contribute no listing.
// The returned volumes are taken from the last disk that returned a
// non-empty listing, so that a failed or empty disk does not hide the
// volumes present on the others. Free and Total of every returned
// volume are the sums over all disks that listed more than one volume,
// using the values of the first named volume of each such disk.
//
// NOTE: The assumption here is that volumes across all disks in
// readQuorum have consistent view i.e they all have same number
// of buckets. This is essentially not verified since healing
// should take care of this.
func (xl XL) ListVols() (volsInfo []VolInfo, err error) {
	// Initialize sync waitgroup.
	var wg = &sync.WaitGroup{}

	// Success vols carries the results of ListVols from each disk.
	var successVols = make([][]VolInfo, len(xl.storageDisks))
	for index, disk := range xl.storageDisks {
		wg.Add(1) // Add each go-routine to wait for.
		go func(index int, disk StorageAPI) {
			// Indicate wait group as finished.
			defer wg.Done()

			// Initiate listing; the error is deliberately dropped,
			// a failing disk is tolerated.
			vlsInfo, _ := disk.ListVols()
			successVols[index] = vlsInfo
		}(index, disk)
	}

	// For all the list volumes running in parallel to finish.
	wg.Wait()

	// Loop through success vols and get aggregated usage values,
	// remembering the listing the result is going to be built from.
	var referenceVols []VolInfo
	var total, free int64
	for _, vlsInfo := range successVols {
		if len(vlsInfo) > 0 {
			referenceVols = vlsInfo
		}
		if len(vlsInfo) <= 1 {
			continue
		}
		// Usage values are taken from the first named volume.
		var vlInfo VolInfo
		for _, vlInfo = range vlsInfo {
			if vlInfo.Name == "" {
				continue
			}
			break
		}
		free += vlInfo.Free
		total += vlInfo.Total
	}

	// Save the updated usage values back into the vols.
	for _, vlInfo := range referenceVols {
		vlInfo.Free = free
		vlInfo.Total = total
		volsInfo = append(volsInfo, vlInfo)
	}
	return volsInfo, nil
}
// getAllVolInfo - stats the volume on all disks in parallel.
// Both returned slices are index-aligned with xl.storageDisks: a failed
// stat leaves a zero VolInfo in the first slice and records the error
// at the same index in the second.
func (xl XL) getAllVolInfo(volume string) ([]VolInfo, []error) {
	diskCount := len(xl.storageDisks)
	volsInfo := make([]VolInfo, diskCount)
	errs := make([]error, diskCount)

	var wg sync.WaitGroup
	for index, disk := range xl.storageDisks {
		wg.Add(1)
		// Stat the volume on each disk in its own routine.
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			info, statErr := disk.StatVol(volume)
			if statErr == nil {
				volsInfo[index] = info
				return
			}
			errs[index] = statErr
		}(index, disk)
	}

	// Wait for all the Stat operations to finish.
	wg.Wait()

	return volsInfo, errs
}
// listAllVolInfo - stats the volume on all disks and evaluates quorum.
// Returns
// - stat volume info, index-aligned with xl.storageDisks.
// - boolean to indicate if healing is necessary, i.e. at least one
//   disk failed to stat the volume.
// - errVolumeNotFound when more disks than the read quorum tolerates
//   report the volume as missing, errReadQuorum when fewer than
//   xl.readQuorum disks answered successfully.
func (xl XL) listAllVolInfo(volume string) ([]VolInfo, bool, error) {
	volsInfo, errs := xl.getAllVolInfo(volume)

	// Tally missing volumes and healthy disks in one pass.
	var notFoundCount, onlineDiskCount int
	for _, err := range errs {
		switch err {
		case nil:
			onlineDiskCount++
		case errVolumeNotFound:
			notFoundCount++
		}
	}

	totalDisks := len(xl.storageDisks)
	if notFoundCount > totalDisks-xl.readQuorum {
		return nil, false, errVolumeNotFound
	}

	// Fewer healthy disks than configured means healing is needed,
	// unless there are so few that not even a read quorum is left.
	heal := onlineDiskCount < totalDisks
	if heal && onlineDiskCount < xl.readQuorum {
		return nil, false, errReadQuorum
	}

	// Return success.
	return volsInfo, heal, nil
}
// StatVol - get volume stat info.
//
// The returned VolInfo is taken from the last disk that successfully
// reported the volume, with Free and Total replaced by the sums over
// all disks that reported it. Disks that failed to stat the volume are
// skipped, so an unavailable disk never blanks the result.
//
// When some disks miss the volume it is re-created on them in the
// background (best effort, errors are ignored).
func (xl XL) StatVol(volume string) (volInfo VolInfo, err error) {
	if !isValidVolname(volume) {
		return VolInfo{}, errInvalidArgument
	}

	// List and figured out if we need healing.
	volsInfo, heal, err := xl.listAllVolInfo(volume)
	if err != nil {
		return VolInfo{}, err
	}

	// Heal for missing entries.
	if heal {
		go func() {
			// Create volume if missing on disks.
			for index, volInfo := range volsInfo {
				if volInfo.Name != "" {
					continue
				}
				// Volinfo name would be an empty string, create it.
				xl.storageDisks[index].MakeVol(volume)
			}
		}()
	}

	// Loop through all statVols, calculate the actual usage values.
	var total, free int64
	for _, info := range volsInfo {
		if info.Name == "" {
			// Disk did not report the volume.
			continue
		}
		free += info.Free
		total += info.Total
		// Keep a valid entry as the base of the result.
		volInfo = info
	}

	// Update the aggregated values.
	volInfo.Free = free
	volInfo.Total = total
	return volInfo, nil
}
// isLeafDirectoryXL - reports whether leafPath is a leaf directory on
// the given disk, i.e. whether it contains the file xlMetaV1File.
// Any StatFile error is treated as "not a leaf".
func isLeafDirectoryXL(disk StorageAPI, volume, leafPath string) (isLeaf bool) {
	metaPath := path.Join(leafPath, xlMetaV1File)
	if _, err := disk.StatFile(volume, metaPath); err != nil {
		return false
	}
	return true
}
// ListDir - return all the entries at the given directory path.
// If an entry is a directory it will be returned with a trailing "/",
// except for leaf directories (directories holding xlMetaV1File),
// which are returned without it.
//
// The listing is served by a randomly picked disk; on failure another
// random pick is made, up to len(xl.storageDisks) attempts, after which
// the last listing error is returned. Returns errInvalidArgument for an
// invalid volume name.
func (xl XL) ListDir(volume, dirPath string) (entries []string, err error) {
	if !isValidVolname(volume) {
		return nil, errInvalidArgument
	}
	// Count for list errors encountered.
	var listErrCount = 0

	// Loop through and return the first success entry based on the
	// selected random disk.
	for listErrCount < len(xl.storageDisks) {
		// Choose a random disk on each attempt, do not hit the same disk
		// all the time. rand.Intn(n) returns a value in [0, n), so n must
		// be the number of disks for the last disk to be eligible (and
		// for a single disk setup not to panic).
		randIndex := rand.Intn(len(xl.storageDisks))
		disk := xl.storageDisks[randIndex]
		// Initiate a list operation, if successful filter and return quickly.
		if entries, err = disk.ListDir(volume, dirPath); err == nil {
			for i, entry := range entries {
				// Only directories can be leaf directories, do not
				// stat the disk for plain entries.
				if !strings.HasSuffix(entry, slashSeparator) {
					continue
				}
				if isLeafDirectoryXL(disk, volume, path.Join(dirPath, entry)) {
					entries[i] = strings.TrimSuffix(entry, slashSeparator)
				}
			}
			// We got the entries successfully return.
			return entries, nil
		}
		listErrCount++ // Update list error count.
	}
	// Return error at the end.
	return nil, err
}
// Object API.
// StatFile - returns the FileInfo of volume/path built from the file
// metadata held by the up to date disks.
//
// When some disks are found to be out of date the file is healed in the
// background; a heal failure is only logged. Returns errInvalidArgument
// for an invalid volume or path, otherwise any error reported by
// listOnlineDisks.
func (xl XL) StatFile(volume, path string) (FileInfo, error) {
	if !isValidVolname(volume) || !isValidPath(path) {
		return FileInfo{}, errInvalidArgument
	}

	_, metadata, heal, err := xl.listOnlineDisks(volume, path)
	if err != nil {
		return FileInfo{}, err
	}

	if heal {
		// Heal in background safely, since we already have read quorum disks.
		go func() {
			hErr := xl.healFile(volume, path)
			errorIf(hErr, "Unable to heal file "+volume+"/"+path+".")
		}()
	}

	// Assemble the file info from the metadata.
	info := FileInfo{
		Volume:  volume,
		Name:    path,
		Size:    metadata.Stat.Size,
		ModTime: metadata.Stat.ModTime,
		Mode:    os.FileMode(0644),
	}
	return info, nil
}
// deleteXLFiles - delete all XL backend files of volume/path, i.e. the
// erasure part `file.<index>` and the metadata file xlMetaV1File on
// every disk.
//
// DeleteFile errors are tolerated up to
// len(xl.storageDisks)-xl.writeQuorum in total; the error exceeding
// that budget is returned and the remaining disks are left untouched.
// When deleting the part fails on a disk, its metadata file is not
// attempted.
func (xl XL) deleteXLFiles(volume, path string) error {
	// The metadata file has the same relative path on every disk.
	xlMetaV1FilePath := slashpath.Join(path, xlMetaV1File)
	// Number of delete failures that still satisfies the write quorum.
	toleratedErrs := len(xl.storageDisks) - xl.writeQuorum

	errCount := 0
	// Remove part file and meta data file.
	for index, disk := range xl.storageDisks {
		erasureFilePart := slashpath.Join(path, fmt.Sprintf("file.%d", index))
		if err := disk.DeleteFile(volume, erasureFilePart); err != nil {
			errCount++
			if errCount <= toleratedErrs {
				continue
			}
			return err
		}

		if err := disk.DeleteFile(volume, xlMetaV1FilePath); err != nil {
			errCount++
			if errCount <= toleratedErrs {
				continue
			}
			return err
		}
	}

	// Return success.
	return nil
}
// DeleteFile - deletes the file volume/path, i.e. all of its XL backend
// files. Returns errInvalidArgument for an invalid volume or path,
// otherwise the result of deleteXLFiles.
func (xl XL) DeleteFile(volume, path string) error {
	if !(isValidVolname(volume) && isValidPath(path)) {
		return errInvalidArgument
	}
	return xl.deleteXLFiles(volume, path)
}
// RenameFile - renames srcVolume/srcPath to dstVolume/dstPath on all
// storage disks in parallel.
//
// With failed being the number of disks on which the rename failed:
//   - failed within len(xl.storageDisks)-xl.writeQuorum: success.
//   - otherwise, failed within len(xl.storageDisks)-xl.readQuorum:
//     still success, the file remains readable.
//   - otherwise the destination files are deleted (best effort) and
//     errWriteQuorum is returned.
//
// Returns errInvalidArgument when any volume or path is invalid.
func (xl XL) RenameFile(srcVolume, srcPath, dstVolume, dstPath string) error {
	// Validate inputs.
	if !isValidVolname(srcVolume) {
		return errInvalidArgument
	}
	if !isValidPath(srcPath) {
		return errInvalidArgument
	}
	if !isValidVolname(dstVolume) {
		return errInvalidArgument
	}
	if !isValidPath(dstPath) {
		return errInvalidArgument
	}

	// Initialize sync waitgroup.
	var wg = &sync.WaitGroup{}

	// Initialize list of errors.
	var errs = make([]error, len(xl.storageDisks))

	// Rename file on all underlying storage disks.
	for index, disk := range xl.storageDisks {
		// Append "/" as srcPath and dstPath are either leaf-dirs or non-leaf-dirs.
		// If srcPath is an object instead of prefix we just rename the leaf-dir and
		// not rename the part and metadata files separately.
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			// Record the outcome as is; it must not be reset afterwards
			// or rename failures would go unnoticed.
			errs[index] = disk.RenameFile(srcVolume, retainSlash(srcPath), dstVolume, retainSlash(dstPath))
		}(index, disk)
	}

	// Wait for all RenameFile to finish.
	wg.Wait()

	// Gather err count.
	var errCount = 0
	for _, err := range errs {
		if err == nil {
			continue
		}
		errCount++
	}
	// We can safely allow RenameFile errors up to len(xl.storageDisks) - xl.writeQuorum
	// otherwise return failure. Cleanup successful renames.
	if errCount > len(xl.storageDisks)-xl.writeQuorum {
		// Special condition if readQuorum exists, then return success.
		if errCount <= len(xl.storageDisks)-xl.readQuorum {
			return nil
		}
		// Ignore errors here, delete all successfully written files.
		xl.deleteXLFiles(dstVolume, dstPath)
		return errWriteQuorum
	}
	return nil
}

View File

@ -1,336 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"encoding/json"
"fmt"
"io"
"path"
"strings"
"sync"
"time"
)
// MultipartPartInfo - info of each part kept in the multipart metadata
// file after CompleteMultipartUpload() is called.
type MultipartPartInfo struct {
// PartNumber is the number of the part within the upload.
PartNumber int
// ETag recorded for the part.
// NOTE(review): presumably the MD5 based ETag of the part - confirm.
ETag string
// Size of the part in bytes; used to map object offsets to parts.
Size int64
}
// MultipartObjectInfo is the content of the multipart metadata file written
// when CompleteMultipartUpload() is called.
type MultipartObjectInfo struct {
	// Parts lists the completed parts in the order they were supplied.
	Parts []MultipartPartInfo
	// ModTime is the completion time (UTC).
	ModTime time.Time
	// Size is the sum of all part sizes.
	Size int64
	// MD5Sum is the S3 compatible md5sum computed over the parts.
	MD5Sum string
	// ContentType is copied from the "content-type" upload metadata.
	ContentType string
	// ContentEncoding is copied from the "content-encoding" upload metadata.
	ContentEncoding string
	// Add more fields here.
}
// byMultipartFiles is a list of file names providing Len, Less and Swap so it
// can be sorted by name with any trailing multipartSuffix ignored.
type byMultipartFiles []string

// Len returns the number of file names in the list.
func (files byMultipartFiles) Len() int {
	return len(files)
}

// Swap exchanges the entries at positions i and j.
func (files byMultipartFiles) Swap(i, j int) {
	files[j], files[i] = files[i], files[j]
}

// Less reports whether entry i sorts before entry j once multipartSuffix has
// been trimmed from both names.
func (files byMultipartFiles) Less(i, j int) bool {
	return strings.TrimSuffix(files[i], multipartSuffix) < strings.TrimSuffix(files[j], multipartSuffix)
}
// GetPartNumberOffset - translates an offset into the whole object into the
// index of the part containing it and the offset inside that part.
//
// When the offset lies at or beyond the total size of all parts the function
// returns errUnexpected; in that case partIndex is the index of the last part
// (0 when there are no parts) and partOffset is the amount by which the
// offset overshoots the object.
func (m MultipartObjectInfo) GetPartNumberOffset(offset int64) (partIndex int, partOffset int64, err error) {
	remaining := offset
	for idx := range m.Parts {
		size := m.Parts[idx].Size
		if remaining < size {
			return idx, remaining, nil
		}
		// Offset is past this part, skip over it.
		remaining -= size
	}
	// Offset beyond the size of the object.
	last := 0
	if count := len(m.Parts); count > 0 {
		last = count - 1
	}
	return last, remaining, errUnexpected
}
// getMultipartObjectMeta - reads metaFile from minioMetaBucket and decodes its
// JSON content into a string map. Returns a nil map together with the error
// when the file cannot be read or decoded.
func getMultipartObjectMeta(storage StorageAPI, metaFile string) (meta map[string]string, err error) {
	// Read from the very beginning of the file.
	reader, err := storage.ReadFile(minioMetaBucket, metaFile, int64(0))
	if err != nil {
		return nil, err
	}
	// Close the metadata reader once decoding is done.
	defer reader.Close()

	decoded := make(map[string]string)
	if err = json.NewDecoder(reader).Decode(&decoded); err != nil {
		return nil, err
	}
	return decoded, nil
}
// partNumToPartFileName - returns the file name used for a part: the part
// number zero padded to at least five digits followed by multipartSuffix.
func partNumToPartFileName(partNum int) string {
	padded := fmt.Sprintf("%.5d", partNum)
	return padded + multipartSuffix
}
// ListMultipartUploads - list multipart uploads.
// Thin wrapper: forwards every argument unchanged, together with the xl object
// layer itself, to listMultipartUploadsCommon and returns its result as is.
func (xl xlObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
return listMultipartUploadsCommon(xl, bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads)
}
// NewMultipartUpload - initialize a new multipart upload, returns a unique id.
// Thin wrapper: delegates to newMultipartUploadCommon using xl.storage as the
// backing StorageAPI; bucket, object and meta are passed through unchanged.
func (xl xlObjects) NewMultipartUpload(bucket, object string, meta map[string]string) (string, error) {
return newMultipartUploadCommon(xl.storage, bucket, object, meta)
}
// PutObjectPart - writes the multipart upload chunks.
// Thin wrapper: delegates to putObjectPartCommon using xl.storage as the
// backing StorageAPI; all other arguments are passed through unchanged and
// the returned string/error pair is returned as is.
func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
return putObjectPartCommon(xl.storage, bucket, object, uploadID, partID, size, data, md5Hex)
}
// ListObjectParts - list object parts.
// Thin wrapper: delegates to listObjectPartsCommon using xl.storage as the
// backing StorageAPI; all other arguments are passed through unchanged.
func (xl xlObjects) ListObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
return listObjectPartsCommon(xl.storage, bucket, object, uploadID, partNumberMarker, maxParts)
}
// parentDirIsObject - verifies that neither parent nor any of its ancestor
// prefixes is an existing object. Suppose object is "a/b/c/d" and parent is
// "a/b/c": this makes sure that objects "a/b/c", "a/b" and "a" do not exist.
//
// Returns errFileAccessDenied if one of the prefixes is an object, nil if none
// of them is, and any other error from getObjectInfo unchanged.
func (xl xlObjects) parentDirIsObject(bucket, parent string) error {
	// path.Dir() converges to "." for relative paths and to "/" for rooted
	// ones; both are fixed points, so stop on either to guarantee termination.
	for p := parent; p != "." && p != "/"; p = path.Dir(p) {
		_, err := xl.getObjectInfo(bucket, p)
		if err == nil {
			// If there is already a file at prefix "p" return error.
			return errFileAccessDenied
		}
		if err != errFileNotFound {
			return err
		}
		// Nothing at "p", continue with the next parent path.
	}
	return nil
}
// CompleteMultipartUpload - validates the uploaded parts, writes the multipart
// metadata file, renames the parts to their final names and moves the whole
// upload directory to the destination object, replacing any existing object.
//
// Returns the S3 compatible md5sum of the completed object. Errors:
//   - BucketNameInvalid, BucketNotFound, ObjectNameInvalid, InvalidUploadID
//     when the request does not refer to a valid on-going upload.
//   - InvalidPart when a listed part does not exist.
//   - PartTooSmall when any part but the last is below the minimum size.
//   - storage errors converted through toObjectErr otherwise.
func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, uploadID string, parts []completePart) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !isBucketExist(xl.storage, bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	if !isUploadIDExists(xl.storage, bucket, object, uploadID) {
		return "", InvalidUploadID{UploadID: uploadID}
	}
	// Hold lock so that
	// 1) no one aborts this multipart upload
	// 2) no one does a parallel complete-multipart-upload on this multipart upload
	nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
	defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))

	// Calculate s3 compatible md5sum for complete multipart.
	s3MD5, err := completeMultipartMD5(parts...)
	if err != nil {
		return "", err
	}

	var metadata = MultipartObjectInfo{}
	var errs = make([]error, len(parts))

	// Metadata supplied at NewMultipartUpload time lives in the incomplete file.
	uploadIDIncompletePath := path.Join(mpartMetaPrefix, bucket, object, uploadID, incompleteFile)
	objMeta, err := getMultipartObjectMeta(xl.storage, uploadIDIncompletePath)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadIDIncompletePath)
	}

	// Waitgroup to wait for go-routines.
	var wg = &sync.WaitGroup{}

	// Loop through all parts, validate them and record them in the metadata.
	for i, part := range parts {
		// Construct part suffix.
		partSuffix := fmt.Sprintf("%.5d.%s", part.PartNumber, part.ETag)
		multipartPartFile := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)
		var fi FileInfo
		fi, err = xl.storage.StatFile(minioMetaBucket, multipartPartFile)
		if err != nil {
			if err == errFileNotFound {
				return "", InvalidPart{}
			}
			return "", err
		}
		// All parts except the last part has to be atleast 5MB.
		if (i < len(parts)-1) && !isMinAllowedPartSize(fi.Size) {
			return "", PartTooSmall{}
		}
		// Update metadata parts.
		metadata.Parts = append(metadata.Parts, MultipartPartInfo{
			PartNumber: part.PartNumber,
			ETag:       part.ETag,
			Size:       fi.Size,
		})
		metadata.Size += fi.Size
	}

	// Check if an object is present as one of the parent dir.
	if err = xl.parentDirIsObject(bucket, path.Dir(object)); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Save successfully calculated md5sum.
	metadata.MD5Sum = s3MD5
	metadata.ContentType = objMeta["content-type"]
	metadata.ContentEncoding = objMeta["content-encoding"]

	// Save modTime as well as the current time.
	metadata.ModTime = time.Now().UTC()

	// Create temporary multipart meta file to write and then rename.
	multipartMetaSuffix := fmt.Sprintf("%s.%s", uploadID, multipartMetaFile)
	tempMultipartMetaFile := path.Join(tmpMetaPrefix, bucket, object, multipartMetaSuffix)
	w, err := xl.storage.CreateFile(minioMetaBucket, tempMultipartMetaFile)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}
	if err = json.NewEncoder(w).Encode(&metadata); err != nil {
		// Keep the encode error in err; a separate variable carries the
		// cleanup result so the original failure is never overwritten.
		if clErr := safeCloseAndRemove(w); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}
	// Close the writer.
	if err = w.Close(); err != nil {
		if clErr := safeCloseAndRemove(w); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Attempt a Rename of multipart meta file to final namespace.
	multipartObjFile := path.Join(mpartMetaPrefix, bucket, object, uploadID, multipartMetaFile)
	err = xl.storage.RenameFile(minioMetaBucket, tempMultipartMetaFile, minioMetaBucket, multipartObjFile)
	if err != nil {
		if derr := xl.storage.DeleteFile(minioMetaBucket, tempMultipartMetaFile); derr != nil {
			return "", toObjectErr(err, minioMetaBucket, tempMultipartMetaFile)
		}
		return "", toObjectErr(err, bucket, multipartObjFile)
	}

	// Loop through and atomically rename the parts to their actual location.
	for index, part := range parts {
		wg.Add(1)
		go func(index int, part completePart) {
			defer wg.Done()
			partSuffix := fmt.Sprintf("%.5d.%s", part.PartNumber, part.ETag)
			src := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)
			dst := path.Join(mpartMetaPrefix, bucket, object, uploadID, partNumToPartFileName(part.PartNumber))
			errs[index] = xl.storage.RenameFile(minioMetaBucket, src, minioMetaBucket, dst)
			errorIf(errs[index], "Unable to rename file %s to %s.", src, dst)
		}(index, part)
	}

	// Wait for all the renames to finish.
	wg.Wait()

	// Loop through errs list and return first error.
	for _, renameErr := range errs {
		if renameErr != nil {
			return "", toObjectErr(renameErr, bucket, object)
		}
	}

	// Delete the incomplete file place holder.
	err = xl.storage.DeleteFile(minioMetaBucket, uploadIDIncompletePath)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadIDIncompletePath)
	}

	// Hold write lock on the destination before rename.
	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	// Delete if an object already exists.
	// FIXME: rename it to tmp file and delete only after
	// the newly uploaded file is renamed from tmp location to
	// the original location.
	// Both kinds of existing object must fall through to the rename below,
	// otherwise the freshly completed upload never reaches its destination.
	if isMultipartObject(xl.storage, bucket, object) {
		err = xl.deleteMultipartObject(bucket, object)
	} else {
		err = xl.deleteObject(bucket, object)
	}
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Move the completed upload directory to the final object location.
	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	if err = xl.storage.RenameFile(minioMetaBucket, uploadIDPath, bucket, object); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Hold the lock so that two parallel complete-multipart-uploads do no
	// leave a stale uploads.json behind.
	nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))
	defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))

	// Validate if there are other incomplete upload-id's present for
	// the object, if yes do not attempt to delete 'uploads.json'.
	var entries []string
	if entries, err = xl.storage.ListDir(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object)); err == nil {
		if len(entries) > 1 {
			return s3MD5, nil
		}
	}

	uploadsJSONPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
	err = xl.storage.DeleteFile(minioMetaBucket, uploadsJSONPath)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadsJSONPath)
	}

	// Return md5sum.
	return s3MD5, nil
}
// AbortMultipartUpload - aborts a multipart upload.
// Thin wrapper: delegates to abortMultipartUploadCommon using xl.storage as
// the backing StorageAPI and returns its error unchanged.
func (xl xlObjects) AbortMultipartUpload(bucket, object, uploadID string) error {
return abortMultipartUploadCommon(xl.storage, bucket, object, uploadID)
}

View File

@ -1,581 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"crypto/md5"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"path"
"path/filepath"
"strings"
"sync"
"github.com/minio/minio/pkg/mimedb"
)
const (
	// multipartSuffix is the suffix appended to multipart part file names.
	multipartSuffix = ".minio.multipart"

	// multipartMetaFile is the name of the metadata file kept inside a
	// multipart object, i.e. "part zero" carrying the multipart suffix.
	multipartMetaFile = "00000" + multipartSuffix

	// formatConfigFile is the name of the format configuration file.
	// NOTE(review): not referenced in this part of the file - confirm usage.
	formatConfigFile = "format.json"
)
// xlObjects - implements the XL object layer on top of a StorageAPI.
type xlObjects struct {
	// storage is the backend every bucket and object operation goes through.
	storage StorageAPI

	// listObjectMap holds tree walkers keyed by their listing parameters.
	// NOTE(review): presumably used to resume paginated listings - confirm
	// against the listing code, it is only initialized in this file.
	listObjectMap map[listParams][]*treeWalker

	// listObjectMapMutex is allocated alongside listObjectMap.
	// NOTE(review): presumably guards listObjectMap - confirm.
	listObjectMapMutex *sync.Mutex
}
// Errors returned while validating the number of configured disks.
var (
	// errMaxDisks - returned when more disks than the supported maximum are given.
	errMaxDisks = errors.New("Number of disks are higher than supported maximum count '16'")

	// errMinDisks - returned when fewer disks than the supported minimum are given.
	errMinDisks = errors.New("Number of disks are smaller than supported minimum count '8'")

	// errNumDisks - returned for an odd number of disks.
	errNumDisks = errors.New("Number of disks should be multiples of '2'")
)
// Bounds on the number of disks accepted by checkSufficientDisks.
const (
	// maxErasureBlocks is the maximum number of erasure blocks (disks).
	maxErasureBlocks = 16

	// minErasureBlocks is the minimum number of erasure blocks (disks).
	minErasureBlocks = 8
)
// checkSufficientDisks - validates the number of configured disks.
// Only even counts between minErasureBlocks and maxErasureBlocks are
// accepted, i.e. 8, 10, 12, 14 or 16 disks.
//
// Returns errMaxDisks, errMinDisks or errNumDisks on violation, nil otherwise.
func checkSufficientDisks(disks []string) error {
	switch count := len(disks); {
	case count > maxErasureBlocks:
		return errMaxDisks
	case count < minErasureBlocks:
		return errMinDisks
	case count%2 != 0:
		// Odd number of disks is not supported.
		return errNumDisks
	}
	return nil
}
// newStorageLayer - initializes the storage layer for a single disk. A disk
// string containing ':' and no filesystem volume name (as reported by
// filepath.VolumeName) is treated as a network disk served by the RPC client,
// anything else is treated as a local path served by posix.
func newStorageLayer(disk string) (storage StorageAPI, err error) {
	isNetworkDisk := strings.ContainsRune(disk, ':') && filepath.VolumeName(disk) == ""
	if isNetworkDisk {
		// Initialize rpc client storage API.
		return newRPCClient(disk)
	}
	// Initialize filesystem storage API.
	return newPosix(disk)
}
// bootstrapDisks - initializes a storage layer for every disk, returning them
// in the same order as disks.
//
// errDiskNotFound is intentionally tolerated so that bootstrapping succeeds
// even when a disk is missing; subsequent calls to XL/Erasure are expected to
// manage such disks. Any other initialization error aborts the bootstrap.
func bootstrapDisks(disks []string) ([]StorageAPI, error) {
	layers := make([]StorageAPI, len(disks))
	for i := range disks {
		layer, err := newStorageLayer(disks[i])
		if err != nil && err != errDiskNotFound {
			return nil, err
		}
		// Store whatever was returned, also for a disk that was not found.
		layers[i] = layer
	}
	return layers, nil
}
// newXLObjects - initialize new xl object layer.
//
// Validates the disk count, bootstraps every disk, loads the saved XL format
// (writing a fresh one on unformatted disks) and finally builds the XL storage
// the object layer operates on.
func newXLObjects(disks []string) (ObjectLayer, error) {
	if err := checkSufficientDisks(disks); err != nil {
		return nil, err
	}

	storageDisks, err := bootstrapDisks(disks)
	if err != nil {
		return nil, err
	}

	// Initialize object layer - like creating minioMetaBucket, cleaning up tmp files etc.
	initObjectLayer(storageDisks...)

	// Load saved XL format.json and validate.
	newDisks, err := loadFormatXL(storageDisks)
	if err == errUnformattedDisk {
		// Fresh disks, save new XL format and use the disks as given.
		if errSave := initFormatXL(storageDisks); errSave != nil {
			return nil, errSave
		}
		newDisks = storageDisks
	} else if err != nil {
		// errCorruptedDisk - error.
		return nil, fmt.Errorf("Unable to recognize backend format, %s", err)
	}

	// FIXME: healFormatXL(newDisks)

	storage, err := newXL(newDisks)
	if err != nil {
		return nil, err
	}

	// Return successfully initialized object layer.
	layer := xlObjects{
		storage:            storage,
		listObjectMap:      make(map[listParams][]*treeWalker),
		listObjectMapMutex: &sync.Mutex{},
	}
	return layer, nil
}
/// Bucket operations
// MakeBucket - make a bucket.
// Holds the namespace write lock on the bucket (empty object name) for the
// duration of the call and delegates the creation to makeBucket on xl.storage.
func (xl xlObjects) MakeBucket(bucket string) error {
nsMutex.Lock(bucket, "")
defer nsMutex.Unlock(bucket, "")
return makeBucket(xl.storage, bucket)
}
// GetBucketInfo - get bucket info.
// Holds the namespace read lock on the bucket (empty object name) for the
// duration of the call and delegates the lookup to getBucketInfo on xl.storage.
func (xl xlObjects) GetBucketInfo(bucket string) (BucketInfo, error) {
nsMutex.RLock(bucket, "")
defer nsMutex.RUnlock(bucket, "")
return getBucketInfo(xl.storage, bucket)
}
// ListBuckets - list buckets.
// Thin wrapper: delegates to listBuckets on xl.storage without taking any
// namespace lock.
func (xl xlObjects) ListBuckets() ([]BucketInfo, error) {
return listBuckets(xl.storage)
}
// DeleteBucket - delete a bucket.
// Holds the namespace write lock on the bucket (empty object name) until the
// deletion performed by deleteBucket on xl.storage has finished.
func (xl xlObjects) DeleteBucket(bucket string) error {
	nsMutex.Lock(bucket, "")
	// The unlock must be deferred: releasing the lock right away would let
	// the deletion race with other operations on the same bucket.
	defer nsMutex.Unlock(bucket, "")
	return deleteBucket(xl.storage, bucket)
}
/// Object Operations
// GetObject - get an object.
// Returns a reader over the object content starting at startOffset.
//
// A regular (single file) object is read straight from storage. A multipart
// object is streamed through an io.Pipe: a go-routine copies the parts one
// after the other into the pipe, starting at the part that contains
// startOffset. Errors hit while streaming are delivered to the returned
// reader through CloseWithError.
func (xl xlObjects) GetObject(bucket, object string, startOffset int64) (io.ReadCloser, error) {
// Verify if bucket is valid.
if !IsValidBucketName(bucket) {
return nil, BucketNameInvalid{Bucket: bucket}
}
// Verify if object is valid.
if !IsValidObjectName(object) {
return nil, ObjectNameInvalid{Bucket: bucket, Object: object}
}
// Read lock held until this function returns.
nsMutex.RLock(bucket, object)
defer nsMutex.RUnlock(bucket, object)
// Regular object: stat it and hand out the storage reader directly.
if !isMultipartObject(xl.storage, bucket, object) {
_, err := xl.storage.StatFile(bucket, object)
if err == nil {
var reader io.ReadCloser
reader, err = xl.storage.ReadFile(bucket, object, startOffset)
if err != nil {
return nil, toObjectErr(err, bucket, object)
}
return reader, nil
}
return nil, toObjectErr(err, bucket, object)
}
// Multipart object: the caller reads from fileReader while the go-routine
// below writes the parts into fileWriter.
fileReader, fileWriter := io.Pipe()
info, err := getMultipartObjectInfo(xl.storage, bucket, object)
if err != nil {
return nil, toObjectErr(err, bucket, object)
}
// Locate the part holding startOffset and the offset within that part.
partIndex, offset, err := info.GetPartNumberOffset(startOffset)
if err != nil {
return nil, toObjectErr(err, bucket, object)
}
// Hold a read lock once more which can be released after the following go-routine ends.
// We hold RLock once more because the current function would return before the go routine below
// executes and hence releasing the read lock (because of defer'ed nsMutex.RUnlock() call).
nsMutex.RLock(bucket, object)
go func() {
defer nsMutex.RUnlock(bucket, object)
// Stream every part from partIndex onwards, in order.
for ; partIndex < len(info.Parts); partIndex++ {
part := info.Parts[partIndex]
r, err := xl.storage.ReadFile(bucket, pathJoin(object, partNumToPartFileName(part.PartNumber)), offset)
if err != nil {
fileWriter.CloseWithError(err)
return
}
// Reset offset to 0 as it would be non-0 only for the first loop if startOffset is non-0.
offset = 0
if _, err = io.Copy(fileWriter, r); err != nil {
// Release the part reader before reporting the failure; a pipe reader
// is closed with the error so that its writer side sees it as well.
switch reader := r.(type) {
case *io.PipeReader:
reader.CloseWithError(err)
case io.ReadCloser:
reader.Close()
}
fileWriter.CloseWithError(err)
return
}
// Close the readerCloser that reads multiparts of an object from the xl storage layer.
// Not closing leaks underlying file descriptors.
r.Close()
}
// All parts were copied, signal EOF to the reading side.
fileWriter.Close()
}()
return fileReader, nil
}
// getMultipartObjectInfo - reads the multipart metadata file
// (multipartMetaFile) stored under the object and decodes it.
// Returns an empty MultipartObjectInfo together with the error when the file
// cannot be read or its JSON content cannot be decoded.
func getMultipartObjectInfo(storage StorageAPI, bucket, object string) (info MultipartObjectInfo, err error) {
	// Read the metadata file from the beginning.
	offset := int64(0)
	r, err := storage.ReadFile(bucket, pathJoin(object, multipartMetaFile), offset)
	if err != nil {
		return MultipartObjectInfo{}, err
	}
	// Not closing the reader leaks the underlying file descriptor.
	defer r.Close()

	decoder := json.NewDecoder(r)
	if err = decoder.Decode(&info); err != nil {
		return MultipartObjectInfo{}, err
	}
	return info, nil
}
// getObjectInfo - returns the ObjectInfo of an object without taking any
// namespace lock.
//
// The object is first looked up as a simple-PUT upload; if it is not found
// that way the multipart metadata is consulted instead. For simple objects the
// md5sum, content type and content encoding come from the "meta.json" file
// stored under the object; a missing content type falls back to a lookup by
// file extension and finally to "application/octet-stream".
//
// On failure an empty ObjectInfo is returned together with the error.
func (xl xlObjects) getObjectInfo(bucket, object string) (objInfo ObjectInfo, err error) {
	objInfo.Bucket = bucket
	objInfo.Name = object

	// First see if the object was a simple-PUT upload.
	fi, err := xl.storage.StatFile(bucket, object)
	if err != nil {
		if err != errFileNotFound {
			return ObjectInfo{}, err
		}
		var info MultipartObjectInfo
		// Check if the object was multipart upload.
		info, err = getMultipartObjectInfo(xl.storage, bucket, object)
		if err != nil {
			return ObjectInfo{}, err
		}
		objInfo.Size = info.Size
		objInfo.ModTime = info.ModTime
		objInfo.MD5Sum = info.MD5Sum
		objInfo.ContentType = info.ContentType
		objInfo.ContentEncoding = info.ContentEncoding
		return objInfo, nil
	}

	metadata := make(map[string]string)
	offset := int64(0) // To read entire content
	r, err := xl.storage.ReadFile(bucket, pathJoin(object, "meta.json"), offset)
	if err != nil {
		return ObjectInfo{}, toObjectErr(err, bucket, object)
	}
	// Not closing the reader leaks the underlying file descriptor.
	defer r.Close()

	decoder := json.NewDecoder(r)
	if err = decoder.Decode(&metadata); err != nil {
		return ObjectInfo{}, toObjectErr(err, bucket, object)
	}

	contentType := metadata["content-type"]
	if len(contentType) == 0 {
		// No content type was saved, guess it from the object extension.
		contentType = "application/octet-stream"
		if objectExt := filepath.Ext(object); objectExt != "" {
			content, ok := mimedb.DB[strings.ToLower(strings.TrimPrefix(objectExt, "."))]
			if ok {
				contentType = content.ContentType
			}
		}
	}

	objInfo.Size = fi.Size
	objInfo.IsDir = fi.Mode.IsDir()
	objInfo.ModTime = fi.ModTime
	objInfo.MD5Sum = metadata["md5Sum"]
	objInfo.ContentType = contentType
	objInfo.ContentEncoding = metadata["content-encoding"]
	return objInfo, nil
}
// GetObjectInfo - get object info.
// Validates the bucket and object names, then looks the object up while
// holding the namespace read lock on it. Lookup errors are converted with
// toObjectErr.
func (xl xlObjects) GetObjectInfo(bucket, object string) (ObjectInfo, error) {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return ObjectInfo{}, BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return ObjectInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	nsMutex.RLock(bucket, object)
	defer nsMutex.RUnlock(bucket, object)

	objInfo, err := xl.getObjectInfo(bucket, object)
	if err == nil {
		return objInfo, nil
	}
	return ObjectInfo{}, toObjectErr(err, bucket, object)
}
// PutObject - create an object.
//
// The data is streamed into a temporary file under minioMetaBucket while its
// md5sum is computed, verified against metadata["md5Sum"] when the caller
// supplied one, and then renamed to the final location replacing any existing
// object. The metadata map is saved as "meta.json" under the object.
//
// If size is greater than zero exactly size bytes are read from data,
// otherwise data is read until EOF. When metadata carries no "md5Sum" the
// computed one is stored into the map (the caller's map is modified).
//
// Returns the hex encoded md5sum of the written data. Errors:
//   - BucketNameInvalid, BucketNotFound, ObjectNameInvalid for bad requests.
//   - BadDigest when the supplied md5sum does not match the data.
//   - storage errors converted through toObjectErr otherwise.
func (xl xlObjects) PutObject(bucket string, object string, size int64, data io.Reader, metadata map[string]string) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify bucket exists.
	if !isBucketExist(xl.storage, bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	// No metadata is set, allocate a new one.
	if metadata == nil {
		metadata = make(map[string]string)
	}
	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	tempObj := path.Join(tmpMetaPrefix, bucket, object)
	fileWriter, err := xl.storage.CreateFile(minioMetaBucket, tempObj)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Initialize md5 writer.
	md5Writer := md5.New()

	// Instantiate a new multi writer, data goes to the hasher and the file.
	multiWriter := io.MultiWriter(md5Writer, fileWriter)

	// Copy exactly size bytes when the size is known, everything otherwise.
	if size > 0 {
		_, err = io.CopyN(multiWriter, data, size)
	} else {
		_, err = io.Copy(multiWriter, data)
	}
	if err != nil {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))
	// Update the md5sum if not set with the newly calculated one.
	if len(metadata["md5Sum"]) == 0 {
		metadata["md5Sum"] = newMD5Hex
	}
	// md5Hex representation, non-empty at this point.
	if md5Hex := metadata["md5Sum"]; newMD5Hex != md5Hex {
		if err = safeCloseAndRemove(fileWriter); err != nil {
			return "", toObjectErr(err, bucket, object)
		}
		return "", BadDigest{md5Hex, newMD5Hex}
	}

	err = fileWriter.Close()
	if err != nil {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Check if an object is present as one of the parent dir.
	if err = xl.parentDirIsObject(bucket, path.Dir(object)); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Delete if an object already exists.
	// FIXME: rename it to tmp file and delete only after
	// the newly uploaded file is renamed from tmp location to
	// the original location.
	// Verify if the object is a multipart object.
	if isMultipartObject(xl.storage, bucket, object) {
		err = xl.deleteMultipartObject(bucket, object)
	} else {
		err = xl.deleteObject(bucket, object)
	}
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Move the temporary file to its final location.
	err = xl.storage.RenameFile(minioMetaBucket, tempObj, bucket, object)
	if err != nil {
		if dErr := xl.storage.DeleteFile(minioMetaBucket, tempObj); dErr != nil {
			return "", toObjectErr(dErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Write the metadata to a temporary "meta.json" first.
	tempMetaJSONFile := path.Join(tmpMetaPrefix, bucket, object, "meta.json")
	metaWriter, err := xl.storage.CreateFile(minioMetaBucket, tempMetaJSONFile)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	encoder := json.NewEncoder(metaWriter)
	err = encoder.Encode(&metadata)
	if err != nil {
		if clErr := safeCloseAndRemove(metaWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}
	if err = metaWriter.Close(); err != nil {
		// Keep the close error in err; a separate variable carries the
		// cleanup result so the original failure is never overwritten.
		if clErr := safeCloseAndRemove(metaWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Move "meta.json" next to the object.
	metaJSONFile := path.Join(object, "meta.json")
	err = xl.storage.RenameFile(minioMetaBucket, tempMetaJSONFile, bucket, metaJSONFile)
	if err != nil {
		if derr := xl.storage.DeleteFile(minioMetaBucket, tempMetaJSONFile); derr != nil {
			return "", toObjectErr(derr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Return md5sum, successfully wrote object.
	return newMD5Hex, nil
}
// isMultipartObject - reports whether the object is a special multipart
// object, i.e. whether the multipart metadata file exists under it.
// A stat failure other than errFileNotFound is logged and treated as
// "not a multipart object".
func isMultipartObject(storage StorageAPI, bucket, object string) bool {
	metaPath := pathJoin(object, multipartMetaFile)
	_, err := storage.StatFile(bucket, metaPath)
	if err == nil {
		return true
	}
	if err != errFileNotFound {
		errorIf(err, "Failed to stat file "+bucket+metaPath)
	}
	return false
}
// deleteMultipartObject - deletes only multipart object.
// All part files listed in the multipart metadata are deleted concurrently,
// the metadata file itself is deleted last.
//
// If any part cannot be removed errUnexpected is returned rather than the
// underlying error, as returning it might be misleading: for example an
// errFileNotFound from DeleteFile() would be seen by the client as
// ObjectNotFound.
func (xl xlObjects) deleteMultipartObject(bucket, object string) error {
	// Get parts info.
	meta, err := getMultipartObjectInfo(xl.storage, bucket, object)
	if err != nil {
		return err
	}

	partErrs := make([]error, len(meta.Parts))
	var pending sync.WaitGroup
	pending.Add(len(meta.Parts))
	for i := range meta.Parts {
		// Delete each part in its own routine.
		go func(slot int, partNumber int) {
			defer pending.Done()
			partPath := pathJoin(object, partNumToPartFileName(partNumber))
			partErrs[slot] = xl.storage.DeleteFile(bucket, partPath)
		}(i, meta.Parts[i].PartNumber)
	}

	// Wait for all the deletes to finish.
	pending.Wait()

	for _, partErr := range partErrs {
		if partErr != nil {
			return errUnexpected
		}
	}

	// Parts are gone, remove the multipart metadata file.
	return xl.storage.DeleteFile(bucket, pathJoin(object, multipartMetaFile))
}
// deleteObject - deletes a regular object: first its "meta.json", then the
// object file itself. A file that does not exist is not an error; the first
// other failure is returned and stops the deletion.
func (xl xlObjects) deleteObject(bucket, object string) error {
	targets := []string{
		path.Join(object, "meta.json"),
		object,
	}
	for _, target := range targets {
		err := xl.storage.DeleteFile(bucket, target)
		if err != nil && err != errFileNotFound {
			return err
		}
	}
	return nil
}
// DeleteObject - delete the object.
// Validates the names, then removes the object while holding the namespace
// write lock on it, picking the multipart or the regular deletion depending
// on the kind of object found. Errors are converted with toObjectErr.
func (xl xlObjects) DeleteObject(bucket, object string) error {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	var err error
	if isMultipartObject(xl.storage, bucket, object) {
		err = xl.deleteMultipartObject(bucket, object)
	} else {
		err = xl.deleteObject(bucket, object)
	}
	if err != nil {
		return toObjectErr(err, bucket, object)
	}
	return nil
}
// ListObjects - list all objects at prefix, delimited by '/'.
// Thin wrapper: forwards every argument unchanged, together with the xl object
// layer itself, to listObjectsCommon and returns its result as is.
func (xl xlObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
return listObjectsCommon(xl, bucket, prefix, marker, delimiter, maxKeys)
}

355
xl-v1-bucket.go Normal file
View File

@ -0,0 +1,355 @@
package main
import (
"sort"
"sync"
)
/// Bucket operations

// MakeBucket - make a bucket.
// The volume is created on all underlying storage disks concurrently while
// the namespace write lock on the bucket is held.
//
// Returns BucketNameInvalid for an invalid name, the converted
// errVolumeExists when every disk reports the volume as existing, and the
// converted errWriteQuorum when more disks failed than write quorum tolerates.
func (xl xlObjects) MakeBucket(bucket string) error {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}

	nsMutex.Lock(bucket, "")
	defer nsMutex.Unlock(bucket, "")

	diskCount := len(xl.storageDisks)

	// One result slot per disk, written only by that disk's routine.
	results := make([]error, diskCount)
	var group sync.WaitGroup
	for i, d := range xl.storageDisks {
		group.Add(1)
		// Make a volume inside a go-routine.
		go func(slot int, target StorageAPI) {
			defer group.Done()
			results[slot] = target.MakeVol(bucket)
		}(i, d)
	}

	// Wait for all make vol to finish.
	group.Wait()

	// Classify the results: already existing volumes versus generic failures.
	var existing, failed int
	for _, res := range results {
		switch res {
		case nil:
			// Volume created.
		case errVolumeExists:
			existing++
		default:
			failed++
		}
	}

	switch {
	case existing == diskCount:
		// All disks report volume exists.
		return toObjectErr(errVolumeExists, bucket)
	case failed > diskCount-xl.writeQuorum:
		// Errors were more than allowed write quorum.
		return toObjectErr(errWriteQuorum, bucket)
	}
	return nil
}
// getAllBucketInfo - stats the bucket on all disks concurrently.
//
// Both returned slices have one entry per storage disk, at the same index as
// the disk in xl.storageDisks. A disk whose stat failed, or which reported a
// name that is not a valid bucket name, has a zero BucketInfo (empty Name);
// the error slice carries the corresponding stat error, nil on success.
func (xl xlObjects) getAllBucketInfo(bucketName string) ([]BucketInfo, []error) {
	// Create errs and volInfo slices of storageDisks size.
	var errs = make([]error, len(xl.storageDisks))
	var volsInfo = make([]VolInfo, len(xl.storageDisks))

	// Allocate a new waitgroup.
	var wg = &sync.WaitGroup{}
	for index, disk := range xl.storageDisks {
		wg.Add(1)
		// Stat volume on all the disks in a routine.
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			volInfo, err := disk.StatVol(bucketName)
			if err != nil {
				errs[index] = err
				return
			}
			volsInfo[index] = volInfo
			errs[index] = nil
		}(index, disk)
	}

	// Wait for all the Stat operations to finish.
	wg.Wait()

	// Fill the result by index (not by append) so that entry i always
	// describes disk i; callers rely on this to find the disks to heal.
	var bucketsInfo = make([]BucketInfo, len(xl.storageDisks))
	for index, volInfo := range volsInfo {
		if !IsValidBucketName(volInfo.Name) {
			continue
		}
		bucketsInfo[index] = BucketInfo{
			Name:    volInfo.Name,
			Created: volInfo.Created,
			Total:   volInfo.Total,
			Free:    volInfo.Free,
		}
	}
	return bucketsInfo, errs
}

// listAllBucketInfo - list all stat volume info from all disks.
// Returns
// - stat volume info for all disks, see getAllBucketInfo.
// - boolean to indicate if healing is necessary.
// - error if any: errVolumeNotFound when more disks than read quorum
//   tolerates do not have the volume, errReadQuorum when fewer than
//   readQuorum disks answered successfully.
func (xl xlObjects) listAllBucketInfo(bucketName string) ([]BucketInfo, bool, error) {
	bucketsInfo, errs := xl.getAllBucketInfo(bucketName)

	// Count the disks missing the volume and the disks that answered.
	notFoundCount := 0
	onlineDiskCount := 0
	for _, err := range errs {
		switch err {
		case nil:
			onlineDiskCount++
		case errVolumeNotFound:
			notFoundCount++
		}
	}

	// If the volume is missing on more disks than allowed by read quorum
	// we return err as errVolumeNotFound.
	if notFoundCount > len(xl.storageDisks)-xl.readQuorum {
		return nil, false, errVolumeNotFound
	}

	var heal bool
	// If online disks count is lesser than configured disks, most
	// probably we need to heal, unless we do not have readQuorum in
	// which case we throw an error.
	if onlineDiskCount < len(xl.storageDisks) {
		heal = true
		if onlineDiskCount < xl.readQuorum {
			return nil, false, errReadQuorum
		}
	}

	// Return success.
	return bucketsInfo, heal, nil
}

// isBucketExist - checks whether bucket exists. Any failure other than
// errVolumeNotFound is logged and reported as "does not exist".
func (xl xlObjects) isBucketExist(bucketName string) bool {
	// Check whether bucket exists.
	_, _, err := xl.listAllBucketInfo(bucketName)
	if err != nil {
		if err == errVolumeNotFound {
			return false
		}
		errorIf(err, "Stat failed on bucket "+bucketName+".")
		return false
	}
	return true
}

// GetBucketInfo - get bucket info.
// Returns the bucket name together with the creation time reported by a disk
// that has the bucket and the Total/Free values summed over all such disks.
// Disks found to be missing the bucket are healed in the background.
func (xl xlObjects) GetBucketInfo(bucket string) (BucketInfo, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return BucketInfo{}, BucketNameInvalid{Bucket: bucket}
	}

	nsMutex.RLock(bucket, "")
	defer nsMutex.RUnlock(bucket, "")

	// List and figured out if we need healing.
	bucketsInfo, heal, err := xl.listAllBucketInfo(bucket)
	if err != nil {
		return BucketInfo{}, toObjectErr(err, bucket)
	}

	// Heal for missing entries.
	if heal {
		go func() {
			// Create bucket if missing on disks.
			for index, bktInfo := range bucketsInfo {
				if bktInfo.Name != "" {
					continue
				}
				// Bucketinfo name would be an empty string, create it.
				// The error is ignored, healing here is best-effort.
				xl.storageDisks[index].MakeVol(bucket)
			}
		}()
	}

	// Loop through all statVols, calculate the actual usage values. Only
	// entries of disks that have the bucket contribute, and only such an
	// entry may provide the creation time.
	var total, free int64
	var lastValid BucketInfo
	for _, info := range bucketsInfo {
		if info.Name == "" {
			continue
		}
		free += info.Free
		total += info.Total
		lastValid = info
	}

	return BucketInfo{
		Name:    bucket,
		Created: lastValid.Created,
		Total:   total,
		Free:    free,
	}, nil
}
// listBuckets lists volumes on all disks in parallel and converts one of
// the listings into bucket entries.
//
// ListVols errors are ignored; a disk that fails simply contributes
// whatever (possibly empty) listing it returned. Free and Total of every
// returned bucket are the sums, over all disks, of the values reported by
// one representative volume per disk. The bucket names come from the last
// disk that returned a non-empty listing, so an unavailable last disk no
// longer makes the result empty.
//
// NOTE: The assumption here is that volumes across all disks in
// readQuorum have a consistent view, i.e. they all have the same set of
// buckets. This is not verified here since healing should take care of it.
//
// The returned error is always nil.
func (xl xlObjects) listBuckets() ([]BucketInfo, error) {
	// Initialize sync waitgroup.
	var wg = &sync.WaitGroup{}

	// successVols carries the ListVols result of each disk, by disk index.
	var successVols = make([][]VolInfo, len(xl.storageDisks))
	for index, disk := range xl.storageDisks {
		wg.Add(1) // Add each go-routine to wait for.
		go func(index int, disk StorageAPI) {
			// Indicate wait group as finished.
			defer wg.Done()

			// Initiate listing; the error is deliberately dropped.
			volsInfo, _ := disk.ListVols()
			successVols[index] = volsInfo
		}(index, disk)
	}

	// Wait for all the list volumes running in parallel to finish.
	wg.Wait()

	// Loop through the per-disk listings and aggregate usage values.
	var listing []VolInfo
	var total, free int64
	for _, diskVols := range successVols {
		// Pick the first volume with a valid bucket name. If there is
		// none, volInfo is left at the last entry seen (or the zero value
		// for an empty listing), exactly as before.
		var volInfo VolInfo
		for _, volInfo = range diskVols {
			if volInfo.Name == "" {
				continue
			}
			if !IsValidBucketName(volInfo.Name) {
				continue
			}
			break
		}
		free += volInfo.Free
		total += volInfo.Total

		// Remember the most recent non-empty listing instead of blindly
		// using the last disk's one.
		if len(diskVols) > 0 {
			listing = diskVols
		}
	}

	var bucketsInfo []BucketInfo
	for _, volInfo := range listing {
		// StorageAPI can send volume names which are incompatible
		// with buckets, handle it and skip them.
		if !IsValidBucketName(volInfo.Name) {
			continue
		}
		bucketsInfo = append(bucketsInfo, BucketInfo{
			Name:    volInfo.Name,
			Created: volInfo.Created,
			Total:   total,
			Free:    free,
		})
	}
	return bucketsInfo, nil
}
// ListBuckets returns every bucket reported by listBuckets, ordered with
// byBucketName. A listing failure is translated with toObjectErr.
func (xl xlObjects) ListBuckets() ([]BucketInfo, error) {
	buckets, listErr := xl.listBuckets()
	if listErr != nil {
		return nil, toObjectErr(listErr)
	}
	// Sort in place before handing the slice to the caller.
	sort.Sort(byBucketName(buckets))
	return buckets, nil
}
// DeleteBucket removes the bucket volume from every storage disk.
//
// The namespace write lock on the bucket is held for the whole operation.
// Deletion runs on all disks in parallel. errVolumeNotFound and
// errDiskNotFound from individual disks are tolerated; any other error is
// returned (translated by toObjectErr) as soon as it is seen. If every
// disk reported one of the two tolerated errors, the bucket is considered
// absent and errVolumeNotFound is returned (translated by toObjectErr).
//
// Errors: BucketNameInvalid for a malformed name.
func (xl xlObjects) DeleteBucket(bucket string) error {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}

	// Hold the lock until the function returns. Previously the lock was
	// released immediately after being taken, leaving the deletion
	// unprotected.
	nsMutex.Lock(bucket, "")
	defer nsMutex.Unlock(bucket, "")

	var wg = &sync.WaitGroup{}
	var dErrs = make([]error, len(xl.storageDisks))

	// Remove a volume entry on all underlying storage disks.
	for index, disk := range xl.storageDisks {
		wg.Add(1)
		// Delete volume inside a go-routine.
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			dErrs[index] = disk.DeleteVol(bucket)
		}(index, disk)
	}

	// Wait for all the delete vols to finish.
	wg.Wait()

	// Collect how many disks report the volume (or the disk) as missing.
	var volumeNotFoundErrCnt int
	for _, err := range dErrs {
		if err == nil {
			continue
		}
		// We ignore error if errVolumeNotFound or errDiskNotFound.
		if err == errVolumeNotFound || err == errDiskNotFound {
			volumeNotFoundErrCnt++
			continue
		}
		return toObjectErr(err, bucket)
	}

	// Return err if all disks report volume not found.
	if volumeNotFoundErrCnt == len(xl.storageDisks) {
		return toObjectErr(errVolumeNotFound, bucket)
	}
	return nil
}

116
xl-v1-list-objects.go Normal file
View File

@ -0,0 +1,116 @@
package main
import "strings"
// listObjectsXL reads up to maxKeys entries from a tree walk over
// bucket/prefix starting after marker, and packages them as a
// ListObjectsInfo.
//
// A walk previously saved under the same (bucket, recursive, marker,
// prefix) parameters is resumed if available; otherwise a new one is
// started. If the walk was not exhausted it is saved again, keyed by the
// name of the last entry read. With a "/" delimiter the listing is
// non-recursive, directory entries are reported as Prefixes and
// NextMarker is filled in. An errFileNotFound from the walk yields an
// empty result with a nil error.
func (xl xlObjects) listObjectsXL(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
	// Recursive unless the caller asked for "/"-delimited listing.
	withDelimiter := delimiter == slashSeparator
	recursive := !withDelimiter

	walker := xl.lookupTreeWalkXL(listParams{bucket, recursive, marker, prefix})
	if walker == nil {
		walker = xl.startTreeWalkXL(bucket, prefix, marker, recursive)
	}

	var (
		entries   []ObjectInfo
		lastName  string
		exhausted bool
	)
	for count := 0; count < maxKeys; count++ {
		res, open := <-walker.ch
		if !open {
			// Closed channel, nothing more to read.
			exhausted = true
			break
		}
		if res.err != nil {
			// File not found is a valid case.
			if res.err == errFileNotFound {
				return ListObjectsInfo{}, nil
			}
			return ListObjectsInfo{}, toObjectErr(res.err, bucket, prefix)
		}
		entries = append(entries, res.objInfo)
		lastName = res.objInfo.Name
		if res.end {
			// The walker flagged this entry as the final one.
			exhausted = true
			break
		}
	}

	// Keep the walker around so that the next page can continue from it.
	if !exhausted {
		xl.saveTreeWalkXL(listParams{bucket, recursive, lastName, prefix}, walker)
	}

	listing := ListObjectsInfo{IsTruncated: !exhausted}
	for _, entry := range entries {
		// With delimiter set we fill in NextMarker and Prefixes.
		if withDelimiter {
			listing.NextMarker = entry.Name
			if entry.IsDir {
				listing.Prefixes = append(listing.Prefixes, entry.Name)
				continue
			}
		}
		listing.Objects = append(listing.Objects, ObjectInfo{
			Name:    entry.Name,
			ModTime: entry.ModTime,
			Size:    entry.Size,
			IsDir:   false,
		})
	}
	return listing, nil
}
// ListObjects validates the listing request and delegates to
// listObjectsXL.
//
// Checks, in order: bucket name syntax, bucket existence, prefix syntax,
// delimiter (only "" and "/" are supported) and that a non-empty marker
// starts with prefix. maxKeys of zero returns an empty listing; a negative
// value or one above maxObjectList is replaced by maxObjectList.
func (xl xlObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
	switch {
	case !IsValidBucketName(bucket):
		return ListObjectsInfo{}, BucketNameInvalid{Bucket: bucket}
	case !xl.isBucketExist(bucket):
		return ListObjectsInfo{}, BucketNotFound{Bucket: bucket}
	case !IsValidObjectPrefix(prefix):
		return ListObjectsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: prefix}
	case delimiter != "" && delimiter != slashSeparator:
		// Any delimiter other than '/' is not supported.
		return ListObjectsInfo{}, UnsupportedDelimiter{
			Delimiter: delimiter,
		}
	case marker != "" && !strings.HasPrefix(marker, prefix):
		// A marker outside of the prefix can never match anything.
		return ListObjectsInfo{}, InvalidMarkerPrefixCombination{
			Marker: marker,
			Prefix: prefix,
		}
	}

	// With max keys of zero we have reached eof, return right here.
	if maxKeys == 0 {
		return ListObjectsInfo{}, nil
	}

	// Out of range count - reset to maxObjectList.
	if maxKeys < 0 || maxKeys > maxObjectList {
		maxKeys = maxObjectList
	}

	listing, err := xl.listObjectsXL(bucket, prefix, marker, delimiter, maxKeys)
	if err != nil {
		return ListObjectsInfo{}, toObjectErr(err, bucket, prefix)
	}
	return listing, nil
}

287
xl-v1-metadata.go Normal file
View File

@ -0,0 +1,287 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"bytes"
"encoding/json"
"io"
"path"
"sort"
"sync"
"time"
)
// erasureBlockSize is the erasure block size in bytes (4 MiB). It is the
// value writeXLMetadata stores as Erasure.BlockSize in the object metadata.
const erasureBlockSize = 4 * 1024 * 1024 // 4MiB.
// objectPartInfo describes a single part of an object as recorded in the
// "parts" list of the object metadata.
type objectPartInfo struct {
	// Name is the part's file name; parts are sorted and searched by it.
	Name string `json:"name"`
	// ETag is the hex MD5 of the part's data as stored by AddObjectPart callers.
	ETag string `json:"etag"`
	// Size is the part length in bytes.
	Size int64 `json:"size"`
}
// xlMetaV1 is the in-memory form of the object metadata document that
// writeXLMetadata serializes as JSON onto every disk and readXLMetadata
// decodes back.
type xlMetaV1 struct {
	// Version and Format identify the document layout (for example "1" and "xl").
	Version string `json:"version"`
	Format string `json:"format"`
	// Stat carries the object's size, modification time and a version counter.
	Stat struct {
		Size int64 `json:"size"`
		ModTime time.Time `json:"modTime"`
		Version int64 `json:"version"`
	} `json:"stat"`
	// Erasure records the erasure coding layout. Index is the 1-based
	// position of the disk this copy was written to, Distribution the
	// 1-based disk order; both are filled in by writeXLMetadata.
	Erasure struct {
		DataBlocks int `json:"data"`
		ParityBlocks int `json:"parity"`
		BlockSize int64 `json:"blockSize"`
		Index int `json:"index"`
		Distribution []int `json:"distribution"`
	} `json:"erasure"`
	// Checksum holds a single enable flag; nothing in this file sets it.
	Checksum struct {
		Enable bool `json:"enable"`
	} `json:"checksum"`
	// Minio.Release is the release tag of the server that wrote the document.
	Minio struct {
		Release string `json:"release"`
	} `json:"minio"`
	// Meta is free-form object metadata such as "content-type".
	Meta map[string]string `json:"meta"`
	// Parts lists the object's parts, kept sorted by name by AddObjectPart.
	Parts []objectPartInfo `json:"parts,omitempty"`
}
// ReadFrom implements io.ReaderFrom: it drains reader completely and
// decodes the bytes as JSON into m. It returns the number of bytes read;
// if reading itself fails the count is reported as 0.
func (m *xlMetaV1) ReadFrom(reader io.Reader) (n int64, err error) {
	var raw bytes.Buffer
	if n, err = raw.ReadFrom(reader); err != nil {
		return 0, err
	}
	// A decode failure is returned together with the byte count.
	return n, json.Unmarshal(raw.Bytes(), m)
}
// WriteTo implements io.WriterTo: it encodes m as JSON and writes the
// result to writer with a single Write call, returning the number of
// bytes written.
func (m xlMetaV1) WriteTo(writer io.Writer) (n int64, err error) {
	var encoded []byte
	if encoded, err = json.Marshal(m); err != nil {
		return 0, err
	}
	var written int
	written, err = writer.Write(encoded)
	return int64(written), err
}
// byPartName implements sort.Interface over object parts, ordering them
// by Name using plain string comparison.
//
// NOTE(review): string comparison places "object10" before "object2";
// confirm that this ordering is what callers expect for part names.
type byPartName []objectPartInfo

// Len returns the number of parts.
func (t byPartName) Len() int {
	return len(t)
}

// Swap exchanges the parts at positions i and j.
func (t byPartName) Swap(i, j int) {
	t[i], t[j] = t[j], t[i]
}

// Less reports whether the name of part i sorts before the name of part j.
func (t byPartName) Less(i, j int) bool {
	return t[i].Name < t[j].Name
}
// SearchObjectPart returns the index of the first part whose name and
// etag both match, or -1 when there is no such part.
func (m xlMetaV1) SearchObjectPart(name string, etag string) int {
	for idx := range m.Parts {
		candidate := m.Parts[idx]
		if candidate.Name == name && candidate.ETag == etag {
			return idx
		}
	}
	return -1
}
// AddObjectPart appends a part with the given name, etag and size to
// m.Parts and re-sorts the list with byPartName. An existing part with the
// same name is not replaced; the new entry is added next to it.
func (m *xlMetaV1) AddObjectPart(name string, etag string, size int64) {
	part := objectPartInfo{
		Name: name,
		ETag: etag,
		Size: size,
	}
	m.Parts = append(m.Parts, part)
	sort.Sort(byPartName(m.Parts))
}
// getPartNumberOffset maps an offset within the whole object to the index
// of the part containing it and the offset inside that part.
//
// Parts are walked in their stored order, subtracting each part's size
// from the offset. The walk stops at the first part that is larger than
// the remaining offset, or at a part of size zero. If the offset lies
// beyond the last part, errUnexpected is returned with zero values.
func (m xlMetaV1) getPartNumberOffset(offset int64) (partNumber int, partOffset int64, err error) {
	remaining := offset
	for idx := range m.Parts {
		size := m.Parts[idx].Size
		if size == 0 || remaining < size {
			return idx, remaining, nil
		}
		remaining -= size
	}
	// Offset beyond the size of the object.
	return 0, 0, errUnexpected
}
// parentDirIsObject reports whether parent, or any of its ancestor
// directories, already exists as an object in bucket.
//
// For an object "a/b/c/d" the caller passes "a/b/c", and this function
// checks "a/b/c", "a/b" and "a" in that order, returning true on the first
// one that isObject accepts.
//
// The walk ends when path.Dir yields "." (relative path exhausted) or "/"
// (root of a rooted path). The "/" guard is needed because
// path.Dir("/") == "/", which previously led to unbounded recursion.
func (xl xlObjects) parentDirIsObject(bucket, parent string) bool {
	for p := parent; p != "." && p != "/"; p = path.Dir(p) {
		if xl.isObject(bucket, p) {
			// There is already an object at prefix p.
			return true
		}
	}
	return false
}
// isObject reports whether prefix holds an object in bucket, judged by
// stat-ing the metadata file (prefix/xlMetaJSONFile) on all disks in
// parallel.
//
// Results are examined in disk order. The answer is false as soon as the
// number of errFileNotFound results exceeds len(storageDisks)-readQuorum,
// or as soon as any other error is encountered (that error is logged).
// Otherwise the answer is true.
func (xl xlObjects) isObject(bucket, prefix string) bool {
	diskCount := len(xl.storageDisks)
	statErrs := make([]error, diskCount)

	var wg sync.WaitGroup
	for i, d := range xl.storageDisks {
		wg.Add(1)
		// Stat file on all the disks in a routine.
		go func(i int, d StorageAPI) {
			defer wg.Done()
			_, statErrs[i] = d.StatFile(bucket, path.Join(prefix, xlMetaJSONFile))
		}(i, d)
	}
	// Wait for all the Stat operations to finish.
	wg.Wait()

	// Number of "not found" answers that can be tolerated.
	maxMissing := diskCount - xl.readQuorum
	missing := 0
	for _, statErr := range statErrs {
		switch statErr {
		case nil:
			// This disk has the metadata file.
		case errFileNotFound:
			missing++
			if missing > maxMissing {
				return false
			}
		default:
			errorIf(statErr, "Unable to access file "+path.Join(bucket, prefix))
			return false
		}
	}
	return true
}
// readXLMetadata reads and decodes the metadata file of object
// (object/xlMetaJSONFile) from a single disk. On any failure the zero
// xlMetaV1 is returned together with the error.
func readXLMetadata(disk StorageAPI, bucket, object string) (xlMeta xlMetaV1, err error) {
	metaPath := path.Join(object, xlMetaJSONFile)
	reader, err := disk.ReadFile(bucket, metaPath, int64(0))
	if err != nil {
		return xlMetaV1{}, err
	}
	defer reader.Close()

	if _, err = xlMeta.ReadFrom(reader); err != nil {
		return xlMetaV1{}, err
	}
	return xlMeta, nil
}
// deleteXLMetadata - delete the metadata file (object/xlMetaJSONFile) by
// handing its path to xl.deleteObject.
func (xl xlObjects) deleteXLMetadata(bucket, object string) error {
	return xl.deleteObject(bucket, path.Join(object, xlMetaJSONFile))
}
// renameXLMetadata - rename the metadata file from
// srcBucket/srcPrefix/xlMetaJSONFile to dstBucket/dstPrefix/xlMetaJSONFile
// by handing both paths to xl.renameObject.
func (xl xlObjects) renameXLMetadata(srcBucket, srcPrefix, dstBucket, dstPrefix string) error {
	return xl.renameObject(srcBucket, path.Join(srcPrefix, xlMetaJSONFile), dstBucket, path.Join(dstPrefix, xlMetaJSONFile))
}
// getDiskDistribution returns the sequence 1, 2, ..., N where N is the
// number of storage disks.
func (xl xlObjects) getDiskDistribution() []int {
	total := len(xl.storageDisks)
	order := make([]int, total)
	for i := 0; i < total; i++ {
		// Positions are 1-based.
		order[i] = i + 1
	}
	return order
}
// writeXLMetadata writes xlMeta as prefix/xlMetaJSONFile in bucket on all
// disks in parallel.
//
// Before writing, the release tag and the erasure parameters (data and
// parity block counts, block size, disk distribution) are filled in. Each
// disk's copy additionally gets Erasure.Index set to that disk's 1-based
// position.
//
// When writing or closing a file fails, the partially written file is
// discarded with safeCloseAndRemove. The disk's error is the cleanup
// error if cleanup failed too, otherwise the original write/close error.
// Previously the original error was lost whenever cleanup succeeded.
//
// Returns the first non-nil per-disk error in disk order, or nil.
func (xl xlObjects) writeXLMetadata(bucket, prefix string, xlMeta xlMetaV1) error {
	var wg = &sync.WaitGroup{}
	var mErrs = make([]error, len(xl.storageDisks))

	// Save release and all erasure related metadata.
	xlMeta.Minio.Release = minioReleaseTag
	xlMeta.Erasure.DataBlocks = xl.dataBlocks
	xlMeta.Erasure.ParityBlocks = xl.parityBlocks
	xlMeta.Erasure.BlockSize = erasureBlockSize
	xlMeta.Erasure.Distribution = xl.getDiskDistribution()

	metaJSONFile := path.Join(prefix, xlMetaJSONFile)
	for index, disk := range xl.storageDisks {
		wg.Add(1)
		// metadata is passed by value so that every goroutine can set its
		// own Erasure.Index.
		go func(index int, disk StorageAPI, metadata xlMetaV1) {
			defer wg.Done()

			metaWriter, mErr := disk.CreateFile(bucket, metaJSONFile)
			if mErr != nil {
				mErrs[index] = mErr
				return
			}

			// Save the order.
			metadata.Erasure.Index = index + 1
			if _, mErr = metadata.WriteTo(metaWriter); mErr != nil {
				if clErr := safeCloseAndRemove(metaWriter); clErr != nil {
					mErrs[index] = clErr
					return
				}
				mErrs[index] = mErr
				return
			}
			if mErr = metaWriter.Close(); mErr != nil {
				if clErr := safeCloseAndRemove(metaWriter); clErr != nil {
					mErrs[index] = clErr
					return
				}
				mErrs[index] = mErr
				return
			}
			mErrs[index] = nil
		}(index, disk, xlMeta)
	}

	// Wait for all the routines.
	wg.Wait()

	// FIXME: check for quorum.
	// Loop through the collected errors and return the first one.
	for _, err := range mErrs {
		if err != nil {
			return err
		}
	}
	return nil
}

474
xl-v1-multipart-common.go Normal file
View File

@ -0,0 +1,474 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"bytes"
"encoding/json"
"io"
"path"
"sort"
"strings"
"sync"
"time"
"github.com/skyrings/skyring-common/tools/uuid"
)
// uploadInfo - one in-progress multipart upload as recorded in the
// uploads metadata file.
type uploadInfo struct {
	// UploadID identifies the upload.
	UploadID string `json:"uploadId"`
	// Initiated is the upload's start time; uploads are sorted by it.
	Initiated time.Time `json:"initiated"`
}
// uploadsV1 - the JSON document stored as uploadsJSONFile for an object:
// a version/format header plus the list of its in-progress uploads.
type uploadsV1 struct {
	// Version and Format identify the document layout; writeUploadJSON
	// sets them to "1" and "xl".
	Version string `json:"version"`
	Format string `json:"format"`
	// Uploads is kept sorted by AddUploadID, most recently initiated first.
	Uploads []uploadInfo `json:"uploadIds"`
}
// byInitiatedTime implements sort.Interface over uploads, ordering them
// from the most recently initiated to the oldest.
type byInitiatedTime []uploadInfo

// Len returns the number of uploads.
func (t byInitiatedTime) Len() int {
	return len(t)
}

// Swap exchanges the uploads at positions i and j.
func (t byInitiatedTime) Swap(i, j int) {
	t[i], t[j] = t[j], t[i]
}

// Less reports whether upload i was initiated after upload j.
func (t byInitiatedTime) Less(i, j int) bool {
	first, second := t[i].Initiated, t[j].Initiated
	return first.After(second)
}
// AddUploadID appends an upload with the given id and initiation time and
// re-sorts u.Uploads with byInitiatedTime.
func (u *uploadsV1) AddUploadID(uploadID string, initiated time.Time) {
	entry := uploadInfo{
		UploadID:  uploadID,
		Initiated: initiated,
	}
	u.Uploads = append(u.Uploads, entry)
	sort.Sort(byInitiatedTime(u.Uploads))
}
// SearchUploadID returns the index of the first upload whose id equals
// uploadID, or -1 when there is none.
func (u uploadsV1) SearchUploadID(uploadID string) int {
	for idx := range u.Uploads {
		if u.Uploads[idx].UploadID == uploadID {
			return idx
		}
	}
	return -1
}
// ReadFrom implements io.ReaderFrom: it drains reader completely and
// decodes the bytes as JSON into u. It returns the number of bytes read;
// if reading itself fails the count is reported as 0.
func (u *uploadsV1) ReadFrom(reader io.Reader) (n int64, err error) {
	var buffer bytes.Buffer
	n, err = buffer.ReadFrom(reader)
	if err != nil {
		return 0, err
	}
	// u is already a pointer; pass it directly (as xlMetaV1.ReadFrom does)
	// rather than a pointer to the pointer.
	err = json.Unmarshal(buffer.Bytes(), u)
	return n, err
}
// WriteTo implements io.WriterTo: it encodes u as JSON and writes the
// result to writer with a single Write call, returning the number of
// bytes written.
func (u uploadsV1) WriteTo(writer io.Writer) (n int64, err error) {
	var encoded []byte
	if encoded, err = json.Marshal(u); err != nil {
		return 0, err
	}
	var written int
	written, err = writer.Write(encoded)
	return int64(written), err
}
// getUploadIDs reads the uploads metadata file of bucket/object from every
// given disk in parallel.
//
// If any disk fails to read or decode the file, the first such error (in
// disk order) is returned with a zero uploadsV1. Otherwise the document
// read from the first disk is returned.
func getUploadIDs(bucket, object string, storageDisks ...StorageAPI) (uploadIDs uploadsV1, err error) {
	uploadJSONPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
	readErrs := make([]error, len(storageDisks))
	perDisk := make([]uploadsV1, len(storageDisks))

	var wg sync.WaitGroup
	for i, d := range storageDisks {
		wg.Add(1)
		go func(i int, d StorageAPI) {
			defer wg.Done()
			reader, rErr := d.ReadFile(minioMetaBucket, uploadJSONPath, int64(0))
			if rErr != nil {
				readErrs[i] = rErr
				return
			}
			defer reader.Close()
			_, readErrs[i] = perDisk[i].ReadFrom(reader)
		}(i, d)
	}
	wg.Wait()

	for i := range readErrs {
		if readErrs[i] != nil {
			return uploadsV1{}, readErrs[i]
		}
	}

	// FIXME: Do not know if it should pick the first successful one and return it.
	return perDisk[0], nil
}
// updateUploadJSON overwrites the uploads metadata file of bucket/object
// with uploadIDs on every given disk, in parallel.
//
// When writing or closing the file fails on a disk, the partially written
// file is discarded with safeCloseAndRemove. The disk's error is the
// cleanup error if cleanup failed too, otherwise the original error.
// Previously a failed write left the writer neither closed nor removed.
//
// Returns the first non-nil per-disk error in disk order, or nil.
func updateUploadJSON(bucket, object string, uploadIDs uploadsV1, storageDisks ...StorageAPI) error {
	uploadsPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
	var errs = make([]error, len(storageDisks))
	var wg = &sync.WaitGroup{}

	for index, disk := range storageDisks {
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			w, wErr := disk.CreateFile(minioMetaBucket, uploadsPath)
			if wErr != nil {
				errs[index] = wErr
				return
			}
			if _, wErr = uploadIDs.WriteTo(w); wErr != nil {
				// Do not leave a half-written file or an open writer behind.
				if clErr := safeCloseAndRemove(w); clErr != nil {
					errs[index] = clErr
					return
				}
				errs[index] = wErr
				return
			}
			if wErr = w.Close(); wErr != nil {
				if clErr := safeCloseAndRemove(w); clErr != nil {
					errs[index] = clErr
					return
				}
				errs[index] = wErr
				return
			}
		}(index, disk)
	}
	wg.Wait()

	for _, err := range errs {
		if err != nil {
			return err
		}
	}
	return nil
}
// writeUploadJSON - create `uploads.json` or update it with new uploadID.
//
// The current list of uploads is read with getUploadIDs (a missing file
// counts as an empty list), the new uploadID is added, and the result is
// written to a temporary path on every disk in parallel. On each disk the
// temporary file is then renamed to the final path if no file exists
// there yet.
//
// When writing or closing the temporary file fails, it is discarded with
// safeCloseAndRemove. The disk's error is the cleanup error if cleanup
// failed too, otherwise the original error. Previously a failed write
// left the writer neither closed nor removed.
//
// Returns the first non-nil per-disk error in disk order, or nil.
//
// NOTE(review): when the final file already exists (StatFile succeeds)
// the temporary file is neither renamed over it nor deleted, so the new
// uploadID does not appear to reach the existing `uploads.json`. Confirm
// whether that is intended before changing it; it depends on RenameFile's
// overwrite semantics, which are not visible here.
func writeUploadJSON(bucket, object, uploadID string, initiated time.Time, storageDisks ...StorageAPI) error {
	uploadsPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
	tmpUploadsPath := path.Join(tmpMetaPrefix, bucket, object, uploadsJSONFile)

	var errs = make([]error, len(storageDisks))
	var wg = &sync.WaitGroup{}

	uploadIDs, err := getUploadIDs(bucket, object, storageDisks...)
	if err != nil && err != errFileNotFound {
		return err
	}
	uploadIDs.Version = "1"
	uploadIDs.Format = "xl"
	uploadIDs.AddUploadID(uploadID, initiated)

	for index, disk := range storageDisks {
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			w, wErr := disk.CreateFile(minioMetaBucket, tmpUploadsPath)
			if wErr != nil {
				errs[index] = wErr
				return
			}
			if _, wErr = uploadIDs.WriteTo(w); wErr != nil {
				// Do not leave a half-written file or an open writer behind.
				if clErr := safeCloseAndRemove(w); clErr != nil {
					errs[index] = clErr
					return
				}
				errs[index] = wErr
				return
			}
			if wErr = w.Close(); wErr != nil {
				if clErr := safeCloseAndRemove(w); clErr != nil {
					errs[index] = clErr
					return
				}
				errs[index] = wErr
				return
			}

			_, wErr = disk.StatFile(minioMetaBucket, uploadsPath)
			if wErr != nil {
				if wErr == errFileNotFound {
					// No final file yet: move the temporary one in place.
					wErr = disk.RenameFile(minioMetaBucket, tmpUploadsPath, minioMetaBucket, uploadsPath)
					if wErr == nil {
						return
					}
				}
				// Stat or rename failed: drop the temporary file and
				// report the failure.
				if dErr := disk.DeleteFile(minioMetaBucket, tmpUploadsPath); dErr != nil {
					errs[index] = dErr
					return
				}
				errs[index] = wErr
				return
			}
		}(index, disk)
	}
	wg.Wait()

	for _, err = range errs {
		if err != nil {
			return err
		}
	}
	return nil
}
// cleanupUploadedParts runs cleanupDir on the upload's directory
// (mpartMetaPrefix/bucket/object/uploadID inside minioMetaBucket) on every
// given disk in parallel. It returns the first non-nil per-disk error in
// disk order, or nil.
func cleanupUploadedParts(bucket, object, uploadID string, storageDisks ...StorageAPI) error {
	cleanupErrs := make([]error, len(storageDisks))

	var wg sync.WaitGroup
	for i, d := range storageDisks {
		wg.Add(1)
		go func(i int, d StorageAPI) {
			defer wg.Done()
			cleanupErrs[i] = cleanupDir(d, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadID))
		}(i, d)
	}
	wg.Wait()

	for i := range cleanupErrs {
		if cleanupErrs[i] != nil {
			return cleanupErrs[i]
		}
	}
	return nil
}
// listUploadsInfo returns the uploads recorded for the object addressed by
// prefixPath, read from one randomly chosen disk.
//
// prefixPath is split on "/" into at most three pieces; the second piece
// is used as the bucket and the third as the object. A missing uploads
// file yields an empty, non-nil slice and a nil error.
func (xl xlObjects) listUploadsInfo(prefixPath string) (uploads []uploadInfo, err error) {
	chosenDisk := xl.getRandomDisk()
	pieces := strings.SplitN(prefixPath, "/", 3)
	bucketName, objectName := pieces[1], pieces[2]

	uploadIDs, err := getUploadIDs(bucketName, objectName, chosenDisk)
	switch err {
	case nil:
		return uploadIDs.Uploads, nil
	case errFileNotFound:
		// No uploads file means no uploads.
		return []uploadInfo{}, nil
	default:
		return nil, err
	}
}
// listMetaBucketMultipart - list all objects at a given prefix inside minioMetaBucket.
//
// Entries are read from a tree walk over prefixPath starting after
// markerPath. A walk previously saved under the same parameters is resumed
// if available, otherwise a new one is started. For every directory entry
// the uploads recorded for it are looked up with listUploadsInfo:
//
//   - a directory with uploads contributes one entry per upload, named
//     <dir>/<uploadID> and carrying the upload's initiation time;
//   - a directory without uploads is returned as-is;
//   - a file is returned only if its name ends in xlMetaJSONFile, and is
//     reported under its parent directory's name.
//
// Reading stops once maxKeys entries were collected or the walk ended.
// eof reports whether the walk ended. If it did not and at least one
// entry was collected, the walker is saved keyed by the last entry's name
// so that a follow-up request can continue from there.
//
// errFileNotFound and errDiskNotFound from the walk yield (nil, true,
// nil); other walk errors are translated with toObjectErr.
//
// NOTE(review): when maxKeys is reached in the middle of a directory's
// uploads, the remaining uploads of that directory are dropped (this was
// already the case before); confirm whether paging needs to resume inside
// the directory.
func (xl xlObjects) listMetaBucketMultipart(prefixPath string, markerPath string, recursive bool, maxKeys int) (objInfos []ObjectInfo, eof bool, err error) {
	walker := xl.lookupTreeWalkXL(listParams{minioMetaBucket, recursive, markerPath, prefixPath})
	if walker == nil {
		walker = xl.startTreeWalkXL(minioMetaBucket, prefixPath, markerPath, recursive)
	}

	// newMaxKeys tracks the number of entries which are going to be
	// returned back.
	var newMaxKeys int

	// Following loop gathers and filters out special files inside minio meta volume.
	for {
		walkResult, ok := <-walker.ch
		if !ok {
			// Closed channel.
			eof = true
			break
		}
		// For any walk error return right away.
		if walkResult.err != nil {
			// File not found or Disk not found is a valid case.
			if walkResult.err == errFileNotFound || walkResult.err == errDiskNotFound {
				return nil, true, nil
			}
			return nil, false, toObjectErr(walkResult.err, minioMetaBucket, prefixPath)
		}

		objInfo := walkResult.objInfo
		var uploads []uploadInfo
		if objInfo.IsDir {
			// List all the uploads if objInfo.Name is a leaf directory;
			// for a non-leaf directory the result is empty.
			uploads, err = xl.listUploadsInfo(objInfo.Name)
			if err != nil {
				return nil, false, err
			}
		}

		if len(uploads) > 0 {
			for _, upload := range uploads {
				objInfos = append(objInfos, ObjectInfo{
					Name:    path.Join(objInfo.Name, upload.UploadID),
					ModTime: upload.Initiated,
				})
				newMaxKeys++
				// Stop adding uploads once maxKeys is reached.
				if newMaxKeys == maxKeys {
					break
				}
			}
		} else {
			// We reach here for a non-recursive case non-leaf entry
			// OR recursive case with a file entry.
			if !objInfo.IsDir { // Do not skip non-recursive case directory entries.
				// Only metadata files mark an incomplete multipart upload.
				if !strings.HasSuffix(objInfo.Name, xlMetaJSONFile) {
					continue
				}
				objInfo.Name = path.Dir(objInfo.Name)
			}
			objInfos = append(objInfos, objInfo)
			newMaxKeys++
		}

		// If we have reached maxKeys, we have listed everything that was
		// requested. This check must sit outside the uploads loop above:
		// a break in there only leaves that inner loop, which previously
		// let the walk run on to the very end.
		if newMaxKeys == maxKeys {
			break
		}
	}

	if !eof && len(objInfos) != 0 {
		// EOF has not reached, hence save the walker channel to the map so that the walker go routine
		// can continue from where it left off for the next list request.
		lastObjInfo := objInfos[len(objInfos)-1]
		markerPath = lastObjInfo.Name
		xl.saveTreeWalkXL(listParams{minioMetaBucket, recursive, markerPath, prefixPath}, walker)
	}

	// Return entries here.
	return objInfos, eof, nil
}
// FIXME: Currently the code sorts based on keyName/upload-id which is
// not correct based on the S3 specs. According to s3 specs we are
// supposed to only lexically sort keyNames and then for keyNames with
// multiple upload ids should be sorted based on the initiated time.
// Currently this case is not handled.

// listMultipartUploadsCommon - lists all multipart uploads, common
// function for both object layers.
//
// The request is validated in this order: bucket name syntax, bucket
// existence, prefix syntax, delimiter (only "" and "/" are supported),
// keyMarker must start with prefix, and a non-empty uploadIDMarker must be
// a parsable, non-zero UUID and must not be combined with a keyMarker
// ending in "/". The listing itself is produced by listMetaBucketMultipart
// over mpartMetaPrefix/bucket/prefix inside the meta bucket: directory
// entries become CommonPrefixes, everything else becomes an upload whose
// id is the last path element. The Next* markers are taken from the last
// entry and cleared again when the listing is not truncated.
func (xl xlObjects) listMultipartUploadsCommon(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
	result := ListMultipartsInfo{}
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return ListMultipartsInfo{}, BucketNameInvalid{Bucket: bucket}
	}
	if !xl.isBucketExist(bucket) {
		return ListMultipartsInfo{}, BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectPrefix(prefix) {
		return ListMultipartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: prefix}
	}
	// Verify if delimiter is anything other than '/', which we do not support.
	if delimiter != "" && delimiter != slashSeparator {
		return ListMultipartsInfo{}, UnsupportedDelimiter{
			Delimiter: delimiter,
		}
	}
	// Verify if marker has prefix.
	if keyMarker != "" && !strings.HasPrefix(keyMarker, prefix) {
		return ListMultipartsInfo{}, InvalidMarkerPrefixCombination{
			Marker: keyMarker,
			Prefix: prefix,
		}
	}
	if uploadIDMarker != "" {
		// An upload id marker makes no sense together with a key marker
		// that names a directory.
		if strings.HasSuffix(keyMarker, slashSeparator) {
			return result, InvalidUploadIDKeyCombination{
				UploadIDMarker: uploadIDMarker,
				KeyMarker: keyMarker,
			}
		}
		// The parse error is returned to the caller unchanged.
		id, err := uuid.Parse(uploadIDMarker)
		if err != nil {
			return result, err
		}
		if id.IsZero() {
			return result, MalformedUploadID{
				UploadID: uploadIDMarker,
			}
		}
	}
	// Recursive unless the caller asked for "/"-delimited listing.
	recursive := true
	if delimiter == slashSeparator {
		recursive = false
	}
	// IsTruncated is provisional here; it is recomputed from eof below.
	result.IsTruncated = true
	result.MaxUploads = maxUploads
	// Not using path.Join() as it strips off the trailing '/'.
	multipartPrefixPath := pathJoin(mpartMetaPrefix, pathJoin(bucket, prefix))
	if prefix == "" {
		// Should have a trailing "/" if prefix is ""
		// For ex. multipartPrefixPath should be "multipart/bucket/" if prefix is ""
		multipartPrefixPath += slashSeparator
	}
	// The walk marker is mpartMetaPrefix/bucket/keyMarker/uploadIDMarker,
	// or empty when no key marker was given.
	multipartMarkerPath := ""
	if keyMarker != "" {
		keyMarkerPath := pathJoin(pathJoin(bucket, keyMarker), uploadIDMarker)
		multipartMarkerPath = pathJoin(mpartMetaPrefix, keyMarkerPath)
	}
	// List all the multipart files at prefixPath, starting with marker keyMarkerPath.
	objInfos, eof, err := xl.listMetaBucketMultipart(multipartPrefixPath, multipartMarkerPath, recursive, maxUploads)
	if err != nil {
		return ListMultipartsInfo{}, err
	}
	// Loop through all the received files fill in the multiparts result.
	for _, objInfo := range objInfos {
		var objectName string
		var uploadID string
		if objInfo.IsDir {
			// All directory entries are common prefixes.
			uploadID = "" // Upload ids are empty for CommonPrefixes.
			objectName = strings.TrimPrefix(objInfo.Name, retainSlash(pathJoin(mpartMetaPrefix, bucket)))
			result.CommonPrefixes = append(result.CommonPrefixes, objectName)
		} else {
			// The last path element is the upload id, the rest (minus the
			// meta prefix and bucket) is the object name.
			uploadID = path.Base(objInfo.Name)
			objectName = strings.TrimPrefix(path.Dir(objInfo.Name), retainSlash(pathJoin(mpartMetaPrefix, bucket)))
			result.Uploads = append(result.Uploads, uploadMetadata{
				Object: objectName,
				UploadID: uploadID,
				Initiated: objInfo.ModTime,
			})
		}
		// The markers end up describing the last entry processed.
		result.NextKeyMarker = objectName
		result.NextUploadIDMarker = uploadID
	}
	result.IsTruncated = !eof
	// Markers are only meaningful for a truncated listing.
	if !result.IsTruncated {
		result.NextKeyMarker = ""
		result.NextUploadIDMarker = ""
	}
	return result, nil
}
// isUploadIDExists reports whether isObject finds an object at
// mpartMetaPrefix/bucket/object/uploadID inside the meta bucket.
func (xl xlObjects) isUploadIDExists(bucket, object, uploadID string) bool {
	return xl.isObject(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadID))
}

432
xl-v1-multipart.go Normal file
View File

@ -0,0 +1,432 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"crypto/md5"
"encoding/hex"
"fmt"
"io"
"io/ioutil"
"path"
"strconv"
"time"
)
// ListMultipartUploads - list multipart uploads. All arguments are passed
// through unchanged to listMultipartUploadsCommon, which performs the
// validation and the listing.
func (xl xlObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
	return xl.listMultipartUploadsCommon(bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads)
}
/// Common multipart object layer functions.

// newMultipartUploadCommon starts a new multipart upload for bucket/object
// and returns its freshly generated upload id.
//
// After validating the bucket and object names, the upload id is recorded
// with writeUploadJSON. The initial object metadata (format "xl", version
// "1", the given meta) is then written under the temporary prefix and
// renamed to its final location under mpartMetaPrefix. If the rename
// fails, the temporary metadata is deleted again. A missing
// "content-type" entry defaults to "application/octet-stream"; note that
// this is stored into the caller's map when one is supplied.
//
// The namespace lock on mpartMetaPrefix/bucket/object in the meta bucket
// is held while the upload is being registered.
func (xl xlObjects) newMultipartUploadCommon(bucket string, object string, meta map[string]string) (uploadID string, err error) {
	switch {
	case !IsValidBucketName(bucket):
		return "", BucketNameInvalid{Bucket: bucket}
	case !xl.isBucketExist(bucket):
		return "", BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		return "", ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	// No metadata is set, allocate a new one.
	if meta == nil {
		meta = make(map[string]string)
	}
	// If not set default to "application/octet-stream"
	if meta["content-type"] == "" {
		meta["content-type"] = "application/octet-stream"
	}
	xlMeta := xlMetaV1{
		Format:  "xl",
		Version: "1",
		Meta:    meta,
	}

	// This lock needs to be held for any changes to the directory contents of ".minio/multipart/object/"
	lockPath := pathJoin(mpartMetaPrefix, bucket, object)
	nsMutex.Lock(minioMetaBucket, lockPath)
	defer nsMutex.Unlock(minioMetaBucket, lockPath)

	uploadID = getUUID()
	initiated := time.Now().UTC()

	// Create 'uploads.json'
	if err = writeUploadJSON(bucket, object, uploadID, initiated, xl.storageDisks...); err != nil {
		return "", err
	}

	finalPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	stagingPath := path.Join(tmpMetaPrefix, bucket, object, uploadID)
	if err = xl.writeXLMetadata(minioMetaBucket, stagingPath, xlMeta); err != nil {
		return "", toObjectErr(err, minioMetaBucket, stagingPath)
	}

	err = xl.renameXLMetadata(minioMetaBucket, stagingPath, minioMetaBucket, finalPath)
	if err == nil {
		// Return success.
		return uploadID, nil
	}

	// Rename failed: remove the staged metadata; a failure to do so takes
	// precedence over the rename error.
	if dErr := xl.deleteXLMetadata(minioMetaBucket, stagingPath); dErr != nil {
		return "", toObjectErr(dErr, minioMetaBucket, stagingPath)
	}
	return "", toObjectErr(err, minioMetaBucket, finalPath)
}
// NewMultipartUpload - initialize a new multipart upload, returns a unique id.
// All arguments are passed through unchanged to newMultipartUploadCommon.
func (xl xlObjects) NewMultipartUpload(bucket, object string, meta map[string]string) (string, error) {
	return xl.newMultipartUploadCommon(bucket, object, meta)
}
// putObjectPartCommon - put object part.
//
// Streams data into a temporary part file (tmpMetaPrefix/bucket/object/
// uploadID/object<partID>) through xl.erasureDisk while computing its MD5,
// then records the part in the upload's metadata and renames the file to
// its final place under mpartMetaPrefix. Returns the hex MD5 of the data.
//
// size > 0 means exactly size bytes are expected: fewer is a copy error,
// more yields UnExpectedDataSize. Otherwise data is read until EOF and the
// number of bytes copied becomes the part size. A non-empty md5Hex must
// match the computed MD5 or BadDigest is returned. On every failure up to
// and including Close, the temporary file is discarded with
// safeCloseAndRemove, and a failure of that cleanup is what gets returned.
//
// NOTE(review): the upload id is checked before the locks below are
// taken; confirm that an abort racing with this window is acceptable.
func (xl xlObjects) putObjectPartCommon(bucket string, object string, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !xl.isBucketExist(bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	if !xl.isUploadIDExists(bucket, object, uploadID) {
		return "", InvalidUploadID{UploadID: uploadID}
	}
	// Hold read lock on the uploadID so that no one aborts it.
	nsMutex.RLock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
	defer nsMutex.RUnlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
	// Hold write lock on the part so that there is no parallel upload on the part.
	nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID, strconv.Itoa(partID)))
	defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID, strconv.Itoa(partID)))
	// Part files are named "object<partID>".
	partSuffix := fmt.Sprintf("object%d", partID)
	tmpPartPath := path.Join(tmpMetaPrefix, bucket, object, uploadID, partSuffix)
	fileWriter, err := xl.erasureDisk.CreateFile(minioMetaBucket, tmpPartPath)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}
	// Initialize md5 writer.
	md5Writer := md5.New()
	// Instantiate a new multi writer.
	multiWriter := io.MultiWriter(md5Writer, fileWriter)
	// Copy the data into both the MD5 hasher and the part file.
	if size > 0 {
		if _, err = io.CopyN(multiWriter, data, size); err != nil {
			if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
				return "", toObjectErr(clErr, bucket, object)
			}
			return "", toObjectErr(err, bucket, object)
		}
		// Reader shouldn't have more data than what is mentioned in the size argument.
		// Read one more byte from the reader to validate it; this is
		// expected to fail, success means there is more data in the reader.
		if _, err = io.CopyN(ioutil.Discard, data, 1); err == nil {
			if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
				return "", toObjectErr(clErr, bucket, object)
			}
			return "", UnExpectedDataSize{Size: int(size)}
		}
	} else {
		// Size unknown: copy until EOF and use the copied byte count.
		var n int64
		if n, err = io.Copy(multiWriter, data); err != nil {
			if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
				return "", toObjectErr(clErr, bucket, object)
			}
			return "", toObjectErr(err, bucket, object)
		}
		size = n
	}
	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))
	// Verify the client supplied MD5, if any.
	if md5Hex != "" {
		if newMD5Hex != md5Hex {
			if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
				return "", toObjectErr(clErr, bucket, object)
			}
			return "", BadDigest{md5Hex, newMD5Hex}
		}
	}
	err = fileWriter.Close()
	if err != nil {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		// NOTE(review): unlike the other paths this error is returned
		// without toObjectErr translation.
		return "", err
	}
	// Load the upload's current metadata from one randomly chosen disk and
	// add the new part to it in memory.
	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	xlMeta, err := readXLMetadata(xl.getRandomDisk(), minioMetaBucket, uploadIDPath)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
	}
	xlMeta.AddObjectPart(partSuffix, newMD5Hex, size)
	// Move the part to its final location; on failure remove the
	// temporary part again.
	partPath := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)
	err = xl.renameObject(minioMetaBucket, tmpPartPath, minioMetaBucket, partPath)
	if err != nil {
		if dErr := xl.deleteObject(minioMetaBucket, tmpPartPath); dErr != nil {
			return "", toObjectErr(dErr, minioMetaBucket, tmpPartPath)
		}
		return "", toObjectErr(err, minioMetaBucket, partPath)
	}
	// Persist the updated metadata only after the part is in place.
	if err = xl.writeXLMetadata(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadID), xlMeta); err != nil {
		return "", toObjectErr(err, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadID))
	}
	return newMD5Hex, nil
}
// PutObjectPart - uploads a single part of a multipart upload by handing all
// arguments, unchanged, to putObjectPartCommon.
func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
	partMD5, err := xl.putObjectPartCommon(bucket, object, uploadID, partID, size, data, md5Hex)
	return partMD5, err
}
// listObjectPartsCommon - lists the parts uploaded so far for an upload id,
// common function across both object layers.
//
// Parts are read from the upload's XL metadata; entries at positions up to
// partNumberMarker are skipped and at most maxParts entries are returned. A
// non-positive maxParts places no limit on the listing. When more parts remain,
// IsTruncated is set and NextPartNumberMarker carries the last listed part number.
//
// Errors include BucketNameInvalid, BucketNotFound, ObjectNameInvalid,
// InvalidUploadID, or a storage error converted by toObjectErr.
func (xl xlObjects) listObjectPartsCommon(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return ListPartsInfo{}, BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !xl.isBucketExist(bucket) {
		return ListPartsInfo{}, BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return ListPartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	if !xl.isUploadIDExists(bucket, object, uploadID) {
		return ListPartsInfo{}, InvalidUploadID{UploadID: uploadID}
	}

	// Hold lock so that there is no competing abort-multipart-upload or complete-multipart-upload.
	uploadIDLock := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
	nsMutex.Lock(minioMetaBucket, uploadIDLock)
	defer nsMutex.Unlock(minioMetaBucket, uploadIDLock)

	result := ListPartsInfo{}
	disk := xl.getRandomDisk() // Pick a random disk and read `xl.json` from there.
	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	xlMeta, err := readXLMetadata(disk, minioMetaBucket, uploadIDPath)
	if err != nil {
		return ListPartsInfo{}, toObjectErr(err, minioMetaBucket, uploadIDPath)
	}

	// Clamp the marker into [0, len(parts)]: slicing with a marker outside of
	// that range would panic. A marker past the last part yields an empty listing.
	if partNumberMarker < 0 {
		partNumberMarker = 0
	}
	if partNumberMarker > len(xlMeta.Parts) {
		partNumberMarker = len(xlMeta.Parts)
	}

	// Only parts with higher part numbers will be listed.
	parts := xlMeta.Parts[partNumberMarker:]
	for i, part := range parts {
		partNamePath := path.Join(mpartMetaPrefix, bucket, object, uploadID, part.Name)
		fi, sErr := disk.StatFile(minioMetaBucket, partNamePath)
		if sErr != nil {
			return ListPartsInfo{}, toObjectErr(sErr, minioMetaBucket, partNamePath)
		}
		result.Parts = append(result.Parts, partInfo{
			PartNumber:   i + partNumberMarker + 1,
			ETag:         part.ETag,
			LastModified: fi.ModTime,
			Size:         fi.Size,
		})
		// Stop once maxParts entries are collected.
		if maxParts > 0 && len(result.Parts) == maxParts {
			break
		}
	}

	// If there are more entries than were listed, we set IsTruncated as true.
	if len(parts) > len(result.Parts) {
		result.IsTruncated = true
		// Make sure to fill next part number marker if IsTruncated is
		// true for subsequent listing.
		result.NextPartNumberMarker = result.Parts[len(result.Parts)-1].PartNumber
	}
	result.Bucket = bucket
	result.Object = object
	result.UploadID = uploadID
	result.MaxParts = maxParts
	return result, nil
}
// ListObjectParts - lists the uploaded parts of a multipart upload by handing
// all arguments, unchanged, to listObjectPartsCommon.
func (xl xlObjects) ListObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
	listing, err := xl.listObjectPartsCommon(bucket, object, uploadID, partNumberMarker, maxParts)
	return listing, err
}
// CompleteMultipartUpload - finishes a multipart upload.
//
// Every entry of parts is looked up (by name and ETag) in the upload's XL
// metadata; all parts but the last must satisfy isMinAllowedPartSize. The
// metadata is then finalized with the total size, modification time and the
// multipart md5sum, any existing object at the destination is deleted, and the
// upload directory is renamed to the destination object. Finally the upload id
// is dropped from the uploads list, or the list is removed when it becomes empty.
//
// Returns the md5sum computed by completeMultipartMD5. Errors include
// BucketNameInvalid, BucketNotFound, ObjectNameInvalid, InvalidUploadID,
// InvalidPart, PartTooSmall, or a storage error converted by toObjectErr.
func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, uploadID string, parts []completePart) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !xl.isBucketExist(bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	if !xl.isUploadIDExists(bucket, object, uploadID) {
		return "", InvalidUploadID{UploadID: uploadID}
	}

	// Hold lock so that
	// 1) no one aborts this multipart upload
	// 2) no one does a parallel complete-multipart-upload on this multipart upload
	uploadIDPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
	nsMutex.Lock(minioMetaBucket, uploadIDPath)
	defer nsMutex.Unlock(minioMetaBucket, uploadIDPath)

	// Calculate s3 compatible md5sum for complete multipart.
	s3MD5, err := completeMultipartMD5(parts...)
	if err != nil {
		return "", err
	}

	xlMeta, err := readXLMetadata(xl.getRandomDisk(), minioMetaBucket, uploadIDPath)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
	}

	var objectSize int64
	// Loop through all parts, validate them and then commit to disk.
	for i, part := range parts {
		// Construct part suffix.
		partSuffix := fmt.Sprintf("object%d", part.PartNumber)
		// Use the position reported by the metadata lookup, not the position in
		// the client supplied list: the two need not line up, and the client
		// list may even be longer than the stored one.
		partIdx := xlMeta.SearchObjectPart(partSuffix, part.ETag)
		if partIdx == -1 {
			return "", InvalidPart{}
		}
		partSize := xlMeta.Parts[partIdx].Size
		// All parts except the last part has to be atleast 5MB.
		if (i < len(parts)-1) && !isMinAllowedPartSize(partSize) {
			return "", PartTooSmall{}
		}
		objectSize += partSize
	}

	// Check if an object is present as one of the parent dir.
	if xl.parentDirIsObject(bucket, path.Dir(object)) {
		return "", toObjectErr(errFileAccessDenied, bucket, object)
	}

	// Save the final object size and modtime.
	xlMeta.Stat.Size = objectSize
	xlMeta.Stat.ModTime = time.Now().UTC()
	// Save successfully calculated md5sum.
	xlMeta.Meta["md5Sum"] = s3MD5
	if err = xl.writeXLMetadata(minioMetaBucket, uploadIDPath, xlMeta); err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
	}

	// Hold write lock on the destination before rename
	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	// Delete if an object already exists.
	// FIXME: rename it to tmp file and delete only after
	// the newly uploaded file is renamed from tmp location to
	// the original location. Verify if the object is a multipart object.
	if err = xl.deleteObject(bucket, object); err != nil {
		return "", toObjectErr(err, bucket, object)
	}
	if err = xl.renameObject(minioMetaBucket, uploadIDPath, bucket, object); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Hold the lock so that two parallel complete-multipart-uploads do not
	// leave a stale uploads.json behind.
	nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))
	defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))

	// Validate if there are other incomplete upload-id's present for
	// the object, if yes do not attempt to delete 'uploads.json'.
	uploadIDs, err := getUploadIDs(bucket, object, xl.storageDisks...)
	if err == nil {
		uploadIDIdx := uploadIDs.SearchUploadID(uploadID)
		if uploadIDIdx != -1 {
			uploadIDs.Uploads = append(uploadIDs.Uploads[:uploadIDIdx], uploadIDs.Uploads[uploadIDIdx+1:]...)
		}
		if len(uploadIDs.Uploads) > 0 {
			if err = updateUploadJSON(bucket, object, uploadIDs, xl.storageDisks...); err != nil {
				return "", err
			}
			return s3MD5, nil
		}
	}

	// No other upload is pending (or the list could not be read), remove the
	// multipart directory of this object altogether.
	uploadsPath := path.Join(mpartMetaPrefix, bucket, object)
	if err = xl.deleteObject(minioMetaBucket, uploadsPath); err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadsPath)
	}
	// Return md5sum.
	return s3MD5, nil
}
// abortMultipartUploadCommon - aborts a multipart upload, common
// function used by both object layers.
//
// All uploaded parts of the upload id are cleaned up and the upload id is
// removed from the object's uploads list. When other uploads are still pending
// the shortened list is written back; otherwise the object's whole multipart
// directory is deleted.
//
// Errors include BucketNameInvalid, BucketNotFound, ObjectNameInvalid,
// InvalidUploadID, or a storage error.
func (xl xlObjects) abortMultipartUploadCommon(bucket, object, uploadID string) error {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	if !xl.isBucketExist(bucket) {
		return BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	if !xl.isUploadIDExists(bucket, object, uploadID) {
		return InvalidUploadID{UploadID: uploadID}
	}

	// Hold lock so that there is no competing complete-multipart-upload or put-object-part.
	uploadIDLock := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
	nsMutex.Lock(minioMetaBucket, uploadIDLock)
	defer nsMutex.Unlock(minioMetaBucket, uploadIDLock)

	// Cleanup all uploaded parts.
	if err := cleanupUploadedParts(bucket, object, uploadID, xl.storageDisks...); err != nil {
		return err
	}

	// Validate if there are other incomplete upload-id's present for
	// the object, if yes do not attempt to delete 'uploads.json'.
	// NOTE(review): CompleteMultipartUpload performs this read-modify-write
	// under an additional lock on the object's multipart path - confirm whether
	// the same lock should be taken here.
	uploadIDs, err := getUploadIDs(bucket, object, xl.storageDisks...)
	if err == nil {
		uploadIDIdx := uploadIDs.SearchUploadID(uploadID)
		if uploadIDIdx != -1 {
			uploadIDs.Uploads = append(uploadIDs.Uploads[:uploadIDIdx], uploadIDs.Uploads[uploadIDIdx+1:]...)
		}
		if len(uploadIDs.Uploads) > 0 {
			// Persist the list without the aborted upload id, otherwise the
			// aborted upload stays recorded in 'uploads.json'.
			return updateUploadJSON(bucket, object, uploadIDs, xl.storageDisks...)
		}
	}

	// No other upload is pending (or the list could not be read), remove the
	// multipart directory of this object altogether.
	uploadsPath := path.Join(mpartMetaPrefix, bucket, object)
	if err = xl.deleteObject(minioMetaBucket, uploadsPath); err != nil {
		return toObjectErr(err, minioMetaBucket, uploadsPath)
	}
	return nil
}
// AbortMultipartUpload - cancels a multipart upload by handing all
// arguments, unchanged, to abortMultipartUploadCommon.
func (xl xlObjects) AbortMultipartUpload(bucket, object, uploadID string) error {
	err := xl.abortMultipartUploadCommon(bucket, object, uploadID)
	return err
}

357
xl-v1-object.go Normal file
View File

@ -0,0 +1,357 @@
package main
import (
"crypto/md5"
"encoding/hex"
"io"
"path"
"path/filepath"
"strings"
"sync"
"time"
"github.com/minio/minio/pkg/mimedb"
)
/// Object Operations
// GetObject - streams an object starting at startOffset.
//
// The XL metadata is used to find the part and the offset within it that
// correspond to startOffset. A background go-routine then copies that part and
// all following parts, in order, into a pipe whose read end is returned.
// Read failures are delivered to the reader through the pipe.
func (xl xlObjects) GetObject(bucket, object string, startOffset int64) (io.ReadCloser, error) {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return nil, BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return nil, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	nsMutex.RLock(bucket, object)
	defer nsMutex.RUnlock(bucket, object)

	pipeReader, pipeWriter := io.Pipe()

	xlMeta, err := readXLMetadata(xl.getRandomDisk(), bucket, object)
	if err != nil {
		return nil, toObjectErr(err, bucket, object)
	}
	partIndex, offset, err := xlMeta.getPartNumberOffset(startOffset)
	if err != nil {
		return nil, toObjectErr(err, bucket, object)
	}

	// The deferred RUnlock above fires as soon as this function returns, which
	// is before the go-routine below is done. Take the read lock a second time
	// and let the go-routine release it when it finishes.
	nsMutex.RLock(bucket, object)
	go func() {
		defer nsMutex.RUnlock(bucket, object)
		for ; partIndex < len(xlMeta.Parts); partIndex++ {
			partName := xlMeta.Parts[partIndex].Name
			partReader, rErr := xl.erasureDisk.ReadFile(bucket, pathJoin(object, partName), offset)
			if rErr != nil {
				pipeWriter.CloseWithError(rErr)
				return
			}
			// Only the first part read may start at a non-zero offset.
			offset = 0
			_, cErr := io.Copy(pipeWriter, partReader)
			if cErr == nil {
				// Release the part reader before moving on, leaving it open
				// leaks underlying file descriptors.
				partReader.Close()
				continue
			}
			// Copy failed: close the source, then report the failure to the consumer.
			switch src := partReader.(type) {
			case *io.PipeReader:
				src.CloseWithError(cErr)
			case io.ReadCloser:
				src.Close()
			}
			pipeWriter.CloseWithError(cErr)
			return
		}
		pipeWriter.Close()
	}()
	return pipeReader, nil
}
// GetObjectInfo - returns the info of an object after validating the bucket
// and object names; the lookup itself runs under a read lock on the object.
func (xl xlObjects) GetObjectInfo(bucket, object string) (ObjectInfo, error) {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return ObjectInfo{}, BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return ObjectInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	nsMutex.RLock(bucket, object)
	defer nsMutex.RUnlock(bucket, object)

	objInfo, err := xl.getObjectInfo(bucket, object)
	if err == nil {
		return objInfo, nil
	}
	return ObjectInfo{}, toObjectErr(err, bucket, object)
}
// getObjectInfo - builds the object info from the XL metadata of the object.
// The metadata is read from a randomly picked disk; on failure another random
// pick is made, for as many attempts as there are disks. The error of the last
// attempt is returned when every attempt fails.
func (xl xlObjects) getObjectInfo(bucket, object string) (objInfo ObjectInfo, err error) {
	for attempt := 0; attempt < len(xl.storageDisks); attempt++ {
		// A fresh random pick per attempt, so the same disk is not hit all the time.
		meta, readErr := readXLMetadata(xl.getRandomDisk(), bucket, object)
		if readErr != nil {
			// Remember the failure and try again.
			err = readErr
			continue
		}
		// First successful read wins.
		return ObjectInfo{
			IsDir:           false,
			Bucket:          bucket,
			Name:            object,
			Size:            meta.Stat.Size,
			ModTime:         meta.Stat.ModTime,
			MD5Sum:          meta.Meta["md5Sum"],
			ContentType:     meta.Meta["content-type"],
			ContentEncoding: meta.Meta["content-encoding"],
		}, nil
	}
	// Every attempt failed.
	return ObjectInfo{}, err
}
// renameObject - renames the source object to the destination object on all
// disks in parallel.
//
// Returns nil when the number of failed renames stays within
// len(xl.storageDisks) - xl.writeQuorum, and also when it exceeds that but
// still stays within len(xl.storageDisks) - xl.readQuorum. Otherwise the source
// is deleted and errWriteQuorum is returned.
func (xl xlObjects) renameObject(srcBucket, srcObject, dstBucket, dstObject string) error {
	// Initialize sync waitgroup.
	var wg = &sync.WaitGroup{}

	// Initialize list of errors, one slot per disk.
	var errs = make([]error, len(xl.storageDisks))

	// Rename file on all underlying storage disks.
	for index, disk := range xl.storageDisks {
		// Append "/" as srcObject and dstObject are either leaf-dirs or non-leaf-dirs.
		// If srcObject is an object instead of prefix we just rename the leaf-dir and
		// not rename the part and metadata files separately.
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			// Record the failure for this disk; the slot stays nil on success.
			if err := disk.RenameFile(srcBucket, retainSlash(srcObject), dstBucket, retainSlash(dstObject)); err != nil {
				errs[index] = err
			}
		}(index, disk)
	}

	// Wait for all RenameFile to finish.
	wg.Wait()

	// Gather err count.
	var errCount = 0
	for _, err := range errs {
		if err != nil {
			errCount++
		}
	}

	// We can safely allow RenameFile errors up to len(xl.storageDisks) - xl.writeQuorum
	// otherwise return failure.
	if errCount > len(xl.storageDisks)-xl.writeQuorum {
		// Special condition if readQuorum exists, then return success.
		if errCount <= len(xl.storageDisks)-xl.readQuorum {
			return nil
		}
		// Best-effort removal of the source; its outcome does not change the
		// error reported to the caller.
		xl.deleteObject(srcBucket, srcObject)
		return errWriteQuorum
	}
	return nil
}
// PutObject - creates an object from data.
//
// The data is streamed into a temporary file while its MD5 is computed. When
// size is greater than zero exactly size bytes are copied; otherwise data is
// read until EOF and the number of bytes copied becomes the object size. The
// temporary object then replaces any existing object and its XL metadata is
// written.
//
// metadata may be nil. A missing "md5Sum" is filled with the computed digest
// and a missing "content-type" is derived from the object extension, falling
// back to "application/octet-stream"; note that a non-nil metadata map passed
// by the caller is updated in place. A supplied "md5Sum" that differs from the
// computed digest fails with BadDigest.
//
// Returns the hex encoded MD5 of the written data. Other errors include
// BucketNameInvalid, BucketNotFound, ObjectNameInvalid, or a storage error
// converted by toObjectErr.
func (xl xlObjects) PutObject(bucket string, object string, size int64, data io.Reader, metadata map[string]string) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify bucket exists.
	if !xl.isBucketExist(bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	// No metadata is set, allocate a new one.
	if metadata == nil {
		metadata = make(map[string]string)
	}

	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	tempErasureObj := path.Join(tmpMetaPrefix, bucket, object, "object1")
	tempObj := path.Join(tmpMetaPrefix, bucket, object)
	fileWriter, err := xl.erasureDisk.CreateFile(minioMetaBucket, tempErasureObj)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// discard closes and removes the temporary file and reports cause, unless
	// the cleanup itself fails, in which case the cleanup error is reported.
	discard := func(cause error) (string, error) {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", cause
	}

	// Write the incoming data to the temporary file and the md5 hasher at once.
	md5Writer := md5.New()
	multiWriter := io.MultiWriter(md5Writer, fileWriter)
	if size > 0 {
		if _, err = io.CopyN(multiWriter, data, size); err != nil {
			return discard(toObjectErr(err, bucket, object))
		}
	} else {
		// Size is not known upfront: record what was actually copied so that
		// the metadata does not end up with the caller's non-positive size.
		var n int64
		if n, err = io.Copy(multiWriter, data); err != nil {
			return discard(toObjectErr(err, bucket, object))
		}
		size = n
	}

	// Save additional erasureMetadata.
	modTime := time.Now().UTC()
	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))
	// Update the md5sum if not set with the newly calculated one.
	if len(metadata["md5Sum"]) == 0 {
		metadata["md5Sum"] = newMD5Hex
	}
	// If not set default to "application/octet-stream"
	if metadata["content-type"] == "" {
		contentType := "application/octet-stream"
		if objectExt := filepath.Ext(object); objectExt != "" {
			content, ok := mimedb.DB[strings.ToLower(strings.TrimPrefix(objectExt, "."))]
			if ok {
				contentType = content.ContentType
			}
		}
		metadata["content-type"] = contentType
	}

	// Reject the upload when the expected md5sum does not match the data.
	if md5Hex := metadata["md5Sum"]; md5Hex != "" && newMD5Hex != md5Hex {
		return discard(BadDigest{md5Hex, newMD5Hex})
	}

	if err = fileWriter.Close(); err != nil {
		return discard(toObjectErr(err, bucket, object))
	}

	// Check if an object is present as one of the parent dir.
	if xl.parentDirIsObject(bucket, path.Dir(object)) {
		return "", toObjectErr(errFileAccessDenied, bucket, object)
	}

	// Delete if an object already exists.
	if err = xl.deleteObject(bucket, object); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Move the temporary object to its final location.
	if err = xl.renameObject(minioMetaBucket, tempObj, bucket, object); err != nil {
		if dErr := xl.deleteObject(minioMetaBucket, tempObj); dErr != nil {
			return "", toObjectErr(dErr, minioMetaBucket, tempObj)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Describe the object as a single part named "object1".
	xlMeta := xlMetaV1{}
	xlMeta.Version = "1"
	xlMeta.Format = "xl"
	xlMeta.Meta = metadata
	xlMeta.Stat.Size = size
	xlMeta.Stat.ModTime = modTime
	xlMeta.AddObjectPart("object1", newMD5Hex, xlMeta.Stat.Size)
	if err = xl.writeXLMetadata(bucket, object, xlMeta); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Return md5sum, successfully wrote object.
	return newMD5Hex, nil
}
// deleteObject - removes an object by running cleanupDir for it on every disk
// in parallel.
//
// Returns errFileNotFound when every disk reports errFileNotFound,
// errWriteQuorum when the number of other failures exceeds
// len(xl.storageDisks) - xl.writeQuorum, and nil otherwise.
func (xl xlObjects) deleteObject(bucket, object string) error {
	diskCount := len(xl.storageDisks)
	// One result slot per disk, filled in by that disk's go-routine.
	results := make([]error, diskCount)

	var wg sync.WaitGroup
	for i, d := range xl.storageDisks {
		wg.Add(1)
		go func(i int, d StorageAPI) {
			defer wg.Done()
			results[i] = cleanupDir(d, bucket, object)
		}(i, d)
	}
	// Wait for all routines to finish.
	wg.Wait()

	// Tally the outcomes: "not found" is counted apart from real failures.
	notFound, failed := 0, 0
	for _, res := range results {
		switch res {
		case nil:
			// Success on this disk.
		case errFileNotFound:
			notFound++
		default:
			failed++
		}
	}

	switch {
	case notFound == diskCount:
		// All disks report file not found.
		return errFileNotFound
	case failed > diskCount-xl.writeQuorum:
		// More failures than the write quorum allows.
		return errWriteQuorum
	}
	return nil
}
// DeleteObject - deletes an object after validating the bucket and object
// names; the deletion runs under a write lock on the object.
//
// Returns BucketNameInvalid, ObjectNameInvalid, or the deletion error
// converted by toObjectErr.
func (xl xlObjects) DeleteObject(bucket, object string) error {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	// Report deletion failures instead of silently claiming success.
	if err := xl.deleteObject(bucket, object); err != nil {
		return toObjectErr(err, bucket, object)
	}
	return nil
}

177
xl-v1.go Normal file
View File

@ -0,0 +1,177 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"errors"
"fmt"
"path/filepath"
"strings"
"sync"
)
// Names of the JSON files kept by the object layer.
const (
// Name of the backend format file ("format.json").
formatConfigFile = "format.json"
// Name of the per-object XL metadata file ("xl.json").
xlMetaJSONFile = "xl.json"
// Name of the file listing the pending multipart uploads ("uploads.json").
uploadsJSONFile = "uploads.json"
)
// xlObjects - Implements the XL (erasure coded) object layer.
type xlObjects struct {
// Storage disks backing this layer; newXLObjects sets them after the format is loaded or initialized.
storageDisks []StorageAPI
// Erasure layer created over storageDisks by newErasure; used to create and read object data.
erasureDisk *erasure
// Number of data blocks; newXLObjects sets it to half the number of disks.
dataBlocks int
// Number of parity blocks; newXLObjects sets it to half the number of disks.
parityBlocks int
// Read quorum; newXLObjects sets it to N/2 + 1 for N disks.
readQuorum int
// Write quorum; newXLObjects sets it to N/2 + 3 for N disks, capped at N.
writeQuorum int
// Tree walkers keyed by list parameters.
// NOTE(review): presumably lets a listing resume across calls - confirm against the list-objects code.
listObjectMap map[listParams][]*treeWalker
// NOTE(review): presumably guards listObjectMap - confirm against the list-objects code.
listObjectMapMutex *sync.Mutex
}
// Errors reported by checkSufficientDisks.
var (
	// errMaxDisks - the disk count is above maxErasureBlocks.
	errMaxDisks = errors.New("Number of disks are higher than supported maximum count '16'")
	// errMinDisks - the disk count is below minErasureBlocks.
	errMinDisks = errors.New("Number of disks are smaller than supported minimum count '8'")
	// errNumDisks - the disk count is odd.
	errNumDisks = errors.New("Number of disks should be multiples of '2'")
)

// Limits on the number of disks.
const (
	// Maximum erasure blocks.
	maxErasureBlocks = 16
	// Minimum erasure blocks.
	minErasureBlocks = 8
)

// checkSufficientDisks - validates the number of disks: only the even counts
// 8, 10, 12, 14 and 16 are accepted. Returns errMaxDisks, errMinDisks or
// errNumDisks for a rejected count, nil otherwise.
func checkSufficientDisks(disks []string) error {
	switch total := len(disks); {
	case total > maxErasureBlocks:
		return errMaxDisks
	case total < minErasureBlocks:
		return errMinDisks
	case total%2 != 0:
		// Odd number of disks.
		return errNumDisks
	}
	return nil
}
// newStorageLayer - initializes the storage layer for a disk. A disk string
// that contains ':' and has no volume name is handed to the rpc client, every
// other disk string is handed to the posix filesystem layer.
func newStorageLayer(disk string) (storage StorageAPI, err error) {
	hasColon := strings.ContainsRune(disk, ':')
	hasVolume := filepath.VolumeName(disk) != ""
	if hasColon && !hasVolume {
		// Initialize rpc client storage API.
		return newRPCClient(disk)
	}
	// Initialize filesystem storage API.
	return newPosix(disk)
}
// bootstrapDisks - initializes a storage layer for every disk, in order.
// errDiskNotFound is tolerated on purpose: the slot keeps whatever
// newStorageLayer returned and later XL/Erasure calls are left to deal with
// the missing disk. Any other error aborts the bootstrap.
func bootstrapDisks(disks []string) ([]StorageAPI, error) {
	layers := make([]StorageAPI, len(disks))
	for i := range disks {
		layer, err := newStorageLayer(disks[i])
		layers[i] = layer
		if err == nil || err == errDiskNotFound {
			continue
		}
		return nil, err
	}
	return layers, nil
}
// newXLObjects - initializes a new XL object layer over the given disks.
//
// The disk count is validated, the disks are bootstrapped, the saved XL format
// is loaded (or created when the disks are unformatted) and the erasure layer
// is set up. Data and parity blocks are each half the number of disks, read
// quorum is N/2 + 1 and write quorum is N/2 + 3 capped at N.
func newXLObjects(disks []string) (ObjectLayer, error) {
	if err := checkSufficientDisks(disks); err != nil {
		return nil, err
	}

	// Bootstrap disks.
	storageDisks, err := bootstrapDisks(disks)
	if err != nil {
		return nil, err
	}

	// Initialize object layer - like creating minioMetaBucket, cleaning up tmp files etc.
	initObjectLayer(storageDisks...)

	// Load saved XL format.json and validate.
	newPosixDisks, err := loadFormatXL(storageDisks)
	if err == errUnformattedDisk {
		// Fresh disks, save a new XL format and use the disks as they are.
		if errSave := initFormatXL(storageDisks); errSave != nil {
			return nil, errSave
		}
		newPosixDisks = storageDisks
	} else if err != nil {
		// errCorruptedDisk - error.
		return nil, fmt.Errorf("Unable to recognize backend format, %s", err)
	}

	// FIXME: healFormatXL(newDisks)

	newErasureDisk, err := newErasure(newPosixDisks)
	if err != nil {
		return nil, err
	}

	totalDisks := len(newPosixDisks)

	// Read quorum should be always N/2 + 1 (due to Vandermonde matrix
	// erasure requirements)
	readQuorum := totalDisks/2 + 1

	// Write quorum is N/2 + 3, never more than the number of disks.
	// (Need to discuss this again)
	writeQuorum := totalDisks/2 + 3
	if writeQuorum > totalDisks {
		writeQuorum = totalDisks
	}

	// Return successfully initialized object layer.
	return xlObjects{
		storageDisks:       newPosixDisks,
		erasureDisk:        newErasureDisk,
		dataBlocks:         totalDisks / 2,
		parityBlocks:       totalDisks / 2,
		readQuorum:         readQuorum,
		writeQuorum:        writeQuorum,
		listObjectMap:      make(map[listParams][]*treeWalker),
		listObjectMapMutex: &sync.Mutex{},
	}, nil
}