Merge pull request #1728 from minio/rewrite-xl

XL/FS: Rewrite in new format.
This commit is contained in:
Harshavardhana 2016-06-02 23:19:17 -07:00
commit 5108ba6eb1
74 changed files with 7215 additions and 4196 deletions

View File

@ -22,7 +22,39 @@ import (
) )
// Parse bucket url queries // Parse bucket url queries
func getBucketResources(values url.Values) (prefix, marker, delimiter string, maxkeys int, encodingType string) { func getListObjectsV1Args(values url.Values) (prefix, marker, delimiter string, maxkeys int, encodingType string) {
prefix = values.Get("prefix")
marker = values.Get("marker")
delimiter = values.Get("delimiter")
if values.Get("max-keys") != "" {
maxkeys, _ = strconv.Atoi(values.Get("max-keys"))
} else {
maxkeys = maxObjectList
}
encodingType = values.Get("encoding-type")
return
}
// getListObjectsV2Args - extracts the ListObjects V2 parameters from the
// bucket URL query. When "max-keys" is absent the listing limit falls back
// to maxObjectList; when present, its integer conversion error is discarded.
func getListObjectsV2Args(values url.Values) (prefix, token, startAfter, delimiter string, maxkeys int, encodingType string) {
	// Start from the default limit and override it only when the client sent one.
	maxkeys = maxObjectList
	if rawMaxKeys := values.Get("max-keys"); rawMaxKeys != "" {
		maxkeys, _ = strconv.Atoi(rawMaxKeys)
	}

	prefix = values.Get("prefix")
	token = values.Get("continuation-token")
	startAfter = values.Get("start-after")
	delimiter = values.Get("delimiter")
	encodingType = values.Get("encoding-type")
	return
}
// Parse bucket url queries
func getBucketResources(values url.Values) (listType int, prefix, marker, delimiter string, maxkeys int, encodingType string) {
if values.Get("list-type") != "" {
listType, _ = strconv.Atoi(values.Get("list-type"))
}
prefix = values.Get("prefix") prefix = values.Get("prefix")
marker = values.Get("marker") marker = values.Get("marker")
delimiter = values.Get("delimiter") delimiter = values.Get("delimiter")

View File

@ -65,6 +65,37 @@ type ListObjectsResponse struct {
Prefix string Prefix string
} }
// ListObjectsV2Response - format for the ListObjects V2 response. It is
// serialized as the "ListBucketResult" XML element in the S3 namespace.
type ListObjectsV2Response struct {
	XMLName xml.Name `xml:"http://s3.amazonaws.com/doc/2006-03-01/ ListBucketResult" json:"-"`

	// Common prefixes and object entries that make up the listing.
	CommonPrefixes []CommonPrefix
	Contents []Object

	// Delimiter that was used for the listing.
	Delimiter string

	// Encoding type used to encode object keys in the response.
	EncodingType string

	// A flag that indicates whether or not ListObjects returned all of the results
	// that satisfied the search criteria.
	IsTruncated bool

	// The "start-after" value the listing was requested with.
	StartAfter string

	// Maximum number of keys the listing was requested with.
	MaxKeys int

	// Name of the bucket.
	Name string

	// ContinuationToken carries the "continuation-token" of the request this
	// response answers. NextContinuationToken is the token to send as
	// "continuation-token" in the subsequent request to get the next set of
	// objects; the response generator in this file fills it from the
	// listing's NextMarker.
	ContinuationToken string
	NextContinuationToken string

	// Prefix the listing was restricted to.
	Prefix string
}
// Part container for part metadata. // Part container for part metadata.
type Part struct { type Part struct {
PartNumber int PartNumber int
@ -304,6 +335,51 @@ func generateListObjectsResponse(bucket, prefix, marker, delimiter string, maxKe
return data return data
} }
// generateListObjectsV2Response - builds the ListObjects V2 response for the
// given bucket out of the listing result and the parameters of the request.
func generateListObjectsV2Response(bucket, prefix, token, startAfter, delimiter string, maxKeys int, resp ListObjectsInfo) ListObjectsV2Response {
	// Every listed object is reported with the same fixed owner.
	objOwner := Owner{}
	objOwner.ID = "minio"
	objOwner.DisplayName = "minio"

	// Convert the listed objects, leaving out entries without a name.
	var contents []Object
	for _, objInfo := range resp.Objects {
		if objInfo.Name == "" {
			continue
		}
		entry := Object{}
		entry.Key = objInfo.Name
		entry.LastModified = objInfo.ModTime.UTC().Format(timeFormatAMZ)
		// The ETag is the quoted MD5 sum and is only set when one is known.
		if objInfo.MD5Sum != "" {
			entry.ETag = "\"" + objInfo.MD5Sum + "\""
		}
		entry.Size = objInfo.Size
		entry.StorageClass = "STANDARD"
		entry.Owner = objOwner
		contents = append(contents, entry)
	}

	// Convert the common prefixes.
	var prefixes []CommonPrefix
	for _, commonPrefix := range resp.Prefixes {
		item := CommonPrefix{}
		item.Prefix = commonPrefix
		prefixes = append(prefixes, item)
	}

	// TODO - support EncodingType in xml decoding
	listing := ListObjectsV2Response{}
	listing.Name = bucket
	listing.Prefix = prefix
	listing.Delimiter = delimiter
	listing.StartAfter = startAfter
	listing.MaxKeys = maxKeys
	listing.ContinuationToken = token
	listing.NextContinuationToken = resp.NextMarker
	listing.IsTruncated = resp.IsTruncated
	listing.Contents = contents
	listing.CommonPrefixes = prefixes
	return listing
}
// generateCopyObjectResponse // generateCopyObjectResponse
func generateCopyObjectResponse(etag string, lastModified time.Time) CopyObjectResponse { func generateCopyObjectResponse(etag string, lastModified time.Time) CopyObjectResponse {
return CopyObjectResponse{ return CopyObjectResponse{

View File

@ -220,9 +220,22 @@ func (api objectAPIHandlers) ListObjectsHandler(w http.ResponseWriter, r *http.R
return return
} }
} }
var prefix, marker, token, delimiter, startAfter string
var maxkeys int
var listV2 bool
// TODO handle encoding type. // TODO handle encoding type.
prefix, marker, delimiter, maxkeys, _ := getBucketResources(r.URL.Query()) if r.URL.Query().Get("list-type") == "2" {
listV2 = true
prefix, token, startAfter, delimiter, maxkeys, _ = getListObjectsV2Args(r.URL.Query())
// For ListV2 "start-after" is considered only if "continuation-token" is empty.
if token == "" {
marker = startAfter
} else {
marker = token
}
} else {
prefix, marker, delimiter, maxkeys, _ = getListObjectsV1Args(r.URL.Query())
}
if maxkeys < 0 { if maxkeys < 0 {
writeErrorResponse(w, r, ErrInvalidMaxKeys, r.URL.Path) writeErrorResponse(w, r, ErrInvalidMaxKeys, r.URL.Path)
return return
@ -242,10 +255,17 @@ func (api objectAPIHandlers) ListObjectsHandler(w http.ResponseWriter, r *http.R
} }
listObjectsInfo, err := api.ObjectAPI.ListObjects(bucket, prefix, marker, delimiter, maxkeys) listObjectsInfo, err := api.ObjectAPI.ListObjects(bucket, prefix, marker, delimiter, maxkeys)
if err == nil { if err == nil {
var encodedSuccessResponse []byte
// generate response // generate response
if listV2 {
response := generateListObjectsV2Response(bucket, prefix, token, startAfter, delimiter, maxkeys, listObjectsInfo)
encodedSuccessResponse = encodeResponse(response)
} else {
response := generateListObjectsResponse(bucket, prefix, marker, delimiter, maxkeys, listObjectsInfo) response := generateListObjectsResponse(bucket, prefix, marker, delimiter, maxkeys, listObjectsInfo)
encodedSuccessResponse := encodeResponse(response) encodedSuccessResponse = encodeResponse(response)
}
// Write headers // Write headers
setCommonHeaders(w) setCommonHeaders(w)
// Write success response. // Write success response.

0
docs/backend/README.md Normal file
View File

View File

@ -0,0 +1,4 @@
{
"format": "fs",
"version": "1"
}

15
docs/backend/fs/fs.json Normal file
View File

@ -0,0 +1,15 @@
{
"version": "1",
"format": "fs",
"minio": {
"release": "DEVELOPMENT.GOGET"
},
"parts": [
{
"number": 1,
"name": "object1",
"size": 29,
"eTag": ""
}
]
}

View File

@ -0,0 +1,10 @@
{
"version": "1",
"format": "fs",
"uploadIds": [
{
"uploadID": "id",
"startTime": "time"
}
]
}

View File

@ -0,0 +1,20 @@
{
"xl": {
"jbod": [
"8aa2b1bc-0e5a-49e0-8221-05228336b040",
"3467a69b-0266-478a-9e10-e819447e4545",
"d4a4505b-4e4f-4864-befd-4f36adb0bc66",
"592b6583-ca26-47af-b991-ba6d097e34e8",
"c7ef69f0-dbf5-4c0e-b167-d30a441bad7e",
"f0b36ea3-fe96-4f2b-bced-22c7f33e0e0c",
"b83abf39-e39d-4e7b-8e16-6f9953455a48",
"7d63dfc9-5441-4243-bd36-de8db0691982",
"c1bbffc5-81f9-4251-9398-33a959b3ce37",
"64408f94-26e0-4277-9593-2d703f4d5a91"
],
"disk": "8aa2b1bc-0e5a-49e0-8221-05228336b040",
"version": "1"
},
"format": "xl",
"version": "1"
}

View File

@ -0,0 +1,10 @@
{
"version": "1",
"format": "xl",
"uploadIds": [
{
"uploadID": "id",
"startTime": "time"
}
]
}

57
docs/backend/xl/xl.json Normal file
View File

@ -0,0 +1,57 @@
{
"parts": [
{
"number": 1,
"size": 5242880,
"etag": "3565c6e741e69a007a5ac7db893a62b5",
"name": "object1"
},
{
"number": 2,
"size": 5242880,
"etag": "d416712335c280ab1e39498552937764",
"name": "object2"
},
{
"number": 3,
"size": 4338324,
"etag": "8a98c5c54d81c6c95ed9bdcaeb941aaf",
"name": "object3"
}
],
"meta": {
"md5Sum": "97586a5290d4f5a41328062d6a7da593-3",
"content-type": "application\/octet-stream",
"content-encoding": ""
},
"minio": {
"release": "DEVELOPMENT.GOGET"
},
"erasure": {
"algorithm": "klauspost/reedsolomon/vandermonde",
"index": 2,
"distribution": [ 1, 3, 4, 2, 5, 8, 7, 6, 9 ],
"blockSize": 4194304,
"parity": 5,
"data": 5,
"checksum": [
{
"name": "object1",
"algorithm": "sha512",
"hash": "d9910e1492446389cfae6fe979db0245f96ca97ca2c7a25cab45805882004479320d866a47ea1f7be6a62625dd4de6caf7816009ef9d62779346d01a221b335c"
},
{
"name": "object2",
"algorithm": "sha512",
"hash": "d9910e1492446389cfae6fe979db0245f96ca97ca2c7a25cab45805882004479320d866a47ea1f7be6a62625dd4de6caf7816009ef9d62779346d01a221b335c"
}
]
},
"stat": {
"version": 0,
"modTime": "2016-05-24T00:09:40.122390255Z",
"size": 14824084
},
"format": "xl",
"version": "1"
}

149
erasure-createfile.go Normal file
View File

@ -0,0 +1,149 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"encoding/hex"
"hash"
"io"
"sync"
"github.com/klauspost/reedsolomon"
)
// erasureCreateFile - consumes the whole input stream block by block,
// erasure codes every block and appends the resulting shards to the disks.
// A running checksum is kept per shard and recorded, under partName, in
// the erasure info returned for every disk whose input erasure info is
// valid. Returns the updated erasure infos and the number of bytes read.
func erasureCreateFile(disks []StorageAPI, volume string, path string, partName string, data io.Reader, eInfos []erasureInfo) (newEInfos []erasureInfo, size int64, err error) {
	// Read buffer holding one block worth of input.
	buf := make([]byte, blockSizeV1)

	// One hash writer per shard.
	hashWriters := newHashWriters(len(disks))

	// Erasure parameters come from one picked erasure info.
	eInfo := pickValidErasureInfo(eInfos)

	// Keep reading until the stream reports io.EOF.
	for {
		var n int
		n, err = io.ReadFull(data, buf)
		if err == io.EOF {
			break
		}
		// A short final block (io.ErrUnexpectedEOF) is still encoded and written.
		if !(err == nil || err == io.ErrUnexpectedEOF) {
			return nil, 0, err
		}
		size += int64(n)

		// Erasure code what was read in this round.
		var shards [][]byte
		if shards, err = encodeData(buf[:n], eInfo.DataBlocks, eInfo.ParityBlocks); err != nil {
			return nil, 0, err
		}

		// Append the shards to the disks.
		if err = appendFile(disks, volume, path, shards, eInfo.Distribution, hashWriters); err != nil {
			return nil, 0, err
		}
	}

	// Collect the final checksum of every shard, indexed by shard position.
	checkSums := make([]checkSumInfo, len(disks))
	for diskIdx := range disks {
		shardIdx := eInfo.Distribution[diskIdx] - 1
		checkSums[shardIdx] = checkSumInfo{
			Name:      partName,
			Algorithm: "sha512",
			Hash:      hex.EncodeToString(hashWriters[shardIdx].Sum(nil)),
		}
	}

	// Attach the matching shard checksum to every valid erasure info.
	newEInfos = make([]erasureInfo, len(disks))
	for diskIdx, info := range eInfos {
		if !info.IsValid() {
			continue
		}
		shardIdx := info.Distribution[diskIdx] - 1
		info.Checksum = append(info.Checksum, checkSums[shardIdx])
		newEInfos[diskIdx] = info
	}

	return newEInfos, size, nil
}
// encodeData - splits the input buffer into dataBlocks data shards, computes
// parityBlocks parity shards for them and returns all shards as a two
// dimensional byte slice.
func encodeData(dataBuffer []byte, dataBlocks, parityBlocks int) ([][]byte, error) {
	encoder, err := reedsolomon.New(dataBlocks, parityBlocks)
	if err != nil {
		return nil, err
	}

	// Carve the buffer into shards.
	shards, err := encoder.Split(dataBuffer)
	if err != nil {
		return nil, err
	}

	// Fill in the parity shards from the data shards.
	if err = encoder.Encode(shards); err != nil {
		return nil, err
	}

	return shards, nil
}
// appendFile - appends one round of encoded blocks to the file at path on
// all available disks in parallel.
//
// Each disk receives the block selected by its distribution entry (entries
// are 1-based block numbers). After a successful append the block is also
// fed to the matching hash writer so that a checksum of everything written
// accumulates across calls. Nil disks are skipped.
//
// Failures on individual disks are tolerated. An error (the first one
// recorded, in disk order) is returned only when every attempted disk
// failed, that is when the block was not stored anywhere.
func appendFile(disks []StorageAPI, volume, path string, enBlocks [][]byte, distribution []int, hashWriters []hash.Hash) (err error) {
	var wg = &sync.WaitGroup{}
	var wErrs = make([]error, len(disks))
	// Write encoded data to the disks in parallel.
	for index, disk := range disks {
		if disk == nil {
			continue
		}
		wg.Add(1)
		// Write encoded data in routine.
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			// Pick the block from the distribution.
			blockIndex := distribution[index] - 1
			n, wErr := disk.AppendFile(volume, path, enBlocks[blockIndex])
			if wErr != nil {
				wErrs[index] = wErr
				return
			}
			// A short write is treated as a failure as well.
			if n != int64(len(enBlocks[blockIndex])) {
				wErrs[index] = errUnexpected
				return
			}
			// Only successfully written blocks contribute to the checksum.
			hashWriters[blockIndex].Write(enBlocks[blockIndex])
		}(index, disk)
	}

	// Wait for all the appends to finish.
	wg.Wait()

	// Succeed as soon as one attempted disk accepted its block; otherwise
	// surface the first failure instead of silently reporting success.
	var firstErr error
	for index, disk := range disks {
		if disk == nil {
			continue
		}
		if wErrs[index] == nil {
			return nil
		}
		if firstErr == nil {
			firstErr = wErrs[index]
		}
	}
	return firstErr
}

208
erasure-readfile.go Normal file
View File

@ -0,0 +1,208 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"encoding/hex"
"errors"
"github.com/klauspost/reedsolomon"
)
// erasureReadFile - reads an entire erasure coded file into a byte
// slice. Erasure coded parts are often a few megabytes in size and it
// is convenient to return them as a byte slice. This function also
// supports bit-rot detection: a disk's copy is only read when
// isValidBlock accepts it against the checksum recorded for partName.
//
// size is the number of decoded bytes to return. The file is processed
// in rounds of at most eInfo.BlockSize decoded bytes; in every round the
// encoded blocks are read from the disks, reconstructed if some data
// blocks are missing, and the data portion is appended to the result.
func erasureReadFile(disks []StorageAPI, volume string, path string, partName string, size int64, eInfos []erasureInfo) ([]byte, error) {
	// Return data buffer.
	var buffer []byte

	// Total number of decoded bytes still to be produced.
	totalSizeLeft := size

	// Offset, in decoded bytes, of the current round.
	startOffset := int64(0)

	// Gather previously calculated block checksums.
	blockCheckSums := metaPartBlockChecksums(disks, eInfos, partName)

	// Pick one erasure info.
	eInfo := pickValidErasureInfo(eInfos)

	// Loop until the requested size has been read.
	for totalSizeLeft > 0 {
		// Calculate the proper block size, the last round may be shorter.
		var curBlockSize int64
		if eInfo.BlockSize < totalSizeLeft {
			curBlockSize = eInfo.BlockSize
		} else {
			curBlockSize = totalSizeLeft
		}

		// Calculate the current encoded block size and the offset of the
		// current round inside each disk's file.
		curEncBlockSize := getEncodedBlockLen(curBlockSize, eInfo.DataBlocks)
		offsetEncOffset := getEncodedBlockLen(startOffset, eInfo.DataBlocks)

		// Allocate encoded blocks up to storage disks.
		enBlocks := make([][]byte, len(disks))

		// Counter to keep success data blocks.
		var successDataBlocksCount = 0
		var noReconstruct bool // Set for no reconstruction.

		// Read from all the disks.
		for index, disk := range disks {
			// Zero-based position of this disk's block (distribution
			// entries are 1-based).
			blockIndex := eInfo.Distribution[index] - 1
			// NOTE(review): isValidBlock hashes the complete file on the
			// disk, and it is invoked again for every round of the outer loop.
			if !isValidBlock(disks, volume, path, toDiskIndex(blockIndex, eInfo.Distribution), blockCheckSums) {
				continue
			}
			if disk == nil {
				continue
			}
			// Initialize shard slice and fill the data from each parts.
			enBlocks[blockIndex] = make([]byte, curEncBlockSize)
			// Read the necessary blocks, a failed read leaves the block
			// missing so that it can be reconstructed.
			_, err := disk.ReadFile(volume, path, offsetEncOffset, enBlocks[blockIndex])
			if err != nil {
				enBlocks[blockIndex] = nil
			}
			// Verify if we have successfully read all the data blocks.
			if blockIndex < eInfo.DataBlocks && enBlocks[blockIndex] != nil {
				successDataBlocksCount++
				// Set when we have all the data blocks and no
				// reconstruction is needed, so that we can avoid
				// erasure reconstruction.
				noReconstruct = successDataBlocksCount == eInfo.DataBlocks
				if noReconstruct {
					// Break out we have read all the data blocks.
					break
				}
			}
		}

		// Check blocks if they are all zero in length, we have corruption return error.
		if checkBlockSize(enBlocks) == 0 {
			return nil, errXLDataCorrupt
		}

		// Verify if reconstruction is needed, proceed with reconstruction.
		if !noReconstruct {
			err := decodeData(enBlocks, eInfo.DataBlocks, eInfo.ParityBlocks)
			if err != nil {
				return nil, err
			}
		}

		// Get data blocks from encoded blocks.
		dataBlocks, err := getDataBlocks(enBlocks, eInfo.DataBlocks, int(curBlockSize))
		if err != nil {
			return nil, err
		}

		// Copy data blocks.
		buffer = append(buffer, dataBlocks...)

		// Subtract the bytes decoded in this round.
		totalSizeLeft -= int64(len(dataBlocks))

		// Increase the offset to move forward.
		startOffset += int64(len(dataBlocks))

		// Relinquish memory.
		dataBlocks = nil
	}
	return buffer, nil
}
// PartObjectChecksum - looks up the checksum recorded under partName.
// The first matching entry wins; a zero value checkSumInfo is returned
// when no entry carries that name.
func (e erasureInfo) PartObjectChecksum(partName string) checkSumInfo {
	for i := range e.Checksum {
		if e.Checksum[i].Name != partName {
			continue
		}
		return e.Checksum[i]
	}
	return checkSumInfo{}
}
// metaPartBlockChecksums - collects, in disk order, the checksum recorded
// for partName in every disk's erasure info. Disks whose erasure info is
// not valid contribute a zero value entry so that positions stay aligned
// with the disks.
func metaPartBlockChecksums(disks []StorageAPI, eInfos []erasureInfo, partName string) (blockCheckSums []checkSumInfo) {
	for diskIdx := range disks {
		var partSum checkSumInfo
		if eInfos[diskIdx].IsValid() {
			partSum = eInfos[diskIdx].PartObjectChecksum(partName)
		}
		blockCheckSums = append(blockCheckSums, partSum)
	}
	return blockCheckSums
}
// toDiskIndex - maps a zero-based block index to the index of the disk
// that holds this block according to the distribution.
//
// The distribution stores 1-based block numbers (callers derive the block
// index as distribution[disk] - 1), hence every entry is shifted by one
// before it is compared. Returns -1 when no disk holds the block.
func toDiskIndex(blockIdx int, distribution []int) (diskIndex int) {
	for index, blockNumber := range distribution {
		if blockNumber-1 == blockIdx {
			return index
		}
	}
	return -1
}
// isValidBlock - hashes the file stored on the disk at diskIndex and
// compares the result with the checksum recorded for that disk. Returns
// true only when both agree; an unknown disk index (-1), a missing disk or
// a read failure all count as invalid.
func isValidBlock(disks []StorageAPI, volume, path string, diskIndex int, blockCheckSums []checkSumInfo) bool {
	// Nothing to verify without a known, present disk.
	if diskIndex == -1 || disks[diskIndex] == nil {
		return false
	}

	// Hash the entire file with the algorithm the checksum was recorded with.
	recorded := blockCheckSums[diskIndex]
	sum, err := hashSum(disks[diskIndex], volume, path, newHash(recorded.Algorithm))
	if err != nil {
		return false
	}
	return recorded.Hash == hex.EncodeToString(sum)
}
// decodeData - reconstructs the missing entries of enBlocks in place and
// verifies the result against the parity. Returns an error when the
// blocks cannot be reconstructed or do not verify afterwards.
func decodeData(enBlocks [][]byte, dataBlocks, parityBlocks int) error {
	codec, err := reedsolomon.New(dataBlocks, parityBlocks)
	if err != nil {
		return err
	}

	// Rebuild whatever blocks are missing.
	if err = codec.Reconstruct(enBlocks); err != nil {
		return err
	}

	// Verify reconstructed blocks (parity).
	verified, err := codec.Verify(enBlocks)
	switch {
	case err != nil:
		return err
	case !verified:
		// Blocks cannot be reconstructed, corrupted data.
		return errors.New("Verification failed after reconstruction, data likely corrupted.")
	}
	return nil
}

108
erasure-utils.go Normal file
View File

@ -0,0 +1,108 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"crypto/sha512"
"hash"
"io"
"github.com/klauspost/reedsolomon"
)
// newHashWriters - returns diskCount freshly initialized "sha512" hash
// writers, one per disk.
func newHashWriters(diskCount int) []hash.Hash {
	writers := make([]hash.Hash, diskCount)
	for i := 0; i < diskCount; i++ {
		writers[i] = newHash("sha512")
	}
	return writers
}
// newHash - returns a newly allocated hash for the requested algorithm.
// Only "sha512" is known so far, any other name yields a sha512 hash too.
func newHash(algo string) hash.Hash {
	// Add new hashes here.
	if algo == "sha512" {
		return sha512.New()
	}
	// Unknown algorithms fall back to sha512.
	return sha512.New()
}
// hashSum - reads the file at volume/path from the disk, from offset zero
// until io.EOF, feeds everything read into writer and returns the
// resulting sum. Any read error other than io.EOF aborts and is returned.
func hashSum(disk StorageAPI, volume, path string, writer hash.Hash) ([]byte, error) {
	// One buffer is reused for all reads, the hash consumes the bytes
	// before the next read overwrites them.
	buf := make([]byte, blockSizeV1)
	startOffset := int64(0)
	// Read until io.EOF.
	for {
		n, err := disk.ReadFile(volume, path, startOffset, buf)
		if err != nil && err != io.EOF {
			return nil, err
		}
		// Bytes delivered together with io.EOF belong to the file as
		// well, hash them before stopping.
		writer.Write(buf[:n])
		startOffset += n
		if err == io.EOF {
			break
		}
	}
	return writer.Sum(nil), nil
}
// getDataBlocks - concatenates the first dataBlocks entries of enBlocks
// and returns the leading curBlockSize bytes of that concatenation.
// Fails with reedsolomon.ErrTooFewShards when fewer than dataBlocks blocks
// are supplied and with reedsolomon.ErrShortData when the data blocks hold
// fewer than curBlockSize bytes.
func getDataBlocks(enBlocks [][]byte, dataBlocks int, curBlockSize int) (data []byte, err error) {
	if dataBlocks > len(enBlocks) {
		return nil, reedsolomon.ErrTooFewShards
	}
	shards := enBlocks[:dataBlocks]

	// Make sure the data shards carry enough bytes.
	available := 0
	for i := range shards {
		available += len(shards[i])
	}
	if curBlockSize > available {
		return nil, reedsolomon.ErrShortData
	}

	// Copy shard by shard, truncating the shard that crosses the limit.
	remaining := curBlockSize
	for _, shard := range shards {
		if remaining < len(shard) {
			return append(data, shard[:remaining]...), nil
		}
		data = append(data, shard...)
		remaining -= len(shard)
	}
	return data, nil
}
// checkBlockSize - reports the length of the first block that is not
// empty, or 0 when every block is empty.
func checkBlockSize(blocks [][]byte) int {
	for i := 0; i < len(blocks); i++ {
		if size := len(blocks[i]); size != 0 {
			return size
		}
	}
	return 0
}
// getEncodedBlockLen - returns the length of one encoded block when
// inputLen bytes are spread over dataBlocks data blocks, i.e. inputLen
// divided by dataBlocks and rounded up.
func getEncodedBlockLen(inputLen int64, dataBlocks int) (curEncBlockSize int64) {
	shardCount := int64(dataBlocks)
	return (inputLen + shardCount - 1) / shardCount
}

17
erasure.go Normal file
View File

@ -0,0 +1,17 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main

View File

@ -20,27 +20,240 @@ import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"io"
"strings" "strings"
"sync"
"github.com/skyrings/skyring-common/tools/uuid" "github.com/skyrings/skyring-common/tools/uuid"
) )
// fsFormat - structure holding 'fs' format.
type fsFormat struct { type fsFormat struct {
Version string `json:"version"` Version string `json:"version"`
} }
// xlFormat - structure holding 'xl' format.
type xlFormat struct { type xlFormat struct {
Version string `json:"version"` Version string `json:"version"` // Version of 'xl' format.
Disk string `json:"disk"` Disk string `json:"disk"` // Disk field carries assigned disk uuid.
// JBOD field carries the input disk order generated the first
// time when fresh disks were supplied.
JBOD []string `json:"jbod"` JBOD []string `json:"jbod"`
} }
// formatConfigV1 - structure holds format config version '1'.
type formatConfigV1 struct { type formatConfigV1 struct {
Version string `json:"version"` Version string `json:"version"` // Version of the format config.
// Format indicates the backend format type, supports two values 'xl' and 'fs'.
Format string `json:"format"` Format string `json:"format"`
FS *fsFormat `json:"fs,omitempty"` FS *fsFormat `json:"fs,omitempty"` // FS field holds fs format.
XL *xlFormat `json:"xl,omitempty"` XL *xlFormat `json:"xl,omitempty"` // XL field holds xl format.
}
/*
All disks online
-----------------
- All Unformatted - format all and return success.
- Some Unformatted - format all and return success.
- Any JBOD inconsistent - return failure // Requires deep inspection, phase2.
- Some are corrupt (missing format.json) - return failure // Requires deep inspection, phase2.
- Any unrecognized disks - return failure
Some disks are offline and we have quorum.
-----------------
- Some unformatted - no heal, return success.
- Any JBOD inconsistent - return failure // Requires deep inspection, phase2.
- Some are corrupt (missing format.json) - return failure // Requires deep inspection, phase2.
- Any unrecognized disks - return failure
No read quorum
-----------------
failure for all cases.
// Pseudo code for managing `format.json`.
// Generic checks.
if (no quorum) return error
if (any disk is corrupt) return error // phase2
if (jbod inconsistent) return error // phase2
if (disks not recognized) // Always error.
// Specific checks.
if (all disks online)
if (all disks return format.json)
if (jbod consistent)
if (all disks recognized)
return
else
if (all disks return format.json not found)
(initialize format)
return
else (some disks return format.json not found)
(heal format)
return
fi
fi
else // No healing at this point forward, some disks are offline or dead.
if (some disks return format.json not found)
if (with force)
// Offline disks are marked as dead.
(heal format) // Offline disks should be marked as dead.
return success
else (without force)
// --force is necessary to heal few drives, because some drives
// are offline. Offline disks will be marked as dead.
return error
fi
fi
*/
var errSomeDiskUnformatted = errors.New("some disks are found to be unformatted")
var errSomeDiskOffline = errors.New("some disks are offline")
// reduceFormatErrs - condenses the per disk format errors into one error:
//   - errUnformattedDisk when exactly diskCount disks are unformatted,
//   - errSomeDiskUnformatted when fewer than diskCount disks are
//     unformatted and no disk reported errDiskNotFound,
//   - errSomeDiskOffline when fewer than diskCount disks are unformatted
//     and at least one disk reported errDiskNotFound,
//   - nil otherwise.
func reduceFormatErrs(errs []error, diskCount int) error {
	// Tally the two error kinds of interest.
	unformatted, notFound := 0, 0
	for _, err := range errs {
		switch err {
		case errUnformattedDisk:
			unformatted++
		case errDiskNotFound:
			notFound++
		}
	}

	switch {
	case unformatted == diskCount:
		// All disks report unFormattedDisk.
		return errUnformattedDisk
	case unformatted < diskCount && notFound == 0:
		// Only some disks return unFormattedDisk and all disks are online.
		return errSomeDiskUnformatted
	case unformatted < diskCount && notFound > 0:
		// Only some disks return unFormattedDisk and some disks are
		// offline as well.
		return errSomeDiskOffline
	}
	return nil
}
// loadAllFormats - loads the format config of every input disk in
// parallel. Both returned slices are indexed like bootstrapDisks. The
// error slice is returned only when at least one disk failed to load,
// otherwise it is nil.
func loadAllFormats(bootstrapDisks []StorageAPI) ([]*formatConfigV1, []error) {
	diskCount := len(bootstrapDisks)

	// Per disk results.
	formatConfigs := make([]*formatConfigV1, diskCount)
	sErrs := make([]error, diskCount)

	// Load the format of each disk inside its own go-routine.
	var wg sync.WaitGroup
	for index, disk := range bootstrapDisks {
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			formatConfig, lErr := loadFormat(disk)
			if lErr == nil {
				formatConfigs[index] = formatConfig
				return
			}
			sErrs[index] = lErr
		}(index, disk)
	}

	// Wait for all loads to finish.
	wg.Wait()

	// Hand out the errors as soon as one disk reported a failure.
	for index := range sErrs {
		if sErrs[index] != nil {
			return formatConfigs, sErrs
		}
	}

	// Every disk loaded fine.
	return formatConfigs, nil
}
// genericFormatCheck - runs the checks common to all format states and
// returns the first failure:
//   - errXLReadQuorum when more disks failed than read quorum tolerates,
//   - errCorruptedFormat when any disk reported a corrupted format,
//   - whatever checkFormatXL reports for the loaded format configs.
//
// errUnformattedDisk and errVolumeNotFound are not counted as failures.
func genericFormatCheck(formatConfigs []*formatConfigV1, sErrs []error) (err error) {
	// Classify the per disk errors.
	corruptCount, failureCount := 0, 0
	for _, lErr := range sErrs {
		switch lErr {
		case nil:
			// No error on this disk.
		case errUnformattedDisk, errVolumeNotFound:
			// These errors are good conditions, means disk is online.
		case errCorruptedFormat:
			corruptCount++
		default:
			failureCount++
		}
	}

	// Failures must stay within what read quorum can tolerate.
	total := len(formatConfigs)
	readQuorum := total/2 + 1
	if failureCount > total-readQuorum {
		return errXLReadQuorum
	}

	// One of the disk has corrupt format, return error.
	if corruptCount > 0 {
		return errCorruptedFormat
	}

	// Validates if format and JBOD are consistent across all disks.
	if err = checkFormatXL(formatConfigs); err != nil {
		return err
	}
	return nil
}
// checkDisksConsistency - checks if all disks are consistent with all JBOD entries on all disks.
func checkDisksConsistency(formatConfigs []*formatConfigV1) error {
var disks = make([]string, len(formatConfigs))
var disksFound = make(map[string]bool)
// Collect currently available disk uuids.
for index, formatConfig := range formatConfigs {
if formatConfig == nil {
continue
}
disks[index] = formatConfig.XL.Disk
}
// Validate collected uuids and verify JBOD.
for index, uuid := range disks {
if uuid == "" {
continue
}
var formatConfig = formatConfigs[index]
for _, savedUUID := range formatConfig.XL.JBOD {
if savedUUID == uuid {
disksFound[uuid] = true
}
}
}
// Check if all disks are found.
for _, value := range disksFound {
if !value {
return errors.New("Some disks not found in JBOD.")
}
}
return nil
} }
// checkJBODConsistency - validate xl jbod order if they are consistent. // checkJBODConsistency - validate xl jbod order if they are consistent.
@ -61,7 +274,7 @@ func checkJBODConsistency(formatConfigs []*formatConfigV1) error {
} }
savedJBODStr := strings.Join(format.XL.JBOD, ".") savedJBODStr := strings.Join(format.XL.JBOD, ".")
if jbodStr != savedJBODStr { if jbodStr != savedJBODStr {
return errors.New("Inconsistent disks.") return errors.New("Inconsistent JBOD found.")
} }
} }
return nil return nil
@ -88,10 +301,8 @@ func reorderDisks(bootstrapDisks []StorageAPI, formatConfigs []*formatConfigV1)
} }
// Pick the first JBOD list to verify the order and construct new set of disk slice. // Pick the first JBOD list to verify the order and construct new set of disk slice.
var newDisks = make([]StorageAPI, len(bootstrapDisks)) var newDisks = make([]StorageAPI, len(bootstrapDisks))
var unclaimedJBODIndex = make(map[int]struct{})
for fIndex, format := range formatConfigs { for fIndex, format := range formatConfigs {
if format == nil { if format == nil {
unclaimedJBODIndex[fIndex] = struct{}{}
continue continue
} }
jIndex := findIndex(format.XL.Disk, savedJBOD) jIndex := findIndex(format.XL.Disk, savedJBOD)
@ -100,24 +311,13 @@ func reorderDisks(bootstrapDisks []StorageAPI, formatConfigs []*formatConfigV1)
} }
newDisks[jIndex] = bootstrapDisks[fIndex] newDisks[jIndex] = bootstrapDisks[fIndex]
} }
// Save the unclaimed jbods as well.
for index, disk := range newDisks {
if disk == nil {
for fIndex := range unclaimedJBODIndex {
newDisks[index] = bootstrapDisks[fIndex]
delete(unclaimedJBODIndex, fIndex)
break
}
continue
}
}
return newDisks, nil return newDisks, nil
} }
// loadFormat - load format from disk. // loadFormat - load format from disk.
func loadFormat(disk StorageAPI) (format *formatConfigV1, err error) { func loadFormat(disk StorageAPI) (format *formatConfigV1, err error) {
offset := int64(0) var buffer []byte
r, err := disk.ReadFile(minioMetaBucket, formatConfigFile, offset) buffer, err = readAll(disk, minioMetaBucket, formatConfigFile)
if err != nil { if err != nil {
// 'file not found' and 'volume not found' as // 'file not found' and 'volume not found' as
// same. 'volume not found' usually means its a fresh disk. // same. 'volume not found' usually means its a fresh disk.
@ -136,23 +336,139 @@ func loadFormat(disk StorageAPI) (format *formatConfigV1, err error) {
} }
return nil, err return nil, err
} }
decoder := json.NewDecoder(r)
format = &formatConfigV1{} format = &formatConfigV1{}
err = decoder.Decode(&format) err = json.Unmarshal(buffer, format)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if err = r.Close(); err != nil {
return nil, err
}
return format, nil return format, nil
} }
// loadFormatXL - load XL format.json. // Heals any missing format.json on the drives. Returns error only for unexpected errors
// as regular errors can be ignored since there might be enough quorum to be operational.
func healFormatXL(bootstrapDisks []StorageAPI) error {
	diskCount := len(bootstrapDisks)
	missing := make([]bool, diskCount)           // true for every drive whose format.json is absent.
	loaded := make([]*formatConfigV1, diskCount) // format.json per drive, nil when it was not read.
	loadedCount := 0                             // number of drives whose format.json was read.
	missingCount := 0                            // number of drives reporting errUnformattedDisk.

	// Read `format.json` from every drive and classify the outcome.
	for i, disk := range bootstrapDisks {
		format, err := loadFormat(disk)
		switch err {
		case nil:
			loaded[i] = format
			loadedCount++
		case errUnformattedDisk:
			// format.json is missing, this drive has to be healed.
			missing[i] = true
			missingCount++
		case errDiskNotFound:
			// A missing disk is a valid case, proceed without healing.
			return nil
		default:
			// Any other error is unexpected and is reported to the caller.
			return err
		}
	}

	// Every drive already carries a format.json, nothing to heal.
	if loadedCount == diskCount {
		return nil
	}
	// Every drive is fresh, initFormatXL() writes format.json everywhere.
	if missingCount == diskCount {
		return initFormatXL(bootstrapDisks)
	}
	// Validate the configs that were read before copying from them.
	if err := checkFormatXL(loaded); err != nil {
		return err
	}

	// The first config that was read is the template for the drives
	// that are missing format.json.
	var reference *formatConfigV1
	for _, format := range loaded {
		if format != nil {
			reference = format
			break
		}
	}

	// jbodUUIDs[i] is the i-th disk uuid of the reference JBOD list and
	// claimed[i] records whether some drive already uses it.
	jbodUUIDs := make([]string, diskCount)
	claimed := make([]bool, diskCount)
	for i, id := range reference.XL.JBOD {
		jbodUUIDs[i] = id
	}
	for _, format := range loaded {
		if format == nil {
			continue
		}
		for i := range jbodUUIDs {
			if jbodUUIDs[i] == format.XL.Disk {
				claimed[i] = true
				break
			}
		}
	}

	// nextUnclaimedUUID hands out the first uuid not yet claimed and
	// marks it claimed, or returns "" when none is left.
	nextUnclaimedUUID := func() string {
		for i := range claimed {
			if !claimed[i] {
				claimed[i] = true
				return jbodUUIDs[i]
			}
		}
		return ""
	}

	// Write a copy of the reference config, carrying an unclaimed uuid,
	// to every drive that is missing format.json.
	for i, heal := range missing {
		if !heal {
			continue
		}
		healed := &formatConfigV1{}
		*healed = *reference
		healed.XL.Disk = nextUnclaimedUUID()
		if healed.XL.Disk == "" {
			// An unclaimed uuid was expected for every drive that
			// needs healing, treat its absence as unexpected.
			return errUnexpected
		}
		formatBytes, err := json.Marshal(healed)
		if err != nil {
			return err
		}
		// The AppendFile() error is deliberately ignored, quorum might
		// still be there to be operational.
		_, _ = bootstrapDisks[i].AppendFile(minioMetaBucket, formatConfigFile, formatBytes)
	}
	return nil
}
// loadFormatXL - loads XL `format.json` and returns back properly
// ordered storage slice based on `format.json`.
func loadFormatXL(bootstrapDisks []StorageAPI) (disks []StorageAPI, err error) { func loadFormatXL(bootstrapDisks []StorageAPI) (disks []StorageAPI, err error) {
var unformattedDisksFoundCnt = 0 var unformattedDisksFoundCnt = 0
var diskNotFoundCount = 0 var diskNotFoundCount = 0
formatConfigs := make([]*formatConfigV1, len(bootstrapDisks)) formatConfigs := make([]*formatConfigV1, len(bootstrapDisks))
// Try to load `format.json` bootstrap disks.
for index, disk := range bootstrapDisks { for index, disk := range bootstrapDisks {
var formatXL *formatConfigV1 var formatXL *formatConfigV1
formatXL, err = loadFormat(disk) formatXL, err = loadFormat(disk)
@ -169,6 +485,7 @@ func loadFormatXL(bootstrapDisks []StorageAPI) (disks []StorageAPI, err error) {
// Save valid formats. // Save valid formats.
formatConfigs[index] = formatXL formatConfigs[index] = formatXL
} }
// If all disks indicate that 'format.json' is not available // If all disks indicate that 'format.json' is not available
// return 'errUnformattedDisk'. // return 'errUnformattedDisk'.
if unformattedDisksFoundCnt == len(bootstrapDisks) { if unformattedDisksFoundCnt == len(bootstrapDisks) {
@ -176,11 +493,12 @@ func loadFormatXL(bootstrapDisks []StorageAPI) (disks []StorageAPI, err error) {
} else if diskNotFoundCount == len(bootstrapDisks) { } else if diskNotFoundCount == len(bootstrapDisks) {
return nil, errDiskNotFound return nil, errDiskNotFound
} else if diskNotFoundCount > len(bootstrapDisks)-(len(bootstrapDisks)/2+1) { } else if diskNotFoundCount > len(bootstrapDisks)-(len(bootstrapDisks)/2+1) {
return nil, errReadQuorum return nil, errXLReadQuorum
} else if unformattedDisksFoundCnt > len(bootstrapDisks)-(len(bootstrapDisks)/2+1) { } else if unformattedDisksFoundCnt > len(bootstrapDisks)-(len(bootstrapDisks)/2+1) {
return nil, errReadQuorum return nil, errXLReadQuorum
} }
// Validate the format configs read are correct.
if err = checkFormatXL(formatConfigs); err != nil { if err = checkFormatXL(formatConfigs); err != nil {
return nil, err return nil, err
} }
@ -208,14 +526,16 @@ func checkFormatXL(formatConfigs []*formatConfigV1) error {
return fmt.Errorf("Number of disks %d did not match the backend format %d", len(formatConfigs), len(formatXL.XL.JBOD)) return fmt.Errorf("Number of disks %d did not match the backend format %d", len(formatConfigs), len(formatXL.XL.JBOD))
} }
} }
return checkJBODConsistency(formatConfigs) if err := checkJBODConsistency(formatConfigs); err != nil {
return err
}
return checkDisksConsistency(formatConfigs)
} }
// initFormatXL - save XL format configuration on all disks. // initFormatXL - save XL format configuration on all disks.
func initFormatXL(storageDisks []StorageAPI) (err error) { func initFormatXL(storageDisks []StorageAPI) (err error) {
var ( var (
jbod = make([]string, len(storageDisks)) jbod = make([]string, len(storageDisks))
formatWriters = make([]io.WriteCloser, len(storageDisks))
formats = make([]*formatConfigV1, len(storageDisks)) formats = make([]*formatConfigV1, len(storageDisks))
saveFormatErrCnt = 0 saveFormatErrCnt = 0
) )
@ -227,19 +547,9 @@ func initFormatXL(storageDisks []StorageAPI) (err error) {
if saveFormatErrCnt <= len(storageDisks)-(len(storageDisks)/2+3) { if saveFormatErrCnt <= len(storageDisks)-(len(storageDisks)/2+3) {
continue continue
} }
return errWriteQuorum return errXLWriteQuorum
} }
} }
var w io.WriteCloser
w, err = disk.CreateFile(minioMetaBucket, formatConfigFile)
if err != nil {
saveFormatErrCnt++
// Check for write quorum.
if saveFormatErrCnt <= len(storageDisks)-(len(storageDisks)/2+3) {
continue
}
return err
}
var u *uuid.UUID var u *uuid.UUID
u, err = uuid.New() u, err = uuid.New()
if err != nil { if err != nil {
@ -250,7 +560,6 @@ func initFormatXL(storageDisks []StorageAPI) (err error) {
} }
return err return err
} }
formatWriters[index] = w
formats[index] = &formatConfigV1{ formats[index] = &formatConfigV1{
Version: "1", Version: "1",
Format: "xl", Format: "xl",
@ -261,24 +570,19 @@ func initFormatXL(storageDisks []StorageAPI) (err error) {
} }
jbod[index] = formats[index].XL.Disk jbod[index] = formats[index].XL.Disk
} }
for index, w := range formatWriters { for index, disk := range storageDisks {
if formats[index] == nil {
continue
}
formats[index].XL.JBOD = jbod formats[index].XL.JBOD = jbod
encoder := json.NewEncoder(w) formatBytes, err := json.Marshal(formats[index])
err = encoder.Encode(&formats[index])
if err != nil { if err != nil {
return err return err
} }
} n, err := disk.AppendFile(minioMetaBucket, formatConfigFile, formatBytes)
for _, w := range formatWriters { if err != nil {
if w == nil {
continue
}
if err = w.Close(); err != nil {
return err return err
} }
if n != int64(len(formatBytes)) {
return errUnexpected
}
} }
return nil return nil
} }

View File

@ -1,150 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"fmt"
"io"
"path"
)
// ListMultipartUploads - list multipart uploads. Forwards all arguments
// unchanged, together with the fs object layer itself, to
// listMultipartUploadsCommon and returns its result.
func (fs fsObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
return listMultipartUploadsCommon(fs, bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads)
}
// NewMultipartUpload - starts a new multipart upload and returns its unique
// id. The caller supplied meta is discarded and replaced by an empty map,
// headers are not saved for fs.
func (fs fsObjects) NewMultipartUpload(bucket, object string, meta map[string]string) (string, error) {
	return newMultipartUploadCommon(fs.storage, bucket, object, make(map[string]string))
}
// PutObjectPart - writes the multipart upload chunks. Forwards all arguments
// unchanged, together with the fs storage, to putObjectPartCommon and returns
// its result.
func (fs fsObjects) PutObjectPart(bucket, object, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
return putObjectPartCommon(fs.storage, bucket, object, uploadID, partID, size, data, md5Hex)
}
// ListObjectParts - lists the parts of a multipart upload. Forwards all
// arguments unchanged, together with the fs storage, to listObjectPartsCommon
// and returns its result.
func (fs fsObjects) ListObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
return listObjectPartsCommon(fs.storage, bucket, object, uploadID, partNumberMarker, maxParts)
}
// CompleteMultipartUpload - validates the request, appends every listed part,
// in the given order, to a temporary file under minioMetaBucket and renames
// that file to bucket/object. On success the uploaded parts are cleaned up and
// the md5sum computed by completeMultipartMD5 is returned.
//
// Any failure after the temporary file has been created discards that file
// through safeCloseAndRemove before returning, so that no partially written
// temporary object is left behind.
func (fs fsObjects) CompleteMultipartUpload(bucket string, object string, uploadID string, parts []completePart) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !isBucketExist(fs.storage, bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	if !isUploadIDExists(fs.storage, bucket, object, uploadID) {
		return "", InvalidUploadID{UploadID: uploadID}
	}

	// Calculate s3 compatible md5sum for complete multipart.
	s3MD5, err := completeMultipartMD5(parts...)
	if err != nil {
		return "", err
	}

	tempObj := path.Join(tmpMetaPrefix, bucket, object, uploadID, incompleteFile)
	fileWriter, err := fs.storage.CreateFile(minioMetaBucket, tempObj)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// abort discards the temporary file and reports 'cause', unless the
	// cleanup itself fails in which case the cleanup error is reported.
	abort := func(cause error) (string, error) {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", clErr
		}
		return "", cause
	}

	// Loop through all parts, validate them and then commit to disk.
	for i, part := range parts {
		// Construct part suffix.
		partSuffix := fmt.Sprintf("%.5d.%s", part.PartNumber, part.ETag)
		multipartPartFile := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)
		var fi FileInfo
		fi, err = fs.storage.StatFile(minioMetaBucket, multipartPartFile)
		if err != nil {
			if err == errFileNotFound {
				return abort(InvalidPart{})
			}
			return abort(err)
		}
		// All parts except the last part has to be atleast 5MB.
		if (i < len(parts)-1) && !isMinAllowedPartSize(fi.Size) {
			return abort(PartTooSmall{})
		}
		var fileReader io.ReadCloser
		fileReader, err = fs.storage.ReadFile(minioMetaBucket, multipartPartFile, 0)
		if err != nil {
			if err == errFileNotFound {
				return abort(InvalidPart{})
			}
			return abort(err)
		}
		_, err = io.Copy(fileWriter, fileReader)
		if err != nil {
			// Release the part reader; its close error is ignored on
			// purpose since the copy failure is what gets reported.
			_ = fileReader.Close()
			return abort(err)
		}
		if err = fileReader.Close(); err != nil {
			return abort(err)
		}
	}

	if err = fileWriter.Close(); err != nil {
		return abort(err)
	}

	// Rename the file back to original location, if not delete the
	// temporary object.
	err = fs.storage.RenameFile(minioMetaBucket, tempObj, bucket, object)
	if err != nil {
		if derr := fs.storage.DeleteFile(minioMetaBucket, tempObj); derr != nil {
			return "", toObjectErr(derr, minioMetaBucket, tempObj)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Cleanup all the parts if everything else has been safely committed.
	if err = cleanupUploadedParts(fs.storage, bucket, object, uploadID); err != nil {
		return "", err
	}

	// Return md5sum.
	return s3MD5, nil
}
// AbortMultipartUpload - aborts a multipart upload. Forwards all arguments
// unchanged, together with the fs storage, to abortMultipartUploadCommon and
// returns its error.
func (fs fsObjects) AbortMultipartUpload(bucket, object, uploadID string) error {
return abortMultipartUploadCommon(fs.storage, bucket, object, uploadID)
}

View File

@ -1,224 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"crypto/md5"
"encoding/hex"
"io"
"path/filepath"
"strings"
"sync"
"github.com/minio/minio/pkg/mimedb"
)
// fsObjects - Implements fs object layer on top of a single StorageAPI.
type fsObjects struct {
// storage is the backend every bucket and object method of this type calls into.
storage StorageAPI
// listObjectMap holds tree walkers keyed by list parameters.
// NOTE(review): presumably used to resume listings across calls — confirm against the listing code.
listObjectMap map[listParams][]*treeWalker
// listObjectMapMutex presumably guards listObjectMap — TODO confirm.
listObjectMapMutex *sync.Mutex
}
// newFSObjects - builds the fs object layer for exportPath. A path without
// ':' or with a volume name is opened with newPosix, anything else with
// newRPCClient. Returns the error of whichever constructor was used.
func newFSObjects(exportPath string) (ObjectLayer, error) {
	var (
		storage StorageAPI
		err     error
	)

	// Pick between filesystem storage API and rpc client storage API.
	if !strings.ContainsRune(exportPath, ':') || filepath.VolumeName(exportPath) != "" {
		storage, err = newPosix(exportPath)
	} else {
		storage, err = newRPCClient(exportPath)
	}
	if err != nil {
		return nil, err
	}

	// Initialize object layer - like creating minioMetaBucket,
	// cleaning up tmp files etc.
	initObjectLayer(storage)

	// Return successfully initialized object layer.
	layer := fsObjects{
		storage:            storage,
		listObjectMap:      make(map[listParams][]*treeWalker),
		listObjectMapMutex: &sync.Mutex{},
	}
	return layer, nil
}
/// Bucket operations
// MakeBucket - make a bucket. Forwards the bucket name and the fs storage to
// makeBucket and returns its error.
func (fs fsObjects) MakeBucket(bucket string) error {
return makeBucket(fs.storage, bucket)
}
// GetBucketInfo - get bucket info. Forwards the bucket name and the fs
// storage to getBucketInfo and returns its result.
func (fs fsObjects) GetBucketInfo(bucket string) (BucketInfo, error) {
return getBucketInfo(fs.storage, bucket)
}
// ListBuckets - list buckets. Forwards the fs storage to listBuckets and
// returns its result.
func (fs fsObjects) ListBuckets() ([]BucketInfo, error) {
return listBuckets(fs.storage)
}
// DeleteBucket - delete a bucket. Forwards the bucket name and the fs storage
// to deleteBucket and returns its error.
func (fs fsObjects) DeleteBucket(bucket string) error {
return deleteBucket(fs.storage, bucket)
}
/// Object Operations
// GetObject - opens bucket/object for reading starting at startOffset.
// Invalid bucket or object names are rejected before the storage is touched;
// storage errors are translated with toObjectErr.
func (fs fsObjects) GetObject(bucket, object string, startOffset int64) (io.ReadCloser, error) {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return nil, BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return nil, ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	reader, err := fs.storage.ReadFile(bucket, object, startOffset)
	if err != nil {
		return nil, toObjectErr(err, bucket, object)
	}
	return reader, nil
}
// GetObjectInfo - stats bucket/object and returns its ObjectInfo. The content
// type is looked up in mimedb.DB by the lower-cased object extension and
// falls back to "application/octet-stream". MD5Sum is left empty.
func (fs fsObjects) GetObjectInfo(bucket, object string) (ObjectInfo, error) {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return ObjectInfo{}, BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return ObjectInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	stat, err := fs.storage.StatFile(bucket, object)
	if err != nil {
		return ObjectInfo{}, toObjectErr(err, bucket, object)
	}

	// Only objects with an extension are looked up in the mime database.
	contentType := "application/octet-stream"
	if ext := filepath.Ext(object); ext != "" {
		key := strings.ToLower(strings.TrimPrefix(ext, "."))
		if entry, found := mimedb.DB[key]; found {
			contentType = entry.ContentType
		}
	}

	info := ObjectInfo{
		Bucket:      bucket,
		Name:        object,
		ModTime:     stat.ModTime,
		Size:        stat.Size,
		IsDir:       stat.Mode.IsDir(),
		ContentType: contentType,
		MD5Sum:      "", // Read from metadata.
	}
	return info, nil
}
// PutObject - streams data into bucket/object while computing its md5sum and
// returns the hex encoded md5sum. When size is positive exactly size bytes are
// copied, otherwise data is copied until EOF. If metadata carries a non-empty
// "md5Sum" that differs from the computed one, the written file is removed and
// BadDigest is returned.
func (fs fsObjects) PutObject(bucket string, object string, size int64, data io.Reader, metadata map[string]string) (string, error) {
	// Validate bucket and object names before creating anything.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	fileWriter, err := fs.storage.CreateFile(bucket, object)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Everything written to the file is hashed as well.
	md5Writer := md5.New()
	multiWriter := io.MultiWriter(md5Writer, fileWriter)

	if size > 0 {
		_, err = io.CopyN(multiWriter, data, size)
	} else {
		_, err = io.Copy(multiWriter, data)
	}
	if err != nil {
		// Discard the partially written file, a cleanup failure
		// takes precedence over the copy error.
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", clErr
		}
		return "", toObjectErr(err, bucket, object)
	}

	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))

	// Compare against the md5sum requested by the caller, if any. Reading
	// from an empty or nil map yields "" which means nothing to verify.
	md5Hex := metadata["md5Sum"]
	if md5Hex != "" && newMD5Hex != md5Hex {
		if err = safeCloseAndRemove(fileWriter); err != nil {
			return "", err
		}
		return "", BadDigest{md5Hex, newMD5Hex}
	}

	if err = fileWriter.Close(); err != nil {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", clErr
		}
		return "", err
	}

	// Return md5sum, successfully wrote object.
	return newMD5Hex, nil
}
// DeleteObject - deletes bucket/object from the fs storage. Invalid bucket or
// object names are rejected first; storage errors are translated with
// toObjectErr.
func (fs fsObjects) DeleteObject(bucket, object string) error {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	err := fs.storage.DeleteFile(bucket, object)
	if err != nil {
		return toObjectErr(err, bucket, object)
	}
	return nil
}
// ListObjects - list all objects. Forwards all arguments unchanged, together
// with the fs object layer itself, to listObjectsCommon and returns its result.
func (fs fsObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
return listObjectsCommon(fs, bucket, prefix, marker, delimiter, maxKeys)
}

95
fs-v1-metadata.go Normal file
View File

@ -0,0 +1,95 @@
package main
import (
"encoding/json"
"path"
"sort"
)
const (
// fsMetaJSONFile is the file name that readFSMetadata and writeFSMetadata
// join onto an object or prefix path to locate the fs metadata.
fsMetaJSONFile = "fs.json"
)
// fsMetaV1 is the in-memory form of the `fs.json` metadata document: a
// version, a format name, the Minio release and an optional list of object
// parts.
type fsMetaV1 struct {
// Version of the metadata layout; newFSMetaV1 sets it to "1".
Version string `json:"version"`
// Format names the backend format; newFSMetaV1 sets it to "fs".
Format string `json:"format"`
Minio struct {
// Release is set to minioReleaseTag by newFSMetaV1.
Release string `json:"release"`
} `json:"minio"`
// Parts is kept sorted by part number by AddObjectPart and is omitted
// from the JSON when empty.
Parts []objectPartInfo `json:"parts,omitempty"`
}
// ObjectPartIndex - returns the position in m.Parts of the first part whose
// number equals partNumber, or -1 when there is no such part.
func (m fsMetaV1) ObjectPartIndex(partNumber int) (partIndex int) {
	for partIndex = range m.Parts {
		if m.Parts[partIndex].Number == partNumber {
			return partIndex
		}
	}
	return -1
}
// AddObjectPart - records a part in m.Parts. A part whose number is already
// present is overwritten in place; otherwise the part is appended and the
// list is sorted again with byObjectPartNumber.
func (m *fsMetaV1) AddObjectPart(partNumber int, partName string, partETag string, partSize int64) {
	entry := objectPartInfo{
		Number: partNumber,
		Name:   partName,
		ETag:   partETag,
		Size:   partSize,
	}

	// Replace the existing entry for this part number, if there is one.
	for i := range m.Parts {
		if m.Parts[i].Number == partNumber {
			m.Parts[i] = entry
			return
		}
	}

	// New part number, append and restore the ordering.
	m.Parts = append(m.Parts, entry)
	sort.Sort(byObjectPartNumber(m.Parts))
}
// readFSMetadata - reads `fs.json` located under bucket/object and decodes
// it. On a read or decode failure an empty fsMetaV1 is returned along with
// the error.
func (fs fsObjects) readFSMetadata(bucket, object string) (fsMeta fsMetaV1, err error) {
	metaPath := path.Join(object, fsMetaJSONFile)
	data, err := readAll(fs.storage, bucket, metaPath)
	if err != nil {
		return fsMetaV1{}, err
	}
	if err = json.Unmarshal(data, &fsMeta); err != nil {
		return fsMetaV1{}, err
	}
	return fsMeta, nil
}
// newFSMetaV1 - returns a fsMetaV1 with version "1", format "fs" and the
// release set to minioReleaseTag; no parts are set.
func newFSMetaV1() (fsMeta fsMetaV1) {
	meta := fsMetaV1{Version: "1", Format: "fs"}
	meta.Minio.Release = minioReleaseTag
	return meta
}
// writeFSMetadata - encodes fsMeta as JSON and appends it to `fs.json` under
// bucket/prefix. Returns errUnexpected when the storage reports fewer or more
// bytes written than were encoded.
func (fs fsObjects) writeFSMetadata(bucket, prefix string, fsMeta fsMetaV1) error {
	encoded, err := json.Marshal(fsMeta)
	if err != nil {
		return err
	}
	written, err := fs.storage.AppendFile(bucket, path.Join(prefix, fsMetaJSONFile), encoded)
	switch {
	case err != nil:
		return err
	case written != int64(len(encoded)):
		// A short write leaves an incomplete document behind.
		return errUnexpected
	}
	return nil
}

70
fs-v1-multipart-common.go Normal file
View File

@ -0,0 +1,70 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"path"
"strings"
)
// isMultipartUpload - reports whether uploadsJSONFile can be stat'ed under
// bucket/prefix. Any stat error, whatever its kind, yields false.
func (fs fsObjects) isMultipartUpload(bucket, prefix string) bool {
	if _, err := fs.storage.StatFile(bucket, pathJoin(prefix, uploadsJSONFile)); err != nil {
		return false
	}
	return true
}
// listUploadsInfo - returns the uploads recorded by readUploadsJSON for the
// given prefix path. The path is split on "/" into at most three pieces; the
// second and third piece are handed to readUploadsJSON. A missing file is not
// an error and yields an empty, non-nil list.
//
// NOTE(review): a prefixPath with fewer than two "/" separators panics on the
// index expressions below; callers are presumably expected to always pass
// three components — confirm.
func (fs fsObjects) listUploadsInfo(prefixPath string) (uploads []uploadInfo, err error) {
	pieces := strings.SplitN(prefixPath, "/", 3)
	uploadsJSON, err := readUploadsJSON(pieces[1], pieces[2], fs.storage)
	switch {
	case err == nil:
		uploads = uploadsJSON.Uploads
		return uploads, nil
	case err == errFileNotFound:
		return []uploadInfo{}, nil
	}
	return nil, err
}
// isBucketExist - reports whether StatVol succeeds for the bucket. Errors
// other than errVolumeNotFound are passed to errorIf before false is
// returned.
func (fs fsObjects) isBucketExist(bucket string) bool {
	_, err := fs.storage.StatVol(bucket)
	if err == nil {
		return true
	}
	// A missing volume is the expected negative answer, everything else
	// is worth reporting.
	if err != errVolumeNotFound {
		errorIf(err, "Stat failed on bucket "+bucket+".")
	}
	return false
}
// isUploadIDExists - reports whether fsMetaJSONFile can be stat'ed under the
// upload id path of bucket/object/uploadID inside minioMetaBucket. Errors
// other than errFileNotFound are passed to errorIf before false is returned.
func (fs fsObjects) isUploadIDExists(bucket, object, uploadID string) bool {
	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	_, err := fs.storage.StatFile(minioMetaBucket, path.Join(uploadIDPath, fsMetaJSONFile))
	if err != nil {
		if err == errFileNotFound {
			return false
		}
		// Keep a space between the message and the path so that the
		// reported path stays readable.
		errorIf(err, "Unable to access upload id "+uploadIDPath)
		return false
	}
	return true
}

648
fs-v1-multipart.go Normal file
View File

@ -0,0 +1,648 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"crypto/md5"
"encoding/hex"
"fmt"
"io"
"path"
"strconv"
"strings"
"time"
"github.com/skyrings/skyring-common/tools/uuid"
)
// listMultipartUploads - lists multipart uploads of a bucket, at most
// maxUploads entries.
//
// When uploadIDMarker is set, the upload ids of keyMarker following that
// marker are listed first. The remaining budget is filled from a tree walk
// over minioMetaBucket rooted at the multipart prefix path: entries ending in
// a slash are reported as common prefixes, every other entry has its upload
// ids listed. The walk is recursive unless delimiter is slashSeparator.
//
// The result is marked truncated unless the walk was exhausted; the next
// markers are only set on truncated results. A walk error other than
// errFileNotFound or errDiskNotFound is returned to the caller.
func (fs fsObjects) listMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
	result := ListMultipartsInfo{}
	recursive := true
	if delimiter == slashSeparator {
		recursive = false
	}

	result.IsTruncated = true
	result.MaxUploads = maxUploads
	result.KeyMarker = keyMarker
	result.Prefix = prefix
	result.Delimiter = delimiter

	// Not using path.Join() as it strips off the trailing '/'.
	multipartPrefixPath := pathJoin(mpartMetaPrefix, bucket, prefix)
	if prefix == "" {
		// Should have a trailing "/" if prefix is ""
		// For ex. multipartPrefixPath should be "multipart/bucket/" if prefix is ""
		multipartPrefixPath += slashSeparator
	}
	multipartMarkerPath := ""
	if keyMarker != "" {
		multipartMarkerPath = pathJoin(mpartMetaPrefix, bucket, keyMarker)
	}
	var uploads []uploadMetadata
	var err error
	var eof bool
	if uploadIDMarker != "" {
		// List the remaining upload ids of the marker object first.
		nsMutex.RLock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, keyMarker))
		uploads, _, err = listMultipartUploadIDs(bucket, keyMarker, uploadIDMarker, maxUploads, fs.storage)
		nsMutex.RUnlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, keyMarker))
		if err != nil {
			return ListMultipartsInfo{}, err
		}
		maxUploads = maxUploads - len(uploads)
	}
	if maxUploads > 0 {
		// Reuse a pending tree walk for the same parameters if there is one.
		walker := fs.lookupTreeWalk(listParams{minioMetaBucket, recursive, multipartMarkerPath, multipartPrefixPath})
		if walker == nil {
			walker = fs.startTreeWalk(minioMetaBucket, multipartPrefixPath, multipartMarkerPath, recursive, func(bucket, object string) bool {
				return fs.isMultipartUpload(bucket, object)
			})
		}
		for maxUploads > 0 {
			walkResult, ok := <-walker.ch
			if !ok {
				// Closed channel.
				eof = true
				break
			}
			// For any walk error return right away.
			if walkResult.err != nil {
				// File not found or Disk not found is a valid case.
				if walkResult.err == errFileNotFound || walkResult.err == errDiskNotFound {
					eof = true
					break
				}
				// Report the walk error itself; the function level
				// 'err' is nil here and would signal success.
				return ListMultipartsInfo{}, walkResult.err
			}
			entry := strings.TrimPrefix(walkResult.entry, retainSlash(pathJoin(mpartMetaPrefix, bucket)))
			if strings.HasSuffix(walkResult.entry, slashSeparator) {
				// Directory entry, reported later as a common prefix.
				uploads = append(uploads, uploadMetadata{
					Object: entry,
				})
				maxUploads--
				if maxUploads == 0 {
					if walkResult.end {
						eof = true
						break
					}
				}
				continue
			}
			var tmpUploads []uploadMetadata
			var end bool
			// Upload ids of walked entries are always listed from the start.
			uploadIDMarker = ""
			nsMutex.RLock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, entry))
			tmpUploads, end, err = listMultipartUploadIDs(bucket, entry, uploadIDMarker, maxUploads, fs.storage)
			nsMutex.RUnlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, entry))
			if err != nil {
				return ListMultipartsInfo{}, err
			}
			uploads = append(uploads, tmpUploads...)
			maxUploads -= len(tmpUploads)
			if walkResult.end && end {
				eof = true
				break
			}
		}
	}
	// Loop through all the received uploads fill in the multiparts result.
	for _, upload := range uploads {
		var objectName string
		var uploadID string
		if strings.HasSuffix(upload.Object, slashSeparator) {
			// All directory entries are common prefixes.
			uploadID = "" // Upload ids are empty for CommonPrefixes.
			objectName = upload.Object
			result.CommonPrefixes = append(result.CommonPrefixes, objectName)
		} else {
			uploadID = upload.UploadID
			objectName = upload.Object
			result.Uploads = append(result.Uploads, upload)
		}
		// The markers end up pointing at the last entry of the listing.
		result.NextKeyMarker = objectName
		result.NextUploadIDMarker = uploadID
	}
	result.IsTruncated = !eof
	if !result.IsTruncated {
		result.NextKeyMarker = ""
		result.NextUploadIDMarker = ""
	}
	return result, nil
}
// ListMultipartUploads - validates the listing arguments and then lists the
// pending multipart uploads of a bucket through fs.listMultipartUploads.
//
// Rejected are: an invalid or non-existent bucket, an invalid prefix, a
// delimiter other than "" or '/', a keyMarker that does not start with
// prefix, and an uploadIDMarker that is combined with a keyMarker ending in
// '/', fails to parse as a UUID, or parses to the zero UUID.
//
// Implements S3 compatible ListMultipartUploads API.
func (fs fsObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
	// Validate input arguments, the first failing check wins.
	switch {
	case !IsValidBucketName(bucket):
		return ListMultipartsInfo{}, BucketNameInvalid{Bucket: bucket}
	case !fs.isBucketExist(bucket):
		return ListMultipartsInfo{}, BucketNotFound{Bucket: bucket}
	case !IsValidObjectPrefix(prefix):
		return ListMultipartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: prefix}
	case delimiter != "" && delimiter != slashSeparator:
		// Delimiters other than '/' are not supported.
		return ListMultipartsInfo{}, UnsupportedDelimiter{
			Delimiter: delimiter,
		}
	case keyMarker != "" && !strings.HasPrefix(keyMarker, prefix):
		// The marker has to live below the prefix.
		return ListMultipartsInfo{}, InvalidMarkerPrefixCombination{
			Marker: keyMarker,
			Prefix: prefix,
		}
	}

	if uploadIDMarker != "" {
		// An upload id marker only makes sense for an object key marker.
		if strings.HasSuffix(keyMarker, slashSeparator) {
			return ListMultipartsInfo{}, InvalidUploadIDKeyCombination{
				UploadIDMarker: uploadIDMarker,
				KeyMarker:      keyMarker,
			}
		}
		parsedID, err := uuid.Parse(uploadIDMarker)
		if err != nil {
			return ListMultipartsInfo{}, err
		}
		if parsedID.IsZero() {
			return ListMultipartsInfo{}, MalformedUploadID{
				UploadID: uploadIDMarker,
			}
		}
	}

	return fs.listMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads)
}
// newMultipartUpload - registers a new multipart upload for bucket/object and
// returns its freshly generated upload id.
//
// While holding the namespace lock on the multipart path of the object it
// records the upload through writeUploadJSON, writes `fs.json` below the tmp
// prefix and renames it to the upload id path. If the rename fails the
// temporary `fs.json` is deleted again.
func (fs fsObjects) newMultipartUpload(bucket string, object string, meta map[string]string) (uploadID string, err error) {
	// No metadata is set, allocate a new one.
	if meta == nil {
		meta = make(map[string]string)
	}
	// Initialize `fs.json` values.
	fsMeta := newFSMetaV1()

	// This lock needs to be held for any changes to the directory
	// contents of ".minio/multipart/object/"
	lockPath := pathJoin(mpartMetaPrefix, bucket, object)
	nsMutex.Lock(minioMetaBucket, lockPath)
	defer nsMutex.Unlock(minioMetaBucket, lockPath)

	uploadID = getUUID()
	initiated := time.Now().UTC()
	// Create 'uploads.json'
	err = writeUploadJSON(bucket, object, uploadID, initiated, fs.storage)
	if err != nil {
		return "", err
	}

	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	tempUploadIDPath := path.Join(tmpMetaPrefix, uploadID)
	err = fs.writeFSMetadata(minioMetaBucket, tempUploadIDPath, fsMeta)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
	}

	// Move `fs.json` from the temporary location to the upload id path.
	tempMetaFile := path.Join(tempUploadIDPath, fsMetaJSONFile)
	finalMetaFile := path.Join(uploadIDPath, fsMetaJSONFile)
	err = fs.storage.RenameFile(minioMetaBucket, tempMetaFile, minioMetaBucket, finalMetaFile)
	if err != nil {
		// Do not leave the temporary file behind; a failing delete is
		// reported in place of the rename error.
		dErr := fs.storage.DeleteFile(minioMetaBucket, tempMetaFile)
		if dErr != nil {
			return "", toObjectErr(dErr, minioMetaBucket, tempUploadIDPath)
		}
		return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
	}
	// Return success.
	return uploadID, nil
}
// NewMultipartUpload - validates bucket and object and starts a new multipart
// upload through fs.newMultipartUpload, returning the upload id. The caller
// supplied meta is discarded and replaced by an empty map, headers are not
// saved for fs.
//
// Implements S3 compatible initiate multipart API.
func (fs fsObjects) NewMultipartUpload(bucket, object string, meta map[string]string) (string, error) {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return "", BucketNameInvalid{Bucket: bucket}
	case !fs.isBucketExist(bucket):
		// Bucket does not exist.
		return "", BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return "", ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	return fs.newMultipartUpload(bucket, object, make(map[string]string))
}
// PutObjectPart - reads incoming data until EOF for the part file on
// an ongoing multipart transaction. Internally incoming data is
// written to '.minio/tmp' location and safely renamed to
// '.minio/multipart' for each part.
//
// When md5Hex is non-empty it must match the MD5 of the data read,
// otherwise BadDigest is returned. On success the hex encoded MD5 of
// the part is returned and the part is recorded in the upload's
// `fs.json`.
//
// The temporary part file lives at a path derived only from the upload
// id and part number, so it is removed on every failure path; a retry
// of the same part would otherwise append to the stale bytes.
func (fs fsObjects) PutObjectPart(bucket, object, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !fs.isBucketExist(bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)

	// Just check if the uploadID exists to avoid copy if it doesn't.
	nsMutex.RLock(minioMetaBucket, uploadIDPath)
	uploadIDExists := fs.isUploadIDExists(bucket, object, uploadID)
	nsMutex.RUnlock(minioMetaBucket, uploadIDPath)
	if !uploadIDExists {
		return "", InvalidUploadID{UploadID: uploadID}
	}

	// Hold write lock on the part so that there is no parallel upload on the part.
	partLockPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID, strconv.Itoa(partID))
	nsMutex.Lock(minioMetaBucket, partLockPath)
	defer nsMutex.Unlock(minioMetaBucket, partLockPath)

	partSuffix := fmt.Sprintf("object%d", partID)
	tmpPartPath := path.Join(tmpMetaPrefix, uploadID, partSuffix)

	// Until the rename below has been attempted, any return is a failure
	// that must not leave partial data behind at tmpPartPath.
	tmpPartPending := true
	defer func() {
		if tmpPartPending {
			// Best-effort cleanup: the file may not exist at all (for
			// instance when nothing was written), and the error that
			// caused the failure is the one worth reporting.
			_ = fs.storage.DeleteFile(minioMetaBucket, tmpPartPath)
		}
	}()

	// Initialize md5 writer.
	md5Writer := md5.New()

	var buf = make([]byte, blockSizeV1)
	for {
		n, err := io.ReadFull(data, buf)
		if err == io.EOF {
			break
		}
		// io.ErrUnexpectedEOF only signals a final short block.
		if err != nil && err != io.ErrUnexpectedEOF {
			return "", toObjectErr(err, bucket, object)
		}
		// Update md5 writer.
		md5Writer.Write(buf[:n])
		m, err := fs.storage.AppendFile(minioMetaBucket, tmpPartPath, buf[:n])
		if err != nil {
			return "", toObjectErr(err, bucket, object)
		}
		if m != int64(len(buf[:n])) {
			return "", toObjectErr(errUnexpected, bucket, object)
		}
	}

	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))
	if md5Hex != "" {
		if newMD5Hex != md5Hex {
			return "", BadDigest{md5Hex, newMD5Hex}
		}
	}

	// Hold write lock as we are updating fs.json
	nsMutex.Lock(minioMetaBucket, uploadIDPath)
	defer nsMutex.Unlock(minioMetaBucket, uploadIDPath)

	// The upload may have been aborted or completed while the part was
	// being written, check again under the write lock.
	if !fs.isUploadIDExists(bucket, object, uploadID) {
		return "", InvalidUploadID{UploadID: uploadID}
	}

	fsMeta, err := fs.readFSMetadata(minioMetaBucket, uploadIDPath)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
	}
	fsMeta.AddObjectPart(partID, partSuffix, newMD5Hex, size)

	// Move the part into the upload directory.
	partPath := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)
	err = fs.storage.RenameFile(minioMetaBucket, tmpPartPath, minioMetaBucket, partPath)
	// From here on the temporary part is either renamed away or removed
	// explicitly below, the deferred cleanup is no longer needed.
	tmpPartPending = false
	if err != nil {
		if dErr := fs.storage.DeleteFile(minioMetaBucket, tmpPartPath); dErr != nil {
			return "", toObjectErr(dErr, minioMetaBucket, tmpPartPath)
		}
		return "", toObjectErr(err, minioMetaBucket, partPath)
	}

	// Persist the updated metadata: write to the temporary location
	// first, then rename over the upload's metadata file.
	tempUploadIDPath := path.Join(tmpMetaPrefix, uploadID)
	if err = fs.writeFSMetadata(minioMetaBucket, tempUploadIDPath, fsMeta); err != nil {
		return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
	}
	err = fs.storage.RenameFile(minioMetaBucket, path.Join(tempUploadIDPath, fsMetaJSONFile), minioMetaBucket, path.Join(uploadIDPath, fsMetaJSONFile))
	if err != nil {
		if dErr := fs.storage.DeleteFile(minioMetaBucket, path.Join(tempUploadIDPath, fsMetaJSONFile)); dErr != nil {
			return "", toObjectErr(dErr, minioMetaBucket, tempUploadIDPath)
		}
		return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
	}
	return newMD5Hex, nil
}
// listObjectParts - reads the `fs.json` saved under
// '.minio/multipart/bucket/object/UPLOADID' and lists the parts
// recorded in it, stat-ing every part file for its size and
// modification time.
//
// When partNumberMarker matches a recorded part, listing starts right
// after that part. Listing stops once maxParts entries have been
// collected; if recorded parts remain, IsTruncated is set and
// NextPartNumberMarker carries the number of the last listed part.
func (fs fsObjects) listObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	fsMeta, err := fs.readFSMetadata(minioMetaBucket, uploadIDPath)
	if err != nil {
		return ListPartsInfo{}, toObjectErr(err, minioMetaBucket, uploadIDPath)
	}

	// Skip everything up to and including the marker part, when the
	// marker is known.
	pending := fsMeta.Parts
	if markerIdx := fsMeta.ObjectPartIndex(partNumberMarker); markerIdx != -1 {
		pending = fsMeta.Parts[markerIdx+1:]
	}

	listing := ListPartsInfo{}
	for _, part := range pending {
		partNamePath := path.Join(mpartMetaPrefix, bucket, object, uploadID, part.Name)
		fi, statErr := fs.storage.StatFile(minioMetaBucket, partNamePath)
		if statErr != nil {
			return ListPartsInfo{}, toObjectErr(statErr, minioMetaBucket, partNamePath)
		}
		listing.Parts = append(listing.Parts, partInfo{
			PartNumber:   part.Number,
			ETag:         part.ETag,
			LastModified: fi.ModTime,
			Size:         fi.Size,
		})
		// Stop as soon as exactly maxParts entries are listed.
		if len(listing.Parts) == maxParts {
			break
		}
	}

	// Parts were left out, mark the listing as truncated and hand out
	// the marker for the subsequent listing.
	if len(listing.Parts) < len(pending) {
		listing.IsTruncated = true
		listing.NextPartNumberMarker = listing.Parts[len(listing.Parts)-1].PartNumber
	}

	listing.Bucket = bucket
	listing.Object = object
	listing.UploadID = uploadID
	listing.MaxParts = maxParts
	return listing, nil
}
// ListObjectParts - lists the parts uploaded so far for the given
// object and uploadID. partNumberMarker indicates where the listing
// should begin from and maxParts is passed through to the listing.
//
// Implements S3 compatible ListObjectParts API.
func (fs fsObjects) ListObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return ListPartsInfo{}, BucketNameInvalid{Bucket: bucket}
	case !fs.isBucketExist(bucket):
		// Bucket does not exist.
		return ListPartsInfo{}, BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return ListPartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	// Hold lock so that there is no competing abort-multipart-upload or
	// complete-multipart-upload.
	uploadLockPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
	nsMutex.Lock(minioMetaBucket, uploadLockPath)
	defer nsMutex.Unlock(minioMetaBucket, uploadLockPath)

	if fs.isUploadIDExists(bucket, object, uploadID) {
		return fs.listObjectParts(bucket, object, uploadID, partNumberMarker, maxParts)
	}
	return ListPartsInfo{}, InvalidUploadID{UploadID: uploadID}
}
// CompleteMultipartUpload - completes an ongoing multipart
// transaction after receiving all the parts indicated by the client.
// Returns an md5sum calculated by concatenating all the individual
// md5sums of all the parts.
//
// Every requested part is validated against the upload's `fs.json`
// (InvalidPart when it is unknown, BadDigest when the ETag differs,
// PartTooSmall when a part other than the last one is below the
// minimum allowed size) and then appended, in the order given, to a
// temporary object which is finally renamed to bucket/object. After
// that the uploaded parts are cleaned up and the upload id is removed
// from `uploads.json`.
//
// Implements S3 compatible Complete multipart API.
func (fs fsObjects) CompleteMultipartUpload(bucket string, object string, uploadID string, parts []completePart) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !fs.isBucketExist(bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}

	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	// Hold lock so that
	// 1) no one aborts this multipart upload
	// 2) no one does a parallel complete-multipart-upload on this
	// multipart upload
	nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
	defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))

	if !fs.isUploadIDExists(bucket, object, uploadID) {
		return "", InvalidUploadID{UploadID: uploadID}
	}

	// Read saved fs metadata for ongoing multipart.
	fsMeta, err := fs.readFSMetadata(minioMetaBucket, uploadIDPath)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
	}

	// Calculate s3 compatible md5sum for complete multipart.
	s3MD5, err := completeMultipartMD5(parts...)
	if err != nil {
		return "", err
	}

	// The object is assembled in a uniquely named temporary file. A
	// fixed name such as "object1" is also the temporary name used while
	// part 1 is being uploaded, and leftovers of a failed completion
	// would be appended to by the next attempt.
	tempObj := path.Join(tmpMetaPrefix, uploadID, getUUID())
	tempObjPending := true
	defer func() {
		if tempObjPending {
			// Best-effort cleanup of the partially assembled object, the
			// error that caused the failure is the one worth reporting.
			_ = fs.storage.DeleteFile(minioMetaBucket, tempObj)
		}
	}()

	var buffer = make([]byte, blockSizeV1)

	// Loop through all parts, validate them and then commit to disk.
	for i, part := range parts {
		partIdx := fsMeta.ObjectPartIndex(part.PartNumber)
		if partIdx == -1 {
			return "", InvalidPart{}
		}
		if fsMeta.Parts[partIdx].ETag != part.ETag {
			return "", BadDigest{}
		}
		// All parts except the last part has to be atleast 5MB.
		if (i < len(parts)-1) && !isMinAllowedPartSize(fsMeta.Parts[partIdx].Size) {
			return "", PartTooSmall{}
		}

		// Construct part suffix.
		partSuffix := fmt.Sprintf("object%d", part.PartNumber)
		multipartPartFile := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)

		offset := int64(0)
		totalLeft := fsMeta.Parts[partIdx].Size
		for totalLeft > 0 {
			// Never read past the recorded size of the part.
			readBuf := buffer
			if totalLeft < int64(len(buffer)) {
				readBuf = buffer[:totalLeft]
			}
			var n int64
			n, err = fs.storage.ReadFile(minioMetaBucket, multipartPartFile, offset, readBuf)
			if err != nil {
				if err == errFileNotFound {
					return "", InvalidPart{}
				}
				return "", toObjectErr(err, minioMetaBucket, multipartPartFile)
			}
			n, err = fs.storage.AppendFile(minioMetaBucket, tempObj, readBuf[:n])
			if err != nil {
				return "", toObjectErr(err, minioMetaBucket, tempObj)
			}
			// Without progress this loop would never terminate.
			if n <= 0 {
				return "", toObjectErr(errUnexpected, minioMetaBucket, multipartPartFile)
			}
			offset += n
			totalLeft -= n
		}
	}

	// Rename the file back to original location, if not delete the temporary object.
	err = fs.storage.RenameFile(minioMetaBucket, tempObj, bucket, object)
	// The temporary object is either renamed away or removed explicitly
	// below, the deferred cleanup is no longer needed.
	tempObjPending = false
	if err != nil {
		if dErr := fs.storage.DeleteFile(minioMetaBucket, tempObj); dErr != nil {
			return "", toObjectErr(dErr, minioMetaBucket, tempObj)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Cleanup all the parts if everything else has been safely committed.
	if err = cleanupUploadedParts(bucket, object, uploadID, fs.storage); err != nil {
		return "", err
	}

	// Hold the lock so that two parallel complete-multipart-uploads do not
	// leave a stale uploads.json behind.
	nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))
	defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))

	// Validate if there are other incomplete upload-id's present for
	// the object, if yes do not attempt to delete 'uploads.json'.
	uploadsJSON, err := readUploadsJSON(bucket, object, fs.storage)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, object)
	}
	// If we have successfully read `uploads.json`, then we proceed to
	// purge or update `uploads.json`.
	uploadIDIdx := uploadsJSON.Index(uploadID)
	if uploadIDIdx != -1 {
		uploadsJSON.Uploads = append(uploadsJSON.Uploads[:uploadIDIdx], uploadsJSON.Uploads[uploadIDIdx+1:]...)
	}
	if len(uploadsJSON.Uploads) > 0 {
		if err = updateUploadsJSON(bucket, object, uploadsJSON, fs.storage); err != nil {
			return "", toObjectErr(err, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object))
		}
		// Return success.
		return s3MD5, nil
	}
	// No other upload is pending for the object, remove 'uploads.json'.
	if err = fs.storage.DeleteFile(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)); err != nil {
		return "", toObjectErr(err, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object))
	}
	// Return md5sum.
	return s3MD5, nil
}
// abortMultipartUpload - purges an ongoing multipart transaction:
// cleans up the parts uploaded for uploadID and drops the uploadID
// entry from `uploads.json`. When no other upload remains for the
// object, or `uploads.json` could not be read, `uploads.json` itself
// is deleted.
func (fs fsObjects) abortMultipartUpload(bucket, object, uploadID string) error {
	// Cleanup all uploaded parts.
	err := cleanupUploadedParts(bucket, object, uploadID, fs.storage)
	if err != nil {
		return err
	}

	// Validate if there are other incomplete upload-id's present for
	// the object, if yes do not attempt to delete 'uploads.json'.
	uploadsJSON, err := readUploadsJSON(bucket, object, fs.storage)
	if err == nil {
		if idx := uploadsJSON.Index(uploadID); idx != -1 {
			uploadsJSON.Uploads = append(uploadsJSON.Uploads[:idx], uploadsJSON.Uploads[idx+1:]...)
		}
		// There are pending uploads for the same object, preserve
		// them and update 'uploads.json' in-place.
		if len(uploadsJSON.Uploads) > 0 {
			if err = updateUploadsJSON(bucket, object, uploadsJSON, fs.storage); err != nil {
				return toObjectErr(err, bucket, object)
			}
			return nil
		}
	}

	// No more pending uploads for the object, purge 'uploads.json'
	// kept at '.minio/multipart/bucket/object'.
	err = fs.storage.DeleteFile(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile))
	if err != nil {
		return toObjectErr(err, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object))
	}
	return nil
}
// AbortMultipartUpload - aborts the ongoing multipart operation
// identified by uploadID. The uploaded parts are purged and the
// reference to the uploadID is removed, rollback is not possible on
// this operation.
//
// The whole operation runs under the namespace lock of the upload, so
// it does not compete with complete-multipart-upload or
// put-object-part. Once aborted, further requests on the same
// uploadID fail with InvalidUploadID.
//
// Implements S3 compatible Abort multipart API.
func (fs fsObjects) AbortMultipartUpload(bucket, object, uploadID string) error {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return BucketNameInvalid{Bucket: bucket}
	case !fs.isBucketExist(bucket):
		// Bucket does not exist.
		return BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	// Hold lock so that there is no competing complete-multipart-upload
	// or put-object-part.
	uploadLockPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
	nsMutex.Lock(minioMetaBucket, uploadLockPath)
	defer nsMutex.Unlock(minioMetaBucket, uploadLockPath)

	if !fs.isUploadIDExists(bucket, object, uploadID) {
		return InvalidUploadID{UploadID: uploadID}
	}
	return fs.abortMultipartUpload(bucket, object, uploadID)
}

435
fs-v1.go Normal file
View File

@ -0,0 +1,435 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"crypto/md5"
"encoding/hex"
"io"
"os"
"path"
"path/filepath"
"sort"
"strings"
"sync"
"github.com/minio/minio/pkg/disk"
"github.com/minio/minio/pkg/mimedb"
)
// fsObjects - Implements fs object layer.
//
// All bucket and object operations are carried out through the
// embedded storage API; values of this type are created by
// newFSObjects.
type fsObjects struct {
// Storage backend used for every volume and file operation.
storage StorageAPI
// Disk path this layer was created with, StorageInfo queries it for
// total and free space.
physicalDisk string
// Tree walkers kept per listing parameters.
// NOTE(review): presumably filled by saveTreeWalk and consumed by
// lookupTreeWalk so that a truncated listing can be resumed - confirm.
listObjectMap map[listParams][]*treeWalkerFS
// NOTE(review): presumably guards listObjectMap - confirm against the
// tree walk helpers.
listObjectMapMutex *sync.Mutex
}
// newFSObjects - creates the fs object layer backed by the given disk
// path. Fails when the storage API for the disk cannot be initialized.
func newFSObjects(disk string) (ObjectLayer, error) {
	storage, err := newStorageAPI(disk)
	if err != nil {
		return nil, err
	}

	// Runs house keeping code, like creating minioMetaBucket, cleaning up tmp files etc.
	fsHouseKeeping(storage)

	// Assemble the object layer, starting with an empty tree walker map.
	layer := fsObjects{
		storage:            storage,
		physicalDisk:       disk,
		listObjectMap:      make(map[listParams][]*treeWalkerFS),
		listObjectMapMutex: &sync.Mutex{},
	}
	return layer, nil
}
// StorageInfo - reports total and free space of the disk backing this
// object layer. Failure to obtain the disk information is handed to
// fatalIf.
func (fs fsObjects) StorageInfo() StorageInfo {
	diskInfo, err := disk.GetInfo(fs.physicalDisk)
	fatalIf(err, "Unable to get disk info "+fs.physicalDisk)

	var storageInfo StorageInfo
	storageInfo.Total = diskInfo.Total
	storageInfo.Free = diskInfo.Free
	return storageInfo
}
/// Bucket operations
// MakeBucket - creates a new bucket, i.e. a volume on the underlying
// storage, after validating the bucket name.
func (fs fsObjects) MakeBucket(bucket string) error {
	// Reject invalid bucket names right away.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	err := fs.storage.MakeVol(bucket)
	if err == nil {
		return nil
	}
	return toObjectErr(err, bucket)
}
// GetBucketInfo - returns name and creation time of the bucket as
// reported by the underlying storage.
func (fs fsObjects) GetBucketInfo(bucket string) (BucketInfo, error) {
	// Reject invalid bucket names right away.
	if !IsValidBucketName(bucket) {
		return BucketInfo{}, BucketNameInvalid{Bucket: bucket}
	}

	volInfo, err := fs.storage.StatVol(bucket)
	if err != nil {
		return BucketInfo{}, toObjectErr(err, bucket)
	}

	bucketInfo := BucketInfo{
		Name:    bucket,
		Created: volInfo.Created,
	}
	return bucketInfo, nil
}
// ListBuckets - lists all buckets sorted by bucket name. Volumes whose
// names are not valid bucket names are left out.
func (fs fsObjects) ListBuckets() ([]BucketInfo, error) {
	vols, err := fs.storage.ListVols()
	if err != nil {
		return nil, toObjectErr(err)
	}

	var buckets []BucketInfo
	for _, vol := range vols {
		// StorageAPI can send volume names which are incompatible
		// with buckets, only compatible ones are listed.
		if IsValidBucketName(vol.Name) {
			buckets = append(buckets, BucketInfo{
				Name:    vol.Name,
				Created: vol.Created,
			})
		}
	}

	sort.Sort(byBucketName(buckets))
	return buckets, nil
}
// DeleteBucket - deletes the bucket, i.e. its volume on the underlying
// storage, after validating the bucket name.
func (fs fsObjects) DeleteBucket(bucket string) error {
	// Reject invalid bucket names right away.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	err := fs.storage.DeleteVol(bucket)
	if err == nil {
		return nil
	}
	return toObjectErr(err, bucket)
}
/// Object Operations
// GetObject - reads length bytes of the object starting at startOffset
// and writes them to writer, in blocks of at most blockSizeV1 bytes.
//
// A single read buffer is allocated up front and reused for every
// block. A non-positive length writes nothing and returns nil. Storage
// and writer errors are returned through toObjectErr.
func (fs fsObjects) GetObject(bucket, object string, startOffset int64, length int64, writer io.Writer) error {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	// Verify if object is valid.
	if !IsValidObjectName(object) {
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	// Nothing to read.
	if length <= 0 {
		return nil
	}

	// One buffer serves all blocks, it never needs to be larger than the
	// requested length.
	bufSize := int64(blockSizeV1)
	if length < bufSize {
		bufSize = length
	}
	buf := make([]byte, bufSize)

	for totalLeft := length; totalLeft > 0; {
		// The last block may be shorter than the buffer.
		curBuf := buf
		if totalLeft < bufSize {
			curBuf = buf[:totalLeft]
		}
		n, err := fs.storage.ReadFile(bucket, object, startOffset, curBuf)
		if err != nil {
			return toObjectErr(err, bucket, object)
		}
		// Without progress this loop would never terminate.
		if n <= 0 {
			return toObjectErr(errUnexpected, bucket, object)
		}
		if _, err = writer.Write(curBuf[:n]); err != nil {
			return toObjectErr(err, bucket, object)
		}
		totalLeft -= n
		startOffset += n
	}
	return nil
}
// GetObjectInfo - stats the object and returns its info. The content
// type is looked up in mimedb by the lower-cased extension of the
// object name and falls back to "application/octet-stream". MD5Sum is
// left empty.
func (fs fsObjects) GetObjectInfo(bucket, object string) (ObjectInfo, error) {
	// Validate bucket and object names before touching the storage.
	if !IsValidBucketName(bucket) {
		return ObjectInfo{}, BucketNameInvalid{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return ObjectInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	fi, err := fs.storage.StatFile(bucket, object)
	if err != nil {
		return ObjectInfo{}, toObjectErr(err, bucket, object)
	}

	// Derive the content type from the object's extension, if any.
	contentType := "application/octet-stream"
	if ext := filepath.Ext(object); ext != "" {
		if entry, found := mimedb.DB[strings.ToLower(strings.TrimPrefix(ext, "."))]; found {
			contentType = entry.ContentType
		}
	}

	objInfo := ObjectInfo{
		Bucket:      bucket,
		Name:        object,
		ModTime:     fi.ModTime,
		Size:        fi.Size,
		IsDir:       fi.Mode.IsDir(),
		ContentType: contentType,
		MD5Sum:      "", // Read from metadata.
	}
	return objInfo, nil
}
// PutObject - creates an object from the incoming data. The data is
// written to a uniquely named temporary object under '.minio/tmp' and
// renamed to bucket/object once fully written and verified.
//
// When metadata carries a non-empty "md5Sum" it must match the MD5 of
// the data read, otherwise BadDigest is returned. On success the hex
// encoded MD5 of the object is returned. The temporary object is
// removed (best-effort) whenever the object is not committed.
func (fs fsObjects) PutObject(bucket string, object string, size int64, data io.Reader, metadata map[string]string) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}

	uniqueID := getUUID()

	// Temporary object.
	tempObj := path.Join(tmpMetaPrefix, uniqueID)

	// Do not leave the temporary object behind when the upload fails.
	committed := false
	defer func() {
		if !committed {
			// Best-effort cleanup: the file may not exist yet, and the
			// error that caused the failure is the one worth reporting.
			_ = fs.storage.DeleteFile(minioMetaBucket, tempObj)
		}
	}()

	// Initialize md5 writer.
	md5Writer := md5.New()

	if size == 0 {
		// For size 0 we write a 0byte file.
		if _, err := fs.storage.AppendFile(minioMetaBucket, tempObj, []byte("")); err != nil {
			return "", toObjectErr(err, bucket, object)
		}
	} else {
		// Allocate buffer.
		buf := make([]byte, blockSizeV1)
		for {
			// io.ReadFull consumes the bytes delivered together with the
			// end of the stream; checking a plain Read for io.EOF before
			// using its n bytes would drop them.
			n, rErr := io.ReadFull(data, buf)
			if rErr == io.EOF {
				break
			}
			// io.ErrUnexpectedEOF only signals a final short block.
			if rErr != nil && rErr != io.ErrUnexpectedEOF {
				return "", toObjectErr(rErr, bucket, object)
			}
			// Update md5 writer.
			md5Writer.Write(buf[:n])
			m, wErr := fs.storage.AppendFile(minioMetaBucket, tempObj, buf[:n])
			if wErr != nil {
				return "", toObjectErr(wErr, bucket, object)
			}
			if m != int64(n) {
				return "", toObjectErr(errUnexpected, bucket, object)
			}
		}
	}

	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))

	// Verify against the md5 sent by the client, if any. Indexing a nil
	// or empty map yields "".
	if md5Hex := metadata["md5Sum"]; md5Hex != "" && newMD5Hex != md5Hex {
		return "", BadDigest{md5Hex, newMD5Hex}
	}

	if err := fs.storage.RenameFile(minioMetaBucket, tempObj, bucket, object); err != nil {
		return "", toObjectErr(err, bucket, object)
	}
	committed = true

	// Return md5sum, successfully wrote object.
	return newMD5Hex, nil
}
// DeleteObject - deletes the object from the bucket after validating
// bucket and object names.
func (fs fsObjects) DeleteObject(bucket, object string) error {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	err := fs.storage.DeleteFile(bucket, object)
	if err == nil {
		return nil
	}
	return toObjectErr(err, bucket, object)
}
// isBucketExist - reports whether the bucket can be stat-ed on the
// given storage. Any stat failure counts as "does not exist"; failures
// other than errVolumeNotFound are additionally passed to errorIf.
func isBucketExist(storage StorageAPI, bucketName string) bool {
	_, err := storage.StatVol(bucketName)
	switch err {
	case nil:
		// Bucket exists.
		return true
	case errVolumeNotFound:
		// Bucket is known to be missing, nothing to report.
		return false
	default:
		errorIf(err, "Stat failed on bucket "+bucketName+".")
		return false
	}
}
// listObjects - lists at most maxKeys entries of the bucket that the
// tree walk yields for the given prefix and marker.
//
// Only "" and '/' are accepted as delimiter. With '/' the listing is
// non recursive: directory entries are returned as Prefixes and
// NextMarker is set to the last listed entry. maxKeys of zero returns
// an empty listing, negative or too large values are reset to
// maxObjectList. A listing cut short has IsTruncated set and its tree
// walker is saved under the next marker.
func (fs fsObjects) listObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return ListObjectsInfo{}, BucketNameInvalid{Bucket: bucket}
	}
	// Verify if bucket exists.
	if !isBucketExist(fs.storage, bucket) {
		return ListObjectsInfo{}, BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectPrefix(prefix) {
		return ListObjectsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: prefix}
	}
	// Verify if delimiter is anything other than '/', which we do not support.
	if delimiter != "" && delimiter != slashSeparator {
		return ListObjectsInfo{}, UnsupportedDelimiter{
			Delimiter: delimiter,
		}
	}
	// A marker, when given, has to start with the prefix.
	if marker != "" && !strings.HasPrefix(marker, prefix) {
		return ListObjectsInfo{}, InvalidMarkerPrefixCombination{
			Marker: marker,
			Prefix: prefix,
		}
	}

	// With max keys of zero we have reached eof, return right here.
	if maxKeys == 0 {
		return ListObjectsInfo{}, nil
	}

	// For delimiter and prefix as '/' we do not list anything at all
	// since according to s3 spec we stop at the 'delimiter' along
	// with the prefix. On a flat namespace with 'prefix' as '/' we
	// don't have any entries, since all the keys are of form
	// 'keyName/...'
	if delimiter == slashSeparator && prefix == slashSeparator {
		return ListObjectsInfo{}, nil
	}

	// Over flowing count - reset to maxObjectList.
	if maxKeys < 0 || maxKeys > maxObjectList {
		maxKeys = maxObjectList
	}

	// Default is recursive, if delimiter is set then list non recursive.
	recursive := delimiter != slashSeparator

	// Reuse a saved tree walker for these parameters, otherwise start
	// a new walk.
	walker := fs.lookupTreeWalk(listParams{bucket, recursive, marker, prefix})
	if walker == nil {
		walker = fs.startTreeWalk(bucket, prefix, marker, recursive, func(bucket, object string) bool {
			return !strings.HasSuffix(object, slashSeparator)
		})
	}

	var (
		fileInfos  []FileInfo
		eof        bool
		nextMarker string
	)
	// One entry is collected per completed iteration.
	for len(fileInfos) < maxKeys {
		walkResult, ok := <-walker.ch
		if !ok {
			// Closed channel.
			eof = true
			break
		}
		// For any walk error return right away.
		if walkResult.err != nil {
			// File not found is a valid case.
			if walkResult.err == errFileNotFound {
				return ListObjectsInfo{}, nil
			}
			return ListObjectsInfo{}, toObjectErr(walkResult.err, bucket, prefix)
		}

		// Convert the walked entry to FileInfo, the name always being
		// the full path of the entry.
		entry := walkResult.entry
		var fileInfo FileInfo
		if strings.HasSuffix(entry, slashSeparator) {
			// Directory entries are not stat-ed.
			fileInfo.Name = entry
			fileInfo.Mode = os.ModeDir
		} else {
			var statErr error
			if fileInfo, statErr = fs.storage.StatFile(bucket, entry); statErr != nil {
				// NOTE(review): every stat failure ends the listing with
				// an empty result and no error - confirm this is intended.
				return ListObjectsInfo{}, nil
			}
			fileInfo.Name = entry
		}

		nextMarker = fileInfo.Name
		fileInfos = append(fileInfos, fileInfo)
		if walkResult.end {
			eof = true
			break
		}
	}

	// More entries may follow, keep the walker for the next listing.
	if !eof {
		fs.saveTreeWalk(listParams{bucket, recursive, nextMarker, prefix}, walker)
	}

	result := ListObjectsInfo{IsTruncated: !eof}
	// With delimiter set we fill in NextMarker and Prefixes; nextMarker
	// is the name of the last collected entry, or empty without entries.
	if delimiter == slashSeparator {
		result.NextMarker = nextMarker
	}
	for _, fileInfo := range fileInfos {
		if !recursive && fileInfo.Mode.IsDir() {
			result.Prefixes = append(result.Prefixes, fileInfo.Name)
			continue
		}
		result.Objects = append(result.Objects, ObjectInfo{
			Name:    fileInfo.Name,
			ModTime: fileInfo.ModTime,
			Size:    fileInfo.Size,
			IsDir:   false,
		})
	}
	return result, nil
}
// ListObjects - lists objects of the bucket for the given prefix,
// marker, delimiter and maxKeys. All arguments are handed unchanged to
// listObjects.
func (fs fsObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
	listing, err := fs.listObjects(bucket, prefix, marker, delimiter, maxKeys)
	return listing, err
}

View File

@ -28,13 +28,10 @@ const (
) )
// InvalidRange - invalid range // InvalidRange - invalid range
type InvalidRange struct { type InvalidRange struct{}
Start int64
Length int64
}
func (e InvalidRange) Error() string { func (e InvalidRange) Error() string {
return fmt.Sprintf("Invalid range start:%d length:%d", e.Start, e.Length) return "The requested range is not satisfiable"
} }
// HttpRange specifies the byte range to be sent to the client. // HttpRange specifies the byte range to be sent to the client.

View File

@ -118,8 +118,8 @@ func registerApp() *cli.App {
app := cli.NewApp() app := cli.NewApp()
app.Name = "Minio" app.Name = "Minio"
app.Author = "Minio.io" app.Author = "Minio.io"
app.Usage = "Distributed Object Storage Server for Micro Services." app.Usage = "Cloud Storage Server."
app.Description = `Micro services environment provisions one Minio server per application instance. Scalability is achieved through large number of smaller personalized instances. This version of the Minio binary is built using Filesystem storage backend for magnetic and solid state disks.` app.Description = `Minio is an Amazon S3 compatible object storage server. Use it to store photos, videos, VMs, containers, log files, or any blob of data as objects.`
app.Flags = append(minioFlags, globalFlags...) app.Flags = append(minioFlags, globalFlags...)
app.Commands = commands app.Commands = commands
app.CustomAppHelpTemplate = minioHelpTemplate app.CustomAppHelpTemplate = minioHelpTemplate

View File

@ -64,7 +64,7 @@ func (n *nsLockMap) lock(volume, path string, readLock bool) {
} }
n.lockMap[param] = nsLk n.lockMap[param] = nsLk
} }
nsLk.ref++ nsLk.ref++ // Update ref count here to avoid multiple races.
// Unlock map before Locking NS which might block. // Unlock map before Locking NS which might block.
n.mutex.Unlock() n.mutex.Unlock()

View File

@ -20,7 +20,6 @@ import (
"bytes" "bytes"
"crypto/md5" "crypto/md5"
"encoding/hex" "encoding/hex"
"io"
"io/ioutil" "io/ioutil"
"os" "os"
"strconv" "strconv"
@ -111,7 +110,7 @@ func testGetObjectInfo(obj ObjectLayer, instanceType string, t *testing.T) {
} }
} }
func BenchmarkGetObject(b *testing.B) { func BenchmarkGetObjectFS(b *testing.B) {
// Make a temporary directory to use as the obj. // Make a temporary directory to use as the obj.
directory, err := ioutil.TempDir("", "minio-benchmark-getobject") directory, err := ioutil.TempDir("", "minio-benchmark-getobject")
if err != nil { if err != nil {
@ -146,16 +145,12 @@ func BenchmarkGetObject(b *testing.B) {
b.ResetTimer() b.ResetTimer()
for i := 0; i < b.N; i++ { for i := 0; i < b.N; i++ {
var buffer = new(bytes.Buffer) var buffer = new(bytes.Buffer)
r, err := obj.GetObject("bucket", "object"+strconv.Itoa(i%10), 0) err = obj.GetObject("bucket", "object"+strconv.Itoa(i%10), 0, int64(len([]byte(text))), buffer)
if err != nil { if err != nil {
b.Error(err) b.Error(err)
} }
if _, err := io.Copy(buffer, r); err != nil {
b.Error(err)
}
if buffer.Len() != len(text) { if buffer.Len() != len(text) {
b.Errorf("GetObject returned incorrect length %d (should be %d)\n", buffer.Len(), len(text)) b.Errorf("GetObject returned incorrect length %d (should be %d)\n", buffer.Len(), len(text))
} }
r.Close()
} }
} }

View File

@ -413,6 +413,12 @@ func testListObjects(obj ObjectLayer, instanceType string, t *testing.T) {
{Name: "obj2"}, {Name: "obj2"},
}, },
}, },
// ListObjectsResult-30.
// Prefix and Delimiter is set to '/', (testCase 62).
{
IsTruncated: false,
Objects: []ObjectInfo{},
},
} }
testCases := []struct { testCases := []struct {
@ -521,6 +527,8 @@ func testListObjects(obj ObjectLayer, instanceType string, t *testing.T) {
// Test with marker set as hierarhical value and with delimiter. (60-61) // Test with marker set as hierarhical value and with delimiter. (60-61)
{"test-bucket-list-object", "", "Asia/India/India-summer-photos-1", "/", 10, resultCases[28], nil, true}, {"test-bucket-list-object", "", "Asia/India/India-summer-photos-1", "/", 10, resultCases[28], nil, true},
{"test-bucket-list-object", "", "Asia/India/Karnataka/Bangalore/Koramangala/pics", "/", 10, resultCases[29], nil, true}, {"test-bucket-list-object", "", "Asia/India/Karnataka/Bangalore/Koramangala/pics", "/", 10, resultCases[29], nil, true},
// Test with prefix and delimiter set to '/'. (62)
{"test-bucket-list-object", "/", "", "/", 10, resultCases[30], nil, true},
} }
for i, testCase := range testCases { for i, testCase := range testCases {

File diff suppressed because it is too large Load Diff

View File

@ -1,630 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"crypto/md5"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"path"
"sort"
"strconv"
"strings"
"github.com/skyrings/skyring-common/tools/uuid"
)
// File names used inside the multipart metadata directories.
const (
// Name of the per-upload file kept under
// 'multipart/bucket/object/uploadID'; newMultipartUploadCommon checks
// for it and JSON-encodes the upload metadata into its temporary copy.
incompleteFile = "00000.incomplete"
// Name of the per-object file kept under 'multipart/bucket/object'.
uploadsJSONFile = "uploads.json"
)
// createUploadsJSON - creates the empty placeholder 'uploads.json'
// for the object under the multipart prefix and verifies it with a
// stat afterwards.
//
// NOTE(review): the placeholder is created directly at its final path,
// nothing visible here writes the temporary path that the fallback
// below renames from and deletes - confirm whether the file was meant
// to be created at the temporary path instead.
func createUploadsJSON(storage StorageAPI, bucket, object, uploadID string) error {
	// Final and temporary location of the placeholder uploads.json
	uploadsPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
	tmpUploadsPath := path.Join(tmpMetaPrefix, bucket, object, uploadID+"."+uploadsJSONFile)

	w, err := storage.CreateFile(minioMetaBucket, uploadsPath)
	if err != nil {
		return err
	}
	if err = w.Close(); err != nil {
		// A failure during close-and-remove takes precedence over the
		// close error.
		if clErr := safeCloseAndRemove(w); clErr != nil {
			return clErr
		}
		return err
	}

	_, err = storage.StatFile(minioMetaBucket, uploadsPath)
	if err == nil {
		// Placeholder is in place.
		return nil
	}
	if err == errFileNotFound {
		// Placeholder is missing, try moving the temporary file in.
		if err = storage.RenameFile(minioMetaBucket, tmpUploadsPath, minioMetaBucket, uploadsPath); err == nil {
			return nil
		}
	}
	// Stat or rename failed, remove the temporary file; a failure to
	// remove it takes precedence over the earlier error.
	if derr := storage.DeleteFile(minioMetaBucket, tmpUploadsPath); derr != nil {
		return derr
	}
	return err
}
/// Common multipart object layer functions.
// newMultipartUploadCommon - starts a new multipart upload for
// bucket/object and returns the generated upload id, shared by both
// object layers.
//
// The upload id is reserved by writing the JSON encoded meta to a
// temporary file which is then renamed to
// '<bucket>/<object>/<uploadID>/00000.incomplete' under the multipart
// meta prefix. If the generated id is already taken a new one is tried.
func newMultipartUploadCommon(storage StorageAPI, bucket string, object string, meta map[string]string) (uploadID string, err error) {
	// Validate the arguments, first failing check wins.
	switch {
	case !IsValidBucketName(bucket):
		return "", BucketNameInvalid{Bucket: bucket}
	case !isBucketExist(storage, bucket):
		return "", BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		return "", ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	// Always encode a non-nil map.
	if meta == nil {
		meta = make(map[string]string)
	}

	// This lock needs to be held for any changes to the directory
	// contents of ".minio/multipart/object/".
	lockPath := pathJoin(mpartMetaPrefix, bucket, object)
	nsMutex.Lock(minioMetaBucket, lockPath)
	defer nsMutex.Unlock(minioMetaBucket, lockPath)

	// Keep generating ids until an unused one is found.
	for {
		id, genErr := uuid.New()
		if genErr != nil {
			return "", genErr
		}
		candidateID := id.String()

		// Create placeholder file 'uploads.json'.
		if err = createUploadsJSON(storage, bucket, object, candidateID); err != nil {
			return "", err
		}

		uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, candidateID, incompleteFile)
		tempUploadIDPath := path.Join(tmpMetaPrefix, bucket, object, fmt.Sprintf("%s.%s", candidateID, incompleteFile))

		_, err = storage.StatFile(minioMetaBucket, uploadIDPath)
		if err == nil {
			// The id is already in use, try another one.
			continue
		}
		if err != errFileNotFound {
			return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
		}

		// The id is free, write the metadata to the temporary file.
		var w io.WriteCloser
		w, err = storage.CreateFile(minioMetaBucket, tempUploadIDPath)
		if err != nil {
			return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
		}
		if err = json.NewEncoder(w).Encode(&meta); err != nil {
			if clErr := safeCloseAndRemove(w); clErr != nil {
				return "", toObjectErr(clErr, minioMetaBucket, tempUploadIDPath)
			}
			return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
		}
		if err = w.Close(); err != nil {
			if clErr := safeCloseAndRemove(w); clErr != nil {
				return "", toObjectErr(clErr, minioMetaBucket, tempUploadIDPath)
			}
			return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
		}

		// Publish the reservation by moving it to its final location.
		err = storage.RenameFile(minioMetaBucket, tempUploadIDPath, minioMetaBucket, uploadIDPath)
		if err != nil {
			if derr := storage.DeleteFile(minioMetaBucket, tempUploadIDPath); derr != nil {
				return "", toObjectErr(derr, minioMetaBucket, tempUploadIDPath)
			}
			return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
		}
		return candidateID, nil
	}
}
// putObjectPartCommon - stores one part of a multipart upload and
// returns the hex encoded md5sum of the data that was written.
//
// The part is streamed into a temporary file while its md5sum is
// computed, compared against md5Hex when that is non-empty, and finally
// renamed to '<partID>.<md5sum>' inside the upload id directory. With a
// size greater than zero exactly size bytes are expected from data,
// otherwise data is consumed until it is exhausted.
func putObjectPartCommon(storage StorageAPI, bucket string, object string, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
	// Validate the arguments, first failing check wins.
	switch {
	case !IsValidBucketName(bucket):
		return "", BucketNameInvalid{Bucket: bucket}
	case !isBucketExist(storage, bucket):
		return "", BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		return "", ObjectNameInvalid{Bucket: bucket, Object: object}
	case !isUploadIDExists(storage, bucket, object, uploadID):
		return "", InvalidUploadID{UploadID: uploadID}
	}

	// Hold read lock on the uploadID so that no one aborts it.
	uploadLockPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
	nsMutex.RLock(minioMetaBucket, uploadLockPath)
	defer nsMutex.RUnlock(minioMetaBucket, uploadLockPath)

	// Hold write lock on the part so that there is no parallel upload
	// on the part.
	partLockPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID, strconv.Itoa(partID))
	nsMutex.Lock(minioMetaBucket, partLockPath)
	defer nsMutex.Unlock(minioMetaBucket, partLockPath)

	// The part is staged at a temporary path first.
	tmpPartPath := path.Join(tmpMetaPrefix, bucket, object, fmt.Sprintf("%s.%.5d", uploadID, partID))
	fileWriter, err := storage.CreateFile(minioMetaBucket, tmpPartPath)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// abort closes and removes the staged file; a failure while doing
	// so takes precedence over the cause that triggered the abort.
	abort := func(cause error) (string, error) {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", cause
	}

	// Everything written to the file is hashed as well.
	hasher := md5.New()
	sink := io.MultiWriter(hasher, fileWriter)
	if size > 0 {
		if _, err = io.CopyN(sink, data, size); err != nil {
			return abort(toObjectErr(err, bucket, object))
		}
		// The reader must not hold more data than announced by size;
		// reading one extra byte is expected to fail, success proves
		// that there is surplus data.
		if _, err = io.CopyN(ioutil.Discard, data, 1); err == nil {
			return abort(UnExpectedDataSize{Size: int(size)})
		}
	} else if _, err = io.Copy(sink, data); err != nil {
		return abort(toObjectErr(err, bucket, object))
	}

	// Verify the checksum when the caller supplied one.
	newMD5Hex := hex.EncodeToString(hasher.Sum(nil))
	if md5Hex != "" && newMD5Hex != md5Hex {
		return abort(BadDigest{md5Hex, newMD5Hex})
	}

	if err = fileWriter.Close(); err != nil {
		return abort(err)
	}

	finalPartPath := path.Join(mpartMetaPrefix, bucket, object, uploadID, fmt.Sprintf("%.5d.%s", partID, newMD5Hex))
	if _, err = storage.StatFile(minioMetaBucket, finalPartPath); err == nil {
		// A part with the same number and md5sum is already stored,
		// the staged copy is redundant.
		if err = storage.DeleteFile(minioMetaBucket, tmpPartPath); err != nil {
			return "", toObjectErr(err, minioMetaBucket, tmpPartPath)
		}
		return newMD5Hex, nil
	}

	// Move the staged part to its final location.
	if err = storage.RenameFile(minioMetaBucket, tmpPartPath, minioMetaBucket, finalPartPath); err != nil {
		if derr := storage.DeleteFile(minioMetaBucket, tmpPartPath); derr != nil {
			return "", toObjectErr(derr, minioMetaBucket, tmpPartPath)
		}
		return "", toObjectErr(err, minioMetaBucket, finalPartPath)
	}
	return newMD5Hex, nil
}
// cleanupUploadedParts - removes the upload id directory of
// bucket/object, and with it all uploaded parts, from the multipart
// meta prefix by delegating to cleanupDir.
func cleanupUploadedParts(storage StorageAPI, bucket, object, uploadID string) error {
	uploadIDDir := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	return cleanupDir(storage, minioMetaBucket, uploadIDDir)
}
// abortMultipartUploadCommon - aborts the multipart upload identified by
// uploadID, shared by both object layers.
//
// All parts of the upload are removed. The 'uploads.json' placeholder of
// the object is removed as well unless the object directory is known to
// still hold more than one entry.
func abortMultipartUploadCommon(storage StorageAPI, bucket, object, uploadID string) error {
	// Validate the arguments, first failing check wins.
	switch {
	case !IsValidBucketName(bucket):
		return BucketNameInvalid{Bucket: bucket}
	case !isBucketExist(storage, bucket):
		return BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	case !isUploadIDExists(storage, bucket, object, uploadID):
		return InvalidUploadID{UploadID: uploadID}
	}

	// Hold lock so that there is no competing complete-multipart-upload
	// or put-object-part.
	lockPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
	nsMutex.Lock(minioMetaBucket, lockPath)
	defer nsMutex.Unlock(minioMetaBucket, lockPath)

	if err := cleanupUploadedParts(storage, bucket, object, uploadID); err != nil {
		return err
	}

	// When other incomplete upload ids are still present for the object
	// 'uploads.json' has to stay. A listing failure is not treated as
	// an error, the placeholder is removed in that case.
	remaining, err := storage.ListDir(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object))
	if err == nil && len(remaining) > 1 {
		return nil
	}
	return storage.DeleteFile(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile))
}
// isIncompleteMultipart - reports whether objectPath holds an
// 'uploads.json' file inside the minio meta bucket. A missing file is
// not an error, any other stat failure is returned to the caller.
func isIncompleteMultipart(storage StorageAPI, objectPath string) (bool, error) {
	_, err := storage.StatFile(minioMetaBucket, path.Join(objectPath, uploadsJSONFile))
	switch err {
	case nil:
		return true, nil
	case errFileNotFound:
		return false, nil
	default:
		return false, err
	}
}
// listLeafEntries - returns the directory entries (names ending with the
// slash separator) found directly under prefixPath when prefixPath is a
// leaf directory, i.e. it holds an 'uploads.json'. For a non-leaf
// directory the result is empty; storage errors are passed through.
func listLeafEntries(storage StorageAPI, prefixPath string) (entries []string, err error) {
	isLeaf, err := isIncompleteMultipart(storage, prefixPath)
	if err != nil || !isLeaf {
		// err is nil when prefixPath is simply not a leaf directory.
		return nil, err
	}

	listing, err := storage.ListDir(minioMetaBucket, prefixPath)
	if err != nil {
		return nil, err
	}

	// Keep directories only, plain files are of no interest here.
	var dirs []string
	for i := range listing {
		if strings.HasSuffix(listing[i], slashSeparator) {
			dirs = append(dirs, listing[i])
		}
	}
	return dirs, nil
}
// listMetaBucketMultipartFiles - list all files at a given prefix inside minioMetaBucket.
//
// Walks minioMetaBucket below prefixPath, resuming after markerPath, and
// collects entries describing incomplete multipart uploads:
//   - a directory reported by the walker that holds an 'uploads.json' is
//     expanded into one entry per sub directory (upload id), each carrying
//     the FileInfo of its '00000.incomplete' file,
//   - any other directory reported by the walker is returned unchanged,
//   - a file reported by the walker is returned only when its name ends
//     with '00000.incomplete', named after its parent directory.
//
// Collection stops once maxKeys entries have been gathered; since the
// counter is compared with '==' after incrementing, a maxKeys of zero or
// less never stops the loop early. eof is true when the walker channel
// was closed, or when the walk reported a missing file or disk.
//
// NOTE(review): storage stays nil when layer is neither fsObjects nor
// xlObjects, the type switch has no default case.
func listMetaBucketMultipartFiles(layer ObjectLayer, prefixPath string, markerPath string, recursive bool, maxKeys int) (fileInfos []FileInfo, eof bool, err error) {
var storage StorageAPI
switch l := layer.(type) {
case fsObjects:
storage = l.storage
case xlObjects:
storage = l.storage
}
// For a recursive listing the marker is extended with the incomplete
// file name before it is handed to the tree walk.
if recursive && markerPath != "" {
markerPath = pathJoin(markerPath, incompleteFile)
}
// Reuse a walker saved by a previous call with the same parameters,
// start a fresh walk otherwise.
walker := lookupTreeWalk(layer, listParams{minioMetaBucket, recursive, markerPath, prefixPath})
if walker == nil {
walker = startTreeWalk(layer, minioMetaBucket, prefixPath, markerPath, recursive)
}
// newMaxKeys tracks the size of entries which are going to be
// returned back.
var newMaxKeys int
// Following loop gathers and filters out special files inside
// minio meta volume.
outerLoop:
for {
walkResult, ok := <-walker.ch
if !ok {
// Closed channel.
eof = true
break
}
// For any walk error return right away.
if walkResult.err != nil {
// File not found or Disk not found is a valid case.
if walkResult.err == errFileNotFound || walkResult.err == errDiskNotFound {
return nil, true, nil
}
return nil, false, toObjectErr(walkResult.err, minioMetaBucket, prefixPath)
}
fi := walkResult.fileInfo
var entries []string
if fi.Mode.IsDir() {
// List all the entries if fi.Name is a leaf directory, if
// fi.Name is not a leaf directory then the resulting
// entries are empty.
entries, err = listLeafEntries(storage, fi.Name)
if err != nil {
return nil, false, err
}
}
if len(entries) > 0 {
// We reach here for non-recursive case and a leaf entry.
sort.Strings(entries)
for _, entry := range entries {
var fileInfo FileInfo
// Stat the marker file of the upload id to obtain its FileInfo.
incompleteUploadFile := path.Join(fi.Name, entry, incompleteFile)
fileInfo, err = storage.StatFile(minioMetaBucket, incompleteUploadFile)
if err != nil {
return nil, false, err
}
// Report the upload id directory rather than the marker file;
// path.Join also drops the trailing slash of entry.
fileInfo.Name = path.Join(fi.Name, entry)
fileInfos = append(fileInfos, fileInfo)
newMaxKeys++
// If we have reached the maxKeys, it means we have listed
// everything that was requested.
if newMaxKeys == maxKeys {
break outerLoop
}
}
} else {
// We reach here for a non-recursive case non-leaf entry
// OR recursive case with fi.Name.
if !fi.Mode.IsDir() { // Do not skip non-recursive case directory entries.
// Validate if 'fi.Name' is incomplete multipart.
if !strings.HasSuffix(fi.Name, incompleteFile) {
continue
}
// Report the directory holding the marker file.
fi.Name = path.Dir(fi.Name)
}
fileInfos = append(fileInfos, fi)
newMaxKeys++
// If we have reached the maxKeys, it means we have listed
// everything that was requested.
if newMaxKeys == maxKeys {
break
}
}
}
if !eof && len(fileInfos) != 0 {
// EOF has not reached, hence save the walker channel to the map so that the walker go routine
// can continue from where it left off for the next list request.
lastFileInfo := fileInfos[len(fileInfos)-1]
markerPath = lastFileInfo.Name
saveTreeWalk(layer, listParams{minioMetaBucket, recursive, markerPath, prefixPath}, walker)
}
// Return entries here.
return fileInfos, eof, nil
}
// FIXME: Currently the code sorts based on keyName/upload-id, which is
// incorrect according to the S3 specs. The specs require keyNames to be
// sorted lexically, and uploads sharing a keyName to be sorted by their
// initiated time. This case is not handled yet.

// listMultipartUploadsCommon - lists the incomplete multipart uploads of
// bucket below prefix, shared by both object layers.
//
// keyMarker and uploadIDMarker select where the listing resumes, a
// delimiter of "/" requests a non-recursive listing in which directories
// are reported as common prefixes, and maxUploads is passed on as the
// entry limit. The next markers are filled in only when the result is
// truncated.
func listMultipartUploadsCommon(layer ObjectLayer, bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
	// Pick the storage backing the given layer.
	var storage StorageAPI
	switch l := layer.(type) {
	case xlObjects:
		storage = l.storage
	case fsObjects:
		storage = l.storage
	}

	// Validate the arguments, first failing check wins.
	switch {
	case !IsValidBucketName(bucket):
		return ListMultipartsInfo{}, BucketNameInvalid{Bucket: bucket}
	case !isBucketExist(storage, bucket):
		return ListMultipartsInfo{}, BucketNotFound{Bucket: bucket}
	case !IsValidObjectPrefix(prefix):
		return ListMultipartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: prefix}
	case delimiter != "" && delimiter != slashSeparator:
		// Only '/' is supported as delimiter.
		return ListMultipartsInfo{}, UnsupportedDelimiter{
			Delimiter: delimiter,
		}
	case keyMarker != "" && !strings.HasPrefix(keyMarker, prefix):
		// The marker has to live below the prefix.
		return ListMultipartsInfo{}, InvalidMarkerPrefixCombination{
			Marker: keyMarker,
			Prefix: prefix,
		}
	}

	var result ListMultipartsInfo
	if uploadIDMarker != "" {
		// An upload id marker cannot be combined with a directory key marker.
		if strings.HasSuffix(keyMarker, slashSeparator) {
			return result, InvalidUploadIDKeyCombination{
				UploadIDMarker: uploadIDMarker,
				KeyMarker:      keyMarker,
			}
		}
		id, err := uuid.Parse(uploadIDMarker)
		if err != nil {
			return result, err
		}
		if id.IsZero() {
			return result, MalformedUploadID{
				UploadID: uploadIDMarker,
			}
		}
	}

	// Listing is recursive unless the '/' delimiter was requested.
	recursive := delimiter != slashSeparator
	result.MaxUploads = maxUploads

	// Not using path.Join() as it strips off the trailing '/'.
	multipartPrefixPath := pathJoin(mpartMetaPrefix, pathJoin(bucket, prefix))
	if prefix == "" {
		// Should have a trailing "/" if prefix is "", for ex.
		// multipartPrefixPath should be "multipart/bucket/".
		multipartPrefixPath += slashSeparator
	}

	var multipartMarkerPath string
	if keyMarker != "" {
		multipartMarkerPath = pathJoin(mpartMetaPrefix, pathJoin(pathJoin(bucket, keyMarker), uploadIDMarker))
	}

	// List all the multipart files at the prefix path, starting with
	// the marker path.
	fileInfos, eof, err := listMetaBucketMultipartFiles(layer, multipartPrefixPath, multipartMarkerPath, recursive, maxUploads)
	if err != nil {
		return ListMultipartsInfo{}, err
	}

	// Names are reported relative to '<multipart-prefix>/<bucket>/'.
	bucketMetaPrefix := retainSlash(pathJoin(mpartMetaPrefix, bucket))
	for _, fi := range fileInfos {
		var objectName, uploadID string
		if fi.Mode.IsDir() {
			// All directory entries are common prefixes, their upload
			// id stays empty.
			objectName = strings.TrimPrefix(fi.Name, bucketMetaPrefix)
			result.CommonPrefixes = append(result.CommonPrefixes, objectName)
		} else {
			// The last path element is the upload id, the rest is the
			// object name.
			uploadID = path.Base(fi.Name)
			objectName = strings.TrimPrefix(path.Dir(fi.Name), bucketMetaPrefix)
			result.Uploads = append(result.Uploads, uploadMetadata{
				Object:    objectName,
				UploadID:  uploadID,
				Initiated: fi.ModTime,
			})
		}
		// The markers always follow the last entry seen.
		result.NextKeyMarker = objectName
		result.NextUploadIDMarker = uploadID
	}

	result.IsTruncated = !eof
	if eof {
		// Nothing more to list, markers are meaningless.
		result.NextKeyMarker = ""
		result.NextUploadIDMarker = ""
	}
	return result, nil
}
// listObjectPartsCommon - lists the uploaded parts of the multipart
// upload identified by uploadID, common function across both object
// layers.
//
// Parts with a number greater than partNumberMarker are returned in
// ascending order of their '<partNumber>.<md5sum>' file names. Listing
// stops after maxParts parts; a maxParts of zero or less never stops the
// listing early. When entries remain after the returned parts the result
// is marked truncated and NextPartNumberMarker carries the number of the
// last returned part.
//
// Returns an error when the arguments are invalid, when the upload id
// directory cannot be listed, when a part file cannot be stat'ed or when
// an entry name does not follow the '<partNumber>.<md5sum>' layout.
func listObjectPartsCommon(storage StorageAPI, bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return ListPartsInfo{}, BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !isBucketExist(storage, bucket) {
		return ListPartsInfo{}, BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return ListPartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	if !isUploadIDExists(storage, bucket, object, uploadID) {
		return ListPartsInfo{}, InvalidUploadID{UploadID: uploadID}
	}

	// Hold lock so that there is no competing abort-multipart-upload or
	// complete-multipart-upload.
	nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
	defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))

	result := ListPartsInfo{}
	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	entries, err := storage.ListDir(minioMetaBucket, uploadIDPath)
	if err != nil {
		return result, err
	}
	sort.Strings(entries)

	// Reduce every entry to its base name.
	newEntries := make([]string, 0, len(entries))
	for _, entry := range entries {
		newEntries = append(newEntries, path.Base(entry))
	}

	// Skip everything up to and including the marker part; this also
	// skips '00000.incomplete' for any non-negative marker.
	idx := sort.SearchStrings(newEntries, fmt.Sprintf("%.5d.", partNumberMarker+1))
	newEntries = newEntries[idx:]

	count := maxParts
	for _, entry := range newEntries {
		partPath := path.Join(uploadIDPath, entry)
		// A part that cannot be stat'ed must not be reported with a
		// zero size and modification time.
		fi, err := storage.StatFile(minioMetaBucket, partPath)
		if err != nil {
			return ListPartsInfo{}, toObjectErr(err, minioMetaBucket, partPath)
		}
		// Entries are named '<partNumber>.<md5sum>', reject anything
		// else instead of indexing out of range.
		splitEntry := strings.SplitN(entry, ".", 2)
		if len(splitEntry) != 2 {
			return ListPartsInfo{}, fmt.Errorf("malformed part entry %q for upload id %q", entry, uploadID)
		}
		partNum, err := strconv.Atoi(splitEntry[0])
		if err != nil {
			return ListPartsInfo{}, err
		}
		result.Parts = append(result.Parts, partInfo{
			PartNumber:   partNum,
			LastModified: fi.ModTime,
			ETag:         splitEntry[1],
			Size:         fi.Size,
		})
		count--
		if count == 0 {
			break
		}
	}

	// If listed entries are more than maxParts, we set IsTruncated as true.
	if len(newEntries) > len(result.Parts) {
		result.IsTruncated = true
		// Make sure to fill next part number marker if IsTruncated is
		// true for subsequent listing.
		result.NextPartNumberMarker = result.Parts[len(result.Parts)-1].PartNumber
	}
	result.Bucket = bucket
	result.Object = object
	result.UploadID = uploadID
	result.MaxParts = maxParts
	return result, nil
}
// isUploadIDExists - reports whether the '00000.incomplete' marker file
// of uploadID exists for bucket/object and is a regular file. A stat
// failure other than "file not found" is logged and treated as "does
// not exist".
func isUploadIDExists(storage StorageAPI, bucket, object, uploadID string) bool {
	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID, incompleteFile)
	st, err := storage.StatFile(minioMetaBucket, uploadIDPath)
	if err == nil {
		return st.Mode.IsRegular()
	}
	if err != errFileNotFound {
		errorIf(err, "Stat failed on "+minioMetaBucket+"/"+uploadIDPath+".")
	}
	return false
}

View File

@ -17,27 +17,92 @@
package main package main
import ( import (
"sort" "path/filepath"
"strings" "strings"
"sync"
) )
// Common initialization needed for both object layers. const (
func initObjectLayer(storageDisks ...StorageAPI) error { // Block size used for all internal operations version 1.
// This happens for the first time, but keep this here since this blockSizeV1 = 10 * 1024 * 1024 // 10MiB.
// is the only place where it can be made expensive optimizing all )
// other calls. Create minio meta volume, if it doesn't exist yet.
for _, storage := range storageDisks { // House keeping code needed for FS.
if err := storage.MakeVol(minioMetaBucket); err != nil { func fsHouseKeeping(storageDisk StorageAPI) error {
// Attempt to create `.minio`.
err := storageDisk.MakeVol(minioMetaBucket)
if err != nil {
if err != errVolumeExists && err != errDiskNotFound { if err != errVolumeExists && err != errDiskNotFound {
return toObjectErr(err, minioMetaBucket) return err
} }
} }
// Cleanup all temp entries upon start. // Cleanup all temp entries upon start.
err := cleanupDir(storage, minioMetaBucket, tmpMetaPrefix) err = cleanupDir(storageDisk, minioMetaBucket, tmpMetaPrefix)
if err != nil { if err != nil {
return err
}
return nil
}
// newStorageAPI - initializes the storage API matching the given disk.
// A disk containing ':' that has no volume name according to
// filepath.VolumeName is handed to the rpc client, every other disk is
// treated as a local filesystem path.
func newStorageAPI(disk string) (storage StorageAPI, err error) {
	isNetworkDisk := strings.ContainsRune(disk, ':') && filepath.VolumeName(disk) == ""
	if isNetworkDisk {
		// Initialize rpc client storage API.
		return newRPCClient(disk)
	}
	// Initialize filesystem storage API.
	return newPosix(disk)
}
// House keeping code needed for XL.
func xlHouseKeeping(storageDisks []StorageAPI) error {
// This happens for the first time, but keep this here since this
// is the only place where it can be made expensive optimizing all
// other calls. Create minio meta volume, if it doesn't exist yet.
var wg = &sync.WaitGroup{}
// Initialize errs to collect errors inside go-routine.
var errs = make([]error, len(storageDisks))
// Initialize all disks in parallel.
for index, disk := range storageDisks {
if disk == nil {
errs[index] = errDiskNotFound
continue
}
wg.Add(1)
go func(index int, disk StorageAPI) {
// Indicate this wait group is done.
defer wg.Done()
// Attempt to create `.minio`.
err := disk.MakeVol(minioMetaBucket)
if err != nil && err != errVolumeExists && err != errDiskNotFound {
errs[index] = err
return
}
// Cleanup all temp entries upon start.
err = cleanupDir(disk, minioMetaBucket, tmpMetaPrefix)
if err != nil {
errs[index] = err
return
}
errs[index] = nil
}(index, disk)
}
// Wait for all cleanup to finish.
wg.Wait()
// Return upon first error.
for _, err := range errs {
if err == nil {
continue
}
return toObjectErr(err, minioMetaBucket, tmpMetaPrefix) return toObjectErr(err, minioMetaBucket, tmpMetaPrefix)
} }
}
// Return success here.
return nil return nil
} }
@ -67,194 +132,6 @@ func cleanupDir(storage StorageAPI, volume, dirPath string) error {
} }
return nil return nil
} }
return delFunc(retainSlash(pathJoin(dirPath))) err := delFunc(retainSlash(pathJoin(dirPath)))
} return err
/// Common object layer functions.
// makeBucket - creates the volume backing bucket, common function for
// both object layers. The bucket name is validated first and storage
// errors are translated with toObjectErr.
func makeBucket(storage StorageAPI, bucket string) error {
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	err := storage.MakeVol(bucket)
	if err == nil {
		return nil
	}
	return toObjectErr(err, bucket)
}
// getBucketInfo - returns name, creation time and the total/free values
// reported by the storage layer for bucket, common function for both
// object layers.
func getBucketInfo(storage StorageAPI, bucket string) (BucketInfo, error) {
	if !IsValidBucketName(bucket) {
		return BucketInfo{}, BucketNameInvalid{Bucket: bucket}
	}
	volInfo, err := storage.StatVol(bucket)
	if err != nil {
		return BucketInfo{}, toObjectErr(err, bucket)
	}
	info := BucketInfo{
		Name:    bucket,
		Created: volInfo.Created,
		Total:   volInfo.Total,
		Free:    volInfo.Free,
	}
	return info, nil
}
// listBuckets - returns all volumes of storage that carry a valid bucket
// name, sorted by bucket name; common function for both object layers.
func listBuckets(storage StorageAPI) ([]BucketInfo, error) {
	vols, err := storage.ListVols()
	if err != nil {
		return nil, toObjectErr(err)
	}
	var buckets []BucketInfo
	for _, vol := range vols {
		// StorageAPI can send volume names which are incompatible
		// with buckets, only the compatible ones are kept.
		if IsValidBucketName(vol.Name) {
			buckets = append(buckets, BucketInfo{
				Name:    vol.Name,
				Created: vol.Created,
				Total:   vol.Total,
				Free:    vol.Free,
			})
		}
	}
	sort.Sort(byBucketName(buckets))
	return buckets, nil
}
// deleteBucket - removes the volume backing bucket, common function for
// both object layers. The bucket name is validated first and storage
// errors are translated with toObjectErr.
func deleteBucket(storage StorageAPI, bucket string) error {
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	err := storage.DeleteVol(bucket)
	if err == nil {
		return nil
	}
	return toObjectErr(err, bucket)
}
// listObjectsCommon - lists the objects of bucket below prefix, resuming
// after marker and returning at most maxKeys entries.
//
// A delimiter of "/" requests a non-recursive listing in which
// directories are reported as Prefixes and NextMarker is set to the last
// entry seen; without a delimiter the listing is recursive and
// NextMarker is left empty. A maxKeys of zero yields an empty result, a
// negative or too large maxKeys is reset to maxObjectList.
//
// NOTE(review): storage stays nil when layer is neither xlObjects nor
// fsObjects, the type switch has no default case.
func listObjectsCommon(layer ObjectLayer, bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
var storage StorageAPI
switch l := layer.(type) {
case xlObjects:
storage = l.storage
case fsObjects:
storage = l.storage
}
// Verify if bucket is valid.
if !IsValidBucketName(bucket) {
return ListObjectsInfo{}, BucketNameInvalid{Bucket: bucket}
}
// Verify if bucket exists.
if !isBucketExist(storage, bucket) {
return ListObjectsInfo{}, BucketNotFound{Bucket: bucket}
}
if !IsValidObjectPrefix(prefix) {
return ListObjectsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: prefix}
}
// Verify if delimiter is anything other than '/', which we do not support.
if delimiter != "" && delimiter != slashSeparator {
return ListObjectsInfo{}, UnsupportedDelimiter{
Delimiter: delimiter,
}
}
// Verify if marker has prefix.
if marker != "" {
if !strings.HasPrefix(marker, prefix) {
return ListObjectsInfo{}, InvalidMarkerPrefixCombination{
Marker: marker,
Prefix: prefix,
}
}
}
// With max keys of zero we have reached eof, return right here.
if maxKeys == 0 {
return ListObjectsInfo{}, nil
}
// Over flowing count - reset to maxObjectList.
if maxKeys < 0 || maxKeys > maxObjectList {
maxKeys = maxObjectList
}
// Default is recursive, if delimiter is set then list non recursive.
recursive := true
if delimiter == slashSeparator {
recursive = false
}
// Reuse a walker saved by a previous call with the same parameters,
// start a fresh walk otherwise.
walker := lookupTreeWalk(layer, listParams{bucket, recursive, marker, prefix})
if walker == nil {
walker = startTreeWalk(layer, bucket, prefix, marker, recursive)
}
var fileInfos []FileInfo
var eof bool
var nextMarker string
// Collect up to maxKeys walk results.
for i := 0; i < maxKeys; {
walkResult, ok := <-walker.ch
if !ok {
// Closed channel.
eof = true
break
}
// For any walk error return right away.
if walkResult.err != nil {
// File not found is a valid case.
if walkResult.err == errFileNotFound {
return ListObjectsInfo{}, nil
}
return ListObjectsInfo{}, toObjectErr(walkResult.err, bucket, prefix)
}
fileInfo := walkResult.fileInfo
nextMarker = fileInfo.Name
fileInfos = append(fileInfos, fileInfo)
// The walker flags its last result, nothing more will follow.
if walkResult.end {
eof = true
break
}
i++
}
// Save the walker under the last name seen so that the next list
// request can continue from where this one stopped.
params := listParams{bucket, recursive, nextMarker, prefix}
if !eof {
saveTreeWalk(layer, params, walker)
}
result := ListObjectsInfo{IsTruncated: !eof}
for _, fileInfo := range fileInfos {
// With delimiter set we fill in NextMarker and Prefixes.
if delimiter == slashSeparator {
result.NextMarker = fileInfo.Name
if fileInfo.Mode.IsDir() {
result.Prefixes = append(result.Prefixes, fileInfo.Name)
continue
}
}
result.Objects = append(result.Objects, ObjectInfo{
Name: fileInfo.Name,
ModTime: fileInfo.ModTime,
Size: fileInfo.Size,
IsDir: false,
})
}
return result, nil
}
// checks whether bucket exists.
func isBucketExist(storage StorageAPI, bucketName string) bool {
// Check whether bucket exists.
_, err := storage.StatVol(bucketName)
if err != nil {
if err == errVolumeNotFound {
return false
}
errorIf(err, "Stat failed on bucket "+bucketName+".")
return false
}
return true
} }

View File

@ -18,12 +18,16 @@ package main
import "time" import "time"
// StorageInfo - represents total capacity of underlying storage.
// NOTE(review): the unit of both fields is presumably bytes - confirm
// against the implementations of ObjectLayer.StorageInfo.
type StorageInfo struct {
Total int64 // Total disk space.
Free int64 // Free total available disk space.
}
// BucketInfo - bucket name and create date // BucketInfo - bucket name and create date
type BucketInfo struct { type BucketInfo struct {
Name string Name string
Created time.Time Created time.Time
Total int64
Free int64
} }
// ObjectInfo - object info. // ObjectInfo - object info.

View File

@ -40,10 +40,6 @@ func toObjectErr(err error, params ...string) error {
} }
case errDiskFull: case errDiskFull:
return StorageFull{} return StorageFull{}
case errReadQuorum:
return InsufficientReadQuorum{}
case errWriteQuorum:
return InsufficientWriteQuorum{}
case errIsNotRegular, errFileAccessDenied: case errIsNotRegular, errFileAccessDenied:
if len(params) >= 2 { if len(params) >= 2 {
return ObjectExistsAsDirectory{ return ObjectExistsAsDirectory{
@ -65,6 +61,10 @@ func toObjectErr(err error, params ...string) error {
Object: params[1], Object: params[1],
} }
} }
case errXLReadQuorum:
return InsufficientReadQuorum{}
case errXLWriteQuorum:
return InsufficientWriteQuorum{}
case io.ErrUnexpectedEOF, io.ErrShortWrite: case io.ErrUnexpectedEOF, io.ErrShortWrite:
return IncompleteBody{} return IncompleteBody{}
} }

View File

@ -124,39 +124,23 @@ func (api objectAPIHandlers) GetObjectHandler(w http.ResponseWriter, r *http.Req
return return
} }
// Get the object.
startOffset := hrange.start
readCloser, err := api.ObjectAPI.GetObject(bucket, object, startOffset)
if err != nil {
errorIf(err, "Unable to read object.")
apiErr := toAPIErrorCode(err)
if apiErr == ErrNoSuchKey {
apiErr = errAllowableObjectNotFound(bucket, r)
}
writeErrorResponse(w, r, apiErr, r.URL.Path)
return
}
defer readCloser.Close() // Close after this handler returns.
// Set standard object headers. // Set standard object headers.
setObjectHeaders(w, objInfo, hrange) setObjectHeaders(w, objInfo, hrange)
// Set any additional requested response headers. // Set any additional requested response headers.
setGetRespHeaders(w, r.URL.Query()) setGetRespHeaders(w, r.URL.Query())
if hrange.length > 0 { // Get the object.
if _, err := io.CopyN(w, readCloser, hrange.length); err != nil { startOffset := hrange.start
errorIf(err, "Writing to client failed.") length := hrange.length
// Do not send error response here, since client could have died. if length == 0 {
return length = objInfo.Size - startOffset
} }
} else { if err := api.ObjectAPI.GetObject(bucket, object, startOffset, length, w); err != nil {
if _, err := io.Copy(w, readCloser); err != nil {
errorIf(err, "Writing to client failed.") errorIf(err, "Writing to client failed.")
// Do not send error response here, since client could have died. // Do not send error response here, client would have already died.
return return
} }
}
} }
var unixEpochTime = time.Unix(0, 0) var unixEpochTime = time.Unix(0, 0)
@ -393,14 +377,19 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re
return return
} }
pipeReader, pipeWriter := io.Pipe()
go func() {
startOffset := int64(0) // Read the whole file. startOffset := int64(0) // Read the whole file.
// Get the object. // Get the object.
readCloser, err := api.ObjectAPI.GetObject(sourceBucket, sourceObject, startOffset) gErr := api.ObjectAPI.GetObject(sourceBucket, sourceObject, startOffset, objInfo.Size, pipeWriter)
if err != nil { if gErr != nil {
errorIf(err, "Unable to read an object.") errorIf(gErr, "Unable to read an object.")
writeErrorResponse(w, r, toAPIErrorCode(err), objectSource) pipeWriter.CloseWithError(gErr)
return return
} }
pipeWriter.Close() // Close.
}()
// Size of object. // Size of object.
size := objInfo.Size size := objInfo.Size
@ -413,7 +402,7 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re
// same md5sum as the source. // same md5sum as the source.
// Create the object. // Create the object.
md5Sum, err := api.ObjectAPI.PutObject(bucket, object, size, readCloser, metadata) md5Sum, err := api.ObjectAPI.PutObject(bucket, object, size, pipeReader, metadata)
if err != nil { if err != nil {
errorIf(err, "Unable to create an object.") errorIf(err, "Unable to create an object.")
writeErrorResponse(w, r, toAPIErrorCode(err), r.URL.Path) writeErrorResponse(w, r, toAPIErrorCode(err), r.URL.Path)
@ -434,7 +423,7 @@ func (api objectAPIHandlers) CopyObjectHandler(w http.ResponseWriter, r *http.Re
// write success response. // write success response.
writeSuccessResponse(w, encodedSuccessResponse) writeSuccessResponse(w, encodedSuccessResponse)
// Explicitly close the reader, to avoid fd leaks. // Explicitly close the reader, to avoid fd leaks.
readCloser.Close() pipeReader.Close()
} }
// checkCopySource implements x-amz-copy-source-if-modified-since and // checkCopySource implements x-amz-copy-source-if-modified-since and
@ -887,10 +876,6 @@ func (api objectAPIHandlers) ListObjectPartsHandler(w http.ResponseWriter, r *ht
writeErrorResponse(w, r, ErrInvalidMaxParts, r.URL.Path) writeErrorResponse(w, r, ErrInvalidMaxParts, r.URL.Path)
return return
} }
if maxParts == 0 {
maxParts = maxPartsList
}
listPartsInfo, err := api.ObjectAPI.ListObjectParts(bucket, object, uploadID, partNumberMarker, maxParts) listPartsInfo, err := api.ObjectAPI.ListObjectParts(bucket, object, uploadID, partNumberMarker, maxParts)
if err != nil { if err != nil {
errorIf(err, "Unable to list uploaded parts.") errorIf(err, "Unable to list uploaded parts.")
@ -945,6 +930,10 @@ func (api objectAPIHandlers) CompleteMultipartUploadHandler(w http.ResponseWrite
writeErrorResponse(w, r, ErrMalformedXML, r.URL.Path) writeErrorResponse(w, r, ErrMalformedXML, r.URL.Path)
return return
} }
if len(complMultipartUpload.Parts) == 0 {
writeErrorResponse(w, r, ErrMalformedXML, r.URL.Path)
return
}
if !sort.IsSorted(completedParts(complMultipartUpload.Parts)) { if !sort.IsSorted(completedParts(complMultipartUpload.Parts)) {
writeErrorResponse(w, r, ErrInvalidPartOrder, r.URL.Path) writeErrorResponse(w, r, ErrInvalidPartOrder, r.URL.Path)
return return

View File

@ -20,6 +20,9 @@ import "io"
// ObjectLayer implements primitives for object API layer. // ObjectLayer implements primitives for object API layer.
type ObjectLayer interface { type ObjectLayer interface {
// Storage operations.
StorageInfo() StorageInfo
// Bucket operations. // Bucket operations.
MakeBucket(bucket string) error MakeBucket(bucket string) error
GetBucketInfo(bucket string) (bucketInfo BucketInfo, err error) GetBucketInfo(bucket string) (bucketInfo BucketInfo, err error)
@ -28,7 +31,7 @@ type ObjectLayer interface {
ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (result ListObjectsInfo, err error) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (result ListObjectsInfo, err error)
// Object operations. // Object operations.
GetObject(bucket, object string, startOffset int64) (reader io.ReadCloser, err error) GetObject(bucket, object string, startOffset int64, length int64, writer io.Writer) (err error)
GetObjectInfo(bucket, object string) (objInfo ObjectInfo, err error) GetObjectInfo(bucket, object string) (objInfo ObjectInfo, err error)
PutObject(bucket, object string, size int64, data io.Reader, metadata map[string]string) (md5 string, err error) PutObject(bucket, object string, size int64, data io.Reader, metadata map[string]string) (md5 string, err error)
DeleteObject(bucket, object string) error DeleteObject(bucket, object string) error

View File

@ -19,15 +19,13 @@ package main
import ( import (
"crypto/md5" "crypto/md5"
"encoding/hex" "encoding/hex"
"errors"
"fmt" "fmt"
"io"
"path" "path"
"regexp" "regexp"
"strings" "strings"
"unicode/utf8" "unicode/utf8"
"github.com/minio/minio/pkg/safe" "github.com/skyrings/skyring-common/tools/uuid"
) )
const ( const (
@ -123,6 +121,20 @@ func pathJoin(elem ...string) string {
return path.Join(elem...) + trailingSlash return path.Join(elem...) + trailingSlash
} }
// getUUID() - get a unique uuid.
func getUUID() (uuidStr string) {
for {
uuid, err := uuid.New()
if err != nil {
errorIf(err, "Unable to initialize uuid")
continue
}
uuidStr = uuid.String()
break
}
return uuidStr
}
// Create an s3 compatible MD5sum for complete multipart transaction. // Create an s3 compatible MD5sum for complete multipart transaction.
func completeMultipartMD5(parts ...completePart) (string, error) { func completeMultipartMD5(parts ...completePart) (string, error) {
var finalMD5Bytes []byte var finalMD5Bytes []byte
@ -145,18 +157,3 @@ type byBucketName []BucketInfo
func (d byBucketName) Len() int { return len(d) } func (d byBucketName) Len() int { return len(d) }
func (d byBucketName) Swap(i, j int) { d[i], d[j] = d[j], d[i] } func (d byBucketName) Swap(i, j int) { d[i], d[j] = d[j], d[i] }
func (d byBucketName) Less(i, j int) bool { return d[i].Name < d[j].Name } func (d byBucketName) Less(i, j int) bool { return d[i].Name < d[j].Name }
// safeCloseAndRemove - safely closes and removes underlying temporary
// file writer if possible.
func safeCloseAndRemove(writer io.WriteCloser) error {
// If writer is a safe file, Attempt to close and remove.
safeWriter, ok := writer.(*safe.File)
if ok {
return safeWriter.CloseAndRemove()
}
wCloser, ok := writer.(*waitCloser)
if ok {
return wCloser.CloseWithError(errors.New("Close and error out."))
}
return nil
}

View File

@ -20,15 +20,12 @@ import (
"bytes" "bytes"
"crypto/md5" "crypto/md5"
"encoding/hex" "encoding/hex"
"io"
"math/rand" "math/rand"
"strconv" "strconv"
"gopkg.in/check.v1" "gopkg.in/check.v1"
) )
// TODO - enable all the commented tests.
// APITestSuite - collection of API tests. // APITestSuite - collection of API tests.
func APITestSuite(c *check.C, create func() ObjectLayer) { func APITestSuite(c *check.C, create func() ObjectLayer) {
testMakeBucket(c, create) testMakeBucket(c, create)
@ -135,24 +132,21 @@ func testMultipleObjectCreation(c *check.C, create func() ObjectLayer) {
objects[key] = []byte(randomString) objects[key] = []byte(randomString)
metadata := make(map[string]string) metadata := make(map[string]string)
metadata["md5Sum"] = expectedMD5Sumhex metadata["md5Sum"] = expectedMD5Sumhex
md5Sum, err := obj.PutObject("bucket", key, int64(len(randomString)), bytes.NewBufferString(randomString), metadata) var md5Sum string
md5Sum, err = obj.PutObject("bucket", key, int64(len(randomString)), bytes.NewBufferString(randomString), metadata)
c.Assert(err, check.IsNil) c.Assert(err, check.IsNil)
c.Assert(md5Sum, check.Equals, expectedMD5Sumhex) c.Assert(md5Sum, check.Equals, expectedMD5Sumhex)
} }
for key, value := range objects { for key, value := range objects {
var byteBuffer bytes.Buffer var byteBuffer bytes.Buffer
r, err := obj.GetObject("bucket", key, 0) err = obj.GetObject("bucket", key, 0, int64(len(value)), &byteBuffer)
c.Assert(err, check.IsNil) c.Assert(err, check.IsNil)
_, e := io.Copy(&byteBuffer, r)
c.Assert(e, check.IsNil)
c.Assert(byteBuffer.Bytes(), check.DeepEquals, value) c.Assert(byteBuffer.Bytes(), check.DeepEquals, value)
c.Assert(r.Close(), check.IsNil)
objInfo, err := obj.GetObjectInfo("bucket", key) objInfo, err := obj.GetObjectInfo("bucket", key)
c.Assert(err, check.IsNil) c.Assert(err, check.IsNil)
c.Assert(objInfo.Size, check.Equals, int64(len(value))) c.Assert(objInfo.Size, check.Equals, int64(len(value)))
r.Close()
} }
} }
@ -269,16 +263,14 @@ func testObjectOverwriteWorks(c *check.C, create func() ObjectLayer) {
_, err = obj.PutObject("bucket", "object", int64(len("The list of parts was not in ascending order. The parts list must be specified in order by part number.")), bytes.NewBufferString("The list of parts was not in ascending order. The parts list must be specified in order by part number."), nil) _, err = obj.PutObject("bucket", "object", int64(len("The list of parts was not in ascending order. The parts list must be specified in order by part number.")), bytes.NewBufferString("The list of parts was not in ascending order. The parts list must be specified in order by part number."), nil)
c.Assert(err, check.IsNil) c.Assert(err, check.IsNil)
_, err = obj.PutObject("bucket", "object", int64(len("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed.")), bytes.NewBufferString("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed."), nil) length := int64(len("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed."))
_, err = obj.PutObject("bucket", "object", length, bytes.NewBufferString("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed."), nil)
c.Assert(err, check.IsNil) c.Assert(err, check.IsNil)
var bytesBuffer bytes.Buffer var bytesBuffer bytes.Buffer
r, err := obj.GetObject("bucket", "object", 0) err = obj.GetObject("bucket", "object", 0, length, &bytesBuffer)
c.Assert(err, check.IsNil) c.Assert(err, check.IsNil)
_, e := io.Copy(&bytesBuffer, r)
c.Assert(e, check.IsNil)
c.Assert(string(bytesBuffer.Bytes()), check.Equals, "The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed.") c.Assert(string(bytesBuffer.Bytes()), check.Equals, "The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed.")
c.Assert(r.Close(), check.IsNil)
} }
// Tests validate that bucket operation on non-existent bucket fails. // Tests validate that bucket operation on non-existent bucket fails.
@ -305,17 +297,14 @@ func testPutObjectInSubdir(c *check.C, create func() ObjectLayer) {
err := obj.MakeBucket("bucket") err := obj.MakeBucket("bucket")
c.Assert(err, check.IsNil) c.Assert(err, check.IsNil)
_, err = obj.PutObject("bucket", "dir1/dir2/object", int64(len("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed.")), bytes.NewBufferString("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed."), nil) length := int64(len("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed."))
_, err = obj.PutObject("bucket", "dir1/dir2/object", length, bytes.NewBufferString("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed."), nil)
c.Assert(err, check.IsNil) c.Assert(err, check.IsNil)
var bytesBuffer bytes.Buffer var bytesBuffer bytes.Buffer
r, err := obj.GetObject("bucket", "dir1/dir2/object", 0) err = obj.GetObject("bucket", "dir1/dir2/object", 0, length, &bytesBuffer)
c.Assert(err, check.IsNil) c.Assert(err, check.IsNil)
n, e := io.Copy(&bytesBuffer, r)
c.Assert(e, check.IsNil)
c.Assert(len(bytesBuffer.Bytes()), check.Equals, len("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed.")) c.Assert(len(bytesBuffer.Bytes()), check.Equals, len("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed."))
c.Assert(int64(len(bytesBuffer.Bytes())), check.Equals, int64(n))
c.Assert(r.Close(), check.IsNil)
} }
// Tests validate ListBuckets. // Tests validate ListBuckets.
@ -386,7 +375,8 @@ func testNonExistantObjectInBucket(c *check.C, create func() ObjectLayer) {
err := obj.MakeBucket("bucket") err := obj.MakeBucket("bucket")
c.Assert(err, check.IsNil) c.Assert(err, check.IsNil)
_, err = obj.GetObject("bucket", "dir1", 0) var bytesBuffer bytes.Buffer
err = obj.GetObject("bucket", "dir1", 0, 10, &bytesBuffer)
c.Assert(err, check.Not(check.IsNil)) c.Assert(err, check.Not(check.IsNil))
switch err := err.(type) { switch err := err.(type) {
case ObjectNotFound: case ObjectNotFound:
@ -405,7 +395,8 @@ func testGetDirectoryReturnsObjectNotFound(c *check.C, create func() ObjectLayer
_, err = obj.PutObject("bucket", "dir1/dir3/object", int64(len("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed.")), bytes.NewBufferString("One or more of the specified parts could not be found. The part might not have been uploaded, or the specified entity tag might not have matched the part's entity tag."), nil) _, err = obj.PutObject("bucket", "dir1/dir3/object", int64(len("The specified multipart upload does not exist. The upload ID might be invalid, or the multipart upload might have been aborted or completed.")), bytes.NewBufferString("One or more of the specified parts could not be found. The part might not have been uploaded, or the specified entity tag might not have matched the part's entity tag."), nil)
c.Assert(err, check.IsNil) c.Assert(err, check.IsNil)
_, err = obj.GetObject("bucket", "dir1", 0) var bytesBuffer bytes.Buffer
err = obj.GetObject("bucket", "dir1", 0, 10, &bytesBuffer)
switch err := err.(type) { switch err := err.(type) {
case ObjectNotFound: case ObjectNotFound:
c.Assert(err.Bucket, check.Equals, "bucket") c.Assert(err.Bucket, check.Equals, "bucket")
@ -415,7 +406,7 @@ func testGetDirectoryReturnsObjectNotFound(c *check.C, create func() ObjectLayer
c.Assert(err, check.Equals, "ObjectNotFound") c.Assert(err, check.Equals, "ObjectNotFound")
} }
_, err = obj.GetObject("bucket", "dir1/", 0) err = obj.GetObject("bucket", "dir1/", 0, 10, &bytesBuffer)
switch err := err.(type) { switch err := err.(type) {
case ObjectNameInvalid: case ObjectNameInvalid:
c.Assert(err.Bucket, check.Equals, "bucket") c.Assert(err.Bucket, check.Equals, "bucket")

View File

@ -16,115 +16,109 @@
// NOTE - Rename() not guaranteed to be safe on all filesystems which are not fully POSIX compatible // NOTE - Rename() not guaranteed to be safe on all filesystems which are not fully POSIX compatible
// Package safe provides safe file write semantics by leveraging Rename's() safeity.
package safe package safe
import ( import (
"io" "errors"
"io/ioutil" "io/ioutil"
"os" "os"
"path/filepath" "path/filepath"
) )
// Vault - vault is an interface for different implementations of safe // File represents safe file descriptor.
// i/o semantics.
type Vault interface {
io.ReadWriteCloser
SyncClose() error
CloseAndRemove() error
}
// File provides for safe file writes.
type File struct { type File struct {
*os.File name string
file string tmpfile *os.File
closed bool
aborted bool
} }
// SyncClose sync file to disk and close, returns an error if any // Write writes len(b) bytes to the temporary File. In case of error, the temporary file is removed.
func (f *File) SyncClose() error { func (file *File) Write(b []byte) (n int, err error) {
// sync to the disk if file.aborted {
if err := f.File.Sync(); err != nil { err = errors.New("write on aborted file")
return err return
} }
// Close the fd. if file.closed {
if err := f.Close(); err != nil { err = errors.New("write on closed file")
return err return
} }
return nil
defer func() {
if err != nil {
os.Remove(file.tmpfile.Name())
file.aborted = true
}
}()
n, err = file.tmpfile.Write(b)
return
} }
// Close the file, returns an error if any // Close closes the temporary File and renames to the named file. In case of error, the temporary file is removed.
func (f *File) Close() error { func (file *File) Close() (err error) {
// Close the embedded fd. defer func() {
if err := f.File.Close(); err != nil { if err != nil {
return err os.Remove(file.tmpfile.Name())
file.aborted = true
} }
// Safe rename to final destination }()
if err := os.Rename(f.Name(), f.file); err != nil {
return err if file.aborted || file.closed {
return
} }
return nil
if err = file.tmpfile.Close(); err != nil {
return
}
err = os.Rename(file.tmpfile.Name(), file.name)
file.closed = true
return
} }
// CloseAndRemove closes the temp file, and safely removes it. Returns // Abort aborts the temporary File by closing and removing the temporary file.
// error if any. func (file *File) Abort() (err error) {
func (f *File) CloseAndRemove() error { if file.aborted || file.closed {
// close the embedded fd return
f.File.Close()
// Remove the temp file.
if err := os.Remove(f.Name()); err != nil {
return err
} }
return nil
file.tmpfile.Close()
err = os.Remove(file.tmpfile.Name())
file.aborted = true
return
} }
// CreateFile creates a new file at filePath for safe writes, it also // CreateFile creates the named file safely from unique temporary file.
// creates parent directories if they don't exist. // The temporary file is renamed to the named file upon successful close
func CreateFile(filePath string) (*File, error) { // to safeguard intermediate state in the named file. The temporary file
return CreateFileWithPrefix(filePath, "$deleteme.") // is created in the name of the named file with suffixed unique number
} // and prefixed "$tmpfile" string. While creating the temporary file,
// missing parent directories are also created. The temporary file is
// CreateFileWithSuffix is similar to CreateFileWithPrefix, but the // removed if case of any intermediate failure. Not removed temporary
// second argument is treated as suffix for the temporary files. // files can be cleaned up by identifying them using "$tmpfile" prefix
func CreateFileWithSuffix(filePath string, suffix string) (*File, error) { // string.
// If parent directories do not exist, ioutil.TempFile doesn't create them func CreateFile(name string) (*File, error) {
// handle such a case with os.MkdirAll() // ioutil.TempFile() fails if parent directory is missing.
if err := os.MkdirAll(filepath.Dir(filePath), 0700); err != nil { // Create parent directory to avoid such error.
dname := filepath.Dir(name)
if err := os.MkdirAll(dname, 0700); err != nil {
return nil, err return nil, err
} }
f, err := ioutil.TempFile(filepath.Dir(filePath), filepath.Base(filePath)+suffix)
fname := filepath.Base(name)
tmpfile, err := ioutil.TempFile(dname, "$tmpfile."+fname+".")
if err != nil { if err != nil {
return nil, err return nil, err
} }
if err = os.Chmod(f.Name(), 0600); err != nil {
if err = os.Remove(f.Name()); err != nil {
return nil, err
}
return nil, err
}
return &File{File: f, file: filePath}, nil
}
// CreateFileWithPrefix creates a new file at filePath for safe if err = os.Chmod(tmpfile.Name(), 0600); err != nil {
// writes, it also creates parent directories if they don't exist. if rerr := os.Remove(tmpfile.Name()); rerr != nil {
// prefix specifies the prefix of the temporary files so that cleaning err = rerr
// stale temp files is easy.
func CreateFileWithPrefix(filePath string, prefix string) (*File, error) {
// If parent directories do not exist, ioutil.TempFile doesn't create them
// handle such a case with os.MkdirAll()
if err := os.MkdirAll(filepath.Dir(filePath), 0700); err != nil {
return nil, err
}
f, err := ioutil.TempFile(filepath.Dir(filePath), prefix+filepath.Base(filePath))
if err != nil {
return nil, err
}
if err = os.Chmod(f.Name(), 0600); err != nil {
if err = os.Remove(f.Name()); err != nil {
return nil, err
} }
return nil, err return nil, err
} }
return &File{File: f, file: filePath}, nil
return &File{name: name, tmpfile: tmpfile}, nil
} }

View File

@ -34,13 +34,14 @@ type MySuite struct {
var _ = Suite(&MySuite{}) var _ = Suite(&MySuite{})
func (s *MySuite) SetUpSuite(c *C) { func (s *MySuite) SetUpSuite(c *C) {
root, err := ioutil.TempDir(os.TempDir(), "safe-") root, err := ioutil.TempDir(os.TempDir(), "safe_test.go.")
c.Assert(err, IsNil) c.Assert(err, IsNil)
s.root = root s.root = root
} }
func (s *MySuite) TearDownSuite(c *C) { func (s *MySuite) TearDownSuite(c *C) {
os.RemoveAll(s.root) err := os.Remove(s.root)
c.Assert(err, IsNil)
} }
func (s *MySuite) TestSafe(c *C) { func (s *MySuite) TestSafe(c *C) {
@ -52,15 +53,17 @@ func (s *MySuite) TestSafe(c *C) {
c.Assert(err, IsNil) c.Assert(err, IsNil)
_, err = os.Stat(filepath.Join(s.root, "testfile")) _, err = os.Stat(filepath.Join(s.root, "testfile"))
c.Assert(err, IsNil) c.Assert(err, IsNil)
err = os.Remove(filepath.Join(s.root, "testfile"))
c.Assert(err, IsNil)
} }
func (s *MySuite) TestSafeRemove(c *C) { func (s *MySuite) TestSafeAbort(c *C) {
f, err := CreateFile(filepath.Join(s.root, "purgefile")) f, err := CreateFile(filepath.Join(s.root, "purgefile"))
c.Assert(err, IsNil) c.Assert(err, IsNil)
_, err = os.Stat(filepath.Join(s.root, "purgefile")) _, err = os.Stat(filepath.Join(s.root, "purgefile"))
c.Assert(err, Not(IsNil)) c.Assert(err, Not(IsNil))
err = f.CloseAndRemove() err = f.Abort()
c.Assert(err, IsNil) c.Assert(err, IsNil)
err = f.Close() _, err = os.Stat(filepath.Join(s.root, "purgefile"))
c.Assert(err, Not(IsNil)) c.Assert(err, Not(IsNil))
} }

137
posix.go
View File

@ -17,23 +17,24 @@
package main package main
import ( import (
"bytes"
"io" "io"
"os" "os"
slashpath "path" slashpath "path"
"path/filepath"
"runtime" "runtime"
"strings" "strings"
"syscall" "syscall"
"github.com/minio/minio/pkg/disk" "github.com/minio/minio/pkg/disk"
"github.com/minio/minio/pkg/safe"
) )
const ( const (
fsMinSpacePercent = 5 fsMinSpacePercent = 5
) )
// fsStorage - implements StorageAPI interface. // posix - implements StorageAPI interface.
type fsStorage struct { type posix struct {
diskPath string diskPath string
minFreeDisk int64 minFreeDisk int64
} }
@ -90,7 +91,7 @@ func newPosix(diskPath string) (StorageAPI, error) {
if diskPath == "" { if diskPath == "" {
return nil, errInvalidArgument return nil, errInvalidArgument
} }
fs := fsStorage{ fs := posix{
diskPath: diskPath, diskPath: diskPath,
minFreeDisk: fsMinSpacePercent, // Minimum 5% disk should be free. minFreeDisk: fsMinSpacePercent, // Minimum 5% disk should be free.
} }
@ -169,7 +170,7 @@ func listVols(dirPath string) ([]VolInfo, error) {
// corresponding valid volume names on the backend in a platform // corresponding valid volume names on the backend in a platform
// compatible way for all operating systems. If volume is not found // compatible way for all operating systems. If volume is not found
// an error is generated. // an error is generated.
func (s fsStorage) getVolDir(volume string) (string, error) { func (s posix) getVolDir(volume string) (string, error) {
if !isValidVolname(volume) { if !isValidVolname(volume) {
return "", errInvalidArgument return "", errInvalidArgument
} }
@ -181,7 +182,7 @@ func (s fsStorage) getVolDir(volume string) (string, error) {
} }
// Make a volume entry. // Make a volume entry.
func (s fsStorage) MakeVol(volume string) (err error) { func (s posix) MakeVol(volume string) (err error) {
// Validate if disk is free. // Validate if disk is free.
if err = checkDiskFree(s.diskPath, s.minFreeDisk); err != nil { if err = checkDiskFree(s.diskPath, s.minFreeDisk); err != nil {
return err return err
@ -201,16 +202,7 @@ func (s fsStorage) MakeVol(volume string) (err error) {
} }
// ListVols - list volumes. // ListVols - list volumes.
func (s fsStorage) ListVols() (volsInfo []VolInfo, err error) { func (s posix) ListVols() (volsInfo []VolInfo, err error) {
// Get disk info to be populated for VolInfo.
var diskInfo disk.Info
diskInfo, err = disk.GetInfo(s.diskPath)
if err != nil {
if os.IsNotExist(err) {
return nil, errDiskNotFound
}
return nil, err
}
volsInfo, err = listVols(s.diskPath) volsInfo, err = listVols(s.diskPath)
if err != nil { if err != nil {
return nil, err return nil, err
@ -219,9 +211,6 @@ func (s fsStorage) ListVols() (volsInfo []VolInfo, err error) {
volInfo := VolInfo{ volInfo := VolInfo{
Name: vol.Name, Name: vol.Name,
Created: vol.Created, Created: vol.Created,
Total: diskInfo.Total,
Free: diskInfo.Free,
FSType: diskInfo.FSType,
} }
volsInfo[i] = volInfo volsInfo[i] = volInfo
} }
@ -229,7 +218,12 @@ func (s fsStorage) ListVols() (volsInfo []VolInfo, err error) {
} }
// StatVol - get volume info. // StatVol - get volume info.
func (s fsStorage) StatVol(volume string) (volInfo VolInfo, err error) { func (s posix) StatVol(volume string) (volInfo VolInfo, err error) {
// Validate if disk is free.
if err = checkDiskFree(s.diskPath, s.minFreeDisk); err != nil {
return VolInfo{}, err
}
// Verify if volume is valid and it exists. // Verify if volume is valid and it exists.
volumeDir, err := s.getVolDir(volume) volumeDir, err := s.getVolDir(volume)
if err != nil { if err != nil {
@ -244,29 +238,17 @@ func (s fsStorage) StatVol(volume string) (volInfo VolInfo, err error) {
} }
return VolInfo{}, err return VolInfo{}, err
} }
// Get disk info, to be returned back along with volume info.
var diskInfo disk.Info
diskInfo, err = disk.GetInfo(s.diskPath)
if err != nil {
if os.IsNotExist(err) {
return VolInfo{}, errDiskNotFound
}
return VolInfo{}, err
}
// As os.Stat() doesn't carry other than ModTime(), use ModTime() // As os.Stat() doesn't carry other than ModTime(), use ModTime()
// as CreatedTime. // as CreatedTime.
createdTime := st.ModTime() createdTime := st.ModTime()
return VolInfo{ return VolInfo{
Name: volume, Name: volume,
Created: createdTime, Created: createdTime,
Free: diskInfo.Free,
Total: diskInfo.Total,
FSType: diskInfo.FSType,
}, nil }, nil
} }
// DeleteVol - delete a volume. // DeleteVol - delete a volume.
func (s fsStorage) DeleteVol(volume string) error { func (s posix) DeleteVol(volume string) error {
// Verify if volume is valid and it exists. // Verify if volume is valid and it exists.
volumeDir, err := s.getVolDir(volume) volumeDir, err := s.getVolDir(volume)
if err != nil { if err != nil {
@ -291,7 +273,7 @@ func (s fsStorage) DeleteVol(volume string) error {
// ListDir - return all the entries at the given directory path. // ListDir - return all the entries at the given directory path.
// If an entry is a directory it will be returned with a trailing "/". // If an entry is a directory it will be returned with a trailing "/".
func (s fsStorage) ListDir(volume, dirPath string) ([]string, error) { func (s posix) ListDir(volume, dirPath string) ([]string, error) {
// Verify if volume is valid and it exists. // Verify if volume is valid and it exists.
volumeDir, err := s.getVolDir(volume) volumeDir, err := s.getVolDir(volume)
if err != nil { if err != nil {
@ -308,91 +290,119 @@ func (s fsStorage) ListDir(volume, dirPath string) ([]string, error) {
return readDir(pathJoin(volumeDir, dirPath)) return readDir(pathJoin(volumeDir, dirPath))
} }
// ReadFile - read a file at a given offset. // ReadFile reads exactly len(buf) bytes into buf. It returns the
func (s fsStorage) ReadFile(volume string, path string, offset int64) (readCloser io.ReadCloser, err error) { // number of bytes copied. The error is EOF only if no bytes were
// read. On return, n == len(buf) if and only if err == nil. n == 0
// for io.EOF. Additionally ReadFile also starts reading from an
// offset.
func (s posix) ReadFile(volume string, path string, offset int64, buf []byte) (n int64, err error) {
volumeDir, err := s.getVolDir(volume) volumeDir, err := s.getVolDir(volume)
if err != nil { if err != nil {
return nil, err return 0, err
} }
// Stat a volume entry. // Stat a volume entry.
_, err = os.Stat(volumeDir) _, err = os.Stat(volumeDir)
if err != nil { if err != nil {
if os.IsNotExist(err) { if os.IsNotExist(err) {
return nil, errVolumeNotFound return 0, errVolumeNotFound
} }
return nil, err return 0, err
} }
filePath := pathJoin(volumeDir, path) filePath := pathJoin(volumeDir, path)
if err = checkPathLength(filePath); err != nil { if err = checkPathLength(filePath); err != nil {
return nil, err return 0, err
} }
file, err := os.Open(filePath) file, err := os.Open(filePath)
if err != nil { if err != nil {
if os.IsNotExist(err) { if os.IsNotExist(err) {
return nil, errFileNotFound return 0, errFileNotFound
} else if os.IsPermission(err) { } else if os.IsPermission(err) {
return nil, errFileAccessDenied return 0, errFileAccessDenied
} else if strings.Contains(err.Error(), "not a directory") {
return 0, errFileNotFound
} }
return nil, err return 0, err
} }
st, err := file.Stat() st, err := file.Stat()
if err != nil { if err != nil {
return nil, err return 0, err
} }
// Verify if its not a regular file, since subsequent Seek is undefined. // Verify if its not a regular file, since subsequent Seek is undefined.
if !st.Mode().IsRegular() { if !st.Mode().IsRegular() {
return nil, errFileNotFound return 0, errFileNotFound
} }
// Seek to requested offset. // Seek to requested offset.
_, err = file.Seek(offset, os.SEEK_SET) _, err = file.Seek(offset, os.SEEK_SET)
if err != nil { if err != nil {
return nil, err return 0, err
} }
return file, nil
// Close the reader.
defer file.Close()
// Read file.
m, err := io.ReadFull(file, buf)
// Error unexpected is valid, set this back to nil.
if err == io.ErrUnexpectedEOF {
err = nil
}
// Success.
return int64(m), err
} }
// CreateFile - create a file at path. // AppendFile - append a byte array at path, if file doesn't exist at
func (s fsStorage) CreateFile(volume, path string) (writeCloser io.WriteCloser, err error) { // path this call explicitly creates it.
func (s posix) AppendFile(volume, path string, buf []byte) (n int64, err error) {
volumeDir, err := s.getVolDir(volume) volumeDir, err := s.getVolDir(volume)
if err != nil { if err != nil {
return nil, err return 0, err
} }
// Stat a volume entry. // Stat a volume entry.
_, err = os.Stat(volumeDir) _, err = os.Stat(volumeDir)
if err != nil { if err != nil {
if os.IsNotExist(err) { if os.IsNotExist(err) {
return nil, errVolumeNotFound return 0, errVolumeNotFound
} }
return nil, err return 0, err
} }
if err = checkDiskFree(s.diskPath, s.minFreeDisk); err != nil { if err = checkDiskFree(s.diskPath, s.minFreeDisk); err != nil {
return nil, err return 0, err
} }
filePath := pathJoin(volumeDir, path) filePath := pathJoin(volumeDir, path)
if err = checkPathLength(filePath); err != nil { if err = checkPathLength(filePath); err != nil {
return nil, err return 0, err
} }
// Verify if the file already exists and is not of regular type. // Verify if the file already exists and is not of regular type.
var st os.FileInfo var st os.FileInfo
if st, err = os.Stat(filePath); err == nil { if st, err = os.Stat(filePath); err == nil {
if st.IsDir() { if st.IsDir() {
return nil, errIsNotRegular return 0, errIsNotRegular
} }
} }
w, err := safe.CreateFileWithPrefix(filePath, "$tmpfile") // Create top level directories if they don't exist.
if err = os.MkdirAll(filepath.Dir(filePath), 0700); err != nil {
return 0, err
}
w, err := os.OpenFile(filePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0600)
if err != nil { if err != nil {
// File path cannot be verified since one of the parents is a file. // File path cannot be verified since one of the parents is a file.
if strings.Contains(err.Error(), "not a directory") { if strings.Contains(err.Error(), "not a directory") {
return nil, errFileAccessDenied return 0, errFileAccessDenied
} }
return nil, err return 0, err
} }
return w, nil // Close upon return.
defer w.Close()
// Return io.Copy
return io.Copy(w, bytes.NewReader(buf))
} }
// StatFile - get file info. // StatFile - get file info.
func (s fsStorage) StatFile(volume, path string) (file FileInfo, err error) { func (s posix) StatFile(volume, path string) (file FileInfo, err error) {
volumeDir, err := s.getVolDir(volume) volumeDir, err := s.getVolDir(volume)
if err != nil { if err != nil {
return FileInfo{}, err return FileInfo{}, err
@ -425,7 +435,6 @@ func (s fsStorage) StatFile(volume, path string) (file FileInfo, err error) {
// Return all errors here. // Return all errors here.
return FileInfo{}, err return FileInfo{}, err
} }
// If its a directory its not a regular file. // If its a directory its not a regular file.
if st.Mode().IsDir() { if st.Mode().IsDir() {
return FileInfo{}, errFileNotFound return FileInfo{}, errFileNotFound
@ -470,7 +479,7 @@ func deleteFile(basePath, deletePath string) error {
} }
// DeleteFile - delete a file at path. // DeleteFile - delete a file at path.
func (s fsStorage) DeleteFile(volume, path string) error { func (s posix) DeleteFile(volume, path string) error {
volumeDir, err := s.getVolDir(volume) volumeDir, err := s.getVolDir(volume)
if err != nil { if err != nil {
return err return err
@ -495,8 +504,8 @@ func (s fsStorage) DeleteFile(volume, path string) error {
return deleteFile(volumeDir, filePath) return deleteFile(volumeDir, filePath)
} }
// RenameFile - rename file. // RenameFile - rename source path to destination path atomically.
func (s fsStorage) RenameFile(srcVolume, srcPath, dstVolume, dstPath string) error { func (s posix) RenameFile(srcVolume, srcPath, dstVolume, dstPath string) error {
srcVolumeDir, err := s.getVolDir(srcVolume) srcVolumeDir, err := s.getVolDir(srcVolume)
if err != nil { if err != nil {
return err return err

View File

@ -33,7 +33,7 @@ func newObjectLayer(exportPaths []string) (ObjectLayer, error) {
} }
// Initialize XL object layer. // Initialize XL object layer.
objAPI, err := newXLObjects(exportPaths) objAPI, err := newXLObjects(exportPaths)
if err == errWriteQuorum { if err == errXLWriteQuorum {
return objAPI, errors.New("Disks are different with last minio server run.") return objAPI, errors.New("Disks are different with last minio server run.")
} }
return objAPI, err return objAPI, err

View File

@ -17,14 +17,8 @@
package main package main
import ( import (
"errors"
"fmt"
"io"
"net/http" "net/http"
"net/rpc" "net/rpc"
"net/url"
urlpath "path"
"strconv"
"strings" "strings"
"time" "time"
) )
@ -151,34 +145,15 @@ func (n networkStorage) DeleteVol(volume string) error {
// File operations. // File operations.
// CreateFile - create file. // CreateFile - create file.
func (n networkStorage) CreateFile(volume, path string) (writeCloser io.WriteCloser, err error) { func (n networkStorage) AppendFile(volume, path string, buffer []byte) (m int64, err error) {
writeURL := new(url.URL) if err = n.rpcClient.Call("Storage.AppendFileHandler", AppendFileArgs{
writeURL.Scheme = n.netScheme Vol: volume,
writeURL.Host = n.netAddr Path: path,
writeURL.Path = fmt.Sprintf("%s/upload/%s", storageRPCPath, urlpath.Join(volume, path)) Buffer: buffer,
}, &m); err != nil {
contentType := "application/octet-stream" return 0, toStorageErr(err)
readCloser, writeCloser := io.Pipe()
go func() {
resp, err := n.httpClient.Post(writeURL.String(), contentType, readCloser)
if err != nil {
readCloser.CloseWithError(err)
return
} }
if resp != nil { return m, nil
if resp.StatusCode != http.StatusOK {
if resp.StatusCode == http.StatusNotFound {
readCloser.CloseWithError(errFileNotFound)
return
}
readCloser.CloseWithError(errors.New("Invalid response."))
return
}
// Close the reader.
readCloser.Close()
}
}()
return writeCloser, nil
} }
// StatFile - get latest Stat information for a file at path. // StatFile - get latest Stat information for a file at path.
@ -193,27 +168,16 @@ func (n networkStorage) StatFile(volume, path string) (fileInfo FileInfo, err er
} }
// ReadFile - reads a file. // ReadFile - reads a file.
func (n networkStorage) ReadFile(volume string, path string, offset int64) (reader io.ReadCloser, err error) { func (n networkStorage) ReadFile(volume string, path string, offset int64, buffer []byte) (m int64, err error) {
readURL := new(url.URL) if err = n.rpcClient.Call("Storage.ReadFileHandler", ReadFileArgs{
readURL.Scheme = n.netScheme Vol: volume,
readURL.Host = n.netAddr Path: path,
readURL.Path = fmt.Sprintf("%s/download/%s", storageRPCPath, urlpath.Join(volume, path)) Offset: offset,
readQuery := make(url.Values) Buffer: buffer,
readQuery.Set("offset", strconv.FormatInt(offset, 10)) }, &m); err != nil {
readURL.RawQuery = readQuery.Encode() return 0, toStorageErr(err)
resp, err := n.httpClient.Get(readURL.String())
if err != nil {
return nil, err
} }
if resp != nil { return m, nil
if resp.StatusCode != http.StatusOK {
if resp.StatusCode == http.StatusNotFound {
return nil, errFileNotFound
}
return nil, errors.New("Invalid response")
}
}
return resp.Body, nil
} }
// ListDir - list all entries at prefix. // ListDir - list all entries at prefix.

View File

@ -27,25 +27,40 @@ type ListVolsReply struct {
Vols []VolInfo Vols []VolInfo
} }
// StatFileArgs stat file args. // ReadFileArgs contains read file arguments.
type ReadFileArgs struct {
// Vol is the volume name handed to the storage layer's ReadFile.
Vol string
// Path is the file path inside Vol.
Path string
// Offset is the position within the file at which the read starts.
Offset int64
// Buffer is the byte slice passed to ReadFile as its destination buffer.
Buffer []byte
}
// AppendFileArgs contains append file arguments. It is the argument
// payload of the "Storage.AppendFileHandler" RPC call.
type AppendFileArgs struct {
// Vol is the volume name handed to the storage layer's AppendFile.
Vol string
// Path is the file path inside Vol.
Path string
// Buffer holds the bytes to be appended to the file.
Buffer []byte
}
// StatFileArgs contains stat file arguments.
type StatFileArgs struct { type StatFileArgs struct {
Vol string Vol string
Path string Path string
} }
// DeleteFileArgs delete file args. // DeleteFileArgs contains delete file arguments.
type DeleteFileArgs struct { type DeleteFileArgs struct {
Vol string Vol string
Path string Path string
} }
// ListDirArgs list dir args. // ListDirArgs contains list dir arguments.
type ListDirArgs struct { type ListDirArgs struct {
Vol string Vol string
Path string Path string
} }
// RenameFileArgs rename file args. // RenameFileArgs contains rename file arguments.
type RenameFileArgs struct { type RenameFileArgs struct {
SrcVol string SrcVol string
SrcPath string SrcPath string

View File

@ -1,10 +1,7 @@
package main package main
import ( import (
"io"
"net/http"
"net/rpc" "net/rpc"
"strconv"
router "github.com/gorilla/mux" router "github.com/gorilla/mux"
) )
@ -78,6 +75,26 @@ func (s *storageServer) ListDirHandler(arg *ListDirArgs, reply *[]string) error
return nil return nil
} }
// ReadFileHandler - read file handler is rpc wrapper to read file.
//
// It forwards arg.Vol, arg.Path, arg.Offset and arg.Buffer to the underlying
// storage layer's ReadFile and stores the returned count in *reply. Any error
// from the storage layer is returned unchanged and *reply is left untouched.
//
// NOTE(review): only the count is sent back to the caller; the bytes are read
// into arg.Buffer, which lives on the server side of the RPC. Confirm that
// callers do not expect their own buffer to be filled by this call.
func (s *storageServer) ReadFileHandler(arg *ReadFileArgs, reply *int64) error {
	n, err := s.storage.ReadFile(arg.Vol, arg.Path, arg.Offset, arg.Buffer)
	if err != nil {
		return err
	}
	// Store through the pointer. Re-pointing the parameter (reply = &n) would
	// only change the local variable and the RPC reply would stay zero.
	*reply = n
	return nil
}
// AppendFileHandler - append file handler is rpc wrapper to append file.
//
// It forwards arg.Vol, arg.Path and arg.Buffer to the underlying storage
// layer's AppendFile and stores the returned count in *reply. Any error from
// the storage layer is returned unchanged and *reply is left untouched.
func (s *storageServer) AppendFileHandler(arg *AppendFileArgs, reply *int64) error {
	n, err := s.storage.AppendFile(arg.Vol, arg.Path, arg.Buffer)
	if err != nil {
		return err
	}
	// Store through the pointer. Re-pointing the parameter (reply = &n) would
	// only change the local variable and the RPC reply would stay zero.
	*reply = n
	return nil
}
// DeleteFileHandler - delete file handler is rpc wrapper to delete file. // DeleteFileHandler - delete file handler is rpc wrapper to delete file.
func (s *storageServer) DeleteFileHandler(arg *DeleteFileArgs, reply *GenericReply) error { func (s *storageServer) DeleteFileHandler(arg *DeleteFileArgs, reply *GenericReply) error {
err := s.storage.DeleteFile(arg.Vol, arg.Path) err := s.storage.DeleteFile(arg.Vol, arg.Path)
@ -115,60 +132,4 @@ func registerStorageRPCRouter(mux *router.Router, stServer *storageServer) {
storageRouter := mux.NewRoute().PathPrefix(reservedBucket).Subrouter() storageRouter := mux.NewRoute().PathPrefix(reservedBucket).Subrouter()
// Add minio storage routes. // Add minio storage routes.
storageRouter.Path("/storage").Handler(storageRPCServer) storageRouter.Path("/storage").Handler(storageRPCServer)
// StreamUpload - stream upload handler.
storageRouter.Methods("POST").Path("/storage/upload/{volume}/{path:.+}").HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
vars := router.Vars(r)
volume := vars["volume"]
path := vars["path"]
writeCloser, err := stServer.storage.CreateFile(volume, path)
if err != nil {
httpErr := http.StatusInternalServerError
if err == errVolumeNotFound {
httpErr = http.StatusNotFound
} else if err == errIsNotRegular {
httpErr = http.StatusConflict
}
http.Error(w, err.Error(), httpErr)
return
}
reader := r.Body
if _, err = io.Copy(writeCloser, reader); err != nil {
safeCloseAndRemove(writeCloser)
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
writeCloser.Close()
reader.Close()
})
// StreamDownloadHandler - stream download handler.
storageRouter.Methods("GET").Path("/storage/download/{volume}/{path:.+}").Queries("offset", "{offset:.*}").HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
vars := router.Vars(r)
volume := vars["volume"]
path := vars["path"]
offset, err := strconv.ParseInt(r.URL.Query().Get("offset"), 10, 64)
if err != nil {
http.Error(w, err.Error(), http.StatusBadRequest)
return
}
readCloser, err := stServer.storage.ReadFile(volume, path, offset)
if err != nil {
httpErr := http.StatusBadRequest
if err == errVolumeNotFound {
httpErr = http.StatusNotFound
} else if err == errFileNotFound {
httpErr = http.StatusNotFound
}
http.Error(w, err.Error(), httpErr)
return
}
// Copy reader to writer.
io.Copy(w, readCloser)
// Flush out any remaining buffers to client.
w.(http.Flusher).Flush()
// Close the reader.
readCloser.Close()
})
} }

View File

@ -26,6 +26,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"syscall" "syscall"
"time"
"github.com/minio/cli" "github.com/minio/cli"
"github.com/minio/mc/pkg/console" "github.com/minio/mc/pkg/console"
@ -33,7 +34,7 @@ import (
var serverCmd = cli.Command{ var serverCmd = cli.Command{
Name: "server", Name: "server",
Usage: "Start Minio cloud storage server.", Usage: "Start object storage server.",
Flags: []cli.Flag{ Flags: []cli.Flag{
cli.StringFlag{ cli.StringFlag{
Name: "address", Name: "address",
@ -64,9 +65,10 @@ EXAMPLES:
3. Start minio server on Windows. 3. Start minio server on Windows.
$ minio {{.Name}} C:\MyShare $ minio {{.Name}} C:\MyShare
4. Start minio server 8 disks to enable erasure coded layer with 4 data and 4 parity. 4. Start minio server 12 disks to enable erasure coded layer with 6 data and 6 parity.
$ minio {{.Name}} /mnt/export1/backend /mnt/export2/backend /mnt/export3/backend /mnt/export4/backend \ $ minio {{.Name}} /mnt/export1/backend /mnt/export2/backend /mnt/export3/backend /mnt/export4/backend \
/mnt/export5/backend /mnt/export6/backend /mnt/export7/backend /mnt/export8/backend /mnt/export5/backend /mnt/export6/backend /mnt/export7/backend /mnt/export8/backend /mnt/export9/backend \
/mnt/export10/backend /mnt/export11/backend /mnt/export12/backend
`, `,
} }
@ -80,6 +82,9 @@ func configureServer(srvCmdConfig serverCmdConfig) *http.Server {
// Minio server config // Minio server config
apiServer := &http.Server{ apiServer := &http.Server{
Addr: srvCmdConfig.serverAddr, Addr: srvCmdConfig.serverAddr,
// Adding timeout of 10 minutes for unresponsive client connections.
ReadTimeout: 10 * time.Minute,
WriteTimeout: 10 * time.Minute,
Handler: configureServerHandler(srvCmdConfig), Handler: configureServerHandler(srvCmdConfig),
MaxHeaderBytes: 1 << 20, MaxHeaderBytes: 1 << 20,
} }

View File

@ -444,7 +444,7 @@ func (s *MyAPISuite) TestBucket(c *C) {
c.Assert(response.StatusCode, Equals, http.StatusOK) c.Assert(response.StatusCode, Equals, http.StatusOK)
} }
func (s *MyAPISuite) TestObject(c *C) { func (s *MyAPISuite) TestObjectGet(c *C) {
buffer := bytes.NewReader([]byte("hello world")) buffer := bytes.NewReader([]byte("hello world"))
request, err := s.newRequest("PUT", testAPIFSCacheServer.URL+"/testobject", 0, nil) request, err := s.newRequest("PUT", testAPIFSCacheServer.URL+"/testobject", 0, nil)
c.Assert(err, IsNil) c.Assert(err, IsNil)

View File

@ -920,9 +920,18 @@ func (s *MyAPIXLSuite) TestPartialContent(c *C) {
c.Assert(response.StatusCode, Equals, http.StatusOK) c.Assert(response.StatusCode, Equals, http.StatusOK)
// Prepare request // Prepare request
var table = []struct {
byteRange string
expectedString string
}{
{"6-7", "Wo"},
{"6-", "World"},
{"-7", "o World"},
}
for _, t := range table {
request, err = s.newRequest("GET", testAPIXLServer.URL+"/partial-content/bar", 0, nil) request, err = s.newRequest("GET", testAPIXLServer.URL+"/partial-content/bar", 0, nil)
c.Assert(err, IsNil) c.Assert(err, IsNil)
request.Header.Add("Range", "bytes=6-7") request.Header.Add("Range", "bytes="+t.byteRange)
client = http.Client{} client = http.Client{}
response, err = client.Do(request) response, err = client.Do(request)
@ -930,8 +939,8 @@ func (s *MyAPIXLSuite) TestPartialContent(c *C) {
c.Assert(response.StatusCode, Equals, http.StatusPartialContent) c.Assert(response.StatusCode, Equals, http.StatusPartialContent)
partialObject, err := ioutil.ReadAll(response.Body) partialObject, err := ioutil.ReadAll(response.Body)
c.Assert(err, IsNil) c.Assert(err, IsNil)
c.Assert(string(partialObject), Equals, t.expectedString)
c.Assert(string(partialObject), Equals, "Wo") }
} }
func (s *MyAPIXLSuite) TestListObjectsHandlerErrors(c *C) { func (s *MyAPIXLSuite) TestListObjectsHandlerErrors(c *C) {

View File

@ -25,9 +25,6 @@ import (
type VolInfo struct { type VolInfo struct {
Name string Name string
Created time.Time Created time.Time
Total int64
Free int64
FSType string
} }
// FileInfo - file stat information. // FileInfo - file stat information.

View File

@ -18,6 +18,9 @@ package main
import "errors" import "errors"
// errUnexpected - unexpected error, requires manual intervention.
var errUnexpected = errors.New("Unexpected error, please report this issue at https://github.com/minio/minio/issues")
// errCorruptedFormat - corrupted backend format. // errCorruptedFormat - corrupted backend format.
var errCorruptedFormat = errors.New("corrupted backend format") var errCorruptedFormat = errors.New("corrupted backend format")
@ -48,18 +51,8 @@ var errVolumeNotFound = errors.New("volume not found")
// errVolumeNotEmpty - volume not empty. // errVolumeNotEmpty - volume not empty.
var errVolumeNotEmpty = errors.New("volume is not empty") var errVolumeNotEmpty = errors.New("volume is not empty")
// errVolumeAccessDenied - cannot access volume, insufficient // errVolumeAccessDenied - cannot access volume, insufficient permissions.
// permissions.
var errVolumeAccessDenied = errors.New("volume access denied") var errVolumeAccessDenied = errors.New("volume access denied")
// errFileAccessDenied - cannot access file, insufficient permissions. // errFileAccessDenied - cannot access file, insufficient permissions.
var errFileAccessDenied = errors.New("file access denied") var errFileAccessDenied = errors.New("file access denied")
// errReadQuorum - did not meet read quorum.
var errReadQuorum = errors.New("I/O error. did not meet read quorum.")
// errWriteQuorum - did not meet write quorum.
var errWriteQuorum = errors.New("I/O error. did not meet write quorum.")
// errDataCorrupt - err data corrupt.
var errDataCorrupt = errors.New("data likely corrupted, all blocks are zero in length")

View File

@ -16,8 +16,6 @@
package main package main
import "io"
// StorageAPI interface. // StorageAPI interface.
type StorageAPI interface { type StorageAPI interface {
// Volume operations. // Volume operations.
@ -28,9 +26,9 @@ type StorageAPI interface {
// File operations. // File operations.
ListDir(volume, dirPath string) ([]string, error) ListDir(volume, dirPath string) ([]string, error)
ReadFile(volume string, path string, offset int64) (readCloser io.ReadCloser, err error) ReadFile(volume string, path string, offset int64, buf []byte) (n int64, err error)
CreateFile(volume string, path string) (writeCloser io.WriteCloser, err error) AppendFile(volume string, path string, buf []byte) (n int64, err error)
RenameFile(srcVolume, srcPath, dstVolume, dstPath string) error
StatFile(volume string, path string) (file FileInfo, err error) StatFile(volume string, path string) (file FileInfo, err error)
DeleteFile(volume string, path string) (err error) DeleteFile(volume string, path string) (err error)
RenameFile(srcVolume, srcPath, dstVolume, dstPath string) error
} }

View File

@ -44,6 +44,10 @@ func ExecObjectLayerTest(t *testing.T, objTest func(obj ObjectLayer, instanceTyp
} }
erasureDisks = append(erasureDisks, path) erasureDisks = append(erasureDisks, path)
} }
// Initialize name space lock.
initNSLock()
objLayer, err := newXLObjects(erasureDisks) objLayer, err := newXLObjects(erasureDisks)
if err != nil { if err != nil {
return nil, nil, err return nil, nil, err
@ -59,6 +63,9 @@ func ExecObjectLayerTest(t *testing.T, objTest func(obj ObjectLayer, instanceTyp
return nil, "", err return nil, "", err
} }
// Initialize name space lock.
initNSLock()
// Create the obj. // Create the obj.
objLayer, err := newFSObjects(fsDir) objLayer, err := newFSObjects(fsDir)
if err != nil { if err != nil {
@ -80,7 +87,7 @@ func ExecObjectLayerTest(t *testing.T, objTest func(obj ObjectLayer, instanceTyp
} }
// Executing the object layer tests for single node setup. // Executing the object layer tests for single node setup.
objTest(objLayer, singleNodeTestStr, t) objTest(objLayer, singleNodeTestStr, t)
initNSLock()
objLayer, fsDirs, err := getXLObjectLayer() objLayer, fsDirs, err := getXLObjectLayer()
if err != nil { if err != nil {
t.Fatalf("Initialization of object layer failed for XL setup: %s", err.Error()) t.Fatalf("Initialization of object layer failed for XL setup: %s", err.Error())

206
tree-walk-fs.go Normal file
View File

@ -0,0 +1,206 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"path"
"sort"
"strings"
"time"
)
// treeWalkerFS is the handle returned for a tree walk running in a
// goroutine. It carries the channel on which tree walk results are
// delivered, and additionally records whether the walk has timed out.
type treeWalkerFS struct {
// ch is the receive-only end of the channel carrying walk results.
ch <-chan treeWalkResultFS
// timedOut is set to true by the walk's send function when a result could
// not be delivered before its timer fired; lookupTreeWalk skips such walkers.
timedOut bool
}
// treeWalkResultFS is a single result produced while walking the tree.
type treeWalkResultFS struct {
// entry is the walked entry's path, built by joining the walked directory
// prefix with the entry name.
entry string
// err is set when listing a directory failed; entry is empty in that case.
err error
// end is set by the send function when the walker's pending-entry counter
// is zero at the time the result is sent.
end bool
}
// treeWalk walks the FS directory tree under prefixDir in bucket, recursively
// when recursive is set, and hands every entry it encounters to send as a
// treeWalkResultFS carrying the entry's joined path.
//
// Parameters:
//   - entryPrefixMatch: when non-empty, only entries of prefixDir starting
//     with this string are considered; recursive calls always pass "".
//   - marker: path relative to prefixDir; entries sorting before its first
//     path component are skipped.
//   - send: delivers one result; a false return aborts the walk.
//   - count: shared counter, increased by the number of entries pending at
//     each level and decreased as each entry is consumed.
//   - isLeaf: called for each entry's joined path; when it returns true the
//     trailing slash is trimmed from the entry so it is treated as a file.
//
// It returns false when listing a directory failed (the error is passed to
// send first) or when send returned false, and true otherwise.
func (fs fsObjects) treeWalk(bucket, prefixDir, entryPrefixMatch, marker string, recursive bool, send func(treeWalkResultFS) bool, count *int, isLeaf func(string, string) bool) bool {
// Example:
// if prefixDir="one/two/three/" and marker="four/five.txt" treeWalk is recursively
// called for the directory "one/two/three/four" with marker="five.txt"
var markerBase, markerDir string
if marker != "" {
// Ex: if marker="four/five.txt", markerDir="four/" markerBase="five.txt"
markerSplit := strings.SplitN(marker, slashSeparator, 2)
markerDir = markerSplit[0]
if len(markerSplit) == 2 {
markerDir += slashSeparator
markerBase = markerSplit[1]
}
}
entries, err := fs.storage.ListDir(bucket, prefixDir)
if err != nil {
// Report the listing failure to the consumer; the result of send is
// irrelevant here since the walk stops either way.
send(treeWalkResultFS{err: err})
return false
}
for i, entry := range entries {
if entryPrefixMatch != "" {
if !strings.HasPrefix(entry, entryPrefixMatch) {
// Blank out non-matching entries; they sort first and are dropped below.
entries[i] = ""
continue
}
}
if isLeaf(bucket, pathJoin(prefixDir, entry)) {
// Leaf entries lose their trailing slash so that they are sent as
// results rather than recursed into.
entries[i] = strings.TrimSuffix(entry, slashSeparator)
}
}
// Sort after the rewrite above: trimming the slash can change the order.
sort.Strings(entries)
// Skip the empty strings
for len(entries) > 0 && entries[0] == "" {
entries = entries[1:]
}
if len(entries) == 0 {
return true
}
// example:
// If markerDir="four/" Search() returns the index of "four/" in the sorted
// entries list so we skip all the entries till "four/"
idx := sort.Search(len(entries), func(i int) bool {
return entries[i] >= markerDir
})
entries = entries[idx:]
// Account for every entry still pending at this level; each path through
// the loop below decrements the counter exactly once per entry.
*count += len(entries)
for i, entry := range entries {
if i == 0 && markerDir == entry {
if !recursive {
// Skip as the marker would already be listed in the previous listing.
*count--
continue
}
if recursive && !strings.HasSuffix(entry, slashSeparator) {
// We should not skip for recursive listing and if markerDir is a directory
// for ex. if marker is "four/five.txt" markerDir will be "four/" which
// should not be skipped, instead it will need to be treeWalk()'ed into.
// Skip if it is a file though as it would be listed in previous listing.
*count--
continue
}
}
if recursive && strings.HasSuffix(entry, slashSeparator) {
// If the entry is a directory, we will need recurse into it.
markerArg := ""
if entry == markerDir {
// We need to pass "five.txt" as marker only if we are
// recursing into "four/"
markerArg = markerBase
}
*count--
prefixMatch := "" // Valid only for first level treeWalk and empty for subdirectories.
// Note: path.Join cleans its result, so the directory passed down has
// no trailing slash.
if !fs.treeWalk(bucket, path.Join(prefixDir, entry), prefixMatch, markerArg, recursive, send, count, isLeaf) {
return false
}
continue
}
// Decrement before sending so that send observes a zero counter when this
// is the last pending entry.
*count--
if !send(treeWalkResultFS{entry: pathJoin(prefixDir, entry)}) {
return false
}
}
return true
}
// startTreeWalk launches a treeWalk in its own goroutine and returns the
// walker handle whose channel receives the results.
//
// The prefix is split at its last separator into the directory to walk and
// the entry-name prefix to match within that directory:
//
//	prefix "one/two/three/" -> prefixDir "one/two/three/", entryPrefixMatch ""
//	prefix "one/two/th"     -> prefixDir "one/two/",       entryPrefixMatch "th"
//
// The marker is made relative to prefixDir before it is handed to treeWalk,
// e.g. "one/two/three/four/five.txt" becomes "four/five.txt" (first example)
// or "three/four/five.txt" (second example).
func (fs fsObjects) startTreeWalk(bucket, prefix, marker string, recursive bool, isLeaf func(string, string) bool) *treeWalkerFS {
	results := make(chan treeWalkResultFS, maxObjectList)
	walker := &treeWalkerFS{ch: results}

	// Without a separator the whole prefix is an entry-name filter.
	prefixDir, entryPrefixMatch := "", prefix
	if sep := strings.LastIndex(prefix, slashSeparator); sep != -1 {
		prefixDir, entryPrefixMatch = prefix[:sep+1], prefix[sep+1:]
	}
	relMarker := strings.TrimPrefix(marker, prefixDir)

	// pending is maintained by treeWalk; zero means no entries are left.
	pending := 0

	// deliver pushes one result to the channel, giving up after 60 seconds
	// if nobody is receiving and flagging the walker as timed out.
	deliver := func(res treeWalkResultFS) bool {
		if pending == 0 {
			res.end = true
		}
		expired := time.After(60 * time.Second)
		select {
		case results <- res:
			return true
		case <-expired:
			walker.timedOut = true
			return false
		}
	}

	go func() {
		// Closing the channel tells the receiver the walk is over.
		defer close(results)
		fs.treeWalk(bucket, prefixDir, entryPrefixMatch, relMarker, recursive, deliver, &pending, isLeaf)
	}()
	return walker
}
// saveTreeWalk records walker under params in the list-object map so that a
// later lookup with the same params can pick it up. The map is guarded by
// listObjectMapMutex.
func (fs fsObjects) saveTreeWalk(params listParams, walker *treeWalkerFS) {
	fs.listObjectMapMutex.Lock()
	defer fs.listObjectMapMutex.Unlock()

	// Appending to a missing (nil) slice creates it.
	fs.listObjectMap[params] = append(fs.listObjectMap[params], walker)
}
// lookupTreeWalk returns the first saved walker for params that has not
// timed out, or nil when there is none. The returned walker and every walker
// saved before it are dropped from the map; when nothing usable remains the
// map entry is deleted altogether.
func (fs fsObjects) lookupTreeWalk(params listParams) *treeWalkerFS {
	fs.listObjectMapMutex.Lock()
	defer fs.listObjectMapMutex.Unlock()

	walkers, found := fs.listObjectMap[params]
	if !found {
		return nil
	}
	for idx := range walkers {
		candidate := walkers[idx]
		if candidate.timedOut {
			continue
		}
		// Keep only the walkers saved after the one being handed out.
		if remaining := walkers[idx+1:]; len(remaining) > 0 {
			fs.listObjectMap[params] = remaining
		} else {
			delete(fs.listObjectMap, params)
		}
		return candidate
	}
	// As all channels are timed out, delete the map entry
	delete(fs.listObjectMap, params)
	return nil
}

View File

@ -17,11 +17,8 @@
package main package main
import ( import (
"os"
"path"
"sort" "sort"
"strings" "strings"
"sync"
"time" "time"
) )
@ -35,7 +32,7 @@ type listParams struct {
// Tree walk result carries results of tree walking. // Tree walk result carries results of tree walking.
type treeWalkResult struct { type treeWalkResult struct {
fileInfo FileInfo entry string
err error err error
end bool end bool
} }
@ -48,58 +45,44 @@ type treeWalker struct {
timedOut bool timedOut bool
} }
// treeWalk walks FS directory tree recursively pushing fileInfo into the channel as and when it encounters files. // listDir - listDir.
func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker string, recursive bool, send func(treeWalkResult) bool, count *int) bool { func (xl xlObjects) listDir(bucket, prefixDir string, filter func(entry string) bool, isLeaf func(string, string) bool) (entries []string, err error) {
for _, disk := range xl.getLoadBalancedQuorumDisks() {
if disk == nil {
continue
}
entries, err = disk.ListDir(bucket, prefixDir)
if err != nil {
break
}
// Skip the entries which do not match the filter.
for i, entry := range entries {
if filter(entry) {
entries[i] = ""
continue
}
if strings.HasSuffix(entry, slashSeparator) && isLeaf(bucket, pathJoin(prefixDir, entry)) {
entries[i] = strings.TrimSuffix(entry, slashSeparator)
}
}
sort.Strings(entries)
// Skip the empty strings
for len(entries) > 0 && entries[0] == "" {
entries = entries[1:]
}
return entries, nil
}
// Return error at the end.
return nil, err
}
// treeWalk walks directory tree recursively pushing fileInfo into the channel as and when it encounters files.
func (xl xlObjects) treeWalk(bucket, prefixDir, entryPrefixMatch, marker string, recursive bool, send func(treeWalkResult) bool, count *int, isLeaf func(string, string) bool) bool {
// Example: // Example:
// if prefixDir="one/two/three/" and marker="four/five.txt" treeWalk is recursively // if prefixDir="one/two/three/" and marker="four/five.txt" treeWalk is recursively
// called with prefixDir="one/two/three/four/" and marker="five.txt" // called with prefixDir="one/two/three/four/" and marker="five.txt"
var isXL bool
var disk StorageAPI
switch l := layer.(type) {
case xlObjects:
isXL = true
disk = l.storage
case fsObjects:
disk = l.storage
}
// Convert entry to FileInfo
entryToFileInfo := func(entry string) (fileInfo FileInfo, err error) {
if strings.HasSuffix(entry, slashSeparator) {
// Object name needs to be full path.
fileInfo.Name = path.Join(prefixDir, entry)
fileInfo.Name += slashSeparator
fileInfo.Mode = os.ModeDir
return
}
if isXL && strings.HasSuffix(entry, multipartSuffix) {
// If the entry was detected as a multipart file we use
// getMultipartObjectInfo() to fill the FileInfo structure.
entry = strings.TrimSuffix(entry, multipartSuffix)
var info MultipartObjectInfo
info, err = getMultipartObjectInfo(disk, bucket, path.Join(prefixDir, entry))
if err != nil {
return
}
// Set the Mode to a "regular" file.
fileInfo.Mode = 0
// Trim the suffix that was temporarily added to indicate that this
// is a multipart file.
fileInfo.Name = path.Join(prefixDir, entry)
fileInfo.Size = info.Size
fileInfo.MD5Sum = info.MD5Sum
fileInfo.ModTime = info.ModTime
return
}
if fileInfo, err = disk.StatFile(bucket, path.Join(prefixDir, entry)); err != nil {
return
}
// Object name needs to be full path.
fileInfo.Name = path.Join(prefixDir, entry)
return
}
var markerBase, markerDir string var markerBase, markerDir string
if marker != "" { if marker != "" {
// Ex: if marker="four/five.txt", markerDir="four/" markerBase="five.txt" // Ex: if marker="four/five.txt", markerDir="four/" markerBase="five.txt"
@ -110,41 +93,22 @@ func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker str
markerBase = markerSplit[1] markerBase = markerSplit[1]
} }
} }
entries, err := disk.ListDir(bucket, prefixDir) entries, err := xl.listDir(bucket, prefixDir, func(entry string) bool {
return !strings.HasPrefix(entry, entryPrefixMatch)
}, isLeaf)
if err != nil { if err != nil {
send(treeWalkResult{err: err}) send(treeWalkResult{err: err})
return false return false
} }
if entryPrefixMatch != "" {
for i, entry := range entries {
if !strings.HasPrefix(entry, entryPrefixMatch) {
entries[i] = ""
}
}
}
// For XL multipart files strip the trailing "/" and append ".minio.multipart" to the entry so that
// entryToFileInfo() can call StatFile for regular files or getMultipartObjectInfo() for multipart files.
for i, entry := range entries {
if isXL && strings.HasSuffix(entry, slashSeparator) {
if isMultipartObject(disk, bucket, path.Join(prefixDir, entry)) {
entries[i] = strings.TrimSuffix(entry, slashSeparator) + multipartSuffix
}
}
}
sort.Sort(byMultipartFiles(entries))
// Skip the empty strings
for len(entries) > 0 && entries[0] == "" {
entries = entries[1:]
}
if len(entries) == 0 { if len(entries) == 0 {
return true return true
} }
// example: // example:
// If markerDir="four/" Search() returns the index of "four/" in the sorted // If markerDir="four/" Search() returns the index of "four/" in the sorted
// entries list so we skip all the entries till "four/" // entries list so we skip all the entries till "four/"
idx := sort.Search(len(entries), func(i int) bool { idx := sort.Search(len(entries), func(i int) bool {
return strings.TrimSuffix(entries[i], multipartSuffix) >= markerDir return entries[i] >= markerDir
}) })
entries = entries[idx:] entries = entries[idx:]
*count += len(entries) *count += len(entries)
@ -176,19 +140,13 @@ func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker str
} }
*count-- *count--
prefixMatch := "" // Valid only for first level treeWalk and empty for subdirectories. prefixMatch := "" // Valid only for first level treeWalk and empty for subdirectories.
if !treeWalk(layer, bucket, path.Join(prefixDir, entry), prefixMatch, markerArg, recursive, send, count) { if !xl.treeWalk(bucket, pathJoin(prefixDir, entry), prefixMatch, markerArg, recursive, send, count, isLeaf) {
return false return false
} }
continue continue
} }
*count-- *count--
fileInfo, err := entryToFileInfo(entry) if !send(treeWalkResult{entry: pathJoin(prefixDir, entry)}) {
if err != nil {
// The file got deleted in the interim between ListDir() and StatFile()
// Ignore error and continue.
continue
}
if !send(treeWalkResult{fileInfo: fileInfo}) {
return false return false
} }
} }
@ -196,7 +154,7 @@ func treeWalk(layer ObjectLayer, bucket, prefixDir, entryPrefixMatch, marker str
} }
// Initiate a new treeWalk in a goroutine. // Initiate a new treeWalk in a goroutine.
func startTreeWalk(layer ObjectLayer, bucket, prefix, marker string, recursive bool) *treeWalker { func (xl xlObjects) startTreeWalk(bucket, prefix, marker string, recursive bool, isLeaf func(string, string) bool) *treeWalker {
// Example 1 // Example 1
// If prefix is "one/two/three/" and marker is "one/two/three/four/five.txt" // If prefix is "one/two/three/" and marker is "one/two/three/four/five.txt"
// treeWalk is called with prefixDir="one/two/three/" and marker="four/five.txt" // treeWalk is called with prefixDir="one/two/three/" and marker="four/five.txt"
@ -233,61 +191,41 @@ func startTreeWalk(layer ObjectLayer, bucket, prefix, marker string, recursive b
return false return false
} }
} }
treeWalk(layer, bucket, prefixDir, entryPrefixMatch, marker, recursive, send, &count) xl.treeWalk(bucket, prefixDir, entryPrefixMatch, marker, recursive, send, &count, isLeaf)
}() }()
return &walkNotify return &walkNotify
} }
// Save the goroutine reference in the map // Save the goroutine reference in the map
func saveTreeWalk(layer ObjectLayer, params listParams, walker *treeWalker) { func (xl xlObjects) saveTreeWalk(params listParams, walker *treeWalker) {
var listObjectMap map[listParams][]*treeWalker xl.listObjectMapMutex.Lock()
var listObjectMapMutex *sync.Mutex defer xl.listObjectMapMutex.Unlock()
switch l := layer.(type) {
case xlObjects:
listObjectMap = l.listObjectMap
listObjectMapMutex = l.listObjectMapMutex
case fsObjects:
listObjectMap = l.listObjectMap
listObjectMapMutex = l.listObjectMapMutex
}
listObjectMapMutex.Lock()
defer listObjectMapMutex.Unlock()
walkers, _ := listObjectMap[params] walkers, _ := xl.listObjectMap[params]
walkers = append(walkers, walker) walkers = append(walkers, walker)
listObjectMap[params] = walkers xl.listObjectMap[params] = walkers
} }
// Lookup the goroutine reference from map // Lookup the goroutine reference from map
func lookupTreeWalk(layer ObjectLayer, params listParams) *treeWalker { func (xl xlObjects) lookupTreeWalk(params listParams) *treeWalker {
var listObjectMap map[listParams][]*treeWalker xl.listObjectMapMutex.Lock()
var listObjectMapMutex *sync.Mutex defer xl.listObjectMapMutex.Unlock()
switch l := layer.(type) {
case xlObjects:
listObjectMap = l.listObjectMap
listObjectMapMutex = l.listObjectMapMutex
case fsObjects:
listObjectMap = l.listObjectMap
listObjectMapMutex = l.listObjectMapMutex
}
listObjectMapMutex.Lock()
defer listObjectMapMutex.Unlock()
if walkChs, ok := listObjectMap[params]; ok { if walkChs, ok := xl.listObjectMap[params]; ok {
for i, walkCh := range walkChs { for i, walkCh := range walkChs {
if !walkCh.timedOut { if !walkCh.timedOut {
newWalkChs := walkChs[i+1:] newWalkChs := walkChs[i+1:]
if len(newWalkChs) > 0 { if len(newWalkChs) > 0 {
listObjectMap[params] = newWalkChs xl.listObjectMap[params] = newWalkChs
} else { } else {
delete(listObjectMap, params) delete(xl.listObjectMap, params)
} }
return walkCh return walkCh
} }
} }
// As all channels are timed out, delete the map entry // As all channels are timed out, delete the map entry
delete(listObjectMap, params) delete(xl.listObjectMap, params)
} }
return nil return nil
} }

View File

@ -6,6 +6,7 @@
```sh ```sh
$ git clone https://github.com/minio/MinioBrowser $ git clone https://github.com/minio/MinioBrowser
$ cd MinioBrowser
$ npm install $ npm install
``` ```

File diff suppressed because one or more lines are too long

4
vendor/vendor.json vendored
View File

@ -94,8 +94,8 @@
}, },
{ {
"path": "github.com/minio/miniobrowser", "path": "github.com/minio/miniobrowser",
"revision": "16a35547d5b0aea8de96b74416929ab2e2d248cd", "revision": "9c9fbc91e4b2e952048f9299c45d53ee0a0d0f2b",
"revisionTime": "2016-04-21T19:57:24-07:00" "revisionTime": "2016-05-30T17:30:33-07:00"
}, },
{ {
"path": "github.com/mitchellh/go-homedir", "path": "github.com/mitchellh/go-homedir",

View File

@ -18,7 +18,6 @@ package main
import ( import (
"fmt" "fmt"
"io"
"net/http" "net/http"
"os" "os"
"path" "path"
@ -98,6 +97,22 @@ func (web *webAPIHandlers) ServerInfo(r *http.Request, args *WebGenericArgs, rep
return nil return nil
} }
// StorageInfoRep - contains storage usage statistics. It is the reply
// payload filled in by the StorageInfo web call.
type StorageInfoRep struct {
// StorageInfo is the value returned by the object layer's StorageInfo().
StorageInfo StorageInfo `json:"storageInfo"`
// UIVersion is copied from miniobrowser.UIVersion.
UIVersion string `json:"uiVersion"`
}
// StorageInfo - web call that reports storage usage statistics along
// with the browser UI version. Requests without a valid JWT are
// rejected with a JSON-RPC "Unauthorized request" error.
func (web *webAPIHandlers) StorageInfo(r *http.Request, args *GenericArgs, reply *StorageInfoRep) error {
	if authenticated := isJWTReqAuthenticated(r); !authenticated {
		return &json2.Error{Message: "Unauthorized request"}
	}
	// Fill in both reply fields; the UI version comes from the vendored
	// browser package, the usage numbers from the object layer.
	uiVersion := miniobrowser.UIVersion
	reply.UIVersion = uiVersion
	reply.StorageInfo = web.ObjectAPI.StorageInfo()
	return nil
}
// MakeBucketArgs - make bucket args. // MakeBucketArgs - make bucket args.
type MakeBucketArgs struct { type MakeBucketArgs struct {
BucketName string `json:"bucketName"` BucketName string `json:"bucketName"`
@ -127,10 +142,6 @@ type WebBucketInfo struct {
Name string `json:"name"` Name string `json:"name"`
// Date the bucket was created. // Date the bucket was created.
CreationDate time.Time `json:"creationDate"` CreationDate time.Time `json:"creationDate"`
// Total storage space where the bucket resides.
Total int64 `json:"total"`
// Free storage space where the bucket resides.
Free int64 `json:"free"`
} }
// ListBuckets - list buckets api. // ListBuckets - list buckets api.
@ -148,8 +159,6 @@ func (web *webAPIHandlers) ListBuckets(r *http.Request, args *WebGenericArgs, re
reply.Buckets = append(reply.Buckets, WebBucketInfo{ reply.Buckets = append(reply.Buckets, WebBucketInfo{
Name: bucket.Name, Name: bucket.Name,
CreationDate: bucket.Created, CreationDate: bucket.Created,
Total: bucket.Total,
Free: bucket.Free,
}) })
} }
} }
@ -373,12 +382,14 @@ func (web *webAPIHandlers) Download(w http.ResponseWriter, r *http.Request) {
// Add content disposition. // Add content disposition.
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", filepath.Base(object))) w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", filepath.Base(object)))
objReader, err := web.ObjectAPI.GetObject(bucket, object, 0) objInfo, err := web.ObjectAPI.GetObjectInfo(bucket, object)
if err != nil { if err != nil {
writeWebErrorResponse(w, err) writeWebErrorResponse(w, err)
return return
} }
if _, err := io.Copy(w, objReader); err != nil { offset := int64(0)
err = web.ObjectAPI.GetObject(bucket, object, offset, objInfo.Size, w)
if err != nil {
/// No need to print error, response writer already written to. /// No need to print error, response writer already written to.
return return
} }

View File

@ -1,287 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"fmt"
"io"
slashpath "path"
"sync"
"time"
)
// Erasure block size.
const erasureBlockSize = 4 * 1024 * 1024 // 4MiB.
// cleanupCreateFileOps - discards all the given temporary writers and
// then asks every storage disk to delete volume/path. A failed delete
// is only logged; the remaining disks are still cleaned up.
func (xl XL) cleanupCreateFileOps(volume, path string, writers ...io.WriteCloser) {
	closeAndRemoveWriters(writers...)
	for i := range xl.storageDisks {
		err := xl.storageDisks[i].DeleteFile(volume, path)
		if err == nil {
			continue
		}
		errorIf(err, "Unable to delete file.")
	}
}
// closeAndRemoveWriters - hands every writer to safeCloseAndRemove and
// logs (without aborting) any failure it reports.
func closeAndRemoveWriters(writers ...io.WriteCloser) {
	for i := range writers {
		err := safeCloseAndRemove(writers[i])
		if err == nil {
			continue
		}
		errorIf(err, "Failed to close writer.")
	}
}
// writeErasure reads the incoming stream from reader in blocks of
// erasureBlockSize, splits and Reed-Solomon encodes each block, and
// writes shard i of every block to "<path>/file.<i>" on storage disk i.
// Once the stream is exhausted it writes an xlMetaV1 document to
// "<path>/file.json" on every disk that has a metadata writer and then
// closes all writers.
//
// Any failure is reported to the producing side by closing reader with
// the error, after cleanupCreateFileOps has removed what was written so
// far. wcloser is released when the function returns, whatever the
// outcome.
func (xl XL) writeErasure(volume, path string, reader *io.PipeReader, wcloser *waitCloser) {
// Release the block writer upon function return.
defer wcloser.release()
partsMetadata, errs := xl.getPartsMetadata(volume, path)
// Convert errs into meaningful err to be sent upwards if possible
// based on total number of errors and read quorum.
// A missing file is expected when creating a new object, so
// errFileNotFound is not treated as a failure here.
err := xl.reduceError(errs)
if err != nil && err != errFileNotFound {
reader.CloseWithError(err)
return
}
// List all the file versions on existing files.
versions := listFileVersions(partsMetadata, errs)
// Get highest file version.
higherVersion := highestInt(versions)
// Increment to have next higher version.
higherVersion++
// writers and metadataWriters are indexed by disk; an entry stays nil
// for a disk on which file creation failed.
writers := make([]io.WriteCloser, len(xl.storageDisks))
xlMetaV1FilePath := slashpath.Join(path, xlMetaV1File)
metadataWriters := make([]io.WriteCloser, len(xl.storageDisks))
// Save additional erasureMetadata.
modTime := time.Now().UTC()
// createFileError counts data-file and metadata-file creation failures
// together.
createFileError := 0
for index, disk := range xl.storageDisks {
erasurePart := slashpath.Join(path, fmt.Sprintf("file.%d", index))
var writer io.WriteCloser
writer, err = disk.CreateFile(volume, erasurePart)
if err != nil {
// Treat errFileNameTooLong specially
if err == errFileNameTooLong {
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...)
reader.CloseWithError(err)
return
}
createFileError++
// We can safely allow CreateFile errors up to len(xl.storageDisks) - xl.writeQuorum
// otherwise return failure.
if createFileError <= len(xl.storageDisks)-xl.writeQuorum {
continue
}
// Remove previous temp writers for any failure.
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...)
reader.CloseWithError(errWriteQuorum)
return
}
// Create meta data file.
var metadataWriter io.WriteCloser
metadataWriter, err = disk.CreateFile(volume, xlMetaV1FilePath)
if err != nil {
createFileError++
// We can safely allow CreateFile errors up to
// len(xl.storageDisks) - xl.writeQuorum otherwise return failure.
// NOTE(review): on this path the data `writer` created just above
// has not been stored in writers yet, so it is neither closed nor
// removed by any later cleanup - looks like a leak; confirm.
if createFileError <= len(xl.storageDisks)-xl.writeQuorum {
continue
}
// Remove previous temp writers for any failure.
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...)
reader.CloseWithError(errWriteQuorum)
return
}
writers[index] = writer
metadataWriters[index] = metadataWriter
}
// Allocate 4MiB block size buffer for reading.
dataBuffer := make([]byte, erasureBlockSize)
var totalSize int64 // Saves total incoming stream size.
for {
// Read up to allocated block size.
var n int
n, err = io.ReadFull(reader, dataBuffer)
if err != nil {
// Any unexpected errors, close the pipe reader with error.
// io.ErrUnexpectedEOF only means the last block is shorter than
// the buffer; it is processed below like any other block.
if err != io.ErrUnexpectedEOF && err != io.EOF {
// Remove all temp writers.
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...)
reader.CloseWithError(err)
return
}
}
// At EOF break out.
if err == io.EOF {
break
}
if n > 0 {
// Split the input buffer into data and parity blocks.
var dataBlocks [][]byte
dataBlocks, err = xl.ReedSolomon.Split(dataBuffer[0:n])
if err != nil {
// Remove all temp writers.
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...)
reader.CloseWithError(err)
return
}
// Encode parity blocks using data blocks.
err = xl.ReedSolomon.Encode(dataBlocks)
if err != nil {
// Remove all temp writers upon error.
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...)
reader.CloseWithError(err)
return
}
var wg = &sync.WaitGroup{}
// wErrs is indexed by disk; each goroutine writes only its own slot.
var wErrs = make([]error, len(writers))
// Loop through and write encoded data to quorum disks.
for index, writer := range writers {
if writer == nil {
continue
}
wg.Add(1)
go func(index int, writer io.Writer) {
defer wg.Done()
encodedData := dataBlocks[index]
_, wErr := writers[index].Write(encodedData)
wErrs[index] = wErr
}(index, writer)
}
wg.Wait()
// Any single shard write failure aborts the whole operation.
for _, wErr := range wErrs {
if wErr == nil {
continue
}
// Remove all temp writers upon error.
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...)
reader.CloseWithError(wErr)
return
}
// Update total written.
totalSize += int64(n)
}
}
// Initialize metadata map, save all erasure related metadata.
metadata := xlMetaV1{}
metadata.Version = "1"
metadata.Stat.Size = totalSize
metadata.Stat.ModTime = modTime
metadata.Minio.Release = minioReleaseTag
// NOTE(review): writers is allocated above with len(xl.storageDisks)
// entries, so this condition cannot be true as written and
// higherVersion is never stored. Possibly the number of non-nil
// writers was intended - confirm.
if len(xl.storageDisks) > len(writers) {
// Save file.version only if we wrote to less disks than all
// storage disks.
metadata.Stat.Version = higherVersion
}
metadata.Erasure.DataBlocks = xl.DataBlocks
metadata.Erasure.ParityBlocks = xl.ParityBlocks
metadata.Erasure.BlockSize = erasureBlockSize
// Write all the metadata.
// below case is not handled here
// Case: when storageDisks is 16 and write quorumDisks is 13,
// meta data write failure up to 2 can be considered.
// currently we fail for any meta data writes
for _, metadataWriter := range metadataWriters {
if metadataWriter == nil {
continue
}
// Write metadata.
err = metadata.Write(metadataWriter)
if err != nil {
// Remove temporary files.
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...)
reader.CloseWithError(err)
return
}
}
// Close all data writers and their metadata writers, one disk at a
// time.
for index, writer := range writers {
if writer == nil {
continue
}
// Safely wrote, now rename to its actual location.
if err = writer.Close(); err != nil {
// Remove all temp writers upon error.
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...)
reader.CloseWithError(err)
return
}
if metadataWriters[index] == nil {
continue
}
// Safely wrote, now rename to its actual location.
if err = metadataWriters[index].Close(); err != nil {
// Remove all temp writers upon error.
xl.cleanupCreateFileOps(volume, path, append(writers, metadataWriters...)...)
reader.CloseWithError(err)
return
}
}
// Close the pipe reader and return.
reader.Close()
return
}
// CreateFile - returns a writer for volume/path. Whatever the caller
// writes is streamed through a pipe to a background writeErasure
// routine; the returned wait closer implements both Write and Close.
// Invalid volume names or paths are rejected with errInvalidArgument.
func (xl XL) CreateFile(volume, path string) (writeCloser io.WriteCloser, err error) {
	if !isValidVolname(volume) || !isValidPath(path) {
		return nil, errInvalidArgument
	}

	// The write end of the pipe is handed to the caller (wrapped in a
	// wait closer); the read end is consumed by the erasure writer.
	encodeIn, callerOut := io.Pipe()
	waitWriter := newWaitCloser(callerOut)

	// Erasure-encode block by block in the background.
	go xl.writeErasure(volume, path, encodeIn, waitWriter)

	return waitWriter, nil
}

View File

@ -1,185 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"errors"
"fmt"
"io"
slashpath "path"
)
// healFile - heals the file at path.
//
// It asks listOnlineDisks which disks hold a usable copy; every disk
// reported as nil there is marked as needing heal. The shards of the
// remaining disks are read block by block, verified (and reconstructed
// with Reed-Solomon when verification fails), and the shards belonging
// to the disks that need healing are written to freshly created
// "<path>/file.<index>" files. Finally the parts metadata is updated
// for the healed disks.
//
// Returns nil when no healing is required, otherwise the first error
// that prevented healing.
func (xl XL) healFile(volume string, path string) error {
totalBlocks := xl.DataBlocks + xl.ParityBlocks
// All three slices are indexed by disk/shard number.
needsHeal := make([]bool, totalBlocks)
var readers = make([]io.Reader, totalBlocks)
var writers = make([]io.WriteCloser, totalBlocks)
// List all online disks to verify if we need to heal.
onlineDisks, metadata, heal, err := xl.listOnlineDisks(volume, path)
if err != nil {
return err
}
if !heal {
return nil
}
for index, disk := range onlineDisks {
if disk == nil {
needsHeal[index] = true
continue
}
erasurePart := slashpath.Join(path, fmt.Sprintf("file.%d", index))
// If disk.ReadFile returns error and we don't have read quorum it will be taken care as
// ReedSolomon.Reconstruct() will fail later.
var reader io.ReadCloser
offset := int64(0)
if reader, err = xl.storageDisks[index].ReadFile(volume, erasurePart, offset); err == nil {
readers[index] = reader
// Deferred on purpose: the reader is needed until healFile returns.
defer reader.Close()
}
}
// create writers for parts where healing is needed.
for index, healNeeded := range needsHeal {
if !healNeeded {
continue
}
erasurePart := slashpath.Join(path, fmt.Sprintf("file.%d", index))
writers[index], err = xl.storageDisks[index].CreateFile(volume, erasurePart)
if err != nil {
// This disk cannot be healed right now; drop it from the heal set.
// NOTE(review): writers[index] holds whatever CreateFile returned
// alongside the error (presumably nil) - confirm that
// safeCloseAndRemove tolerates that.
needsHeal[index] = false
safeCloseAndRemove(writers[index])
continue
}
}
// Check if there is atleast one part that needs to be healed.
atleastOneHeal := false
for _, healNeeded := range needsHeal {
if healNeeded {
atleastOneHeal = true
break
}
}
if !atleastOneHeal {
// Return if healing not needed anywhere.
return nil
}
var totalLeft = metadata.Stat.Size
for totalLeft > 0 {
// Figure out the right blockSize.
var curBlockSize int64
if metadata.Erasure.BlockSize < totalLeft {
curBlockSize = metadata.Erasure.BlockSize
} else {
curBlockSize = totalLeft
}
// Calculate the current block size.
curBlockSize = getEncodedBlockLen(curBlockSize, metadata.Erasure.DataBlocks)
enBlocks := make([][]byte, totalBlocks)
// Loop through all readers and read.
for index, reader := range readers {
// Initialize block slice and fill the data from each parts.
// ReedSolomon.Verify() expects that slice is not nil even if the particular
// part needs healing.
enBlocks[index] = make([]byte, curBlockSize)
if needsHeal[index] {
// Skip reading if the part needs healing.
continue
}
if reader == nil {
// If ReadFile() had returned error, do not read from this disk.
continue
}
_, err = io.ReadFull(reader, enBlocks[index])
if err != nil && err != io.ErrUnexpectedEOF {
enBlocks[index] = nil
}
}
// Check blocks if they are all zero in length.
// NOTE(review): unlike the error returns below, this return does not
// call closeAndRemoveWriters, so the temporary heal writers appear
// to be left behind - confirm.
if checkBlockSize(enBlocks) == 0 {
return errDataCorrupt
}
// Verify the blocks.
ok, err := xl.ReedSolomon.Verify(enBlocks)
if err != nil {
closeAndRemoveWriters(writers...)
return err
}
// Verification failed, blocks require reconstruction.
if !ok {
for index, healNeeded := range needsHeal {
if healNeeded {
// Reconstructs() reconstructs the parts if the array is nil.
enBlocks[index] = nil
}
}
err = xl.ReedSolomon.Reconstruct(enBlocks)
if err != nil {
closeAndRemoveWriters(writers...)
return err
}
// Verify reconstructed blocks again.
ok, err = xl.ReedSolomon.Verify(enBlocks)
if err != nil {
closeAndRemoveWriters(writers...)
return err
}
if !ok {
// Blocks cannot be reconstructed, corrupted data.
err = errors.New("Verification failed after reconstruction, data likely corrupted.")
closeAndRemoveWriters(writers...)
return err
}
}
for index, healNeeded := range needsHeal {
if !healNeeded {
continue
}
_, err := writers[index].Write(enBlocks[index])
if err != nil {
// NOTE(review): the writer is removed here but stays in writers and
// needsHeal[index] stays true, so later blocks are written to it
// again and it is closed once more after the loop - confirm intent.
safeCloseAndRemove(writers[index])
continue
}
}
totalLeft = totalLeft - metadata.Erasure.BlockSize
}
// After successful healing Close() the writer so that the temp
// files are committed to their location.
for _, writer := range writers {
if writer == nil {
continue
}
writer.Close()
}
// Update the quorum metadata after heal.
errs := xl.updatePartsMetadata(volume, path, metadata, needsHeal)
// Only metadata errors on disks that were actually healed are reported.
for index, healNeeded := range needsHeal {
if healNeeded && errs[index] != nil {
return errs[index]
}
}
return nil
}

View File

@ -1,61 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"encoding/json"
"io"
"time"
)
// xlMetaV1 is the JSON metadata document stored next to the erasure
// coded parts of a file.
type xlMetaV1 struct {
	// Version of the metadata format.
	Version string `json:"version"`
	// Stat describes the original (un-encoded) file.
	Stat struct {
		Size    int64     `json:"size"`
		ModTime time.Time `json:"modTime"`
		Version int64     `json:"version"`
	} `json:"stat"`
	// Erasure records the coding parameters used to write the file.
	Erasure struct {
		DataBlocks   int   `json:"data"`
		ParityBlocks int   `json:"parity"`
		BlockSize    int64 `json:"blockSize"`
	} `json:"erasure"`
	// Minio records the server release that wrote the file.
	Minio struct {
		Release string `json:"release"`
	} `json:"minio"`
}

// Write serializes the metadata as JSON and writes it to writer in a
// single call. It returns the marshal error or the write error, if any.
func (m xlMetaV1) Write(writer io.Writer) error {
	encoded, err := json.Marshal(m)
	if err == nil {
		_, err = writer.Write(encoded)
	}
	return err
}

// xlMetaV1Decode - decodes one JSON metadata document from reader.
// On a decode failure (file possibly corrupted) a zero xlMetaV1 is
// returned together with the error.
func xlMetaV1Decode(reader io.Reader) (metadata xlMetaV1, err error) {
	if err = json.NewDecoder(reader).Decode(&metadata); err == nil {
		return metadata, nil
	}
	return xlMetaV1{}, err
}

View File

@ -1,190 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"errors"
"fmt"
"io"
slashpath "path"
"sync"
)
// ReadFile - returns a reader streaming the decoded contents of
// volume/path starting at byte startOffset.
//
// The shard files "<path>/file.<index>" of all online disks are opened
// from offset 0 and a background goroutine reads them block by block,
// verifies each block (reconstructing it with Reed-Solomon when the
// verification fails) and writes the data portion into a pipe. Decoded
// blocks that lie entirely before startOffset are skipped. Decoding
// errors are delivered to the consumer through the pipe.
//
// If listOnlineDisks reports that the file needs healing, healFile is
// started in the background and the read continues regardless.
func (xl XL) ReadFile(volume, path string, startOffset int64) (io.ReadCloser, error) {
// Input validation.
if !isValidVolname(volume) {
return nil, errInvalidArgument
}
if !isValidPath(path) {
return nil, errInvalidArgument
}
onlineDisks, metadata, heal, err := xl.listOnlineDisks(volume, path)
if err != nil {
return nil, err
}
if heal {
// Heal in background safely, since we already have read
// quorum disks. Let the reads continue.
go func() {
hErr := xl.healFile(volume, path)
errorIf(hErr, "Unable to heal file "+volume+"/"+path+".")
}()
}
// readers is indexed by disk; an entry stays nil for offline disks and
// for disks whose shard could not be opened.
readers := make([]io.ReadCloser, len(xl.storageDisks))
for index, disk := range onlineDisks {
if disk == nil {
continue
}
erasurePart := slashpath.Join(path, fmt.Sprintf("file.%d", index))
// If disk.ReadFile returns error and we don't have read quorum it will be taken care as
// ReedSolomon.Reconstruct() will fail later.
var reader io.ReadCloser
offset := int64(0)
if reader, err = disk.ReadFile(volume, erasurePart, offset); err == nil {
readers[index] = reader
}
}
// Initialize pipe.
pipeReader, pipeWriter := io.Pipe()
// NOTE(review): the shard readers are closed only at the end of the
// goroutine below; every early return after CloseWithError leaves them
// open - looks like a leak on error paths; confirm.
go func() {
var totalLeft = metadata.Stat.Size
// Read until the totalLeft.
for totalLeft > 0 {
// Figure out the right blockSize as it was encoded before.
var curBlockSize int64
if metadata.Erasure.BlockSize < totalLeft {
curBlockSize = metadata.Erasure.BlockSize
} else {
curBlockSize = totalLeft
}
// Calculate the current encoded block size.
curEncBlockSize := getEncodedBlockLen(curBlockSize, metadata.Erasure.DataBlocks)
enBlocks := make([][]byte, len(xl.storageDisks))
var wg = &sync.WaitGroup{}
// Loop through all readers and read.
for index, reader := range readers {
// Initialize shard slice and fill the data from each parts.
enBlocks[index] = make([]byte, curEncBlockSize)
if reader == nil {
continue
}
// Parallelize reading.
wg.Add(1)
go func(index int, reader io.Reader) {
defer wg.Done()
// Read the necessary blocks.
_, rErr := io.ReadFull(reader, enBlocks[index])
if rErr != nil && rErr != io.ErrUnexpectedEOF {
// Mark this shard as unavailable for this and all later blocks.
readers[index] = nil
}
}(index, reader)
}
// Wait for the read routines to finish.
wg.Wait()
// Check blocks if they are all zero in length.
if checkBlockSize(enBlocks) == 0 {
pipeWriter.CloseWithError(errDataCorrupt)
return
}
// Verify the blocks.
var ok bool
ok, err = xl.ReedSolomon.Verify(enBlocks)
if err != nil {
pipeWriter.CloseWithError(err)
return
}
// Verification failed, blocks require reconstruction.
if !ok {
for index, reader := range readers {
if reader == nil {
// Reconstruct expects missing blocks to be nil.
enBlocks[index] = nil
}
}
err = xl.ReedSolomon.Reconstruct(enBlocks)
if err != nil {
pipeWriter.CloseWithError(err)
return
}
// Verify reconstructed blocks again.
ok, err = xl.ReedSolomon.Verify(enBlocks)
if err != nil {
pipeWriter.CloseWithError(err)
return
}
if !ok {
// Blocks cannot be reconstructed, corrupted data.
err = errors.New("Verification failed after reconstruction, data likely corrupted.")
pipeWriter.CloseWithError(err)
return
}
}
// Get all the data blocks.
dataBlocks := getDataBlocks(enBlocks, metadata.Erasure.DataBlocks, int(curBlockSize))
// Verify if the offset is right for the block, if not move to
// the next block.
if startOffset > 0 {
startOffset = startOffset - int64(len(dataBlocks))
// Start offset is greater than or equal to zero, skip the dataBlocks.
if startOffset >= 0 {
totalLeft = totalLeft - metadata.Erasure.BlockSize
continue
}
// Now get back the remaining offset if startOffset is negative.
startOffset = startOffset + int64(len(dataBlocks))
}
// Write safely the necessary blocks.
_, err = pipeWriter.Write(dataBlocks[int(startOffset):])
if err != nil {
pipeWriter.CloseWithError(err)
return
}
// Reset offset to '0' to read rest of the blocks.
startOffset = int64(0)
// Save what's left after reading erasureBlockSize.
totalLeft = totalLeft - metadata.Erasure.BlockSize
}
// Cleanly end the pipe after a successful decoding.
pipeWriter.Close()
// Cleanly close all the underlying data readers.
for _, reader := range readers {
if reader == nil {
continue
}
reader.Close()
}
}()
// Return the pipe for the top level caller to start reading.
return pipeReader, nil
}

View File

@ -1,46 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
// getDataBlocks - concatenates the first dataBlocks shards of enBlocks
// (the data shards, parity excluded) and truncates the result to
// curBlockSize bytes, dropping the padding added while encoding.
func getDataBlocks(enBlocks [][]byte, dataBlocks int, curBlockSize int) []byte {
	shards := enBlocks[:dataBlocks]
	var joined []byte
	for i := range shards {
		joined = append(joined, shards[i]...)
	}
	return joined[:curBlockSize]
}
// checkBlockSize - returns the length of the first non-empty block,
// or 0 when every block is empty (or nil).
func checkBlockSize(blocks [][]byte) int {
	for i := range blocks {
		if size := len(blocks[i]); size > 0 {
			return size
		}
	}
	return 0
}
// getEncodedBlockLen - returns the per-shard length for an input of
// inputLen bytes spread over dataBlocks data shards, i.e. inputLen
// divided by dataBlocks, rounded up.
func getEncodedBlockLen(inputLen int64, dataBlocks int) (curBlockSize int64) {
	shards := int64(dataBlocks)
	return (inputLen + shards - 1) / shards
}

View File

@ -1,546 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"errors"
"fmt"
"math/rand"
"os"
slashpath "path"
"strings"
"path"
"sync"
"github.com/klauspost/reedsolomon"
)
const (
// XL erasure metadata file.
xlMetaV1File = "file.json"
)
// XL layer structure. It spreads erasure coded data over a set of
// storage disks.
type XL struct {
ReedSolomon reedsolomon.Encoder // Erasure encoder/decoder.
// Number of data shards per encoded block; newXL sets it to half the
// number of disks.
DataBlocks int
// Number of parity shards per encoded block; newXL sets it to half the
// number of disks.
ParityBlocks int
// Underlying disks; shard i of a file lives on storageDisks[i].
storageDisks []StorageAPI
// Read quorum; newXL sets it to len(storageDisks)/2 + 1.
readQuorum int
// Write quorum; newXL sets it to len(storageDisks)/2 + 3, capped at
// len(storageDisks).
writeQuorum int
}
// errUnexpected - returned for any unexpected error.
var errUnexpected = errors.New("Unexpected error - please report at https://github.com/minio/minio/issues")
// newXL - builds an XL layer on top of disks.
//
// Half of the disks are used as data blocks and half as parity blocks.
// The read quorum is N/2 + 1 and the write quorum is N/2 + 3, capped at
// N, where N is the number of disks. An error is returned if the
// Reed-Solomon encoder cannot be created for that layout.
func newXL(disks []StorageAPI) (StorageAPI, error) {
	total := len(disks)
	half := total / 2

	// Initialize reed solomon encoding with equal data and parity counts.
	encoder, err := reedsolomon.New(half, half)
	if err != nil {
		return nil, err
	}

	// Write quorum can never exceed the number of available disks.
	writeQuorum := half + 3
	if writeQuorum > total {
		writeQuorum = total
	}

	return &XL{
		ReedSolomon:  encoder,
		DataBlocks:   half,
		ParityBlocks: half,
		storageDisks: disks,
		readQuorum:   half + 1,
		writeQuorum:  writeQuorum,
	}, nil
}
// MakeVol - creates volume on all storage disks in parallel.
//
// Returns errInvalidArgument for an invalid volume name,
// errVolumeExists when every disk reports the volume as existing, and
// errWriteQuorum when more disks failed for other reasons than the
// write quorum tolerates. Nil disks are skipped and count as success.
func (xl XL) MakeVol(volume string) error {
	if !isValidVolname(volume) {
		return errInvalidArgument
	}

	diskCount := len(xl.storageDisks)
	// One slot per disk; each goroutine writes only its own slot.
	makeErrs := make([]error, diskCount)
	var wg sync.WaitGroup
	for i, d := range xl.storageDisks {
		wg.Add(1)
		go func(i int, d StorageAPI) {
			defer wg.Done()
			if d != nil {
				makeErrs[i] = d.MakeVol(volume)
			}
		}(i, d)
	}
	wg.Wait()

	// Classify the per-disk results.
	var existsCount, failedCount int
	for _, mErr := range makeErrs {
		switch mErr {
		case nil:
			// Created successfully (or disk skipped).
		case errVolumeExists:
			existsCount++
		default:
			failedCount++
		}
	}

	switch {
	case existsCount == diskCount:
		return errVolumeExists
	case failedCount > diskCount-xl.writeQuorum:
		return errWriteQuorum
	}
	return nil
}
// DeleteVol - removes volume from all storage disks in parallel.
//
// errVolumeNotFound and errDiskNotFound from individual disks are
// tolerated; the first error of any other kind is returned as is. If
// every disk reported one of the tolerated errors the result is
// errVolumeNotFound.
func (xl XL) DeleteVol(volume string) error {
	if !isValidVolname(volume) {
		return errInvalidArgument
	}

	diskCount := len(xl.storageDisks)
	// One slot per disk; each goroutine writes only its own slot.
	delErrs := make([]error, diskCount)
	var wg sync.WaitGroup
	for i, d := range xl.storageDisks {
		wg.Add(1)
		go func(i int, d StorageAPI) {
			defer wg.Done()
			delErrs[i] = d.DeleteVol(volume)
		}(i, d)
	}
	wg.Wait()

	missing := 0
	for _, dErr := range delErrs {
		switch dErr {
		case nil:
			// Deleted on this disk.
		case errVolumeNotFound, errDiskNotFound:
			missing++
		default:
			return dErr
		}
	}
	if missing == diskCount {
		return errVolumeNotFound
	}
	return nil
}
// ListVols - lists volumes, with the usage values of every returned
// volume replaced by the aggregate over the disks.
//
// Every disk is queried in parallel. The returned listing is taken from
// the last disk that reported at least one volume, so a failed or empty
// disk at the end of the disk list no longer produces an empty result.
// The returned error is always nil; per-disk listing errors are ignored.
//
// NOTE: The assumption here is that volumes across all disks in
// readQuorum have consistent view i.e they all have same number of
// buckets. This is essentially not verified since healing should take
// care of this.
func (xl XL) ListVols() (volsInfo []VolInfo, err error) {
	var wg = &sync.WaitGroup{}
	// Per-disk listings, indexed by disk. A disk whose listing failed
	// contributes whatever it returned next to the error (typically nil).
	var successVols = make([][]VolInfo, len(xl.storageDisks))
	for index, disk := range xl.storageDisks {
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			// Best effort: a failing disk is simply left out.
			vlsInfo, _ := disk.ListVols()
			successVols[index] = vlsInfo
		}(index, disk)
	}
	wg.Wait()

	// Aggregate usage values and remember a listing to return.
	var listing []VolInfo
	var total, free int64
	for _, diskVols := range successVols {
		if len(diskVols) > 0 {
			listing = diskVols
		}
		// NOTE(review): disks reporting a single volume are excluded from
		// the usage totals, as in the original code - confirm that "<= 1"
		// (rather than "< 1") is intended.
		if len(diskVols) <= 1 {
			continue
		}
		// Use the first named volume of the disk as the source of the
		// disk's usage values.
		var vlInfo VolInfo
		for _, vlInfo = range diskVols {
			if vlInfo.Name == "" {
				continue
			}
			break
		}
		free += vlInfo.Free
		total += vlInfo.Total
	}

	// Save the aggregated usage values into the returned volumes.
	for _, vlInfo := range listing {
		vlInfo.Free = free
		vlInfo.Total = total
		volsInfo = append(volsInfo, vlInfo)
	}
	return volsInfo, nil
}
// getAllVolInfo - stats volume on every storage disk in parallel.
// Both returned slices are indexed by disk: for each disk either the
// VolInfo entry is filled in (and the error is nil) or the error entry
// holds the failure (and the VolInfo entry is left zero).
func (xl XL) getAllVolInfo(volume string) ([]VolInfo, []error) {
	diskCount := len(xl.storageDisks)
	statErrs := make([]error, diskCount)
	infos := make([]VolInfo, diskCount)

	var wg sync.WaitGroup
	for i, d := range xl.storageDisks {
		wg.Add(1)
		// Each goroutine touches only slot i of the two slices.
		go func(i int, d StorageAPI) {
			defer wg.Done()
			info, sErr := d.StatVol(volume)
			if sErr == nil {
				infos[i] = info
				return
			}
			statErrs[i] = sErr
		}(i, d)
	}
	// Wait for all the stat operations to finish.
	wg.Wait()

	return infos, statErrs
}
// listAllVolInfo - stats volume on all disks and evaluates the result
// against the read quorum.
// Returns
// - stat volume info for all disks, indexed by disk.
// - boolean to indicate if healing is necessary, i.e. at least one
//   disk failed to stat the volume.
// - errVolumeNotFound if more disks than the read quorum tolerates
//   report the volume as missing, errReadQuorum if fewer disks than
//   the read quorum answered successfully.
func (xl XL) listAllVolInfo(volume string) ([]VolInfo, bool, error) {
	volsInfo, errs := xl.getAllVolInfo(volume)
	diskCount := len(xl.storageDisks)

	// Count successful disks and disks reporting the volume as missing.
	var online, missing int
	for _, sErr := range errs {
		switch sErr {
		case nil:
			online++
		case errVolumeNotFound:
			missing++
		}
	}

	// Too many disks do not know the volume.
	if missing > 0 && missing > diskCount-xl.readQuorum {
		return nil, false, errVolumeNotFound
	}

	// Every disk answered, nothing to heal.
	if online >= diskCount {
		return volsInfo, false, nil
	}

	// Some disks are behind; that is only acceptable with read quorum.
	if online < xl.readQuorum {
		return nil, false, errReadQuorum
	}
	return volsInfo, true, nil
}
// StatVol - get volume stat info.
//
// The volume is stat'ed on all disks. The returned VolInfo is the entry
// of the last disk that answered successfully, with Free and Total
// replaced by the sums over all disks that answered. If some disks are
// missing the volume, it is re-created on them in the background.
//
// Returns errInvalidArgument for an invalid volume name, otherwise any
// error reported by listAllVolInfo.
func (xl XL) StatVol(volume string) (volInfo VolInfo, err error) {
	if !isValidVolname(volume) {
		return VolInfo{}, errInvalidArgument
	}

	// List and figure out if we need healing.
	volsInfo, heal, err := xl.listAllVolInfo(volume)
	if err != nil {
		return VolInfo{}, err
	}

	// Heal for missing entries.
	if heal {
		go func() {
			// Create volume if missing on disks.
			for index, volInfo := range volsInfo {
				if volInfo.Name != "" {
					continue
				}
				// Volinfo name would be an empty string, create it.
				// Best effort: a failure here is retried on the next stat.
				xl.storageDisks[index].MakeVol(volume)
			}
		}()
	}

	// Loop through all statVols, calculate the actual usage values.
	// Only entries of disks that answered are considered, so that the
	// returned info never comes from a failed disk's zero value.
	var total, free int64
	for _, info := range volsInfo {
		if info.Name == "" {
			continue
		}
		free += info.Free
		total += info.Total
		volInfo = info
	}

	// Update the aggregated values.
	volInfo.Free = free
	volInfo.Total = total
	return volInfo, nil
}
// isLeafDirectoryXL - reports whether leafPath is a leaf directory,
// i.e. whether the metadata file xlMetaV1File can be stat'ed inside it
// on the given disk.
func isLeafDirectoryXL(disk StorageAPI, volume, leafPath string) (isLeaf bool) {
	metaPath := path.Join(leafPath, xlMetaV1File)
	if _, err := disk.StatFile(volume, metaPath); err != nil {
		return false
	}
	return true
}
// ListDir - return all the entries at the given directory path.
// If an entry is a directory it will be returned with a trailing "/",
// except for leaf directories (directories holding an erasure coded
// file), which are returned without it.
//
// The disks are tried in random order, each at most once, and the
// listing of the first disk that answers is returned. If every disk
// fails, the error of the last attempt is returned.
func (xl XL) ListDir(volume, dirPath string) (entries []string, err error) {
	if !isValidVolname(volume) {
		return nil, errInvalidArgument
	}

	// rand.Perm covers every disk, including the last one, and works for
	// a single disk, where rand.Intn(len-1) would panic.
	for _, diskIndex := range rand.Perm(len(xl.storageDisks)) {
		disk := xl.storageDisks[diskIndex]
		if entries, err = disk.ListDir(volume, dirPath); err != nil {
			// Try the next disk.
			continue
		}
		for i, entry := range entries {
			// Only directory entries can be leaf directories, no need to
			// probe the disk for plain entries.
			if !strings.HasSuffix(entry, slashSeparator) {
				continue
			}
			if isLeafDirectoryXL(disk, volume, path.Join(dirPath, entry)) {
				entries[i] = strings.TrimSuffix(entry, slashSeparator)
			}
		}
		// We got the entries successfully, return.
		return entries, nil
	}

	// Return the last error at the end.
	return nil, err
}
// Object API.
// StatFile - stat a file.
//
// Size and modification time are taken from the metadata returned by
// listOnlineDisks. When that call reports that healing is needed, the
// file is healed in a background goroutine.
func (xl XL) StatFile(volume, path string) (FileInfo, error) {
	if !isValidVolname(volume) || !isValidPath(path) {
		return FileInfo{}, errInvalidArgument
	}

	_, metadata, heal, err := xl.listOnlineDisks(volume, path)
	if err != nil {
		return FileInfo{}, err
	}

	if heal {
		// Heal in background safely, since we already have read quorum disks.
		go func() {
			errorIf(xl.healFile(volume, path), "Unable to heal file "+volume+"/"+path+".")
		}()
	}

	// Assemble and return the file info.
	info := FileInfo{
		Volume:  volume,
		Name:    path,
		Size:    metadata.Stat.Size,
		ModTime: metadata.Stat.ModTime,
		Mode:    os.FileMode(0644),
	}
	return info, nil
}
// deleteXLFiles - delete all XL backend files.
//
// For every disk the erasure part "file.<index>" and then "file.json" are
// removed. Delete failures are counted; up to
// len(xl.storageDisks) - xl.writeQuorum failures are tolerated, the first
// failure beyond that is returned.
func (xl XL) deleteXLFiles(volume, path string) error {
	// Number of DeleteFile errors that can be safely ignored.
	tolerated := len(xl.storageDisks) - xl.writeQuorum
	failures := 0

	for index, disk := range xl.storageDisks {
		// Remove the part file first.
		partPath := slashpath.Join(path, fmt.Sprintf("file.%d", index))
		if err := disk.DeleteFile(volume, partPath); err != nil {
			failures++
			if failures > tolerated {
				return err
			}
			// Still within the tolerated budget, move on to the next disk.
			continue
		}

		// Then remove the metadata file.
		metaPath := slashpath.Join(path, "file.json")
		if err := disk.DeleteFile(volume, metaPath); err != nil {
			failures++
			if failures > tolerated {
				return err
			}
			continue
		}
	}

	// Return success.
	return nil
}
// DeleteFile - delete a file.
// Validates the volume name and the path, then removes all XL backend
// files of the given path through deleteXLFiles.
func (xl XL) DeleteFile(volume, path string) error {
	if !isValidVolname(volume) || !isValidPath(path) {
		return errInvalidArgument
	}
	// Delete all XL files.
	return xl.deleteXLFiles(volume, path)
}
// RenameFile - rename file.
//
// The rename is issued on all underlying disks in parallel. Failures on up
// to len(xl.storageDisks) - xl.writeQuorum disks are tolerated. Beyond
// that the rename is still reported as successful as long as the failure
// count stays within len(xl.storageDisks) - xl.readQuorum; otherwise the
// destination is cleaned up and errWriteQuorum is returned.
func (xl XL) RenameFile(srcVolume, srcPath, dstVolume, dstPath string) error {
	// Validate inputs.
	if !isValidVolname(srcVolume) {
		return errInvalidArgument
	}
	if !isValidPath(srcPath) {
		return errInvalidArgument
	}
	if !isValidVolname(dstVolume) {
		return errInvalidArgument
	}
	if !isValidPath(dstPath) {
		return errInvalidArgument
	}

	// Initialize sync waitgroup.
	var wg = &sync.WaitGroup{}

	// Initialize list of errors, one slot per disk.
	var errs = make([]error, len(xl.storageDisks))

	// Rename file on all underlying storage disks.
	for index, disk := range xl.storageDisks {
		// Append "/" as srcPath and dstPath are either leaf-dirs or non-leaf-dirs.
		// If srcPath is an object instead of prefix we just rename the leaf-dir and
		// not rename the part and metadata files separately.
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			// Record the outcome as-is (nil on success). The slot must not
			// be reset afterwards, otherwise failures go unnoticed.
			errs[index] = disk.RenameFile(srcVolume, retainSlash(srcPath), dstVolume, retainSlash(dstPath))
		}(index, disk)
	}

	// Wait for all RenameFile to finish.
	wg.Wait()

	// Gather err count.
	errCount := 0
	for _, err := range errs {
		if err != nil {
			errCount++
		}
	}

	// We can safely allow RenameFile errors up to len(xl.storageDisks) - xl.writeQuorum
	// otherwise return failure. Cleanup successful renames.
	if errCount > len(xl.storageDisks)-xl.writeQuorum {
		// Special condition if readQuorum exists, then return success.
		if errCount <= len(xl.storageDisks)-xl.readQuorum {
			return nil
		}
		// Ignore errors here, delete all successfully written files.
		xl.deleteXLFiles(dstVolume, dstPath)
		return errWriteQuorum
	}
	return nil
}

View File

@ -1,336 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"encoding/json"
"fmt"
"io"
"path"
"strings"
"sync"
"time"
)
// MultipartPartInfo Info of each part kept in the multipart metadata file after
// CompleteMultipartUpload() is called.
type MultipartPartInfo struct {
// PartNumber is the part number supplied by the client for this part.
PartNumber int
// ETag is the ETag supplied by the client for this part.
ETag string
// Size is the size of the part as reported by StatFile on the part file.
Size int64
}
// MultipartObjectInfo - contents of the multipart metadata file after
// CompleteMultipartUpload() is called.
type MultipartObjectInfo struct {
// Parts lists the parts in the order they were passed to
// CompleteMultipartUpload().
Parts []MultipartPartInfo
// ModTime is the UTC time at which the upload was completed.
ModTime time.Time
// Size is the sum of the sizes of all parts.
Size int64
// MD5Sum is the md5sum calculated by completeMultipartMD5 over the parts.
MD5Sum string
// ContentType and ContentEncoding are copied from the "content-type" and
// "content-encoding" keys of the metadata saved with the incomplete upload.
ContentType string
ContentEncoding string
// Add more fields here.
}
// byMultipartFiles orders file names lexically after stripping a trailing
// multipartSuffix from each name. It provides the Len/Less/Swap methods
// expected by the sort package.
type byMultipartFiles []string

// Len returns the number of file names.
func (files byMultipartFiles) Len() int {
	return len(files)
}

// Less compares two names with multipartSuffix trimmed off.
func (files byMultipartFiles) Less(i, j int) bool {
	return strings.TrimSuffix(files[i], multipartSuffix) < strings.TrimSuffix(files[j], multipartSuffix)
}

// Swap exchanges the names at positions i and j.
func (files byMultipartFiles) Swap(i, j int) {
	tmp := files[i]
	files[i] = files[j]
	files[j] = tmp
}
// GetPartNumberOffset - given an offset for the whole object, return the
// index of the part containing that offset and the offset inside that part.
// errUnexpected is returned when the offset is not located inside any part.
func (m MultipartObjectInfo) GetPartNumberOffset(offset int64) (partIndex int, partOffset int64, err error) {
	remaining := offset
	for idx := 0; idx < len(m.Parts); idx++ {
		size := m.Parts[idx].Size
		if remaining < size {
			// The offset falls inside this part.
			return idx, remaining, nil
		}
		// Skip past this part.
		remaining -= size
		partIndex = idx
	}
	// Offset beyond the size of the object.
	return partIndex, remaining, errUnexpected
}
// getMultipartObjectMeta - reads metaFile from minioMetaBucket and decodes
// its JSON content into a string map holding the saved metadata.
func getMultipartObjectMeta(storage StorageAPI, metaFile string) (meta map[string]string, err error) {
	// Read the meta file from the beginning.
	reader, err := storage.ReadFile(minioMetaBucket, metaFile, int64(0))
	if err != nil {
		return nil, err
	}
	// Close the metadata reader.
	defer reader.Close()

	meta = make(map[string]string)
	if err = json.NewDecoder(reader).Decode(&meta); err != nil {
		return nil, err
	}
	return meta, nil
}
// partNumToPartFileName returns the file name used for the given part
// number: the number printed with at least five digits, followed by
// multipartSuffix.
func partNumToPartFileName(partNum int) string {
	paddedNum := fmt.Sprintf("%.5d", partNum)
	return paddedNum + multipartSuffix
}
// ListMultipartUploads - list multipart uploads.
// Thin wrapper: passes the xlObjects value itself and all arguments on to
// listMultipartUploadsCommon and returns its result unchanged.
func (xl xlObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
return listMultipartUploadsCommon(xl, bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads)
}
// NewMultipartUpload - initialize a new multipart upload, returns a unique id.
// Thin wrapper: delegates to newMultipartUploadCommon using xl.storage and
// returns its result unchanged.
func (xl xlObjects) NewMultipartUpload(bucket, object string, meta map[string]string) (string, error) {
return newMultipartUploadCommon(xl.storage, bucket, object, meta)
}
// PutObjectPart - writes the multipart upload chunks.
// Thin wrapper: delegates to putObjectPartCommon using xl.storage and
// returns its result unchanged.
func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
return putObjectPartCommon(xl.storage, bucket, object, uploadID, partID, size, data, md5Hex)
}
// ListObjectParts - list object parts.
// Thin wrapper: delegates to listObjectPartsCommon using xl.storage and
// returns its result unchanged.
func (xl xlObjects) ListObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
return listObjectPartsCommon(xl.storage, bucket, object, uploadID, partNumberMarker, maxParts)
}
// parentDirIsObject does the following check: suppose the object is
// "a/b/c/d", it makes sure that the objects "a/b/c", "a/b" and "a" do not
// exist. The caller passes the parent directory of the object ("a/b/c").
//
// Returns errFileAccessDenied if an object exists at "parent" or at any of
// its ancestors, nil if none exists, and any other error hit while looking
// up object info as-is.
func (xl xlObjects) parentDirIsObject(bucket, parent string) error {
	// Walk up towards the top of the path. path.Dir eventually yields "."
	// for relative paths and "/" for rooted ones; "/" has to be treated as
	// a stop condition as well since path.Dir("/") is "/" again and the
	// walk would otherwise never end.
	for p := parent; p != "." && p != "/"; p = path.Dir(p) {
		_, err := xl.getObjectInfo(bucket, p)
		if err == nil {
			// If there is already a file at prefix "p" return error.
			return errFileAccessDenied
		}
		if err != errFileNotFound {
			return err
		}
		// Nothing at "p", check if there is a file as one of the parent paths.
	}
	return nil
}
// CompleteMultipartUpload - completes the multipart upload identified by
// uploadID and returns the s3 compatible md5sum of the final object.
//
// The listed parts are validated, a multipart metadata file describing
// them is written, the parts are renamed to their final part file names
// and finally the whole upload directory is renamed to the object
// location, replacing any object that already exists there. Once done,
// 'uploads.json' is removed unless other uploads are still pending for the
// same object.
func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, uploadID string, parts []completePart) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify whether the bucket exists.
	if !isBucketExist(xl.storage, bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	if !isUploadIDExists(xl.storage, bucket, object, uploadID) {
		return "", InvalidUploadID{UploadID: uploadID}
	}

	// Hold lock so that
	// 1) no one aborts this multipart upload
	// 2) no one does a parallel complete-multipart-upload on this multipart upload
	nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
	defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))

	// Calculate s3 compatible md5sum for complete multipart.
	s3MD5, err := completeMultipartMD5(parts...)
	if err != nil {
		return "", err
	}

	var metadata = MultipartObjectInfo{}
	var errs = make([]error, len(parts))

	uploadIDIncompletePath := path.Join(mpartMetaPrefix, bucket, object, uploadID, incompleteFile)
	objMeta, err := getMultipartObjectMeta(xl.storage, uploadIDIncompletePath)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadIDIncompletePath)
	}

	// Waitgroup to wait for go-routines.
	var wg = &sync.WaitGroup{}

	// Loop through all parts, validate them and collect their metadata.
	for i, part := range parts {
		// Construct part suffix.
		partSuffix := fmt.Sprintf("%.5d.%s", part.PartNumber, part.ETag)
		multipartPartFile := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)
		var fi FileInfo
		fi, err = xl.storage.StatFile(minioMetaBucket, multipartPartFile)
		if err != nil {
			if err == errFileNotFound {
				return "", InvalidPart{}
			}
			return "", err
		}
		// All parts except the last part has to be atleast 5MB.
		if (i < len(parts)-1) && !isMinAllowedPartSize(fi.Size) {
			return "", PartTooSmall{}
		}
		// Update metadata parts.
		metadata.Parts = append(metadata.Parts, MultipartPartInfo{
			PartNumber: part.PartNumber,
			ETag:       part.ETag,
			Size:       fi.Size,
		})
		metadata.Size += fi.Size
	}

	// Check if an object is present as one of the parent dir.
	if err = xl.parentDirIsObject(bucket, path.Dir(object)); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Save successfully calculated md5sum.
	metadata.MD5Sum = s3MD5
	metadata.ContentType = objMeta["content-type"]
	metadata.ContentEncoding = objMeta["content-encoding"]

	// Save modTime as well as the current time.
	metadata.ModTime = time.Now().UTC()

	// Create temporary multipart meta file to write and then rename.
	multipartMetaSuffix := fmt.Sprintf("%s.%s", uploadID, multipartMetaFile)
	tempMultipartMetaFile := path.Join(tmpMetaPrefix, bucket, object, multipartMetaSuffix)
	w, err := xl.storage.CreateFile(minioMetaBucket, tempMultipartMetaFile)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}
	encoder := json.NewEncoder(w)
	if err = encoder.Encode(&metadata); err != nil {
		// Keep the original error in 'err': overwriting it with the
		// cleanup result would report success when the cleanup works.
		if clErr := safeCloseAndRemove(w); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}
	// Close the writer.
	if err = w.Close(); err != nil {
		if clErr := safeCloseAndRemove(w); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Attempt a Rename of multipart meta file to final namespace.
	multipartObjFile := path.Join(mpartMetaPrefix, bucket, object, uploadID, multipartMetaFile)
	err = xl.storage.RenameFile(minioMetaBucket, tempMultipartMetaFile, minioMetaBucket, multipartObjFile)
	if err != nil {
		if derr := xl.storage.DeleteFile(minioMetaBucket, tempMultipartMetaFile); derr != nil {
			return "", toObjectErr(err, minioMetaBucket, tempMultipartMetaFile)
		}
		return "", toObjectErr(err, bucket, multipartObjFile)
	}

	// Loop through and atomically rename the parts to their actual location.
	for index, part := range parts {
		wg.Add(1)
		go func(index int, part completePart) {
			defer wg.Done()
			partSuffix := fmt.Sprintf("%.5d.%s", part.PartNumber, part.ETag)
			src := path.Join(mpartMetaPrefix, bucket, object, uploadID, partSuffix)
			dst := path.Join(mpartMetaPrefix, bucket, object, uploadID, partNumToPartFileName(part.PartNumber))
			errs[index] = xl.storage.RenameFile(minioMetaBucket, src, minioMetaBucket, dst)
			errorIf(errs[index], "Unable to rename file %s to %s.", src, dst)
		}(index, part)
	}

	// Wait for all the renames to finish.
	wg.Wait()

	// Loop through errs list and return first error.
	for _, rErr := range errs {
		if rErr != nil {
			return "", toObjectErr(rErr, bucket, object)
		}
	}

	// Delete the incomplete file place holder.
	err = xl.storage.DeleteFile(minioMetaBucket, uploadIDIncompletePath)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadIDIncompletePath)
	}

	// Hold write lock on the destination before rename.
	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	// Delete if an object already exists.
	// FIXME: rename it to tmp file and delete only after
	// the newly uploaded file is renamed from tmp location to
	// the original location.
	// Verify if the object is a multipart object. In both cases we must
	// fall through to the rename below, otherwise the freshly completed
	// upload would never show up at its final location.
	if isMultipartObject(xl.storage, bucket, object) {
		err = xl.deleteMultipartObject(bucket, object)
	} else {
		err = xl.deleteObject(bucket, object)
	}
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	if err = xl.storage.RenameFile(minioMetaBucket, uploadIDPath, bucket, object); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Hold the lock so that two parallel complete-multipart-uploads do not
	// leave a stale uploads.json behind.
	nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))
	defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))

	// Validate if there are other incomplete upload-id's present for
	// the object, if yes do not attempt to delete 'uploads.json'.
	var entries []string
	if entries, err = xl.storage.ListDir(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object)); err == nil {
		if len(entries) > 1 {
			return s3MD5, nil
		}
	}

	uploadsJSONPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
	err = xl.storage.DeleteFile(minioMetaBucket, uploadsJSONPath)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, uploadsJSONPath)
	}

	// Return md5sum.
	return s3MD5, nil
}
// AbortMultipartUpload - aborts a multipart upload.
// Thin wrapper: delegates to abortMultipartUploadCommon using xl.storage
// and returns its result unchanged.
func (xl xlObjects) AbortMultipartUpload(bucket, object, uploadID string) error {
return abortMultipartUploadCommon(xl.storage, bucket, object, uploadID)
}

View File

@ -1,581 +0,0 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"crypto/md5"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
"io"
"path"
"path/filepath"
"strings"
"sync"
"github.com/minio/minio/pkg/mimedb"
)
const (
// multipartSuffix is the suffix appended to the file names of the parts
// of a multipart object.
multipartSuffix = ".minio.multipart"
// multipartMetaFile is the name of the metadata file of a multipart
// object; its presence under an object path marks the object as multipart.
multipartMetaFile = "00000" + multipartSuffix
// formatConfigFile is presumably the name of the saved XL format file;
// it is not referenced in this part of the file - TODO confirm.
formatConfigFile = "format.json"
)
// xlObjects - Implements the XL object layer; newXLObjects returns it as
// an ObjectLayer.
type xlObjects struct {
// storage is the XL storage API all object operations are issued against.
storage StorageAPI
// listObjectMap keeps tree walkers keyed by their list parameters.
listObjectMap map[listParams][]*treeWalker
// listObjectMapMutex presumably guards listObjectMap - TODO confirm
// against the code using the map.
listObjectMapMutex *sync.Mutex
}
var (
	// errMaxDisks - returned for reached maximum of disks.
	errMaxDisks = errors.New("Number of disks are higher than supported maximum count '16'")

	// errMinDisks - returned for minimum number of disks.
	errMinDisks = errors.New("Number of disks are smaller than supported minimum count '8'")

	// errNumDisks - returned for odd number of disks.
	errNumDisks = errors.New("Number of disks should be multiples of '2'")
)

const (
	// Maximum erasure blocks.
	maxErasureBlocks = 16
	// Minimum erasure blocks.
	minErasureBlocks = 8
)

// checkSufficientDisks verifies that the number of disks is supported:
// it has to be within [minErasureBlocks, maxErasureBlocks] and even, i.e.
// only the combinations 8, 10, 12, 14 and 16 are accepted.
func checkSufficientDisks(disks []string) error {
	switch totalDisks := len(disks); {
	case totalDisks > maxErasureBlocks:
		return errMaxDisks
	case totalDisks < minErasureBlocks:
		return errMinDisks
	case totalDisks%2 != 0:
		// Odd number of disks.
		return errNumDisks
	}
	return nil
}
// newStorageLayer initializes the storage layer for a disk, depending on
// whether the disk is a network or a local one. A disk argument that
// contains a ':' and has no volume name is handed to the rpc client,
// everything else is initialized as a posix disk.
func newStorageLayer(disk string) (storage StorageAPI, err error) {
	isNetworkDisk := strings.ContainsRune(disk, ':') && filepath.VolumeName(disk) == ""
	if isNetworkDisk {
		// Initialize rpc client storage API.
		return newRPCClient(disk)
	}
	// Initialize filesystem storage API.
	return newPosix(disk)
}
// bootstrapDisks initializes the storage layer of every given disk and
// returns them in the same order.
func bootstrapDisks(disks []string) ([]StorageAPI, error) {
	storageDisks := make([]StorageAPI, len(disks))
	for index := range disks {
		storage, err := newStorageLayer(disks[index])
		// Intentionally ignore disk not found errors while
		// initializing POSIX, so that we have successfully
		// initialized posix Storage. Subsequent calls to XL/Erasure
		// will manage any errors related to disks.
		if err != nil && err != errDiskNotFound {
			return nil, err
		}
		storageDisks[index] = storage
	}
	return storageDisks, nil
}
// newXLObjects - initialize new xl object layer.
//
// Validates the disk count, bootstraps all disks, loads the saved XL
// format (formatting the disks first when they are still unformatted) and
// returns an xlObjects value backed by the resulting XL storage.
func newXLObjects(disks []string) (ObjectLayer, error) {
	if err := checkSufficientDisks(disks); err != nil {
		return nil, err
	}

	storageDisks, err := bootstrapDisks(disks)
	if err != nil {
		return nil, err
	}

	// Initialize object layer - like creating minioMetaBucket, cleaning up tmp files etc.
	initObjectLayer(storageDisks...)

	// Load saved XL format.json and validate.
	newDisks, err := loadFormatXL(storageDisks)
	if err != nil {
		if err != errUnformattedDisk {
			// errCorruptedDisk - error.
			return nil, fmt.Errorf("Unable to recognize backend format, %s", err)
		}
		// Disks are unformatted, save new XL format.
		if errSave := initFormatXL(storageDisks); errSave != nil {
			return nil, errSave
		}
		newDisks = storageDisks
	}

	// FIXME: healFormatXL(newDisks)

	storage, err := newXL(newDisks)
	if err != nil {
		return nil, err
	}

	// Return successfully initialized object layer.
	objLayer := xlObjects{
		storage:            storage,
		listObjectMap:      make(map[listParams][]*treeWalker),
		listObjectMapMutex: &sync.Mutex{},
	}
	return objLayer, nil
}
/// Bucket operations
// MakeBucket - make a bucket.
// Holds the namespace write lock on the bucket (empty object name) for the
// duration of the makeBucket call on xl.storage.
func (xl xlObjects) MakeBucket(bucket string) error {
nsMutex.Lock(bucket, "")
defer nsMutex.Unlock(bucket, "")
return makeBucket(xl.storage, bucket)
}
// GetBucketInfo - get bucket info.
// Holds the namespace read lock on the bucket (empty object name) for the
// duration of the getBucketInfo call on xl.storage.
func (xl xlObjects) GetBucketInfo(bucket string) (BucketInfo, error) {
nsMutex.RLock(bucket, "")
defer nsMutex.RUnlock(bucket, "")
return getBucketInfo(xl.storage, bucket)
}
// ListBuckets - list buckets.
// Thin wrapper: delegates to listBuckets using xl.storage; no namespace
// lock is taken here.
func (xl xlObjects) ListBuckets() ([]BucketInfo, error) {
return listBuckets(xl.storage)
}
// DeleteBucket - delete a bucket.
// Holds the namespace write lock on the bucket (empty object name) for the
// duration of the deleteBucket call on xl.storage.
func (xl xlObjects) DeleteBucket(bucket string) error {
	nsMutex.Lock(bucket, "")
	// The unlock has to be deferred, releasing the lock right away would
	// let the delete run without any protection.
	defer nsMutex.Unlock(bucket, "")
	return deleteBucket(xl.storage, bucket)
}
/// Object Operations
// GetObject - get an object.
//
// Regular objects are read directly from the storage layer starting at
// startOffset. Multipart objects are streamed through a pipe: a goroutine
// reads the parts one after another, beginning at the part and in-part
// offset that correspond to startOffset, and writes them to the pipe whose
// read end is returned.
func (xl xlObjects) GetObject(bucket, object string, startOffset int64) (io.ReadCloser, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return nil, BucketNameInvalid{Bucket: bucket}
	}
	// Verify if object is valid.
	if !IsValidObjectName(object) {
		return nil, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	nsMutex.RLock(bucket, object)
	defer nsMutex.RUnlock(bucket, object)

	// Regular (non multipart) object, read it directly.
	if !isMultipartObject(xl.storage, bucket, object) {
		if _, statErr := xl.storage.StatFile(bucket, object); statErr != nil {
			return nil, toObjectErr(statErr, bucket, object)
		}
		reader, readErr := xl.storage.ReadFile(bucket, object, startOffset)
		if readErr != nil {
			return nil, toObjectErr(readErr, bucket, object)
		}
		return reader, nil
	}

	// Multipart object, stream all required parts through a pipe.
	fileReader, fileWriter := io.Pipe()
	info, err := getMultipartObjectInfo(xl.storage, bucket, object)
	if err != nil {
		return nil, toObjectErr(err, bucket, object)
	}
	partIndex, offset, err := info.GetPartNumberOffset(startOffset)
	if err != nil {
		return nil, toObjectErr(err, bucket, object)
	}

	// Hold a read lock once more which can be released after the following go-routine ends.
	// We hold RLock once more because the current function would return before the go routine below
	// executes and hence releasing the read lock (because of defer'ed nsMutex.RUnlock() call).
	nsMutex.RLock(bucket, object)
	go func() {
		defer nsMutex.RUnlock(bucket, object)
		for _, part := range info.Parts[partIndex:] {
			partFile := pathJoin(object, partNumToPartFileName(part.PartNumber))
			r, rErr := xl.storage.ReadFile(bucket, partFile, offset)
			if rErr != nil {
				fileWriter.CloseWithError(rErr)
				return
			}
			// Reset offset to 0 as it would be non-0 only for the first loop if startOffset is non-0.
			offset = 0
			if _, cErr := io.Copy(fileWriter, r); cErr != nil {
				switch reader := r.(type) {
				case *io.PipeReader:
					reader.CloseWithError(cErr)
				case io.ReadCloser:
					reader.Close()
				}
				fileWriter.CloseWithError(cErr)
				return
			}
			// Close the readerCloser that reads multiparts of an object from the xl storage layer.
			// Not closing leaks underlying file descriptors.
			r.Close()
		}
		fileWriter.Close()
	}()
	return fileReader, nil
}
// getMultipartObjectInfo returns the parts info of a special multipart
// object, decoded from the JSON content of its multipartMetaFile.
// On any read or decode error an empty MultipartObjectInfo is returned
// together with the error.
func getMultipartObjectInfo(storage StorageAPI, bucket, object string) (info MultipartObjectInfo, err error) {
	offset := int64(0) // To read entire content.
	r, err := storage.ReadFile(bucket, pathJoin(object, multipartMetaFile), offset)
	if err != nil {
		return MultipartObjectInfo{}, err
	}
	// Close the reader on every return path, not closing leaks the
	// underlying file descriptor.
	defer r.Close()

	decoder := json.NewDecoder(r)
	if err = decoder.Decode(&info); err != nil {
		return MultipartObjectInfo{}, err
	}
	return info, nil
}
// getObjectInfo returns the ObjectInfo of the given object.
//
// The object is first looked up as a simple-PUT upload; only when that
// fails with errFileNotFound it is looked up as a multipart object. For
// simple uploads the md5sum, content type and content encoding are read
// from the "meta.json" file of the object; a missing content type is
// derived from the object name extension and falls back to
// "application/octet-stream".
//
// Errors of the two lookups are returned as-is (callers compare against
// errFileNotFound), errors while reading "meta.json" are converted with
// toObjectErr.
func (xl xlObjects) getObjectInfo(bucket, object string) (objInfo ObjectInfo, err error) {
	objInfo.Bucket = bucket
	objInfo.Name = object

	// First see if the object was a simple-PUT upload.
	fi, err := xl.storage.StatFile(bucket, object)
	if err != nil {
		if err != errFileNotFound {
			return ObjectInfo{}, err
		}
		var info MultipartObjectInfo
		// Check if the object was multipart upload.
		info, err = getMultipartObjectInfo(xl.storage, bucket, object)
		if err != nil {
			return ObjectInfo{}, err
		}
		objInfo.Size = info.Size
		objInfo.ModTime = info.ModTime
		objInfo.MD5Sum = info.MD5Sum
		objInfo.ContentType = info.ContentType
		objInfo.ContentEncoding = info.ContentEncoding
		return objInfo, nil
	}

	// Simple-PUT upload, load the saved metadata.
	metadata := make(map[string]string)
	offset := int64(0) // To read entire content
	r, err := xl.storage.ReadFile(bucket, pathJoin(object, "meta.json"), offset)
	if err != nil {
		return ObjectInfo{}, toObjectErr(err, bucket, object)
	}
	// Close the reader on every return path, not closing leaks the
	// underlying file descriptor.
	defer r.Close()

	decoder := json.NewDecoder(r)
	if err = decoder.Decode(&metadata); err != nil {
		return ObjectInfo{}, toObjectErr(err, bucket, object)
	}

	contentType := metadata["content-type"]
	if len(contentType) == 0 {
		contentType = "application/octet-stream"
		if objectExt := filepath.Ext(object); objectExt != "" {
			content, ok := mimedb.DB[strings.ToLower(strings.TrimPrefix(objectExt, "."))]
			if ok {
				contentType = content.ContentType
			}
		}
	}

	objInfo.Size = fi.Size
	objInfo.IsDir = fi.Mode.IsDir()
	objInfo.ModTime = fi.ModTime
	objInfo.MD5Sum = metadata["md5Sum"]
	objInfo.ContentType = contentType
	objInfo.ContentEncoding = metadata["content-encoding"]
	return objInfo, nil
}
// GetObjectInfo - get object info.
// Validates bucket and object names and looks up the object info while
// holding the namespace read lock on the object.
func (xl xlObjects) GetObjectInfo(bucket, object string) (ObjectInfo, error) {
	switch {
	case !IsValidBucketName(bucket):
		// Bucket name is not valid.
		return ObjectInfo{}, BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		// Object name is not valid.
		return ObjectInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	nsMutex.RLock(bucket, object)
	defer nsMutex.RUnlock(bucket, object)

	objInfo, infoErr := xl.getObjectInfo(bucket, object)
	if infoErr != nil {
		return ObjectInfo{}, toObjectErr(infoErr, bucket, object)
	}
	return objInfo, nil
}
// PutObject - create an object and return its md5sum.
//
// The data is first written to a temporary location while its md5sum is
// calculated. When metadata carries a non-empty "md5Sum" it has to match
// the calculated one, otherwise BadDigest is returned. Any object already
// present at the destination is deleted, the temporary file is renamed to
// its final location and the metadata is saved next to it as "meta.json".
//
// When size is greater than zero exactly size bytes are copied from data,
// otherwise data is copied until EOF.
func (xl xlObjects) PutObject(bucket string, object string, size int64, data io.Reader, metadata map[string]string) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify bucket exists.
	if !isBucketExist(xl.storage, bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	// No metadata is set, allocate a new one.
	if metadata == nil {
		metadata = make(map[string]string)
	}

	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	tempObj := path.Join(tmpMetaPrefix, bucket, object)
	fileWriter, err := xl.storage.CreateFile(minioMetaBucket, tempObj)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Initialize md5 writer.
	md5Writer := md5.New()

	// Instantiate a new multi writer, feeding the md5 hasher and the file.
	multiWriter := io.MultiWriter(md5Writer, fileWriter)

	// Copy the data.
	if size > 0 {
		_, err = io.CopyN(multiWriter, data, size)
	} else {
		_, err = io.Copy(multiWriter, data)
	}
	if err != nil {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))
	// Update the md5sum if not set with the newly calculated one.
	if len(metadata["md5Sum"]) == 0 {
		metadata["md5Sum"] = newMD5Hex
	}
	// Validate the md5sum provided by the caller, md5Hex is never empty
	// at this point.
	md5Hex := metadata["md5Sum"]
	if newMD5Hex != md5Hex {
		if err = safeCloseAndRemove(fileWriter); err != nil {
			return "", toObjectErr(err, bucket, object)
		}
		return "", BadDigest{md5Hex, newMD5Hex}
	}

	err = fileWriter.Close()
	if err != nil {
		if clErr := safeCloseAndRemove(fileWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Check if an object is present as one of the parent dir.
	if err = xl.parentDirIsObject(bucket, path.Dir(object)); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Delete if an object already exists.
	// FIXME: rename it to tmp file and delete only after
	// the newly uploaded file is renamed from tmp location to
	// the original location.
	// Verify if the object is a multipart object.
	if isMultipartObject(xl.storage, bucket, object) {
		err = xl.deleteMultipartObject(bucket, object)
	} else {
		err = xl.deleteObject(bucket, object)
	}
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	err = xl.storage.RenameFile(minioMetaBucket, tempObj, bucket, object)
	if err != nil {
		if dErr := xl.storage.DeleteFile(minioMetaBucket, tempObj); dErr != nil {
			return "", toObjectErr(dErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Write the metadata to a temporary file, to be renamed afterwards.
	tempMetaJSONFile := path.Join(tmpMetaPrefix, bucket, object, "meta.json")
	metaWriter, err := xl.storage.CreateFile(minioMetaBucket, tempMetaJSONFile)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	encoder := json.NewEncoder(metaWriter)
	err = encoder.Encode(&metadata)
	if err != nil {
		if clErr := safeCloseAndRemove(metaWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}
	if err = metaWriter.Close(); err != nil {
		// Keep the close error in 'err': overwriting it with the cleanup
		// result would report success when the cleanup works.
		if clErr := safeCloseAndRemove(metaWriter); clErr != nil {
			return "", toObjectErr(clErr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	metaJSONFile := path.Join(object, "meta.json")
	err = xl.storage.RenameFile(minioMetaBucket, tempMetaJSONFile, bucket, metaJSONFile)
	if err != nil {
		if derr := xl.storage.DeleteFile(minioMetaBucket, tempMetaJSONFile); derr != nil {
			return "", toObjectErr(derr, bucket, object)
		}
		return "", toObjectErr(err, bucket, object)
	}

	// Return md5sum, successfully wrote object.
	return newMD5Hex, nil
}
// isMultipartObject - verifies if an object is a special multipart file,
// i.e. whether multipartMetaFile can be stat'ed under the object path.
// Stat errors other than errFileNotFound are logged and reported as false.
func isMultipartObject(storage StorageAPI, bucket, object string) bool {
	_, statErr := storage.StatFile(bucket, pathJoin(object, multipartMetaFile))
	if statErr == nil {
		return true
	}
	if statErr != errFileNotFound {
		errorIf(statErr, "Failed to stat file "+bucket+pathJoin(object, multipartMetaFile))
	}
	return false
}
// deleteMultipartObject - deletes only multipart object.
// All part files are removed in parallel, followed by the multipart
// metadata file.
func (xl xlObjects) deleteMultipartObject(bucket, object string) error {
	// Get parts info.
	info, err := getMultipartObjectInfo(xl.storage, bucket, object)
	if err != nil {
		return err
	}

	// Delete every part in its own go-routine, one error slot per part.
	partErrs := make([]error, len(info.Parts))
	var wg sync.WaitGroup
	for index, part := range info.Parts {
		wg.Add(1)
		go func(index int, part MultipartPartInfo) {
			defer wg.Done()
			partPath := pathJoin(object, partNumToPartFileName(part.PartNumber))
			partErrs[index] = xl.storage.DeleteFile(bucket, partPath)
		}(index, part)
	}

	// Wait for all the deletes to finish.
	wg.Wait()

	// Loop through and validate if any errors, if we are unable to remove any part return
	// "unexpected" error as returning any other error might be misleading. For ex.
	// if DeleteFile() had returned errFileNotFound and we return it, then client would see
	// ObjectNotFound which is misleading.
	for _, partErr := range partErrs {
		if partErr != nil {
			return errUnexpected
		}
	}

	// Finally remove the multipart metadata file.
	return xl.storage.DeleteFile(bucket, pathJoin(object, multipartMetaFile))
}
// deleteObject - deletes a regular object.
// Removes the "meta.json" file of the object first and the object itself
// afterwards; files that do not exist are ignored.
func (xl xlObjects) deleteObject(bucket, object string) error {
	targets := []string{
		path.Join(object, "meta.json"),
		object,
	}
	for _, target := range targets {
		err := xl.storage.DeleteFile(bucket, target)
		if err != nil && err != errFileNotFound {
			return err
		}
	}
	return nil
}
// DeleteObject - delete the object.
// Validates bucket and object names and deletes the object, multipart or
// regular, while holding the namespace write lock on it.
func (xl xlObjects) DeleteObject(bucket, object string) error {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	// Pick the delete routine matching the kind of object.
	var delErr error
	if isMultipartObject(xl.storage, bucket, object) {
		delErr = xl.deleteMultipartObject(bucket, object)
	} else {
		delErr = xl.deleteObject(bucket, object)
	}
	if delErr != nil {
		return toObjectErr(delErr, bucket, object)
	}
	return nil
}
// ListObjects - list all objects at prefix, delimited by '/'.
// Thin wrapper: passes the xlObjects value itself and all arguments on to
// listObjectsCommon and returns its result unchanged.
func (xl xlObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
return listObjectsCommon(xl, bucket, prefix, marker, delimiter, maxKeys)
}

249
xl-v1-bucket.go Normal file
View File

@ -0,0 +1,249 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"sort"
"sync"
)
/// Bucket operations
// MakeBucket - creates the bucket as a volume on every configured
// storage disk in parallel and reduces the per-disk results into a
// single verdict:
//   - BucketNameInvalid when the name fails validation,
//   - errVolumeExists (through toObjectErr) when more than
//     len(storageDisks)-readQuorum disks already have the volume,
//   - errXLWriteQuorum (through toObjectErr) when more than
//     len(storageDisks)-writeQuorum disks failed for any other
//     reason, unallocated disks included,
//   - nil otherwise.
//
// The bucket's namespace write lock is held for the whole operation.
func (xl xlObjects) MakeBucket(bucket string) error {
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}

	nsMutex.Lock(bucket, "")
	defer nsMutex.Unlock(bucket, "")

	totalDisks := len(xl.storageDisks)
	// One result slot per disk; each goroutine writes only its own slot.
	results := make([]error, totalDisks)
	var wg sync.WaitGroup
	for i, d := range xl.storageDisks {
		if d == nil {
			// Unallocated disks are recorded as failures.
			results[i] = errDiskNotFound
			continue
		}
		wg.Add(1)
		go func(slot int, target StorageAPI) {
			defer wg.Done()
			results[slot] = target.MakeVol(bucket)
		}(i, d)
	}
	// Wait for all make vol calls to finish.
	wg.Wait()

	// Tally "already exists" separately from every other failure.
	existsCount, failedCount := 0, 0
	for _, result := range results {
		switch result {
		case nil:
			// Volume created on this disk.
		case errVolumeExists:
			existsCount++
		default:
			failedCount++
		}
	}

	switch {
	case existsCount > totalDisks-xl.readQuorum:
		return toObjectErr(errVolumeExists, bucket)
	case failedCount > totalDisks-xl.writeQuorum:
		// More failures than the write quorum tolerates.
		return toObjectErr(errXLWriteQuorum, bucket)
	}
	return nil
}
// getBucketInfo - returns the BucketInfo of bucketName as reported by
// the first load balanced quorum disk able to answer StatVol.
//
// Unallocated (nil) disks and disks reporting errDiskNotFound are
// skipped in favour of the next candidate; any other StatVol error is
// returned as is. When no candidate could be consulted at all,
// errDiskNotFound is returned instead of an empty BucketInfo with a nil
// error, so callers never mistake "nobody answered" for "bucket
// exists".
func (xl xlObjects) getBucketInfo(bucketName string) (bucketInfo BucketInfo, err error) {
	for _, disk := range xl.getLoadBalancedQuorumDisks() {
		if disk == nil {
			continue
		}
		var volInfo VolInfo
		volInfo, err = disk.StatVol(bucketName)
		if err == nil {
			return BucketInfo{
				Name:    volInfo.Name,
				Created: volInfo.Created,
			}, nil
		}
		// For some reason disk went offline, pick the next one.
		if err == errDiskNotFound {
			continue
		}
		return BucketInfo{}, err
	}
	// Every candidate was either unallocated or offline.
	return BucketInfo{}, errDiskNotFound
}
// isBucketExist - reports whether the bucket can be stat'ed, holding
// the bucket's namespace read lock for the duration of the check.
// Any getBucketInfo failure yields false; failures other than
// errVolumeNotFound are additionally logged through errorIf.
func (xl xlObjects) isBucketExist(bucket string) bool {
	nsMutex.RLock(bucket, "")
	defer nsMutex.RUnlock(bucket, "")

	_, statErr := xl.getBucketInfo(bucket)
	if statErr == nil {
		return true
	}
	// A missing volume is the expected negative answer, log the rest.
	if statErr != errVolumeNotFound {
		errorIf(statErr, "Stat failed on bucket "+bucket+".")
	}
	return false
}
// GetBucketInfo - returns BucketInfo for a bucket.
//
// The bucket name is validated first (BucketNameInvalid on failure),
// then the lookup runs under the bucket's namespace read lock. Lookup
// errors are translated with toObjectErr and accompanied by an empty
// BucketInfo.
func (xl xlObjects) GetBucketInfo(bucket string) (BucketInfo, error) {
	if !IsValidBucketName(bucket) {
		return BucketInfo{}, BucketNameInvalid{Bucket: bucket}
	}

	nsMutex.RLock(bucket, "")
	defer nsMutex.RUnlock(bucket, "")

	info, statErr := xl.getBucketInfo(bucket)
	if statErr == nil {
		return info, nil
	}
	return BucketInfo{}, toObjectErr(statErr, bucket)
}
// listBuckets - returns the buckets found on the first load balanced
// quorum disk that lists its volumes successfully.
//
// Unallocated (nil) disks are skipped, and a disk whose ListVols call
// fails is passed over in favour of the next candidate; the last error
// seen is returned only when no candidate could produce a listing.
// Volume names which are not valid bucket names are left out of the
// result.
func (xl xlObjects) listBuckets() (bucketsInfo []BucketInfo, err error) {
	for _, disk := range xl.getLoadBalancedQuorumDisks() {
		if disk == nil {
			continue
		}
		var volsInfo []VolInfo
		volsInfo, err = disk.ListVols()
		if err != nil {
			// This disk cannot answer, fall over to the next one.
			continue
		}
		// NOTE: The assumption here is that volumes across all disks in
		// readQuorum have consistent view i.e they all have same number
		// of buckets. This is essentially not verified since healing
		// should take care of this.
		for _, volInfo := range volsInfo {
			// StorageAPI can send volume names which are incompatible
			// with buckets, handle it and skip them.
			if !IsValidBucketName(volInfo.Name) {
				continue
			}
			bucketsInfo = append(bucketsInfo, BucketInfo{
				Name:    volInfo.Name,
				Created: volInfo.Created,
			})
		}
		return bucketsInfo, nil
	}
	// No disk produced a listing; err holds the last failure, if any.
	return nil, err
}
// ListBuckets - lists all the buckets, ordered with byBucketName.
// A listing failure is translated with toObjectErr and returned with a
// nil slice.
func (xl xlObjects) ListBuckets() ([]BucketInfo, error) {
	buckets, listErr := xl.listBuckets()
	if listErr == nil {
		// Sort in place before handing the listing back.
		sort.Sort(byBucketName(buckets))
		return buckets, nil
	}
	return nil, toObjectErr(listErr)
}
// DeleteBucket - deletes the bucket's volume from every configured
// storage disk in parallel, holding the bucket's namespace write lock.
//
// Per-disk results are reduced as follows:
//   - the first result that is neither nil, errVolumeNotFound nor
//     errDiskNotFound is returned (through toObjectErr),
//   - errVolumeNotFound (through toObjectErr) is returned when every
//     disk reported errVolumeNotFound or errDiskNotFound, unallocated
//     disks included,
//   - nil otherwise.
func (xl xlObjects) DeleteBucket(bucket string) error {
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}

	nsMutex.Lock(bucket, "")
	defer nsMutex.Unlock(bucket, "")

	// One result slot per disk; each goroutine writes only its own slot.
	results := make([]error, len(xl.storageDisks))
	var wg sync.WaitGroup
	for i, d := range xl.storageDisks {
		if d == nil {
			results[i] = errDiskNotFound
			continue
		}
		wg.Add(1)
		go func(slot int, target StorageAPI) {
			defer wg.Done()
			results[slot] = target.DeleteVol(bucket)
		}(i, d)
	}
	// Wait for all the delete vols to finish.
	wg.Wait()

	// Count the tolerated errors, bail out on the first unknown one.
	missing := 0
	for _, result := range results {
		switch result {
		case nil:
			// Volume removed on this disk.
		case errVolumeNotFound, errDiskNotFound:
			missing++
		default:
			return toObjectErr(result, bucket)
		}
	}
	if missing == len(xl.storageDisks) {
		return toObjectErr(errVolumeNotFound, bucket)
	}
	return nil
}

86
xl-v1-common.go Normal file
View File

@ -0,0 +1,86 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import "path"
// getLoadBalancedQuorumDisks - fetches load balanced sufficiently
// randomized quorum disk slice.
//
// The result is the leading xl.readQuorum-1 entries of
// getLoadBalancedDisks(); it is therefore empty when readQuorum is 1.
// Entries are taken straight from xl.storageDisks, so callers check
// each one for nil before use.
func (xl xlObjects) getLoadBalancedQuorumDisks() (disks []StorageAPI) {
// It is okay to have readQuorum disks.
// NOTE(review): the slice bound below yields readQuorum-1 disks, one
// fewer than the comment above suggests - confirm which is intended.
return xl.getLoadBalancedDisks()[:xl.readQuorum-1]
}
// getLoadBalancedDisks - returns the configured storage disks in the
// order dictated by randInts(len(xl.storageDisks)). Each value handed
// back by randInts is used as a 1-based position into xl.storageDisks
// (presumably a shuffled 1..n sequence - confirm against randInts).
// The result is nil when there are no positions to visit.
func (xl xlObjects) getLoadBalancedDisks() (disks []StorageAPI) {
	order := randInts(len(xl.storageDisks))
	for pos := 0; pos < len(order); pos++ {
		// Positions are 1-based, slice indices are 0-based.
		disks = append(disks, xl.storageDisks[order[pos]-1])
	}
	return disks
}
// parentDirIsObject - reports whether `parent` or any of its ancestor
// prefixes is itself an object. For an object "a/b/c/d" the caller
// passes "a/b/c" and this asks isObject about "a/b/c", "a/b" and "a"
// in turn, returning true on the first hit.
//
// The walk ends at "." (path.Dir of a relative top-level name) and at
// "/" (path.Dir of a rooted name, which is its own parent); without
// the latter a rooted `parent` would never terminate.
func (xl xlObjects) parentDirIsObject(bucket, parent string) bool {
	for p := parent; p != "." && p != "/"; p = path.Dir(p) {
		if xl.isObject(bucket, p) {
			// There is already an object at prefix "p".
			return true
		}
	}
	return false
}
// isObject - returns `true` if the prefix is an object i.e if
// `xl.json` exists at the leaf, false otherwise.
//
// The first allocated (non-nil) load balanced quorum disk decides: a
// successful StatFile means object, any StatFile error means not. When
// no allocated disk is available the answer is false, since nothing
// confirmed that `xl.json` exists.
func (xl xlObjects) isObject(bucket, prefix string) bool {
	xlMetaPath := path.Join(prefix, xlMetaJSONFile)
	for _, disk := range xl.getLoadBalancedQuorumDisks() {
		if disk == nil {
			continue
		}
		_, err := disk.StatFile(bucket, xlMetaPath)
		return err == nil
	}
	// No disk could be consulted.
	return false
}
// statPart - stats the part file `objectPart` of `bucket` on the first
// allocated (non-nil) load balanced quorum disk and returns its
// FileInfo, or an empty FileInfo together with the StatFile error.
//
// NOTE(review): when every candidate disk is nil nothing is stat'ed and
// the zero FileInfo is returned together with a nil error.
func (xl xlObjects) statPart(bucket, objectPart string) (fileInfo FileInfo, err error) {
	for _, candidate := range xl.getLoadBalancedQuorumDisks() {
		if candidate != nil {
			// Only this one disk is asked, whatever the outcome.
			fileInfo, err = candidate.StatFile(bucket, objectPart)
			if err != nil {
				return FileInfo{}, err
			}
			return fileInfo, nil
		}
	}
	return fileInfo, nil
}

View File

@ -1,30 +1,14 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main package main
import ( import (
"errors" "encoding/json"
slashpath "path" "path"
"sync" "sync"
) )
// Get the highest integer from a given integer slice. // Get the highest integer from a given integer slice.
func highestInt(intSlice []int64) (highestInteger int64) { func highestInt(intSlice []int64) (highestInteger int64) {
highestInteger = int64(0) highestInteger = int64(1)
for _, integer := range intSlice { for _, integer := range intSlice {
if highestInteger < integer { if highestInteger < integer {
highestInteger = integer highestInteger = integer
@ -33,8 +17,8 @@ func highestInt(intSlice []int64) (highestInteger int64) {
return highestInteger return highestInteger
} }
// Extracts file versions from partsMetadata slice and returns version slice. // Extracts objects versions from xlMetaV1 slice and returns version slice.
func listFileVersions(partsMetadata []xlMetaV1, errs []error) (versions []int64) { func listObjectVersions(partsMetadata []xlMetaV1, errs []error) (versions []int64) {
versions = make([]int64, len(partsMetadata)) versions = make([]int64, len(partsMetadata))
for index, metadata := range partsMetadata { for index, metadata := range partsMetadata {
if errs[index] == nil { if errs[index] == nil {
@ -46,16 +30,57 @@ func listFileVersions(partsMetadata []xlMetaV1, errs []error) (versions []int64)
return versions return versions
} }
// reduceError - convert collection of errors into a single // Reads all `xl.json` metadata as a xlMetaV1 slice.
// Returns error slice indicating the failed metadata reads.
func (xl xlObjects) readAllXLMetadata(bucket, object string) ([]xlMetaV1, []error) {
errs := make([]error, len(xl.storageDisks))
metadataArray := make([]xlMetaV1, len(xl.storageDisks))
xlMetaPath := path.Join(object, xlMetaJSONFile)
var wg = &sync.WaitGroup{}
for index, disk := range xl.storageDisks {
if disk == nil {
errs[index] = errDiskNotFound
continue
}
wg.Add(1)
go func(index int, disk StorageAPI) {
defer wg.Done()
buffer, err := readAll(disk, bucket, xlMetaPath)
if err != nil {
errs[index] = err
return
}
err = json.Unmarshal(buffer, &metadataArray[index])
if err != nil {
// Unable to parse xl.json, set error.
errs[index] = err
return
}
// Relinquish buffer.
buffer = nil
errs[index] = nil
}(index, disk)
}
// Wait for all the routines to finish.
wg.Wait()
// Return all the metadata.
return metadataArray, errs
}
// error based on total errors and read quorum. // error based on total errors and read quorum.
func (xl XL) reduceError(errs []error) error { func (xl xlObjects) reduceError(errs []error) error {
fileNotFoundCount := 0 fileNotFoundCount := 0
longNameCount := 0
diskNotFoundCount := 0 diskNotFoundCount := 0
volumeNotFoundCount := 0 volumeNotFoundCount := 0
diskAccessDeniedCount := 0 diskAccessDeniedCount := 0
for _, err := range errs { for _, err := range errs {
if err == errFileNotFound { if err == errFileNotFound {
fileNotFoundCount++ fileNotFoundCount++
} else if err == errFileNameTooLong {
longNameCount++
} else if err == errDiskNotFound { } else if err == errDiskNotFound {
diskNotFoundCount++ diskNotFoundCount++
} else if err == errVolumeAccessDenied { } else if err == errVolumeAccessDenied {
@ -66,10 +91,12 @@ func (xl XL) reduceError(errs []error) error {
} }
// If we have errors with 'file not found' greater than // If we have errors with 'file not found' greater than
// readQuorum, return as errFileNotFound. // readQuorum, return as errFileNotFound.
// else if we have errors with 'volume not found' greater than // else if we have errors with 'volume not found'
// readQuorum, return as errVolumeNotFound. // greater than readQuorum, return as errVolumeNotFound.
if fileNotFoundCount > len(xl.storageDisks)-xl.readQuorum { if fileNotFoundCount > len(xl.storageDisks)-xl.readQuorum {
return errFileNotFound return errFileNotFound
} else if longNameCount > len(xl.storageDisks)-xl.readQuorum {
return errFileNameTooLong
} else if volumeNotFoundCount > len(xl.storageDisks)-xl.readQuorum { } else if volumeNotFoundCount > len(xl.storageDisks)-xl.readQuorum {
return errVolumeNotFound return errVolumeNotFound
} }
@ -78,8 +105,8 @@ func (xl XL) reduceError(errs []error) error {
if diskNotFoundCount == len(xl.storageDisks) { if diskNotFoundCount == len(xl.storageDisks) {
return errDiskNotFound return errDiskNotFound
} else if diskNotFoundCount > len(xl.storageDisks)-xl.readQuorum { } else if diskNotFoundCount > len(xl.storageDisks)-xl.readQuorum {
// If we have errors with 'disk not found' greater than // If we have errors with 'disk not found'
// readQuorum, return as errFileNotFound. // greater than readQuorum, return as errFileNotFound.
return errFileNotFound return errFileNotFound
} }
// If we have errors with disk not found equal to the // If we have errors with disk not found equal to the
@ -90,36 +117,21 @@ func (xl XL) reduceError(errs []error) error {
return nil return nil
} }
// Returns slice of online disks needed. // Similar to 'len(slice)' but returns the actualelements count
// - slice returing readable disks. // skipping the unallocated elements.
// - xlMetaV1 func diskCount(disks []StorageAPI) int {
// - bool value indicating if healing is needed. diskCount := 0
// - error if any. for _, disk := range disks {
func (xl XL) listOnlineDisks(volume, path string) (onlineDisks []StorageAPI, mdata xlMetaV1, heal bool, err error) { if disk == nil {
partsMetadata, errs := xl.getPartsMetadata(volume, path) continue
if err = xl.reduceError(errs); err != nil {
return nil, xlMetaV1{}, false, err
}
highestVersion := int64(0)
onlineDisks = make([]StorageAPI, len(xl.storageDisks))
// List all the file versions from partsMetadata list.
versions := listFileVersions(partsMetadata, errs)
// Get highest file version.
highestVersion = highestInt(versions)
// Pick online disks with version set to highestVersion.
onlineDiskCount := 0
for index, version := range versions {
if version == highestVersion {
mdata = partsMetadata[index]
onlineDisks[index] = xl.storageDisks[index]
onlineDiskCount++
} else {
onlineDisks[index] = nil
} }
diskCount++
} }
return diskCount
}
func (xl xlObjects) shouldHeal(onlineDisks []StorageAPI) (heal bool) {
onlineDiskCount := diskCount(onlineDisks)
// If online disks count is lesser than configured disks, most // If online disks count is lesser than configured disks, most
// probably we need to heal the file, additionally verify if the // probably we need to heal the file, additionally verify if the
// count is lesser than readQuorum, if not we throw an error. // count is lesser than readQuorum, if not we throw an error.
@ -128,77 +140,45 @@ func (xl XL) listOnlineDisks(volume, path string) (onlineDisks []StorageAPI, mda
// healed. unless we do not have readQuorum. // healed. unless we do not have readQuorum.
heal = true heal = true
// Verify if online disks count are lesser than readQuorum // Verify if online disks count are lesser than readQuorum
// threshold, return an error if yes. // threshold, return an error.
if onlineDiskCount < xl.readQuorum { if onlineDiskCount < xl.readQuorum {
return nil, xlMetaV1{}, false, errReadQuorum errorIf(errXLReadQuorum, "Unable to establish read quorum, disks are offline.")
return false
} }
} }
return onlineDisks, mdata, heal, nil return heal
} }
// Get file.json metadata as a map slice. // Returns slice of online disks needed.
// Returns error slice indicating the failed metadata reads. // - slice returing readable disks.
// Read lockNS() should be done by caller. // - xlMetaV1
func (xl XL) getPartsMetadata(volume, path string) ([]xlMetaV1, []error) { // - bool value indicating if healing is needed.
errs := make([]error, len(xl.storageDisks)) // - error if any.
metadataArray := make([]xlMetaV1, len(xl.storageDisks)) func (xl xlObjects) listOnlineDisks(partsMetadata []xlMetaV1, errs []error) (onlineDisks []StorageAPI, version int64, err error) {
xlMetaV1FilePath := slashpath.Join(path, xlMetaV1File) onlineDisks = make([]StorageAPI, len(xl.storageDisks))
var wg = &sync.WaitGroup{} if err = xl.reduceError(errs); err != nil {
for index, disk := range xl.storageDisks { if err == errFileNotFound {
wg.Add(1) // For file not found, treat as if disks are available
go func(index int, disk StorageAPI) { // return all the configured ones.
defer wg.Done() onlineDisks = xl.storageDisks
offset := int64(0) return onlineDisks, 1, nil
metadataReader, err := disk.ReadFile(volume, xlMetaV1FilePath, offset)
if err != nil {
errs[index] = err
return
} }
defer metadataReader.Close() return nil, 0, err
}
highestVersion := int64(0)
// List all the file versions from partsMetadata list.
versions := listObjectVersions(partsMetadata, errs)
metadata, err := xlMetaV1Decode(metadataReader) // Get highest object version.
if err != nil { highestVersion = highestInt(versions)
// Unable to parse file.json, set error.
errs[index] = err // Pick online disks with version set to highestVersion.
return for index, version := range versions {
if version == highestVersion {
onlineDisks[index] = xl.storageDisks[index]
} else {
onlineDisks[index] = nil
} }
metadataArray[index] = metadata
}(index, disk)
} }
wg.Wait() return onlineDisks, highestVersion, nil
return metadataArray, errs
}
// Writes/Updates `file.json` for given file. updateParts carries
// index of disks where `file.json` needs to be updated.
//
// Returns collection of errors, indexed in accordance with input
// updateParts order.
// Write lockNS() should be done by caller.
func (xl XL) updatePartsMetadata(volume, path string, metadata xlMetaV1, updateParts []bool) []error {
xlMetaV1FilePath := pathJoin(path, xlMetaV1File)
errs := make([]error, len(xl.storageDisks))
for index := range updateParts {
errs[index] = errors.New("Metadata not updated")
}
for index, shouldUpdate := range updateParts {
if !shouldUpdate {
continue
}
writer, err := xl.storageDisks[index].CreateFile(volume, xlMetaV1FilePath)
errs[index] = err
if err != nil {
continue
}
err = metadata.Write(writer)
if err != nil {
errs[index] = err
safeCloseAndRemove(writer)
continue
}
writer.Close()
}
return errs
} }

153
xl-v1-list-objects.go Normal file
View File

@ -0,0 +1,153 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import "strings"
// listObjects - collects up to maxKeys entries from a tree walk over
// bucket/prefix and shapes them into a ListObjectsInfo.
//
// A walk previously saved for the same (bucket, recursive, marker,
// prefix) is resumed when lookupTreeWalk finds one, otherwise a new
// walk is started. Entries ending in '/' are reported as prefixes, all
// others are resolved with getObjectInfo and reported as objects. When
// the walk has more to offer it is saved under the name of the last
// collected entry and the result is flagged as truncated.
//
// A walk error of errFileNotFound yields an empty listing with a nil
// error; every other walk or lookup error is returned via toObjectErr.
func (xl xlObjects) listObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
	// A '/' delimiter asks for a single level, anything else lists recursively.
	recursive := delimiter != slashSeparator

	walker := xl.lookupTreeWalk(listParams{bucket, recursive, marker, prefix})
	if walker == nil {
		walker = xl.startTreeWalk(bucket, prefix, marker, recursive, xl.isObject)
	}

	var (
		collected []ObjectInfo
		lastName  string
		done      bool
	)
	for count := 0; count < maxKeys; count++ {
		res, open := <-walker.ch
		if !open {
			// Closed channel, nothing more to list.
			done = true
			break
		}
		if res.err != nil {
			// File not found is a valid case.
			if res.err == errFileNotFound {
				return ListObjectsInfo{}, nil
			}
			return ListObjectsInfo{}, toObjectErr(res.err, bucket, prefix)
		}

		var info ObjectInfo
		if strings.HasSuffix(res.entry, slashSeparator) {
			// Directory entry, the name stays the full path.
			info = ObjectInfo{Bucket: bucket, Name: res.entry, IsDir: true}
		} else {
			// Regular entry, fetch its object info.
			var statErr error
			info, statErr = xl.getObjectInfo(bucket, res.entry)
			if statErr != nil {
				return ListObjectsInfo{}, toObjectErr(statErr, bucket, prefix)
			}
		}
		lastName = info.Name
		collected = append(collected, info)

		if res.end {
			// The walker flagged this entry as its last one.
			done = true
			break
		}
	}

	if !done {
		// Keep the walker around so the next page can resume from it.
		xl.saveTreeWalk(listParams{bucket, recursive, lastName, prefix}, walker)
	}

	listing := ListObjectsInfo{IsTruncated: !done}
	for _, info := range collected {
		listing.NextMarker = info.Name
		if info.IsDir {
			listing.Prefixes = append(listing.Prefixes, info.Name)
			continue
		}
		listing.Objects = append(listing.Objects, ObjectInfo{
			Name:    info.Name,
			ModTime: info.ModTime,
			Size:    info.Size,
			IsDir:   false,
		})
	}
	return listing, nil
}
// ListObjects - list all objects at prefix, delimited by '/'.
//
// The arguments are checked in this order before any listing happens:
// bucket name validity, bucket existence, prefix validity, delimiter
// support (only "" and '/'), and marker/prefix agreement. A maxKeys of
// zero, or '/' used as both delimiter and prefix, produce an empty
// listing. A negative maxKeys or one above maxObjectList is reset to
// maxObjectList.
func (xl xlObjects) ListObjects(bucket, prefix, marker, delimiter string, maxKeys int) (ListObjectsInfo, error) {
	var none ListObjectsInfo
	switch {
	case !IsValidBucketName(bucket):
		return none, BucketNameInvalid{Bucket: bucket}
	case !xl.isBucketExist(bucket):
		return none, BucketNotFound{Bucket: bucket}
	case !IsValidObjectPrefix(prefix):
		return none, ObjectNameInvalid{Bucket: bucket, Object: prefix}
	case delimiter != "" && delimiter != slashSeparator:
		// Only '/' is supported as a delimiter.
		return none, UnsupportedDelimiter{Delimiter: delimiter}
	case marker != "" && !strings.HasPrefix(marker, prefix):
		// A marker has to live under the requested prefix.
		return none, InvalidMarkerPrefixCombination{Marker: marker, Prefix: prefix}
	case maxKeys == 0:
		// With max keys of zero we have reached eof already.
		return none, nil
	case delimiter == slashSeparator && prefix == slashSeparator:
		// For delimiter and prefix as '/' we do not list anything at all
		// since according to s3 spec we stop at the 'delimiter' along
		// with the prefix. On a flat namespace with 'prefix' as '/'
		// we don't have any entries, since all the keys are of form 'keyName/...'
		return none, nil
	}

	// Over flowing count - reset to maxObjectList.
	if maxKeys < 0 || maxKeys > maxObjectList {
		maxKeys = maxObjectList
	}

	listing, err := xl.listObjects(bucket, prefix, marker, delimiter, maxKeys)
	if err != nil {
		return none, toObjectErr(err, bucket, prefix)
	}
	return listing, nil
}

451
xl-v1-metadata.go Normal file
View File

@ -0,0 +1,451 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"encoding/json"
"path"
"sort"
"sync"
"time"
)
const (
// Erasure related constants.
erasureAlgorithmKlauspost = "klauspost/reedsolomon/vandermonde"
erasureAlgorithmISAL = "isa-l/reedsolomon/cauchy"
)
// objectPartInfo - info of each part kept in the multipart metadata
// file after CompleteMultipartUpload() is called.
type objectPartInfo struct {
	Number int    `json:"number"`
	Name   string `json:"name"`
	ETag   string `json:"etag"`
	Size   int64  `json:"size"`
}

// byObjectPartNumber - a collection of parts satisfying sort.Interface,
// ordered by ascending part number.
type byObjectPartNumber []objectPartInfo

// Len - number of parts in the collection.
func (parts byObjectPartNumber) Len() int {
	return len(parts)
}

// Swap - exchanges the parts at positions i and j.
func (parts byObjectPartNumber) Swap(i, j int) {
	parts[j], parts[i] = parts[i], parts[j]
}

// Less - reports whether the part at i has a smaller number than the
// part at j.
func (parts byObjectPartNumber) Less(i, j int) bool {
	return parts[j].Number > parts[i].Number
}
// checkSumInfo - carries checksums of individual scattered parts per disk.
type checkSumInfo struct {
	Name      string `json:"name"`
	Algorithm string `json:"algorithm"`
	Hash      string `json:"hash"`
}

// erasureInfo - carries erasure coding related information, block
// distribution and checksums.
type erasureInfo struct {
	Algorithm    string         `json:"algorithm"`
	DataBlocks   int            `json:"data"`
	ParityBlocks int            `json:"parity"`
	BlockSize    int64          `json:"blockSize"`
	Index        int            `json:"index"`
	Distribution []int          `json:"distribution"`
	Checksum     []checkSumInfo `json:"checksum,omitempty"`
}

// IsValid - tells if the erasure info is sane: data blocks and parity
// blocks must both be non-zero and the distribution must not be empty.
func (e erasureInfo) IsValid() bool {
	if e.DataBlocks == 0 || e.ParityBlocks == 0 {
		return false
	}
	return len(e.Distribution) > 0
}

// pickValidErasureInfo - returns the first entry of eInfos for which
// IsValid holds. If there is no such entry this function panics.
func pickValidErasureInfo(eInfos []erasureInfo) erasureInfo {
	for i := range eInfos {
		if eInfos[i].IsValid() {
			return eInfos[i]
		}
	}
	panic("Unable to look for valid erasure info content")
}
// statInfo - carries stat information of the object, stored under the
// `stat` key of `xl.json`.
type statInfo struct {
Size int64 `json:"size"` // Size of the object `xl.json`.
ModTime time.Time `json:"modTime"` // ModTime of the object `xl.json`.
Version int64 `json:"version"` // Version of the object `xl.json`, useful to calculate quorum.
}
// A xlMetaV1 represents `xl.json` metadata header. The JSON layout is
// fixed by the struct tags below.
type xlMetaV1 struct {
Version string `json:"version"` // Version of the current `xl.json`; IsValid expects "1".
Format string `json:"format"` // Format of the current `xl.json`; IsValid expects "xl".
Stat statInfo `json:"stat"` // Stat of the current object `xl.json`.
// Erasure coded info for the current object `xl.json`.
Erasure erasureInfo `json:"erasure"`
// Minio release tag for current object `xl.json`.
Minio struct {
Release string `json:"release"`
} `json:"minio"`
// Metadata map for current object `xl.json`.
Meta map[string]string `json:"meta"`
// Individual parts of the object; AddObjectPart keeps this list
// sorted by part number. Omitted from the JSON when empty.
Parts []objectPartInfo `json:"parts,omitempty"`
}
// newXLMetaV1 - builds a fresh xlMetaV1 carrying version "1", format
// "xl", the current minio release tag and a new erasure info for the
// given block counts. The erasure distribution comes from
// randInts(dataBlocks + parityBlocks).
func newXLMetaV1(dataBlocks, parityBlocks int) (xlMeta xlMetaV1) {
	xlMeta = xlMetaV1{
		Version: "1",
		Format:  "xl",
		Erasure: erasureInfo{
			Algorithm:    erasureAlgorithmKlauspost,
			DataBlocks:   dataBlocks,
			ParityBlocks: parityBlocks,
			BlockSize:    blockSizeV1,
			Distribution: randInts(dataBlocks + parityBlocks),
		},
	}
	// Minio is an anonymous struct field, set it after the literal.
	xlMeta.Minio.Release = minioReleaseTag
	return xlMeta
}
// IsValid - tells if the metadata header is sane: the version string
// has to be "1" and the format has to be "xl".
func (m xlMetaV1) IsValid() bool {
	if m.Version != "1" {
		return false
	}
	return m.Format == "xl"
}
// ObjectPartIndex - returns the position within m.Parts of the first
// part whose number equals partNumber, or -1 when there is none.
func (m xlMetaV1) ObjectPartIndex(partNumber int) (index int) {
	for index = 0; index < len(m.Parts); index++ {
		if m.Parts[index].Number == partNumber {
			return index
		}
	}
	return -1
}
// AddObjectPart - records a part in m.Parts. When a part with the same
// number is already present its entry is overwritten in place;
// otherwise the part is appended and the list is re-sorted by part
// number.
func (m *xlMetaV1) AddObjectPart(partNumber int, partName string, partETag string, partSize int64) {
	entry := objectPartInfo{
		Number: partNumber,
		Name:   partName,
		ETag:   partETag,
		Size:   partSize,
	}

	// Replace the existing entry for this part number, if any.
	for pos := range m.Parts {
		if m.Parts[pos].Number == partNumber {
			m.Parts[pos] = entry
			return
		}
	}

	// New part: append, then restore the ordering by part number.
	m.Parts = append(m.Parts, entry)
	sort.Sort(byObjectPartNumber(m.Parts))
}
// ObjectToPartOffset - translate offset of an object to offset of its
// individual part.
//
// Parts are walked in order, subtracting each part's size from the
// remaining offset until it falls inside a part; that part's index and
// the remaining offset are returned. A zero byte part (an empty object
// or an empty last part) is addressable only when the remaining offset
// is exactly zero, so an offset past the end of the object is never
// mapped onto an empty part. InvalidRange is returned when the offset
// lies beyond the size of the object.
func (m xlMetaV1) ObjectToPartOffset(offset int64) (partIndex int, partOffset int64, err error) {
	partOffset = offset
	// Seek until object offset maps to a particular part offset.
	for i, part := range m.Parts {
		// Last part can be of '0' bytes, treat it specially: it is
		// the answer only if nothing is left to skip.
		if part.Size == 0 && partOffset == 0 {
			return i, 0, nil
		}
		// Offset is smaller than size we have reached the proper part offset.
		if partOffset < part.Size {
			return i, partOffset, nil
		}
		// Continue towards the next part.
		partOffset -= part.Size
	}
	// Offset beyond the size of the object return InvalidRange.
	return 0, 0, InvalidRange{}
}
// pickValidXLMeta - returns the first entry of xlMetas for which
// IsValid holds. If there is no such entry this function panics.
func pickValidXLMeta(xlMetas []xlMetaV1) xlMetaV1 {
	for i := range xlMetas {
		if xlMetas[i].IsValid() {
			return xlMetas[i]
		}
	}
	panic("Unable to look for valid XL metadata content")
}
// readXLMetadata - reads and decodes the object's `xl.json` from the
// first allocated (non-nil) load balanced quorum disk. A read or JSON
// decoding failure on that disk is returned with an empty xlMetaV1.
//
// NOTE(review): when every candidate disk is nil nothing is read and
// the zero xlMetaV1 is returned together with a nil error.
func (xl xlObjects) readXLMetadata(bucket, object string) (xlMeta xlMetaV1, err error) {
	for _, candidate := range xl.getLoadBalancedQuorumDisks() {
		if candidate == nil {
			continue
		}
		// Only this one disk is asked, whatever the outcome.
		var raw []byte
		raw, err = readAll(candidate, bucket, path.Join(object, xlMetaJSONFile))
		if err == nil {
			err = json.Unmarshal(raw, &xlMeta)
		}
		if err != nil {
			return xlMetaV1{}, err
		}
		return xlMeta, nil
	}
	return xlMeta, nil
}
// renameXLMetadata - renames `xl.json` from source prefix to
// destination prefix on all disks in parallel; after a successful
// rename the source prefix itself is deleted on that disk.
//
// A disk counts as failed when it is unallocated, when the rename
// fails or when the follow-up delete fails. The outcome is:
//   - nil when the failures stay within len(storageDisks)-writeQuorum,
//   - nil as well when they exceed that but stay within
//     len(storageDisks)-readQuorum,
//   - errXLWriteQuorum otherwise, after renaming `xl.json` back on
//     every disk that had not failed.
func (xl xlObjects) renameXLMetadata(srcBucket, srcPrefix, dstBucket, dstPrefix string) error {
	srcJSONFile := path.Join(srcPrefix, xlMetaJSONFile)
	dstJSONFile := path.Join(dstPrefix, xlMetaJSONFile)

	totalDisks := len(xl.storageDisks)
	// One result slot per disk; each goroutine writes only its own slot.
	results := make([]error, totalDisks)
	var wg sync.WaitGroup
	for i, d := range xl.storageDisks {
		if d == nil {
			results[i] = errDiskNotFound
			continue
		}
		wg.Add(1)
		go func(slot int, target StorageAPI) {
			defer wg.Done()
			renameErr := target.RenameFile(srcBucket, srcJSONFile, dstBucket, dstJSONFile)
			if renameErr != nil {
				results[slot] = renameErr
				return
			}
			// Delete any dangling directories.
			results[slot] = target.DeleteFile(srcBucket, srcPrefix)
		}(i, d)
	}
	// Wait for all the routines.
	wg.Wait()

	failed := 0
	for _, result := range results {
		if result != nil {
			failed++
		}
	}

	// Failures within the write quorum allowance are acceptable.
	if failed <= totalDisks-xl.writeQuorum {
		return nil
	}
	// Beyond that, a successful read quorum still counts as success.
	if failed <= totalDisks-xl.readQuorum {
		return nil
	}

	// Undo rename `xl.json` on disks where the operation succeeded.
	for i, d := range xl.storageDisks {
		if d == nil || results[i] != nil {
			continue
		}
		wg.Add(1)
		go func(target StorageAPI) {
			defer wg.Done()
			_ = target.RenameFile(dstBucket, dstJSONFile, srcBucket, srcJSONFile)
		}(d)
	}
	wg.Wait()
	return errXLWriteQuorum
}
// deleteXLMetdata - removes the `xl.json` stored under prefix on the
// given disk and returns whatever error the disk reports.
func deleteXLMetdata(disk StorageAPI, bucket, prefix string) error {
	return disk.DeleteFile(bucket, path.Join(prefix, xlMetaJSONFile))
}
// writeXLMetadata - serializes xlMeta as JSON and appends it to
// `xl.json` under prefix on a single disk.
//
// Returns the marshalling or disk error if any, and errUnexpected when
// the disk reports fewer (or more) bytes written than were submitted.
func writeXLMetadata(disk StorageAPI, bucket, prefix string, xlMeta xlMetaV1) error {
	payload, mErr := json.Marshal(&xlMeta)
	if mErr != nil {
		return mErr
	}
	written, aErr := disk.AppendFile(bucket, path.Join(prefix, xlMetaJSONFile), payload)
	switch {
	case aErr != nil:
		return aErr
	case written != int64(len(payload)):
		// Short write, the metadata on disk cannot be trusted.
		return errUnexpected
	}
	return nil
}
// writeUniqueXLMetadata - writes a distinct `xl.json` to every disk in
// disk order: xlMetas[i] is stamped with erasure index i+1 and written
// to xl.storageDisks[i].
//
// The erasure index is assigned in place, so the caller's xlMetas slice
// is modified. A nil disk counts as a failed write.
//
// Returns nil when the number of failed disks is within what write
// quorum tolerates or, failing that, within what read quorum tolerates
// (same policy as renameXLMetadata). Otherwise every `xl.json` that was
// written is deleted again (best effort) and errXLWriteQuorum is
// returned.
func (xl xlObjects) writeUniqueXLMetadata(bucket, prefix string, xlMetas []xlMetaV1) error {
	var wg = &sync.WaitGroup{}
	var mErrs = make([]error, len(xl.storageDisks))

	// Start writing `xl.json` to all disks in parallel.
	for index, disk := range xl.storageDisks {
		if disk == nil {
			mErrs[index] = errDiskNotFound
			continue
		}
		wg.Add(1)
		// Write `xl.json` in a routine.
		go func(index int, disk StorageAPI) {
			defer wg.Done()

			// Pick one xlMeta for a disk at index.
			xlMetas[index].Erasure.Index = index + 1

			// Write unique `xl.json` for a disk at index.
			mErrs[index] = writeXLMetadata(disk, bucket, prefix, xlMetas[index])
		}(index, disk)
	}

	// Wait for all the routines.
	wg.Wait()

	// Count the disks that failed.
	var errCount = 0
	for _, err := range mErrs {
		if err != nil {
			errCount++
		}
	}

	// More failures than write quorum tolerates.
	if errCount > len(xl.storageDisks)-xl.writeQuorum {
		// Read quorum still holds, report success.
		if errCount <= len(xl.storageDisks)-xl.readQuorum {
			return nil
		}

		// No quorum at all: delete the `xl.json` files left over on the
		// disks where the write succeeded.
		for index, disk := range xl.storageDisks {
			if disk == nil {
				continue
			}
			// Undo the write in parallel.
			wg.Add(1)
			go func(index int, disk StorageAPI) {
				defer wg.Done()
				if mErrs[index] != nil {
					return
				}
				// Best effort cleanup, failure to delete is ignored.
				_ = deleteXLMetdata(disk, bucket, prefix)
			}(index, disk)
		}
		wg.Wait()
		return errXLWriteQuorum
	}
	return nil
}
// writeSameXLMetadata - writes the same `xl.json` content to every
// disk in disk order; only the erasure index differs, it is set to the
// disk position + 1 on a per disk copy of xlMeta.
//
// A nil disk counts as a failed write.
//
// Returns nil when the number of failed disks is within what write
// quorum tolerates or, failing that, within what read quorum tolerates
// (same policy as renameXLMetadata). Otherwise every `xl.json` that was
// written is deleted again (best effort) and errXLWriteQuorum is
// returned.
func (xl xlObjects) writeSameXLMetadata(bucket, prefix string, xlMeta xlMetaV1) error {
	var wg = &sync.WaitGroup{}
	var mErrs = make([]error, len(xl.storageDisks))

	// Start writing `xl.json` to all disks in parallel.
	for index, disk := range xl.storageDisks {
		if disk == nil {
			mErrs[index] = errDiskNotFound
			continue
		}
		wg.Add(1)
		// Write `xl.json` in a routine, metadata is a private copy.
		go func(index int, disk StorageAPI, metadata xlMetaV1) {
			defer wg.Done()

			// Save the disk order index.
			metadata.Erasure.Index = index + 1

			// Write xl metadata.
			mErrs[index] = writeXLMetadata(disk, bucket, prefix, metadata)
		}(index, disk, xlMeta)
	}

	// Wait for all the routines.
	wg.Wait()

	// Count the disks that failed.
	var errCount = 0
	for _, err := range mErrs {
		if err != nil {
			errCount++
		}
	}

	// More failures than write quorum tolerates.
	if errCount > len(xl.storageDisks)-xl.writeQuorum {
		// Read quorum still holds, report success.
		if errCount <= len(xl.storageDisks)-xl.readQuorum {
			return nil
		}

		// No quorum at all: delete the `xl.json` files left over on the
		// disks where the write succeeded.
		for index, disk := range xl.storageDisks {
			if disk == nil {
				continue
			}
			// Undo the write in parallel.
			wg.Add(1)
			go func(index int, disk StorageAPI) {
				defer wg.Done()
				if mErrs[index] != nil {
					return
				}
				// Best effort cleanup, failure to delete is ignored.
				_ = deleteXLMetdata(disk, bucket, prefix)
			}(index, disk)
		}
		wg.Wait()
		return errXLWriteQuorum
	}
	return nil
}

417
xl-v1-multipart-common.go Normal file
View File

@ -0,0 +1,417 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"encoding/json"
"path"
"sort"
"strings"
"sync"
"time"
)
// uploadInfo describes one pending multipart upload of an object as
// recorded in `uploads.json`.
type uploadInfo struct {
	UploadID  string    `json:"uploadId"`  // Upload id of the active multipart upload.
	Deleted   bool      `json:"deleted"`   // Not used yet, reserved for future use.
	Initiated time.Time `json:"initiated"` // Time at which the upload id was initiated.
}

// uploadsV1 is the top level document stored in `uploads.json`.
type uploadsV1 struct {
	Version string       `json:"version"`   // Version of this `uploads.json`.
	Format  string       `json:"format"`    // Format of this `uploads.json`.
	Uploads []uploadInfo `json:"uploadIds"` // All upload ids known for the object.
}

// byInitiatedTime implements sort.Interface and orders upload entries
// from the oldest to the newest initiation time.
type byInitiatedTime []uploadInfo

func (s byInitiatedTime) Len() int {
	return len(s)
}

func (s byInitiatedTime) Swap(i, j int) {
	s[i], s[j] = s[j], s[i]
}

func (s byInitiatedTime) Less(i, j int) bool {
	return s[i].Initiated.Before(s[j].Initiated)
}

// AddUploadID - records a new upload id and keeps the list ordered by
// initiation time.
func (u *uploadsV1) AddUploadID(uploadID string, initiated time.Time) {
	entry := uploadInfo{
		UploadID:  uploadID,
		Initiated: initiated,
	}
	u.Uploads = append(u.Uploads, entry)
	sort.Sort(byInitiatedTime(u.Uploads))
}

// Index - returns the position of the entry carrying uploadID, or -1
// when there is no such entry.
func (u uploadsV1) Index(uploadID string) int {
	for pos := 0; pos < len(u.Uploads); pos++ {
		if u.Uploads[pos].UploadID == uploadID {
			return pos
		}
	}
	return -1
}
// readUploadsJSON - reads and decodes the `uploads.json` kept for
// bucket/object under the multipart meta prefix on the given disk.
//
// On a read or decode failure a zero value uploadsV1 is returned along
// with the error.
func readUploadsJSON(bucket, object string, disk StorageAPI) (uploadIDs uploadsV1, err error) {
	jsonPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)

	// Slurp the whole `uploads.json`.
	data, err := readAll(disk, minioMetaBucket, jsonPath)
	if err != nil {
		return uploadsV1{}, err
	}

	var parsed uploadsV1
	if err = json.Unmarshal(data, &parsed); err != nil {
		return uploadsV1{}, err
	}
	return parsed, nil
}
// updateUploadsJSON - replaces `uploads.json` of bucket/object with the
// given uploadsJSON on all disks.
//
// On every disk the new content is first written to a uniquely named
// file under the tmp meta prefix and then renamed over `uploads.json`.
// A nil disk counts as a failed update.
//
// With a single disk the error of that disk is returned directly. With
// more disks the failures are counted: the call succeeds when they are
// within what a write quorum of N/2+3 disks tolerates or, failing that,
// within what a read quorum of N/2+1 disks tolerates. Otherwise
// `uploads.json` is renamed back to the tmp location (best effort) on
// the disks that succeeded and errXLWriteQuorum is returned.
//
// NOTE(review): the quorum sizes are taken from the constants of the
// original expression; presumably they mirror the xl read/write quorum
// configuration - confirm.
func updateUploadsJSON(bucket, object string, uploadsJSON uploadsV1, storageDisks ...StorageAPI) error {
	uploadsPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
	uniqueID := getUUID()
	tmpUploadsPath := path.Join(tmpMetaPrefix, uniqueID)
	var errs = make([]error, len(storageDisks))
	var wg = &sync.WaitGroup{}

	// Update `uploads.json` for all the disks.
	for index, disk := range storageDisks {
		if disk == nil {
			errs[index] = errDiskNotFound
			continue
		}
		wg.Add(1)
		// Update `uploads.json` in routine.
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			uploadsBytes, wErr := json.Marshal(uploadsJSON)
			if wErr != nil {
				errs[index] = wErr
				return
			}
			n, wErr := disk.AppendFile(minioMetaBucket, tmpUploadsPath, uploadsBytes)
			if wErr != nil {
				errs[index] = wErr
				return
			}
			if n != int64(len(uploadsBytes)) {
				errs[index] = errUnexpected
				return
			}
			if wErr = disk.RenameFile(minioMetaBucket, tmpUploadsPath, minioMetaBucket, uploadsPath); wErr != nil {
				errs[index] = wErr
				return
			}
		}(index, disk)
	}

	// Wait for all the routines to finish updating `uploads.json`
	wg.Wait()

	// For only single disk return first error.
	if len(storageDisks) == 1 {
		return errs[0]
	} // else count all the errors for quorum validation.

	var errCount = 0
	for _, uErr := range errs {
		if uErr != nil {
			errCount++
		}
	}

	// Quorum sizes, spelled out so that the subtraction below cannot
	// fall victim to operator precedence.
	totalDisks := len(storageDisks)
	writeQuorum := totalDisks/2 + 3
	readQuorum := totalDisks/2 + 1

	// More failures than write quorum tolerates.
	if errCount > totalDisks-writeQuorum {
		// Read quorum still holds, report success.
		if errCount <= totalDisks-readQuorum {
			return nil
		}

		// Rename `uploads.json` left over back to tmp location.
		for index, disk := range storageDisks {
			if disk == nil {
				continue
			}
			// Undo rename `uploads.json` in parallel.
			wg.Add(1)
			go func(index int, disk StorageAPI) {
				defer wg.Done()
				if errs[index] != nil {
					return
				}
				// Best effort, failure to undo is ignored.
				_ = disk.RenameFile(minioMetaBucket, uploadsPath, minioMetaBucket, tmpUploadsPath)
			}(index, disk)
		}
		wg.Wait()
		return errXLWriteQuorum
	}
	return nil
}
// newUploadsV1 - returns an empty uploads document of version "1"
// carrying the given format.
func newUploadsV1(format string) uploadsV1 {
	return uploadsV1{
		Version: "1",
		Format:  format,
	}
}
// writeUploadJSON - creates `uploads.json` for bucket/object, or
// extends the existing one, with a new uploadID and its initiation
// time, and stores the result on all disks.
//
// The current content is read from the first non-nil disk only. When
// that disk has no `uploads.json` (errFileNotFound) a fresh document is
// started, with format "fs" for a single disk and "xl" otherwise; any
// other read error is returned as is.
//
// On every disk the document is written to a uniquely named file under
// the tmp meta prefix and then renamed over `uploads.json`; when the
// rename fails the tmp file is removed again. A nil disk counts as a
// failed write.
//
// With a single disk the error of that disk is returned directly. With
// more disks the failures are counted: the call succeeds when they are
// within what a write quorum of N/2+3 disks tolerates or, failing that,
// within what a read quorum of N/2+1 disks tolerates. Otherwise
// `uploads.json` is renamed back to the tmp location (best effort) on
// the disks that succeeded and errXLWriteQuorum is returned.
//
// NOTE(review): the quorum sizes are taken from the constants of the
// original expression; presumably they mirror the xl read/write quorum
// configuration - confirm.
func writeUploadJSON(bucket, object, uploadID string, initiated time.Time, storageDisks ...StorageAPI) (err error) {
	uploadsPath := path.Join(mpartMetaPrefix, bucket, object, uploadsJSONFile)
	uniqueID := getUUID()
	tmpUploadsPath := path.Join(tmpMetaPrefix, uniqueID)

	var errs = make([]error, len(storageDisks))
	var wg = &sync.WaitGroup{}

	// Read the current `uploads.json` from the first available disk.
	var uploadsJSON uploadsV1
	for _, disk := range storageDisks {
		if disk == nil {
			continue
		}
		uploadsJSON, err = readUploadsJSON(bucket, object, disk)
		break
	}
	if err != nil {
		// For any other errors.
		if err != errFileNotFound {
			return err
		}
		if len(storageDisks) == 1 {
			// Set uploads format to `fs` for single disk.
			uploadsJSON = newUploadsV1("fs")
		} else {
			// Set uploads format to `xl` otherwise.
			uploadsJSON = newUploadsV1("xl")
		}
	}
	// Add a new upload id.
	uploadsJSON.AddUploadID(uploadID, initiated)

	// Update `uploads.json` on all disks.
	for index, disk := range storageDisks {
		if disk == nil {
			errs[index] = errDiskNotFound
			continue
		}
		wg.Add(1)
		// Update `uploads.json` in a routine.
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			uploadsJSONBytes, wErr := json.Marshal(&uploadsJSON)
			if wErr != nil {
				errs[index] = wErr
				return
			}
			// Write `uploads.json` to disk.
			n, wErr := disk.AppendFile(minioMetaBucket, tmpUploadsPath, uploadsJSONBytes)
			if wErr != nil {
				errs[index] = wErr
				return
			}
			if n != int64(len(uploadsJSONBytes)) {
				errs[index] = errUnexpected
				return
			}
			wErr = disk.RenameFile(minioMetaBucket, tmpUploadsPath, minioMetaBucket, uploadsPath)
			if wErr != nil {
				// Do not leave the tmp file behind. A failure to delete
				// it takes precedence over the rename error.
				if dErr := disk.DeleteFile(minioMetaBucket, tmpUploadsPath); dErr != nil {
					errs[index] = dErr
					return
				}
				errs[index] = wErr
				return
			}
			errs[index] = nil
		}(index, disk)
	}

	// Wait for all the writes to finish.
	wg.Wait()

	// For only single disk return first error.
	if len(storageDisks) == 1 {
		return errs[0]
	} // else count all the errors for quorum validation.

	var errCount = 0
	for _, wErr := range errs {
		if wErr != nil {
			errCount++
		}
	}

	// Quorum sizes, spelled out so that the subtraction below cannot
	// fall victim to operator precedence.
	totalDisks := len(storageDisks)
	writeQuorum := totalDisks/2 + 3
	readQuorum := totalDisks/2 + 1

	// More failures than write quorum tolerates.
	if errCount > totalDisks-writeQuorum {
		// Read quorum still holds, report success.
		if errCount <= totalDisks-readQuorum {
			return nil
		}

		// Rename `uploads.json` left over back to tmp location.
		for index, disk := range storageDisks {
			if disk == nil {
				continue
			}
			// Undo rename `uploads.json` in parallel.
			wg.Add(1)
			go func(index int, disk StorageAPI) {
				defer wg.Done()
				if errs[index] != nil {
					return
				}
				// Best effort, failure to undo is ignored.
				_ = disk.RenameFile(minioMetaBucket, uploadsPath, minioMetaBucket, tmpUploadsPath)
			}(index, disk)
		}
		wg.Wait()
		return errXLWriteQuorum
	}
	return nil
}
// cleanupUploadedParts - runs cleanupDir on the uploadID directory of
// bucket/object on all disks in parallel.
//
// A nil disk is reported as errDiskNotFound. Once every disk is done,
// the first error in disk order is returned, or nil when all succeeded.
func cleanupUploadedParts(bucket, object, uploadID string, storageDisks ...StorageAPI) error {
	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)

	errs := make([]error, len(storageDisks))
	wg := &sync.WaitGroup{}
	for index, disk := range storageDisks {
		if disk == nil {
			errs[index] = errDiskNotFound
			continue
		}
		wg.Add(1)
		// One cleanup routine per disk.
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			if cErr := cleanupDir(disk, minioMetaBucket, uploadIDPath); cErr != nil {
				errs[index] = cErr
			}
		}(index, disk)
	}
	wg.Wait()

	// Report the first failure, if any.
	for _, cErr := range errs {
		if cErr != nil {
			return cErr
		}
	}
	return nil
}
// listMultipartUploadIDs - lists up to 'count' upload ids of an object
// from its `uploads.json` on the given disk.
//
// With a non empty uploadIDMarker the listing starts right after the
// entry matching the marker; a marker that matches nothing yields an
// empty listing. The returned bool is true when the listing reached the
// end of `uploads.json`.
func listMultipartUploadIDs(bucketName, objectName, uploadIDMarker string, count int, disk StorageAPI) ([]uploadMetadata, bool, error) {
	uploadsJSON, err := readUploadsJSON(bucketName, objectName, disk)
	if err != nil {
		return nil, false, err
	}
	entries := uploadsJSON.Uploads

	// Work out where the listing starts.
	next := 0
	if uploadIDMarker != "" {
		next = len(entries)
		for pos := range entries {
			if entries[pos].UploadID == uploadIDMarker {
				// The marker itself was part of the previous listing.
				next = pos + 1
				break
			}
		}
	}

	var uploads []uploadMetadata
	for next < len(entries) {
		current := entries[next]
		uploads = append(uploads, uploadMetadata{
			Object:    objectName,
			UploadID:  current.UploadID,
			Initiated: current.Initiated,
		})
		next++
		count--
		if count == 0 {
			break
		}
	}
	return uploads, next == len(entries), nil
}
// isMultipartUpload - reports whether prefix holds a multipart upload,
// i.e. whether `uploads.json` can be stat'ed under it on the first
// available disk. Reports true when no disk is available at all.
func (xl xlObjects) isMultipartUpload(bucket, prefix string) bool {
	for _, disk := range xl.getLoadBalancedQuorumDisks() {
		if disk == nil {
			continue
		}
		// Only the first usable disk is asked.
		_, err := disk.StatFile(bucket, pathJoin(prefix, uploadsJSONFile))
		return err == nil
	}
	return true
}
// listUploadsInfo - returns all upload entries recorded in the
// `uploads.json` addressed by prefixPath, read from the first available
// disk.
//
// prefixPath is split on "/" into three components, the second is used
// as bucket and the remainder as object. A missing `uploads.json` is
// not an error, it yields an empty list.
func (xl xlObjects) listUploadsInfo(prefixPath string) (uploadsInfo []uploadInfo, err error) {
	for _, disk := range xl.getLoadBalancedQuorumDisks() {
		if disk == nil {
			continue
		}
		components := strings.SplitN(prefixPath, "/", 3)
		bucket, object := components[1], components[2]
		uploadsJSON, rErr := readUploadsJSON(bucket, object, disk)
		switch {
		case rErr == nil:
			return uploadsJSON.Uploads, nil
		case rErr == errFileNotFound:
			return []uploadInfo{}, nil
		default:
			return nil, rErr
		}
	}
	return uploadsInfo, nil
}
// isUploadIDExists - reports whether xl.isObject() finds an object at
// the uploadID path of bucket/object inside the minio meta bucket.
func (xl xlObjects) isUploadIDExists(bucket, object, uploadID string) bool {
	return xl.isObject(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object, uploadID))
}
// removeObjectPart - deletes the part named partName of a multipart
// upload from minioMetaBucket on all available disks in parallel and
// waits for the deletions to finish.
func (xl xlObjects) removeObjectPart(bucket, object, uploadID, partName string) {
	partPath := path.Join(mpartMetaPrefix, bucket, object, uploadID, partName)

	var wg sync.WaitGroup
	for _, disk := range xl.storageDisks {
		if disk == nil {
			continue
		}
		wg.Add(1)
		go func(disk StorageAPI) {
			defer wg.Done()
			// Failure to remove parts that weren't present in
			// CompleteMultipartUpload requests is ignored. xl.json is the
			// authoritative source of truth on which parts constitute the
			// object, left over parts don't affect correctness.
			_ = disk.DeleteFile(minioMetaBucket, partPath)
		}(disk)
	}
	wg.Wait()
}

776
xl-v1-multipart.go Normal file
View File

@ -0,0 +1,776 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"crypto/md5"
"encoding/hex"
"fmt"
"io"
"path"
"path/filepath"
"strings"
"time"
"github.com/minio/minio/pkg/mimedb"
"github.com/skyrings/skyring-common/tools/uuid"
)
// listMultipartUploads - lists pending multipart uploads of a bucket,
// at most maxUploads entries per call.
//
// The listing has two phases:
//  1. When uploadIDMarker is set, the remaining upload ids of the
//     keyMarker object are listed first.
//  2. While entries are still wanted, the multipart meta tree is walked
//     starting at keyMarker. Entries ending with "/" are reported as
//     common prefixes, for every other entry its upload ids are read
//     from `uploads.json`.
//
// Walk entries and objects failing with errFileNotFound or
// errDiskNotFound are skipped, any other error aborts the listing and
// is returned. NextKeyMarker and NextUploadIDMarker are set from the
// last listed entry and cleared again when the result is not truncated.
func (xl xlObjects) listMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
	result := ListMultipartsInfo{
		IsTruncated: true,
		MaxUploads:  maxUploads,
		KeyMarker:   keyMarker,
		Prefix:      prefix,
		Delimiter:   delimiter,
	}

	recursive := true
	if delimiter == slashSeparator {
		recursive = false
	}

	// Not using path.Join() as it strips off the trailing '/'.
	multipartPrefixPath := pathJoin(mpartMetaPrefix, bucket, prefix)
	if prefix == "" {
		// Should have a trailing "/" if prefix is ""
		// For ex. multipartPrefixPath should be "multipart/bucket/" if prefix is ""
		multipartPrefixPath += slashSeparator
	}
	multipartMarkerPath := ""
	if keyMarker != "" {
		multipartMarkerPath = pathJoin(mpartMetaPrefix, bucket, keyMarker)
	}
	var uploads []uploadMetadata
	var err error
	var eof bool

	// List all upload ids for the keyMarker starting from
	// uploadIDMarker first.
	if uploadIDMarker != "" {
		nsMutex.RLock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, keyMarker))
		for _, disk := range xl.getLoadBalancedQuorumDisks() {
			if disk == nil {
				continue
			}
			uploads, _, err = listMultipartUploadIDs(bucket, keyMarker, uploadIDMarker, maxUploads, disk)
			break
		}
		nsMutex.RUnlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, keyMarker))
		if err != nil {
			return ListMultipartsInfo{}, err
		}
		maxUploads = maxUploads - len(uploads)
	}

	// Validate if we need to list further depending on maxUploads.
	if maxUploads > 0 {
		walker := xl.lookupTreeWalk(listParams{minioMetaBucket, recursive, multipartMarkerPath, multipartPrefixPath})
		if walker == nil {
			walker = xl.startTreeWalk(minioMetaBucket, multipartPrefixPath, multipartMarkerPath, recursive, xl.isMultipartUpload)
		}
		// Collect uploads until we have reached maxUploads count to 0.
		for maxUploads > 0 {
			walkResult, ok := <-walker.ch
			if !ok {
				// Closed channel.
				eof = true
				break
			}
			// For any walk error return right away.
			if walkResult.err != nil {
				// File not found or Disk not found is a valid case.
				if walkResult.err == errFileNotFound || walkResult.err == errDiskNotFound {
					continue
				}
				// Report the walk error itself; the outer err may well
				// be nil at this point.
				return ListMultipartsInfo{}, walkResult.err
			}
			entry := strings.TrimPrefix(walkResult.entry, retainSlash(pathJoin(mpartMetaPrefix, bucket)))
			// For an entry looking like a directory, store and
			// continue the loop not need to fetch uploads.
			if strings.HasSuffix(walkResult.entry, slashSeparator) {
				uploads = append(uploads, uploadMetadata{
					Object: entry,
				})
				maxUploads--
				if maxUploads == 0 {
					if walkResult.end {
						eof = true
						break
					}
				}
				continue
			}
			var newUploads []uploadMetadata
			var end bool
			uploadIDMarker = ""
			// For the new object entry we get all its pending uploadIDs.
			nsMutex.RLock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, entry))
			// Pick the first available disk.
			var disk StorageAPI
			for _, disk = range xl.getLoadBalancedQuorumDisks() {
				if disk == nil {
					continue
				}
				break
			}
			newUploads, end, err = listMultipartUploadIDs(bucket, entry, uploadIDMarker, maxUploads, disk)
			nsMutex.RUnlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, entry))
			if err != nil {
				// File not found or Disk not found is a valid case, the
				// entry is skipped.
				if err == errFileNotFound || err == errDiskNotFound {
					continue
				}
				return ListMultipartsInfo{}, err
			}
			uploads = append(uploads, newUploads...)
			maxUploads -= len(newUploads)
			if walkResult.end && end {
				eof = true
				break
			}
		}
	}

	// For all received uploads fill in the multiparts result.
	for _, upload := range uploads {
		var objectName string
		var uploadID string
		if strings.HasSuffix(upload.Object, slashSeparator) {
			// All directory entries are common prefixes.
			uploadID = "" // For common prefixes, upload ids are empty.
			objectName = upload.Object
			result.CommonPrefixes = append(result.CommonPrefixes, objectName)
		} else {
			uploadID = upload.UploadID
			objectName = upload.Object
			result.Uploads = append(result.Uploads, upload)
		}
		result.NextKeyMarker = objectName
		result.NextUploadIDMarker = uploadID
	}
	result.IsTruncated = !eof

	// Result is not truncated, reset the markers.
	if !result.IsTruncated {
		result.NextKeyMarker = ""
		result.NextUploadIDMarker = ""
	}
	return result, nil
}
// ListMultipartUploads - lists the pending multipart uploads of a
// bucket. 'prefix' restricts the listing to matching uploads,
// 'keyMarker' and 'uploadIDMarker' tell where the listing resumes and
// 'delimiter' turns the output into a directory like listing.
//
// Implements S3 compatible ListMultipartUploads API. The resulting
// ListMultipartsInfo structure is marshalled directly into XML and
// replied back to the client.
//
// All arguments are validated before the listing starts: bucket name,
// bucket existence, prefix, delimiter (only "" and "/" are supported),
// keyMarker/prefix combination and, when given, the uploadIDMarker
// which must be a non zero UUID and cannot be combined with a keyMarker
// ending in "/".
func (xl xlObjects) ListMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter string, maxUploads int) (ListMultipartsInfo, error) {
	switch {
	case !IsValidBucketName(bucket):
		return ListMultipartsInfo{}, BucketNameInvalid{Bucket: bucket}
	case !xl.isBucketExist(bucket):
		return ListMultipartsInfo{}, BucketNotFound{Bucket: bucket}
	case !IsValidObjectPrefix(prefix):
		return ListMultipartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: prefix}
	case delimiter != "" && delimiter != slashSeparator:
		// Any delimiter other than '/' is not supported.
		return ListMultipartsInfo{}, UnsupportedDelimiter{
			Delimiter: delimiter,
		}
	case keyMarker != "" && !strings.HasPrefix(keyMarker, prefix):
		// The marker has to live under the prefix.
		return ListMultipartsInfo{}, InvalidMarkerPrefixCombination{
			Marker: keyMarker,
			Prefix: prefix,
		}
	}

	if uploadIDMarker == "" {
		return xl.listMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads)
	}

	// An upload id marker makes no sense for a directory like key marker.
	if strings.HasSuffix(keyMarker, slashSeparator) {
		return ListMultipartsInfo{}, InvalidUploadIDKeyCombination{
			UploadIDMarker: uploadIDMarker,
			KeyMarker:      keyMarker,
		}
	}
	id, err := uuid.Parse(uploadIDMarker)
	switch {
	case err != nil:
		return ListMultipartsInfo{}, err
	case id.IsZero():
		return ListMultipartsInfo{}, MalformedUploadID{
			UploadID: uploadIDMarker,
		}
	}
	return xl.listMultipartUploads(bucket, prefix, keyMarker, uploadIDMarker, delimiter, maxUploads)
}
// newMultipartUpload - starts a new multipart upload for an object and
// returns its unique upload id.
//
// While holding the namespace lock on the multipart directory of the
// object, the new upload id is recorded in `uploads.json` on all disks
// (see writeUploadJSON). The initial `xl.json` of the upload is then
// written under the tmp meta prefix and renamed to the upload id
// directory.
//
// A missing "content-type" in meta is filled in, in the caller's map,
// from the object name extension via mimedb and defaults to
// "application/octet-stream".
func (xl xlObjects) newMultipartUpload(bucket string, object string, meta map[string]string) (uploadID string, err error) {
	xlMeta := newXLMetaV1(xl.dataBlocks, xl.parityBlocks)

	// Guess the content type when the caller did not provide one.
	if meta["content-type"] == "" {
		meta["content-type"] = "application/octet-stream"
		if ext := filepath.Ext(object); ext != "" {
			if content, ok := mimedb.DB[strings.ToLower(strings.TrimPrefix(ext, "."))]; ok {
				meta["content-type"] = content.ContentType
			}
		}
	}
	xlMeta.Stat.ModTime = time.Now().UTC()
	xlMeta.Stat.Version = 1
	xlMeta.Meta = meta

	// This lock needs to be held for any changes to the directory
	// contents of ".minio/multipart/bucket/object/".
	lockPath := pathJoin(mpartMetaPrefix, bucket, object)
	nsMutex.Lock(minioMetaBucket, lockPath)
	defer nsMutex.Unlock(minioMetaBucket, lockPath)

	uploadID = getUUID()
	initiated := time.Now().UTC()

	// Record the upload id in 'uploads.json'.
	if err = writeUploadJSON(bucket, object, uploadID, initiated, xl.storageDisks...); err != nil {
		return "", err
	}

	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	tempUploadIDPath := path.Join(tmpMetaPrefix, uploadID)

	// Stage `xl.json` on all disks under the tmp location.
	if err = xl.writeSameXLMetadata(minioMetaBucket, tempUploadIDPath, xlMeta); err != nil {
		return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
	}

	// Move it to its final location.
	if rErr := xl.renameObject(minioMetaBucket, tempUploadIDPath, minioMetaBucket, uploadIDPath); rErr != nil {
		return "", toObjectErr(rErr, minioMetaBucket, uploadIDPath)
	}
	return uploadID, nil
}
// NewMultipartUpload - initializes a new multipart upload and returns
// its upload id, which is of UUID form and unique for every request.
//
// Implements S3 compatible initiate multipart API.
//
// Bucket name, bucket existence and object name are validated first. A
// nil meta is replaced by an empty map.
func (xl xlObjects) NewMultipartUpload(bucket, object string, meta map[string]string) (string, error) {
	switch {
	case !IsValidBucketName(bucket):
		return "", BucketNameInvalid{Bucket: bucket}
	case !xl.isBucketExist(bucket):
		return "", BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		return "", ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	if meta == nil {
		// No metadata given, start with an empty set.
		meta = make(map[string]string)
	}
	return xl.newMultipartUpload(bucket, object, meta)
}
// putObjectPart - reads incoming data until EOF for the part file on
// an ongoing multipart transaction. Internally incoming data is
// erasure coded and written across all disks.
//
// The part is first written under the tmp meta prefix, verified against
// md5Hex when one is given, renamed into the upload id directory and
// finally recorded in the `xl.json` of the upload.
//
// A size of -1 is replaced by the count returned by erasureCreateFile
// (presumably the number of bytes written - confirm).
//
// Returns the hex encoded md5sum of the data that was read. Fails with
// InvalidUploadID when the upload id does not exist and with BadDigest
// when md5Hex is non empty and does not match the computed md5sum.
func (xl xlObjects) putObjectPart(bucket string, object string, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
// Hold the lock on the upload id path for the whole operation.
uploadIDPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
nsMutex.Lock(minioMetaBucket, uploadIDPath)
defer nsMutex.Unlock(minioMetaBucket, uploadIDPath)
if !xl.isUploadIDExists(bucket, object, uploadID) {
return "", InvalidUploadID{UploadID: uploadID}
}
// Read metadata associated with the object from all disks.
partsMetadata, errs := xl.readAllXLMetadata(minioMetaBucket, uploadIDPath)
// List all online disks.
onlineDisks, higherVersion, err := xl.listOnlineDisks(partsMetadata, errs)
if err != nil {
return "", toObjectErr(err, bucket, object)
}
// Pick one from the first valid metadata.
xlMeta := pickValidXLMeta(partsMetadata)
// Parts are stored as "object<partID>", staged under the tmp meta prefix first.
partSuffix := fmt.Sprintf("object%d", partID)
tmpPartPath := path.Join(tmpMetaPrefix, uploadID, partSuffix)
// Initialize md5 writer.
md5Writer := md5.New()
// Construct a tee reader for md5sum, everything read from data is
// hashed on the way.
teeReader := io.TeeReader(data, md5Writer)
// Collect all the previous erasure infos across the disk.
var eInfos []erasureInfo
for index := range onlineDisks {
eInfos = append(eInfos, partsMetadata[index].Erasure)
}
// Erasure code data and write across all disks.
newEInfos, n, err := erasureCreateFile(onlineDisks, minioMetaBucket, tmpPartPath, partSuffix, teeReader, eInfos)
if err != nil {
return "", toObjectErr(err, minioMetaBucket, tmpPartPath)
}
// Size was not known upfront, use the count reported by erasureCreateFile.
if size == -1 {
size = n
}
// Calculate new md5sum.
newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))
// The md5sum is verified only when the caller supplied one.
if md5Hex != "" {
if newMD5Hex != md5Hex {
// MD5 mismatch, delete the temporary object. The result of the
// delete is not checked.
xl.deleteObject(minioMetaBucket, tmpPartPath)
// Returns md5 mismatch.
return "", BadDigest{md5Hex, newMD5Hex}
}
}
// Validates if upload ID exists again.
if !xl.isUploadIDExists(bucket, object, uploadID) {
return "", InvalidUploadID{UploadID: uploadID}
}
// Rename temporary part file to its final location.
partPath := path.Join(uploadIDPath, partSuffix)
err = xl.renameObject(minioMetaBucket, tmpPartPath, minioMetaBucket, partPath)
if err != nil {
return "", toObjectErr(err, minioMetaBucket, partPath)
}
// Once part is successfully committed, proceed with updating XL metadata.
xlMeta.Stat.Version = higherVersion
// Add the current part.
xlMeta.AddObjectPart(partID, partSuffix, newMD5Hex, size)
// Update `xl.json` content for each disks: every entry gets the new
// parts list and the erasure info returned for its index.
for index := range partsMetadata {
partsMetadata[index].Parts = xlMeta.Parts
partsMetadata[index].Erasure = newEInfos[index]
}
// Write all the checksum metadata.
tempUploadIDPath := path.Join(tmpMetaPrefix, uploadID)
// Writes a unique `xl.json` each disk carrying new checksum
// related information.
if err = xl.writeUniqueXLMetadata(minioMetaBucket, tempUploadIDPath, partsMetadata); err != nil {
return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
}
// Move the staged `xl.json` into the upload id directory.
rErr := xl.renameXLMetadata(minioMetaBucket, tempUploadIDPath, minioMetaBucket, uploadIDPath)
if rErr != nil {
return "", toObjectErr(rErr, minioMetaBucket, uploadIDPath)
}
// Return success.
return newMD5Hex, nil
}
// PutObjectPart - reads the incoming stream and erasure codes it
// internally. This call is similar to a single put operation but it is
// part of the multipart transaction.
//
// Implements S3 compatible Upload Part API.
//
// Bucket name, bucket existence and object name are validated before
// the part is handed over to putObjectPart.
func (xl xlObjects) PutObjectPart(bucket, object, uploadID string, partID int, size int64, data io.Reader, md5Hex string) (string, error) {
	switch {
	case !IsValidBucketName(bucket):
		return "", BucketNameInvalid{Bucket: bucket}
	case !xl.isBucketExist(bucket):
		return "", BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		return "", ObjectNameInvalid{Bucket: bucket, Object: object}
	}
	return xl.putObjectPart(bucket, object, uploadID, partID, size, data, md5Hex)
}
// listObjectParts - lists the parts recorded in the `xl.json` of an
// upload id, starting after the part numbered partNumberMarker and
// limited to maxParts entries (capped at maxPartsList).
//
// When partNumberMarker matches no recorded part the listing starts at
// the first part. Every listed part is stat'ed to obtain its
// modification time. IsTruncated and NextPartNumberMarker are set when
// parts remain after the last listed one.
func (xl xlObjects) listObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
	uploadIDPath := path.Join(mpartMetaPrefix, bucket, object, uploadID)
	xlMeta, err := xl.readXLMetadata(minioMetaBucket, uploadIDPath)
	if err != nil {
		return ListPartsInfo{}, toObjectErr(err, minioMetaBucket, uploadIDPath)
	}

	// Result stub, parts are filled in below.
	result := ListPartsInfo{
		Bucket:   bucket,
		Object:   object,
		UploadID: uploadID,
		MaxParts: maxParts,
	}

	// Nothing to list.
	if len(xlMeta.Parts) == 0 || maxParts == 0 {
		return result, nil
	}

	// Never list more than maxPartsList at once.
	if maxParts > maxPartsList {
		maxParts = maxPartsList
	}

	// Only parts following the marker are listed.
	remaining := xlMeta.Parts
	if markerIdx := xlMeta.ObjectPartIndex(partNumberMarker); markerIdx != -1 {
		remaining = xlMeta.Parts[markerIdx+1:]
	}

	for _, part := range remaining {
		partNamePath := path.Join(mpartMetaPrefix, bucket, object, uploadID, part.Name)
		fi, sErr := xl.statPart(minioMetaBucket, partNamePath)
		if sErr != nil {
			return ListPartsInfo{}, toObjectErr(sErr, minioMetaBucket, partNamePath)
		}
		result.Parts = append(result.Parts, partInfo{
			PartNumber:   part.Number,
			ETag:         part.ETag,
			LastModified: fi.ModTime,
			Size:         part.Size,
		})
		if len(result.Parts) == maxParts {
			break
		}
	}

	// Parts were left out, flag the listing as truncated and tell the
	// caller where the next listing has to resume.
	if len(remaining) > len(result.Parts) {
		result.IsTruncated = true
		result.NextPartNumberMarker = result.Parts[len(result.Parts)-1].PartNumber
	}
	return result, nil
}
// ListObjectParts - lists the previously uploaded parts of an object
// and uploadID, partNumberMarker tells where the listing begins.
//
// Implements S3 compatible ListObjectParts API. The resulting
// ListPartsInfo structure is marshalled directly into XML and replied
// back to the client.
//
// Bucket name, bucket existence and object name are validated first.
// The upload id path stays locked during the listing and the upload id
// has to exist, else InvalidUploadID is returned.
func (xl xlObjects) ListObjectParts(bucket, object, uploadID string, partNumberMarker, maxParts int) (ListPartsInfo, error) {
	switch {
	case !IsValidBucketName(bucket):
		return ListPartsInfo{}, BucketNameInvalid{Bucket: bucket}
	case !xl.isBucketExist(bucket):
		return ListPartsInfo{}, BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		return ListPartsInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	// Hold lock so that there is no competing abort-multipart-upload
	// or complete-multipart-upload.
	lockPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
	nsMutex.Lock(minioMetaBucket, lockPath)
	defer nsMutex.Unlock(minioMetaBucket, lockPath)

	if !xl.isUploadIDExists(bucket, object, uploadID) {
		return ListPartsInfo{}, InvalidUploadID{UploadID: uploadID}
	}
	return xl.listObjectParts(bucket, object, uploadID, partNumberMarker, maxParts)
}
// CompleteMultipartUpload - completes an ongoing multipart
// transaction after receiving all the parts indicated by the client.
// Returns an md5sum calculated by concatenating all the individual
// md5sums of all the parts.
//
// Implements S3 compatible Complete multipart API.
//
// The sequence is: validate arguments, lock the uploadID, validate
// every requested part against the stored `xl.json`, rewrite
// `xl.json` so it lists only the requested parts, move any existing
// object aside, rename the upload directory to the final object
// location and finally update or remove `uploads.json`.
//
// On success the returned string is the value produced by
// completeMultipartMD5; on failure it is empty and the error is
// non-nil.
func (xl xlObjects) CompleteMultipartUpload(bucket string, object string, uploadID string, parts []completePart) (string, error) {
// Verify if bucket is valid.
if !IsValidBucketName(bucket) {
return "", BucketNameInvalid{Bucket: bucket}
}
// Verify whether the bucket exists.
if !xl.isBucketExist(bucket) {
return "", BucketNotFound{Bucket: bucket}
}
if !IsValidObjectName(object) {
return "", ObjectNameInvalid{
Bucket: bucket,
Object: object,
}
}
// Hold lock so that
// 1) no one aborts this multipart upload
// 2) no one does a parallel complete-multipart-upload on this multipart upload
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object, uploadID))
if !xl.isUploadIDExists(bucket, object, uploadID) {
return "", InvalidUploadID{UploadID: uploadID}
}
// Calculate s3 compatible md5sum for complete multipart.
s3MD5, err := completeMultipartMD5(parts...)
if err != nil {
return "", err
}
uploadIDPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
// Read metadata associated with the object from all disks.
partsMetadata, errs := xl.readAllXLMetadata(minioMetaBucket, uploadIDPath)
if err = xl.reduceError(errs); err != nil {
return "", toObjectErr(err, minioMetaBucket, uploadIDPath)
}
// Calculate full object size.
var objectSize int64
// Pick one from the first valid metadata.
xlMeta := pickValidXLMeta(partsMetadata)
// Save current xl meta for validation. The copy is taken before
// xlMeta.Parts is replaced below, so currentXLMeta.Parts keeps
// describing the parts as they were uploaded.
var currentXLMeta = xlMeta
// Allocate parts similar to incoming slice.
xlMeta.Parts = make([]objectPartInfo, len(parts))
// Validate each part and then commit to disk.
for i, part := range parts {
// The requested part must have been uploaded.
partIdx := currentXLMeta.ObjectPartIndex(part.PartNumber)
if partIdx == -1 {
return "", InvalidPart{}
}
// The ETag sent by the client must match the stored one.
if currentXLMeta.Parts[partIdx].ETag != part.ETag {
return "", BadDigest{}
}
// Every part except the last one in the request must pass
// isMinAllowedPartSize.
if (i < len(parts)-1) && !isMinAllowedPartSize(currentXLMeta.Parts[partIdx].Size) {
return "", PartTooSmall{}
}
// Save for total object size.
objectSize += currentXLMeta.Parts[partIdx].Size
// Add incoming parts, in the order given by the request.
xlMeta.Parts[i] = objectPartInfo{
Number: part.PartNumber,
ETag: part.ETag,
Size: currentXLMeta.Parts[partIdx].Size,
Name: fmt.Sprintf("object%d", part.PartNumber),
}
}
// Check if an object is present as one of the parent dir.
if xl.parentDirIsObject(bucket, path.Dir(object)) {
return "", toObjectErr(errFileAccessDenied, bucket, object)
}
// Save the final object size and modtime.
xlMeta.Stat.Size = objectSize
xlMeta.Stat.ModTime = time.Now().UTC()
// Save successfully calculated md5sum.
xlMeta.Meta["md5Sum"] = s3MD5
// NOTE(review): uploadIDPath is recomputed here with path.Join while
// it was first built with pathJoin - presumably both yield the same
// value for a valid object name; confirm and drop one of them.
uploadIDPath = path.Join(mpartMetaPrefix, bucket, object, uploadID)
tempUploadIDPath := path.Join(tmpMetaPrefix, uploadID)
// Update all xl metadata, make sure to not modify fields like
// checksum which are different on each disks.
for index := range partsMetadata {
partsMetadata[index].Stat = xlMeta.Stat
partsMetadata[index].Meta = xlMeta.Meta
partsMetadata[index].Parts = xlMeta.Parts
}
// Write unique `xl.json` for each disk at the temporary location
// first, then rename it over the upload's `xl.json`.
if err = xl.writeUniqueXLMetadata(minioMetaBucket, tempUploadIDPath, partsMetadata); err != nil {
return "", toObjectErr(err, minioMetaBucket, tempUploadIDPath)
}
rErr := xl.renameXLMetadata(minioMetaBucket, tempUploadIDPath, minioMetaBucket, uploadIDPath)
if rErr != nil {
return "", toObjectErr(rErr, minioMetaBucket, uploadIDPath)
}
// Hold write lock on the destination before rename
nsMutex.Lock(bucket, object)
defer nsMutex.Unlock(bucket, object)
// Rename if an object already exists to temporary location.
// NOTE(review): if a later step fails, the object moved aside here
// is not moved back by this function.
uniqueID := getUUID()
err = xl.renameObject(bucket, object, minioMetaBucket, path.Join(tmpMetaPrefix, uniqueID))
if err != nil {
return "", toObjectErr(err, bucket, object)
}
// Remove parts that weren't present in CompleteMultipartUpload request
for _, curpart := range currentXLMeta.Parts {
if xlMeta.ObjectPartIndex(curpart.Number) == -1 {
// Delete the missing part files. e.g,
// Request 1: NewMultipart
// Request 2: PutObjectPart 1
// Request 3: PutObjectPart 2
// Request 4: CompleteMultipartUpload --part 2
// N.B. 1st part is not present. This part should be removed from the storage.
// The result of removeObjectPart is not checked.
xl.removeObjectPart(bucket, object, uploadID, curpart.Name)
}
}
// Rename the multipart object to final location.
if err = xl.renameObject(minioMetaBucket, uploadIDPath, bucket, object); err != nil {
return "", toObjectErr(err, bucket, object)
}
// Delete the previously successfully renamed object. The result of
// this cleanup is not checked.
xl.deleteObject(minioMetaBucket, path.Join(tmpMetaPrefix, uniqueID))
// Hold the lock so that two parallel complete-multipart-uploads do not
// leave a stale uploads.json behind.
nsMutex.Lock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))
defer nsMutex.Unlock(minioMetaBucket, pathJoin(mpartMetaPrefix, bucket, object))
// Validate if there are other incomplete upload-id's present for
// the object, if yes do not attempt to delete 'uploads.json'.
// The loop below picks the first non-nil disk.
// NOTE(review): disk stays nil when every returned entry is nil -
// confirm readUploadsJSON tolerates that.
var disk StorageAPI
for _, disk = range xl.getLoadBalancedQuorumDisks() {
if disk == nil {
continue
}
break
}
uploadsJSON, err := readUploadsJSON(bucket, object, disk)
if err != nil {
return "", toObjectErr(err, minioMetaBucket, object)
}
// If we have successfully read `uploads.json`, then we proceed to
// purge or update `uploads.json`.
uploadIDIdx := uploadsJSON.Index(uploadID)
if uploadIDIdx != -1 {
// Drop this uploadID from the list of pending uploads.
uploadsJSON.Uploads = append(uploadsJSON.Uploads[:uploadIDIdx], uploadsJSON.Uploads[uploadIDIdx+1:]...)
}
if len(uploadsJSON.Uploads) > 0 {
// Other uploads are still pending for this object, keep
// `uploads.json` and write the shortened list back.
if err = updateUploadsJSON(bucket, object, uploadsJSON, xl.storageDisks...); err != nil {
return "", toObjectErr(err, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object))
}
// Return success.
return s3MD5, nil
} // No more pending uploads for the object, proceed to delete
// object completely from '.minio/multipart'.
err = xl.deleteObject(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object))
if err != nil {
return "", toObjectErr(err, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object))
}
// Return md5sum.
return s3MD5, nil
}
// abortMultipartUpload - internal helper that purges an ongoing
// multipart transaction: the uploaded parts are cleaned up first,
// then the uploadID is dropped from `uploads.json`. When no other
// upload remains for the object the whole entry at
// '.minio/multipart/bucket/object' is deleted, otherwise
// `uploads.json` is rewritten with the remaining uploads.
func (xl xlObjects) abortMultipartUpload(bucket, object, uploadID string) (err error) {
	// Cleanup all uploaded parts.
	err = cleanupUploadedParts(bucket, object, uploadID, xl.storageDisks...)
	if err != nil {
		return toObjectErr(err, bucket, object)
	}

	// Serialize updates of `uploads.json` for this object.
	uploadsLockPath := pathJoin(mpartMetaPrefix, bucket, object)
	nsMutex.Lock(minioMetaBucket, uploadsLockPath)
	defer nsMutex.Unlock(minioMetaBucket, uploadsLockPath)

	// Read `uploads.json` from the first disk that is not nil.
	// NOTE(review): disk stays nil when every candidate is nil -
	// confirm readUploadsJSON tolerates that.
	var disk StorageAPI
	for _, candidate := range xl.getLoadBalancedQuorumDisks() {
		if candidate != nil {
			disk = candidate
			break
		}
	}
	uploadsJSON, err := readUploadsJSON(bucket, object, disk)
	if err != nil {
		return toObjectErr(err, bucket, object)
	}

	// Forget the aborted uploadID, when it is listed.
	if idx := uploadsJSON.Index(uploadID); idx != -1 {
		uploadsJSON.Uploads = append(uploadsJSON.Uploads[:idx], uploadsJSON.Uploads[idx+1:]...)
	}

	if len(uploadsJSON.Uploads) == 0 {
		// No more pending uploads for the object, purge the entire
		// entry at '.minio/multipart/bucket/object'.
		if err = xl.deleteObject(minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object)); err != nil {
			return toObjectErr(err, minioMetaBucket, path.Join(mpartMetaPrefix, bucket, object))
		}
		// Successfully purged.
		return nil
	}

	// Other uploads are still pending for the same object, preserve
	// them by updating 'uploads.json' in-place.
	if err = updateUploadsJSON(bucket, object, uploadsJSON, xl.storageDisks...); err != nil {
		return toObjectErr(err, bucket, object)
	}
	return nil
}
// AbortMultipartUpload - aborts the multipart operation identified
// by uploadID.
//
// Implements S3 compatible Abort multipart API. After validating
// the arguments the upload's namespace entry is locked, the
// uploadID is verified to exist and the actual purge is delegated
// to abortMultipartUpload. Once an upload has been purged, later
// calls with the same uploadID are rejected with InvalidUploadID.
func (xl xlObjects) AbortMultipartUpload(bucket, object, uploadID string) error {
	// Validate the request, in the same order the checks are reported.
	switch {
	case !IsValidBucketName(bucket):
		return BucketNameInvalid{Bucket: bucket}
	case !xl.isBucketExist(bucket):
		return BucketNotFound{Bucket: bucket}
	case !IsValidObjectName(object):
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	// Hold lock so that there is no competing
	// complete-multipart-upload or put-object-part.
	uploadLockPath := pathJoin(mpartMetaPrefix, bucket, object, uploadID)
	nsMutex.Lock(minioMetaBucket, uploadLockPath)
	defer nsMutex.Unlock(minioMetaBucket, uploadLockPath)

	if !xl.isUploadIDExists(bucket, object, uploadID) {
		return InvalidUploadID{UploadID: uploadID}
	}
	return xl.abortMultipartUpload(bucket, object, uploadID)
}

455
xl-v1-object.go Normal file
View File

@ -0,0 +1,455 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"bytes"
"crypto/md5"
"encoding/hex"
"io"
"path"
"path/filepath"
"strings"
"sync"
"time"
"github.com/minio/minio/pkg/mimedb"
)
/// Object Operations
// GetObject - reads an object erasure coded across multiple disks
// and streams it to writer. Supports additional parameters like
// offset and length which is synonymous with HTTP Range requests.
//
// startOffset is the position inside the object where reading
// begins. length is the number of bytes the client asked for,
// counted from startOffset; the range may span several parts.
//
// Returns nil once length bytes were written or the last part was
// consumed, otherwise the first error met while validating, reading
// or writing.
func (xl xlObjects) GetObject(bucket, object string, startOffset int64, length int64, writer io.Writer) error {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return BucketNameInvalid{Bucket: bucket}
	}
	// Verify if object is valid.
	if !IsValidObjectName(object) {
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	// Lock the object before reading.
	nsMutex.RLock(bucket, object)
	defer nsMutex.RUnlock(bucket, object)

	// Read metadata associated with the object from all disks.
	partsMetadata, errs := xl.readAllXLMetadata(bucket, object)
	if err := xl.reduceError(errs); err != nil {
		return toObjectErr(err, bucket, object)
	}

	// List all online disks.
	onlineDisks, _, err := xl.listOnlineDisks(partsMetadata, errs)
	if err != nil {
		return toObjectErr(err, bucket, object)
	}

	// Pick one from the first valid metadata.
	xlMeta := partsMetadata[0]
	if !xlMeta.IsValid() {
		for _, partMetadata := range partsMetadata {
			if partMetadata.IsValid() {
				xlMeta = partMetadata
				break
			}
		}
	}

	// Translate the object offset into a part index and an offset
	// inside that part.
	partIndex, partOffset, err := xlMeta.ObjectToPartOffset(startOffset)
	if err != nil {
		return toObjectErr(err, bucket, object)
	}

	// Collect all the previous erasure infos across the disk.
	var eInfos []erasureInfo
	for index := range onlineDisks {
		eInfos = append(eInfos, partsMetadata[index].Erasure)
	}

	// Read from all parts.
	for ; partIndex < len(xlMeta.Parts); partIndex++ {
		// Save the current part name and size.
		partName := xlMeta.Parts[partIndex].Name
		partSize := xlMeta.Parts[partIndex].Size

		// Start reading the part name.
		var buffer []byte
		buffer, err = erasureReadFile(onlineDisks, bucket, pathJoin(object, partName), partName, partSize, eInfos)
		if err != nil {
			return err
		}

		// Only the bytes from partOffset onwards belong to the
		// requested range. The remaining length has to be compared
		// against these bytes and not against the whole part,
		// otherwise a range that starts inside one part and ends in
		// the next one would stop early with io.EOF.
		available := buffer[partOffset:]
		if length <= int64(len(available)) {
			// The request ends inside this part.
			_, err = io.CopyN(writer, bytes.NewReader(available), length)
			return err
		}

		// The request continues into the next part, send everything
		// that is left of this one.
		var m int64
		m, err = io.Copy(writer, bytes.NewReader(available))
		if err != nil {
			return err
		}
		length -= m

		// Reset part offset to 0 to read rest of the parts from the beginning.
		partOffset = 0
	} // End of read all parts loop.

	// Return success.
	return nil
}
// GetObjectInfo - validates bucket and object names, takes a read
// lock on the object and returns its ObjectInfo. Errors coming from
// the metadata lookup are translated with toObjectErr.
func (xl xlObjects) GetObjectInfo(bucket, object string) (ObjectInfo, error) {
	switch {
	case !IsValidBucketName(bucket):
		return ObjectInfo{}, BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		return ObjectInfo{}, ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	// Readers may share the object, writers are kept out.
	nsMutex.RLock(bucket, object)
	defer nsMutex.RUnlock(bucket, object)

	objInfo, err := xl.getObjectInfo(bucket, object)
	if err == nil {
		return objInfo, nil
	}
	return ObjectInfo{}, toObjectErr(err, bucket, object)
}
// getObjectInfo - reads the object's XL metadata and converts it
// into an ObjectInfo. No locking and no argument validation happens
// here. The error from readXLMetadata is returned untranslated.
func (xl xlObjects) getObjectInfo(bucket, object string) (objInfo ObjectInfo, err error) {
	meta, readErr := xl.readXLMetadata(bucket, object)
	if readErr != nil {
		return ObjectInfo{}, readErr
	}
	// Size and modification time come from the stat section, the
	// remaining fields from the free-form metadata map.
	return ObjectInfo{
		IsDir:           false,
		Bucket:          bucket,
		Name:            object,
		Size:            meta.Stat.Size,
		ModTime:         meta.Stat.ModTime,
		MD5Sum:          meta.Meta["md5Sum"],
		ContentType:     meta.Meta["content-type"],
		ContentEncoding: meta.Meta["content-encoding"],
	}, nil
}
// renameObject - renames the source object to the destination
// object on all disks in parallel.
//
// A disk that is nil counts as a failure (errDiskNotFound), a disk
// reporting errFileNotFound does not. The call succeeds while the
// number of failures stays within len(xl.storageDisks)-xl.writeQuorum
// or, failing that, within len(xl.storageDisks)-xl.readQuorum.
// Beyond that the renames that went through are reverted and
// errXLWriteQuorum is returned.
func (xl xlObjects) renameObject(srcBucket, srcObject, dstBucket, dstObject string) error {
	totalDisks := len(xl.storageDisks)
	var wg sync.WaitGroup
	// One error slot per disk, each goroutine writes only its own.
	errs := make([]error, totalDisks)

	// retainSlash is applied because srcObject and dstObject are
	// either leaf-dirs or non-leaf-dirs. For an object the leaf-dir
	// is renamed as a whole, part and metadata files are not renamed
	// separately.
	srcPath, dstPath := retainSlash(srcObject), retainSlash(dstObject)

	for index, disk := range xl.storageDisks {
		if disk == nil {
			errs[index] = errDiskNotFound
			continue
		}
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			rErr := disk.RenameFile(srcBucket, srcPath, dstBucket, dstPath)
			if rErr == nil || rErr == errFileNotFound {
				return
			}
			errs[index] = rErr
		}(index, disk)
	}
	// Wait for all renames to finish.
	wg.Wait()

	// Count the disks that failed.
	failed := 0
	for _, rErr := range errs {
		if rErr != nil {
			failed++
		}
	}

	// Failures within the write quorum allowance are tolerated.
	if failed <= totalDisks-xl.writeQuorum {
		return nil
	}
	// Write quorum was missed, still report success when the read
	// quorum holds.
	if failed <= totalDisks-xl.readQuorum {
		return nil
	}

	// Read quorum missed as well, undo the rename in parallel on
	// every disk that did not record an error.
	for index, disk := range xl.storageDisks {
		if disk == nil || errs[index] != nil {
			continue
		}
		wg.Add(1)
		go func(disk StorageAPI) {
			defer wg.Done()
			_ = disk.RenameFile(dstBucket, dstPath, srcBucket, srcPath)
		}(disk)
	}
	wg.Wait()
	return errXLWriteQuorum
}
// PutObject - creates an object upon reading from the input stream
// until EOF, erasure codes the data across all disk and additionally
// writes `xl.json` which carries the necessary metadata for future
// object operations.
//
// The data is first staged under a unique directory below
// tmpMetaPrefix in minioMetaBucket and only renamed to its final
// location after `xl.json` has been written. size may be -1, in
// which case the number of bytes actually read is recorded.
// metadata may be nil; "md5Sum" and "content-type" are filled in
// when missing. A caller supplied "md5Sum" that differs from the
// computed one fails the request with BadDigest.
//
// Returns the hex encoded md5sum of the data on success.
func (xl xlObjects) PutObject(bucket string, object string, size int64, data io.Reader, metadata map[string]string) (string, error) {
	// Verify if bucket is valid.
	if !IsValidBucketName(bucket) {
		return "", BucketNameInvalid{Bucket: bucket}
	}
	// Verify bucket exists.
	if !xl.isBucketExist(bucket) {
		return "", BucketNotFound{Bucket: bucket}
	}
	if !IsValidObjectName(object) {
		return "", ObjectNameInvalid{
			Bucket: bucket,
			Object: object,
		}
	}
	// No metadata is set, allocate a new one.
	if metadata == nil {
		metadata = make(map[string]string)
	}

	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	// Staging location of the new object and of its single part.
	uniqueID := getUUID()
	tempErasureObj := path.Join(tmpMetaPrefix, uniqueID, "object1")
	tempObj := path.Join(tmpMetaPrefix, uniqueID)

	// Initialize xl meta.
	xlMeta := newXLMetaV1(xl.dataBlocks, xl.parityBlocks)

	// Read metadata associated with the object from all disks.
	partsMetadata, errs := xl.readAllXLMetadata(bucket, object)

	// List all online disks.
	onlineDisks, higherVersion, err := xl.listOnlineDisks(partsMetadata, errs)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Increment version only if we have online disks less than configured storage disks.
	if diskCount(onlineDisks) < len(xl.storageDisks) {
		higherVersion++
	}

	// Initialize md5 writer.
	md5Writer := md5.New()

	// Tee reader combines incoming data stream and md5, data read
	// from input stream is written to md5.
	teeReader := io.TeeReader(data, md5Writer)

	// Every online disk starts from the erasure info of the fresh xlMeta.
	var eInfos []erasureInfo
	for range onlineDisks {
		eInfos = append(eInfos, xlMeta.Erasure)
	}

	// Erasure code and write across all disks.
	newEInfos, n, err := erasureCreateFile(onlineDisks, minioMetaBucket, tempErasureObj, "object1", teeReader, eInfos)
	if err != nil {
		return "", toObjectErr(err, minioMetaBucket, tempErasureObj)
	}
	if size == -1 {
		size = n
	}

	// Save additional erasureMetadata.
	modTime := time.Now().UTC()
	newMD5Hex := hex.EncodeToString(md5Writer.Sum(nil))

	// Update the md5sum if not set with the newly calculated one.
	if len(metadata["md5Sum"]) == 0 {
		metadata["md5Sum"] = newMD5Hex
	}

	// If not set default to "application/octet-stream", or to the
	// type registered for the object's extension.
	if metadata["content-type"] == "" {
		contentType := "application/octet-stream"
		if objectExt := filepath.Ext(object); objectExt != "" {
			content, ok := mimedb.DB[strings.ToLower(strings.TrimPrefix(objectExt, "."))]
			if ok {
				contentType = content.ContentType
			}
		}
		metadata["content-type"] = contentType
	}

	// "md5Sum" is never empty at this point, so a plain comparison
	// detects a mismatch with the value supplied by the caller.
	if md5Hex := metadata["md5Sum"]; md5Hex != newMD5Hex {
		// MD5 mismatch, delete the temporary object.
		xl.deleteObject(minioMetaBucket, tempObj)
		// Returns md5 mismatch.
		return "", BadDigest{md5Hex, newMD5Hex}
	}

	// Check if an object is present as one of the parent dir.
	// -- FIXME. (needs a new kind of lock).
	if xl.parentDirIsObject(bucket, path.Dir(object)) {
		// The staged data will never be committed, remove it
		// (best-effort) instead of leaving it under the tmp prefix.
		xl.deleteObject(minioMetaBucket, tempObj)
		return "", toObjectErr(errFileAccessDenied, bucket, object)
	}

	// Rename if an object already exists to temporary location.
	newUniqueID := getUUID()
	err = xl.renameObject(bucket, object, minioMetaBucket, path.Join(tmpMetaPrefix, newUniqueID))
	if err != nil {
		// Nothing has been committed, drop the staged data
		// (best-effort).
		xl.deleteObject(minioMetaBucket, tempObj)
		return "", toObjectErr(err, bucket, object)
	}
	// NOTE(review): from here on a failure leaves the previous object
	// moved aside under the tmp prefix; restoring it is not attempted
	// by this function.

	// Fill all the necessary metadata.
	xlMeta.Meta = metadata
	xlMeta.Stat.Size = size
	xlMeta.Stat.ModTime = modTime
	xlMeta.Stat.Version = higherVersion
	// Add the final part.
	xlMeta.AddObjectPart(1, "object1", newMD5Hex, xlMeta.Stat.Size)

	// Update `xl.json` content on each disks, only the erasure info
	// differs from disk to disk.
	for index := range partsMetadata {
		partsMetadata[index] = xlMeta
		partsMetadata[index].Erasure = newEInfos[index]
	}

	// Write unique `xl.json` for each disk.
	if err = xl.writeUniqueXLMetadata(minioMetaBucket, tempObj, partsMetadata); err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Rename the successfully written temporary object to final location.
	err = xl.renameObject(minioMetaBucket, tempObj, bucket, object)
	if err != nil {
		return "", toObjectErr(err, bucket, object)
	}

	// Delete the previous object that was moved aside (best-effort).
	xl.deleteObject(minioMetaBucket, path.Join(tmpMetaPrefix, newUniqueID))

	// Return md5sum, successfully wrote object.
	return newMD5Hex, nil
}
// deleteObject - internal helper that removes an object from all
// the disks in parallel by running cleanupDir on every disk that is
// not nil.
//
// Returns errFileNotFound when every disk reported errFileNotFound,
// errXLWriteQuorum when the other failures (a nil disk included)
// exceed len(xl.storageDisks)-xl.writeQuorum, and nil otherwise.
func (xl xlObjects) deleteObject(bucket, object string) error {
	totalDisks := len(xl.storageDisks)
	var wg sync.WaitGroup
	// One error slot per disk, each goroutine writes only its own.
	dErrs := make([]error, totalDisks)

	for index, disk := range xl.storageDisks {
		if disk == nil {
			dErrs[index] = errDiskNotFound
			continue
		}
		wg.Add(1)
		go func(index int, disk StorageAPI) {
			defer wg.Done()
			if dErr := cleanupDir(disk, bucket, object); dErr != nil {
				dErrs[index] = dErr
			}
		}(index, disk)
	}
	// Wait for all routines to finish.
	wg.Wait()

	// Separate "file not found" from every other failure.
	var notFound, failed int
	for _, dErr := range dErrs {
		switch dErr {
		case nil:
			// This disk succeeded.
		case errFileNotFound:
			notFound++
		default:
			failed++
		}
	}

	switch {
	case notFound == totalDisks:
		// Every disk reported file not found.
		return errFileNotFound
	case failed > totalDisks-xl.writeQuorum:
		// More failures than the write quorum allows.
		return errXLWriteQuorum
	}
	return nil
}
// DeleteObject - deletes an object. Apart from name validation
// failures no error is reported: the outcome of the underlying
// delete is deliberately dropped, as the handler does not need it
// to reply to the client request.
func (xl xlObjects) DeleteObject(bucket, object string) error {
	switch {
	case !IsValidBucketName(bucket):
		return BucketNameInvalid{Bucket: bucket}
	case !IsValidObjectName(object):
		return ObjectNameInvalid{Bucket: bucket, Object: object}
	}

	// Keep readers and other writers away while deleting.
	nsMutex.Lock(bucket, object)
	defer nsMutex.Unlock(bucket, object)

	// Best-effort, the result is intentionally ignored.
	_ = xl.deleteObject(bucket, object)
	return nil
}

86
xl-v1-utils.go Normal file
View File

@ -0,0 +1,86 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"bytes"
"io"
"math/rand"
"path"
"time"
)
// randInts - returns the integers 1..count in random order, using
// the Knuth Fisher-Yates shuffle algorithm for uniform shuffling.
//
// A private generator seeded with the current time is used, so the
// process-wide math/rand source is no longer re-seeded on every
// call. For count == 0 an empty slice is returned.
func randInts(count int) []int {
	// Seed a generator of our own with the current time.
	rng := rand.New(rand.NewSource(time.Now().UTC().UnixNano()))

	// Start from the ordered sequence 1..count.
	ints := make([]int, count)
	for i := range ints {
		ints[i] = i + 1
	}

	for i := 0; i < count; i++ {
		// Choose index uniformly in [i, count-1]
		r := i + rng.Intn(count-i)
		ints[r], ints[i] = ints[i], ints[r]
	}
	return ints
}
// readAll - returns contents from volume/path as byte array.
//
// The file is read through disk.ReadFile in chunks of blockSizeV1
// bytes until io.EOF is reported. Bytes returned together with
// io.EOF are kept. Any other error aborts the read and is returned
// with a nil slice.
func readAll(disk StorageAPI, volume string, path string) ([]byte, error) {
	var writer bytes.Buffer
	var startOffset int64
	// Scratch buffer reused for every chunk.
	buf := make([]byte, blockSizeV1)
	// Read until io.EOF.
	for {
		n, err := disk.ReadFile(volume, path, startOffset, buf)
		// Consume what was read before looking at the error, so that
		// a final chunk delivered together with io.EOF is not lost.
		if n > 0 {
			writer.Write(buf[:n])
			startOffset += n
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
	}
	return writer.Bytes(), nil
}
// readXLMeta reads `xl.json` of the given object and returns its
// contents as byte array.
//
// The file is read through disk.ReadFile in chunks of 2MiB until
// io.EOF is reported. Bytes returned together with io.EOF are kept.
// Any other error aborts the read and is returned with a nil slice.
func readXLMeta(disk StorageAPI, bucket string, object string) ([]byte, error) {
	var writer bytes.Buffer
	var startOffset int64
	xlMetaPath := path.Join(object, xlMetaJSONFile)
	// Allocate 2MiB buffer, this is sufficient for the most of `xl.json`.
	buf := make([]byte, 2*1024*1024)
	// Read until io.EOF.
	for {
		n, err := disk.ReadFile(bucket, xlMetaPath, startOffset, buf)
		// Consume what was read before looking at the error, so that
		// a final chunk delivered together with io.EOF is not lost.
		if n > 0 {
			writer.Write(buf[:n])
			startOffset += n
		}
		if err == io.EOF {
			break
		}
		if err != nil {
			return nil, err
		}
	}
	return writer.Bytes(), nil
}

217
xl-v1.go Normal file
View File

@ -0,0 +1,217 @@
/*
* Minio Cloud Storage, (C) 2016 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package main
import (
"errors"
"fmt"
"sort"
"sync"
"github.com/minio/minio/pkg/disk"
)
// XL constants - names of the metadata files kept by the XL backend.
const (
// formatConfigFile - format config file carries backend format
// specific details.
formatConfigFile = "format.json"
// xlMetaJSONFile - XL metadata file carries per object metadata.
xlMetaJSONFile = "xl.json"
// uploadsJSONFile - uploads metadata file carries per multipart
// object metadata.
uploadsJSONFile = "uploads.json"
)
// xlObjects - Implements XL object layer. Values are built by
// newXLObjects, which fills in every field.
type xlObjects struct {
physicalDisks []string // Disk paths as passed to newXLObjects, also used to query disk usage.
storageDisks []StorageAPI // Collection of initialized backend disks, an entry may be nil.
dataBlocks int // dataBlocks count calculated for erasure, half of the disks.
parityBlocks int // parityBlocks count calculated for erasure, half of the disks.
readQuorum int // readQuorum minimum required disks to read data, N/2 + 1.
writeQuorum int // writeQuorum minimum required disks to write data, N/2 + 3 capped at N.
// List pool management.
// NOTE(review): listObjectMapMutex presumably guards listObjectMap -
// confirm against the listing code.
listObjectMap map[listParams][]*treeWalker
listObjectMapMutex *sync.Mutex
}
// Sentinel errors of the XL backend.
var (
	// errXLMaxDisks - more disks were given than the supported maximum.
	errXLMaxDisks = errors.New("Number of disks are higher than supported maximum count '16'")

	// errXLMinDisks - fewer disks were given than the supported minimum.
	errXLMinDisks = errors.New("Number of disks are smaller than supported minimum count '8'")

	// errXLNumDisks - an odd number of disks was given.
	errXLNumDisks = errors.New("Number of disks should be multiples of '2'")

	// errXLReadQuorum - read quorum was not met.
	errXLReadQuorum = errors.New("I/O error. did not meet read quorum.")

	// errXLWriteQuorum - write quorum was not met.
	errXLWriteQuorum = errors.New("I/O error. did not meet write quorum.")

	// errXLDataCorrupt - the data is likely corrupted.
	errXLDataCorrupt = errors.New("data likely corrupted, all blocks are zero in length")
)
// Bounds on the number of disks accepted by checkSufficientDisks.
const (
// Maximum erasure blocks, more disks are rejected with errXLMaxDisks.
maxErasureBlocks = 16
// Minimum erasure blocks, fewer disks are rejected with errXLMinDisks.
minErasureBlocks = 8
)
// checkSufficientDisks validates that the number of input disks is
// acceptable for initializing XL: any even count between
// minErasureBlocks and maxErasureBlocks, both inclusive.
//
// Returns errXLMaxDisks when too many disks are given,
// errXLMinDisks when too few are given, errXLNumDisks when the
// count is odd and nil otherwise.
func checkSufficientDisks(disks []string) error {
	switch totalDisks := len(disks); {
	case totalDisks > maxErasureBlocks:
		return errXLMaxDisks
	case totalDisks < minErasureBlocks:
		return errXLMinDisks
	case totalDisks%2 != 0:
		// Disks are split evenly into data and parity, an odd
		// count cannot be used.
		return errXLNumDisks
	}
	return nil
}
// newXLObjects - initializes a new XL object layer on top of the
// given disks.
//
// The disk count is validated, every disk is bootstrapped (a disk
// reported as not found is tolerated), house keeping is run and the
// `format.json` of all disks is loaded and checked. Depending on
// the outcome the format is initialized or healed. Half of the
// disks hold data and half hold parity; read quorum is N/2 + 1 and
// write quorum is N/2 + 3, capped at N.
func newXLObjects(disks []string) (ObjectLayer, error) {
	// Validate if input disks are sufficient.
	if err := checkSufficientDisks(disks); err != nil {
		return nil, err
	}

	// Bootstrap disks.
	storageDisks := make([]StorageAPI, len(disks))
	for i, diskPath := range disks {
		// Intentionally ignore disk not found errors. XL will
		// manage such errors internally.
		api, err := newStorageAPI(diskPath)
		if err != nil && err != errDiskNotFound {
			return nil, err
		}
		storageDisks[i] = api
	}

	// Runs house keeping code, like creating minioMetaBucket, cleaning up tmp files etc.
	xlHouseKeeping(storageDisks)

	// Attempt to load all `format.json`
	formatConfigs, sErrs := loadAllFormats(storageDisks)

	// Generic format check validates all necessary cases.
	if err := genericFormatCheck(formatConfigs, sErrs); err != nil {
		return nil, err
	}

	// Act on the overall state of the per disk format errors.
	formatErr := reduceFormatErrs(sErrs, len(storageDisks))
	if formatErr == errUnformattedDisk {
		// All drives online but fresh, initialize format.
		if err := initFormatXL(storageDisks); err != nil {
			return nil, fmt.Errorf("Unable to initialize format, %s", err)
		}
	} else if formatErr == errSomeDiskUnformatted {
		// All drives online but some report missing format.json.
		if err := healFormatXL(storageDisks); err != nil {
			// There was an unexpected unrecoverable error during healing.
			return nil, fmt.Errorf("Unable to heal backend %s", err)
		}
	} else if formatErr == errSomeDiskOffline {
		// Some disks offline but some report missing format.json.
		// FIXME.
	}

	// Load saved XL format.json and validate.
	newPosixDisks, err := loadFormatXL(storageDisks)
	if err != nil {
		// errCorruptedDisk - healing failed
		return nil, fmt.Errorf("Unable to recognize backend format, %s", err)
	}

	// Data and parity blocks each take half of the disks.
	totalDisks := len(newPosixDisks)
	half := totalDisks / 2

	// Read quorum should be always N/2 + 1 (due to Vandermonde matrix
	// erasure requirements)
	readQuorum := half + 1

	// Write quorum is N/2 + 3, never more than the number of
	// disks. (Need to discuss this again)
	writeQuorum := half + 3
	if writeQuorum > totalDisks {
		writeQuorum = totalDisks
	}

	// Return successfully initialized object layer.
	return xlObjects{
		physicalDisks:      disks,
		storageDisks:       newPosixDisks,
		dataBlocks:         half,
		parityBlocks:       half,
		readQuorum:         readQuorum,
		writeQuorum:        writeQuorum,
		listObjectMap:      make(map[listParams][]*treeWalker),
		listObjectMapMutex: &sync.Mutex{},
	}, nil
}
// byDiskTotal implements sort.Interface over a list of disk.Info,
// ordering the entries by ascending Total.
type byDiskTotal []disk.Info

// Len returns the number of entries.
func (d byDiskTotal) Len() int {
	return len(d)
}

// Swap exchanges the entries at positions i and j.
func (d byDiskTotal) Swap(i, j int) {
	tmp := d[i]
	d[i] = d[j]
	d[j] = tmp
}

// Less reports whether entry i has a smaller Total than entry j.
func (d byDiskTotal) Less(i, j int) bool {
	return d[j].Total > d[i].Total
}
// StorageInfo - returns underlying storage statistics.
//
// Usage is queried for every physical disk; a disk whose query
// fails is logged and left out. The reported Total and Free are the
// values of the disk with the smallest Total, multiplied by the
// number of storage disks. When no disk could be queried at all the
// zero StorageInfo is returned.
func (xl xlObjects) StorageInfo() StorageInfo {
	disksInfo := make([]disk.Info, 0, len(xl.physicalDisks))
	for _, diskPath := range xl.physicalDisks {
		info, err := disk.GetInfo(diskPath)
		if err != nil {
			errorIf(err, "Unable to fetch disk info for "+diskPath)
			continue
		}
		disksInfo = append(disksInfo, info)
	}

	// Nothing to aggregate, indexing the first element below would
	// panic.
	if len(disksInfo) == 0 {
		return StorageInfo{}
	}

	// Sort so that the first element is the smallest.
	sort.Sort(byDiskTotal(disksInfo))

	// Total capacity is always the multiple of smallest disk among
	// the disk list, Free is taken from that same disk.
	smallest := disksInfo[0]
	return StorageInfo{
		Total: smallest.Total * int64(len(xl.storageDisks)),
		Free:  smallest.Free * int64(len(xl.storageDisks)),
	}
}