minio/cmd/fs-v1.go
Krishna Srinivas 52f6d5aafc Rename of structs and methods (#6230)
Rename of ErasureStorage to Erasure (and rename of related variables and methods)
2018-08-23 23:35:37 -07:00

1262 lines
38 KiB
Go

/*
* Minio Cloud Storage, (C) 2016, 2017, 2018 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package cmd
import (
"context"
"encoding/hex"
"io"
"io/ioutil"
"os"
"path"
"sort"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/hash"
"github.com/minio/minio/pkg/lock"
"github.com/minio/minio/pkg/madmin"
"github.com/minio/minio/pkg/mimedb"
"github.com/minio/minio/pkg/mountinfo"
"github.com/minio/minio/pkg/policy"
)
// Default etag is used for pre-existing objects.
var defaultEtag = "00000000000000000000000000000000-1"
// FSObjects - Implements fs object layer.
type FSObjects struct {
// Disk usage metrics
totalUsed uint64 // ref: https://golang.org/pkg/sync/atomic/#pkg-note-BUG
// Path to be exported over S3 API.
fsPath string
// meta json filename, varies by fs / cache backend.
metaJSONFile string
// Unique value to be used for all
// temporary transactions.
fsUUID string
// This value shouldn't be touched, once initialized.
fsFormatRlk *lock.RLockedFile // Is a read lock on `format.json`.
// FS rw pool.
rwPool *fsIOPool
// ListObjects pool management.
listPool *treeWalkPool
diskMount bool
appendFileMap map[string]*fsAppendFile
appendFileMapMu sync.Mutex
// To manage the appendRoutine go-routines
nsMutex *nsLockMap
}
// Represents the background append file.
type fsAppendFile struct {
sync.Mutex
parts []PartInfo // List of parts appended.
filePath string // Absolute path of the file in the temp location.
}
// Initializes meta volume on all the fs path.
func initMetaVolumeFS(fsPath, fsUUID string) error {
// This happens for the first time, but keep this here since this
// is the only place where it can be made less expensive
// optimizing all other calls. Create minio meta volume,
// if it doesn't exist yet.
metaBucketPath := pathJoin(fsPath, minioMetaBucket)
if err := os.MkdirAll(metaBucketPath, 0777); err != nil {
return err
}
metaTmpPath := pathJoin(fsPath, minioMetaTmpBucket, fsUUID)
if err := os.MkdirAll(metaTmpPath, 0777); err != nil {
return err
}
metaMultipartPath := pathJoin(fsPath, minioMetaMultipartBucket)
return os.MkdirAll(metaMultipartPath, 0777)
}
// NewFSObjectLayer - initialize new fs object layer.
func NewFSObjectLayer(fsPath string) (ObjectLayer, error) {
ctx := context.Background()
if fsPath == "" {
return nil, errInvalidArgument
}
var err error
if fsPath, err = getValidPath(fsPath); err != nil {
if err == errMinDiskSize {
return nil, err
}
return nil, uiErrUnableToWriteInBackend(err)
}
// Assign a new UUID for FS minio mode. Each server instance
// gets its own UUID for temporary file transaction.
fsUUID := mustGetUUID()
// Initialize meta volume, if volume already exists ignores it.
if err = initMetaVolumeFS(fsPath, fsUUID); err != nil {
return nil, err
}
// Initialize `format.json`, this function also returns.
rlk, err := initFormatFS(ctx, fsPath)
if err != nil {
return nil, err
}
// Initialize fs objects.
fs := &FSObjects{
fsPath: fsPath,
metaJSONFile: fsMetaJSONFile,
fsUUID: fsUUID,
rwPool: &fsIOPool{
readersMap: make(map[string]*lock.RLockedFile),
},
nsMutex: newNSLock(false),
listPool: newTreeWalkPool(globalLookupTimeout),
appendFileMap: make(map[string]*fsAppendFile),
diskMount: mountinfo.IsLikelyMountPoint(fsPath),
}
// Once the filesystem has initialized hold the read lock for
// the life time of the server. This is done to ensure that under
// shared backend mode for FS, remote servers do not migrate
// or cause changes on backend format.
fs.fsFormatRlk = rlk
if !fs.diskMount {
go fs.diskUsage(globalServiceDoneCh)
}
go fs.cleanupStaleMultipartUploads(ctx, globalMultipartCleanupInterval, globalMultipartExpiry, globalServiceDoneCh)
// Return successfully initialized object layer.
return fs, nil
}
// Shutdown - should be called when process shuts down.
func (fs *FSObjects) Shutdown(ctx context.Context) error {
fs.fsFormatRlk.Close()
// Cleanup and delete tmp uuid.
return fsRemoveAll(ctx, pathJoin(fs.fsPath, minioMetaTmpBucket, fs.fsUUID))
}
// diskUsage returns du information for the posix path, in a continuous routine.
func (fs *FSObjects) diskUsage(doneCh chan struct{}) {
usageFn := func(ctx context.Context, entry string) error {
if globalHTTPServer != nil {
// Wait at max 1 minute for an inprogress request
// before proceeding to count the usage.
waitCount := 60
// Any requests in progress, delay the usage.
for globalHTTPServer.GetRequestCount() > 0 && waitCount > 0 {
waitCount--
time.Sleep(1 * time.Second)
}
}
select {
case <-doneCh:
return errWalkAbort
default:
var fi os.FileInfo
var err error
if hasSuffix(entry, slashSeparator) {
fi, err = fsStatDir(ctx, entry)
} else {
fi, err = fsStatFile(ctx, entry)
}
if err != nil {
return err
}
atomic.AddUint64(&fs.totalUsed, uint64(fi.Size()))
}
return nil
}
// Return this routine upon errWalkAbort, continue for any other error on purpose
// so that we can start the routine freshly in another 12 hours.
if err := getDiskUsage(context.Background(), fs.fsPath, usageFn); err == errWalkAbort {
return
}
for {
select {
case <-doneCh:
return
case <-time.After(globalUsageCheckInterval):
var usage uint64
usageFn = func(ctx context.Context, entry string) error {
if globalHTTPServer != nil {
// Wait at max 1 minute for an inprogress request
// before proceeding to count the usage.
waitCount := 60
// Any requests in progress, delay the usage.
for globalHTTPServer.GetRequestCount() > 0 && waitCount > 0 {
waitCount--
time.Sleep(1 * time.Second)
}
}
var fi os.FileInfo
var err error
if hasSuffix(entry, slashSeparator) {
fi, err = fsStatDir(ctx, entry)
} else {
fi, err = fsStatFile(ctx, entry)
}
if err != nil {
return err
}
usage = usage + uint64(fi.Size())
return nil
}
if err := getDiskUsage(context.Background(), fs.fsPath, usageFn); err != nil {
continue
}
atomic.StoreUint64(&fs.totalUsed, usage)
}
}
}
// StorageInfo - returns underlying storage statistics.
func (fs *FSObjects) StorageInfo(ctx context.Context) StorageInfo {
di, err := getDiskInfo(fs.fsPath)
if err != nil {
return StorageInfo{}
}
used := di.Total - di.Free
if !fs.diskMount {
used = atomic.LoadUint64(&fs.totalUsed)
}
storageInfo := StorageInfo{
Used: used,
}
storageInfo.Backend.Type = BackendFS
return storageInfo
}
/// Bucket operations
// getBucketDir - will convert incoming bucket names to
// corresponding valid bucket names on the backend in a platform
// compatible way for all operating systems.
func (fs *FSObjects) getBucketDir(ctx context.Context, bucket string) (string, error) {
if bucket == "" || bucket == "." || bucket == ".." {
return "", errVolumeNotFound
}
bucketDir := pathJoin(fs.fsPath, bucket)
return bucketDir, nil
}
func (fs *FSObjects) statBucketDir(ctx context.Context, bucket string) (os.FileInfo, error) {
bucketDir, err := fs.getBucketDir(ctx, bucket)
if err != nil {
return nil, err
}
st, err := fsStatVolume(ctx, bucketDir)
if err != nil {
return nil, err
}
return st, nil
}
// MakeBucketWithLocation - create a new bucket, returns if it
// already exists.
func (fs *FSObjects) MakeBucketWithLocation(ctx context.Context, bucket, location string) error {
bucketLock := fs.nsMutex.NewNSLock(bucket, "")
if err := bucketLock.GetLock(globalObjectTimeout); err != nil {
return err
}
defer bucketLock.Unlock()
// Verify if bucket is valid.
if !IsValidBucketName(bucket) {
err := BucketNameInvalid{Bucket: bucket}
logger.LogIf(ctx, err)
return err
}
bucketDir, err := fs.getBucketDir(ctx, bucket)
if err != nil {
return toObjectErr(err, bucket)
}
if err = fsMkdir(ctx, bucketDir); err != nil {
return toObjectErr(err, bucket)
}
return nil
}
// GetBucketInfo - fetch bucket metadata info.
func (fs *FSObjects) GetBucketInfo(ctx context.Context, bucket string) (bi BucketInfo, e error) {
bucketLock := fs.nsMutex.NewNSLock(bucket, "")
if e := bucketLock.GetRLock(globalObjectTimeout); e != nil {
return bi, e
}
defer bucketLock.RUnlock()
st, err := fs.statBucketDir(ctx, bucket)
if err != nil {
return bi, toObjectErr(err, bucket)
}
// As os.Stat() doesn't carry other than ModTime(), use ModTime() as CreatedTime.
createdTime := st.ModTime()
return BucketInfo{
Name: bucket,
Created: createdTime,
}, nil
}
// ListBuckets - list all s3 compatible buckets (directories) at fsPath.
func (fs *FSObjects) ListBuckets(ctx context.Context) ([]BucketInfo, error) {
if err := checkPathLength(fs.fsPath); err != nil {
logger.LogIf(ctx, err)
return nil, err
}
var bucketInfos []BucketInfo
entries, err := readDir((fs.fsPath))
if err != nil {
logger.LogIf(ctx, errDiskNotFound)
return nil, toObjectErr(errDiskNotFound)
}
for _, entry := range entries {
// Ignore all reserved bucket names and invalid bucket names.
if isReservedOrInvalidBucket(entry) {
continue
}
var fi os.FileInfo
fi, err = fsStatVolume(ctx, pathJoin(fs.fsPath, entry))
// There seems like no practical reason to check for errors
// at this point, if there are indeed errors we can simply
// just ignore such buckets and list only those which
// return proper Stat information instead.
if err != nil {
// Ignore any errors returned here.
continue
}
bucketInfos = append(bucketInfos, BucketInfo{
Name: fi.Name(),
// As os.Stat() doesnt carry CreatedTime, use ModTime() as CreatedTime.
Created: fi.ModTime(),
})
}
// Sort bucket infos by bucket name.
sort.Sort(byBucketName(bucketInfos))
// Succes.
return bucketInfos, nil
}
// DeleteBucket - delete a bucket and all the metadata associated
// with the bucket including pending multipart, object metadata.
func (fs *FSObjects) DeleteBucket(ctx context.Context, bucket string) error {
bucketLock := fs.nsMutex.NewNSLock(bucket, "")
if err := bucketLock.GetLock(globalObjectTimeout); err != nil {
logger.LogIf(ctx, err)
return err
}
defer bucketLock.Unlock()
bucketDir, err := fs.getBucketDir(ctx, bucket)
if err != nil {
return toObjectErr(err, bucket)
}
// Attempt to delete regular bucket.
if err = fsRemoveDir(ctx, bucketDir); err != nil {
return toObjectErr(err, bucket)
}
// Cleanup all the bucket metadata.
minioMetadataBucketDir := pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket)
if err = fsRemoveAll(ctx, minioMetadataBucketDir); err != nil {
return toObjectErr(err, bucket)
}
// Delete all bucket metadata.
deleteBucketMetadata(ctx, bucket, fs)
return nil
}
/// Object Operations
// CopyObject - copy object source object to destination object.
// if source object and destination object are same we only
// update metadata.
func (fs *FSObjects) CopyObject(ctx context.Context, srcBucket, srcObject, dstBucket, dstObject string, srcInfo ObjectInfo) (oi ObjectInfo, e error) {
cpSrcDstSame := isStringEqual(pathJoin(srcBucket, srcObject), pathJoin(dstBucket, dstObject))
// Hold write lock on destination since in both cases
// - if source and destination are same
// - if source and destination are different
// it is the sole mutating state.
objectDWLock := fs.nsMutex.NewNSLock(dstBucket, dstObject)
if err := objectDWLock.GetLock(globalObjectTimeout); err != nil {
return oi, err
}
defer objectDWLock.Unlock()
// if source and destination are different, we have to hold
// additional read lock as well to protect against writes on
// source.
if !cpSrcDstSame {
// Hold read locks on source object only if we are
// going to read data from source object.
objectSRLock := fs.nsMutex.NewNSLock(srcBucket, srcObject)
if err := objectSRLock.GetRLock(globalObjectTimeout); err != nil {
return oi, err
}
defer objectSRLock.RUnlock()
}
if _, err := fs.statBucketDir(ctx, srcBucket); err != nil {
return oi, toObjectErr(err, srcBucket)
}
if cpSrcDstSame && srcInfo.metadataOnly {
// Close any writer which was initialized.
defer srcInfo.Writer.Close()
fsMetaPath := pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, srcBucket, srcObject, fs.metaJSONFile)
wlk, err := fs.rwPool.Write(fsMetaPath)
if err != nil {
logger.LogIf(ctx, err)
return oi, toObjectErr(err, srcBucket, srcObject)
}
// This close will allow for locks to be synchronized on `fs.json`.
defer wlk.Close()
// Save objects' metadata in `fs.json`.
fsMeta := newFSMetaV1()
if _, err = fsMeta.ReadFrom(ctx, wlk); err != nil && err != io.EOF {
return oi, toObjectErr(err, srcBucket, srcObject)
}
fsMeta.Meta = srcInfo.UserDefined
fsMeta.Meta["etag"] = srcInfo.ETag
if _, err = fsMeta.WriteTo(wlk); err != nil {
return oi, toObjectErr(err, srcBucket, srcObject)
}
// Stat the file to get file size.
fi, err := fsStatFile(ctx, pathJoin(fs.fsPath, srcBucket, srcObject))
if err != nil {
return oi, toObjectErr(err, srcBucket, srcObject)
}
// Return the new object info.
return fsMeta.ToObjectInfo(srcBucket, srcObject, fi), nil
}
go func() {
if gerr := fs.getObject(ctx, srcBucket, srcObject, 0, srcInfo.Size, srcInfo.Writer, srcInfo.ETag, !cpSrcDstSame); gerr != nil {
if gerr = srcInfo.Writer.Close(); gerr != nil {
logger.LogIf(ctx, gerr)
}
return
}
// Close writer explicitly signaling we wrote all data.
if gerr := srcInfo.Writer.Close(); gerr != nil {
logger.LogIf(ctx, gerr)
return
}
}()
objInfo, err := fs.putObject(ctx, dstBucket, dstObject, srcInfo.Reader, srcInfo.UserDefined)
if err != nil {
return oi, toObjectErr(err, dstBucket, dstObject)
}
return objInfo, nil
}
// GetObject - reads an object from the disk.
// Supports additional parameters like offset and length
// which are synonymous with HTTP Range requests.
//
// startOffset indicates the starting read location of the object.
// length indicates the total length of the object.
func (fs *FSObjects) GetObject(ctx context.Context, bucket, object string, offset int64, length int64, writer io.Writer, etag string) (err error) {
if err = checkGetObjArgs(ctx, bucket, object); err != nil {
return err
}
// Lock the object before reading.
objectLock := fs.nsMutex.NewNSLock(bucket, object)
if err := objectLock.GetRLock(globalObjectTimeout); err != nil {
logger.LogIf(ctx, err)
return err
}
defer objectLock.RUnlock()
return fs.getObject(ctx, bucket, object, offset, length, writer, etag, true)
}
// getObject - wrapper for GetObject
func (fs *FSObjects) getObject(ctx context.Context, bucket, object string, offset int64, length int64, writer io.Writer, etag string, lock bool) (err error) {
if _, err = fs.statBucketDir(ctx, bucket); err != nil {
return toObjectErr(err, bucket)
}
// Offset cannot be negative.
if offset < 0 {
logger.LogIf(ctx, errUnexpected)
return toObjectErr(errUnexpected, bucket, object)
}
// Writer cannot be nil.
if writer == nil {
logger.LogIf(ctx, errUnexpected)
return toObjectErr(errUnexpected, bucket, object)
}
// If its a directory request, we return an empty body.
if hasSuffix(object, slashSeparator) {
_, err = writer.Write([]byte(""))
logger.LogIf(ctx, err)
return toObjectErr(err, bucket, object)
}
if bucket != minioMetaBucket {
fsMetaPath := pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile)
if lock {
_, err = fs.rwPool.Open(fsMetaPath)
if err != nil && err != errFileNotFound {
logger.LogIf(ctx, err)
return toObjectErr(err, bucket, object)
}
defer fs.rwPool.Close(fsMetaPath)
}
}
if etag != "" && etag != defaultEtag {
objEtag, perr := fs.getObjectETag(ctx, bucket, object, lock)
if perr != nil {
return toObjectErr(perr, bucket, object)
}
if objEtag != etag {
logger.LogIf(ctx, InvalidETag{})
return toObjectErr(InvalidETag{}, bucket, object)
}
}
// Read the object, doesn't exist returns an s3 compatible error.
fsObjPath := pathJoin(fs.fsPath, bucket, object)
reader, size, err := fsOpenFile(ctx, fsObjPath, offset)
if err != nil {
return toObjectErr(err, bucket, object)
}
defer reader.Close()
bufSize := int64(readSizeV1)
if length > 0 && bufSize > length {
bufSize = length
}
// For negative length we read everything.
if length < 0 {
length = size - offset
}
// Reply back invalid range if the input offset and length fall out of range.
if offset > size || offset+length > size {
err = InvalidRange{offset, length, size}
logger.LogIf(ctx, err)
return err
}
// Allocate a staging buffer.
buf := make([]byte, int(bufSize))
_, err = io.CopyBuffer(writer, io.LimitReader(reader, length), buf)
logger.LogIf(ctx, err)
return toObjectErr(err, bucket, object)
}
// Create a new fs.json file, if the existing one is corrupt. Should happen very rarely.
func (fs *FSObjects) createFsJSON(object, fsMetaPath string) error {
fsMeta := newFSMetaV1()
fsMeta.Meta = make(map[string]string)
fsMeta.Meta["etag"] = GenETag()
if objectExt := path.Ext(object); objectExt != "" {
if content, ok := mimedb.DB[strings.ToLower(strings.TrimPrefix(objectExt, "."))]; ok {
fsMeta.Meta["content-type"] = content.ContentType
} else {
fsMeta.Meta["content-type"] = "application/octet-stream"
}
}
wlk, werr := fs.rwPool.Create(fsMetaPath)
if werr == nil {
_, err := fsMeta.WriteTo(wlk)
wlk.Close()
return err
}
return werr
}
// Used to return default etag values when a pre-existing object's meta data is queried.
func (fs *FSObjects) defaultFsJSON(object string) fsMetaV1 {
fsMeta := newFSMetaV1()
fsMeta.Meta = make(map[string]string)
fsMeta.Meta["etag"] = defaultEtag
if objectExt := path.Ext(object); objectExt != "" {
if content, ok := mimedb.DB[strings.ToLower(strings.TrimPrefix(objectExt, "."))]; ok {
fsMeta.Meta["content-type"] = content.ContentType
} else {
fsMeta.Meta["content-type"] = "application/octet-stream"
}
}
return fsMeta
}
// getObjectInfo - wrapper for reading object metadata and constructs ObjectInfo.
func (fs *FSObjects) getObjectInfo(ctx context.Context, bucket, object string) (oi ObjectInfo, e error) {
fsMeta := fsMetaV1{}
if hasSuffix(object, slashSeparator) {
// Since we support PUT of a "directory" object, we allow HEAD.
if !fsIsDir(ctx, pathJoin(fs.fsPath, bucket, object)) {
return oi, errFileNotFound
}
fi, err := fsStatDir(ctx, pathJoin(fs.fsPath, bucket, object))
if err != nil {
return oi, err
}
return fsMeta.ToObjectInfo(bucket, object, fi), nil
}
fsMetaPath := pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile)
// Read `fs.json` to perhaps contend with
// parallel Put() operations.
rlk, err := fs.rwPool.Open(fsMetaPath)
if err == nil {
// Read from fs metadata only if it exists.
_, rerr := fsMeta.ReadFrom(ctx, rlk.LockedFile)
fs.rwPool.Close(fsMetaPath)
if rerr != nil && rerr != io.EOF {
return oi, rerr
}
}
// Return a default etag and content-type based on the object's extension.
if err == errFileNotFound {
fsMeta = fs.defaultFsJSON(object)
}
// Ignore if `fs.json` is not available, this is true for pre-existing data.
if err != nil && err != errFileNotFound {
logger.LogIf(ctx, err)
return oi, err
}
// Stat the file to get file size.
fi, err := fsStatFile(ctx, pathJoin(fs.fsPath, bucket, object))
if err != nil {
return oi, err
}
return fsMeta.ToObjectInfo(bucket, object, fi), nil
}
// getObjectInfoWithLock - reads object metadata and replies back ObjectInfo.
func (fs *FSObjects) getObjectInfoWithLock(ctx context.Context, bucket, object string) (oi ObjectInfo, e error) {
// Lock the object before reading.
objectLock := fs.nsMutex.NewNSLock(bucket, object)
if err := objectLock.GetRLock(globalObjectTimeout); err != nil {
return oi, err
}
defer objectLock.RUnlock()
if err := checkGetObjArgs(ctx, bucket, object); err != nil {
return oi, err
}
if _, err := fs.statBucketDir(ctx, bucket); err != nil {
return oi, err
}
if strings.HasSuffix(object, slashSeparator) && !fs.isObjectDir(bucket, object) {
return oi, errFileNotFound
}
return fs.getObjectInfo(ctx, bucket, object)
}
// GetObjectInfo - reads object metadata and replies back ObjectInfo.
func (fs *FSObjects) GetObjectInfo(ctx context.Context, bucket, object string) (oi ObjectInfo, e error) {
oi, err := fs.getObjectInfoWithLock(ctx, bucket, object)
if err == errCorruptedFormat || err == io.EOF {
objectLock := fs.nsMutex.NewNSLock(bucket, object)
if err = objectLock.GetLock(globalObjectTimeout); err != nil {
return oi, toObjectErr(err, bucket, object)
}
fsMetaPath := pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, object, fs.metaJSONFile)
err = fs.createFsJSON(object, fsMetaPath)
objectLock.Unlock()
if err != nil {
return oi, toObjectErr(err, bucket, object)
}
oi, err = fs.getObjectInfoWithLock(ctx, bucket, object)
}
return oi, toObjectErr(err, bucket, object)
}
// This function does the following check, suppose
// object is "a/b/c/d", stat makes sure that objects ""a/b/c""
// "a/b" and "a" do not exist.
func (fs *FSObjects) parentDirIsObject(ctx context.Context, bucket, parent string) bool {
var isParentDirObject func(string) bool
isParentDirObject = func(p string) bool {
if p == "." || p == "/" {
return false
}
if fsIsFile(ctx, pathJoin(fs.fsPath, bucket, p)) {
// If there is already a file at prefix "p", return true.
return true
}
// Check if there is a file as one of the parent paths.
return isParentDirObject(path.Dir(p))
}
return isParentDirObject(parent)
}
// PutObject - creates an object upon reading from the input stream
// until EOF, writes data directly to configured filesystem path.
// Additionally writes `fs.json` which carries the necessary metadata
// for future object operations.
func (fs *FSObjects) PutObject(ctx context.Context, bucket string, object string, data *hash.Reader, metadata map[string]string) (objInfo ObjectInfo, retErr error) {
if err := checkPutObjectArgs(ctx, bucket, object, fs, data.Size()); err != nil {
return ObjectInfo{}, err
}
// Lock the object.
objectLock := fs.nsMutex.NewNSLock(bucket, object)
if err := objectLock.GetLock(globalObjectTimeout); err != nil {
logger.LogIf(ctx, err)
return objInfo, err
}
defer objectLock.Unlock()
return fs.putObject(ctx, bucket, object, data, metadata)
}
// putObject - wrapper for PutObject
func (fs *FSObjects) putObject(ctx context.Context, bucket string, object string, data *hash.Reader, metadata map[string]string) (objInfo ObjectInfo, retErr error) {
// No metadata is set, allocate a new one.
meta := make(map[string]string)
for k, v := range metadata {
meta[k] = v
}
var err error
// Validate if bucket name is valid and exists.
if _, err = fs.statBucketDir(ctx, bucket); err != nil {
return ObjectInfo{}, toObjectErr(err, bucket)
}
fsMeta := newFSMetaV1()
fsMeta.Meta = meta
// This is a special case with size as '0' and object ends
// with a slash separator, we treat it like a valid operation
// and return success.
if isObjectDir(object, data.Size()) {
// Check if an object is present as one of the parent dir.
if fs.parentDirIsObject(ctx, bucket, path.Dir(object)) {
logger.LogIf(ctx, errFileAccessDenied)
return ObjectInfo{}, toObjectErr(errFileAccessDenied, bucket, object)
}
if err = mkdirAll(pathJoin(fs.fsPath, bucket, object), 0777); err != nil {
logger.LogIf(ctx, err)
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
var fi os.FileInfo
if fi, err = fsStatDir(ctx, pathJoin(fs.fsPath, bucket, object)); err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
return fsMeta.ToObjectInfo(bucket, object, fi), nil
}
if err = checkPutObjectArgs(ctx, bucket, object, fs, data.Size()); err != nil {
return ObjectInfo{}, err
}
// Check if an object is present as one of the parent dir.
if fs.parentDirIsObject(ctx, bucket, path.Dir(object)) {
logger.LogIf(ctx, errFileAccessDenied)
return ObjectInfo{}, toObjectErr(errFileAccessDenied, bucket, object)
}
// Validate input data size and it can never be less than zero.
if data.Size() < 0 {
logger.LogIf(ctx, errInvalidArgument)
return ObjectInfo{}, errInvalidArgument
}
var wlk *lock.LockedFile
if bucket != minioMetaBucket {
bucketMetaDir := pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix)
fsMetaPath := pathJoin(bucketMetaDir, bucket, object, fs.metaJSONFile)
wlk, err = fs.rwPool.Create(fsMetaPath)
if err != nil {
logger.LogIf(ctx, err)
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
// This close will allow for locks to be synchronized on `fs.json`.
defer wlk.Close()
defer func() {
// Remove meta file when PutObject encounters any error
if retErr != nil {
tmpDir := pathJoin(fs.fsPath, minioMetaTmpBucket, fs.fsUUID)
fsRemoveMeta(ctx, bucketMetaDir, fsMetaPath, tmpDir)
}
}()
}
// Uploaded object will first be written to the temporary location which will eventually
// be renamed to the actual location. It is first written to the temporary location
// so that cleaning it up will be easy if the server goes down.
tempObj := mustGetUUID()
// Allocate a buffer to Read() from request body
bufSize := int64(readSizeV1)
if size := data.Size(); size > 0 && bufSize > size {
bufSize = size
}
buf := make([]byte, int(bufSize))
fsTmpObjPath := pathJoin(fs.fsPath, minioMetaTmpBucket, fs.fsUUID, tempObj)
bytesWritten, err := fsCreateFile(ctx, fsTmpObjPath, data, buf, data.Size())
if err != nil {
fsRemoveFile(ctx, fsTmpObjPath)
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
fsMeta.Meta["etag"] = hex.EncodeToString(data.MD5Current())
// Should return IncompleteBody{} error when reader has fewer
// bytes than specified in request header.
if bytesWritten < data.Size() {
fsRemoveFile(ctx, fsTmpObjPath)
return ObjectInfo{}, IncompleteBody{}
}
// Delete the temporary object in the case of a
// failure. If PutObject succeeds, then there would be
// nothing to delete.
defer fsRemoveFile(ctx, fsTmpObjPath)
// Entire object was written to the temp location, now it's safe to rename it to the actual location.
fsNSObjPath := pathJoin(fs.fsPath, bucket, object)
// Deny if WORM is enabled
if globalWORMEnabled {
if _, err = fsStatFile(ctx, fsNSObjPath); err == nil {
return ObjectInfo{}, ObjectAlreadyExists{Bucket: bucket, Object: object}
}
}
if err = fsRenameFile(ctx, fsTmpObjPath, fsNSObjPath); err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
if bucket != minioMetaBucket {
// Write FS metadata after a successful namespace operation.
if _, err = fsMeta.WriteTo(wlk); err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
}
// Stat the file to fetch timestamp, size.
fi, err := fsStatFile(ctx, pathJoin(fs.fsPath, bucket, object))
if err != nil {
return ObjectInfo{}, toObjectErr(err, bucket, object)
}
// Success.
return fsMeta.ToObjectInfo(bucket, object, fi), nil
}
// DeleteObject - deletes an object from a bucket, this operation is destructive
// and there are no rollbacks supported.
func (fs *FSObjects) DeleteObject(ctx context.Context, bucket, object string) error {
// Acquire a write lock before deleting the object.
objectLock := fs.nsMutex.NewNSLock(bucket, object)
if err := objectLock.GetLock(globalOperationTimeout); err != nil {
return err
}
defer objectLock.Unlock()
if err := checkDelObjArgs(ctx, bucket, object); err != nil {
return err
}
if _, err := fs.statBucketDir(ctx, bucket); err != nil {
return toObjectErr(err, bucket)
}
minioMetaBucketDir := pathJoin(fs.fsPath, minioMetaBucket)
fsMetaPath := pathJoin(minioMetaBucketDir, bucketMetaPrefix, bucket, object, fs.metaJSONFile)
if bucket != minioMetaBucket {
rwlk, lerr := fs.rwPool.Write(fsMetaPath)
if lerr == nil {
// This close will allow for fs locks to be synchronized on `fs.json`.
defer rwlk.Close()
}
if lerr != nil && lerr != errFileNotFound {
logger.LogIf(ctx, lerr)
return toObjectErr(lerr, bucket, object)
}
}
// Delete the object.
if err := fsDeleteFile(ctx, pathJoin(fs.fsPath, bucket), pathJoin(fs.fsPath, bucket, object)); err != nil {
return toObjectErr(err, bucket, object)
}
if bucket != minioMetaBucket {
// Delete the metadata object.
err := fsDeleteFile(ctx, minioMetaBucketDir, fsMetaPath)
if err != nil && err != errFileNotFound {
return toObjectErr(err, bucket, object)
}
}
return nil
}
// Returns function "listDir" of the type listDirFunc.
// isLeaf - is used by listDir function to check if an entry
// is a leaf or non-leaf entry.
func (fs *FSObjects) listDirFactory(isLeaf isLeafFunc) listDirFunc {
// listDir - lists all the entries at a given prefix and given entry in the prefix.
listDir := func(bucket, prefixDir, prefixEntry string) (entries []string, delayIsLeaf bool) {
var err error
entries, err = readDir(pathJoin(fs.fsPath, bucket, prefixDir))
if err != nil {
logger.LogIf(context.Background(), err)
return
}
entries, delayIsLeaf = filterListEntries(bucket, prefixDir, entries, prefixEntry, isLeaf)
return entries, delayIsLeaf
}
// Return list factory instance.
return listDir
}
// isObjectDir returns true if the specified bucket & prefix exists
// and the prefix represents an empty directory. An S3 empty directory
// is also an empty directory in the FS backend.
func (fs *FSObjects) isObjectDir(bucket, prefix string) bool {
entries, err := readDirN(pathJoin(fs.fsPath, bucket, prefix), 1)
if err != nil {
return false
}
return len(entries) == 0
}
// getObjectETag is a helper function, which returns only the md5sum
// of the file on the disk.
func (fs *FSObjects) getObjectETag(ctx context.Context, bucket, entry string, lock bool) (string, error) {
fsMetaPath := pathJoin(fs.fsPath, minioMetaBucket, bucketMetaPrefix, bucket, entry, fs.metaJSONFile)
var reader io.Reader
var fi os.FileInfo
var size int64
if lock {
// Read `fs.json` to perhaps contend with
// parallel Put() operations.
rlk, err := fs.rwPool.Open(fsMetaPath)
// Ignore if `fs.json` is not available, this is true for pre-existing data.
if err != nil && err != errFileNotFound {
logger.LogIf(ctx, err)
return "", toObjectErr(err, bucket, entry)
}
// If file is not found, we don't need to proceed forward.
if err == errFileNotFound {
return "", nil
}
// Read from fs metadata only if it exists.
defer fs.rwPool.Close(fsMetaPath)
// Fetch the size of the underlying file.
fi, err = rlk.LockedFile.Stat()
if err != nil {
logger.LogIf(ctx, err)
return "", toObjectErr(err, bucket, entry)
}
size = fi.Size()
reader = io.NewSectionReader(rlk.LockedFile, 0, fi.Size())
} else {
var err error
reader, size, err = fsOpenFile(ctx, fsMetaPath, 0)
if err != nil {
return "", toObjectErr(err, bucket, entry)
}
}
// `fs.json` can be empty due to previously failed
// PutObject() transaction, if we arrive at such
// a situation we just ignore and continue.
if size == 0 {
return "", nil
}
fsMetaBuf, err := ioutil.ReadAll(reader)
if err != nil {
logger.LogIf(ctx, err)
return "", toObjectErr(err, bucket, entry)
}
// Check if FS metadata is valid, if not return error.
if !isFSMetaValid(parseFSVersion(fsMetaBuf)) {
logger.LogIf(ctx, errCorruptedFormat)
return "", toObjectErr(errCorruptedFormat, bucket, entry)
}
return extractETag(parseFSMetaMap(fsMetaBuf)), nil
}
// ListObjects - list all objects at prefix upto maxKeys., optionally delimited by '/'. Maintains the list pool
// state for future re-entrant list requests.
func (fs *FSObjects) ListObjects(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (loi ListObjectsInfo, e error) {
if err := checkListObjsArgs(ctx, bucket, prefix, marker, delimiter, fs); err != nil {
return loi, err
}
// Marker is set validate pre-condition.
if marker != "" {
// Marker not common with prefix is not implemented.Send an empty response
if !hasPrefix(marker, prefix) {
return ListObjectsInfo{}, e
}
}
if _, err := fs.statBucketDir(ctx, bucket); err != nil {
return loi, err
}
// With max keys of zero we have reached eof, return right here.
if maxKeys == 0 {
return loi, nil
}
// For delimiter and prefix as '/' we do not list anything at all
// since according to s3 spec we stop at the 'delimiter'
// along // with the prefix. On a flat namespace with 'prefix'
// as '/' we don't have any entries, since all the keys are
// of form 'keyName/...'
if delimiter == slashSeparator && prefix == slashSeparator {
return loi, nil
}
// Over flowing count - reset to maxObjectList.
if maxKeys < 0 || maxKeys > maxObjectList {
maxKeys = maxObjectList
}
// Default is recursive, if delimiter is set then list non recursive.
recursive := true
if delimiter == slashSeparator {
recursive = false
}
// Convert entry to ObjectInfo
entryToObjectInfo := func(entry string) (objInfo ObjectInfo, err error) {
// Protect the entry from concurrent deletes, or renames.
objectLock := fs.nsMutex.NewNSLock(bucket, entry)
if err = objectLock.GetRLock(globalListingTimeout); err != nil {
logger.LogIf(ctx, err)
return ObjectInfo{}, err
}
defer objectLock.RUnlock()
return fs.getObjectInfo(ctx, bucket, entry)
}
heal := false // true only for xl.ListObjectsHeal()
walkResultCh, endWalkCh := fs.listPool.Release(listParams{bucket, recursive, marker, prefix, heal})
if walkResultCh == nil {
endWalkCh = make(chan struct{})
isLeaf := func(bucket, object string) bool {
// bucket argument is unused as we don't need to StatFile
// to figure if it's a file, just need to check that the
// object string does not end with "/".
return !hasSuffix(object, slashSeparator)
}
// Return true if the specified object is an empty directory
isLeafDir := func(bucket, object string) bool {
if !hasSuffix(object, slashSeparator) {
return false
}
return fs.isObjectDir(bucket, object)
}
listDir := fs.listDirFactory(isLeaf)
walkResultCh = startTreeWalk(ctx, bucket, prefix, marker, recursive, listDir, isLeaf, isLeafDir, endWalkCh)
}
var objInfos []ObjectInfo
var eof bool
var nextMarker string
// List until maxKeys requested.
for i := 0; i < maxKeys; {
walkResult, ok := <-walkResultCh
if !ok {
// Closed channel.
eof = true
break
}
// For any walk error return right away.
if walkResult.err != nil {
// File not found is a valid case.
if walkResult.err == errFileNotFound {
return loi, nil
}
return loi, toObjectErr(walkResult.err, bucket, prefix)
}
objInfo, err := entryToObjectInfo(walkResult.entry)
if err != nil {
return loi, nil
}
nextMarker = objInfo.Name
objInfos = append(objInfos, objInfo)
if walkResult.end {
eof = true
break
}
i++
}
// Save list routine for the next marker if we haven't reached EOF.
params := listParams{bucket, recursive, nextMarker, prefix, heal}
if !eof {
fs.listPool.Set(params, walkResultCh, endWalkCh)
}
result := ListObjectsInfo{IsTruncated: !eof}
for _, objInfo := range objInfos {
result.NextMarker = objInfo.Name
if objInfo.IsDir && delimiter == slashSeparator {
result.Prefixes = append(result.Prefixes, objInfo.Name)
continue
}
result.Objects = append(result.Objects, objInfo)
}
// Success.
return result, nil
}
// ReloadFormat - no-op for fs, Valid only for XL.
func (fs *FSObjects) ReloadFormat(ctx context.Context, dryRun bool) error {
logger.LogIf(ctx, NotImplemented{})
return NotImplemented{}
}
// HealFormat - no-op for fs, Valid only for XL.
func (fs *FSObjects) HealFormat(ctx context.Context, dryRun bool) (madmin.HealResultItem, error) {
logger.LogIf(ctx, NotImplemented{})
return madmin.HealResultItem{}, NotImplemented{}
}
// HealObject - no-op for fs. Valid only for XL.
func (fs *FSObjects) HealObject(ctx context.Context, bucket, object string, dryRun bool) (
res madmin.HealResultItem, err error) {
logger.LogIf(ctx, NotImplemented{})
return res, NotImplemented{}
}
// HealBucket - no-op for fs, Valid only for XL.
func (fs *FSObjects) HealBucket(ctx context.Context, bucket string, dryRun bool) ([]madmin.HealResultItem,
error) {
logger.LogIf(ctx, NotImplemented{})
return nil, NotImplemented{}
}
// ListObjectsHeal - list all objects to be healed. Valid only for XL
func (fs *FSObjects) ListObjectsHeal(ctx context.Context, bucket, prefix, marker, delimiter string, maxKeys int) (loi ListObjectsInfo, e error) {
logger.LogIf(ctx, NotImplemented{})
return loi, NotImplemented{}
}
// ListBucketsHeal - list all buckets to be healed. Valid only for XL
func (fs *FSObjects) ListBucketsHeal(ctx context.Context) ([]BucketInfo, error) {
logger.LogIf(ctx, NotImplemented{})
return []BucketInfo{}, NotImplemented{}
}
// SetBucketPolicy sets policy on bucket
func (fs *FSObjects) SetBucketPolicy(ctx context.Context, bucket string, policy *policy.Policy) error {
return savePolicyConfig(fs, bucket, policy)
}
// GetBucketPolicy will get policy on bucket
func (fs *FSObjects) GetBucketPolicy(ctx context.Context, bucket string) (*policy.Policy, error) {
return getPolicyConfig(fs, bucket)
}
// DeleteBucketPolicy deletes all policies on bucket
func (fs *FSObjects) DeleteBucketPolicy(ctx context.Context, bucket string) error {
return removePolicyConfig(ctx, fs, bucket)
}
// ListObjectsV2 lists all blobs in bucket filtered by prefix
func (fs *FSObjects) ListObjectsV2(ctx context.Context, bucket, prefix, continuationToken, delimiter string, maxKeys int, fetchOwner bool, startAfter string) (result ListObjectsV2Info, err error) {
marker := continuationToken
if marker == "" {
marker = startAfter
}
loi, err := fs.ListObjects(ctx, bucket, prefix, marker, delimiter, maxKeys)
if err != nil {
return result, err
}
listObjectsV2Info := ListObjectsV2Info{
IsTruncated: loi.IsTruncated,
ContinuationToken: continuationToken,
NextContinuationToken: loi.NextMarker,
Objects: loi.Objects,
Prefixes: loi.Prefixes,
}
return listObjectsV2Info, err
}
// IsNotificationSupported returns whether bucket notification is applicable for this layer.
func (fs *FSObjects) IsNotificationSupported() bool {
return true
}
// IsEncryptionSupported returns whether server side encryption is applicable for this layer.
func (fs *FSObjects) IsEncryptionSupported() bool {
return true
}