minio/pkg/storage/donut/cache.go

/*
* Minimalist Object Storage, (C) 2015 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package donut
import (
"bufio"
"bytes"
"crypto/md5"
"encoding/base64"
"encoding/hex"
"errors"
"io"
"io/ioutil"
"log"
"runtime/debug"
"sort"
"strconv"
"strings"
"sync"
"time"
"github.com/minio/minio/pkg/iodine"
"github.com/minio/minio/pkg/storage/donut/trove"
)
// total number of buckets allowed
const (
totalBuckets = 100
)
// Cache - in-memory object cache with donut as the persistent backend
type Cache struct {
storedBuckets map[string]storedBucket
lock *sync.RWMutex
objects *trove.Cache
multiPartObjects *trove.Cache
maxSize uint64
expiration time.Duration
donut Donut
}
// storedBucket - cached bucket along with its object, part and multipart session metadata
type storedBucket struct {
bucketMetadata BucketMetadata
objectMetadata map[string]ObjectMetadata
partMetadata map[string]PartMetadata
multiPartSession map[string]multiPartSession
}
// multiPartSession - state of an in-progress multipart upload
type multiPartSession struct {
totalParts int
uploadID string
initiated time.Time
}
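// proxyWriter - io.Writer wrapper that keeps a copy of every byte successfully written to the underlying writer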
type proxyWriter struct {
writer io.Writer
writtenBytes []byte
}
func (r *proxyWriter) Write(p []byte) (n int, err error) {
n, err = r.writer.Write(p)
if err != nil {
return
}
r.writtenBytes = append(r.writtenBytes, p[0:n]...)
return
}
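// newProxyWriter - wrap an io.Writer in a proxyWriter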
func newProxyWriter(w io.Writer) *proxyWriter {
return &proxyWriter{writer: w, writtenBytes: nil}
}
// NewCache - instantiate a new cache with the given size limit, expiration and donut configuration
func NewCache(maxSize uint64, expiration time.Duration, donutName string, nodeDiskMap map[string][]string) Cache {
c := Cache{}
c.storedBuckets = make(map[string]storedBucket)
c.objects = trove.NewCache(maxSize, expiration)
c.multiPartObjects = trove.NewCache(0, time.Duration(0))
c.objects.OnExpired = c.expiredObject
c.multiPartObjects.OnExpired = c.expiredPart
// initialize the lock and remember the configured limits,
// every cache method dereferences cache.lock unconditionally
c.lock = new(sync.RWMutex)
c.maxSize = maxSize
c.expiration = expiration
// set up cache expiration
c.objects.ExpireObjects(time.Second * 5)
c.donut, _ = NewDonut(donutName, nodeDiskMap)
return c
}
// GetObject - GET object from cache buffer
func (cache Cache) GetObject(w io.Writer, bucket string, object string) (int64, error) {
cache.lock.RLock()
defer cache.lock.RUnlock()
if !IsValidBucket(bucket) {
return 0, iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
}
if !IsValidObjectName(object) {
return 0, iodine.New(ObjectNameInvalid{Object: object}, nil)
}
if _, ok := cache.storedBuckets[bucket]; !ok {
return 0, iodine.New(BucketNotFound{Bucket: bucket}, nil)
}
objectKey := bucket + "/" + object
data, ok := cache.objects.Get(objectKey)
if !ok {
if cache.donut != nil {
reader, size, err := cache.donut.GetObject(bucket, object)
if err != nil {
return 0, iodine.New(err, nil)
}
written, err := io.CopyN(w, reader, size)
if err != nil {
return 0, iodine.New(err, nil)
}
return written, nil
}
return 0, iodine.New(ObjectNotFound{Object: object}, nil)
}
written, err := io.Copy(w, bytes.NewBuffer(data))
if err != nil {
return 0, iodine.New(err, nil)
}
return written, nil
}
// GetPartialObject - GET object from cache buffer range
func (cache Cache) GetPartialObject(w io.Writer, bucket, object string, start, length int64) (int64, error) {
errParams := map[string]string{
"bucket": bucket,
"object": object,
"start": strconv.FormatInt(start, 10),
"length": strconv.FormatInt(length, 10),
}
cache.lock.RLock()
defer cache.lock.RUnlock()
if !IsValidBucket(bucket) {
return 0, iodine.New(BucketNameInvalid{Bucket: bucket}, errParams)
}
if !IsValidObjectName(object) {
return 0, iodine.New(ObjectNameInvalid{Object: object}, errParams)
}
if start < 0 {
return 0, iodine.New(InvalidRange{
Start: start,
Length: length,
}, errParams)
}
objectKey := bucket + "/" + object
data, ok := cache.objects.Get(objectKey)
if !ok {
if cache.donut != nil {
reader, _, err := cache.donut.GetObject(bucket, object)
if err != nil {
return 0, iodine.New(err, nil)
}
if _, err := io.CopyN(ioutil.Discard, reader, start); err != nil {
return 0, iodine.New(err, nil)
}
written, err := io.CopyN(w, reader, length)
if err != nil {
return 0, iodine.New(err, nil)
}
return written, nil
}
return 0, iodine.New(ObjectNotFound{Object: object}, nil)
}
written, err := io.CopyN(w, bytes.NewBuffer(data[start:]), length)
if err != nil {
return 0, iodine.New(err, nil)
}
return written, nil
}
// GetBucketMetadata - get bucket metadata from cache, fetching it from donut on a cache miss
func (cache Cache) GetBucketMetadata(bucket string) (BucketMetadata, error) {
cache.lock.RLock()
if !IsValidBucket(bucket) {
cache.lock.RUnlock()
return BucketMetadata{}, iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
}
if _, ok := cache.storedBuckets[bucket]; !ok {
if cache.donut == nil {
cache.lock.RUnlock()
return BucketMetadata{}, iodine.New(BucketNotFound{Bucket: bucket}, nil)
}
bucketMetadata, err := cache.donut.GetBucketMetadata(bucket)
if err != nil {
cache.lock.RUnlock()
return BucketMetadata{}, iodine.New(err, nil)
}
storedBucket := cache.storedBuckets[bucket]
cache.lock.RUnlock()
cache.lock.Lock()
storedBucket.bucketMetadata = bucketMetadata
cache.storedBuckets[bucket] = storedBucket
cache.lock.Unlock()
// return here, the read lock was already released on this path
return bucketMetadata, nil
}
bucketMetadata := cache.storedBuckets[bucket].bucketMetadata
cache.lock.RUnlock()
return bucketMetadata, nil
}
// SetBucketMetadata - set bucket ACL metadata in cache and on donut
func (cache Cache) SetBucketMetadata(bucket, acl string) error {
cache.lock.RLock()
if !IsValidBucket(bucket) {
cache.lock.RUnlock()
return iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
}
if _, ok := cache.storedBuckets[bucket]; !ok {
cache.lock.RUnlock()
return iodine.New(BucketNotFound{Bucket: bucket}, nil)
}
if strings.TrimSpace(acl) == "" {
acl = "private"
}
cache.lock.RUnlock()
cache.lock.Lock()
m := make(map[string]string)
m["acl"] = acl
if cache.donut != nil {
if err := cache.donut.SetBucketMetadata(bucket, m); err != nil {
return iodine.New(err, nil)
}
}
storedBucket := cache.storedBuckets[bucket]
storedBucket.bucketMetadata.ACL = BucketACL(acl)
cache.storedBuckets[bucket] = storedBucket
cache.lock.Unlock()
return nil
}
// isMD5SumEqual - returns nil if the two md5sums match, an error otherwise
func isMD5SumEqual(expectedMD5Sum, actualMD5Sum string) error {
if strings.TrimSpace(expectedMD5Sum) != "" && strings.TrimSpace(actualMD5Sum) != "" {
expectedMD5SumBytes, err := hex.DecodeString(expectedMD5Sum)
if err != nil {
return iodine.New(err, nil)
}
actualMD5SumBytes, err := hex.DecodeString(actualMD5Sum)
if err != nil {
return iodine.New(err, nil)
}
if !bytes.Equal(expectedMD5SumBytes, actualMD5SumBytes) {
return iodine.New(errors.New("bad digest, md5sum mismatch"), nil)
}
return nil
}
return iodine.New(errors.New("invalid argument"), nil)
}
// CreateObject - create an object, rejecting sizes larger than the configured cache size
func (cache Cache) CreateObject(bucket, key, contentType, expectedMD5Sum string, size int64, data io.Reader) (string, error) {
if size > int64(cache.maxSize) {
generic := GenericObjectError{Bucket: bucket, Object: key}
return "", iodine.New(EntityTooLarge{
GenericObjectError: generic,
Size: strconv.FormatInt(size, 10),
MaxSize: strconv.FormatUint(cache.maxSize, 10),
}, nil)
}
md5sum, err := cache.createObject(bucket, key, contentType, expectedMD5Sum, size, data)
// free
debug.FreeOSMemory()
return md5sum, iodine.New(err, nil)
}
// createObject - PUT object to cache buffer
func (cache Cache) createObject(bucket, key, contentType, expectedMD5Sum string, size int64, data io.Reader) (string, error) {
cache.lock.RLock()
if !IsValidBucket(bucket) {
cache.lock.RUnlock()
return "", iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
}
if !IsValidObjectName(key) {
cache.lock.RUnlock()
return "", iodine.New(ObjectNameInvalid{Object: key}, nil)
}
if _, ok := cache.storedBuckets[bucket]; !ok {
cache.lock.RUnlock()
return "", iodine.New(BucketNotFound{Bucket: bucket}, nil)
}
storedBucket := cache.storedBuckets[bucket]
// get object key
objectKey := bucket + "/" + key
if _, ok := storedBucket.objectMetadata[objectKey]; ok {
cache.lock.RUnlock()
return "", iodine.New(ObjectExists{Object: key}, nil)
}
cache.lock.RUnlock()
if contentType == "" {
contentType = "application/octet-stream"
}
contentType = strings.TrimSpace(contentType)
if strings.TrimSpace(expectedMD5Sum) != "" {
expectedMD5SumBytes, err := base64.StdEncoding.DecodeString(strings.TrimSpace(expectedMD5Sum))
if err != nil {
// not a valid base64 encoding, reject the request
return "", iodine.New(InvalidDigest{Md5: expectedMD5Sum}, nil)
}
expectedMD5Sum = hex.EncodeToString(expectedMD5SumBytes)
}
// calculate md5
hash := md5.New()
var readBytes []byte
var err error
var length int
for err == nil {
byteBuffer := make([]byte, 1024*1024)
length, err = data.Read(byteBuffer)
// hash.Write() would accept an empty slice, but a zero-length
// read means there is no more data, so stop reading here
if length == 0 {
break
}
hash.Write(byteBuffer[0:length])
readBytes = append(readBytes, byteBuffer[0:length]...)
}
if err != io.EOF {
return "", iodine.New(err, nil)
}
md5SumBytes := hash.Sum(nil)
totalLength := len(readBytes)
cache.lock.Lock()
ok := cache.objects.Set(objectKey, readBytes)
// setting up for de-allocation
readBytes = nil
go debug.FreeOSMemory()
cache.lock.Unlock()
if !ok {
return "", iodine.New(InternalError{}, nil)
}
md5Sum := hex.EncodeToString(md5SumBytes)
// Verify if the written object is equal to what is expected, only if it is requested as such
if strings.TrimSpace(expectedMD5Sum) != "" {
if err := isMD5SumEqual(strings.TrimSpace(expectedMD5Sum), md5Sum); err != nil {
return "", iodine.New(BadDigest{}, nil)
}
}
m := make(map[string]string)
m["contentType"] = contentType
newObject := ObjectMetadata{
Bucket: bucket,
Object: key,
Metadata: m,
Created: time.Now().UTC(),
MD5Sum: md5Sum,
Size: int64(totalLength),
}
cache.lock.Lock()
storedBucket.objectMetadata[objectKey] = newObject
cache.storedBuckets[bucket] = storedBucket
cache.lock.Unlock()
return newObject.MD5Sum, nil
}
// CreateBucket - create bucket in cache
func (cache Cache) CreateBucket(bucketName, acl string) error {
cache.lock.RLock()
if len(cache.storedBuckets) == totalBuckets {
cache.lock.RUnlock()
return iodine.New(TooManyBuckets{Bucket: bucketName}, nil)
}
if !IsValidBucket(bucketName) {
cache.lock.RUnlock()
return iodine.New(BucketNameInvalid{Bucket: bucketName}, nil)
}
if !IsValidBucketACL(acl) {
cache.lock.RUnlock()
return iodine.New(InvalidACL{ACL: acl}, nil)
}
if _, ok := cache.storedBuckets[bucketName]; ok {
cache.lock.RUnlock()
return iodine.New(BucketExists{Bucket: bucketName}, nil)
}
cache.lock.RUnlock()
if strings.TrimSpace(acl) == "" {
// default is private
acl = "private"
}
if cache.donut != nil {
if err := cache.donut.MakeBucket(bucketName, BucketACL(acl)); err != nil {
return iodine.New(err, nil)
}
}
var newBucket = storedBucket{}
newBucket.objectMetadata = make(map[string]ObjectMetadata)
newBucket.multiPartSession = make(map[string]multiPartSession)
newBucket.partMetadata = make(map[string]PartMetadata)
newBucket.bucketMetadata = BucketMetadata{}
newBucket.bucketMetadata.Name = bucketName
newBucket.bucketMetadata.Created = time.Now().UTC()
newBucket.bucketMetadata.ACL = BucketACL(acl)
cache.lock.Lock()
cache.storedBuckets[bucketName] = newBucket
cache.lock.Unlock()
return nil
}
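// delimiter - return the portion of object up to and including the first occurrence
// of the delimiter character; the full string is returned when the delimiter is not found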
func delimiter(object, delimiter string) string {
readBuffer := bytes.NewBufferString(object)
reader := bufio.NewReader(readBuffer)
stringReader := strings.NewReader(delimiter)
delimited, _ := stringReader.ReadByte()
delimitedStr, _ := reader.ReadString(delimited)
return delimitedStr
}
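// appendUniq - append i to slice only if it is not already present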
func appendUniq(slice []string, i string) []string {
for _, ele := range slice {
if ele == i {
return slice
}
}
return append(slice, i)
}
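// filterDelimiterPrefix - classify a key under the requested prefix as either an object key
// or a common prefix formed by the delimiter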
func (cache Cache) filterDelimiterPrefix(keys []string, key, delim string, r BucketResourcesMetadata) ([]string, BucketResourcesMetadata) {
switch {
case key == r.Prefix:
keys = appendUniq(keys, key)
// delim - requires r.Prefix as it was trimmed off earlier
case key == r.Prefix+delim:
keys = appendUniq(keys, key)
case delim != "":
r.CommonPrefixes = appendUniq(r.CommonPrefixes, r.Prefix+delim)
}
return keys, r
}
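// listObjects - accumulate a single key into object keys or common prefixes
// according to the prefix and delimiter settings in r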
func (cache Cache) listObjects(keys []string, key string, r BucketResourcesMetadata) ([]string, BucketResourcesMetadata) {
switch {
// Prefix absent, delimit object key based on delimiter
case r.IsDelimiterSet():
delim := delimiter(key, r.Delimiter)
switch {
case delim == "" || delim == key:
keys = appendUniq(keys, key)
case delim != "":
r.CommonPrefixes = appendUniq(r.CommonPrefixes, delim)
}
// Prefix present, delimit object key with prefix key based on delimiter
case r.IsDelimiterPrefixSet():
if strings.HasPrefix(key, r.Prefix) {
trimmedName := strings.TrimPrefix(key, r.Prefix)
delim := delimiter(trimmedName, r.Delimiter)
keys, r = cache.filterDelimiterPrefix(keys, key, delim, r)
}
// Prefix present, nothing to delimit
case r.IsPrefixSet():
keys = appendUniq(keys, key)
// Prefix and delimiter absent
case r.IsDefault():
keys = appendUniq(keys, key)
}
return keys, r
}
// ListObjects - list objects from cache
func (cache Cache) ListObjects(bucket string, resources BucketResourcesMetadata) ([]ObjectMetadata, BucketResourcesMetadata, error) {
cache.lock.RLock()
defer cache.lock.RUnlock()
if !IsValidBucket(bucket) {
return nil, BucketResourcesMetadata{IsTruncated: false}, iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
}
if !IsValidObjectName(resources.Prefix) {
return nil, BucketResourcesMetadata{IsTruncated: false}, iodine.New(ObjectNameInvalid{Object: resources.Prefix}, nil)
}
if _, ok := cache.storedBuckets[bucket]; !ok {
return nil, BucketResourcesMetadata{IsTruncated: false}, iodine.New(BucketNotFound{Bucket: bucket}, nil)
}
var results []ObjectMetadata
var keys []string
storedBucket := cache.storedBuckets[bucket]
for key := range storedBucket.objectMetadata {
if strings.HasPrefix(key, bucket+"/") {
key = key[len(bucket)+1:]
keys, resources = cache.listObjects(keys, key, resources)
}
}
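// apply the marker: only keys lexically greater than resources.Marker are returned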
var newKeys []string
switch {
case resources.Marker != "":
for _, key := range keys {
if key > resources.Marker {
newKeys = appendUniq(newKeys, key)
}
}
default:
newKeys = keys
}
sort.Strings(newKeys)
for _, key := range newKeys {
if len(results) == resources.Maxkeys {
resources.IsTruncated = true
if resources.IsTruncated && resources.IsDelimiterSet() {
resources.NextMarker = results[len(results)-1].Object
}
return results, resources, nil
}
object := storedBucket.objectMetadata[bucket+"/"+key]
results = append(results, object)
}
return results, resources, nil
}
// byBucketName is a type for sorting bucket metadata by bucket name
type byBucketName []BucketMetadata
func (b byBucketName) Len() int { return len(b) }
func (b byBucketName) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
func (b byBucketName) Less(i, j int) bool { return b[i].Name < b[j].Name }
// ListBuckets - List buckets from cache
func (cache Cache) ListBuckets() ([]BucketMetadata, error) {
cache.lock.RLock()
defer cache.lock.RUnlock()
var results []BucketMetadata
for _, bucket := range cache.storedBuckets {
results = append(results, bucket.bucketMetadata)
}
sort.Sort(byBucketName(results))
return results, nil
}
// GetObjectMetadata - get object metadata from cache
func (cache Cache) GetObjectMetadata(bucket, key string) (ObjectMetadata, error) {
cache.lock.RLock()
// check if bucket exists
if !IsValidBucket(bucket) {
cache.lock.RUnlock()
return ObjectMetadata{}, iodine.New(BucketNameInvalid{Bucket: bucket}, nil)
}
if !IsValidObjectName(key) {
cache.lock.RUnlock()
return ObjectMetadata{}, iodine.New(ObjectNameInvalid{Object: key}, nil)
}
if _, ok := cache.storedBuckets[bucket]; !ok {
cache.lock.RUnlock()
return ObjectMetadata{}, iodine.New(BucketNotFound{Bucket: bucket}, nil)
}
storedBucket := cache.storedBuckets[bucket]
objectKey := bucket + "/" + key
if objMetadata, ok := storedBucket.objectMetadata[objectKey]; ok {
cache.lock.RUnlock()
return objMetadata, nil
}
if cache.donut != nil {
objMetadata, err := cache.donut.GetObjectMetadata(bucket, key)
cache.lock.RUnlock()
if err != nil {
return ObjectMetadata{}, iodine.New(err, nil)
}
// update
cache.lock.Lock()
storedBucket.objectMetadata[objectKey] = objMetadata
cache.lock.Unlock()
return objMetadata, nil
}
cache.lock.RUnlock()
return ObjectMetadata{}, iodine.New(ObjectNotFound{Object: key}, nil)
}
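// expiredObject - trove expiration callback; logs cache statistics, removes the expired
// object's metadata from every bucket and drops buckets that are empty and past the cache expiration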
func (cache Cache) expiredObject(a ...interface{}) {
cacheStats := cache.objects.Stats()
log.Printf("CurrentSize: %d, CurrentItems: %d, TotalExpirations: %d",
cacheStats.Bytes, cacheStats.Items, cacheStats.Expired)
key := a[0].(string)
// loop through all buckets
for bucket, storedBucket := range cache.storedBuckets {
delete(storedBucket.objectMetadata, key)
// remove bucket if no objects found anymore
if len(storedBucket.objectMetadata) == 0 {
if time.Since(cache.storedBuckets[bucket].bucketMetadata.Created) > cache.expiration {
delete(cache.storedBuckets, bucket)
}
}
}
debug.FreeOSMemory()
}