mirror of
https://github.com/minio/minio.git
synced 2025-01-23 12:43:16 -05:00
b9d1698d74
This commit adds two new functions to the internal `etag` package: - `ETag.Format` - `Decrypt` The `Decrypt` function decrypts an encrypted ETag using a decryption key. It returns not encrypted / multipart ETags unmodified. The `Decrypt` function is mainly used when handling SSE-S3 encrypted single-part objects. In particular, the ETag of an SSE-S3 encrypted single-part object needs to be decrypted since S3 clients expect that this ETag is equal to the content MD5. The `ETag.Format` method also covers SSE ETag handling. MinIO encrypts all ETags of SSE single part objects. However, only the ETag of SSE-S3 encrypted single part objects needs to be decrypted. The ETag of an SSE-C or SSE-KMS single part object does not correspond to its content MD5 and can be a random value. The `ETag.Format` function formats an ETag such that it is an AWS S3 compliant ETag. In particular, it returns non-encrypted ETags (single / multipart) unmodified. However, for encrypted ETags it returns the trailing 16 bytes as ETag. For encrypted ETags the last 16 bytes will be a random value. The main purpose of `Format` is to format ETags such that clients accept them as well-formed AWS S3 ETags. It differs from the `String` method since `String` will return string representations for encrypted ETags that are not AWS S3 compliant. Signed-off-by: Andreas Auernhammer <hi@aead.dev>
588 lines
14 KiB
Go
588 lines
14 KiB
Go
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
//
|
|
// This file is part of MinIO Object Storage stack
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package cmd
|
|
|
|
import (
|
|
"container/list"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"math"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/minio/minio/internal/crypto"
|
|
"github.com/minio/minio/internal/etag"
|
|
)
|
|
|
|
// CacheStatusType tells whether a request was answered from the cache or
// had to be served by the backend.
type CacheStatusType string

// Known cache status values.
const (
	// CacheHit marks a response that was served from the cache.
	CacheHit CacheStatusType = "HIT"

	// CacheMiss marks a response that was served from the backend.
	CacheMiss CacheStatusType = "MISS"
)
|
|
|
|
func (c CacheStatusType) String() string {
|
|
if c != "" {
|
|
return string(c)
|
|
}
|
|
return string(CacheMiss)
|
|
}
|
|
|
|
// cacheControl holds the caching directives extracted from an object's
// metadata.
type cacheControl struct {
	// expiry is the object's expiration time; the zero time means "not set".
	expiry time.Time

	// Values of the max-age, s-maxage, min-fresh and max-stale directives.
	maxAge   int
	sMaxAge  int
	minFresh int
	maxStale int

	// Flags for the no-store, only-if-cached and no-cache directives.
	noStore      bool
	onlyIfCached bool
	noCache      bool
}
|
|
|
|
func (c *cacheControl) isStale(modTime time.Time) bool {
|
|
if c == nil {
|
|
return false
|
|
}
|
|
// response will never be stale if only-if-cached is set
|
|
if c.onlyIfCached {
|
|
return false
|
|
}
|
|
// Cache-Control value no-store indicates never cache
|
|
if c.noStore {
|
|
return true
|
|
}
|
|
// Cache-Control value no-cache indicates cache entry needs to be revalidated before
|
|
// serving from cache
|
|
if c.noCache {
|
|
return true
|
|
}
|
|
now := time.Now()
|
|
|
|
if c.sMaxAge > 0 && c.sMaxAge < int(now.Sub(modTime).Seconds()) {
|
|
return true
|
|
}
|
|
if c.maxAge > 0 && c.maxAge < int(now.Sub(modTime).Seconds()) {
|
|
return true
|
|
}
|
|
|
|
if !c.expiry.Equal(time.Time{}) && c.expiry.Before(time.Now().Add(time.Duration(c.maxStale))) {
|
|
return true
|
|
}
|
|
|
|
if c.minFresh > 0 && c.minFresh <= int(now.Sub(modTime).Seconds()) {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// returns struct with cache-control settings from user metadata.
|
|
func cacheControlOpts(o ObjectInfo) *cacheControl {
|
|
c := cacheControl{}
|
|
m := o.UserDefined
|
|
if !o.Expires.Equal(timeSentinel) {
|
|
c.expiry = o.Expires
|
|
}
|
|
|
|
var headerVal string
|
|
for k, v := range m {
|
|
if strings.EqualFold(k, "cache-control") {
|
|
headerVal = v
|
|
}
|
|
}
|
|
if headerVal == "" {
|
|
return nil
|
|
}
|
|
headerVal = strings.ToLower(headerVal)
|
|
headerVal = strings.TrimSpace(headerVal)
|
|
|
|
vals := strings.Split(headerVal, ",")
|
|
for _, val := range vals {
|
|
val = strings.TrimSpace(val)
|
|
|
|
if val == "no-store" {
|
|
c.noStore = true
|
|
continue
|
|
}
|
|
if val == "only-if-cached" {
|
|
c.onlyIfCached = true
|
|
continue
|
|
}
|
|
if val == "no-cache" {
|
|
c.noCache = true
|
|
continue
|
|
}
|
|
p := strings.Split(val, "=")
|
|
|
|
if len(p) != 2 {
|
|
continue
|
|
}
|
|
if p[0] == "max-age" ||
|
|
p[0] == "s-maxage" ||
|
|
p[0] == "min-fresh" ||
|
|
p[0] == "max-stale" {
|
|
i, err := strconv.Atoi(p[1])
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
if p[0] == "max-age" {
|
|
c.maxAge = i
|
|
}
|
|
if p[0] == "s-maxage" {
|
|
c.sMaxAge = i
|
|
}
|
|
if p[0] == "min-fresh" {
|
|
c.minFresh = i
|
|
}
|
|
if p[0] == "max-stale" {
|
|
c.maxStale = i
|
|
}
|
|
}
|
|
}
|
|
return &c
|
|
}
|
|
|
|
// backendDownError returns true if err is due to backend failure or faulty disk if in server mode
|
|
func backendDownError(err error) bool {
|
|
_, backendDown := err.(BackendDown)
|
|
return backendDown || IsErr(err, baseErrs...)
|
|
}
|
|
|
|
// IsCacheable returns if the object should be saved in the cache.
|
|
func (o ObjectInfo) IsCacheable() bool {
|
|
if globalCacheKMS != nil {
|
|
return true
|
|
}
|
|
_, ok := crypto.IsEncrypted(o.UserDefined)
|
|
return !ok
|
|
}
|
|
|
|
// reads file cached on disk from offset upto length
|
|
func readCacheFileStream(filePath string, offset, length int64) (io.ReadCloser, error) {
|
|
if filePath == "" || offset < 0 {
|
|
return nil, errInvalidArgument
|
|
}
|
|
if err := checkPathLength(filePath); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
fr, err := os.Open(filePath)
|
|
if err != nil {
|
|
return nil, osErrToFileErr(err)
|
|
}
|
|
// Stat to get the size of the file at path.
|
|
st, err := fr.Stat()
|
|
if err != nil {
|
|
err = osErrToFileErr(err)
|
|
return nil, err
|
|
}
|
|
|
|
if err = os.Chtimes(filePath, time.Now(), st.ModTime()); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Verify if its not a regular file, since subsequent Seek is undefined.
|
|
if !st.Mode().IsRegular() {
|
|
return nil, errIsNotRegular
|
|
}
|
|
|
|
if err = os.Chtimes(filePath, time.Now(), st.ModTime()); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Seek to the requested offset.
|
|
if offset > 0 {
|
|
_, err = fr.Seek(offset, io.SeekStart)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
return struct {
|
|
io.Reader
|
|
io.Closer
|
|
}{Reader: io.LimitReader(fr, length), Closer: fr}, nil
|
|
}
|
|
|
|
func isCacheEncrypted(meta map[string]string) bool {
|
|
_, ok := meta[SSECacheEncrypted]
|
|
return ok
|
|
}
|
|
|
|
// decryptCacheObjectETag tries to decrypt the ETag saved in encrypted format using the cache KMS
|
|
func decryptCacheObjectETag(info *ObjectInfo) error {
|
|
if info.IsDir {
|
|
return nil // Directories are never encrypted.
|
|
}
|
|
|
|
// Depending on the SSE type we handle ETags slightly
|
|
// differently. ETags encrypted with SSE-S3 must be
|
|
// decrypted first, since the client expects that
|
|
// a single-part SSE-S3 ETag is equal to the content MD5.
|
|
//
|
|
// For all other SSE types, the ETag is not the content MD5.
|
|
// Therefore, we don't decrypt but only format it.
|
|
switch kind, ok := crypto.IsEncrypted(info.UserDefined); {
|
|
case ok && kind == crypto.S3 && isCacheEncrypted(info.UserDefined):
|
|
ETag, err := etag.Parse(info.ETag)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if !ETag.IsEncrypted() {
|
|
info.ETag = ETag.Format().String()
|
|
return nil
|
|
}
|
|
|
|
key, err := crypto.S3.UnsealObjectKey(globalCacheKMS, info.UserDefined, info.Bucket, info.Name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
ETag, err = etag.Decrypt(key[:], ETag)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
info.ETag = ETag.Format().String()
|
|
case ok && (kind == crypto.S3KMS || kind == crypto.SSEC) && isCacheEncrypted(info.UserDefined):
|
|
ETag, err := etag.Parse(info.ETag)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
info.ETag = ETag.Format().String()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// decryptCacheObjectETag tries to decrypt the ETag saved in encrypted format using the cache KMS
|
|
func decryptCachePartETags(c *cacheMeta) ([]string, error) {
|
|
// Depending on the SSE type we handle ETags slightly
|
|
// differently. ETags encrypted with SSE-S3 must be
|
|
// decrypted first, since the client expects that
|
|
// a single-part SSE-S3 ETag is equal to the content MD5.
|
|
//
|
|
// For all other SSE types, the ETag is not the content MD5.
|
|
// Therefore, we don't decrypt but only format it.
|
|
switch kind, ok := crypto.IsEncrypted(c.Meta); {
|
|
case ok && kind == crypto.S3 && isCacheEncrypted(c.Meta):
|
|
key, err := crypto.S3.UnsealObjectKey(globalCacheKMS, c.Meta, c.Bucket, c.Object)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
etags := make([]string, 0, len(c.PartETags))
|
|
for i := range c.PartETags {
|
|
ETag, err := etag.Parse(c.PartETags[i])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ETag, err = etag.Decrypt(key[:], ETag)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
etags = append(etags, ETag.Format().String())
|
|
}
|
|
return etags, nil
|
|
case ok && (kind == crypto.S3KMS || kind == crypto.SSEC) && isCacheEncrypted(c.Meta):
|
|
etags := make([]string, 0, len(c.PartETags))
|
|
for i := range c.PartETags {
|
|
ETag, err := etag.Parse(c.PartETags[i])
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
etags = append(etags, ETag.Format().String())
|
|
}
|
|
return etags, nil
|
|
default:
|
|
return c.PartETags, nil
|
|
}
|
|
}
|
|
|
|
// isMetadataSame reports whether m1 and m2 hold exactly the same key/value
// pairs. Two nil maps are equal; a nil map never equals a non-nil map, even
// an empty one.
func isMetadataSame(m1, m2 map[string]string) bool {
	if (m1 == nil) != (m2 == nil) {
		return false
	}
	if len(m1) != len(m2) {
		return false
	}
	for key, want := range m1 {
		got, found := m2[key]
		if !found || got != want {
			return false
		}
	}
	return true
}
|
|
|
|
// fileScorer collects scored deletion candidates until enough bytes are
// queued to reach the requested savings.
type fileScorer struct {
	// saveBytes is the number of bytes that should be saved.
	saveBytes uint64
	// now is the reference time that file times (Unix seconds) are
	// subtracted from when scoring.
	now int64
	// maxHits is the hit count at which the hit weight drops to zero.
	maxHits int
	// sizeMult is 1/saveBytes, kept for consistent size scoring.
	sizeMult float64

	// queue is a linked list of the files we want to delete. It is kept
	// sorted by score: highest at the front, lowest at the back.
	queue list.List
	// queuedBytes is the total size of the files currently in queue.
	queuedBytes uint64
	// seenBytes is the total size of all files passed to the scorer.
	seenBytes uint64
}
|
|
|
|
// queuedFile is one entry of the fileScorer queue.
type queuedFile struct {
	// name and versionID identify the queued object.
	name      string
	versionID string
	// size is the object size in bytes.
	size uint64
	// score expresses how much the file should be deleted; entries with a
	// higher score are placed closer to the front of the queue.
	score float64
}
|
|
|
|
// newFileScorer allows to collect files to save a specific number of bytes.
|
|
// Each file is assigned a score based on its age, size and number of hits.
|
|
// A list of files is maintained
|
|
func newFileScorer(saveBytes uint64, now int64, maxHits int) (*fileScorer, error) {
|
|
if saveBytes == 0 {
|
|
return nil, errors.New("newFileScorer: saveBytes = 0")
|
|
}
|
|
if now < 0 {
|
|
return nil, errors.New("newFileScorer: now < 0")
|
|
}
|
|
if maxHits <= 0 {
|
|
return nil, errors.New("newFileScorer: maxHits <= 0")
|
|
}
|
|
f := fileScorer{saveBytes: saveBytes, maxHits: maxHits, now: now, sizeMult: 1 / float64(saveBytes)}
|
|
f.queue.Init()
|
|
return &f, nil
|
|
}
|
|
|
|
func (f *fileScorer) addFile(name string, accTime time.Time, size int64, hits int) {
|
|
f.addFileWithObjInfo(ObjectInfo{
|
|
Name: name,
|
|
AccTime: accTime,
|
|
Size: size,
|
|
}, hits)
|
|
}
|
|
|
|
func (f *fileScorer) addFileWithObjInfo(objInfo ObjectInfo, hits int) {
|
|
// Calculate how much we want to delete this object.
|
|
file := queuedFile{
|
|
name: objInfo.Name,
|
|
versionID: objInfo.VersionID,
|
|
size: uint64(objInfo.Size),
|
|
}
|
|
f.seenBytes += uint64(objInfo.Size)
|
|
|
|
var score float64
|
|
if objInfo.ModTime.IsZero() {
|
|
// Mod time is not available with disk cache use atime.
|
|
score = float64(f.now - objInfo.AccTime.Unix())
|
|
} else {
|
|
// if not used mod time when mod time is available.
|
|
score = float64(f.now - objInfo.ModTime.Unix())
|
|
}
|
|
|
|
// Size as fraction of how much we want to save, 0->1.
|
|
szWeight := math.Max(0, (math.Min(1, float64(file.size)*f.sizeMult)))
|
|
// 0 at f.maxHits, 1 at 0.
|
|
hitsWeight := (1.0 - math.Max(0, math.Min(1.0, float64(hits)/float64(f.maxHits))))
|
|
file.score = score * (1 + 0.25*szWeight + 0.25*hitsWeight)
|
|
// If we still haven't saved enough, just add the file
|
|
if f.queuedBytes < f.saveBytes {
|
|
f.insertFile(file)
|
|
f.trimQueue()
|
|
return
|
|
}
|
|
// If we score less than the worst, don't insert.
|
|
worstE := f.queue.Back()
|
|
if worstE != nil && file.score < worstE.Value.(queuedFile).score {
|
|
return
|
|
}
|
|
f.insertFile(file)
|
|
f.trimQueue()
|
|
}
|
|
|
|
// adjustSaveBytes allows to adjust the number of bytes to save.
|
|
// This can be used to adjust the count on the fly.
|
|
// Returns true if there still is a need to delete files (n+saveBytes >0),
|
|
// false if no more bytes needs to be saved.
|
|
func (f *fileScorer) adjustSaveBytes(n int64) bool {
|
|
if f == nil {
|
|
return false
|
|
}
|
|
if int64(f.saveBytes)+n <= 0 {
|
|
f.saveBytes = 0
|
|
f.trimQueue()
|
|
return false
|
|
}
|
|
if n < 0 {
|
|
f.saveBytes -= ^uint64(n - 1)
|
|
} else {
|
|
f.saveBytes += uint64(n)
|
|
}
|
|
if f.saveBytes == 0 {
|
|
f.queue.Init()
|
|
f.saveBytes = 0
|
|
return false
|
|
}
|
|
if n < 0 {
|
|
f.trimQueue()
|
|
}
|
|
return true
|
|
}
|
|
|
|
// insertFile will insert a file into the list, sorted by its score.
|
|
func (f *fileScorer) insertFile(file queuedFile) {
|
|
e := f.queue.Front()
|
|
for e != nil {
|
|
v := e.Value.(queuedFile)
|
|
if v.score < file.score {
|
|
break
|
|
}
|
|
e = e.Next()
|
|
}
|
|
f.queuedBytes += file.size
|
|
// We reached the end.
|
|
if e == nil {
|
|
f.queue.PushBack(file)
|
|
return
|
|
}
|
|
f.queue.InsertBefore(file, e)
|
|
}
|
|
|
|
// trimQueue will trim the back of queue and still keep below wantSave.
|
|
func (f *fileScorer) trimQueue() {
|
|
for {
|
|
e := f.queue.Back()
|
|
if e == nil {
|
|
return
|
|
}
|
|
v := e.Value.(queuedFile)
|
|
if f.queuedBytes-v.size < f.saveBytes {
|
|
return
|
|
}
|
|
f.queue.Remove(e)
|
|
f.queuedBytes -= v.size
|
|
}
|
|
}
|
|
|
|
func (f *fileScorer) purgeFunc(p func(qfile queuedFile)) {
|
|
e := f.queue.Front()
|
|
for e != nil {
|
|
p(e.Value.(queuedFile))
|
|
e = e.Next()
|
|
}
|
|
}
|
|
|
|
// fileNames returns all queued file names.
|
|
func (f *fileScorer) fileNames() []string {
|
|
res := make([]string, 0, f.queue.Len())
|
|
e := f.queue.Front()
|
|
for e != nil {
|
|
res = append(res, e.Value.(queuedFile).name)
|
|
e = e.Next()
|
|
}
|
|
return res
|
|
}
|
|
|
|
func (f *fileScorer) reset() {
|
|
f.queue.Init()
|
|
f.queuedBytes = 0
|
|
}
|
|
|
|
func (f *fileScorer) queueString() string {
|
|
var res strings.Builder
|
|
e := f.queue.Front()
|
|
i := 0
|
|
for e != nil {
|
|
v := e.Value.(queuedFile)
|
|
if i > 0 {
|
|
res.WriteByte('\n')
|
|
}
|
|
res.WriteString(fmt.Sprintf("%03d: %s (score: %.3f, bytes: %d)", i, v.name, v.score, v.size))
|
|
i++
|
|
e = e.Next()
|
|
}
|
|
return res.String()
|
|
}
|
|
|
|
// bytesToClear returns the number of bytes to clear to reach the low
// watermark, given the disk's total and free space, the quota (in % of the
// disk) allocated to the cache, and the low and high watermarks (in % of the
// allowed quota). The result never exceeds the quota itself.
// If the high watermark has not been reached, 0 is returned.
func bytesToClear(total, free int64, quotaPct, lowWatermark, highWatermark uint64) uint64 {
	// watermarkUsage converts a watermark (% of the quota) into bytes.
	watermarkUsage := func(watermarkPct uint64) int64 {
		return total * int64(watermarkPct*quotaPct) / (100 * 100)
	}

	used := total - free
	if used < watermarkUsage(highWatermark) {
		return 0
	}

	// Bytes needed to get back down to the low watermark, capped by the quota.
	quotaAllowed := total * int64(quotaPct) / 100
	excess := math.Max(0.0, float64(used-watermarkUsage(lowWatermark)))
	return uint64(math.Min(float64(quotaAllowed), excess))
}
|
|
|
|
// multiWriter duplicates writes to a backend writer and a cache pipe.
type multiWriter struct {
	// backendWriter receives every write first.
	backendWriter io.Writer
	// cacheWriter is the write end of the pipe that feeds the cache.
	cacheWriter *io.PipeWriter
	// pipeClosed is set once the cache pipe was closed after a failed write;
	// no further cache writes are attempted afterwards.
	pipeClosed bool
}
|
|
|
|
// multiWriter writes to backend and cache - if cache write
|
|
// fails close the pipe, but continue writing to the backend
|
|
func (t *multiWriter) Write(p []byte) (n int, err error) {
|
|
n, err = t.backendWriter.Write(p)
|
|
if err == nil && n != len(p) {
|
|
err = io.ErrShortWrite
|
|
return
|
|
}
|
|
if err != nil {
|
|
if !t.pipeClosed {
|
|
t.cacheWriter.CloseWithError(err)
|
|
}
|
|
return
|
|
}
|
|
|
|
// ignore errors writing to cache
|
|
if !t.pipeClosed {
|
|
_, cerr := t.cacheWriter.Write(p)
|
|
if cerr != nil {
|
|
t.pipeClosed = true
|
|
t.cacheWriter.CloseWithError(cerr)
|
|
}
|
|
}
|
|
return len(p), nil
|
|
}
|
|
|
|
func cacheMultiWriter(w1 io.Writer, w2 *io.PipeWriter) io.Writer {
|
|
return &multiWriter{backendWriter: w1, cacheWriter: w2}
|
|
}
|
|
|
|
// writebackInProgress returns true if writeback commit is not complete
|
|
func writebackInProgress(m map[string]string) bool {
|
|
if v, ok := m[writeBackStatusHeader]; ok {
|
|
switch cacheCommitStatus(v) {
|
|
case CommitPending, CommitFailed:
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|