minio/cmd/disk-cache-utils.go
Andreas Auernhammer b9d1698d74
etag: add Format and Decrypt functions (#14659)
This commit adds two new functions to the
internal `etag` package:
 - `ETag.Format`
 - `Decrypt`

The `Decrypt` function decrypts an encrypted
ETag using a decryption key. It returns ETags that
are not encrypted, as well as multipart ETags, unmodified.

The `Decrypt` function is mainly used when
handling SSE-S3 encrypted single-part objects.
In particular, the ETag of an SSE-S3 encrypted
single-part object needs to be decrypted since
S3 clients expect that this ETag is equal to the
content MD5.

The `ETag.Format` method also covers SSE ETag handling.
MinIO encrypts all ETags of SSE single part objects.
However, only the ETag of SSE-S3 encrypted single part
objects needs to be decrypted.
The ETag of an SSE-C or SSE-KMS single part object
does not correspond to its content MD5 and can be
a random value.
The `ETag.Format` function formats an ETag such that
it is an AWS S3 compliant ETag. In particular, it
returns non-encrypted ETags (single / multipart)
unmodified. However, for encrypted ETags it returns
the trailing 16 bytes as ETag. For encrypted ETags
the last 16 bytes will be a random value.

The main purpose of `Format` is to format ETags
such that clients accept them as well-formed AWS S3
ETags.
It differs from the `String` method since `String`
will return string representations for encrypted
ETags that are not AWS S3 compliant.

Signed-off-by: Andreas Auernhammer <hi@aead.dev>
2022-04-03 13:29:13 -07:00

588 lines
14 KiB
Go

// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
package cmd
import (
"container/list"
"errors"
"fmt"
"io"
"math"
"os"
"strconv"
"strings"
"time"
"github.com/minio/minio/internal/crypto"
"github.com/minio/minio/internal/etag"
)
// CacheStatusType tells whether a request was answered from the cache
// or had to be served by the backend.
type CacheStatusType string

// Known cache status values.
const (
	// CacheHit - the object was served from cache.
	CacheHit CacheStatusType = "HIT"
	// CacheMiss - the object was served from the backend.
	CacheMiss CacheStatusType = "MISS"
)

// String returns the status as a plain string. The empty (zero) value
// is reported as a cache miss.
func (c CacheStatusType) String() string {
	if c == "" {
		return string(CacheMiss)
	}
	return string(c)
}
// cacheControl holds the caching directives that apply to a cached object.
// All integer directives are expressed in seconds.
type cacheControl struct {
	expiry       time.Time // absolute expiry time; zero value means "not set"
	maxAge       int       // max-age directive
	sMaxAge      int       // s-maxage directive
	minFresh     int       // min-fresh directive
	maxStale     int       // max-stale directive: grace period past expiry
	noStore      bool      // no-store directive
	onlyIfCached bool      // only-if-cached directive
	noCache      bool      // no-cache directive
}

// isStale reports whether a cache entry last modified at modTime must be
// considered stale according to the directives in c.
//
// A nil receiver never reports staleness. only-if-cached takes precedence
// over every other directive and always yields "not stale"; no-store and
// no-cache always yield "stale". Otherwise the entry is stale when its age
// exceeds s-maxage or max-age, when it is past its expiry time by more than
// max-stale seconds, or when its age has reached min-fresh.
func (c *cacheControl) isStale(modTime time.Time) bool {
	if c == nil {
		return false
	}
	// response will never be stale if only-if-cached is set
	if c.onlyIfCached {
		return false
	}
	// no-store indicates never cache; no-cache indicates the entry needs
	// to be revalidated before being served from cache.
	if c.noStore || c.noCache {
		return true
	}

	// Sample the clock once so every check below sees the same instant.
	now := time.Now()
	age := int(now.Sub(modTime).Seconds())

	if c.sMaxAge > 0 && c.sMaxAge < age {
		return true
	}
	if c.maxAge > 0 && c.maxAge < age {
		return true
	}

	if !c.expiry.IsZero() {
		// max-stale is a number of seconds by which the entry may exceed
		// its expiry and still be acceptable, so the grace period extends
		// the expiry time (it must not be added to "now", and it must be
		// scaled from seconds to a time.Duration).
		staleAfter := c.expiry
		if c.maxStale > 0 {
			staleAfter = staleAfter.Add(time.Duration(c.maxStale) * time.Second)
		}
		if now.After(staleAfter) {
			return true
		}
	}

	if c.minFresh > 0 && c.minFresh <= age {
		return true
	}
	return false
}
// cacheControlOpts builds the cache-control settings for an object from its
// user metadata.
//
// It returns nil when the metadata carries no (or an empty) cache-control
// entry, or when one of the max-age, s-maxage, min-fresh or max-stale
// directives has a value that is not an integer. The metadata key is matched
// case-insensitively and the directives are lower-cased before parsing.
func cacheControlOpts(o ObjectInfo) *cacheControl {
	var opts cacheControl
	if !o.Expires.Equal(timeSentinel) {
		opts.expiry = o.Expires
	}

	var header string
	for key, value := range o.UserDefined {
		if strings.EqualFold(key, "cache-control") {
			header = value
		}
	}
	if header == "" {
		return nil
	}

	header = strings.TrimSpace(strings.ToLower(header))
	for _, directive := range strings.Split(header, ",") {
		directive = strings.TrimSpace(directive)

		// Flag-style directives carry no value.
		switch directive {
		case "no-store":
			opts.noStore = true
			continue
		case "only-if-cached":
			opts.onlyIfCached = true
			continue
		case "no-cache":
			opts.noCache = true
			continue
		}

		// Everything else is only of interest in the exact form name=value.
		kv := strings.Split(directive, "=")
		if len(kv) != 2 {
			continue
		}

		var field *int
		switch kv[0] {
		case "max-age":
			field = &opts.maxAge
		case "s-maxage":
			field = &opts.sMaxAge
		case "min-fresh":
			field = &opts.minFresh
		case "max-stale":
			field = &opts.maxStale
		default:
			// Unknown directives are ignored.
			continue
		}

		seconds, err := strconv.Atoi(kv[1])
		if err != nil {
			return nil
		}
		*field = seconds
	}
	return &opts
}
// backendDownError reports whether err is a BackendDown error or matches
// one of the errors listed in baseErrs.
func backendDownError(err error) bool {
	if _, isDown := err.(BackendDown); isDown {
		return true
	}
	return IsErr(err, baseErrs...)
}
// IsCacheable returns if the object should be saved in the cache.
// When globalCacheKMS is set every object qualifies; otherwise only
// objects whose metadata is not marked as encrypted do.
func (o ObjectInfo) IsCacheable() bool {
	if globalCacheKMS == nil {
		_, encrypted := crypto.IsEncrypted(o.UserDefined)
		return !encrypted
	}
	return true
}
// readCacheFileStream opens the cached file at filePath and returns a
// reader that yields at most length bytes starting at offset. Closing the
// returned io.ReadCloser closes the underlying file.
//
// It returns errInvalidArgument for an empty path or a negative offset and
// errIsNotRegular when the path does not refer to a regular file. As a side
// effect the file's access time is set to the current time while its
// modification time is preserved.
func readCacheFileStream(filePath string, offset, length int64) (io.ReadCloser, error) {
	if filePath == "" || offset < 0 {
		return nil, errInvalidArgument
	}
	if err := checkPathLength(filePath); err != nil {
		return nil, err
	}

	fr, err := os.Open(filePath)
	if err != nil {
		return nil, osErrToFileErr(err)
	}
	// From here on every failure path must release the file descriptor;
	// ownership passes to the caller only on success.
	handedOver := false
	defer func() {
		if !handedOver {
			_ = fr.Close() // best effort; the original error is what matters
		}
	}()

	// Stat to learn the file mode and modification time.
	st, err := fr.Stat()
	if err != nil {
		return nil, osErrToFileErr(err)
	}

	// Record the access: bump atime, keep mtime as it was.
	if err = os.Chtimes(filePath, time.Now(), st.ModTime()); err != nil {
		return nil, err
	}

	// Verify if its not a regular file, since subsequent Seek is undefined.
	if !st.Mode().IsRegular() {
		return nil, errIsNotRegular
	}

	// Seek to the requested offset.
	if offset > 0 {
		if _, err = fr.Seek(offset, io.SeekStart); err != nil {
			return nil, err
		}
	}

	handedOver = true
	return struct {
		io.Reader
		io.Closer
	}{Reader: io.LimitReader(fr, length), Closer: fr}, nil
}
// isCacheEncrypted reports whether the metadata contains the
// SSECacheEncrypted key, regardless of the value stored under it.
func isCacheEncrypted(meta map[string]string) bool {
	if _, present := meta[SSECacheEncrypted]; present {
		return true
	}
	return false
}
// decryptCacheObjectETag rewrites info.ETag into its client-facing form for
// objects that are SSE encrypted and marked as encrypted by the cache.
//
// ETags encrypted with SSE-S3 are decrypted first, using the object key
// unsealed via globalCacheKMS, since the client expects that a single-part
// SSE-S3 ETag is equal to the content MD5. For SSE-KMS and SSE-C the ETag is
// not the content MD5, so it is only formatted. Directories and objects that
// do not match any of these cases are left untouched.
func decryptCacheObjectETag(info *ObjectInfo) error {
	if info.IsDir {
		return nil // Directories are never encrypted.
	}

	kind, encrypted := crypto.IsEncrypted(info.UserDefined)
	if !encrypted || !isCacheEncrypted(info.UserDefined) {
		return nil
	}
	sseS3 := kind == crypto.S3
	if !sseS3 && kind != crypto.S3KMS && kind != crypto.SSEC {
		return nil
	}

	parsed, err := etag.Parse(info.ETag)
	if err != nil {
		return err
	}

	// Only SSE-S3 ETags that are actually stored encrypted need decryption.
	if sseS3 && parsed.IsEncrypted() {
		key, err := crypto.S3.UnsealObjectKey(globalCacheKMS, info.UserDefined, info.Bucket, info.Name)
		if err != nil {
			return err
		}
		if parsed, err = etag.Decrypt(key[:], parsed); err != nil {
			return err
		}
	}

	info.ETag = parsed.Format().String()
	return nil
}
// decryptCachePartETags returns the part ETags of a cached object in their
// client-facing form.
//
// For SSE-S3 objects marked as encrypted by the cache, the object key is
// unsealed via globalCacheKMS and every part ETag is passed through
// etag.Decrypt before being formatted. For SSE-KMS and SSE-C objects marked
// as encrypted by the cache, the part ETags are only formatted. In every
// other case c.PartETags is returned as is.
func decryptCachePartETags(c *cacheMeta) ([]string, error) {
	kind, encrypted := crypto.IsEncrypted(c.Meta)
	if !encrypted || !isCacheEncrypted(c.Meta) {
		return c.PartETags, nil
	}

	switch {
	case kind == crypto.S3:
		// Unseal once up front; the same key is used for all parts.
		key, err := crypto.S3.UnsealObjectKey(globalCacheKMS, c.Meta, c.Bucket, c.Object)
		if err != nil {
			return nil, err
		}
		formatted := make([]string, 0, len(c.PartETags))
		for _, raw := range c.PartETags {
			parsed, err := etag.Parse(raw)
			if err != nil {
				return nil, err
			}
			if parsed, err = etag.Decrypt(key[:], parsed); err != nil {
				return nil, err
			}
			formatted = append(formatted, parsed.Format().String())
		}
		return formatted, nil

	case kind == crypto.S3KMS || kind == crypto.SSEC:
		formatted := make([]string, 0, len(c.PartETags))
		for _, raw := range c.PartETags {
			parsed, err := etag.Parse(raw)
			if err != nil {
				return nil, err
			}
			formatted = append(formatted, parsed.Format().String())
		}
		return formatted, nil
	}

	return c.PartETags, nil
}
// isMetadataSame reports whether m1 and m2 hold exactly the same key/value
// pairs. Two nil maps are equal, but a nil map is never equal to a non-nil
// one, even if the latter is empty.
func isMetadataSame(m1, m2 map[string]string) bool {
	if (m1 == nil) != (m2 == nil) {
		return false
	}
	if len(m1) != len(m2) {
		return false
	}
	// Sizes match, so it is enough to find every pair of m1 in m2.
	for key, want := range m1 {
		got, found := m2[key]
		if !found || got != want {
			return false
		}
	}
	return true
}
// fileScorer collects deletion candidates until enough bytes are queued
// to reach the requested savings.
type fileScorer struct {
	// saveBytes is the number of bytes that should be freed.
	saveBytes uint64
	// now is the reference time (Unix seconds) used to compute file age.
	now int64
	// maxHits is the hit count at which the hits weight reaches zero.
	maxHits int
	// sizeMult is 1/saveBytes, used to get a consistent size weight.
	sizeMult float64

	// queue is a linked list of the files we want to delete, kept sorted
	// by score: highest score at the front, lowest at the back.
	queue list.List
	// queuedBytes is the total size of all files currently in queue.
	queuedBytes uint64
	// seenBytes is the total size of all files offered to the scorer.
	seenBytes uint64
}

// queuedFile is a single entry of the fileScorer queue.
type queuedFile struct {
	name      string
	versionID string
	size      uint64
	score     float64
}

// newFileScorer returns a scorer that collects files until saveBytes bytes
// can be freed. Each file is assigned a score based on its age relative to
// now, its size and its number of hits (relative to maxHits).
//
// An error is returned when saveBytes is zero, now is negative or maxHits
// is not positive.
func newFileScorer(saveBytes uint64, now int64, maxHits int) (*fileScorer, error) {
	switch {
	case saveBytes == 0:
		return nil, errors.New("newFileScorer: saveBytes = 0")
	case now < 0:
		return nil, errors.New("newFileScorer: now < 0")
	case maxHits <= 0:
		return nil, errors.New("newFileScorer: maxHits <= 0")
	}

	scorer := &fileScorer{
		saveBytes: saveBytes,
		now:       now,
		maxHits:   maxHits,
		sizeMult:  1 / float64(saveBytes),
	}
	scorer.queue.Init()
	return scorer, nil
}
// addFile offers a file to the scorer, identified only by its name, access
// time and size. It is a convenience wrapper around addFileWithObjInfo.
func (f *fileScorer) addFile(name string, accTime time.Time, size int64, hits int) {
	info := ObjectInfo{
		Name:    name,
		AccTime: accTime,
		Size:    size,
	}
	f.addFileWithObjInfo(info, hits)
}
// addFileWithObjInfo scores the object and, if it is a good enough deletion
// candidate, adds it to the queue.
//
// The score is the object's age in seconds, boosted by up to 25% for its
// size (relative to saveBytes) and by up to 25% for having few hits
// (relative to maxHits). The size of every object offered is added to
// seenBytes, whether or not it ends up queued.
func (f *fileScorer) addFileWithObjInfo(objInfo ObjectInfo, hits int) {
	size := uint64(objInfo.Size)
	f.seenBytes += size

	// Age is measured from the mod time when it is available; otherwise
	// (e.g. with the disk cache) the access time is used.
	reference := objInfo.ModTime
	if reference.IsZero() {
		reference = objInfo.AccTime
	}
	score := float64(f.now - reference.Unix())

	// Size as fraction of how much we want to save, 0->1.
	szWeight := math.Max(0, math.Min(1, float64(size)*f.sizeMult))
	// 0 at f.maxHits, 1 at 0.
	hitsWeight := 1.0 - math.Max(0, math.Min(1.0, float64(hits)/float64(f.maxHits)))

	// Calculate how much we want to delete this object.
	candidate := queuedFile{
		name:      objInfo.Name,
		versionID: objInfo.VersionID,
		size:      size,
		score:     score * (1 + 0.25*szWeight + 0.25*hitsWeight),
	}

	// Once enough bytes are queued, only accept candidates that score at
	// least as high as the current worst entry.
	if f.queuedBytes >= f.saveBytes {
		if worst := f.queue.Back(); worst != nil && candidate.score < worst.Value.(queuedFile).score {
			return
		}
	}
	f.insertFile(candidate)
	f.trimQueue()
}
// adjustSaveBytes allows to adjust the number of bytes to save by n, which
// may be negative. This can be used to adjust the count on the fly.
//
// It returns true if there still is a need to delete files
// (n+saveBytes > 0) and false if no more bytes need to be saved, in which
// case saveBytes is set to zero. The queue is re-trimmed whenever the
// target shrinks. A nil receiver is accepted and yields false.
func (f *fileScorer) adjustSaveBytes(n int64) bool {
	if f == nil {
		return false
	}
	if int64(f.saveBytes)+n <= 0 {
		f.saveBytes = 0
		f.trimQueue()
		return false
	}

	// The sum is known to be positive here, so a single modular addition
	// covers both signs of n: for negative n, uint64(n) is the two's
	// complement of |n| and the addition subtracts |n|. The result can
	// never be zero at this point, hence no further zero check is needed.
	f.saveBytes += uint64(n)

	// A smaller target may allow dropping entries from the queue.
	if n < 0 {
		f.trimQueue()
	}
	return true
}
// insertFile adds file to the queue while keeping it ordered by descending
// score, and accounts for its size in queuedBytes. The file is placed in
// front of the first entry with a strictly lower score, or at the back if
// there is none.
func (f *fileScorer) insertFile(file queuedFile) {
	f.queuedBytes += file.size
	for pos := f.queue.Front(); pos != nil; pos = pos.Next() {
		if pos.Value.(queuedFile).score < file.score {
			f.queue.InsertBefore(file, pos)
			return
		}
	}
	// Nothing scores lower than file: it goes last.
	f.queue.PushBack(file)
}
// trimQueue drops entries from the back of the queue (the lowest scores)
// for as long as the remaining entries still add up to at least saveBytes.
func (f *fileScorer) trimQueue() {
	for tail := f.queue.Back(); tail != nil; tail = f.queue.Back() {
		remaining := f.queuedBytes - tail.Value.(queuedFile).size
		if remaining < f.saveBytes {
			// Removing this entry would leave too few bytes queued.
			return
		}
		f.queue.Remove(tail)
		f.queuedBytes = remaining
	}
}
// purgeFunc calls p for every queued file, from the front of the queue to
// the back. The queue itself is not modified by purgeFunc.
func (f *fileScorer) purgeFunc(p func(qfile queuedFile)) {
	for entry := f.queue.Front(); entry != nil; entry = entry.Next() {
		p(entry.Value.(queuedFile))
	}
}
// fileNames returns the names of all queued files in queue order.
// The result is never nil, even when the queue is empty.
func (f *fileScorer) fileNames() []string {
	names := make([]string, 0, f.queue.Len())
	for entry := f.queue.Front(); entry != nil; entry = entry.Next() {
		names = append(names, entry.Value.(queuedFile).name)
	}
	return names
}
// reset empties the queue and clears the queued byte count. The other
// fields of the scorer (such as saveBytes and seenBytes) are left as is.
func (f *fileScorer) reset() {
	f.queuedBytes = 0
	f.queue.Init()
}
// queueString renders the queue as text, one line per queued file, in
// queue order. Each line holds the zero-padded position, the file name, its
// score and its size; lines are separated by a newline with no trailing
// newline. An empty queue yields an empty string.
func (f *fileScorer) queueString() string {
	var out strings.Builder
	idx := 0
	for entry := f.queue.Front(); entry != nil; entry = entry.Next() {
		qf := entry.Value.(queuedFile)
		if idx > 0 {
			out.WriteByte('\n')
		}
		fmt.Fprintf(&out, "%03d: %s (score: %.3f, bytes: %d)", idx, qf.name, qf.score, qf.size)
		idx++
	}
	return out.String()
}
// bytesToClear returns the number of bytes to clear to reach the low
// watermark, given the disk's total and free space, the quota (in % of
// total) allocated to the cache, and the low and high watermarks (in % of
// the allowed quota).
//
// If usage is below the high watermark, 0 is returned. The result is never
// larger than the number of bytes allowed by the quota.
func bytesToClear(total, free int64, quotaPct, lowWatermark, highWatermark uint64) uint64 {
	used := total - free

	highMark := total * int64(highWatermark*quotaPct) / (100 * 100)
	if used < highMark {
		return 0
	}

	quotaBytes := total * int64(quotaPct) / 100
	lowMark := total * int64(lowWatermark*quotaPct) / (100 * 100)

	// Bytes above the low watermark, clamped to the range [0, quotaBytes].
	excess := float64(used - lowMark)
	if excess < 0 {
		excess = 0
	}
	if limit := float64(quotaBytes); limit < excess {
		excess = limit
	}
	return uint64(excess)
}
// multiWriter duplicates writes to a backend writer and to the write end of
// a pipe feeding the cache. The backend is authoritative: cache failures
// never fail a write, backend failures always do.
type multiWriter struct {
	backendWriter io.Writer
	cacheWriter   *io.PipeWriter
	// pipeClosed is set once cacheWriter has been closed with an error,
	// after which no further data is sent to the cache.
	pipeClosed bool
}

// Write writes p to the backend first and, if that succeeded, to the cache.
//
// If the backend write fails or is short (reported as io.ErrShortWrite),
// the cache pipe is closed with that error so the cache side does not
// mistake truncated data for a complete object, and the backend's byte
// count and the error are returned. If only the cache write fails, the
// pipe is closed with the cache error and writing to the backend continues;
// the call still reports len(p) bytes written and no error.
func (t *multiWriter) Write(p []byte) (n int, err error) {
	n, err = t.backendWriter.Write(p)
	if err == nil && n != len(p) {
		// Fall through to the error path so the cache learns about it too.
		err = io.ErrShortWrite
	}
	if err != nil {
		if !t.pipeClosed {
			t.pipeClosed = true
			// CloseWithError always returns nil.
			t.cacheWriter.CloseWithError(err)
		}
		return n, err
	}

	// ignore errors writing to cache
	if !t.pipeClosed {
		if _, cerr := t.cacheWriter.Write(p); cerr != nil {
			t.pipeClosed = true
			t.cacheWriter.CloseWithError(cerr)
		}
	}
	return len(p), nil
}

// cacheMultiWriter returns a writer that writes to the backend writer w1
// and mirrors the data into the cache pipe w2.
func cacheMultiWriter(w1 io.Writer, w2 *io.PipeWriter) io.Writer {
	return &multiWriter{backendWriter: w1, cacheWriter: w2}
}
// writebackInProgress returns true if the metadata carries a write-back
// status header whose value is CommitPending or CommitFailed, i.e. the
// writeback commit is not complete. A missing header yields false.
func writebackInProgress(m map[string]string) bool {
	raw, present := m[writeBackStatusHeader]
	if !present {
		return false
	}
	status := cacheCommitStatus(raw)
	return status == CommitPending || status == CommitFailed
}