mirror of
https://github.com/minio/minio.git
synced 2024-12-25 06:35:56 -05:00
1222 lines
35 KiB
Go
1222 lines
35 KiB
Go
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
//
|
|
// This file is part of MinIO Object Storage stack
|
|
//
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
// (at your option) any later version.
|
|
//
|
|
// This program is distributed in the hope that it will be useful
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU Affero General Public License for more details.
|
|
//
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
package cmd
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/hex"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"math/rand"
|
|
"net"
|
|
"net/http"
|
|
"path"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
"unicode/utf8"
|
|
|
|
"github.com/google/uuid"
|
|
"github.com/klauspost/compress/s2"
|
|
"github.com/klauspost/readahead"
|
|
"github.com/minio/minio-go/v7/pkg/s3utils"
|
|
"github.com/minio/minio/internal/config/compress"
|
|
"github.com/minio/minio/internal/config/dns"
|
|
"github.com/minio/minio/internal/config/storageclass"
|
|
"github.com/minio/minio/internal/crypto"
|
|
"github.com/minio/minio/internal/hash"
|
|
xhttp "github.com/minio/minio/internal/http"
|
|
"github.com/minio/minio/internal/ioutil"
|
|
"github.com/minio/minio/internal/logger"
|
|
"github.com/minio/pkg/v2/trie"
|
|
"github.com/minio/pkg/v2/wildcard"
|
|
"github.com/valyala/bytebufferpool"
|
|
"golang.org/x/exp/slices"
|
|
)
|
|
|
|
const (
|
|
// MinIO meta bucket.
|
|
minioMetaBucket = ".minio.sys"
|
|
// Multipart meta prefix.
|
|
mpartMetaPrefix = "multipart"
|
|
// MinIO Multipart meta prefix.
|
|
minioMetaMultipartBucket = minioMetaBucket + SlashSeparator + mpartMetaPrefix
|
|
// MinIO tmp meta prefix.
|
|
minioMetaTmpBucket = minioMetaBucket + "/tmp"
|
|
// MinIO tmp meta prefix for deleted objects.
|
|
minioMetaTmpDeletedBucket = minioMetaTmpBucket + "/.trash"
|
|
// DNS separator (period), used for bucket name validation.
|
|
dnsDelimiter = "."
|
|
// On compressed files bigger than this;
|
|
compReadAheadSize = 100 << 20
|
|
// Read this many buffers ahead.
|
|
compReadAheadBuffers = 5
|
|
// Size of each buffer.
|
|
compReadAheadBufSize = 1 << 20
|
|
// Pad Encrypted+Compressed files to a multiple of this.
|
|
compPadEncrypted = 256
|
|
// Disable compressed file indices below this size
|
|
compMinIndexSize = 8 << 20
|
|
)
|
|
|
|
// isMinioBucket returns true if given bucket is a MinIO internal
|
|
// bucket and false otherwise.
|
|
func isMinioMetaBucketName(bucket string) bool {
|
|
return strings.HasPrefix(bucket, minioMetaBucket)
|
|
}
|
|
|
|
// IsValidBucketName verifies that a bucket name is in accordance with
|
|
// Amazon's requirements (i.e. DNS naming conventions). It must be 3-63
|
|
// characters long, and it must be a sequence of one or more labels
|
|
// separated by periods. Each label can contain lowercase ascii
|
|
// letters, decimal digits and hyphens, but must not begin or end with
|
|
// a hyphen. See:
|
|
// http://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
|
|
func IsValidBucketName(bucket string) bool {
|
|
// Special case when bucket is equal to one of the meta buckets.
|
|
if isMinioMetaBucketName(bucket) {
|
|
return true
|
|
}
|
|
if len(bucket) < 3 || len(bucket) > 63 {
|
|
return false
|
|
}
|
|
|
|
// Split on dot and check each piece conforms to rules.
|
|
allNumbers := true
|
|
pieces := strings.Split(bucket, dnsDelimiter)
|
|
for _, piece := range pieces {
|
|
if len(piece) == 0 || piece[0] == '-' ||
|
|
piece[len(piece)-1] == '-' {
|
|
// Current piece has 0-length or starts or
|
|
// ends with a hyphen.
|
|
return false
|
|
}
|
|
// Now only need to check if each piece is a valid
|
|
// 'label' in AWS terminology and if the bucket looks
|
|
// like an IP address.
|
|
isNotNumber := false
|
|
for i := 0; i < len(piece); i++ {
|
|
switch {
|
|
case (piece[i] >= 'a' && piece[i] <= 'z' ||
|
|
piece[i] == '-'):
|
|
// Found a non-digit character, so
|
|
// this piece is not a number.
|
|
isNotNumber = true
|
|
case piece[i] >= '0' && piece[i] <= '9':
|
|
// Nothing to do.
|
|
default:
|
|
// Found invalid character.
|
|
return false
|
|
}
|
|
}
|
|
allNumbers = allNumbers && !isNotNumber
|
|
}
|
|
// Does the bucket name look like an IP address?
|
|
return !(len(pieces) == 4 && allNumbers)
|
|
}
|
|
|
|
// IsValidObjectName verifies an object name in accordance with Amazon's
|
|
// requirements. It cannot exceed 1024 characters and must be a valid UTF8
|
|
// string.
|
|
//
|
|
// See:
|
|
// http://docs.aws.amazon.com/AmazonS3/latest/dev/UsingMetadata.html
|
|
//
|
|
// You should avoid the following characters in a key name because of
|
|
// significant special handling for consistency across all
|
|
// applications.
|
|
//
|
|
// Rejects strings with following characters.
|
|
//
|
|
// - Backslash ("\")
|
|
//
|
|
// additionally minio does not support object names with trailing SlashSeparator.
|
|
func IsValidObjectName(object string) bool {
|
|
if len(object) == 0 {
|
|
return false
|
|
}
|
|
if HasSuffix(object, SlashSeparator) {
|
|
return false
|
|
}
|
|
return IsValidObjectPrefix(object)
|
|
}
|
|
|
|
// IsValidObjectPrefix verifies whether the prefix is a valid object name.
|
|
// Its valid to have a empty prefix.
|
|
func IsValidObjectPrefix(object string) bool {
|
|
if hasBadPathComponent(object) {
|
|
return false
|
|
}
|
|
if !utf8.ValidString(object) {
|
|
return false
|
|
}
|
|
if strings.Contains(object, `//`) {
|
|
return false
|
|
}
|
|
// This is valid for AWS S3 but it will never
|
|
// work with file systems, we will reject here
|
|
// to return object name invalid rather than
|
|
// a cryptic error from the file system.
|
|
return !strings.ContainsRune(object, 0)
|
|
}
|
|
|
|
// checkObjectNameForLengthAndSlash -check for the validity of object name length and prefis as slash
|
|
func checkObjectNameForLengthAndSlash(bucket, object string) error {
|
|
// Check for the length of object name
|
|
if len(object) > 1024 {
|
|
return ObjectNameTooLong{
|
|
Bucket: bucket,
|
|
Object: object,
|
|
}
|
|
}
|
|
// Check for slash as prefix in object name
|
|
if HasPrefix(object, SlashSeparator) {
|
|
return ObjectNamePrefixAsSlash{
|
|
Bucket: bucket,
|
|
Object: object,
|
|
}
|
|
}
|
|
if runtime.GOOS == globalWindowsOSName {
|
|
// Explicitly disallowed characters on windows.
|
|
// Avoids most problematic names.
|
|
if strings.ContainsAny(object, `\:*?"|<>`) {
|
|
return ObjectNameInvalid{
|
|
Bucket: bucket,
|
|
Object: object,
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// SlashSeparator - slash separator.
|
|
const SlashSeparator = "/"
|
|
|
|
// SlashSeparatorChar - slash separator.
|
|
const SlashSeparatorChar = '/'
|
|
|
|
// retainSlash - retains slash from a path.
|
|
func retainSlash(s string) string {
|
|
if s == "" {
|
|
return s
|
|
}
|
|
return strings.TrimSuffix(s, SlashSeparator) + SlashSeparator
|
|
}
|
|
|
|
// pathsJoinPrefix - like pathJoin retains trailing SlashSeparator
|
|
// for all elements, prepends them with 'prefix' respectively.
|
|
func pathsJoinPrefix(prefix string, elem ...string) (paths []string) {
|
|
paths = make([]string, len(elem))
|
|
for i, e := range elem {
|
|
paths[i] = pathJoin(prefix, e)
|
|
}
|
|
return paths
|
|
}
|
|
|
|
// pathJoin - like path.Join() but retains trailing SlashSeparator of the last element
|
|
func pathJoin(elem ...string) string {
|
|
sb := bytebufferpool.Get()
|
|
defer func() {
|
|
sb.Reset()
|
|
bytebufferpool.Put(sb)
|
|
}()
|
|
|
|
return pathJoinBuf(sb, elem...)
|
|
}
|
|
|
|
// pathJoinBuf - like path.Join() but retains trailing SlashSeparator of the last element.
|
|
// Provide a string builder to reduce allocation.
|
|
func pathJoinBuf(dst *bytebufferpool.ByteBuffer, elem ...string) string {
|
|
trailingSlash := len(elem) > 0 && hasSuffixByte(elem[len(elem)-1], SlashSeparatorChar)
|
|
dst.Reset()
|
|
added := 0
|
|
for _, e := range elem {
|
|
if added > 0 || e != "" {
|
|
if added > 0 {
|
|
dst.WriteByte(SlashSeparatorChar)
|
|
}
|
|
dst.WriteString(e)
|
|
added += len(e)
|
|
}
|
|
}
|
|
|
|
if pathNeedsClean(dst.Bytes()) {
|
|
s := path.Clean(dst.String())
|
|
if trailingSlash {
|
|
return s + SlashSeparator
|
|
}
|
|
return s
|
|
}
|
|
if trailingSlash {
|
|
dst.WriteByte(SlashSeparatorChar)
|
|
}
|
|
return dst.String()
|
|
}
|
|
|
|
// hasSuffixByte returns true if the last byte of s is 'suffix'
|
|
func hasSuffixByte(s string, suffix byte) bool {
|
|
return len(s) > 0 && s[len(s)-1] == suffix
|
|
}
|
|
|
|
// pathNeedsClean returns whether path.Clean may change the path.
|
|
// Will detect all cases that will be cleaned,
|
|
// but may produce false positives on non-trivial paths.
|
|
func pathNeedsClean(path []byte) bool {
|
|
if len(path) == 0 {
|
|
return true
|
|
}
|
|
|
|
rooted := path[0] == '/'
|
|
n := len(path)
|
|
|
|
r, w := 0, 0
|
|
if rooted {
|
|
r, w = 1, 1
|
|
}
|
|
|
|
for r < n {
|
|
switch {
|
|
case path[r] > 127:
|
|
// Non ascii.
|
|
return true
|
|
case path[r] == '/':
|
|
// multiple / elements
|
|
return true
|
|
case path[r] == '.' && (r+1 == n || path[r+1] == '/'):
|
|
// . element - assume it has to be cleaned.
|
|
return true
|
|
case path[r] == '.' && path[r+1] == '.' && (r+2 == n || path[r+2] == '/'):
|
|
// .. element: remove to last / - assume it has to be cleaned.
|
|
return true
|
|
default:
|
|
// real path element.
|
|
// add slash if needed
|
|
if rooted && w != 1 || !rooted && w != 0 {
|
|
w++
|
|
}
|
|
// copy element
|
|
for ; r < n && path[r] != '/'; r++ {
|
|
w++
|
|
}
|
|
// allow one slash, not at end
|
|
if r < n-1 && path[r] == '/' {
|
|
r++
|
|
}
|
|
}
|
|
}
|
|
|
|
// Turn empty string into "."
|
|
if w == 0 {
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// mustGetUUID - get a random UUID.
|
|
func mustGetUUID() string {
|
|
u, err := uuid.NewRandom()
|
|
if err != nil {
|
|
logger.CriticalIf(GlobalContext, err)
|
|
}
|
|
|
|
return u.String()
|
|
}
|
|
|
|
// mustGetUUIDBytes - get a random UUID as 16 bytes unencoded.
|
|
func mustGetUUIDBytes() []byte {
|
|
u, err := uuid.NewRandom()
|
|
if err != nil {
|
|
logger.CriticalIf(GlobalContext, err)
|
|
}
|
|
return u[:]
|
|
}
|
|
|
|
// Create an s3 compatible MD5sum for complete multipart transaction.
|
|
func getCompleteMultipartMD5(parts []CompletePart) string {
|
|
var finalMD5Bytes []byte
|
|
for _, part := range parts {
|
|
md5Bytes, err := hex.DecodeString(canonicalizeETag(part.ETag))
|
|
if err != nil {
|
|
finalMD5Bytes = append(finalMD5Bytes, []byte(part.ETag)...)
|
|
} else {
|
|
finalMD5Bytes = append(finalMD5Bytes, md5Bytes...)
|
|
}
|
|
}
|
|
s3MD5 := fmt.Sprintf("%s-%d", getMD5Hash(finalMD5Bytes), len(parts))
|
|
return s3MD5
|
|
}
|
|
|
|
// Clean unwanted fields from metadata
|
|
func cleanMetadata(metadata map[string]string) map[string]string {
|
|
// Remove STANDARD StorageClass
|
|
metadata = removeStandardStorageClass(metadata)
|
|
// Clean meta etag keys 'md5Sum', 'etag', "expires", "x-amz-tagging".
|
|
return cleanMetadataKeys(metadata, "md5Sum", "etag", "expires", xhttp.AmzObjectTagging, "last-modified", VersionPurgeStatusKey)
|
|
}
|
|
|
|
// Filter X-Amz-Storage-Class field only if it is set to STANDARD.
|
|
// This is done since AWS S3 doesn't return STANDARD Storage class as response header.
|
|
func removeStandardStorageClass(metadata map[string]string) map[string]string {
|
|
if metadata[xhttp.AmzStorageClass] == storageclass.STANDARD {
|
|
delete(metadata, xhttp.AmzStorageClass)
|
|
}
|
|
return metadata
|
|
}
|
|
|
|
// cleanMetadataKeys takes keyNames to be filtered
|
|
// and returns a new map with all the entries with keyNames removed.
|
|
func cleanMetadataKeys(metadata map[string]string, keyNames ...string) map[string]string {
|
|
newMeta := make(map[string]string, len(metadata))
|
|
for k, v := range metadata {
|
|
if slices.Contains(keyNames, k) {
|
|
continue
|
|
}
|
|
newMeta[k] = v
|
|
}
|
|
return newMeta
|
|
}
|
|
|
|
// Extracts etag value from the metadata.
|
|
func extractETag(metadata map[string]string) string {
|
|
etag, ok := metadata["etag"]
|
|
if !ok {
|
|
// md5Sum tag is kept for backward compatibility.
|
|
etag = metadata["md5Sum"]
|
|
}
|
|
// Success.
|
|
return etag
|
|
}
|
|
|
|
// HasPrefix - Prefix matcher string matches prefix in a platform specific way.
|
|
// For example on windows since its case insensitive we are supposed
|
|
// to do case insensitive checks.
|
|
func HasPrefix(s string, prefix string) bool {
|
|
if runtime.GOOS == globalWindowsOSName {
|
|
return stringsHasPrefixFold(s, prefix)
|
|
}
|
|
return strings.HasPrefix(s, prefix)
|
|
}
|
|
|
|
// HasSuffix - Suffix matcher string matches suffix in a platform specific way.
|
|
// For example on windows since its case insensitive we are supposed
|
|
// to do case insensitive checks.
|
|
func HasSuffix(s string, suffix string) bool {
|
|
if runtime.GOOS == globalWindowsOSName {
|
|
return strings.HasSuffix(strings.ToLower(s), strings.ToLower(suffix))
|
|
}
|
|
return strings.HasSuffix(s, suffix)
|
|
}
|
|
|
|
// Validates if two strings are equal.
|
|
func isStringEqual(s1 string, s2 string) bool {
|
|
if runtime.GOOS == globalWindowsOSName {
|
|
return strings.EqualFold(s1, s2)
|
|
}
|
|
return s1 == s2
|
|
}
|
|
|
|
// Ignores all reserved bucket names or invalid bucket names.
|
|
func isReservedOrInvalidBucket(bucketEntry string, strict bool) bool {
|
|
if bucketEntry == "" {
|
|
return true
|
|
}
|
|
|
|
bucketEntry = strings.TrimSuffix(bucketEntry, SlashSeparator)
|
|
if strict {
|
|
if err := s3utils.CheckValidBucketNameStrict(bucketEntry); err != nil {
|
|
return true
|
|
}
|
|
} else {
|
|
if err := s3utils.CheckValidBucketName(bucketEntry); err != nil {
|
|
return true
|
|
}
|
|
}
|
|
return isMinioMetaBucket(bucketEntry) || isMinioReservedBucket(bucketEntry)
|
|
}
|
|
|
|
// Returns true if input bucket is a reserved minio meta bucket '.minio.sys'.
|
|
func isMinioMetaBucket(bucketName string) bool {
|
|
return bucketName == minioMetaBucket
|
|
}
|
|
|
|
// Returns true if input bucket is a reserved minio bucket 'minio'.
|
|
func isMinioReservedBucket(bucketName string) bool {
|
|
return bucketName == minioReservedBucket
|
|
}
|
|
|
|
// returns a slice of hosts by reading a slice of DNS records
|
|
func getHostsSlice(records []dns.SrvRecord) []string {
|
|
hosts := make([]string, len(records))
|
|
for i, r := range records {
|
|
hosts[i] = net.JoinHostPort(r.Host, string(r.Port))
|
|
}
|
|
return hosts
|
|
}
|
|
|
|
// returns an online host (and corresponding port) from a slice of DNS records
|
|
func getHostFromSrv(records []dns.SrvRecord) (host string) {
|
|
hosts := getHostsSlice(records)
|
|
rng := rand.New(rand.NewSource(time.Now().UTC().UnixNano()))
|
|
var d net.Dialer
|
|
var retry int
|
|
for retry < len(hosts) {
|
|
ctx, cancel := context.WithTimeout(GlobalContext, 300*time.Millisecond)
|
|
|
|
host = hosts[rng.Intn(len(hosts))]
|
|
conn, err := d.DialContext(ctx, "tcp", host)
|
|
cancel()
|
|
if err != nil {
|
|
retry++
|
|
continue
|
|
}
|
|
conn.Close()
|
|
break
|
|
}
|
|
|
|
return host
|
|
}
|
|
|
|
// IsCompressed returns true if the object is marked as compressed.
|
|
func (o *ObjectInfo) IsCompressed() bool {
|
|
_, ok := o.UserDefined[ReservedMetadataPrefix+"compression"]
|
|
return ok
|
|
}
|
|
|
|
// IsCompressedOK returns whether the object is compressed and can be decompressed.
|
|
func (o *ObjectInfo) IsCompressedOK() (bool, error) {
|
|
scheme, ok := o.UserDefined[ReservedMetadataPrefix+"compression"]
|
|
if !ok {
|
|
return false, nil
|
|
}
|
|
switch scheme {
|
|
case compressionAlgorithmV1, compressionAlgorithmV2:
|
|
return true, nil
|
|
}
|
|
return true, fmt.Errorf("unknown compression scheme: %s", scheme)
|
|
}
|
|
|
|
// GetActualSize - returns the actual size of the stored object
|
|
func (o ObjectInfo) GetActualSize() (int64, error) {
|
|
if o.ActualSize != nil {
|
|
return *o.ActualSize, nil
|
|
}
|
|
if o.IsCompressed() {
|
|
sizeStr, ok := o.UserDefined[ReservedMetadataPrefix+"actual-size"]
|
|
if !ok {
|
|
return -1, errInvalidDecompressedSize
|
|
}
|
|
size, err := strconv.ParseInt(sizeStr, 10, 64)
|
|
if err != nil {
|
|
return -1, errInvalidDecompressedSize
|
|
}
|
|
return size, nil
|
|
}
|
|
if _, ok := crypto.IsEncrypted(o.UserDefined); ok {
|
|
return o.DecryptedSize()
|
|
}
|
|
|
|
return o.Size, nil
|
|
}
|
|
|
|
// Disabling compression for encrypted enabled requests.
|
|
// Using compression and encryption together enables room for side channel attacks.
|
|
// Eliminate non-compressible objects by extensions/content-types.
|
|
func isCompressible(header http.Header, object string) bool {
|
|
globalCompressConfigMu.Lock()
|
|
cfg := globalCompressConfig
|
|
globalCompressConfigMu.Unlock()
|
|
|
|
return !excludeForCompression(header, object, cfg)
|
|
}
|
|
|
|
// Eliminate the non-compressible objects.
|
|
func excludeForCompression(header http.Header, object string, cfg compress.Config) bool {
|
|
objStr := object
|
|
contentType := header.Get(xhttp.ContentType)
|
|
if !cfg.Enabled {
|
|
return true
|
|
}
|
|
|
|
if crypto.Requested(header) && !cfg.AllowEncrypted {
|
|
return true
|
|
}
|
|
|
|
// We strictly disable compression for standard extensions/content-types (`compressed`).
|
|
if hasStringSuffixInSlice(objStr, standardExcludeCompressExtensions) || hasPattern(standardExcludeCompressContentTypes, contentType) {
|
|
return true
|
|
}
|
|
|
|
// Filter compression includes.
|
|
exclude := len(cfg.Extensions) > 0 || len(cfg.MimeTypes) > 0
|
|
if len(cfg.Extensions) > 0 && hasStringSuffixInSlice(objStr, cfg.Extensions) {
|
|
exclude = false
|
|
}
|
|
|
|
if len(cfg.MimeTypes) > 0 && hasPattern(cfg.MimeTypes, contentType) {
|
|
exclude = false
|
|
}
|
|
return exclude
|
|
}
|
|
|
|
// Utility which returns if a string is present in the list.
|
|
// Comparison is case insensitive.
|
|
func hasStringSuffixInSlice(str string, list []string) bool {
|
|
str = strings.ToLower(str)
|
|
for _, v := range list {
|
|
if strings.HasSuffix(str, strings.ToLower(v)) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Returns true if any of the given wildcard patterns match the matchStr.
|
|
func hasPattern(patterns []string, matchStr string) bool {
|
|
for _, pattern := range patterns {
|
|
if ok := wildcard.MatchSimple(pattern, matchStr); ok {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// Returns the part file name which matches the partNumber and etag.
|
|
func getPartFile(entriesTrie *trie.Trie, partNumber int, etag string) (partFile string) {
|
|
for _, match := range entriesTrie.PrefixMatch(fmt.Sprintf("%.5d.%s.", partNumber, etag)) {
|
|
partFile = match
|
|
break
|
|
}
|
|
return partFile
|
|
}
|
|
|
|
func partNumberToRangeSpec(oi ObjectInfo, partNumber int) *HTTPRangeSpec {
|
|
if oi.Size == 0 || len(oi.Parts) == 0 {
|
|
return nil
|
|
}
|
|
|
|
var start int64
|
|
end := int64(-1)
|
|
for i := 0; i < len(oi.Parts) && i < partNumber; i++ {
|
|
start = end + 1
|
|
end = start + oi.Parts[i].ActualSize - 1
|
|
}
|
|
|
|
return &HTTPRangeSpec{Start: start, End: end}
|
|
}
|
|
|
|
// Returns the compressed offset which should be skipped.
|
|
// If encrypted offsets are adjusted for encrypted block headers/trailers.
|
|
// Since de-compression is after decryption encryption overhead is only added to compressedOffset.
|
|
func getCompressedOffsets(oi ObjectInfo, offset int64, decrypt func([]byte) ([]byte, error)) (compressedOffset int64, partSkip int64, firstPart int, decryptSkip int64, seqNum uint32) {
|
|
var skipLength int64
|
|
var cumulativeActualSize int64
|
|
var firstPartIdx int
|
|
if len(oi.Parts) > 0 {
|
|
for i, part := range oi.Parts {
|
|
cumulativeActualSize += part.ActualSize
|
|
if cumulativeActualSize <= offset {
|
|
compressedOffset += part.Size
|
|
} else {
|
|
firstPartIdx = i
|
|
skipLength = cumulativeActualSize - part.ActualSize
|
|
break
|
|
}
|
|
}
|
|
}
|
|
partSkip = offset - skipLength
|
|
|
|
// Load index and skip more if feasible.
|
|
if partSkip > 0 && len(oi.Parts) > firstPartIdx && len(oi.Parts[firstPartIdx].Index) > 0 {
|
|
_, isEncrypted := crypto.IsEncrypted(oi.UserDefined)
|
|
if isEncrypted {
|
|
dec, err := decrypt(oi.Parts[firstPartIdx].Index)
|
|
if err == nil {
|
|
// Load Index
|
|
var idx s2.Index
|
|
_, err := idx.Load(s2.RestoreIndexHeaders(dec))
|
|
|
|
// Find compressed/uncompressed offsets of our partskip
|
|
compOff, uCompOff, err2 := idx.Find(partSkip)
|
|
|
|
if err == nil && err2 == nil && compOff > 0 {
|
|
// Encrypted.
|
|
const sseDAREEncPackageBlockSize = SSEDAREPackageBlockSize + SSEDAREPackageMetaSize
|
|
// Number of full blocks in skipped area
|
|
seqNum = uint32(compOff / SSEDAREPackageBlockSize)
|
|
// Skip this many inside a decrypted block to get to compression block start
|
|
decryptSkip = compOff % SSEDAREPackageBlockSize
|
|
// Skip this number of full blocks.
|
|
skipEnc := compOff / SSEDAREPackageBlockSize
|
|
skipEnc *= sseDAREEncPackageBlockSize
|
|
compressedOffset += skipEnc
|
|
// Skip this number of uncompressed bytes.
|
|
partSkip -= uCompOff
|
|
}
|
|
}
|
|
} else {
|
|
// Not encrypted
|
|
var idx s2.Index
|
|
_, err := idx.Load(s2.RestoreIndexHeaders(oi.Parts[firstPartIdx].Index))
|
|
|
|
// Find compressed/uncompressed offsets of our partskip
|
|
compOff, uCompOff, err2 := idx.Find(partSkip)
|
|
|
|
if err == nil && err2 == nil && compOff > 0 {
|
|
compressedOffset += compOff
|
|
partSkip -= uCompOff
|
|
}
|
|
}
|
|
}
|
|
|
|
return compressedOffset, partSkip, firstPartIdx, decryptSkip, seqNum
|
|
}
|
|
|
|
// GetObjectReader is a type that wraps a reader with a lock to
|
|
// provide a ReadCloser interface that unlocks on Close()
|
|
type GetObjectReader struct {
|
|
io.Reader
|
|
ObjInfo ObjectInfo
|
|
cleanUpFns []func()
|
|
opts ObjectOptions
|
|
once sync.Once
|
|
}
|
|
|
|
// WithCleanupFuncs sets additional cleanup functions to be called when closing
|
|
// the GetObjectReader.
|
|
func (g *GetObjectReader) WithCleanupFuncs(fns ...func()) *GetObjectReader {
|
|
g.cleanUpFns = append(g.cleanUpFns, fns...)
|
|
return g
|
|
}
|
|
|
|
// NewGetObjectReaderFromReader sets up a GetObjectReader with a given
|
|
// reader. This ignores any object properties.
|
|
func NewGetObjectReaderFromReader(r io.Reader, oi ObjectInfo, opts ObjectOptions, cleanupFns ...func()) (*GetObjectReader, error) {
|
|
if opts.CheckPrecondFn != nil && opts.CheckPrecondFn(oi) {
|
|
// Call the cleanup funcs
|
|
for i := len(cleanupFns) - 1; i >= 0; i-- {
|
|
cleanupFns[i]()
|
|
}
|
|
return nil, PreConditionFailed{}
|
|
}
|
|
return &GetObjectReader{
|
|
ObjInfo: oi,
|
|
Reader: r,
|
|
cleanUpFns: cleanupFns,
|
|
opts: opts,
|
|
}, nil
|
|
}
|
|
|
|
// ObjReaderFn is a function type that takes a reader and returns
|
|
// GetObjectReader and an error. Request headers are passed to provide
|
|
// encryption parameters. cleanupFns allow cleanup funcs to be
|
|
// registered for calling after usage of the reader.
|
|
type ObjReaderFn func(inputReader io.Reader, h http.Header, cleanupFns ...func()) (r *GetObjectReader, err error)
|
|
|
|
// NewGetObjectReader creates a new GetObjectReader. The cleanUpFns
|
|
// are called on Close() in FIFO order as passed in ObjReadFn(). NOTE: It is
|
|
// assumed that clean up functions do not panic (otherwise, they may
|
|
// not all run!).
|
|
func NewGetObjectReader(rs *HTTPRangeSpec, oi ObjectInfo, opts ObjectOptions) (
|
|
fn ObjReaderFn, off, length int64, err error,
|
|
) {
|
|
if opts.CheckPrecondFn != nil && opts.CheckPrecondFn(oi) {
|
|
return nil, 0, 0, PreConditionFailed{}
|
|
}
|
|
|
|
if rs == nil && opts.PartNumber > 0 {
|
|
rs = partNumberToRangeSpec(oi, opts.PartNumber)
|
|
}
|
|
|
|
_, isEncrypted := crypto.IsEncrypted(oi.UserDefined)
|
|
isCompressed, err := oi.IsCompressedOK()
|
|
if err != nil {
|
|
return nil, 0, 0, err
|
|
}
|
|
|
|
// if object is encrypted and it is a restore request or if NoDecryption
|
|
// was requested, fetch content without decrypting.
|
|
if opts.Transition.RestoreRequest != nil || opts.NoDecryption {
|
|
isEncrypted = false
|
|
isCompressed = false
|
|
}
|
|
|
|
// Calculate range to read (different for encrypted/compressed objects)
|
|
switch {
|
|
case isCompressed:
|
|
var firstPart int
|
|
if opts.PartNumber > 0 {
|
|
// firstPart is an index to Parts slice,
|
|
// make sure that PartNumber uses the
|
|
// index value properly.
|
|
firstPart = opts.PartNumber - 1
|
|
}
|
|
|
|
// If compressed, we start from the beginning of the part.
|
|
// Read the decompressed size from the meta.json.
|
|
actualSize, err := oi.GetActualSize()
|
|
if err != nil {
|
|
return nil, 0, 0, err
|
|
}
|
|
var decryptSkip int64
|
|
var seqNum uint32
|
|
|
|
off, length = int64(0), oi.Size
|
|
decOff, decLength := int64(0), actualSize
|
|
if rs != nil {
|
|
off, length, err = rs.GetOffsetLength(actualSize)
|
|
if err != nil {
|
|
return nil, 0, 0, err
|
|
}
|
|
decrypt := func(b []byte) ([]byte, error) {
|
|
return b, nil
|
|
}
|
|
if isEncrypted {
|
|
decrypt = oi.compressionIndexDecrypt
|
|
}
|
|
// In case of range based queries on multiparts, the offset and length are reduced.
|
|
off, decOff, firstPart, decryptSkip, seqNum = getCompressedOffsets(oi, off, decrypt)
|
|
decLength = length
|
|
length = oi.Size - off
|
|
// For negative length we read everything.
|
|
if decLength < 0 {
|
|
decLength = actualSize - decOff
|
|
}
|
|
|
|
// Reply back invalid range if the input offset and length fall out of range.
|
|
if decOff > actualSize || decOff+decLength > actualSize {
|
|
return nil, 0, 0, errInvalidRange
|
|
}
|
|
}
|
|
fn = func(inputReader io.Reader, h http.Header, cFns ...func()) (r *GetObjectReader, err error) {
|
|
if isEncrypted {
|
|
copySource := h.Get(xhttp.AmzServerSideEncryptionCopyCustomerAlgorithm) != ""
|
|
// Attach decrypter on inputReader
|
|
inputReader, err = DecryptBlocksRequestR(inputReader, h, seqNum, firstPart, oi, copySource)
|
|
if err != nil {
|
|
// Call the cleanup funcs
|
|
for i := len(cFns) - 1; i >= 0; i-- {
|
|
cFns[i]()
|
|
}
|
|
return nil, err
|
|
}
|
|
if decryptSkip > 0 {
|
|
inputReader = ioutil.NewSkipReader(inputReader, decryptSkip)
|
|
}
|
|
oi.Size = decLength
|
|
}
|
|
// Decompression reader.
|
|
var dopts []s2.ReaderOption
|
|
if off > 0 || decOff > 0 {
|
|
// We are not starting at the beginning, so ignore stream identifiers.
|
|
dopts = append(dopts, s2.ReaderIgnoreStreamIdentifier())
|
|
}
|
|
s2Reader := s2.NewReader(inputReader, dopts...)
|
|
// Apply the skipLen and limit on the decompressed stream.
|
|
if decOff > 0 {
|
|
if err = s2Reader.Skip(decOff); err != nil {
|
|
// Call the cleanup funcs
|
|
for i := len(cFns) - 1; i >= 0; i-- {
|
|
cFns[i]()
|
|
}
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
decReader := io.LimitReader(s2Reader, decLength)
|
|
if decLength > compReadAheadSize {
|
|
rah, err := readahead.NewReaderSize(decReader, compReadAheadBuffers, compReadAheadBufSize)
|
|
if err == nil {
|
|
decReader = rah
|
|
cFns = append([]func(){func() {
|
|
rah.Close()
|
|
}}, cFns...)
|
|
}
|
|
}
|
|
oi.Size = decLength
|
|
|
|
// Assemble the GetObjectReader
|
|
r = &GetObjectReader{
|
|
ObjInfo: oi,
|
|
Reader: decReader,
|
|
cleanUpFns: cFns,
|
|
opts: opts,
|
|
}
|
|
return r, nil
|
|
}
|
|
|
|
case isEncrypted:
|
|
var seqNumber uint32
|
|
var partStart int
|
|
var skipLen int64
|
|
|
|
off, length, skipLen, seqNumber, partStart, err = oi.GetDecryptedRange(rs)
|
|
if err != nil {
|
|
return nil, 0, 0, err
|
|
}
|
|
var decSize int64
|
|
decSize, err = oi.DecryptedSize()
|
|
if err != nil {
|
|
return nil, 0, 0, err
|
|
}
|
|
var decRangeLength int64
|
|
decRangeLength, err = rs.GetLength(decSize)
|
|
if err != nil {
|
|
return nil, 0, 0, err
|
|
}
|
|
|
|
// We define a closure that performs decryption given
|
|
// a reader that returns the desired range of
|
|
// encrypted bytes. The header parameter is used to
|
|
// provide encryption parameters.
|
|
fn = func(inputReader io.Reader, h http.Header, cFns ...func()) (r *GetObjectReader, err error) {
|
|
copySource := h.Get(xhttp.AmzServerSideEncryptionCopyCustomerAlgorithm) != ""
|
|
|
|
// Attach decrypter on inputReader
|
|
var decReader io.Reader
|
|
decReader, err = DecryptBlocksRequestR(inputReader, h, seqNumber, partStart, oi, copySource)
|
|
if err != nil {
|
|
// Call the cleanup funcs
|
|
for i := len(cFns) - 1; i >= 0; i-- {
|
|
cFns[i]()
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
oi.ETag = getDecryptedETag(h, oi, false)
|
|
|
|
// Apply the skipLen and limit on the
|
|
// decrypted stream
|
|
decReader = io.LimitReader(ioutil.NewSkipReader(decReader, skipLen), decRangeLength)
|
|
|
|
// Assemble the GetObjectReader
|
|
r = &GetObjectReader{
|
|
ObjInfo: oi,
|
|
Reader: decReader,
|
|
cleanUpFns: cFns,
|
|
opts: opts,
|
|
}
|
|
return r, nil
|
|
}
|
|
|
|
default:
|
|
off, length, err = rs.GetOffsetLength(oi.Size)
|
|
if err != nil {
|
|
return nil, 0, 0, err
|
|
}
|
|
fn = func(inputReader io.Reader, _ http.Header, cFns ...func()) (r *GetObjectReader, err error) {
|
|
r = &GetObjectReader{
|
|
ObjInfo: oi,
|
|
Reader: inputReader,
|
|
cleanUpFns: cFns,
|
|
opts: opts,
|
|
}
|
|
return r, nil
|
|
}
|
|
}
|
|
return fn, off, length, nil
|
|
}
|
|
|
|
// Close - calls the cleanup actions in reverse order
|
|
func (g *GetObjectReader) Close() error {
|
|
if g == nil {
|
|
return nil
|
|
}
|
|
// sync.Once is used here to ensure that Close() is
|
|
// idempotent.
|
|
g.once.Do(func() {
|
|
for i := len(g.cleanUpFns) - 1; i >= 0; i-- {
|
|
g.cleanUpFns[i]()
|
|
}
|
|
})
|
|
return nil
|
|
}
|
|
|
|
// compressionIndexEncrypter returns a function that will read data from input,
|
|
// encrypt it using the provided key and return the result.
|
|
func compressionIndexEncrypter(key crypto.ObjectKey, input func() []byte) func() []byte {
|
|
var data []byte
|
|
var fetched bool
|
|
return func() []byte {
|
|
if !fetched {
|
|
data = input()
|
|
fetched = true
|
|
}
|
|
return metadataEncrypter(key)("compression-index", data)
|
|
}
|
|
}
|
|
|
|
// compressionIndexDecrypt reverses compressionIndexEncrypter.
|
|
func (o *ObjectInfo) compressionIndexDecrypt(input []byte) ([]byte, error) {
|
|
return o.metadataDecrypter()("compression-index", input)
|
|
}
|
|
|
|
// SealMD5CurrFn seals md5sum with object encryption key and returns sealed
|
|
// md5sum
|
|
type SealMD5CurrFn func([]byte) []byte
|
|
|
|
// PutObjReader is a type that wraps sio.EncryptReader and
|
|
// underlying hash.Reader in a struct
|
|
type PutObjReader struct {
|
|
*hash.Reader // actual data stream
|
|
rawReader *hash.Reader // original data stream
|
|
sealMD5Fn SealMD5CurrFn
|
|
}
|
|
|
|
// Size returns the absolute number of bytes the Reader
|
|
// will return during reading. It returns -1 for unlimited
|
|
// data.
|
|
func (p *PutObjReader) Size() int64 {
|
|
return p.Reader.Size()
|
|
}
|
|
|
|
// MD5CurrentHexString returns the current MD5Sum or encrypted MD5Sum
|
|
// as a hex encoded string
|
|
func (p *PutObjReader) MD5CurrentHexString() string {
|
|
md5sumCurr := p.rawReader.MD5Current()
|
|
var appendHyphen bool
|
|
// md5sumcurr is not empty in two scenarios
|
|
// - server is running in strict compatibility mode
|
|
// - client set Content-Md5 during PUT operation
|
|
if len(md5sumCurr) == 0 {
|
|
// md5sumCurr is only empty when we are running
|
|
// in non-compatibility mode.
|
|
md5sumCurr = make([]byte, 16)
|
|
rand.Read(md5sumCurr)
|
|
appendHyphen = true
|
|
}
|
|
if p.sealMD5Fn != nil {
|
|
md5sumCurr = p.sealMD5Fn(md5sumCurr)
|
|
}
|
|
if appendHyphen {
|
|
// Make sure to return etag string upto 32 length, for SSE
|
|
// requests ETag might be longer and the code decrypting the
|
|
// ETag ignores ETag in multipart ETag form i.e <hex>-N
|
|
return hex.EncodeToString(md5sumCurr)[:32] + "-1"
|
|
}
|
|
return hex.EncodeToString(md5sumCurr)
|
|
}
|
|
|
|
// WithEncryption sets up encrypted reader and the sealing for content md5sum
|
|
// using objEncKey. Unsealed md5sum is computed from the rawReader setup when
|
|
// NewPutObjReader was called. It returns an error if called on an uninitialized
|
|
// PutObjReader.
|
|
func (p *PutObjReader) WithEncryption(encReader *hash.Reader, objEncKey *crypto.ObjectKey) (*PutObjReader, error) {
|
|
if p.Reader == nil {
|
|
return nil, errors.New("put-object reader uninitialized")
|
|
}
|
|
p.Reader = encReader
|
|
p.sealMD5Fn = sealETagFn(*objEncKey)
|
|
return p, nil
|
|
}
|
|
|
|
// NewPutObjReader returns a new PutObjReader. It uses given hash.Reader's
|
|
// MD5Current method to construct md5sum when requested downstream.
|
|
func NewPutObjReader(rawReader *hash.Reader) *PutObjReader {
|
|
return &PutObjReader{Reader: rawReader, rawReader: rawReader}
|
|
}
|
|
|
|
func sealETag(encKey crypto.ObjectKey, md5CurrSum []byte) []byte {
|
|
var emptyKey [32]byte
|
|
if bytes.Equal(encKey[:], emptyKey[:]) {
|
|
return md5CurrSum
|
|
}
|
|
return encKey.SealETag(md5CurrSum)
|
|
}
|
|
|
|
func sealETagFn(key crypto.ObjectKey) SealMD5CurrFn {
|
|
fn := func(md5sumcurr []byte) []byte {
|
|
return sealETag(key, md5sumcurr)
|
|
}
|
|
return fn
|
|
}
|
|
|
|
// compressOpts are the options for writing compressed data.
|
|
var compressOpts []s2.WriterOption
|
|
|
|
func init() {
|
|
if runtime.GOARCH == "amd64" {
|
|
// On amd64 we have assembly and can use stronger compression.
|
|
compressOpts = append(compressOpts, s2.WriterBetterCompression())
|
|
}
|
|
}
|
|
|
|
// newS2CompressReader will read data from r, compress it and return the compressed data as a Reader.
|
|
// Use Close to ensure resources are released on incomplete streams.
|
|
//
|
|
// input 'on' is always recommended such that this function works
|
|
// properly, because we do not wish to create an object even if
|
|
// client closed the stream prematurely.
|
|
func newS2CompressReader(r io.Reader, on int64, encrypted bool) (rc io.ReadCloser, idx func() []byte) {
|
|
pr, pw := io.Pipe()
|
|
// Copy input to compressor
|
|
opts := compressOpts
|
|
if encrypted {
|
|
// The values used for padding are not a security concern,
|
|
// but we choose pseudo-random numbers instead of just zeros.
|
|
rng := rand.New(rand.NewSource(time.Now().UnixNano()))
|
|
opts = append([]s2.WriterOption{s2.WriterPadding(compPadEncrypted), s2.WriterPaddingSrc(rng)}, compressOpts...)
|
|
}
|
|
comp := s2.NewWriter(pw, opts...)
|
|
indexCh := make(chan []byte, 1)
|
|
go func() {
|
|
defer close(indexCh)
|
|
cn, err := io.Copy(comp, r)
|
|
if err != nil {
|
|
comp.Close()
|
|
pw.CloseWithError(err)
|
|
return
|
|
}
|
|
if on > 0 && on != cn {
|
|
// if client didn't sent all data
|
|
// from the client verify here.
|
|
comp.Close()
|
|
pw.CloseWithError(IncompleteBody{})
|
|
return
|
|
}
|
|
// Close the stream.
|
|
// If more than compMinIndexSize was written, generate index.
|
|
if cn > compMinIndexSize {
|
|
idx, err := comp.CloseIndex()
|
|
idx = s2.RemoveIndexHeaders(idx)
|
|
indexCh <- idx
|
|
pw.CloseWithError(err)
|
|
return
|
|
}
|
|
pw.CloseWithError(comp.Close())
|
|
}()
|
|
var gotIdx []byte
|
|
return pr, func() []byte {
|
|
if gotIdx != nil {
|
|
return gotIdx
|
|
}
|
|
// Will get index or nil if closed.
|
|
gotIdx = <-indexCh
|
|
return gotIdx
|
|
}
|
|
}
|
|
|
|
// compressSelfTest performs a self-test to ensure that compression
|
|
// algorithms completes a roundtrip. If any algorithm
|
|
// produces an incorrect checksum it fails with a hard error.
|
|
//
|
|
// compressSelfTest tries to catch any issue in the compression implementation
|
|
// early instead of silently corrupting data.
|
|
func compressSelfTest() {
|
|
// 4 MB block.
|
|
// Approx runtime ~30ms
|
|
data := make([]byte, 4<<20)
|
|
rng := rand.New(rand.NewSource(0))
|
|
for i := range data {
|
|
// Generate compressible stream...
|
|
data[i] = byte(rng.Int63() & 3)
|
|
}
|
|
failOnErr := func(err error) {
|
|
if err != nil {
|
|
logger.Fatal(errSelfTestFailure, "compress: error on self-test: %v", err)
|
|
}
|
|
}
|
|
const skip = 2<<20 + 511
|
|
r, _ := newS2CompressReader(bytes.NewBuffer(data), int64(len(data)), true)
|
|
b, err := io.ReadAll(r)
|
|
failOnErr(err)
|
|
failOnErr(r.Close())
|
|
// Decompression reader.
|
|
s2Reader := s2.NewReader(bytes.NewBuffer(b))
|
|
// Apply the skipLen on the decompressed stream.
|
|
failOnErr(s2Reader.Skip(skip))
|
|
got, err := io.ReadAll(s2Reader)
|
|
failOnErr(err)
|
|
if !bytes.Equal(got, data[skip:]) {
|
|
logger.Fatal(errSelfTestFailure, "compress: self-test roundtrip mismatch.")
|
|
}
|
|
}
|
|
|
|
// getDiskInfos returns the disk information for the provided disks.
|
|
// If a disk is nil or an error is returned the result will be nil as well.
|
|
func getDiskInfos(ctx context.Context, disks ...StorageAPI) []*DiskInfo {
|
|
res := make([]*DiskInfo, len(disks))
|
|
for i, disk := range disks {
|
|
if disk == nil {
|
|
continue
|
|
}
|
|
if di, err := disk.DiskInfo(ctx, false); err == nil {
|
|
res[i] = &di
|
|
}
|
|
}
|
|
return res
|
|
}
|
|
|
|
// hasSpaceFor returns whether the disks in `di` have space for and object of a given size.
|
|
func hasSpaceFor(di []*DiskInfo, size int64) (bool, error) {
|
|
// We multiply the size by 2 to account for erasure coding.
|
|
size *= 2
|
|
if size < 0 {
|
|
// If no size, assume diskAssumeUnknownSize.
|
|
size = diskAssumeUnknownSize
|
|
}
|
|
|
|
var available uint64
|
|
var total uint64
|
|
var nDisks int
|
|
for _, disk := range di {
|
|
if disk == nil || disk.Total == 0 {
|
|
// Disk offline, no inodes or something else is wrong.
|
|
continue
|
|
}
|
|
nDisks++
|
|
total += disk.Total
|
|
available += disk.Total - disk.Used
|
|
}
|
|
|
|
if nDisks < len(di)/2 || nDisks <= 0 {
|
|
return false, fmt.Errorf("not enough online disks to calculate the available space, expected (%d)/(%d)", (len(di)/2)+1, nDisks)
|
|
}
|
|
|
|
// Check we have enough on each disk, ignoring diskFillFraction.
|
|
perDisk := size / int64(nDisks)
|
|
for _, disk := range di {
|
|
if disk == nil || disk.Total == 0 {
|
|
continue
|
|
}
|
|
if disk.FreeInodes < diskMinInodes && disk.UsedInodes > 0 {
|
|
// We have an inode count, but not enough inodes.
|
|
return false, nil
|
|
}
|
|
if int64(disk.Free) <= perDisk {
|
|
return false, nil
|
|
}
|
|
}
|
|
|
|
// Make sure we can fit "size" on to the disk without getting above the diskFillFraction
|
|
if available < uint64(size) {
|
|
return false, nil
|
|
}
|
|
|
|
// How much will be left after adding the file.
|
|
available -= uint64(size)
|
|
|
|
// wantLeft is how much space there at least must be left.
|
|
wantLeft := uint64(float64(total) * (1.0 - diskFillFraction))
|
|
return available > wantLeft, nil
|
|
}
|