fix: reduce using memory and temporary files. (#17206)

This commit is contained in:
Harshavardhana 2023-05-15 14:08:54 -07:00 committed by GitHub
parent d063596430
commit ef2fc0f99e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 157 additions and 178 deletions

View File

@ -2189,10 +2189,10 @@ func toAPIErrorCode(ctx context.Context, err error) (apiErr APIErrorCode) {
apiErr = ErrContentSHA256Mismatch
case hash.ChecksumMismatch:
apiErr = ErrContentChecksumMismatch
case ObjectTooLarge:
apiErr = ErrEntityTooLarge
case ObjectTooSmall:
case hash.SizeTooSmall:
apiErr = ErrEntityTooSmall
case hash.SizeTooLarge:
apiErr = ErrEntityTooLarge
case NotImplemented:
apiErr = ErrNotImplemented
case PartTooBig:

View File

@ -22,8 +22,10 @@ import (
"context"
"encoding/base64"
"encoding/xml"
"errors"
"fmt"
"io"
"mime/multipart"
"net/http"
"net/textproto"
"net/url"
@ -912,40 +914,110 @@ func (api objectAPIHandlers) PostPolicyBucketHandler(w http.ResponseWriter, r *h
reader, err := r.MultipartReader()
if err != nil {
apiErr := errorCodes.ToAPIErr(ErrMalformedPOSTRequest)
apiErr.Description = fmt.Sprintf("%s (%s)", apiErr.Description, err)
apiErr.Description = fmt.Sprintf("%s (%v)", apiErr.Description, err)
writeErrorResponse(ctx, w, apiErr, r.URL)
return
}
// Read multipart data and save in memory and in the disk if needed
form, err := reader.ReadForm(maxFormMemory)
var (
fileBody io.ReadCloser
fileName string
)
maxParts := 1000
// Canonicalize the form values into http.Header.
formValues := make(http.Header)
for {
part, err := reader.NextRawPart()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
apiErr := errorCodes.ToAPIErr(ErrMalformedPOSTRequest)
apiErr.Description = fmt.Sprintf("%s (%s)", apiErr.Description, err)
apiErr.Description = fmt.Sprintf("%s (%v)", apiErr.Description, err)
writeErrorResponse(ctx, w, apiErr, r.URL)
return
}
// Remove all tmp files created during multipart upload
defer form.RemoveAll()
// Extract all form fields
fileBody, fileName, fileSize, formValues, err := extractPostPolicyFormValues(ctx, form)
if err != nil {
if maxParts <= 0 {
apiErr := errorCodes.ToAPIErr(ErrMalformedPOSTRequest)
apiErr.Description = fmt.Sprintf("%s (%s)", apiErr.Description, err)
apiErr.Description = fmt.Sprintf("%s (%v)", apiErr.Description, multipart.ErrMessageTooLarge)
writeErrorResponse(ctx, w, apiErr, r.URL)
return
}
maxParts--
name := part.FormName()
if name == "" {
continue
}
fileName = part.FileName()
// Multiple values for the same key (one map entry, longer slice) are cheaper
// than the same number of values for different keys (many map entries), but
// using a consistent per-value cost for overhead is simpler.
const mapEntryOverhead = 200
maxMemoryBytes := 2 * int64(10<<20)
maxMemoryBytes -= int64(len(name))
maxMemoryBytes -= mapEntryOverhead
if maxMemoryBytes < 0 {
// We can't actually take this path, since nextPart would already have
// rejected the MIME headers for being too large. Check anyway.
apiErr := errorCodes.ToAPIErr(ErrMalformedPOSTRequest)
apiErr.Description = fmt.Sprintf("%s (%v)", apiErr.Description, multipart.ErrMessageTooLarge)
writeErrorResponse(ctx, w, apiErr, r.URL)
return
}
// Check if file is provided, error out otherwise.
if fileBody == nil {
writeErrorResponse(ctx, w, errorCodes.ToAPIErr(ErrPOSTFileRequired), r.URL)
var b bytes.Buffer
if fileName == "" {
// value, store as string in memory
n, err := io.CopyN(&b, part, maxMemoryBytes+1)
part.Close()
if err != nil && err != io.EOF {
apiErr := errorCodes.ToAPIErr(ErrMalformedPOSTRequest)
apiErr.Description = fmt.Sprintf("%s (%v)", apiErr.Description, err)
writeErrorResponse(ctx, w, apiErr, r.URL)
return
}
maxMemoryBytes -= n
if maxMemoryBytes < 0 {
apiErr := errorCodes.ToAPIErr(ErrMalformedPOSTRequest)
apiErr.Description = fmt.Sprintf("%s (%v)", apiErr.Description, multipart.ErrMessageTooLarge)
writeErrorResponse(ctx, w, apiErr, r.URL)
return
}
if n > maxFormFieldSize {
apiErr := errorCodes.ToAPIErr(ErrMalformedPOSTRequest)
apiErr.Description = fmt.Sprintf("%s (%v)", apiErr.Description, multipart.ErrMessageTooLarge)
writeErrorResponse(ctx, w, apiErr, r.URL)
return
}
formValues[http.CanonicalHeaderKey(name)] = append(formValues[http.CanonicalHeaderKey(name)], b.String())
continue
}
// Close multipart file
defer fileBody.Close()
// In accordance with https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectPOST.html
// The file or text content.
// The file or text content must be the last field in the form.
// You cannot upload more than one file at a time.
fileBody = part // we have found the File part of the request breakout
defer part.Close()
break
}
if _, ok := formValues["Key"]; !ok {
apiErr := errorCodes.ToAPIErr(ErrMalformedPOSTRequest)
apiErr.Description = fmt.Sprintf("%s (%v)", apiErr.Description, errors.New("The name of the uploaded key is missing"))
writeErrorResponse(ctx, w, apiErr, r.URL)
return
}
if fileName == "" {
apiErr := errorCodes.ToAPIErr(ErrMalformedPOSTRequest)
apiErr.Description = fmt.Sprintf("%s (%v)", apiErr.Description, errors.New("The file or text content is missing"))
writeErrorResponse(ctx, w, apiErr, r.URL)
return
}
formValues.Set("Bucket", bucket)
if fileName != "" && strings.Contains(formValues.Get("Key"), "${filename}") {
@ -995,6 +1067,13 @@ func (api objectAPIHandlers) PostPolicyBucketHandler(w http.ResponseWriter, r *h
return
}
hashReader, err := hash.NewReader(fileBody, -1, "", "", -1)
if err != nil {
logger.LogIf(ctx, err)
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
return
}
// Handle policy if it is set.
if len(policyBytes) > 0 {
postPolicyForm, err := parsePostPolicyForm(bytes.NewReader(policyBytes))
@ -1015,15 +1094,8 @@ func (api objectAPIHandlers) PostPolicyBucketHandler(w http.ResponseWriter, r *h
// should not exceed the maximum single Put size (5 GiB)
lengthRange := postPolicyForm.Conditions.ContentLengthRange
if lengthRange.Valid {
if fileSize < lengthRange.Min {
writeErrorResponse(ctx, w, toAPIError(ctx, errDataTooSmall), r.URL)
return
}
if fileSize > lengthRange.Max || isMaxObjectSize(fileSize) {
writeErrorResponse(ctx, w, toAPIError(ctx, errDataTooLarge), r.URL)
return
}
hashReader.SetExpectedMin(lengthRange.Min)
hashReader.SetExpectedMax(lengthRange.Max)
}
}
@ -1035,12 +1107,6 @@ func (api objectAPIHandlers) PostPolicyBucketHandler(w http.ResponseWriter, r *h
return
}
hashReader, err := hash.NewReader(fileBody, fileSize, "", "", fileSize)
if err != nil {
logger.LogIf(ctx, err)
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
return
}
rawReader := hashReader
pReader := NewPutObjReader(rawReader)
var objectEncryptionKey crypto.ObjectKey
@ -1095,9 +1161,8 @@ func (api objectAPIHandlers) PostPolicyBucketHandler(w http.ResponseWriter, r *h
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
return
}
info := ObjectInfo{Size: fileSize}
// do not try to verify encrypted content
hashReader, err = hash.NewReader(reader, info.EncryptedSize(), "", "", fileSize)
// do not try to verify encrypted content/
hashReader, err = hash.NewReader(reader, -1, "", "", -1)
if err != nil {
writeErrorResponse(ctx, w, toAPIError(ctx, err), r.URL)
return

View File

@ -89,9 +89,6 @@ const (
// can reach that size according to https://aws.amazon.com/articles/1434
maxFormFieldSize = int64(1 * humanize.MiByte)
// Limit memory allocation to store multipart data
maxFormMemory = int64(5 * humanize.MiByte)
// The maximum allowed time difference between the incoming request
// date and server date during signature verification.
globalMaxSkewTime = 15 * time.Minute // 15 minutes skew allowed.

View File

@ -18,12 +18,9 @@
package cmd
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"mime/multipart"
"net/http"
"net/textproto"
"regexp"
@ -270,87 +267,6 @@ func trimAwsChunkedContentEncoding(contentEnc string) (trimmedContentEnc string)
return strings.Join(newEncs, ",")
}
// Validate form field size for s3 specification requirement.
func validateFormFieldSize(ctx context.Context, formValues http.Header) error {
// Iterate over form values
for k := range formValues {
// Check if value's field exceeds S3 limit
if int64(len(formValues.Get(k))) > maxFormFieldSize {
logger.LogIf(ctx, errSizeUnexpected)
return errSizeUnexpected
}
}
// Success.
return nil
}
// Extract form fields and file data from a HTTP POST Policy
func extractPostPolicyFormValues(ctx context.Context, form *multipart.Form) (filePart io.ReadCloser, fileName string, fileSize int64, formValues http.Header, err error) {
// HTML Form values
fileName = ""
// Canonicalize the form values into http.Header.
formValues = make(http.Header)
for k, v := range form.Value {
formValues[http.CanonicalHeaderKey(k)] = v
}
// Validate form values.
if err = validateFormFieldSize(ctx, formValues); err != nil {
return nil, "", 0, nil, err
}
// this means that filename="" was not specified for file key and Go has
// an ugly way of handling this situation. Refer here
// https://golang.org/src/mime/multipart/formdata.go#L61
if len(form.File) == 0 {
b := &bytes.Buffer{}
for _, v := range formValues["File"] {
b.WriteString(v)
}
fileSize = int64(b.Len())
filePart = io.NopCloser(b)
return filePart, fileName, fileSize, formValues, nil
}
// Iterator until we find a valid File field and break
for k, v := range form.File {
canonicalFormName := http.CanonicalHeaderKey(k)
if canonicalFormName == "File" {
if len(v) == 0 {
logger.LogIf(ctx, errInvalidArgument)
return nil, "", 0, nil, errInvalidArgument
}
// Fetch fileHeader which has the uploaded file information
fileHeader := v[0]
// Set filename
fileName = fileHeader.Filename
// Open the uploaded part
filePart, err = fileHeader.Open()
if err != nil {
logger.LogIf(ctx, err)
return nil, "", 0, nil, err
}
// Compute file size
fileSize, err = filePart.(io.Seeker).Seek(0, 2)
if err != nil {
logger.LogIf(ctx, err)
return nil, "", 0, nil, err
}
// Reset Seek to the beginning
_, err = filePart.(io.Seeker).Seek(0, 0)
if err != nil {
logger.LogIf(ctx, err)
return nil, "", 0, nil, err
}
// File found and ready for reading
break
}
}
return filePart, fileName, fileSize, formValues, nil
}
func collectAPIStats(api string, f http.HandlerFunc) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
globalHTTPStats.currentS3Requests.Inc(api)

View File

@ -26,7 +26,6 @@ import (
"net/textproto"
"os"
"reflect"
"strings"
"testing"
"github.com/minio/minio/internal/config"
@ -96,44 +95,6 @@ func TestIsValidLocationContraint(t *testing.T) {
}
}
// Test validate form field size.
func TestValidateFormFieldSize(t *testing.T) {
testCases := []struct {
header http.Header
err error
}{
// Empty header returns error as nil,
{
header: nil,
err: nil,
},
// Valid header returns error as nil.
{
header: http.Header{
"Content-Type": []string{"image/png"},
},
err: nil,
},
// Invalid header value > maxFormFieldSize+1
{
header: http.Header{
"Garbage": []string{strings.Repeat("a", int(maxFormFieldSize)+1)},
},
err: errSizeUnexpected,
},
}
// Run validate form field size check under all test cases.
for i, testCase := range testCases {
err := validateFormFieldSize(context.Background(), testCase.header)
if err != nil {
if err.Error() != testCase.err.Error() {
t.Errorf("Test %d: Expected error %s, got %s", i+1, testCase.err, err)
}
}
}
}
// Tests validate metadata extraction from http headers.
func TestExtractMetadataHeaders(t *testing.T) {
testCases := []struct {

View File

@ -30,9 +30,6 @@ var errMethodNotAllowed = errors.New("Method not allowed")
// errSignatureMismatch means signature did not match.
var errSignatureMismatch = errors.New("Signature does not match")
// used when we deal with data larger than expected
var errSizeUnexpected = errors.New("Data size larger than expected")
// When upload object size is greater than 5G in a single PUT/POST operation.
var errDataTooLarge = errors.New("Object size larger than allowed limit")

View File

@ -42,13 +42,33 @@ func (e BadDigest) Error() string {
return "Bad digest: Expected " + e.ExpectedMD5 + " does not match calculated " + e.CalculatedMD5
}
// ErrSizeMismatch error size mismatch
type ErrSizeMismatch struct {
// SizeTooSmall reader size too small
type SizeTooSmall struct {
Want int64
Got int64
}
func (e ErrSizeMismatch) Error() string {
func (e SizeTooSmall) Error() string {
return fmt.Sprintf("Size small: got %d, want %d", e.Got, e.Want)
}
// SizeTooLarge reader size too large
type SizeTooLarge struct {
Want int64
Got int64
}
func (e SizeTooLarge) Error() string {
return fmt.Sprintf("Size large: got %d, want %d", e.Got, e.Want)
}
// SizeMismatch error size mismatch
type SizeMismatch struct {
Want int64
Got int64
}
func (e SizeMismatch) Error() string {
return fmt.Sprintf("Size mismatch: got %d, want %d", e.Got, e.Want)
}

View File

@ -41,6 +41,8 @@ import (
type Reader struct {
src io.Reader
bytesRead int64
expectedMin int64
expectedMax int64
size int64
actualSize int64
@ -111,7 +113,7 @@ func newReader(src io.Reader, size int64, md5Hex, sha256Hex string, actualSize i
}
}
if r.size >= 0 && size >= 0 && r.size != size {
return nil, ErrSizeMismatch{Want: r.size, Got: size}
return nil, SizeMismatch{Want: r.size, Got: size}
}
r.checksum = MD5
@ -171,6 +173,16 @@ func NewLimitReader(src io.Reader, size int64, md5Hex, sha256Hex string, actualS
// ErrInvalidChecksum is returned when an invalid checksum is provided in headers.
var ErrInvalidChecksum = errors.New("invalid checksum")
// SetExpectedMin set expected minimum data expected from reader
func (r *Reader) SetExpectedMin(expectedMin int64) {
r.expectedMin = expectedMin
}
// SetExpectedMax set expected max data expected from reader
func (r *Reader) SetExpectedMax(expectedMax int64) {
r.expectedMax = expectedMax
}
// AddChecksum will add checksum checks as specified in
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/checking-object-integrity.html
// Returns ErrInvalidChecksum if a problem with the checksum is found.
@ -209,6 +221,17 @@ func (r *Reader) Read(p []byte) (int, error) {
}
if err == io.EOF { // Verify content SHA256, if set.
if r.expectedMin > 0 {
if r.bytesRead < r.expectedMin {
return 0, SizeTooSmall{Want: r.expectedMin, Got: r.bytesRead}
}
}
if r.expectedMax > 0 {
if r.bytesRead > r.expectedMax {
return 0, SizeTooLarge{Want: r.expectedMax, Got: r.bytesRead}
}
}
if r.sha256 != nil {
if sum := r.sha256.Sum(nil); !bytes.Equal(r.contentSHA256, sum) {
return n, SHA256Mismatch{