fix: Use mime encode for Non-US-ASCII metadata (#21282)

This commit is contained in:
jiuker 2025-05-22 23:42:54 +08:00 committed by GitHub
parent 63e102c049
commit 12a6ea89cc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -23,6 +23,7 @@ import (
"encoding/json" "encoding/json"
"encoding/xml" "encoding/xml"
"fmt" "fmt"
"mime"
"net/http" "net/http"
"strconv" "strconv"
"strings" "strings"
@ -168,6 +169,32 @@ func setObjectHeaders(ctx context.Context, w http.ResponseWriter, objInfo Object
if !stringsHasPrefixFold(k, userMetadataPrefix) { if !stringsHasPrefixFold(k, userMetadataPrefix) {
continue continue
} }
// check the doc https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingMetadata.html
// For metadata values like "ö", "ÄMÄZÕÑ S3", and "öha, das sollte eigentlich
// funktionieren", tested against a real AWS S3 bucket, S3 may encode incorrectly. For
// example, "ö" was encoded as =?UTF-8?B?w4PCtg==?=, producing invalid UTF-8 instead
// of =?UTF-8?B?w7Y=?=. This mirrors errors like the ä½ in another string.
//
// S3 uses B-encoding (Base64) for non-ASCII-heavy metadata and Q-encoding
// (quoted-printable) for mostly ASCII strings. Long strings are split at word
// boundaries to fit RFC 2047s 75-character limit, ensuring HTTP parser
// compatibility.
//
// However, this splitting increases header size and can introduce errors, unlike Gos
// mime package in MinIO, which correctly encodes strings with fixed B/Q encodings,
// avoiding S3s heuristic-driven issues.
//
// For MinIO developers, decode S3 metadata with mime.WordDecoder, validate outputs,
// report encoding bugs to AWS, and use ASCII-only metadata to ensure reliable S3 API
// compatibility.
if needsMimeEncoding(v) {
// see https://github.com/golang/go/blob/release-branch.go1.24/src/net/mail/message.go#L325
if strings.ContainsAny(v, "\"#$%&'(),.:;<>@[]^`{|}~") {
v = mime.BEncoding.Encode("UTF-8", v)
} else {
v = mime.QEncoding.Encode("UTF-8", v)
}
}
w.Header()[strings.ToLower(k)] = []string{v} w.Header()[strings.ToLower(k)] = []string{v}
isSet = true isSet = true
break break
@ -229,3 +256,14 @@ func setObjectHeaders(ctx context.Context, w http.ResponseWriter, objInfo Object
return nil return nil
} }
// needsEncoding reports whether s contains any bytes that need to be encoded.
// see mime.needsEncoding
func needsMimeEncoding(s string) bool {
for _, b := range s {
if (b < ' ' || b > '~') && b != '\t' {
return true
}
}
return false
}