From dcc000ae2cf36dd3140c24072d386887cb62aa0d Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Mon, 27 Jan 2025 08:42:45 -0800 Subject: [PATCH] Allow URLs up to 32KB and improve parsing speed (#20874) Before/after... ``` Benchmark_hasBadPathComponent/long-32 43936 27232 ns/op 146.89 MB/s 32768 B/op 1 allocs/op Benchmark_hasBadPathComponent/long-32 89956 13375 ns/op 299.07 MB/s 0 B/op 0 allocs/op ``` * Remove unused. --- cmd/generic-handlers.go | 48 +++++++++++++++++++++++------------- cmd/generic-handlers_test.go | 25 +++++++++++++++++++ 2 files changed, 56 insertions(+), 17 deletions(-) diff --git a/cmd/generic-handlers.go b/cmd/generic-handlers.go index 67b906391..55986fdd8 100644 --- a/cmd/generic-handlers.go +++ b/cmd/generic-handlers.go @@ -22,11 +22,11 @@ import ( "net" "net/http" "path" - "path/filepath" "runtime/debug" "strings" "sync/atomic" "time" + "unicode" "github.com/dustin/go-humanize" "github.com/minio/minio-go/v7/pkg/s3utils" @@ -293,12 +293,6 @@ func parseAmzDateHeader(req *http.Request) (time.Time, APIErrorCode) { return time.Time{}, ErrMissingDateHeader } -// Bad path components to be rejected by the path validity handler. -const ( - dotdotComponent = ".." - dotComponent = "." -) - func hasBadHost(host string) error { if globalIsCICD && strings.TrimSpace(host) == "" { // under CI/CD test setups ignore empty hosts as invalid hosts @@ -311,21 +305,41 @@ func hasBadHost(host string) error { // Check if the incoming path has bad path components, // such as ".." and "." func hasBadPathComponent(path string) bool { - if len(path) > 4096 { - // path cannot be greater than Linux PATH_MAX - // this is to avoid a busy loop, that can happen - // if the caller sends path of following style - // a/a/a/a/a/a/a/a... + n := len(path) + if n > 32<<10 { + // At 32K we are beyond reasonable. return true } - path = filepath.ToSlash(strings.TrimSpace(path)) // For windows '\' must be converted to '/' - for _, p := range strings.Split(path, SlashSeparator) { - switch strings.TrimSpace(p) { - case dotdotComponent: + i := 0 + // Skip leading slashes (for sake of Windows \ is included as well) + for i < n && (path[i] == SlashSeparatorChar || path[i] == '\\') { + i++ + } + + for i < n { + // Find the next segment + start := i + for i < n && path[i] != SlashSeparatorChar && path[i] != '\\' { + i++ + } + + // Trim whitespace of segment + segmentStart, segmentEnd := start, i + for segmentStart < segmentEnd && unicode.IsSpace(rune(path[segmentStart])) { + segmentStart++ + } + for segmentEnd > segmentStart && unicode.IsSpace(rune(path[segmentEnd-1])) { + segmentEnd-- + } + + // Check for ".." or "." + switch { + case segmentEnd-segmentStart == 2 && path[segmentStart] == '.' && path[segmentStart+1] == '.': return true - case dotComponent: + case segmentEnd-segmentStart == 1 && path[segmentStart] == '.': return true } + i++ } return false } diff --git a/cmd/generic-handlers_test.go b/cmd/generic-handlers_test.go index f6cc6e07e..03813ebc0 100644 --- a/cmd/generic-handlers_test.go +++ b/cmd/generic-handlers_test.go @@ -22,6 +22,7 @@ import ( "net/http/httptest" "net/url" "strconv" + "strings" "testing" "github.com/minio/minio/internal/crypto" @@ -184,3 +185,27 @@ func TestSSETLSHandler(t *testing.T) { } } } + +func Benchmark_hasBadPathComponent(t *testing.B) { + tests := []struct { + name string + input string + want bool + }{ + {name: "empty", input: "", want: false}, + {name: "backslashes", input: `\a\a\ \\ \\\\\\\`, want: false}, + {name: "long", input: strings.Repeat("a/", 2000), want: false}, + {name: "long-fail", input: strings.Repeat("a/", 2000) + "../..", want: true}, + } + for _, tt := range tests { + t.Run(tt.name, func(b *testing.B) { + b.SetBytes(int64(len(tt.input))) + b.ReportAllocs() + for i := 0; i < b.N; i++ { + if got := hasBadPathComponent(tt.input); got != tt.want { + t.Fatalf("hasBadPathComponent() = %v, want %v", got, tt.want) + } + } + }) + } +}