From 839b9c92713c90afd673f7a1980e94d20dd19da8 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Sat, 15 Apr 2023 10:25:25 -0700 Subject: [PATCH] Reduce allocations in Walkdir (#17036) --- cmd/metacache-walk.go | 23 +++++---- cmd/object-api-utils.go | 94 +++++++++++++++++++++++++++++++++++- cmd/object-api-utils_test.go | 67 +++++++++++++++++++++++++ 3 files changed, 174 insertions(+), 10 deletions(-) diff --git a/cmd/metacache-walk.go b/cmd/metacache-walk.go index 71dcafcb1..77573176b 100644 --- a/cmd/metacache-walk.go +++ b/cmd/metacache-walk.go @@ -18,6 +18,7 @@ package cmd import ( + "bytes" "context" "fmt" "io" @@ -126,6 +127,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ scanDir = func(current string) error { // Skip forward, if requested... + var sb bytes.Buffer forward := "" if len(opts.ForwardTo) > 0 && strings.HasPrefix(opts.ForwardTo, current) { forward = strings.TrimPrefix(opts.ForwardTo, current) @@ -160,6 +162,9 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ return nil } dirObjects := make(map[string]struct{}) + + // Avoid a bunch of cleanup when joining. + current = strings.Trim(current, SlashSeparator) for i, entry := range entries { if opts.Limit > 0 && objsReturned >= opts.Limit { return nil @@ -176,7 +181,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ entries[i] = "" continue } - if strings.HasSuffix(entry, slashSeparator) { + if hasSuffixByte(entry, SlashSeparatorChar) { if strings.HasSuffix(entry, globalDirSuffixWithSlash) { // Add without extension so it is sorted correctly. entry = strings.TrimSuffix(entry, globalDirSuffixWithSlash) + slashSeparator @@ -198,7 +203,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ if HasSuffix(entry, xlStorageFormatFile) { var meta metaCacheEntry s.walkReadMu.Lock() - meta.metadata, err = s.readMetadata(ctx, pathJoin(volumeDir, current, entry)) + meta.metadata, err = s.readMetadata(ctx, pathJoinBuf(&sb, volumeDir, current, entry)) s.walkReadMu.Unlock() diskHealthCheckOK(ctx, err) if err != nil { @@ -212,7 +217,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ } meta.name = strings.TrimSuffix(entry, xlStorageFormatFile) meta.name = strings.TrimSuffix(meta.name, SlashSeparator) - meta.name = pathJoin(current, meta.name) + meta.name = pathJoinBuf(&sb, current, meta.name) meta.name = decodeDirObject(meta.name) objReturned(meta.metadata) @@ -223,7 +228,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ if HasSuffix(entry, xlStorageFormatFileV1) { var meta metaCacheEntry s.walkReadMu.Lock() - meta.metadata, err = xioutil.ReadFile(pathJoin(volumeDir, current, entry)) + meta.metadata, err = xioutil.ReadFile(pathJoinBuf(&sb, volumeDir, current, entry)) s.walkReadMu.Unlock() diskHealthCheckOK(ctx, err) if err != nil { @@ -234,7 +239,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ } meta.name = strings.TrimSuffix(entry, xlStorageFormatFileV1) meta.name = strings.TrimSuffix(meta.name, SlashSeparator) - meta.name = pathJoin(current, meta.name) + meta.name = pathJoinBuf(&sb, current, meta.name) objReturned(meta.metadata) out <- meta @@ -267,7 +272,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ if contextCanceled(ctx) { return ctx.Err() } - meta := metaCacheEntry{name: pathJoin(current, entry)} + meta := metaCacheEntry{name: pathJoinBuf(&sb, current, entry)} // If directory entry on stack before this, pop it now. for len(dirStack) > 0 && dirStack[len(dirStack)-1] < meta.name { @@ -291,7 +296,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ } s.walkReadMu.Lock() - meta.metadata, err = s.readMetadata(ctx, pathJoin(volumeDir, meta.name, xlStorageFormatFile)) + meta.metadata, err = s.readMetadata(ctx, pathJoinBuf(&sb, volumeDir, meta.name, xlStorageFormatFile)) s.walkReadMu.Unlock() diskHealthCheckOK(ctx, err) switch { @@ -304,7 +309,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ out <- meta case osIsNotExist(err), isSysErrIsDir(err): - meta.metadata, err = xioutil.ReadFile(pathJoin(volumeDir, meta.name, xlStorageFormatFileV1)) + meta.metadata, err = xioutil.ReadFile(pathJoinBuf(&sb, volumeDir, meta.name, xlStorageFormatFileV1)) diskHealthCheckOK(ctx, err) if err == nil { // It was an object @@ -317,7 +322,7 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ // NOT an object, append to stack (with slash) // If dirObject, but no metadata (which is unexpected) we skip it. if !isDirObj { - if !isDirEmpty(pathJoin(volumeDir, meta.name+slashSeparator)) { + if !isDirEmpty(pathJoinBuf(&sb, volumeDir, meta.name)) { dirStack = append(dirStack, meta.name+slashSeparator) } } diff --git a/cmd/object-api-utils.go b/cmd/object-api-utils.go index 2f558baac..5a45366fc 100644 --- a/cmd/object-api-utils.go +++ b/cmd/object-api-utils.go @@ -209,6 +209,9 @@ func checkObjectNameForLengthAndSlash(bucket, object string) error { // SlashSeparator - slash separator. const SlashSeparator = "/" +// SlashSeparatorChar - slash separator. +const SlashSeparatorChar = '/' + // retainSlash - retains slash from a path. func retainSlash(s string) string { if s == "" { @@ -231,13 +234,102 @@ func pathsJoinPrefix(prefix string, elem ...string) (paths []string) { func pathJoin(elem ...string) string { trailingSlash := "" if len(elem) > 0 { - if HasSuffix(elem[len(elem)-1], SlashSeparator) { + if hasSuffixByte(elem[len(elem)-1], SlashSeparatorChar) { trailingSlash = SlashSeparator } } return path.Join(elem...) + trailingSlash } +// pathJoinBuf - like path.Join() but retains trailing SlashSeparator of the last element. +// Provide a string builder to reduce allocation. +func pathJoinBuf(dst *bytes.Buffer, elem ...string) string { + trailingSlash := len(elem) > 0 && hasSuffixByte(elem[len(elem)-1], SlashSeparatorChar) + dst.Reset() + added := 0 + for _, e := range elem { + if added > 0 || e != "" { + if added > 0 { + dst.WriteRune(SlashSeparatorChar) + } + dst.WriteString(e) + added += len(e) + } + } + + if pathNeedsClean(dst.Bytes()) { + s := path.Clean(dst.String()) + if trailingSlash { + return s + SlashSeparator + } + return s + } + if trailingSlash { + dst.WriteRune(SlashSeparatorChar) + } + return dst.String() +} + +// hasSuffixByte returns true if the last byte of s is 'suffix' +func hasSuffixByte(s string, suffix byte) bool { + return len(s) > 0 && s[len(s)-1] == suffix +} + +// pathNeedsClean returns whether path.Clean may change the path. +// Will detect all cases that will be cleaned, +// but may produce false positives on non-trivial paths. +func pathNeedsClean(path []byte) bool { + if len(path) == 0 { + return true + } + + rooted := path[0] == '/' + n := len(path) + + r, w := 0, 0 + if rooted { + r, w = 1, 1 + } + + for r < n { + switch { + case path[r] > 127: + // Non ascii. + return true + case path[r] == '/': + // multiple / elements + return true + case path[r] == '.' && (r+1 == n || path[r+1] == '/'): + // . element - assume it has to be cleaned. + return true + case path[r] == '.' && path[r+1] == '.' && (r+2 == n || path[r+2] == '/'): + // .. element: remove to last / - assume it has to be cleaned. + return true + default: + // real path element. + // add slash if needed + if rooted && w != 1 || !rooted && w != 0 { + w++ + } + // copy element + for ; r < n && path[r] != '/'; r++ { + w++ + } + // allow one slash, not at end + if r < n-1 && path[r] == '/' { + r++ + } + } + } + + // Turn empty string into "." + if w == 0 { + return true + } + + return false +} + // mustGetUUID - get a random UUID. func mustGetUUID() string { u, err := uuid.NewRandom() diff --git a/cmd/object-api-utils_test.go b/cmd/object-api-utils_test.go index 114b26a21..c2a0f4610 100644 --- a/cmd/object-api-utils_test.go +++ b/cmd/object-api-utils_test.go @@ -740,3 +740,70 @@ func TestS2CompressReader(t *testing.T) { }) } } + +func Test_pathNeedsClean(t *testing.T) { + type pathTest struct { + path, result string + } + + cleantests := []pathTest{ + // Already clean + {"", "."}, + {"abc", "abc"}, + {"abc/def", "abc/def"}, + {"a/b/c", "a/b/c"}, + {".", "."}, + {"..", ".."}, + {"../..", "../.."}, + {"../../abc", "../../abc"}, + {"/abc", "/abc"}, + {"/abc/def", "/abc/def"}, + {"/", "/"}, + + // Remove trailing slash + {"abc/", "abc"}, + {"abc/def/", "abc/def"}, + {"a/b/c/", "a/b/c"}, + {"./", "."}, + {"../", ".."}, + {"../../", "../.."}, + {"/abc/", "/abc"}, + + // Remove doubled slash + {"abc//def//ghi", "abc/def/ghi"}, + {"//abc", "/abc"}, + {"///abc", "/abc"}, + {"//abc//", "/abc"}, + {"abc//", "abc"}, + + // Remove . elements + {"abc/./def", "abc/def"}, + {"/./abc/def", "/abc/def"}, + {"abc/.", "abc"}, + + // Remove .. elements + {"abc/def/ghi/../jkl", "abc/def/jkl"}, + {"abc/def/../ghi/../jkl", "abc/jkl"}, + {"abc/def/..", "abc"}, + {"abc/def/../..", "."}, + {"/abc/def/../..", "/"}, + {"abc/def/../../..", ".."}, + {"/abc/def/../../..", "/"}, + {"abc/def/../../../ghi/jkl/../../../mno", "../../mno"}, + + // Combinations + {"abc/./../def", "def"}, + {"abc//./../def", "def"}, + {"abc/../../././../def", "../../def"}, + } + for _, test := range cleantests { + want := test.path != test.result + got := pathNeedsClean([]byte(test.path)) + if !got { + t.Logf("no clean: %q", test.path) + } + if want && !got { + t.Errorf("input: %q, want %v, got %v", test.path, want, got) + } + } +}