mirror of
https://github.com/minio/minio.git
synced 2025-01-11 15:03:22 -05:00
fix: [fs] CompleteMultipart use trie structure for partMatch (#10522)
performance improves by around 100x or more ``` go test -v -run NONE -bench BenchmarkGetPartFile goos: linux goarch: amd64 pkg: github.com/minio/minio/cmd BenchmarkGetPartFileWithTrie BenchmarkGetPartFileWithTrie-4 1000000000 0.140 ns/op 0 B/op 0 allocs/op PASS ok github.com/minio/minio/cmd 1.737s ``` fixes #10520
This commit is contained in:
parent
230fc0d186
commit
3831cc9e3b
@ -32,6 +32,7 @@ import (
|
||||
jsoniter "github.com/json-iterator/go"
|
||||
"github.com/minio/minio/cmd/logger"
|
||||
mioutil "github.com/minio/minio/pkg/ioutil"
|
||||
"github.com/minio/minio/pkg/trie"
|
||||
)
|
||||
|
||||
// Returns EXPORT/.minio.sys/multipart/SHA256/UPLOADID
|
||||
@ -577,7 +578,10 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
|
||||
// Calculate s3 compatible md5sum for complete multipart.
|
||||
s3MD5 := getCompleteMultipartMD5(parts)
|
||||
|
||||
partSize := int64(-1) // Used later to ensure that all parts sizes are same.
|
||||
// ensure that part ETag is canonicalized to strip off extraneous quotes
|
||||
for i := range parts {
|
||||
parts[i].ETag = canonicalizeETag(parts[i].ETag)
|
||||
}
|
||||
|
||||
fsMeta := fsMetaV1{}
|
||||
|
||||
@ -591,16 +595,17 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
|
||||
return oi, err
|
||||
}
|
||||
|
||||
// ensure that part ETag is canonicalized to strip off extraneous quotes
|
||||
for i := range parts {
|
||||
parts[i].ETag = canonicalizeETag(parts[i].ETag)
|
||||
// Create entries trie structure for prefix match
|
||||
entriesTrie := trie.NewTrie()
|
||||
for _, entry := range entries {
|
||||
entriesTrie.Insert(entry)
|
||||
}
|
||||
|
||||
// Save consolidated actual size.
|
||||
var objectActualSize int64
|
||||
// Validate all parts and then commit to disk.
|
||||
for i, part := range parts {
|
||||
partFile := getPartFile(entries, part.PartNumber, part.ETag)
|
||||
partFile := getPartFile(entriesTrie, part.PartNumber, part.ETag)
|
||||
if partFile == "" {
|
||||
return oi, InvalidPart{
|
||||
PartNumber: part.PartNumber,
|
||||
@ -628,9 +633,6 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
|
||||
}
|
||||
return oi, err
|
||||
}
|
||||
if partSize == -1 {
|
||||
partSize = actualSize
|
||||
}
|
||||
|
||||
fsMeta.Parts[i] = ObjectPartInfo{
|
||||
Number: part.PartNumber,
|
||||
@ -695,8 +697,16 @@ func (fs *FSObjects) CompleteMultipartUpload(ctx context.Context, bucket string,
|
||||
fsRemoveFile(ctx, file.filePath)
|
||||
}
|
||||
for _, part := range parts {
|
||||
partPath := getPartFile(entries, part.PartNumber, part.ETag)
|
||||
if err = mioutil.AppendFile(appendFilePath, pathJoin(uploadIDDir, partPath), globalFSOSync); err != nil {
|
||||
partFile := getPartFile(entriesTrie, part.PartNumber, part.ETag)
|
||||
if partFile == "" {
|
||||
logger.LogIf(ctx, fmt.Errorf("%.5d.%s missing will not proceed",
|
||||
part.PartNumber, part.ETag))
|
||||
return oi, InvalidPart{
|
||||
PartNumber: part.PartNumber,
|
||||
GotETag: part.ETag,
|
||||
}
|
||||
}
|
||||
if err = mioutil.AppendFile(appendFilePath, pathJoin(uploadIDDir, partFile), globalFSOSync); err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
return oi, toObjectErr(err)
|
||||
}
|
||||
|
10
cmd/main.go
10
cmd/main.go
@ -102,21 +102,19 @@ func newApp(name string) *cli.App {
|
||||
|
||||
findClosestCommands := func(command string) []string {
|
||||
var closestCommands []string
|
||||
for _, value := range commandsTree.PrefixMatch(command) {
|
||||
closestCommands = append(closestCommands, value.(string))
|
||||
}
|
||||
closestCommands = append(closestCommands, commandsTree.PrefixMatch(command)...)
|
||||
|
||||
sort.Strings(closestCommands)
|
||||
// Suggest other close commands - allow missed, wrongly added and
|
||||
// even transposed characters
|
||||
for _, value := range commandsTree.Walk(commandsTree.Root()) {
|
||||
if sort.SearchStrings(closestCommands, value.(string)) < len(closestCommands) {
|
||||
if sort.SearchStrings(closestCommands, value) < len(closestCommands) {
|
||||
continue
|
||||
}
|
||||
// 2 is arbitrary and represents the max
|
||||
// allowed number of typed errors
|
||||
if words.DamerauLevenshteinDistance(command, value.(string)) < 2 {
|
||||
closestCommands = append(closestCommands, value.(string))
|
||||
if words.DamerauLevenshteinDistance(command, value) < 2 {
|
||||
closestCommands = append(closestCommands, value)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -45,6 +45,7 @@ import (
|
||||
"github.com/minio/minio/cmd/logger"
|
||||
"github.com/minio/minio/pkg/hash"
|
||||
"github.com/minio/minio/pkg/ioutil"
|
||||
"github.com/minio/minio/pkg/trie"
|
||||
"github.com/minio/minio/pkg/wildcard"
|
||||
)
|
||||
|
||||
@ -487,13 +488,12 @@ func hasPattern(patterns []string, matchStr string) bool {
|
||||
}
|
||||
|
||||
// Returns the part file name which matches the partNumber and etag.
|
||||
func getPartFile(entries []string, partNumber int, etag string) string {
|
||||
for _, entry := range entries {
|
||||
if strings.HasPrefix(entry, fmt.Sprintf("%.5d.%s.", partNumber, etag)) {
|
||||
return entry
|
||||
func getPartFile(entriesTrie *trie.Trie, partNumber int, etag string) (partFile string) {
|
||||
for _, match := range entriesTrie.PrefixMatch(fmt.Sprintf("%.5d.%s.", partNumber, etag)) {
|
||||
partFile = match
|
||||
break
|
||||
}
|
||||
}
|
||||
return ""
|
||||
return partFile
|
||||
}
|
||||
|
||||
// Returns the compressed offset which should be skipped.
|
||||
|
@ -18,6 +18,7 @@ package cmd
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"reflect"
|
||||
@ -27,6 +28,7 @@ import (
|
||||
"github.com/klauspost/compress/s2"
|
||||
"github.com/minio/minio/cmd/config/compress"
|
||||
"github.com/minio/minio/cmd/crypto"
|
||||
"github.com/minio/minio/pkg/trie"
|
||||
)
|
||||
|
||||
// Tests validate bucket name.
|
||||
@ -440,40 +442,22 @@ func TestExcludeForCompression(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Test getPartFile function.
|
||||
func TestGetPartFile(t *testing.T) {
|
||||
testCases := []struct {
|
||||
entries []string
|
||||
partNumber int
|
||||
etag string
|
||||
result string
|
||||
}{
|
||||
{
|
||||
entries: []string{"00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", "fs.json", "00002.d73d8ab724016dfb051e2d3584495c54.32891137"},
|
||||
partNumber: 1,
|
||||
etag: "8a034f82cb9cb31140d87d3ce2a9ede3",
|
||||
result: "00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864",
|
||||
},
|
||||
{
|
||||
entries: []string{"00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", "fs.json", "00002.d73d8ab724016dfb051e2d3584495c54.32891137"},
|
||||
partNumber: 2,
|
||||
etag: "d73d8ab724016dfb051e2d3584495c54",
|
||||
result: "00002.d73d8ab724016dfb051e2d3584495c54.32891137",
|
||||
},
|
||||
{
|
||||
entries: []string{"00001.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", "fs.json", "00002.d73d8ab724016dfb051e2d3584495c54.32891137"},
|
||||
partNumber: 1,
|
||||
etag: "d73d8ab724016dfb051e2d3584495c54",
|
||||
result: "",
|
||||
},
|
||||
func BenchmarkGetPartFileWithTrie(b *testing.B) {
|
||||
b.ResetTimer()
|
||||
|
||||
entriesTrie := trie.NewTrie()
|
||||
for i := 1; i <= 10000; i++ {
|
||||
entriesTrie.Insert(fmt.Sprintf("%.5d.8a034f82cb9cb31140d87d3ce2a9ede3.67108864", i))
|
||||
}
|
||||
for i, test := range testCases {
|
||||
got := getPartFile(test.entries, test.partNumber, test.etag)
|
||||
if got != test.result {
|
||||
t.Errorf("Test %d - expected %s but received %s",
|
||||
i+1, test.result, got)
|
||||
|
||||
for i := 1; i <= 10000; i++ {
|
||||
partFile := getPartFile(entriesTrie, i, "8a034f82cb9cb31140d87d3ce2a9ede3")
|
||||
if partFile == "" {
|
||||
b.Fatal("partFile returned is empty")
|
||||
}
|
||||
}
|
||||
|
||||
b.ReportAllocs()
|
||||
}
|
||||
|
||||
func TestGetActualSize(t *testing.T) {
|
||||
|
@ -21,7 +21,7 @@ package trie
|
||||
// Node trie tree node container carries value and children.
|
||||
type Node struct {
|
||||
exists bool
|
||||
value interface{}
|
||||
value string
|
||||
child map[rune]*Node // runes as child.
|
||||
}
|
||||
|
||||
@ -29,7 +29,7 @@ type Node struct {
|
||||
func newNode() *Node {
|
||||
return &Node{
|
||||
exists: false,
|
||||
value: nil,
|
||||
value: "",
|
||||
child: make(map[rune]*Node),
|
||||
}
|
||||
}
|
||||
@ -65,16 +65,16 @@ func (t *Trie) Insert(key string) {
|
||||
}
|
||||
|
||||
// PrefixMatch - prefix match.
|
||||
func (t *Trie) PrefixMatch(key string) []interface{} {
|
||||
func (t *Trie) PrefixMatch(key string) []string {
|
||||
node, _ := t.findNode(key)
|
||||
if node != nil {
|
||||
return t.Walk(node)
|
||||
if node == nil {
|
||||
return nil
|
||||
}
|
||||
return []interface{}{}
|
||||
return t.Walk(node)
|
||||
}
|
||||
|
||||
// Walk the tree.
|
||||
func (t *Trie) Walk(node *Node) (ret []interface{}) {
|
||||
func (t *Trie) Walk(node *Node) (ret []string) {
|
||||
if node.exists {
|
||||
ret = append(ret, node.value)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user