Improve performance on multiple versions (#13573)

Existing:

```go
type xlMetaV2 struct {
    Versions []xlMetaV2Version `json:"Versions" msg:"Versions"`
}
```

Serialized as regular MessagePack.

```go
//msgp:tuple xlMetaV2VersionHeader
type xlMetaV2VersionHeader struct {
	VersionID [16]byte
	ModTime   int64
	Type      VersionType
	Flags     xlFlags
}
```

Serialize as streaming MessagePack, format:

```
int(headerVersion)
int(xlmetaVersion)
int(nVersions)
for each version {
    binary blob, xlMetaV2VersionHeader, serialized
    binary blob, xlMetaV2Version, serialized.
}
```

xlMetaV2VersionHeader is <= 30 bytes serialized. Deserialized struct 
can easily be reused and does not contain pointers, so efficient as a 
slice (single allocation)

This allows quickly parsing everything as slices of bytes (no copy).

Versions are always *saved* sorted by modTime, newest *first*. 
No more need to sort on load.

* Allows checking if a version exists.
* Allows reading single version without unmarshal all.
* Allows reading latest version of type without unmarshal all.
* Allows reading latest version without unmarshal of all.
* Allows checking if the latest is deleteMarker by reading first entry.
* Allows adding/updating/deleting a version with only header deserialization.
* Reduces allocations on conversion to FileInfo(s).
This commit is contained in:
Klaus Post
2021-11-18 12:15:22 -08:00
committed by GitHub
parent 7152915318
commit faf013ec84
22 changed files with 3282 additions and 1463 deletions

View File

@@ -21,10 +21,14 @@ import (
"bytes"
"encoding/hex"
"encoding/json"
"fmt"
"math/rand"
"testing"
"time"
"github.com/dustin/go-humanize"
jsoniter "github.com/json-iterator/go"
xhttp "github.com/minio/minio/internal/http"
)
func TestIsXLMetaFormatValid(t *testing.T) {
@@ -317,3 +321,221 @@ func TestGetPartSizeFromIdx(t *testing.T) {
}
}
}
func BenchmarkXlMetaV2Shallow(b *testing.B) {
fi := FileInfo{
Volume: "volume",
Name: "object-name",
VersionID: "756100c6-b393-4981-928a-d49bbc164741",
IsLatest: true,
Deleted: false,
TransitionStatus: "PENDING",
DataDir: "bffea160-ca7f-465f-98bc-9b4f1c3ba1ef",
XLV1: false,
ModTime: time.Now(),
Size: 1234456,
Mode: 0,
Metadata: map[string]string{
xhttp.AmzRestore: "FAILED",
xhttp.ContentMD5: mustGetUUID(),
xhttp.AmzBucketReplicationStatus: "PENDING",
xhttp.ContentType: "application/json",
},
Parts: []ObjectPartInfo{{
Number: 1,
Size: 1234345,
ActualSize: 1234345,
},
{
Number: 2,
Size: 1234345,
ActualSize: 1234345,
},
},
Erasure: ErasureInfo{
Algorithm: ReedSolomon.String(),
DataBlocks: 4,
ParityBlocks: 2,
BlockSize: 10000,
Index: 1,
Distribution: []int{1, 2, 3, 4, 5, 6, 7, 8},
Checksums: []ChecksumInfo{{
PartNumber: 1,
Algorithm: HighwayHash256S,
Hash: nil,
},
{
PartNumber: 2,
Algorithm: HighwayHash256S,
Hash: nil,
},
},
},
}
for _, size := range []int{1, 10, 1000, 100_000} {
b.Run(fmt.Sprint(size, "-versions"), func(b *testing.B) {
var xl xlMetaV2
ids := make([]string, size)
for i := 0; i < size; i++ {
fi.VersionID = mustGetUUID()
fi.DataDir = mustGetUUID()
ids[i] = fi.VersionID
fi.ModTime = fi.ModTime.Add(-time.Second)
xl.AddVersion(fi)
}
// Encode all. This is used for benchmarking.
enc, err := xl.AppendTo(nil)
if err != nil {
b.Fatal(err)
}
b.Logf("Serialized size: %d bytes", len(enc))
rng := rand.New(rand.NewSource(0))
var dump = make([]byte, len(enc))
b.Run("UpdateObjectVersion", func(b *testing.B) {
b.SetBytes(int64(size))
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
// Load...
xl = xlMetaV2{}
err := xl.Load(enc)
if err != nil {
b.Fatal(err)
}
// Update modtime for resorting...
fi.ModTime = fi.ModTime.Add(-time.Second)
// Update a random version.
fi.VersionID = ids[rng.Intn(size)]
// Update...
err = xl.UpdateObjectVersion(fi)
if err != nil {
b.Fatal(err)
}
// Save...
dump, err = xl.AppendTo(dump[:0])
if err != nil {
b.Fatal(err)
}
}
})
b.Run("DeleteVersion", func(b *testing.B) {
b.SetBytes(int64(size))
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
// Load...
xl = xlMetaV2{}
err := xl.Load(enc)
if err != nil {
b.Fatal(err)
}
// Update a random version.
fi.VersionID = ids[rng.Intn(size)]
// Delete...
_, _, err = xl.DeleteVersion(fi)
if err != nil {
b.Fatal(err)
}
// Save...
dump, err = xl.AppendTo(dump[:0])
if err != nil {
b.Fatal(err)
}
}
})
b.Run("AddVersion", func(b *testing.B) {
b.SetBytes(int64(size))
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
// Load...
xl = xlMetaV2{}
err := xl.Load(enc)
if err != nil {
b.Fatal(err)
}
// Update modtime for resorting...
fi.ModTime = fi.ModTime.Add(-time.Second)
// Update a random version.
fi.VersionID = mustGetUUID()
// Add...
err = xl.AddVersion(fi)
if err != nil {
b.Fatal(err)
}
// Save...
dump, err = xl.AppendTo(dump[:0])
if err != nil {
b.Fatal(err)
}
}
})
b.Run("ToFileInfo", func(b *testing.B) {
b.SetBytes(int64(size))
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
// Load...
xl = xlMetaV2{}
err := xl.Load(enc)
if err != nil {
b.Fatal(err)
}
// List...
_, err = xl.ToFileInfo("volume", "path", ids[rng.Intn(size)])
if err != nil {
b.Fatal(err)
}
}
})
b.Run("ListVersions", func(b *testing.B) {
b.SetBytes(int64(size))
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
// Load...
xl = xlMetaV2{}
err := xl.Load(enc)
if err != nil {
b.Fatal(err)
}
// List...
_, err = xl.ListVersions("volume", "path")
if err != nil {
b.Fatal(err)
}
}
})
b.Run("ToFileInfoNew", func(b *testing.B) {
b.SetBytes(int64(size))
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
buf, _ := isIndexedMetaV2(enc)
if buf == nil {
b.Fatal("buf == nil")
}
_, err = buf.ToFileInfo("volume", "path", ids[rng.Intn(size)])
if err != nil {
b.Fatal(err)
}
}
})
b.Run("ListVersionsNew", func(b *testing.B) {
b.SetBytes(int64(size))
b.ResetTimer()
b.ReportAllocs()
for i := 0; i < b.N; i++ {
buf, _ := isIndexedMetaV2(enc)
if buf == nil {
b.Fatal("buf == nil")
}
_, err = buf.ListVersions("volume", "path")
if err != nil {
b.Fatal(err)
}
}
})
})
}
}