Improve performance on multiple versions (#13573)

Existing:

```go
type xlMetaV2 struct {
    Versions []xlMetaV2Version `json:"Versions" msg:"Versions"`
}
```

Serialized as regular MessagePack.

```go
//msgp:tuple xlMetaV2VersionHeader
type xlMetaV2VersionHeader struct {
	VersionID [16]byte
	ModTime   int64
	Type      VersionType
	Flags     xlFlags
}
```

Serialize as streaming MessagePack, format:

```
int(headerVersion)
int(xlmetaVersion)
int(nVersions)
for each version {
    binary blob, xlMetaV2VersionHeader, serialized
    binary blob, xlMetaV2Version, serialized.
}
```

A serialized xlMetaV2VersionHeader is <= 30 bytes. The deserialized struct
can easily be reused and does not contain pointers, so it is efficient to
keep as a slice (single allocation).

This allows quickly parsing everything as slices of bytes (no copy).

Versions are always *saved* sorted by modTime, newest *first*. 
No more need to sort on load.

* Allows checking if a version exists.
* Allows reading a single version without unmarshaling all versions.
* Allows reading the latest version of a given type without unmarshaling all versions.
* Allows reading the latest version without unmarshaling all versions.
* Allows checking if the latest is deleteMarker by reading first entry.
* Allows adding/updating/deleting a version with only header deserialization.
* Reduces allocations on conversion to FileInfo(s).
This commit is contained in:
Klaus Post
2021-11-18 12:15:22 -08:00
committed by GitHub
parent 7152915318
commit faf013ec84
22 changed files with 3282 additions and 1463 deletions

View File

@@ -114,6 +114,48 @@ FLAGS:
return nil, err
}
data = b
case 3:
v, b, err := msgp.ReadBytesZC(b)
if err != nil {
return nil, err
}
if _, nbuf, err := msgp.ReadUint32Bytes(b); err == nil {
// Read metadata CRC (added in v2, ignore if not found)
b = nbuf
}
nVers, v, err := decodeXLHeaders(v)
if err != nil {
return nil, err
}
var versions = struct {
Versions []json.RawMessage
Headers []json.RawMessage
}{
Versions: make([]json.RawMessage, nVers),
Headers: make([]json.RawMessage, nVers),
}
err = decodeVersions(v, nVers, func(idx int, hdr, meta []byte) error {
var buf bytes.Buffer
if _, err := msgp.UnmarshalAsJSON(&buf, hdr); err != nil {
return err
}
versions.Headers[idx] = buf.Bytes()
buf = bytes.Buffer{}
if _, err := msgp.UnmarshalAsJSON(&buf, meta); err != nil {
return err
}
versions.Versions[idx] = buf.Bytes()
return nil
})
if err != nil {
return nil, err
}
enc := json.NewEncoder(buf)
if err := enc.Encode(versions); err != nil {
return nil, err
}
data = b
default:
return nil, fmt.Errorf("unknown metadata version %d", minor)
}
@@ -416,3 +458,54 @@ func (x xlMetaInlineData) files(fn func(name string, data []byte)) error {
return nil
}
// Version limits for the streaming metadata format.
// decodeXLHeaders rejects input whose header or meta version exceeds these.
const (
// xlHeaderVersion is the highest header version decodeXLHeaders accepts.
xlHeaderVersion = 2
// xlMetaVersion is the highest meta version decodeXLHeaders accepts.
xlMetaVersion = 1
)
// decodeXLHeaders reads the preamble of the streaming metadata format from
// buf: the header version, the meta version and the number of versions, in
// that order.
//
// On success it returns the version count and the bytes of buf that follow
// the preamble. An error is returned when any of the three fields cannot be
// read, when the header version exceeds xlHeaderVersion, when the meta
// version exceeds xlMetaVersion, or when the version count is negative.
func decodeXLHeaders(buf []byte) (versions int, b []byte, err error) {
	hdrVer, buf, err := msgp.ReadUintBytes(buf)
	if err != nil {
		return 0, buf, err
	}
	metaVer, buf, err := msgp.ReadUintBytes(buf)
	if err != nil {
		return 0, buf, err
	}
	if hdrVer > xlHeaderVersion {
		// Report the rejected header version itself; the previous message
		// printed the meta version here, which made the error misleading.
		return 0, buf, fmt.Errorf("decodeXLHeaders: Unknown xl header version %d", hdrVer)
	}
	if metaVer > xlMetaVersion {
		return 0, buf, fmt.Errorf("decodeXLHeaders: Unknown xl meta version %d", metaVer)
	}
	versions, buf, err = msgp.ReadIntBytes(buf)
	if err != nil {
		return 0, buf, err
	}
	// The count is read as a signed int, so guard against corrupt input
	// before callers use it to size slices.
	if versions < 0 {
		return 0, buf, fmt.Errorf("decodeXLHeaders: Negative version count %d", versions)
	}
	return versions, buf, nil
}
// decodeVersions reads `versions` consecutive (header, metadata) blob pairs
// from buf and hands each pair to fn together with its zero-based index.
// Entries are visited in the order they are stored, which is newest first.
//
// The hdr and meta slices passed to fn come from msgp.ReadBytesZC (zero
// copy), so they are not fresh allocations. Decoding stops at the first
// read failure or the first non-nil error from fn, and that error is
// returned unchanged.
func decodeVersions(buf []byte, versions int, fn func(idx int, hdr, meta []byte) error) (err error) {
	rest := buf
	for idx := 0; idx < versions; idx++ {
		var header, body []byte
		if header, rest, err = msgp.ReadBytesZC(rest); err != nil {
			return err
		}
		if body, rest, err = msgp.ReadBytesZC(rest); err != nil {
			return err
		}
		if err = fn(idx, header, body); err != nil {
			return err
		}
	}
	return nil
}