Avoid using fastjson parser pool, move back to jsoniter (#8190)

From an implementation point of view, it looks like the fastjson
parser pool doesn't behave as expected when dealing with many
`xl.json` files from multiple disks.

The fastjson parser pool usage ends up returning incorrect
xl.json entries for checksums, with references pointing
to older entries. This led to a subtle bug where checksum
info is duplicated from a previous xl.json read of a different
file from a different disk.
This commit is contained in:
Harshavardhana
2019-09-05 15:51:27 -07:00
committed by kannappanr
parent 428836d4e1
commit b52a3e523c
15 changed files with 169 additions and 430 deletions

View File

@@ -18,15 +18,13 @@ package cmd
import (
"context"
"encoding/hex"
"errors"
"hash/crc32"
"path"
"sync"
"time"
jsoniter "github.com/json-iterator/go"
"github.com/minio/minio/cmd/logger"
"github.com/valyala/fastjson"
)
// Returns number of errors that occurred the most (incl. nil) and the
@@ -117,167 +115,11 @@ func hashOrder(key string, cardinality int) []int {
return nums
}
// parseXLStat extracts the `stat` section of a parsed xl.json document.
//
// It reads `stat.modTime`, which must be a JSON string holding an RFC 3339
// timestamp, and `stat.size`, which must be a JSON number convertible to
// int64. An error is returned when either field is missing, has the wrong
// JSON type, or fails to parse; si should be ignored in that case.
func parseXLStat(v *fastjson.Value) (si statInfo, err error) {
	// Look fields up through Value.Get, which tolerates a nil receiver and
	// missing keys. Going through GetObject("stat") yields a nil *Object
	// when the section is absent, and calling Get on it (or StringBytes /
	// Int64 on a nil *Value) would dereference a nil pointer on corrupted
	// input instead of reporting an error.
	modTime := v.Get("stat", "modTime")
	if modTime == nil {
		return si, errors.New("xl.json: stat.modTime is missing")
	}
	mb, err := modTime.StringBytes()
	if err != nil {
		return si, err
	}

	// Parse the modification time.
	si.ModTime, err = time.Parse(time.RFC3339, string(mb))
	if err != nil {
		return si, err
	}

	// Parse the object size.
	size := v.Get("stat", "size")
	if size == nil {
		return si, errors.New("xl.json: stat.size is missing")
	}
	si.Size, err = size.Int64()
	if err != nil {
		return si, err
	}

	return si, nil
}
// parseXLVersion returns the top-level `version` field of the parsed
// xl.json document as a string.
func parseXLVersion(v *fastjson.Value) string {
	version := v.GetStringBytes("version")
	return string(version)
}
// parseXLFormat returns the top-level `format` field of the parsed
// xl.json document as a string.
func parseXLFormat(v *fastjson.Value) string {
	format := v.GetStringBytes("format")
	return string(format)
}
// parseXLRelease returns the `minio.release` field of the parsed xl.json
// document as a string.
func parseXLRelease(v *fastjson.Value) string {
	release := v.GetStringBytes("minio", "release")
	return string(release)
}
// parseXLErasureInfo extracts the `erasure` section of a parsed xl.json
// document into an ErasureInfo.
//
// It returns errCorruptedFormat when the `erasure` object is missing or a
// checksum entry has no `name`, errBitrotHashAlgoInvalid when a checksum
// names an unavailable bitrot algorithm, and the underlying error when a
// distribution entry is not an int or a checksum hash is not valid hex.
//
// The returned ErasureInfo owns all of its memory: nothing in it aliases
// buffers that belong to the fastjson parser that produced v.
func parseXLErasureInfo(ctx context.Context, v *fastjson.Value) (ErasureInfo, error) {
	erasure := ErasureInfo{}

	// GetObject returns a nil *Object when the section is absent, and
	// calling Get on that would dereference a nil pointer. Resolve the
	// section as a *Value instead and reject documents without it.
	er := v.Get("erasure")
	if er == nil || er.Type() != fastjson.TypeObject {
		return erasure, errCorruptedFormat
	}

	// Parse xlMetaV1.Erasure.Distribution.
	disResult := er.GetArray("distribution")
	distribution := make([]int, len(disResult))
	for i, dis := range disResult {
		d, err := dis.Int()
		if err != nil {
			return erasure, err
		}
		distribution[i] = d
	}
	erasure.Distribution = distribution

	erasure.Algorithm = string(er.GetStringBytes("algorithm"))
	erasure.DataBlocks = er.GetInt("data")
	erasure.ParityBlocks = er.GetInt("parity")
	erasure.BlockSize = er.GetInt64("blockSize")
	erasure.Index = er.GetInt("index")

	// Parse xlMetaV1.Erasure.Checksum array.
	checkSumsResult := er.GetArray("checksum")
	checkSums := make([]ChecksumInfo, len(checkSumsResult))
	for i, ck := range checkSumsResult {
		algorithm := BitrotAlgorithmFromString(string(ck.GetStringBytes("algorithm")))
		if !algorithm.Available() {
			logger.LogIf(ctx, errBitrotHashAlgoInvalid)
			return erasure, errBitrotHashAlgoInvalid
		}

		// Decode into a freshly allocated slice. The bytes returned by
		// GetStringBytes belong to the parser; decoding in place and
		// keeping a sub-slice would leave Hash pointing at memory that
		// is overwritten once the pooled parser parses another xl.json.
		srcHash := ck.GetStringBytes("hash")
		hash := make([]byte, hex.DecodedLen(len(srcHash)))
		n, err := hex.Decode(hash, srcHash)
		if err != nil {
			logger.LogIf(ctx, err)
			return erasure, err
		}

		name := ck.GetStringBytes("name")
		if name == nil {
			return erasure, errCorruptedFormat
		}

		checkSums[i] = ChecksumInfo{
			Name:      string(name),
			Algorithm: algorithm,
			Hash:      hash[:n],
		}
	}
	erasure.Checksums = checkSums

	return erasure, nil
}
// parseXLParts converts the entries of the xl.json `parts` array into
// ObjectPartInfo values, one per entry and in the same order. The
// `number`, `name`, `etag`, `size` and `actualSize` keys of each entry
// are read.
func parseXLParts(partsResult []*fastjson.Value) []ObjectPartInfo {
	parts := make([]ObjectPartInfo, 0, len(partsResult))
	for _, part := range partsResult {
		var info ObjectPartInfo
		info.Number = part.GetInt("number")
		info.Name = string(part.GetStringBytes("name"))
		info.ETag = string(part.GetStringBytes("etag"))
		info.Size = part.GetInt64("size")
		info.ActualSize = part.GetInt64("actualSize")
		parts = append(parts, info)
	}
	return parts
}
// parseXLMetaMap copies the key/value pairs of the xl.json `meta` object
// into a newly allocated map. Keys and values are converted to strings,
// so the result does not reference the parsed document.
func parseXLMetaMap(v *fastjson.Value) map[string]string {
	meta := map[string]string{}
	// Record one xlMetaV1.Meta entry per visited key.
	collect := func(key []byte, val *fastjson.Value) {
		meta[string(key)] = string(val.GetStringBytes())
	}
	v.GetObject("meta").Visit(collect)
	return meta
}
// xlParserPool is the pool of fastjson parsers used for parsing xl.json;
// xlMetaV1UnmarshalJSON takes a parser from it and puts it back on return.
var xlParserPool fastjson.ParserPool
// Constructs XLMetaV1 using `fastjson` lib to retrieve each field.
// Constructs xlMetaV1 using `jsoniter` lib.
//
// Parses the raw xl.json bytes in xlMetaBuf and returns the populated
// xlMetaV1 together with any parse or validation error.
//
// NOTE(review): this listing appears to interleave two implementations
// of the body: a field-by-field fastjson version that ends at
// `return xlMeta, nil`, and a jsoniter version that follows it. Confirm
// which one applies before relying on the comments below.
func xlMetaV1UnmarshalJSON(ctx context.Context, xlMetaBuf []byte) (xlMeta xlMetaV1, err error) {
// Borrow a parser from the pool; it is handed back when this function
// returns. NOTE(review): values derived from v presumably must not be
// retained after that point - verify against the fastjson documentation.
parser := xlParserPool.Get()
defer xlParserPool.Put(parser)
var v *fastjson.Value
v, err = parser.ParseBytes(xlMetaBuf)
if err != nil {
return xlMeta, err
}
// obtain version.
xlMeta.Version = parseXLVersion(v)
// obtain format.
xlMeta.Format = parseXLFormat(v)
// Validate if the xl.json we read is sane, return corrupted format.
if !isXLMetaFormatValid(xlMeta.Version, xlMeta.Format) {
// For version mismatches and unrecognized format, return corrupted format.
logger.LogIf(ctx, errCorruptedFormat)
return xlMeta, errCorruptedFormat
}
// Parse xlMetaV1.Stat.
stat, err := parseXLStat(v)
if err != nil {
logger.LogIf(ctx, err)
return xlMeta, err
}
xlMeta.Stat = stat
// parse the xlV1Meta.Erasure fields.
xlMeta.Erasure, err = parseXLErasureInfo(ctx, v)
if err != nil {
return xlMeta, err
}
// Check for scenario where checksum information missing for some parts.
partsResult := v.Get("parts").GetArray()
if len(xlMeta.Erasure.Checksums) != len(partsResult) {
return xlMeta, errCorruptedFormat
}
// Parse the XL Parts.
xlMeta.Parts = parseXLParts(partsResult)
// Get the xlMetaV1.Release field.
xlMeta.Minio.Release = parseXLRelease(v)
// parse xlMetaV1.
xlMeta.Meta = parseXLMetaMap(v)
return xlMeta, nil
// The statements below follow an unconditional return and are therefore
// unreachable as written; they decode xlMetaBuf directly into xlMeta
// with jsoniter's standard-library-compatible configuration.
var json = jsoniter.ConfigCompatibleWithStandardLibrary
err = json.Unmarshal(xlMetaBuf, &xlMeta)
return xlMeta, err
}
// read xl.json from the given disk, parse and return xlV1MetaV1.Parts.
@@ -298,7 +140,7 @@ func readXLMetaParts(ctx context.Context, disk StorageAPI, bucket string, object
return xlMeta.Parts, xlMeta.Meta, nil
}
// read xl.json from the given disk and parse xlV1Meta.Stat and xlV1Meta.Meta using fastjson.
// read xl.json from the given disk and parse xlV1Meta.Stat and xlV1Meta.Meta using jsoniter.
func readXLMetaStat(ctx context.Context, disk StorageAPI, bucket string, object string) (si statInfo,
mp map[string]string, e error) {
// Reads entire `xl.json`.