S3 Select: Concurrent LINES-delimited JSON parsing (#8610)

The speedup is ~5x on a 6-core CPU.
This commit is contained in:
Klaus Post
2019-12-09 06:55:31 -08:00
committed by Harshavardhana
parent 3b67f629a4
commit bf3a97d3aa
3 changed files with 338 additions and 1 deletions

View File

@@ -316,7 +316,11 @@ func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadClos
return err
}
s3Select.recordReader = json.NewReader(s3Select.progressReader, &s3Select.Input.JSONArgs)
if strings.EqualFold(s3Select.Input.JSONArgs.ContentType, "lines") {
s3Select.recordReader = json.NewPReader(s3Select.progressReader, &s3Select.Input.JSONArgs)
} else {
s3Select.recordReader = json.NewReader(s3Select.progressReader, &s3Select.Input.JSONArgs)
}
return nil
case parquetFormat:
var err error