mirror of
https://github.com/minio/minio.git
synced 2024-12-24 06:05:55 -05:00
sql, csv: Cache some values between Read() calls to gain performance (#9645)
Benchmark comparison before (old) and after (new) this commit:

benchmark                                            old ns/op     new ns/op     delta
BenchmarkRead-8                                      2807          2189          -22.02%
BenchmarkReadWithFieldsPerRecord-8                   2802          2179          -22.23%
BenchmarkReadWithoutFieldsPerRecord-8                2824          2181          -22.77%
BenchmarkReadLargeFields-8                           3584          3371          -5.94%
BenchmarkReadReuseRecord-8                           2044          1480          -27.59%
BenchmarkReadReuseRecordWithFieldsPerRecord-8        2056          1483          -27.87%
BenchmarkReadReuseRecordWithoutFieldsPerRecord-8     2047          1482          -27.60%
BenchmarkReadReuseRecordLargeFields-8                2777          2594          -6.59%

benchmark                                            old allocs     new allocs     delta
BenchmarkRead-8                                      26             16             -38.46%
BenchmarkReadWithFieldsPerRecord-8                   26             16             -38.46%
BenchmarkReadWithoutFieldsPerRecord-8                26             16             -38.46%
BenchmarkReadLargeFields-8                           36             24             -33.33%
BenchmarkReadReuseRecord-8                           16             6              -62.50%
BenchmarkReadReuseRecordWithFieldsPerRecord-8        16             6              -62.50%
BenchmarkReadReuseRecordWithoutFieldsPerRecord-8     16             6              -62.50%
BenchmarkReadReuseRecordLargeFields-8                24             12             -50.00%

benchmark                                            old bytes     new bytes     delta
BenchmarkRead-8                                      672           664           -1.19%
BenchmarkReadWithFieldsPerRecord-8                   672           664           -1.19%
BenchmarkReadWithoutFieldsPerRecord-8                672           664           -1.19%
BenchmarkReadLargeFields-8                           3948          3936          -0.30%
BenchmarkReadReuseRecord-8                           32            24            -25.00%
BenchmarkReadReuseRecordWithFieldsPerRecord-8        32            24            -25.00%
BenchmarkReadReuseRecordWithoutFieldsPerRecord-8     32            24            -25.00%
BenchmarkReadReuseRecordLargeFields-8                2988          2976          -0.40%
This commit is contained in:
parent
bede525dc9
commit
6542bc4a03
@ -171,6 +171,14 @@ type Reader struct {
|
||||
|
||||
// lastRecord is a record cache and only used when ReuseRecord == true.
|
||||
lastRecord []string
|
||||
|
||||
// Caching some values between Read() calls for performance gain
|
||||
cached bool
|
||||
cachedQuoteEscapeLen int
|
||||
cachedQuoteLen int
|
||||
cachedEncodedQuote []byte
|
||||
cachedCommaLen int
|
||||
cachedQuotes string
|
||||
}
|
||||
|
||||
// NewReader returns a new Reader that reads from r.
|
||||
@ -295,21 +303,20 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
|
||||
return nil, errRead
|
||||
}
|
||||
|
||||
var quoteEscape = r.QuoteEscape
|
||||
var quoteEscapeLen = utf8.RuneLen(quoteEscape)
|
||||
|
||||
var quote rune
|
||||
var quoteLen int
|
||||
if len(r.Quote) > 0 {
|
||||
quote = r.Quote[0]
|
||||
quoteLen = utf8.RuneLen(quote)
|
||||
if !r.cached {
|
||||
r.cachedQuoteEscapeLen = utf8.RuneLen(r.QuoteEscape)
|
||||
if len(r.Quote) > 0 {
|
||||
r.cachedQuoteLen = utf8.RuneLen(r.Quote[0])
|
||||
r.cachedEncodedQuote = encodeRune(r.Quote[0])
|
||||
r.cachedQuotes += string(r.Quote[0])
|
||||
}
|
||||
r.cachedCommaLen = utf8.RuneLen(r.Comma)
|
||||
r.cachedQuotes += string(r.QuoteEscape)
|
||||
r.cached = true
|
||||
}
|
||||
|
||||
encodedQuote := encodeRune(quote)
|
||||
|
||||
// Parse each field in the record.
|
||||
var err error
|
||||
commaLen := utf8.RuneLen(r.Comma)
|
||||
recLine := r.numLine // Starting line for record
|
||||
r.recordBuffer = r.recordBuffer[:0]
|
||||
r.fieldIndexes = r.fieldIndexes[:0]
|
||||
@ -318,7 +325,7 @@ parseField:
|
||||
if r.TrimLeadingSpace {
|
||||
line = bytes.TrimLeftFunc(line, unicode.IsSpace)
|
||||
}
|
||||
if len(line) == 0 || quoteLen == 0 || nextRune(line) != quote {
|
||||
if len(line) == 0 || r.cachedQuoteLen == 0 || nextRune(line) != r.Quote[0] {
|
||||
// Non-quoted string field
|
||||
i := bytes.IndexRune(line, r.Comma)
|
||||
field := line
|
||||
@ -329,7 +336,7 @@ parseField:
|
||||
}
|
||||
// Check to make sure a quote does not appear in field.
|
||||
if !r.LazyQuotes {
|
||||
if j := bytes.IndexRune(field, quote); j >= 0 {
|
||||
if j := bytes.IndexRune(field, r.Quote[0]); j >= 0 {
|
||||
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
|
||||
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
|
||||
break parseField
|
||||
@ -338,37 +345,37 @@ parseField:
|
||||
r.recordBuffer = append(r.recordBuffer, field...)
|
||||
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
||||
if i >= 0 {
|
||||
line = line[i+commaLen:]
|
||||
line = line[i+r.cachedCommaLen:]
|
||||
continue parseField
|
||||
}
|
||||
break parseField
|
||||
} else {
|
||||
// Quoted string field
|
||||
line = line[quoteLen:]
|
||||
line = line[r.cachedQuoteLen:]
|
||||
for {
|
||||
i := bytes.IndexAny(line, string(quote)+string(quoteEscape))
|
||||
i := bytes.IndexAny(line, r.cachedQuotes)
|
||||
if i >= 0 {
|
||||
// Hit next quote or escape quote
|
||||
r.recordBuffer = append(r.recordBuffer, line[:i]...)
|
||||
|
||||
escape := nextRune(line[i:]) == quoteEscape
|
||||
escape := nextRune(line[i:]) == r.QuoteEscape
|
||||
if escape {
|
||||
line = line[i+quoteEscapeLen:]
|
||||
line = line[i+r.cachedQuoteEscapeLen:]
|
||||
} else {
|
||||
line = line[i+quoteLen:]
|
||||
line = line[i+r.cachedQuoteLen:]
|
||||
}
|
||||
|
||||
switch rn := nextRune(line); {
|
||||
case escape && quoteEscape != quote:
|
||||
case escape && r.QuoteEscape != r.Quote[0]:
|
||||
r.recordBuffer = append(r.recordBuffer, encodeRune(rn)...)
|
||||
line = line[utf8.RuneLen(rn):]
|
||||
case rn == quote:
|
||||
case rn == r.Quote[0]:
|
||||
// `""` sequence (append quote).
|
||||
r.recordBuffer = append(r.recordBuffer, encodedQuote...)
|
||||
line = line[quoteLen:]
|
||||
r.recordBuffer = append(r.recordBuffer, r.cachedEncodedQuote...)
|
||||
line = line[r.cachedQuoteLen:]
|
||||
case rn == r.Comma:
|
||||
// `",` sequence (end of field).
|
||||
line = line[commaLen:]
|
||||
line = line[r.cachedCommaLen:]
|
||||
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
||||
continue parseField
|
||||
case lengthNL(line) == len(line):
|
||||
@ -377,10 +384,10 @@ parseField:
|
||||
break parseField
|
||||
case r.LazyQuotes:
|
||||
// `"` sequence (bare quote).
|
||||
r.recordBuffer = append(r.recordBuffer, encodedQuote...)
|
||||
r.recordBuffer = append(r.recordBuffer, r.cachedEncodedQuote...)
|
||||
default:
|
||||
// `"*` sequence (invalid non-escaped quote).
|
||||
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])
|
||||
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-r.cachedQuoteLen])
|
||||
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
|
||||
break parseField
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user