mirror of
https://github.com/minio/minio.git
synced 2025-01-13 07:53:21 -05:00
sql, csv: Cache some values between Read() calls to gain performance (#9645)
Below are the benchmark improvements after this commit:

benchmark                                            old ns/op    new ns/op    delta
BenchmarkRead-8                                      2807         2189         -22.02%
BenchmarkReadWithFieldsPerRecord-8                   2802         2179         -22.23%
BenchmarkReadWithoutFieldsPerRecord-8                2824         2181         -22.77%
BenchmarkReadLargeFields-8                           3584         3371         -5.94%
BenchmarkReadReuseRecord-8                           2044         1480         -27.59%
BenchmarkReadReuseRecordWithFieldsPerRecord-8        2056         1483         -27.87%
BenchmarkReadReuseRecordWithoutFieldsPerRecord-8     2047         1482         -27.60%
BenchmarkReadReuseRecordLargeFields-8                2777         2594         -6.59%

benchmark                                            old allocs   new allocs   delta
BenchmarkRead-8                                      26           16           -38.46%
BenchmarkReadWithFieldsPerRecord-8                   26           16           -38.46%
BenchmarkReadWithoutFieldsPerRecord-8                26           16           -38.46%
BenchmarkReadLargeFields-8                           36           24           -33.33%
BenchmarkReadReuseRecord-8                           16           6            -62.50%
BenchmarkReadReuseRecordWithFieldsPerRecord-8        16           6            -62.50%
BenchmarkReadReuseRecordWithoutFieldsPerRecord-8     16           6            -62.50%
BenchmarkReadReuseRecordLargeFields-8                24           12           -50.00%

benchmark                                            old bytes    new bytes    delta
BenchmarkRead-8                                      672          664          -1.19%
BenchmarkReadWithFieldsPerRecord-8                   672          664          -1.19%
BenchmarkReadWithoutFieldsPerRecord-8                672          664          -1.19%
BenchmarkReadLargeFields-8                           3948         3936         -0.30%
BenchmarkReadReuseRecord-8                           32           24           -25.00%
BenchmarkReadReuseRecordWithFieldsPerRecord-8        32           24           -25.00%
BenchmarkReadReuseRecordWithoutFieldsPerRecord-8     32           24           -25.00%
BenchmarkReadReuseRecordLargeFields-8                2988         2976         -0.40%
This commit is contained in:
parent
bede525dc9
commit
6542bc4a03
@ -171,6 +171,14 @@ type Reader struct {
|
|||||||
|
|
||||||
// lastRecord is a record cache and only used when ReuseRecord == true.
|
// lastRecord is a record cache and only used when ReuseRecord == true.
|
||||||
lastRecord []string
|
lastRecord []string
|
||||||
|
|
||||||
|
// Caching some values between Read() calls for performance gain
|
||||||
|
cached bool
|
||||||
|
cachedQuoteEscapeLen int
|
||||||
|
cachedQuoteLen int
|
||||||
|
cachedEncodedQuote []byte
|
||||||
|
cachedCommaLen int
|
||||||
|
cachedQuotes string
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewReader returns a new Reader that reads from r.
|
// NewReader returns a new Reader that reads from r.
|
||||||
@ -295,21 +303,20 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
|
|||||||
return nil, errRead
|
return nil, errRead
|
||||||
}
|
}
|
||||||
|
|
||||||
var quoteEscape = r.QuoteEscape
|
if !r.cached {
|
||||||
var quoteEscapeLen = utf8.RuneLen(quoteEscape)
|
r.cachedQuoteEscapeLen = utf8.RuneLen(r.QuoteEscape)
|
||||||
|
if len(r.Quote) > 0 {
|
||||||
var quote rune
|
r.cachedQuoteLen = utf8.RuneLen(r.Quote[0])
|
||||||
var quoteLen int
|
r.cachedEncodedQuote = encodeRune(r.Quote[0])
|
||||||
if len(r.Quote) > 0 {
|
r.cachedQuotes += string(r.Quote[0])
|
||||||
quote = r.Quote[0]
|
}
|
||||||
quoteLen = utf8.RuneLen(quote)
|
r.cachedCommaLen = utf8.RuneLen(r.Comma)
|
||||||
|
r.cachedQuotes += string(r.QuoteEscape)
|
||||||
|
r.cached = true
|
||||||
}
|
}
|
||||||
|
|
||||||
encodedQuote := encodeRune(quote)
|
|
||||||
|
|
||||||
// Parse each field in the record.
|
// Parse each field in the record.
|
||||||
var err error
|
var err error
|
||||||
commaLen := utf8.RuneLen(r.Comma)
|
|
||||||
recLine := r.numLine // Starting line for record
|
recLine := r.numLine // Starting line for record
|
||||||
r.recordBuffer = r.recordBuffer[:0]
|
r.recordBuffer = r.recordBuffer[:0]
|
||||||
r.fieldIndexes = r.fieldIndexes[:0]
|
r.fieldIndexes = r.fieldIndexes[:0]
|
||||||
@ -318,7 +325,7 @@ parseField:
|
|||||||
if r.TrimLeadingSpace {
|
if r.TrimLeadingSpace {
|
||||||
line = bytes.TrimLeftFunc(line, unicode.IsSpace)
|
line = bytes.TrimLeftFunc(line, unicode.IsSpace)
|
||||||
}
|
}
|
||||||
if len(line) == 0 || quoteLen == 0 || nextRune(line) != quote {
|
if len(line) == 0 || r.cachedQuoteLen == 0 || nextRune(line) != r.Quote[0] {
|
||||||
// Non-quoted string field
|
// Non-quoted string field
|
||||||
i := bytes.IndexRune(line, r.Comma)
|
i := bytes.IndexRune(line, r.Comma)
|
||||||
field := line
|
field := line
|
||||||
@ -329,7 +336,7 @@ parseField:
|
|||||||
}
|
}
|
||||||
// Check to make sure a quote does not appear in field.
|
// Check to make sure a quote does not appear in field.
|
||||||
if !r.LazyQuotes {
|
if !r.LazyQuotes {
|
||||||
if j := bytes.IndexRune(field, quote); j >= 0 {
|
if j := bytes.IndexRune(field, r.Quote[0]); j >= 0 {
|
||||||
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
|
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
|
||||||
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
|
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
|
||||||
break parseField
|
break parseField
|
||||||
@ -338,37 +345,37 @@ parseField:
|
|||||||
r.recordBuffer = append(r.recordBuffer, field...)
|
r.recordBuffer = append(r.recordBuffer, field...)
|
||||||
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
||||||
if i >= 0 {
|
if i >= 0 {
|
||||||
line = line[i+commaLen:]
|
line = line[i+r.cachedCommaLen:]
|
||||||
continue parseField
|
continue parseField
|
||||||
}
|
}
|
||||||
break parseField
|
break parseField
|
||||||
} else {
|
} else {
|
||||||
// Quoted string field
|
// Quoted string field
|
||||||
line = line[quoteLen:]
|
line = line[r.cachedQuoteLen:]
|
||||||
for {
|
for {
|
||||||
i := bytes.IndexAny(line, string(quote)+string(quoteEscape))
|
i := bytes.IndexAny(line, r.cachedQuotes)
|
||||||
if i >= 0 {
|
if i >= 0 {
|
||||||
// Hit next quote or escape quote
|
// Hit next quote or escape quote
|
||||||
r.recordBuffer = append(r.recordBuffer, line[:i]...)
|
r.recordBuffer = append(r.recordBuffer, line[:i]...)
|
||||||
|
|
||||||
escape := nextRune(line[i:]) == quoteEscape
|
escape := nextRune(line[i:]) == r.QuoteEscape
|
||||||
if escape {
|
if escape {
|
||||||
line = line[i+quoteEscapeLen:]
|
line = line[i+r.cachedQuoteEscapeLen:]
|
||||||
} else {
|
} else {
|
||||||
line = line[i+quoteLen:]
|
line = line[i+r.cachedQuoteLen:]
|
||||||
}
|
}
|
||||||
|
|
||||||
switch rn := nextRune(line); {
|
switch rn := nextRune(line); {
|
||||||
case escape && quoteEscape != quote:
|
case escape && r.QuoteEscape != r.Quote[0]:
|
||||||
r.recordBuffer = append(r.recordBuffer, encodeRune(rn)...)
|
r.recordBuffer = append(r.recordBuffer, encodeRune(rn)...)
|
||||||
line = line[utf8.RuneLen(rn):]
|
line = line[utf8.RuneLen(rn):]
|
||||||
case rn == quote:
|
case rn == r.Quote[0]:
|
||||||
// `""` sequence (append quote).
|
// `""` sequence (append quote).
|
||||||
r.recordBuffer = append(r.recordBuffer, encodedQuote...)
|
r.recordBuffer = append(r.recordBuffer, r.cachedEncodedQuote...)
|
||||||
line = line[quoteLen:]
|
line = line[r.cachedQuoteLen:]
|
||||||
case rn == r.Comma:
|
case rn == r.Comma:
|
||||||
// `",` sequence (end of field).
|
// `",` sequence (end of field).
|
||||||
line = line[commaLen:]
|
line = line[r.cachedCommaLen:]
|
||||||
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
r.fieldIndexes = append(r.fieldIndexes, len(r.recordBuffer))
|
||||||
continue parseField
|
continue parseField
|
||||||
case lengthNL(line) == len(line):
|
case lengthNL(line) == len(line):
|
||||||
@ -377,10 +384,10 @@ parseField:
|
|||||||
break parseField
|
break parseField
|
||||||
case r.LazyQuotes:
|
case r.LazyQuotes:
|
||||||
// `"` sequence (bare quote).
|
// `"` sequence (bare quote).
|
||||||
r.recordBuffer = append(r.recordBuffer, encodedQuote...)
|
r.recordBuffer = append(r.recordBuffer, r.cachedEncodedQuote...)
|
||||||
default:
|
default:
|
||||||
// `"*` sequence (invalid non-escaped quote).
|
// `"*` sequence (invalid non-escaped quote).
|
||||||
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])
|
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-r.cachedQuoteLen])
|
||||||
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
|
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrQuote}
|
||||||
break parseField
|
break parseField
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user