Support configurable quote character parameter in Select (#8955)

This commit is contained in:
Anis Elleuch
2020-03-13 22:09:34 -07:00
committed by GitHub
parent 3ca9f5ffa3
commit 35ecc04223
18 changed files with 567 additions and 142 deletions

View File

@@ -113,6 +113,9 @@ type Reader struct {
// or the Unicode replacement character (0xFFFD).
Comma rune
// Quote is the single character used for marking fields limits
Quote []rune
// Comment, if not 0, is the comment character. Lines beginning with the
// Comment character without preceding whitespace are ignored.
// With leading whitespace the Comment character becomes part of the
@@ -171,6 +174,7 @@ type Reader struct {
func NewReader(r io.Reader) *Reader {
return &Reader{
Comma: ',',
Quote: []rune(`"`),
r: bufio.NewReader(r),
}
}
@@ -255,6 +259,13 @@ func nextRune(b []byte) rune {
return r
}
func encodeRune(r rune) []byte {
rlen := utf8.RuneLen(r)
p := make([]byte, rlen)
_ = utf8.EncodeRune(p, r)
return p
}
func (r *Reader) readRecord(dst []string) ([]string, error) {
if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
return nil, errInvalidDelim
@@ -280,9 +291,17 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
return nil, errRead
}
var quote rune
var quoteLen int
if len(r.Quote) > 0 {
quote = r.Quote[0]
quoteLen = utf8.RuneLen(quote)
}
encodedQuote := encodeRune(quote)
// Parse each field in the record.
var err error
const quoteLen = len(`"`)
commaLen := utf8.RuneLen(r.Comma)
recLine := r.numLine // Starting line for record
r.recordBuffer = r.recordBuffer[:0]
@@ -292,7 +311,7 @@ parseField:
if r.TrimLeadingSpace {
line = bytes.TrimLeftFunc(line, unicode.IsSpace)
}
if len(line) == 0 || line[0] != '"' {
if len(line) == 0 || quoteLen == 0 || nextRune(line) != quote {
// Non-quoted string field
i := bytes.IndexRune(line, r.Comma)
field := line
@@ -303,7 +322,7 @@ parseField:
}
// Check to make sure a quote does not appear in field.
if !r.LazyQuotes {
if j := bytes.IndexByte(field, '"'); j >= 0 {
if j := bytes.IndexRune(field, quote); j >= 0 {
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
break parseField
@@ -320,15 +339,15 @@ parseField:
// Quoted string field
line = line[quoteLen:]
for {
i := bytes.IndexByte(line, '"')
i := bytes.IndexRune(line, quote)
if i >= 0 {
// Hit next quote.
r.recordBuffer = append(r.recordBuffer, line[:i]...)
line = line[i+quoteLen:]
switch rn := nextRune(line); {
case rn == '"':
case rn == quote:
// `""` sequence (append quote).
r.recordBuffer = append(r.recordBuffer, '"')
r.recordBuffer = append(r.recordBuffer, encodedQuote...)
line = line[quoteLen:]
case rn == r.Comma:
// `",` sequence (end of field).
@@ -341,7 +360,7 @@ parseField:
break parseField
case r.LazyQuotes:
// `"` sequence (bare quote).
r.recordBuffer = append(r.recordBuffer, '"')
r.recordBuffer = append(r.recordBuffer, encodedQuote...)
default:
// `"*` sequence (invalid non-escaped quote).
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])