sql: Add support of escape quote in CSV (#9231)

This commit modifies the CSV parser (a fork of the Go standard
library CSV parser) to support a custom quote escape character.

The quote escape character is used to escape the quote
character when a CSV field contains the quote character
as part of its data.
This commit is contained in:
Anis Elleuch
2020-04-01 23:39:34 +01:00
committed by GitHub
parent 7de29e6e6b
commit 9902c9baaa
12 changed files with 153 additions and 64 deletions

View File

@@ -113,9 +113,12 @@ type Reader struct {
// or the Unicode replacement character (0xFFFD).
Comma rune
// Quote is the single character used for marking fields limits
// Quote is a single rune used for marking fields limits
Quote []rune
// QuoteEscape is a single rune to escape the quote character
QuoteEscape rune
// Comment, if not 0, is the comment character. Lines beginning with the
// Comment character without preceding whitespace are ignored.
// With leading whitespace the Comment character becomes part of the
@@ -173,9 +176,10 @@ type Reader struct {
// NewReader returns a new Reader that reads from r.
func NewReader(r io.Reader) *Reader {
return &Reader{
Comma: ',',
Quote: []rune(`"`),
r: bufio.NewReader(r),
Comma: ',',
Quote: []rune(`"`),
QuoteEscape: '"',
r: bufio.NewReader(r),
}
}
@@ -291,6 +295,9 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
return nil, errRead
}
var quoteEscape = r.QuoteEscape
var quoteEscapeLen = utf8.RuneLen(quoteEscape)
var quote rune
var quoteLen int
if len(r.Quote) > 0 {
@@ -339,12 +346,22 @@ parseField:
// Quoted string field
line = line[quoteLen:]
for {
i := bytes.IndexRune(line, quote)
i := bytes.IndexAny(line, string(quote)+string(quoteEscape))
if i >= 0 {
// Hit next quote.
// Hit next quote or escape quote
r.recordBuffer = append(r.recordBuffer, line[:i]...)
line = line[i+quoteLen:]
escape := nextRune(line[i:]) == quoteEscape
if escape {
line = line[i+quoteEscapeLen:]
} else {
line = line[i+quoteLen:]
}
switch rn := nextRune(line); {
case escape && quoteEscape != quote:
r.recordBuffer = append(r.recordBuffer, encodeRune(rn)...)
line = line[utf8.RuneLen(rn):]
case rn == quote:
// `""` sequence (append quote).
r.recordBuffer = append(r.recordBuffer, encodedQuote...)

View File

@@ -30,6 +30,7 @@ import (
type Writer struct {
Comma rune // Field delimiter (set to ',' by NewWriter)
Quote rune // Fields quote character
QuoteEscape rune
AlwaysQuote bool // True to quote all fields
UseCRLF bool // True to use \r\n as the line terminator
w *bufio.Writer
@@ -38,9 +39,10 @@ type Writer struct {
// NewWriter returns a new Writer that writes to w.
func NewWriter(w io.Writer) *Writer {
return &Writer{
Comma: ',',
Quote: '"',
w: bufio.NewWriter(w),
Comma: ',',
Quote: '"',
QuoteEscape: '"',
w: bufio.NewWriter(w),
}
}
@@ -93,7 +95,7 @@ func (w *Writer) Write(record []string) error {
var err error
switch nextRune([]byte(field)) {
case w.Quote:
_, err = w.w.WriteRune(w.Quote)
_, err = w.w.WriteRune(w.QuoteEscape)
if err != nil {
break
}