mirror of
https://github.com/minio/minio.git
synced 2025-11-09 21:49:46 -05:00
sql: Add support for a quote escape character in CSV (#9231)
This commit modifies the CSV parser, a fork of the Go CSV parser, to support a custom quote escape character. The quote escape character is used to escape the quote character when a CSV field contains a quote character as part of its data.
This commit is contained in:
@@ -113,9 +113,12 @@ type Reader struct {
|
||||
// or the Unicode replacement character (0xFFFD).
|
||||
Comma rune
|
||||
|
||||
// Quote is the single character used for marking fields limits
|
||||
// Quote is a single rune used for marking field limits
|
||||
Quote []rune
|
||||
|
||||
// QuoteEscape is a single rune to escape the quote character
|
||||
QuoteEscape rune
|
||||
|
||||
// Comment, if not 0, is the comment character. Lines beginning with the
|
||||
// Comment character without preceding whitespace are ignored.
|
||||
// With leading whitespace the Comment character becomes part of the
|
||||
@@ -173,9 +176,10 @@ type Reader struct {
|
||||
// NewReader returns a new Reader that reads from r.
|
||||
func NewReader(r io.Reader) *Reader {
|
||||
return &Reader{
|
||||
Comma: ',',
|
||||
Quote: []rune(`"`),
|
||||
r: bufio.NewReader(r),
|
||||
Comma: ',',
|
||||
Quote: []rune(`"`),
|
||||
QuoteEscape: '"',
|
||||
r: bufio.NewReader(r),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -291,6 +295,9 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
|
||||
return nil, errRead
|
||||
}
|
||||
|
||||
var quoteEscape = r.QuoteEscape
|
||||
var quoteEscapeLen = utf8.RuneLen(quoteEscape)
|
||||
|
||||
var quote rune
|
||||
var quoteLen int
|
||||
if len(r.Quote) > 0 {
|
||||
@@ -339,12 +346,22 @@ parseField:
|
||||
// Quoted string field
|
||||
line = line[quoteLen:]
|
||||
for {
|
||||
i := bytes.IndexRune(line, quote)
|
||||
i := bytes.IndexAny(line, string(quote)+string(quoteEscape))
|
||||
if i >= 0 {
|
||||
// Hit next quote.
|
||||
// Hit next quote or escape quote
|
||||
r.recordBuffer = append(r.recordBuffer, line[:i]...)
|
||||
line = line[i+quoteLen:]
|
||||
|
||||
escape := nextRune(line[i:]) == quoteEscape
|
||||
if escape {
|
||||
line = line[i+quoteEscapeLen:]
|
||||
} else {
|
||||
line = line[i+quoteLen:]
|
||||
}
|
||||
|
||||
switch rn := nextRune(line); {
|
||||
case escape && quoteEscape != quote:
|
||||
r.recordBuffer = append(r.recordBuffer, encodeRune(rn)...)
|
||||
line = line[utf8.RuneLen(rn):]
|
||||
case rn == quote:
|
||||
// `""` sequence (append quote).
|
||||
r.recordBuffer = append(r.recordBuffer, encodedQuote...)
|
||||
|
||||
@@ -30,6 +30,7 @@ import (
|
||||
type Writer struct {
|
||||
Comma rune // Field delimiter (set to ',' by NewWriter)
|
||||
Quote rune // Fields quote character
|
||||
QuoteEscape rune
|
||||
AlwaysQuote bool // True to quote all fields
|
||||
UseCRLF bool // True to use \r\n as the line terminator
|
||||
w *bufio.Writer
|
||||
@@ -38,9 +39,10 @@ type Writer struct {
|
||||
// NewWriter returns a new Writer that writes to w.
|
||||
func NewWriter(w io.Writer) *Writer {
|
||||
return &Writer{
|
||||
Comma: ',',
|
||||
Quote: '"',
|
||||
w: bufio.NewWriter(w),
|
||||
Comma: ',',
|
||||
Quote: '"',
|
||||
QuoteEscape: '"',
|
||||
w: bufio.NewWriter(w),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,7 +95,7 @@ func (w *Writer) Write(record []string) error {
|
||||
var err error
|
||||
switch nextRune([]byte(field)) {
|
||||
case w.Quote:
|
||||
_, err = w.w.WriteRune(w.Quote)
|
||||
_, err = w.w.WriteRune(w.QuoteEscape)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user