mirror of
https://github.com/minio/minio.git
synced 2024-12-26 15:15:55 -05:00
9902c9baaa
This commit modifies the CSV parser (a fork of the Go standard library's CSV parser) to support a custom quote-escape character. The quote-escape character is used to escape the quote character when a CSV field contains the quote character as part of its data.
180 lines
4.9 KiB
Go
180 lines
4.9 KiB
Go
// Copyright 2011 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in https://golang.org/LICENSE
|
|
|
|
package csv
|
|
|
|
import (
|
|
"bufio"
|
|
"io"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
// Writer encodes records as CSV and writes them through a buffer.
//
// A Writer obtained from NewWriter terminates each record with a newline,
// separates fields with ',' and uses '"' both as the quote character and as
// the quote escape character. The exported fields may be changed to
// customize the output, but only before the first call to Write or WriteAll.
//
// When UseCRLF is true, every output line ends with \r\n instead of \n.
//
// Individual records are buffered as they are written. Once all data has
// been written, the client should call Flush to guarantee that everything
// has been forwarded to the underlying io.Writer, and then call Error to
// check for any error that occurred along the way.
type Writer struct {
	// Comma is the field delimiter (set to ',' by NewWriter).
	Comma rune
	// Quote is the character that encloses quoted fields
	// (set to '"' by NewWriter).
	Quote rune
	// QuoteEscape is written in front of every Quote character that
	// occurs inside a quoted field (set to '"' by NewWriter).
	QuoteEscape rune
	// AlwaysQuote, when true, quotes all fields.
	AlwaysQuote bool
	// UseCRLF, when true, uses \r\n as the line terminator.
	UseCRLF bool

	// w buffers the encoded output.
	w *bufio.Writer
}
|
|
|
|
// NewWriter returns a new Writer that writes to w.
|
|
func NewWriter(w io.Writer) *Writer {
|
|
return &Writer{
|
|
Comma: ',',
|
|
Quote: '"',
|
|
QuoteEscape: '"',
|
|
w: bufio.NewWriter(w),
|
|
}
|
|
}
|
|
|
|
// Write writes a single CSV record to w along with any necessary quoting.
|
|
// A record is a slice of strings with each string being one field.
|
|
// Writes are buffered, so Flush must eventually be called to ensure
|
|
// that the record is written to the underlying io.Writer.
|
|
func (w *Writer) Write(record []string) error {
|
|
if !validDelim(w.Comma) {
|
|
return errInvalidDelim
|
|
}
|
|
|
|
for n, field := range record {
|
|
if n > 0 {
|
|
if _, err := w.w.WriteRune(w.Comma); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
// If we don't have to have a quoted field then just
|
|
// write out the field and continue to the next field.
|
|
if !w.AlwaysQuote && !w.fieldNeedsQuotes(field) {
|
|
if _, err := w.w.WriteString(field); err != nil {
|
|
return err
|
|
}
|
|
continue
|
|
}
|
|
|
|
if _, err := w.w.WriteRune(w.Quote); err != nil {
|
|
return err
|
|
}
|
|
|
|
specialChars := "\r\n" + string(w.Quote)
|
|
|
|
for len(field) > 0 {
|
|
// Search for special characters.
|
|
i := strings.IndexAny(field, specialChars)
|
|
if i < 0 {
|
|
i = len(field)
|
|
}
|
|
|
|
// Copy verbatim everything before the special character.
|
|
if _, err := w.w.WriteString(field[:i]); err != nil {
|
|
return err
|
|
}
|
|
field = field[i:]
|
|
|
|
// Encode the special character.
|
|
if len(field) > 0 {
|
|
var err error
|
|
switch nextRune([]byte(field)) {
|
|
case w.Quote:
|
|
_, err = w.w.WriteRune(w.QuoteEscape)
|
|
if err != nil {
|
|
break
|
|
}
|
|
_, err = w.w.WriteRune(w.Quote)
|
|
case '\r':
|
|
if !w.UseCRLF {
|
|
err = w.w.WriteByte('\r')
|
|
}
|
|
case '\n':
|
|
if w.UseCRLF {
|
|
_, err = w.w.WriteString("\r\n")
|
|
} else {
|
|
err = w.w.WriteByte('\n')
|
|
}
|
|
}
|
|
field = field[1:]
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
if _, err := w.w.WriteRune(w.Quote); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
var err error
|
|
if w.UseCRLF {
|
|
_, err = w.w.WriteString("\r\n")
|
|
} else {
|
|
err = w.w.WriteByte('\n')
|
|
}
|
|
return err
|
|
}
|
|
|
|
// Flush writes any buffered data to the underlying io.Writer.
|
|
// To check if an error occurred during the Flush, call Error.
|
|
func (w *Writer) Flush() {
|
|
w.w.Flush()
|
|
}
|
|
|
|
// Error reports any error that has occurred during a previous Write or Flush.
|
|
func (w *Writer) Error() error {
|
|
_, err := w.w.Write(nil)
|
|
return err
|
|
}
|
|
|
|
// WriteAll writes multiple CSV records to w using Write and then calls Flush,
|
|
// returning any error from the Flush.
|
|
func (w *Writer) WriteAll(records [][]string) error {
|
|
for _, record := range records {
|
|
err := w.Write(record)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return w.w.Flush()
|
|
}
|
|
|
|
// fieldNeedsQuotes reports whether our field must be enclosed in quotes.
|
|
// Fields with a Comma, fields with a quote or newline, and
|
|
// fields which start with a space must be enclosed in quotes.
|
|
// We used to quote empty strings, but we do not anymore (as of Go 1.4).
|
|
// The two representations should be equivalent, but Postgres distinguishes
|
|
// quoted vs non-quoted empty string during database imports, and it has
|
|
// an option to force the quoted behavior for non-quoted CSV but it has
|
|
// no option to force the non-quoted behavior for quoted CSV, making
|
|
// CSV with quoted empty strings strictly less useful.
|
|
// Not quoting the empty string also makes this package match the behavior
|
|
// of Microsoft Excel and Google Drive.
|
|
// For Postgres, quote the data terminating string `\.`.
|
|
func (w *Writer) fieldNeedsQuotes(field string) bool {
|
|
if field == "" {
|
|
return false
|
|
}
|
|
if field == `\.` || strings.ContainsAny(field, "\r\n"+string(w.Quote)+string(w.Comma)) {
|
|
return true
|
|
}
|
|
|
|
r1, _ := utf8.DecodeRuneInString(field)
|
|
return unicode.IsSpace(r1)
|
|
}
|