mirror of
https://github.com/minio/minio.git
synced 2025-11-10 05:59:43 -05:00
sql: Add support of escape quote in CSV (#9231)
This commit modifies the CSV parser, a fork of the Go CSV parser, to support a custom quote escape character. The quote escape character is used to escape the quote character when a CSV field contains a quote character as part of its data.
This commit is contained in:
@@ -104,8 +104,15 @@ func (args *ReaderArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (er
|
||||
return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
|
||||
}
|
||||
args.QuoteCharacter = s
|
||||
// Not supported yet
|
||||
case "QuoteEscapeCharacter":
|
||||
switch utf8.RuneCountInString(s) {
|
||||
case 0:
|
||||
args.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
|
||||
case 1:
|
||||
args.QuoteEscapeCharacter = s
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteEscapeCharacter '%v'", s)
|
||||
}
|
||||
case "Comments":
|
||||
args.CommentCharacter = s
|
||||
default:
|
||||
@@ -115,7 +122,6 @@ func (args *ReaderArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (er
|
||||
}
|
||||
}
|
||||
|
||||
args.QuoteEscapeCharacter = args.QuoteCharacter
|
||||
args.unmarshaled = true
|
||||
return nil
|
||||
}
|
||||
@@ -176,15 +182,21 @@ func (args *WriterArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) err
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
|
||||
}
|
||||
// Not supported yet
|
||||
case "QuoteEscapeCharacter":
|
||||
switch utf8.RuneCountInString(s) {
|
||||
case 0:
|
||||
args.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
|
||||
case 1:
|
||||
args.QuoteEscapeCharacter = s
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
|
||||
}
|
||||
default:
|
||||
return errors.New("unrecognized option")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
args.QuoteEscapeCharacter = args.QuoteCharacter
|
||||
args.unmarshaled = true
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -299,6 +299,7 @@ func NewReader(readCloser io.ReadCloser, args *ReaderArgs) (*Reader, error) {
|
||||
// Add the first rune of args.QuoteCharacter
|
||||
ret.Quote = append(ret.Quote, []rune(args.QuoteCharacter)[0])
|
||||
}
|
||||
ret.QuoteEscape = []rune(args.QuoteEscapeCharacter)[0]
|
||||
ret.FieldsPerRecord = -1
|
||||
// If LazyQuotes is true, a quote may appear in an unquoted field and a
|
||||
// non-doubled quote may appear in a quoted field.
|
||||
|
||||
@@ -63,7 +63,13 @@ func TestRead(t *testing.T) {
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
record.WriteCSV(&result, []rune(c.fieldDelimiter)[0], '"', false)
|
||||
opts := sql.WriteCSVOpts{
|
||||
FieldDelimiter: []rune(c.fieldDelimiter)[0],
|
||||
Quote: '"',
|
||||
QuoteEscape: '"',
|
||||
AlwaysQuote: false,
|
||||
}
|
||||
record.WriteCSV(&result, opts)
|
||||
result.Truncate(result.Len() - 1)
|
||||
result.WriteString(c.recordDelimiter)
|
||||
}
|
||||
@@ -242,8 +248,14 @@ func TestReadExtended(t *testing.T) {
|
||||
break
|
||||
}
|
||||
if fields < 10 {
|
||||
opts := sql.WriteCSVOpts{
|
||||
FieldDelimiter: ',',
|
||||
Quote: '"',
|
||||
QuoteEscape: '"',
|
||||
AlwaysQuote: false,
|
||||
}
|
||||
// Write with fixed delimiters, newlines.
|
||||
err := record.WriteCSV(&result, ',', '"', false)
|
||||
err := record.WriteCSV(&result, opts)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
@@ -453,8 +465,15 @@ func TestReadFailures(t *testing.T) {
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
|
||||
opts := sql.WriteCSVOpts{
|
||||
FieldDelimiter: ',',
|
||||
Quote: '"',
|
||||
QuoteEscape: '"',
|
||||
AlwaysQuote: false,
|
||||
}
|
||||
// Write with fixed delimiters, newlines.
|
||||
err := record.WriteCSV(&result, ',', '"', false)
|
||||
err := record.WriteCSV(&result, opts)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
@@ -92,11 +92,12 @@ func (r *Record) Clone(dst sql.Record) sql.Record {
|
||||
}
|
||||
|
||||
// WriteCSV - encodes to CSV data.
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune, quote rune, alwaysQuote bool) error {
|
||||
func (r *Record) WriteCSV(writer io.Writer, opts sql.WriteCSVOpts) error {
|
||||
w := csv.NewWriter(writer)
|
||||
w.Comma = fieldDelimiter
|
||||
w.AlwaysQuote = alwaysQuote
|
||||
w.Quote = quote
|
||||
w.Comma = opts.FieldDelimiter
|
||||
w.AlwaysQuote = opts.AlwaysQuote
|
||||
w.Quote = opts.Quote
|
||||
w.QuoteEscape = opts.QuoteEscape
|
||||
if err := w.Write(r.csvRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -108,7 +108,7 @@ func (r *Record) Set(name string, value *sql.Value) (sql.Record, error) {
|
||||
}
|
||||
|
||||
// WriteCSV - encodes to CSV data.
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune, quote rune, alwaysQuote bool) error {
|
||||
func (r *Record) WriteCSV(writer io.Writer, opts sql.WriteCSVOpts) error {
|
||||
var csvRecord []string
|
||||
for _, kv := range r.KVS {
|
||||
var columnValue string
|
||||
@@ -136,9 +136,10 @@ func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune, quote rune, alw
|
||||
}
|
||||
|
||||
w := csv.NewWriter(writer)
|
||||
w.Comma = fieldDelimiter
|
||||
w.Quote = quote
|
||||
w.AlwaysQuote = alwaysQuote
|
||||
w.Comma = opts.FieldDelimiter
|
||||
w.Quote = opts.Quote
|
||||
w.AlwaysQuote = opts.AlwaysQuote
|
||||
w.QuoteEscape = opts.QuoteEscape
|
||||
if err := w.Write(csvRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -353,10 +353,13 @@ func (s3Select *S3Select) marshal(buf *bytes.Buffer, record sql.Record) error {
|
||||
}()
|
||||
|
||||
bufioWriter.Reset(buf)
|
||||
err := record.WriteCSV(bufioWriter,
|
||||
[]rune(s3Select.Output.CSVArgs.FieldDelimiter)[0],
|
||||
[]rune(s3Select.Output.CSVArgs.QuoteCharacter)[0],
|
||||
strings.ToLower(s3Select.Output.CSVArgs.QuoteFields) == "always")
|
||||
opts := sql.WriteCSVOpts{
|
||||
FieldDelimiter: []rune(s3Select.Output.CSVArgs.FieldDelimiter)[0],
|
||||
Quote: []rune(s3Select.Output.CSVArgs.QuoteCharacter)[0],
|
||||
QuoteEscape: []rune(s3Select.Output.CSVArgs.QuoteEscapeCharacter)[0],
|
||||
AlwaysQuote: strings.ToLower(s3Select.Output.CSVArgs.QuoteFields) == "always",
|
||||
}
|
||||
err := record.WriteCSV(bufioWriter, opts)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -25,6 +25,7 @@ import (
|
||||
|
||||
"github.com/klauspost/compress/zstd"
|
||||
"github.com/minio/minio/pkg/s3select/json"
|
||||
"github.com/minio/minio/pkg/s3select/sql"
|
||||
"github.com/minio/simdjson-go"
|
||||
)
|
||||
|
||||
@@ -131,11 +132,17 @@ func TestNDJSON(t *testing.T) {
|
||||
t.Error(err)
|
||||
}
|
||||
var gotB, wantB bytes.Buffer
|
||||
err = rec.WriteCSV(&gotB, ',', '"', false)
|
||||
opts := sql.WriteCSVOpts{
|
||||
FieldDelimiter: ',',
|
||||
Quote: '"',
|
||||
QuoteEscape: '"',
|
||||
AlwaysQuote: false,
|
||||
}
|
||||
err = rec.WriteCSV(&gotB, opts)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
err = want.WriteCSV(&wantB, ',', '"', false)
|
||||
err = want.WriteCSV(&wantB, opts)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
@@ -141,7 +141,7 @@ func (r *Record) Set(name string, value *sql.Value) (sql.Record, error) {
|
||||
}
|
||||
|
||||
// WriteCSV - encodes to CSV data.
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter, quote rune, alwaysQuote bool) error {
|
||||
func (r *Record) WriteCSV(writer io.Writer, opts sql.WriteCSVOpts) error {
|
||||
csvRecord := make([]string, 0, 10)
|
||||
var tmp simdjson.Iter
|
||||
obj := r.object
|
||||
@@ -173,9 +173,10 @@ allElems:
|
||||
csvRecord = append(csvRecord, columnValue)
|
||||
}
|
||||
w := csv.NewWriter(writer)
|
||||
w.Comma = fieldDelimiter
|
||||
w.Quote = quote
|
||||
w.AlwaysQuote = alwaysQuote
|
||||
w.Comma = opts.FieldDelimiter
|
||||
w.Quote = opts.Quote
|
||||
w.QuoteEscape = opts.QuoteEscape
|
||||
w.AlwaysQuote = opts.AlwaysQuote
|
||||
if err := w.Write(csvRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -39,6 +39,14 @@ const (
|
||||
SelectFmtParquet
|
||||
)
|
||||
|
||||
// WriteCSVOpts - encapsulates options for Select CSV output
|
||||
type WriteCSVOpts struct {
|
||||
FieldDelimiter rune
|
||||
Quote rune
|
||||
QuoteEscape rune
|
||||
AlwaysQuote bool
|
||||
}
|
||||
|
||||
// Record - is a type containing columns and their values.
|
||||
type Record interface {
|
||||
Get(name string) (*Value, error)
|
||||
@@ -46,7 +54,7 @@ type Record interface {
|
||||
// Set a value.
|
||||
// Can return a different record type.
|
||||
Set(name string, value *Value) (Record, error)
|
||||
WriteCSV(writer io.Writer, fieldDelimiter, quote rune, alwaysQuote bool) error
|
||||
WriteCSV(writer io.Writer, opts WriteCSVOpts) error
|
||||
WriteJSON(writer io.Writer) error
|
||||
|
||||
// Clone the record and if possible use the destination provided.
|
||||
|
||||
Reference in New Issue
Block a user