mirror of
https://github.com/minio/minio.git
synced 2025-11-10 05:59:43 -05:00
Support configurable quote character parameter in Select (#8955)
This commit is contained in:
@@ -18,8 +18,11 @@ package csv
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -55,68 +58,64 @@ func (args *ReaderArgs) IsEmpty() bool {
|
||||
}
|
||||
|
||||
// UnmarshalXML - decodes XML data.
|
||||
func (args *ReaderArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
// Make subtype to avoid recursive UnmarshalXML().
|
||||
type subReaderArgs ReaderArgs
|
||||
parsedArgs := subReaderArgs{}
|
||||
if err := d.DecodeElement(&parsedArgs, &start); err != nil {
|
||||
return err
|
||||
func (args *ReaderArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) {
|
||||
args.FileHeaderInfo = none
|
||||
args.RecordDelimiter = defaultRecordDelimiter
|
||||
args.FieldDelimiter = defaultFieldDelimiter
|
||||
args.QuoteCharacter = defaultQuoteCharacter
|
||||
args.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
|
||||
args.CommentCharacter = defaultCommentCharacter
|
||||
args.AllowQuotedRecordDelimiter = false
|
||||
|
||||
for {
|
||||
// Read tokens from the XML document in a stream.
|
||||
t, err := d.Token()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
switch se := t.(type) {
|
||||
case xml.StartElement:
|
||||
tagName := se.Name.Local
|
||||
switch tagName {
|
||||
case "AllowQuotedRecordDelimiter":
|
||||
var b bool
|
||||
if err = d.DecodeElement(&b, &se); err != nil {
|
||||
return err
|
||||
}
|
||||
args.AllowQuotedRecordDelimiter = b
|
||||
default:
|
||||
var s string
|
||||
if err = d.DecodeElement(&s, &se); err != nil {
|
||||
return err
|
||||
}
|
||||
switch tagName {
|
||||
case "FileHeaderInfo":
|
||||
args.FileHeaderInfo = strings.ToLower(s)
|
||||
case "RecordDelimiter":
|
||||
args.RecordDelimiter = s
|
||||
case "FieldDelimiter":
|
||||
args.FieldDelimiter = s
|
||||
case "QuoteCharacter":
|
||||
if utf8.RuneCountInString(s) > 1 {
|
||||
return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
|
||||
}
|
||||
args.QuoteCharacter = s
|
||||
// Not supported yet
|
||||
case "QuoteEscapeCharacter":
|
||||
case "Comments":
|
||||
args.CommentCharacter = s
|
||||
default:
|
||||
return errors.New("unrecognized option")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parsedArgs.FileHeaderInfo = strings.ToLower(parsedArgs.FileHeaderInfo)
|
||||
switch parsedArgs.FileHeaderInfo {
|
||||
case "":
|
||||
parsedArgs.FileHeaderInfo = none
|
||||
case none, use, ignore:
|
||||
default:
|
||||
return errInvalidFileHeaderInfo(fmt.Errorf("invalid FileHeaderInfo '%v'", parsedArgs.FileHeaderInfo))
|
||||
}
|
||||
|
||||
switch len([]rune(parsedArgs.RecordDelimiter)) {
|
||||
case 0:
|
||||
parsedArgs.RecordDelimiter = defaultRecordDelimiter
|
||||
case 1, 2:
|
||||
default:
|
||||
return fmt.Errorf("invalid RecordDelimiter '%v'", parsedArgs.RecordDelimiter)
|
||||
}
|
||||
|
||||
switch len([]rune(parsedArgs.FieldDelimiter)) {
|
||||
case 0:
|
||||
parsedArgs.FieldDelimiter = defaultFieldDelimiter
|
||||
case 1:
|
||||
default:
|
||||
return fmt.Errorf("invalid FieldDelimiter '%v'", parsedArgs.FieldDelimiter)
|
||||
}
|
||||
|
||||
switch parsedArgs.QuoteCharacter {
|
||||
case "":
|
||||
parsedArgs.QuoteCharacter = defaultQuoteCharacter
|
||||
case defaultQuoteCharacter:
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteCharacter '%v'", parsedArgs.QuoteCharacter)
|
||||
}
|
||||
|
||||
switch parsedArgs.QuoteEscapeCharacter {
|
||||
case "":
|
||||
parsedArgs.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
|
||||
case defaultQuoteEscapeCharacter:
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteEscapeCharacter '%v'", parsedArgs.QuoteEscapeCharacter)
|
||||
}
|
||||
|
||||
switch parsedArgs.CommentCharacter {
|
||||
case "":
|
||||
parsedArgs.CommentCharacter = defaultCommentCharacter
|
||||
case defaultCommentCharacter:
|
||||
default:
|
||||
return fmt.Errorf("unsupported Comments '%v'", parsedArgs.CommentCharacter)
|
||||
}
|
||||
|
||||
if parsedArgs.AllowQuotedRecordDelimiter {
|
||||
return fmt.Errorf("flag AllowQuotedRecordDelimiter is unsupported at the moment")
|
||||
}
|
||||
|
||||
*args = ReaderArgs(parsedArgs)
|
||||
args.QuoteEscapeCharacter = args.QuoteCharacter
|
||||
args.unmarshaled = true
|
||||
return nil
|
||||
}
|
||||
@@ -138,55 +137,54 @@ func (args *WriterArgs) IsEmpty() bool {
|
||||
|
||||
// UnmarshalXML - decodes XML data.
|
||||
func (args *WriterArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
// Make subtype to avoid recursive UnmarshalXML().
|
||||
type subWriterArgs WriterArgs
|
||||
parsedArgs := subWriterArgs{}
|
||||
if err := d.DecodeElement(&parsedArgs, &start); err != nil {
|
||||
return err
|
||||
|
||||
args.QuoteFields = asneeded
|
||||
args.RecordDelimiter = defaultRecordDelimiter
|
||||
args.FieldDelimiter = defaultFieldDelimiter
|
||||
args.QuoteCharacter = defaultQuoteCharacter
|
||||
args.QuoteEscapeCharacter = defaultQuoteCharacter
|
||||
|
||||
for {
|
||||
// Read tokens from the XML document in a stream.
|
||||
t, err := d.Token()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
switch se := t.(type) {
|
||||
case xml.StartElement:
|
||||
var s string
|
||||
if err = d.DecodeElement(&s, &se); err != nil {
|
||||
return err
|
||||
}
|
||||
switch se.Name.Local {
|
||||
case "QuoteFields":
|
||||
args.QuoteFields = strings.ToLower(s)
|
||||
case "RecordDelimiter":
|
||||
args.RecordDelimiter = s
|
||||
case "FieldDelimiter":
|
||||
args.FieldDelimiter = s
|
||||
case "QuoteCharacter":
|
||||
switch utf8.RuneCountInString(s) {
|
||||
case 0:
|
||||
args.QuoteCharacter = "\x00"
|
||||
case 1:
|
||||
args.QuoteCharacter = s
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
|
||||
}
|
||||
// Not supported yet
|
||||
case "QuoteEscapeCharacter":
|
||||
default:
|
||||
return errors.New("unrecognized option")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parsedArgs.QuoteFields = strings.ToLower(parsedArgs.QuoteFields)
|
||||
switch parsedArgs.QuoteFields {
|
||||
case "":
|
||||
parsedArgs.QuoteFields = asneeded
|
||||
case always, asneeded:
|
||||
default:
|
||||
return errInvalidQuoteFields(fmt.Errorf("invalid QuoteFields '%v'", parsedArgs.QuoteFields))
|
||||
}
|
||||
|
||||
switch len([]rune(parsedArgs.RecordDelimiter)) {
|
||||
case 0:
|
||||
parsedArgs.RecordDelimiter = defaultRecordDelimiter
|
||||
case 1, 2:
|
||||
default:
|
||||
return fmt.Errorf("invalid RecordDelimiter '%v'", parsedArgs.RecordDelimiter)
|
||||
}
|
||||
|
||||
switch len([]rune(parsedArgs.FieldDelimiter)) {
|
||||
case 0:
|
||||
parsedArgs.FieldDelimiter = defaultFieldDelimiter
|
||||
case 1:
|
||||
default:
|
||||
return fmt.Errorf("invalid FieldDelimiter '%v'", parsedArgs.FieldDelimiter)
|
||||
}
|
||||
|
||||
switch parsedArgs.QuoteCharacter {
|
||||
case "":
|
||||
parsedArgs.QuoteCharacter = defaultQuoteCharacter
|
||||
case defaultQuoteCharacter:
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteCharacter '%v'", parsedArgs.QuoteCharacter)
|
||||
}
|
||||
|
||||
switch parsedArgs.QuoteEscapeCharacter {
|
||||
case "":
|
||||
parsedArgs.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
|
||||
case defaultQuoteEscapeCharacter:
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteEscapeCharacter '%v'", parsedArgs.QuoteEscapeCharacter)
|
||||
}
|
||||
|
||||
*args = WriterArgs(parsedArgs)
|
||||
args.QuoteEscapeCharacter = args.QuoteCharacter
|
||||
args.unmarshaled = true
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -294,6 +294,11 @@ func NewReader(readCloser io.ReadCloser, args *ReaderArgs) (*Reader, error) {
|
||||
ret := csv.NewReader(r)
|
||||
ret.Comma = []rune(args.FieldDelimiter)[0]
|
||||
ret.Comment = []rune(args.CommentCharacter)[0]
|
||||
ret.Quote = []rune{}
|
||||
if len([]rune(args.QuoteCharacter)) > 0 {
|
||||
// Add the first rune of args.QuoteCharacter
|
||||
ret.Quote = append(ret.Quote, []rune(args.QuoteCharacter)[0])
|
||||
}
|
||||
ret.FieldsPerRecord = -1
|
||||
// If LazyQuotes is true, a quote may appear in an unquoted field and a
|
||||
// non-doubled quote may appear in a quoted field.
|
||||
|
||||
@@ -63,7 +63,7 @@ func TestRead(t *testing.T) {
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
record.WriteCSV(&result, []rune(c.fieldDelimiter)[0])
|
||||
record.WriteCSV(&result, []rune(c.fieldDelimiter)[0], '"', false)
|
||||
result.Truncate(result.Len() - 1)
|
||||
result.WriteString(c.recordDelimiter)
|
||||
}
|
||||
@@ -243,7 +243,7 @@ func TestReadExtended(t *testing.T) {
|
||||
}
|
||||
if fields < 10 {
|
||||
// Write with fixed delimiters, newlines.
|
||||
err := record.WriteCSV(&result, ',')
|
||||
err := record.WriteCSV(&result, ',', '"', false)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
@@ -454,7 +454,7 @@ func TestReadFailures(t *testing.T) {
|
||||
break
|
||||
}
|
||||
// Write with fixed delimiters, newlines.
|
||||
err := record.WriteCSV(&result, ',')
|
||||
err := record.WriteCSV(&result, ',', '"', false)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
@@ -92,9 +92,11 @@ func (r *Record) Clone(dst sql.Record) sql.Record {
|
||||
}
|
||||
|
||||
// WriteCSV - encodes to CSV data.
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune) error {
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune, quote rune, alwaysQuote bool) error {
|
||||
w := csv.NewWriter(writer)
|
||||
w.Comma = fieldDelimiter
|
||||
w.AlwaysQuote = alwaysQuote
|
||||
w.Quote = quote
|
||||
if err := w.Write(r.csvRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user