mirror of
https://github.com/minio/minio.git
synced 2025-11-10 05:59:43 -05:00
Support configurable quote character parameter in Select (#8955)
This commit is contained in:
@@ -18,8 +18,11 @@ package csv
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -55,68 +58,64 @@ func (args *ReaderArgs) IsEmpty() bool {
|
||||
}
|
||||
|
||||
// UnmarshalXML - decodes XML data.
|
||||
func (args *ReaderArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
// Make subtype to avoid recursive UnmarshalXML().
|
||||
type subReaderArgs ReaderArgs
|
||||
parsedArgs := subReaderArgs{}
|
||||
if err := d.DecodeElement(&parsedArgs, &start); err != nil {
|
||||
return err
|
||||
func (args *ReaderArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err error) {
|
||||
args.FileHeaderInfo = none
|
||||
args.RecordDelimiter = defaultRecordDelimiter
|
||||
args.FieldDelimiter = defaultFieldDelimiter
|
||||
args.QuoteCharacter = defaultQuoteCharacter
|
||||
args.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
|
||||
args.CommentCharacter = defaultCommentCharacter
|
||||
args.AllowQuotedRecordDelimiter = false
|
||||
|
||||
for {
|
||||
// Read tokens from the XML document in a stream.
|
||||
t, err := d.Token()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
switch se := t.(type) {
|
||||
case xml.StartElement:
|
||||
tagName := se.Name.Local
|
||||
switch tagName {
|
||||
case "AllowQuotedRecordDelimiter":
|
||||
var b bool
|
||||
if err = d.DecodeElement(&b, &se); err != nil {
|
||||
return err
|
||||
}
|
||||
args.AllowQuotedRecordDelimiter = b
|
||||
default:
|
||||
var s string
|
||||
if err = d.DecodeElement(&s, &se); err != nil {
|
||||
return err
|
||||
}
|
||||
switch tagName {
|
||||
case "FileHeaderInfo":
|
||||
args.FileHeaderInfo = strings.ToLower(s)
|
||||
case "RecordDelimiter":
|
||||
args.RecordDelimiter = s
|
||||
case "FieldDelimiter":
|
||||
args.FieldDelimiter = s
|
||||
case "QuoteCharacter":
|
||||
if utf8.RuneCountInString(s) > 1 {
|
||||
return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
|
||||
}
|
||||
args.QuoteCharacter = s
|
||||
// Not supported yet
|
||||
case "QuoteEscapeCharacter":
|
||||
case "Comments":
|
||||
args.CommentCharacter = s
|
||||
default:
|
||||
return errors.New("unrecognized option")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parsedArgs.FileHeaderInfo = strings.ToLower(parsedArgs.FileHeaderInfo)
|
||||
switch parsedArgs.FileHeaderInfo {
|
||||
case "":
|
||||
parsedArgs.FileHeaderInfo = none
|
||||
case none, use, ignore:
|
||||
default:
|
||||
return errInvalidFileHeaderInfo(fmt.Errorf("invalid FileHeaderInfo '%v'", parsedArgs.FileHeaderInfo))
|
||||
}
|
||||
|
||||
switch len([]rune(parsedArgs.RecordDelimiter)) {
|
||||
case 0:
|
||||
parsedArgs.RecordDelimiter = defaultRecordDelimiter
|
||||
case 1, 2:
|
||||
default:
|
||||
return fmt.Errorf("invalid RecordDelimiter '%v'", parsedArgs.RecordDelimiter)
|
||||
}
|
||||
|
||||
switch len([]rune(parsedArgs.FieldDelimiter)) {
|
||||
case 0:
|
||||
parsedArgs.FieldDelimiter = defaultFieldDelimiter
|
||||
case 1:
|
||||
default:
|
||||
return fmt.Errorf("invalid FieldDelimiter '%v'", parsedArgs.FieldDelimiter)
|
||||
}
|
||||
|
||||
switch parsedArgs.QuoteCharacter {
|
||||
case "":
|
||||
parsedArgs.QuoteCharacter = defaultQuoteCharacter
|
||||
case defaultQuoteCharacter:
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteCharacter '%v'", parsedArgs.QuoteCharacter)
|
||||
}
|
||||
|
||||
switch parsedArgs.QuoteEscapeCharacter {
|
||||
case "":
|
||||
parsedArgs.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
|
||||
case defaultQuoteEscapeCharacter:
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteEscapeCharacter '%v'", parsedArgs.QuoteEscapeCharacter)
|
||||
}
|
||||
|
||||
switch parsedArgs.CommentCharacter {
|
||||
case "":
|
||||
parsedArgs.CommentCharacter = defaultCommentCharacter
|
||||
case defaultCommentCharacter:
|
||||
default:
|
||||
return fmt.Errorf("unsupported Comments '%v'", parsedArgs.CommentCharacter)
|
||||
}
|
||||
|
||||
if parsedArgs.AllowQuotedRecordDelimiter {
|
||||
return fmt.Errorf("flag AllowQuotedRecordDelimiter is unsupported at the moment")
|
||||
}
|
||||
|
||||
*args = ReaderArgs(parsedArgs)
|
||||
args.QuoteEscapeCharacter = args.QuoteCharacter
|
||||
args.unmarshaled = true
|
||||
return nil
|
||||
}
|
||||
@@ -138,55 +137,54 @@ func (args *WriterArgs) IsEmpty() bool {
|
||||
|
||||
// UnmarshalXML - decodes XML data.
|
||||
func (args *WriterArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error {
|
||||
// Make subtype to avoid recursive UnmarshalXML().
|
||||
type subWriterArgs WriterArgs
|
||||
parsedArgs := subWriterArgs{}
|
||||
if err := d.DecodeElement(&parsedArgs, &start); err != nil {
|
||||
return err
|
||||
|
||||
args.QuoteFields = asneeded
|
||||
args.RecordDelimiter = defaultRecordDelimiter
|
||||
args.FieldDelimiter = defaultFieldDelimiter
|
||||
args.QuoteCharacter = defaultQuoteCharacter
|
||||
args.QuoteEscapeCharacter = defaultQuoteCharacter
|
||||
|
||||
for {
|
||||
// Read tokens from the XML document in a stream.
|
||||
t, err := d.Token()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
switch se := t.(type) {
|
||||
case xml.StartElement:
|
||||
var s string
|
||||
if err = d.DecodeElement(&s, &se); err != nil {
|
||||
return err
|
||||
}
|
||||
switch se.Name.Local {
|
||||
case "QuoteFields":
|
||||
args.QuoteFields = strings.ToLower(s)
|
||||
case "RecordDelimiter":
|
||||
args.RecordDelimiter = s
|
||||
case "FieldDelimiter":
|
||||
args.FieldDelimiter = s
|
||||
case "QuoteCharacter":
|
||||
switch utf8.RuneCountInString(s) {
|
||||
case 0:
|
||||
args.QuoteCharacter = "\x00"
|
||||
case 1:
|
||||
args.QuoteCharacter = s
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteCharacter '%v'", s)
|
||||
}
|
||||
// Not supported yet
|
||||
case "QuoteEscapeCharacter":
|
||||
default:
|
||||
return errors.New("unrecognized option")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
parsedArgs.QuoteFields = strings.ToLower(parsedArgs.QuoteFields)
|
||||
switch parsedArgs.QuoteFields {
|
||||
case "":
|
||||
parsedArgs.QuoteFields = asneeded
|
||||
case always, asneeded:
|
||||
default:
|
||||
return errInvalidQuoteFields(fmt.Errorf("invalid QuoteFields '%v'", parsedArgs.QuoteFields))
|
||||
}
|
||||
|
||||
switch len([]rune(parsedArgs.RecordDelimiter)) {
|
||||
case 0:
|
||||
parsedArgs.RecordDelimiter = defaultRecordDelimiter
|
||||
case 1, 2:
|
||||
default:
|
||||
return fmt.Errorf("invalid RecordDelimiter '%v'", parsedArgs.RecordDelimiter)
|
||||
}
|
||||
|
||||
switch len([]rune(parsedArgs.FieldDelimiter)) {
|
||||
case 0:
|
||||
parsedArgs.FieldDelimiter = defaultFieldDelimiter
|
||||
case 1:
|
||||
default:
|
||||
return fmt.Errorf("invalid FieldDelimiter '%v'", parsedArgs.FieldDelimiter)
|
||||
}
|
||||
|
||||
switch parsedArgs.QuoteCharacter {
|
||||
case "":
|
||||
parsedArgs.QuoteCharacter = defaultQuoteCharacter
|
||||
case defaultQuoteCharacter:
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteCharacter '%v'", parsedArgs.QuoteCharacter)
|
||||
}
|
||||
|
||||
switch parsedArgs.QuoteEscapeCharacter {
|
||||
case "":
|
||||
parsedArgs.QuoteEscapeCharacter = defaultQuoteEscapeCharacter
|
||||
case defaultQuoteEscapeCharacter:
|
||||
default:
|
||||
return fmt.Errorf("unsupported QuoteEscapeCharacter '%v'", parsedArgs.QuoteEscapeCharacter)
|
||||
}
|
||||
|
||||
*args = WriterArgs(parsedArgs)
|
||||
args.QuoteEscapeCharacter = args.QuoteCharacter
|
||||
args.unmarshaled = true
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -294,6 +294,11 @@ func NewReader(readCloser io.ReadCloser, args *ReaderArgs) (*Reader, error) {
|
||||
ret := csv.NewReader(r)
|
||||
ret.Comma = []rune(args.FieldDelimiter)[0]
|
||||
ret.Comment = []rune(args.CommentCharacter)[0]
|
||||
ret.Quote = []rune{}
|
||||
if len([]rune(args.QuoteCharacter)) > 0 {
|
||||
// Add the first rune of args.QuoteChracter
|
||||
ret.Quote = append(ret.Quote, []rune(args.QuoteCharacter)[0])
|
||||
}
|
||||
ret.FieldsPerRecord = -1
|
||||
// If LazyQuotes is true, a quote may appear in an unquoted field and a
|
||||
// non-doubled quote may appear in a quoted field.
|
||||
|
||||
@@ -63,7 +63,7 @@ func TestRead(t *testing.T) {
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
record.WriteCSV(&result, []rune(c.fieldDelimiter)[0])
|
||||
record.WriteCSV(&result, []rune(c.fieldDelimiter)[0], '"', false)
|
||||
result.Truncate(result.Len() - 1)
|
||||
result.WriteString(c.recordDelimiter)
|
||||
}
|
||||
@@ -243,7 +243,7 @@ func TestReadExtended(t *testing.T) {
|
||||
}
|
||||
if fields < 10 {
|
||||
// Write with fixed delimiters, newlines.
|
||||
err := record.WriteCSV(&result, ',')
|
||||
err := record.WriteCSV(&result, ',', '"', false)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
@@ -454,7 +454,7 @@ func TestReadFailures(t *testing.T) {
|
||||
break
|
||||
}
|
||||
// Write with fixed delimiters, newlines.
|
||||
err := record.WriteCSV(&result, ',')
|
||||
err := record.WriteCSV(&result, ',', '"', false)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
@@ -92,9 +92,11 @@ func (r *Record) Clone(dst sql.Record) sql.Record {
|
||||
}
|
||||
|
||||
// WriteCSV - encodes to CSV data.
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune) error {
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune, quote rune, alwaysQuote bool) error {
|
||||
w := csv.NewWriter(writer)
|
||||
w.Comma = fieldDelimiter
|
||||
w.AlwaysQuote = alwaysQuote
|
||||
w.Quote = quote
|
||||
if err := w.Write(r.csvRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
package json
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -27,6 +26,7 @@ import (
|
||||
"strings"
|
||||
|
||||
"github.com/bcicen/jstream"
|
||||
csv "github.com/minio/minio/pkg/csvparser"
|
||||
"github.com/minio/minio/pkg/s3select/sql"
|
||||
)
|
||||
|
||||
@@ -108,7 +108,7 @@ func (r *Record) Set(name string, value *sql.Value) (sql.Record, error) {
|
||||
}
|
||||
|
||||
// WriteCSV - encodes to CSV data.
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune) error {
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune, quote rune, alwaysQuote bool) error {
|
||||
var csvRecord []string
|
||||
for _, kv := range r.KVS {
|
||||
var columnValue string
|
||||
@@ -137,6 +137,8 @@ func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune) error {
|
||||
|
||||
w := csv.NewWriter(writer)
|
||||
w.Comma = fieldDelimiter
|
||||
w.Quote = quote
|
||||
w.AlwaysQuote = alwaysQuote
|
||||
if err := w.Write(csvRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -353,7 +353,10 @@ func (s3Select *S3Select) marshal(buf *bytes.Buffer, record sql.Record) error {
|
||||
}()
|
||||
|
||||
bufioWriter.Reset(buf)
|
||||
err := record.WriteCSV(bufioWriter, []rune(s3Select.Output.CSVArgs.FieldDelimiter)[0])
|
||||
err := record.WriteCSV(bufioWriter,
|
||||
[]rune(s3Select.Output.CSVArgs.FieldDelimiter)[0],
|
||||
[]rune(s3Select.Output.CSVArgs.QuoteCharacter)[0],
|
||||
strings.ToLower(s3Select.Output.CSVArgs.QuoteFields) == "always")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -252,6 +252,7 @@ func TestJSONQueries(t *testing.T) {
|
||||
</InputSerialization>
|
||||
<OutputSerialization>
|
||||
<CSV>
|
||||
<QuoteCharacter>"</QuoteCharacter>
|
||||
</CSV>
|
||||
</OutputSerialization>
|
||||
<RequestProgress>
|
||||
@@ -587,6 +588,7 @@ func TestCSVQueries2(t *testing.T) {
|
||||
<CompressionType>NONE</CompressionType>
|
||||
<CSV>
|
||||
<FileHeaderInfo>USE</FileHeaderInfo>
|
||||
<QuoteCharacter>"</QuoteCharacter>
|
||||
</CSV>
|
||||
</InputSerialization>
|
||||
<OutputSerialization>
|
||||
|
||||
@@ -131,11 +131,11 @@ func TestNDJSON(t *testing.T) {
|
||||
t.Error(err)
|
||||
}
|
||||
var gotB, wantB bytes.Buffer
|
||||
err = rec.WriteCSV(&gotB, ',')
|
||||
err = rec.WriteCSV(&gotB, ',', '"', false)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
err = want.WriteCSV(&wantB, ',')
|
||||
err = want.WriteCSV(&wantB, ',', '"', false)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
@@ -17,10 +17,11 @@
|
||||
package simdj
|
||||
|
||||
import (
|
||||
"encoding/csv"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
csv "github.com/minio/minio/pkg/csvparser"
|
||||
|
||||
"github.com/bcicen/jstream"
|
||||
"github.com/minio/minio/pkg/s3select/json"
|
||||
"github.com/minio/minio/pkg/s3select/sql"
|
||||
@@ -140,7 +141,7 @@ func (r *Record) Set(name string, value *sql.Value) (sql.Record, error) {
|
||||
}
|
||||
|
||||
// WriteCSV - encodes to CSV data.
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune) error {
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter, quote rune, alwaysQuote bool) error {
|
||||
csvRecord := make([]string, 0, 10)
|
||||
var tmp simdjson.Iter
|
||||
obj := r.object
|
||||
@@ -173,6 +174,8 @@ allElems:
|
||||
}
|
||||
w := csv.NewWriter(writer)
|
||||
w.Comma = fieldDelimiter
|
||||
w.Quote = quote
|
||||
w.AlwaysQuote = alwaysQuote
|
||||
if err := w.Write(csvRecord); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -46,7 +46,7 @@ type Record interface {
|
||||
// Set a value.
|
||||
// Can return a different record type.
|
||||
Set(name string, value *Value) (Record, error)
|
||||
WriteCSV(writer io.Writer, fieldDelimiter rune) error
|
||||
WriteCSV(writer io.Writer, fieldDelimiter, quote rune, alwaysQuote bool) error
|
||||
WriteJSON(writer io.Writer) error
|
||||
|
||||
// Clone the record and if possible use the destination provided.
|
||||
|
||||
Reference in New Issue
Block a user