mirror of
https://github.com/minio/minio.git
synced 2025-11-09 21:49:46 -05:00
Support configurable quote character parameter in Select (#8955)
This commit is contained in:
@@ -113,6 +113,9 @@ type Reader struct {
|
||||
// or the Unicode replacement character (0xFFFD).
|
||||
Comma rune
|
||||
|
||||
// Quote is the character used to mark the limits of quoted fields.
// Only the first rune of the slice is used.
|
||||
Quote []rune
|
||||
|
||||
// Comment, if not 0, is the comment character. Lines beginning with the
|
||||
// Comment character without preceding whitespace are ignored.
|
||||
// With leading whitespace the Comment character becomes part of the
|
||||
@@ -171,6 +174,7 @@ type Reader struct {
|
||||
func NewReader(r io.Reader) *Reader {
|
||||
return &Reader{
|
||||
Comma: ',',
|
||||
Quote: []rune(`"`),
|
||||
r: bufio.NewReader(r),
|
||||
}
|
||||
}
|
||||
@@ -255,6 +259,13 @@ func nextRune(b []byte) rune {
|
||||
return r
|
||||
}
|
||||
|
||||
// encodeRune returns the UTF-8 encoding of r as a freshly allocated byte
// slice whose capacity equals its length.
//
// Invalid runes (surrogate halves, negative values, values above
// utf8.MaxRune) are encoded as the Unicode replacement character U+FFFD,
// which is what utf8.EncodeRune writes for them, instead of panicking.
func encodeRune(r rune) []byte {
	// utf8.RuneLen reports -1 for invalid runes, so it cannot be used to size
	// the buffer directly. Allocate the widest possible encoding and trim to
	// the number of bytes actually written.
	p := make([]byte, utf8.UTFMax)
	n := utf8.EncodeRune(p, r)
	return p[:n:n]
}
|
||||
|
||||
func (r *Reader) readRecord(dst []string) ([]string, error) {
|
||||
if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
|
||||
return nil, errInvalidDelim
|
||||
@@ -280,9 +291,17 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
|
||||
return nil, errRead
|
||||
}
|
||||
|
||||
var quote rune
|
||||
var quoteLen int
|
||||
if len(r.Quote) > 0 {
|
||||
quote = r.Quote[0]
|
||||
quoteLen = utf8.RuneLen(quote)
|
||||
}
|
||||
|
||||
encodedQuote := encodeRune(quote)
|
||||
|
||||
// Parse each field in the record.
|
||||
var err error
|
||||
const quoteLen = len(`"`)
|
||||
commaLen := utf8.RuneLen(r.Comma)
|
||||
recLine := r.numLine // Starting line for record
|
||||
r.recordBuffer = r.recordBuffer[:0]
|
||||
@@ -292,7 +311,7 @@ parseField:
|
||||
if r.TrimLeadingSpace {
|
||||
line = bytes.TrimLeftFunc(line, unicode.IsSpace)
|
||||
}
|
||||
if len(line) == 0 || line[0] != '"' {
|
||||
if len(line) == 0 || quoteLen == 0 || nextRune(line) != quote {
|
||||
// Non-quoted string field
|
||||
i := bytes.IndexRune(line, r.Comma)
|
||||
field := line
|
||||
@@ -303,7 +322,7 @@ parseField:
|
||||
}
|
||||
// Check to make sure a quote does not appear in field.
|
||||
if !r.LazyQuotes {
|
||||
if j := bytes.IndexByte(field, '"'); j >= 0 {
|
||||
if j := bytes.IndexRune(field, quote); j >= 0 {
|
||||
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
|
||||
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
|
||||
break parseField
|
||||
@@ -320,15 +339,15 @@ parseField:
|
||||
// Quoted string field
|
||||
line = line[quoteLen:]
|
||||
for {
|
||||
i := bytes.IndexByte(line, '"')
|
||||
i := bytes.IndexRune(line, quote)
|
||||
if i >= 0 {
|
||||
// Hit next quote.
|
||||
r.recordBuffer = append(r.recordBuffer, line[:i]...)
|
||||
line = line[i+quoteLen:]
|
||||
switch rn := nextRune(line); {
|
||||
case rn == '"':
|
||||
case rn == quote:
|
||||
// `""` sequence (append quote).
|
||||
r.recordBuffer = append(r.recordBuffer, '"')
|
||||
r.recordBuffer = append(r.recordBuffer, encodedQuote...)
|
||||
line = line[quoteLen:]
|
||||
case rn == r.Comma:
|
||||
// `",` sequence (end of field).
|
||||
@@ -341,7 +360,7 @@ parseField:
|
||||
break parseField
|
||||
case r.LazyQuotes:
|
||||
// `"` sequence (bare quote).
|
||||
r.recordBuffer = append(r.recordBuffer, '"')
|
||||
r.recordBuffer = append(r.recordBuffer, encodedQuote...)
|
||||
default:
|
||||
// `"*` sequence (invalid non-escaped quote).
|
||||
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])
|
||||
|
||||
@@ -28,15 +28,18 @@ import (
|
||||
// the underlying io.Writer. Any errors that occurred should
|
||||
// be checked by calling the Error method.
|
||||
type Writer struct {
|
||||
Comma rune // Field delimiter (set to ',' by NewWriter)
|
||||
UseCRLF bool // True to use \r\n as the line terminator
|
||||
w *bufio.Writer
|
||||
Comma rune // Field delimiter (set to ',' by NewWriter)
|
||||
Quote rune // Fields quote character
|
||||
AlwaysQuote bool // True to quote all fields
|
||||
UseCRLF bool // True to use \r\n as the line terminator
|
||||
w *bufio.Writer
|
||||
}
|
||||
|
||||
// NewWriter returns a new Writer that writes to w.
|
||||
func NewWriter(w io.Writer) *Writer {
|
||||
return &Writer{
|
||||
Comma: ',',
|
||||
Quote: '"',
|
||||
w: bufio.NewWriter(w),
|
||||
}
|
||||
}
|
||||
@@ -59,19 +62,22 @@ func (w *Writer) Write(record []string) error {
|
||||
|
||||
// If we don't have to have a quoted field then just
|
||||
// write out the field and continue to the next field.
|
||||
if !w.fieldNeedsQuotes(field) {
|
||||
if !w.AlwaysQuote && !w.fieldNeedsQuotes(field) {
|
||||
if _, err := w.w.WriteString(field); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
if err := w.w.WriteByte('"'); err != nil {
|
||||
if _, err := w.w.WriteRune(w.Quote); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
specialChars := "\r\n" + string(w.Quote)
|
||||
|
||||
for len(field) > 0 {
|
||||
// Search for special characters.
|
||||
i := strings.IndexAny(field, "\"\r\n")
|
||||
i := strings.IndexAny(field, specialChars)
|
||||
if i < 0 {
|
||||
i = len(field)
|
||||
}
|
||||
@@ -85,9 +91,13 @@ func (w *Writer) Write(record []string) error {
|
||||
// Encode the special character.
|
||||
if len(field) > 0 {
|
||||
var err error
|
||||
switch field[0] {
|
||||
case '"':
|
||||
_, err = w.w.WriteString(`""`)
|
||||
switch nextRune([]byte(field)) {
|
||||
case w.Quote:
|
||||
_, err = w.w.WriteRune(w.Quote)
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
_, err = w.w.WriteRune(w.Quote)
|
||||
case '\r':
|
||||
if !w.UseCRLF {
|
||||
err = w.w.WriteByte('\r')
|
||||
@@ -105,7 +115,7 @@ func (w *Writer) Write(record []string) error {
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := w.w.WriteByte('"'); err != nil {
|
||||
if _, err := w.w.WriteRune(w.Quote); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -158,7 +168,7 @@ func (w *Writer) fieldNeedsQuotes(field string) bool {
|
||||
if field == "" {
|
||||
return false
|
||||
}
|
||||
if field == `\.` || strings.ContainsRune(field, w.Comma) || strings.ContainsAny(field, "\"\r\n") {
|
||||
if field == `\.` || strings.ContainsAny(field, "\r\n"+string(w.Quote)+string(w.Comma)) {
|
||||
return true
|
||||
}
|
||||
|
||||
|
||||
@@ -11,11 +11,13 @@ import (
|
||||
)
|
||||
|
||||
var writeTests = []struct {
|
||||
Input [][]string
|
||||
Output string
|
||||
Error error
|
||||
UseCRLF bool
|
||||
Comma rune
|
||||
Input [][]string
|
||||
Output string
|
||||
Error error
|
||||
UseCRLF bool
|
||||
Comma rune
|
||||
Quote rune
|
||||
AlwaysQuote bool
|
||||
}{
|
||||
{Input: [][]string{{"abc"}}, Output: "abc\n"},
|
||||
{Input: [][]string{{"abc"}}, Output: "abc\r\n", UseCRLF: true},
|
||||
@@ -46,6 +48,7 @@ var writeTests = []struct {
|
||||
{Input: [][]string{{"a", "a", ""}}, Output: "a|a|\n", Comma: '|'},
|
||||
{Input: [][]string{{",", ",", ""}}, Output: ",|,|\n", Comma: '|'},
|
||||
{Input: [][]string{{"foo"}}, Comma: '"', Error: errInvalidDelim},
|
||||
{Input: [][]string{{"a", "a", ""}}, Quote: '"', AlwaysQuote: true, Output: "\"a\"|\"a\"|\"\"\n", Comma: '|'},
|
||||
}
|
||||
|
||||
func TestWrite(t *testing.T) {
|
||||
@@ -56,6 +59,10 @@ func TestWrite(t *testing.T) {
|
||||
if tt.Comma != 0 {
|
||||
f.Comma = tt.Comma
|
||||
}
|
||||
if tt.Quote != 0 {
|
||||
f.Quote = tt.Quote
|
||||
}
|
||||
f.AlwaysQuote = tt.AlwaysQuote
|
||||
err := f.WriteAll(tt.Input)
|
||||
if err != tt.Error {
|
||||
t.Errorf("Unexpected error:\ngot %v\nwant %v", err, tt.Error)
|
||||
|
||||
Reference in New Issue
Block a user