Support configurable quote character parameter in Select (#8955)

This commit is contained in:
Anis Elleuch
2020-03-13 22:09:34 -07:00
committed by GitHub
parent 3ca9f5ffa3
commit 35ecc04223
18 changed files with 567 additions and 142 deletions

View File

@@ -113,6 +113,9 @@ type Reader struct {
// or the Unicode replacement character (0xFFFD).
Comma rune
// Quote is the single character used for marking fields limits
Quote []rune
// Comment, if not 0, is the comment character. Lines beginning with the
// Comment character without preceding whitespace are ignored.
// With leading whitespace the Comment character becomes part of the
@@ -171,6 +174,7 @@ type Reader struct {
func NewReader(r io.Reader) *Reader {
return &Reader{
Comma: ',',
Quote: []rune(`"`),
r: bufio.NewReader(r),
}
}
@@ -255,6 +259,13 @@ func nextRune(b []byte) rune {
return r
}
func encodeRune(r rune) []byte {
rlen := utf8.RuneLen(r)
p := make([]byte, rlen)
_ = utf8.EncodeRune(p, r)
return p
}
func (r *Reader) readRecord(dst []string) ([]string, error) {
if r.Comma == r.Comment || !validDelim(r.Comma) || (r.Comment != 0 && !validDelim(r.Comment)) {
return nil, errInvalidDelim
@@ -280,9 +291,17 @@ func (r *Reader) readRecord(dst []string) ([]string, error) {
return nil, errRead
}
var quote rune
var quoteLen int
if len(r.Quote) > 0 {
quote = r.Quote[0]
quoteLen = utf8.RuneLen(quote)
}
encodedQuote := encodeRune(quote)
// Parse each field in the record.
var err error
const quoteLen = len(`"`)
commaLen := utf8.RuneLen(r.Comma)
recLine := r.numLine // Starting line for record
r.recordBuffer = r.recordBuffer[:0]
@@ -292,7 +311,7 @@ parseField:
if r.TrimLeadingSpace {
line = bytes.TrimLeftFunc(line, unicode.IsSpace)
}
if len(line) == 0 || line[0] != '"' {
if len(line) == 0 || quoteLen == 0 || nextRune(line) != quote {
// Non-quoted string field
i := bytes.IndexRune(line, r.Comma)
field := line
@@ -303,7 +322,7 @@ parseField:
}
// Check to make sure a quote does not appear in field.
if !r.LazyQuotes {
if j := bytes.IndexByte(field, '"'); j >= 0 {
if j := bytes.IndexRune(field, quote); j >= 0 {
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line[j:])])
err = &ParseError{StartLine: recLine, Line: r.numLine, Column: col, Err: ErrBareQuote}
break parseField
@@ -320,15 +339,15 @@ parseField:
// Quoted string field
line = line[quoteLen:]
for {
i := bytes.IndexByte(line, '"')
i := bytes.IndexRune(line, quote)
if i >= 0 {
// Hit next quote.
r.recordBuffer = append(r.recordBuffer, line[:i]...)
line = line[i+quoteLen:]
switch rn := nextRune(line); {
case rn == '"':
case rn == quote:
// `""` sequence (append quote).
r.recordBuffer = append(r.recordBuffer, '"')
r.recordBuffer = append(r.recordBuffer, encodedQuote...)
line = line[quoteLen:]
case rn == r.Comma:
// `",` sequence (end of field).
@@ -341,7 +360,7 @@ parseField:
break parseField
case r.LazyQuotes:
// `"` sequence (bare quote).
r.recordBuffer = append(r.recordBuffer, '"')
r.recordBuffer = append(r.recordBuffer, encodedQuote...)
default:
// `"*` sequence (invalid non-escaped quote).
col := utf8.RuneCount(fullLine[:len(fullLine)-len(line)-quoteLen])

View File

@@ -28,15 +28,18 @@ import (
// the underlying io.Writer. Any errors that occurred should
// be checked by calling the Error method.
type Writer struct {
Comma rune // Field delimiter (set to ',' by NewWriter)
UseCRLF bool // True to use \r\n as the line terminator
w *bufio.Writer
Comma rune // Field delimiter (set to ',' by NewWriter)
Quote rune // Fields quote character
AlwaysQuote bool // True to quote all fields
UseCRLF bool // True to use \r\n as the line terminator
w *bufio.Writer
}
// NewWriter returns a new Writer that writes to w.
func NewWriter(w io.Writer) *Writer {
return &Writer{
Comma: ',',
Quote: '"',
w: bufio.NewWriter(w),
}
}
@@ -59,19 +62,22 @@ func (w *Writer) Write(record []string) error {
// If we don't have to have a quoted field then just
// write out the field and continue to the next field.
if !w.fieldNeedsQuotes(field) {
if !w.AlwaysQuote && !w.fieldNeedsQuotes(field) {
if _, err := w.w.WriteString(field); err != nil {
return err
}
continue
}
if err := w.w.WriteByte('"'); err != nil {
if _, err := w.w.WriteRune(w.Quote); err != nil {
return err
}
specialChars := "\r\n" + string(w.Quote)
for len(field) > 0 {
// Search for special characters.
i := strings.IndexAny(field, "\"\r\n")
i := strings.IndexAny(field, specialChars)
if i < 0 {
i = len(field)
}
@@ -85,9 +91,13 @@ func (w *Writer) Write(record []string) error {
// Encode the special character.
if len(field) > 0 {
var err error
switch field[0] {
case '"':
_, err = w.w.WriteString(`""`)
switch nextRune([]byte(field)) {
case w.Quote:
_, err = w.w.WriteRune(w.Quote)
if err != nil {
break
}
_, err = w.w.WriteRune(w.Quote)
case '\r':
if !w.UseCRLF {
err = w.w.WriteByte('\r')
@@ -105,7 +115,7 @@ func (w *Writer) Write(record []string) error {
}
}
}
if err := w.w.WriteByte('"'); err != nil {
if _, err := w.w.WriteRune(w.Quote); err != nil {
return err
}
}
@@ -158,7 +168,7 @@ func (w *Writer) fieldNeedsQuotes(field string) bool {
if field == "" {
return false
}
if field == `\.` || strings.ContainsRune(field, w.Comma) || strings.ContainsAny(field, "\"\r\n") {
if field == `\.` || strings.ContainsAny(field, "\r\n"+string(w.Quote)+string(w.Comma)) {
return true
}

View File

@@ -11,11 +11,13 @@ import (
)
var writeTests = []struct {
Input [][]string
Output string
Error error
UseCRLF bool
Comma rune
Input [][]string
Output string
Error error
UseCRLF bool
Comma rune
Quote rune
AlwaysQuote bool
}{
{Input: [][]string{{"abc"}}, Output: "abc\n"},
{Input: [][]string{{"abc"}}, Output: "abc\r\n", UseCRLF: true},
@@ -46,6 +48,7 @@ var writeTests = []struct {
{Input: [][]string{{"a", "a", ""}}, Output: "a|a|\n", Comma: '|'},
{Input: [][]string{{",", ",", ""}}, Output: ",|,|\n", Comma: '|'},
{Input: [][]string{{"foo"}}, Comma: '"', Error: errInvalidDelim},
{Input: [][]string{{"a", "a", ""}}, Quote: '"', AlwaysQuote: true, Output: "\"a\"|\"a\"|\"\"\n", Comma: '|'},
}
func TestWrite(t *testing.T) {
@@ -56,6 +59,10 @@ func TestWrite(t *testing.T) {
if tt.Comma != 0 {
f.Comma = tt.Comma
}
if tt.Quote != 0 {
f.Quote = tt.Quote
}
f.AlwaysQuote = tt.AlwaysQuote
err := f.WriteAll(tt.Input)
if err != tt.Error {
t.Errorf("Unexpected error:\ngot %v\nwant %v", err, tt.Error)