mirror of
https://github.com/minio/minio.git
synced 2025-11-10 05:59:43 -05:00
speed up the performance of s3select on csv (#7945)
This commit is contained in:
committed by
Harshavardhana
parent
fa3546bb03
commit
ec9bfd3aef
@@ -82,10 +82,11 @@ func (rr *recordReader) Read(p []byte) (n int, err error) {
|
||||
|
||||
// Reader - CSV record reader for S3Select.
|
||||
type Reader struct {
|
||||
args *ReaderArgs
|
||||
readCloser io.ReadCloser
|
||||
csvReader *csv.Reader
|
||||
columnNames []string
|
||||
args *ReaderArgs
|
||||
readCloser io.ReadCloser
|
||||
csvReader *csv.Reader
|
||||
columnNames []string
|
||||
nameIndexMap map[string]int64
|
||||
}
|
||||
|
||||
// Read - reads a single record.
|
||||
@@ -99,23 +100,24 @@ func (r *Reader) Read() (sql.Record, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
columnNames := r.columnNames
|
||||
if columnNames == nil {
|
||||
columnNames = make([]string, len(csvRecord))
|
||||
if r.columnNames == nil {
|
||||
r.columnNames = make([]string, len(csvRecord))
|
||||
for i := range csvRecord {
|
||||
columnNames[i] = fmt.Sprintf("_%v", i+1)
|
||||
r.columnNames[i] = fmt.Sprintf("_%v", i+1)
|
||||
}
|
||||
}
|
||||
|
||||
nameIndexMap := make(map[string]int64)
|
||||
for i := range columnNames {
|
||||
nameIndexMap[columnNames[i]] = int64(i)
|
||||
if r.nameIndexMap == nil {
|
||||
r.nameIndexMap = make(map[string]int64)
|
||||
for i := range r.columnNames {
|
||||
r.nameIndexMap[r.columnNames[i]] = int64(i)
|
||||
}
|
||||
}
|
||||
|
||||
return &Record{
|
||||
columnNames: columnNames,
|
||||
columnNames: r.columnNames,
|
||||
csvRecord: csvRecord,
|
||||
nameIndexMap: nameIndexMap,
|
||||
nameIndexMap: r.nameIndexMap,
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
package csv
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"strings"
|
||||
@@ -39,6 +40,7 @@ func TestRead(t *testing.T) {
|
||||
for i, c := range cases {
|
||||
var err error
|
||||
var record sql.Record
|
||||
var result bytes.Buffer
|
||||
|
||||
r, _ := NewReader(ioutil.NopCloser(strings.NewReader(c.content)), &ReaderArgs{
|
||||
FileHeaderInfo: none,
|
||||
@@ -51,22 +53,22 @@ func TestRead(t *testing.T) {
|
||||
unmarshaled: true,
|
||||
})
|
||||
|
||||
result := ""
|
||||
for {
|
||||
record, err = r.Read()
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
s, _ := record.MarshalCSV([]rune(c.fieldDelimiter)[0])
|
||||
result += string(s) + c.recordDelimiter
|
||||
record.WriteCSV(&result, []rune(c.fieldDelimiter)[0])
|
||||
result.Truncate(result.Len() - 1)
|
||||
result.WriteString(c.recordDelimiter)
|
||||
}
|
||||
r.Close()
|
||||
if err != io.EOF {
|
||||
t.Fatalf("Case %d failed with %s", i, err)
|
||||
}
|
||||
|
||||
if result != c.content {
|
||||
t.Errorf("Case %d failed: expected %v result %v", i, c.content, result)
|
||||
if result.String() != c.content {
|
||||
t.Errorf("Case %d failed: expected %v result %v", i, c.content, result.String())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,11 +17,11 @@
|
||||
package csv
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/csv"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/bcicen/jstream"
|
||||
"github.com/minio/minio/pkg/s3select/sql"
|
||||
@@ -61,30 +61,28 @@ func (r *Record) Set(name string, value *sql.Value) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalCSV - encodes to CSV data.
|
||||
func (r *Record) MarshalCSV(fieldDelimiter rune) ([]byte, error) {
|
||||
buf := new(bytes.Buffer)
|
||||
w := csv.NewWriter(buf)
|
||||
// WriteCSV - encodes the record as CSV using fieldDelimiter and writes it to writer.
|
||||
func (r *Record) WriteCSV(writer io.Writer, fieldDelimiter rune) error {
|
||||
w := csv.NewWriter(writer)
|
||||
w.Comma = fieldDelimiter
|
||||
if err := w.Write(r.csvRecord); err != nil {
|
||||
return nil, err
|
||||
return err
|
||||
}
|
||||
w.Flush()
|
||||
if err := w.Error(); err != nil {
|
||||
return nil, err
|
||||
return err
|
||||
}
|
||||
|
||||
data := buf.Bytes()
|
||||
return data[:len(data)-1], nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// MarshalJSON - encodes to JSON data.
|
||||
func (r *Record) MarshalJSON() ([]byte, error) {
|
||||
// WriteJSON - encodes the record as JSON and writes it to writer.
|
||||
func (r *Record) WriteJSON(writer io.Writer) error {
|
||||
var kvs jstream.KVS = make([]jstream.KV, len(r.columnNames))
|
||||
for i := 0; i < len(r.columnNames); i++ {
|
||||
kvs[i] = jstream.KV{Key: r.columnNames[i], Value: r.csvRecord[i]}
|
||||
}
|
||||
return json.Marshal(kvs)
|
||||
return json.NewEncoder(writer).Encode(kvs)
|
||||
}
|
||||
|
||||
// Raw - returns the underlying data with format info.
|
||||
|
||||
Reference in New Issue
Block a user