Support JSON to CSV and CSV to JSON output format conversion (#6910)

This PR implements one of the pending items in issue #6286:
in the S3 API, a user can request CSV output for a JSON document
and JSON output for a CSV document. This PR refactors
the code a little bit to support this feature.
This commit is contained in:
Harshavardhana
2018-12-07 14:55:32 -08:00
committed by kannappanr
parent 313ba74b09
commit 4c7c571875
6 changed files with 150 additions and 56 deletions

View File

@@ -57,9 +57,12 @@ type Options struct {
// SQL expression meant to be evaluated.
Expression string
// What the outputted CSV will be delimited by .
// Output CSV will be delimited by.
OutputFieldDelimiter string
// Output CSV record will be delimited by.
OutputRecordDelimiter string
// Size of incoming object
StreamSize int64
@@ -68,6 +71,9 @@ type Options struct {
// Progress enabled, enable/disable progress messages.
Progress bool
// Output format type, supported values are CSV and JSON
OutputType format.Type
}
// cinput represents a record producing input from a formatted object.
@@ -147,6 +153,9 @@ func (reader *cinput) readHeader() error {
reader.firstRow = nil
} else {
reader.firstRow, readErr = reader.reader.Read()
if readErr != nil {
return format.ErrCSVParsingError
}
reader.header = make([]string, len(reader.firstRow))
for i := range reader.firstRow {
reader.header[i] = "_" + strconv.Itoa(i)
@@ -173,8 +182,13 @@ func (reader *cinput) Read() ([]byte, error) {
if dec != nil {
var data []byte
var err error
for i, value := range dec {
data, err = sjson.SetBytes(data, reader.header[i], value)
// Navigate column values in reverse order to preserve
// the input order for AWS S3 compatibility, because
// sjson adds json key/value pairs in first in last out
// fashion. This should be fixed in sjson ideally. Following
// work around is needed to circumvent this issue for now.
for i := len(dec) - 1; i >= 0; i-- {
data, err = sjson.SetBytes(data, reader.header[i], dec[i])
if err != nil {
return nil, err
}
@@ -184,11 +198,16 @@ func (reader *cinput) Read() ([]byte, error) {
return nil, nil
}
// OutputFieldDelimiter - returns the delimiter specified in input request
// OutputFieldDelimiter - returns the field delimiter that was requested
// for the output, as stored in the reader's options.
func (reader *cinput) OutputFieldDelimiter() string {
	fieldDelimiter := reader.options.OutputFieldDelimiter
	return fieldDelimiter
}
// OutputRecordDelimiter - returns the requested output record delimiter,
// i.e. the string the output CSV records will be delimited by.
func (reader *cinput) OutputRecordDelimiter() string {
	// Read the record delimiter option, not the field delimiter: returning
	// the field delimiter here would separate records with the column
	// separator instead of the requested record separator.
	return reader.options.OutputRecordDelimiter
}
// HasHeader - returns true or false depending upon the header.
func (reader *cinput) HasHeader() bool {
return reader.options.HasHeader
@@ -285,11 +304,16 @@ func (reader *cinput) CreateProgressXML() (string, error) {
return xml.Header + string(out), nil
}
// Type - return the data format type {
// Type - returns the data format type of this reader's input,
// which is always format.CSV.
func (reader *cinput) Type() format.Type {
return format.CSV
}
// OutputType - returns the output format type taken from the reader's
// options; per the Options documentation the supported values are CSV
// and JSON.
func (reader *cinput) OutputType() format.Type {
	requested := reader.options.OutputType
	return requested
}
// ColNameErrs is a function which makes sure that the headers that are requested
// are present in the file, otherwise it returns an error.
func (reader *cinput) ColNameErrs(columnNames []string) error {