SQL select query for CSV/JSON (#6648)

select * , select column names have been implemented for CSV.
select * is implemented for JSON.
This commit is contained in:
Ashish Kumar Sinha
2018-10-23 00:42:22 +05:30
committed by kannappanr
parent acf46cc3b5
commit c0b4bf0a3e
111 changed files with 12888 additions and 1398 deletions

View File

@@ -25,90 +25,6 @@ const (
responseRequestIDKey = "x-amz-request-id"
)
// CSVFileHeaderInfo -Can be either USE IGNORE OR NONE, defines what to do with
// the first row
type CSVFileHeaderInfo string
// Constants for file header info.
const (
CSVFileHeaderInfoNone CSVFileHeaderInfo = "NONE"
CSVFileHeaderInfoIgnore = "IGNORE"
CSVFileHeaderInfoUse = "USE"
)
// SelectCompressionType - ONLY GZIP is supported
type SelectCompressionType string
// Constants for compression types under select API.
const (
SelectCompressionNONE SelectCompressionType = "NONE"
SelectCompressionGZIP = "GZIP"
SelectCompressionBZIP = "BZIP2"
)
// CSVQuoteFields - Can be either Always or AsNeeded
type CSVQuoteFields string
// Constants for csv quote styles.
const (
CSVQuoteFieldsAlways CSVQuoteFields = "Always"
CSVQuoteFieldsAsNeeded = "AsNeeded"
)
// QueryExpressionType - Currently can only be SQL
type QueryExpressionType string
// Constants for expression type.
const (
QueryExpressionTypeSQL QueryExpressionType = "SQL"
)
// JSONType determines json input serialization type.
type JSONType string
// Constants for JSONTypes.
const (
JSONDocumentType JSONType = "Document"
JSONLinesType = "Lines"
)
// ObjectSelectRequest - represents the input select body
type ObjectSelectRequest struct {
XMLName xml.Name `xml:"SelectObjectContentRequest" json:"-"`
Expression string
ExpressionType QueryExpressionType
InputSerialization struct {
CompressionType SelectCompressionType
Parquet *struct{}
CSV *struct {
FileHeaderInfo CSVFileHeaderInfo
RecordDelimiter string
FieldDelimiter string
QuoteCharacter string
QuoteEscapeCharacter string
Comments string
}
JSON *struct {
Type JSONType
}
}
OutputSerialization struct {
CSV *struct {
QuoteFields CSVQuoteFields
RecordDelimiter string
FieldDelimiter string
QuoteCharacter string
QuoteEscapeCharacter string
}
JSON *struct {
RecordDelimiter string
}
}
RequestProgress struct {
Enabled bool
}
}
// ObjectIdentifier carries key name for the object to delete.
type ObjectIdentifier struct {
ObjectName string `xml:"Key"`

View File

@@ -253,7 +253,6 @@ const (
ErrParseUnsupportedAlias
ErrParseUnsupportedSyntax
ErrParseUnknownOperator
ErrParseInvalidPathComponent
ErrParseMissingIdentAfterAt
ErrParseUnexpectedOperator
ErrParseUnexpectedTerm
@@ -292,7 +291,6 @@ const (
ErrEvaluatorInvalidTimestampFormatPatternToken
ErrEvaluatorInvalidTimestampFormatPatternSymbol
ErrEvaluatorBindingDoesNotExist
ErrInvalidColumnIndex
ErrMissingHeaders
ErrAdminConfigNotificationTargetsFailed
ErrAdminProfilerNotEnabled
@@ -1223,11 +1221,6 @@ var errorCodeResponse = map[APIErrorCode]APIError{
Description: "The SQL expression contains an invalid operator.",
HTTPStatusCode: http.StatusBadRequest,
},
ErrParseInvalidPathComponent: {
Code: "ParseInvalidPathComponent",
Description: "The SQL expression contains an invalid path component.",
HTTPStatusCode: http.StatusBadRequest,
},
ErrParseMissingIdentAfterAt: {
Code: "ParseMissingIdentAfterAt",
Description: "Did not find the expected identifier after the @ symbol in the SQL expression.",
@@ -1413,11 +1406,6 @@ var errorCodeResponse = map[APIErrorCode]APIError{
Description: "Time stamp format pattern contains an invalid symbol in the SQL expression.",
HTTPStatusCode: http.StatusBadRequest,
},
ErrInvalidColumnIndex: {
Code: "InvalidColumnIndex",
Description: "Column index in the SQL expression is invalid.",
HTTPStatusCode: http.StatusBadRequest,
},
ErrEvaluatorBindingDoesNotExist: {
Code: "ErrEvaluatorBindingDoesNotExist",
Description: "A column name or a path provided does not exist in the SQL expression",
@@ -1577,8 +1565,6 @@ func toAPIErrorCode(err error) (apiErr APIErrorCode) {
apiErr = ErrParseUnsupportedSyntax
case s3select.ErrParseUnknownOperator:
apiErr = ErrParseUnknownOperator
case s3select.ErrParseInvalidPathComponent:
apiErr = ErrParseInvalidPathComponent
case s3select.ErrParseMissingIdentAfterAt:
apiErr = ErrParseMissingIdentAfterAt
case s3select.ErrParseUnexpectedOperator:
@@ -1651,8 +1637,6 @@ func toAPIErrorCode(err error) (apiErr APIErrorCode) {
apiErr = ErrEvaluatorInvalidTimestampFormatPatternToken
case s3select.ErrEvaluatorInvalidTimestampFormatPatternSymbol:
apiErr = ErrEvaluatorInvalidTimestampFormatPatternSymbol
case s3select.ErrInvalidColumnIndex:
apiErr = ErrInvalidColumnIndex
case s3select.ErrEvaluatorBindingDoesNotExist:
apiErr = ErrEvaluatorBindingDoesNotExist
case s3select.ErrMissingHeaders:

View File

@@ -70,12 +70,6 @@ func setHeadGetRespHeaders(w http.ResponseWriter, reqParams url.Values) {
}
}
// This function replaces "",'' with `` for the select parser
func cleanExpr(expr string) string {
r := strings.NewReplacer("\"", "`", "'", "`")
return r.Replace(expr)
}
// SelectObjectContentHandler - GET Object?select
// ----------
// This implementation of the GET operation retrieves object content based
@@ -149,8 +143,7 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
writeErrorResponse(w, ErrEmptyRequestBody, r.URL)
return
}
var selectReq ObjectSelectRequest
var selectReq s3select.ObjectSelectRequest
if err := xmlDecoder(r.Body, &selectReq, r.ContentLength); err != nil {
writeErrorResponse(w, ErrMalformedXML, r.URL)
return
@@ -179,22 +172,21 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
objInfo := gr.ObjInfo
if selectReq.InputSerialization.CompressionType == SelectCompressionGZIP {
if selectReq.InputSerialization.CompressionType == s3select.SelectCompressionGZIP {
if !strings.Contains(objInfo.ContentType, "gzip") {
writeErrorResponse(w, ErrInvalidDataSource, r.URL)
return
}
}
if selectReq.InputSerialization.CompressionType == SelectCompressionBZIP {
if selectReq.InputSerialization.CompressionType == s3select.SelectCompressionBZIP {
if !strings.Contains(objInfo.ContentType, "bzip") {
writeErrorResponse(w, ErrInvalidDataSource, r.URL)
return
}
}
if selectReq.InputSerialization.CompressionType == SelectCompressionNONE ||
selectReq.InputSerialization.CompressionType == "" {
selectReq.InputSerialization.CompressionType = SelectCompressionNONE
if !strings.Contains(objInfo.ContentType, "text/csv") {
if selectReq.InputSerialization.CompressionType == "" {
selectReq.InputSerialization.CompressionType = s3select.SelectCompressionNONE
if !strings.Contains(objInfo.ContentType, "text/csv") && !strings.Contains(objInfo.ContentType, "application/json") {
writeErrorResponse(w, ErrInvalidDataSource, r.URL)
return
}
@@ -207,28 +199,45 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
writeErrorResponse(w, ErrExpressionTooLong, r.URL)
return
}
if selectReq.InputSerialization.CSV == nil || selectReq.OutputSerialization.CSV == nil {
if selectReq.InputSerialization.CSV == nil && selectReq.InputSerialization.JSON == nil {
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL)
return
}
if selectReq.InputSerialization.CSV.FileHeaderInfo != CSVFileHeaderInfoUse &&
selectReq.InputSerialization.CSV.FileHeaderInfo != CSVFileHeaderInfoNone &&
selectReq.InputSerialization.CSV.FileHeaderInfo != CSVFileHeaderInfoIgnore &&
selectReq.InputSerialization.CSV.FileHeaderInfo != "" {
writeErrorResponse(w, ErrInvalidFileHeaderInfo, r.URL)
return
}
if selectReq.OutputSerialization.CSV.QuoteFields != CSVQuoteFieldsAlways &&
selectReq.OutputSerialization.CSV.QuoteFields != CSVQuoteFieldsAsNeeded &&
selectReq.OutputSerialization.CSV.QuoteFields != "" {
writeErrorResponse(w, ErrInvalidQuoteFields, r.URL)
return
}
if len(selectReq.InputSerialization.CSV.RecordDelimiter) > 2 {
if selectReq.OutputSerialization.CSV == nil && selectReq.OutputSerialization.JSON == nil {
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL)
return
}
if selectReq.InputSerialization.CSV != nil {
if selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoUse &&
selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoNone &&
selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoIgnore &&
selectReq.InputSerialization.CSV.FileHeaderInfo != "" {
writeErrorResponse(w, ErrInvalidFileHeaderInfo, r.URL)
return
}
if selectReq.OutputSerialization.CSV.QuoteFields != s3select.CSVQuoteFieldsAlways &&
selectReq.OutputSerialization.CSV.QuoteFields != s3select.CSVQuoteFieldsAsNeeded &&
selectReq.OutputSerialization.CSV.QuoteFields != "" {
writeErrorResponse(w, ErrInvalidQuoteFields, r.URL)
return
}
if len(selectReq.InputSerialization.CSV.RecordDelimiter) > 2 {
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL)
return
}
}
if selectReq.InputSerialization.JSON != nil {
if selectReq.InputSerialization.JSON.Type != s3select.JSONTypeDocument &&
selectReq.InputSerialization.JSON.Type != s3select.JSONLinesType &&
selectReq.InputSerialization.JSON.Type != "" {
writeErrorResponse(w, ErrInvalidJSONType, r.URL)
return
}
}
// Set encryption response headers
if objectAPI.IsEncryptionSupported() {
if crypto.IsEncrypted(objInfo.UserDefined) {
@@ -242,44 +251,23 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
}
}
//s3select //Options
if selectReq.OutputSerialization.CSV.FieldDelimiter == "" {
selectReq.OutputSerialization.CSV.FieldDelimiter = ","
s3s, err := s3select.New(gr, objInfo.Size, selectReq)
if err != nil {
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
return
}
if selectReq.InputSerialization.CSV.FileHeaderInfo == "" {
selectReq.InputSerialization.CSV.FileHeaderInfo = CSVFileHeaderInfoNone
// Parses the select query and checks for an error
_, _, _, _, _, _, err = s3select.ParseSelect(s3s)
if err != nil {
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
return
}
if selectReq.InputSerialization.CSV.RecordDelimiter == "" {
selectReq.InputSerialization.CSV.RecordDelimiter = "\n"
}
if selectReq.InputSerialization.CSV != nil {
options := &s3select.Options{
HasHeader: selectReq.InputSerialization.CSV.FileHeaderInfo != CSVFileHeaderInfoNone,
RecordDelimiter: selectReq.InputSerialization.CSV.RecordDelimiter,
FieldDelimiter: selectReq.InputSerialization.CSV.FieldDelimiter,
Comments: selectReq.InputSerialization.CSV.Comments,
Name: "S3Object", // Default table name for all objects
ReadFrom: gr,
Compressed: string(selectReq.InputSerialization.CompressionType),
Expression: cleanExpr(selectReq.Expression),
OutputFieldDelimiter: selectReq.OutputSerialization.CSV.FieldDelimiter,
StreamSize: objInfo.Size,
HeaderOpt: selectReq.InputSerialization.CSV.FileHeaderInfo == CSVFileHeaderInfoUse,
Progress: selectReq.RequestProgress.Enabled,
}
s3s, err := s3select.NewInput(options)
if err != nil {
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
return
}
_, _, _, _, _, _, err = s3s.ParseSelect(options.Expression)
if err != nil {
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
return
}
if err = s3s.Execute(w); err != nil {
logger.LogIf(ctx, err)
}
// Executes the query on data-set
if err = s3select.Execute(w, s3s); err != nil {
logger.LogIf(ctx, err)
}
for k, v := range objInfo.UserDefined {