mirror of
https://github.com/minio/minio.git
synced 2025-11-09 13:39:46 -05:00
SQL select query for CSV/JSON (#6648)
`SELECT *` and `SELECT <column names>` are implemented for CSV input; only `SELECT *` is implemented for JSON input.
This commit is contained in:
committed by
kannappanr
parent
acf46cc3b5
commit
c0b4bf0a3e
@@ -25,90 +25,6 @@ const (
|
||||
responseRequestIDKey = "x-amz-request-id"
|
||||
)
|
||||
|
||||
// CSVFileHeaderInfo defines what to do with the first row of a CSV input.
// It can be one of USE, IGNORE or NONE.
type CSVFileHeaderInfo string

// Constants for file header info.
//
// Every constant is declared with the CSVFileHeaderInfo type explicitly: in a
// const group an entry written as `Name = "value"` does not inherit the type
// of the preceding entry and would otherwise be an untyped string constant.
const (
	CSVFileHeaderInfoNone   CSVFileHeaderInfo = "NONE"
	CSVFileHeaderInfoIgnore CSVFileHeaderInfo = "IGNORE"
	CSVFileHeaderInfoUse    CSVFileHeaderInfo = "USE"
)
|
||||
|
||||
// SelectCompressionType names the compression format declared for the input
// of a select request.
//
// NOTE(review): this comment previously stated that only GZIP is supported,
// yet a BZIP2 value is declared below as well — confirm which formats the
// reader actually handles.
type SelectCompressionType string

// Constants for compression types under select API.
//
// Each constant carries the SelectCompressionType type explicitly; an entry
// written as `Name = "value"` in a const group is an untyped string constant
// and does not inherit the type of the entry above it.
const (
	SelectCompressionNONE SelectCompressionType = "NONE"
	SelectCompressionGZIP SelectCompressionType = "GZIP"
	SelectCompressionBZIP SelectCompressionType = "BZIP2"
)
|
||||
|
||||
// CSVQuoteFields selects the quoting style for CSV fields. It can be either
// Always or AsNeeded.
type CSVQuoteFields string

// Constants for csv quote styles.
//
// Both constants are declared with the CSVQuoteFields type explicitly so that
// neither degrades to an untyped string constant.
const (
	CSVQuoteFieldsAlways   CSVQuoteFields = "Always"
	CSVQuoteFieldsAsNeeded CSVQuoteFields = "AsNeeded"
)
|
||||
|
||||
// QueryExpressionType identifies the language a select expression is written
// in. SQL is the only value declared at present.
type QueryExpressionType string

// QueryExpressionTypeSQL marks an expression as SQL.
const QueryExpressionTypeSQL = QueryExpressionType("SQL")
|
||||
|
||||
// JSONType determines json input serialization type.
type JSONType string

// Constants for JSONTypes.
//
// Both constants are declared with the JSONType type explicitly; without it
// the second entry would be an untyped string constant rather than a JSONType.
const (
	JSONDocumentType JSONType = "Document"
	JSONLinesType    JSONType = "Lines"
)
|
||||
|
||||
// ObjectSelectRequest - represents the input select body
|
||||
type ObjectSelectRequest struct {
|
||||
XMLName xml.Name `xml:"SelectObjectContentRequest" json:"-"`
|
||||
Expression string
|
||||
ExpressionType QueryExpressionType
|
||||
InputSerialization struct {
|
||||
CompressionType SelectCompressionType
|
||||
Parquet *struct{}
|
||||
CSV *struct {
|
||||
FileHeaderInfo CSVFileHeaderInfo
|
||||
RecordDelimiter string
|
||||
FieldDelimiter string
|
||||
QuoteCharacter string
|
||||
QuoteEscapeCharacter string
|
||||
Comments string
|
||||
}
|
||||
JSON *struct {
|
||||
Type JSONType
|
||||
}
|
||||
}
|
||||
OutputSerialization struct {
|
||||
CSV *struct {
|
||||
QuoteFields CSVQuoteFields
|
||||
RecordDelimiter string
|
||||
FieldDelimiter string
|
||||
QuoteCharacter string
|
||||
QuoteEscapeCharacter string
|
||||
}
|
||||
JSON *struct {
|
||||
RecordDelimiter string
|
||||
}
|
||||
}
|
||||
RequestProgress struct {
|
||||
Enabled bool
|
||||
}
|
||||
}
|
||||
|
||||
// ObjectIdentifier carries key name for the object to delete.
|
||||
type ObjectIdentifier struct {
|
||||
ObjectName string `xml:"Key"`
|
||||
|
||||
@@ -253,7 +253,6 @@ const (
|
||||
ErrParseUnsupportedAlias
|
||||
ErrParseUnsupportedSyntax
|
||||
ErrParseUnknownOperator
|
||||
ErrParseInvalidPathComponent
|
||||
ErrParseMissingIdentAfterAt
|
||||
ErrParseUnexpectedOperator
|
||||
ErrParseUnexpectedTerm
|
||||
@@ -292,7 +291,6 @@ const (
|
||||
ErrEvaluatorInvalidTimestampFormatPatternToken
|
||||
ErrEvaluatorInvalidTimestampFormatPatternSymbol
|
||||
ErrEvaluatorBindingDoesNotExist
|
||||
ErrInvalidColumnIndex
|
||||
ErrMissingHeaders
|
||||
ErrAdminConfigNotificationTargetsFailed
|
||||
ErrAdminProfilerNotEnabled
|
||||
@@ -1223,11 +1221,6 @@ var errorCodeResponse = map[APIErrorCode]APIError{
|
||||
Description: "The SQL expression contains an invalid operator.",
|
||||
HTTPStatusCode: http.StatusBadRequest,
|
||||
},
|
||||
ErrParseInvalidPathComponent: {
|
||||
Code: "ParseInvalidPathComponent",
|
||||
Description: "The SQL expression contains an invalid path component.",
|
||||
HTTPStatusCode: http.StatusBadRequest,
|
||||
},
|
||||
ErrParseMissingIdentAfterAt: {
|
||||
Code: "ParseMissingIdentAfterAt",
|
||||
Description: "Did not find the expected identifier after the @ symbol in the SQL expression.",
|
||||
@@ -1413,11 +1406,6 @@ var errorCodeResponse = map[APIErrorCode]APIError{
|
||||
Description: "Time stamp format pattern contains an invalid symbol in the SQL expression.",
|
||||
HTTPStatusCode: http.StatusBadRequest,
|
||||
},
|
||||
ErrInvalidColumnIndex: {
|
||||
Code: "InvalidColumnIndex",
|
||||
Description: "Column index in the SQL expression is invalid.",
|
||||
HTTPStatusCode: http.StatusBadRequest,
|
||||
},
|
||||
ErrEvaluatorBindingDoesNotExist: {
|
||||
Code: "ErrEvaluatorBindingDoesNotExist",
|
||||
Description: "A column name or a path provided does not exist in the SQL expression",
|
||||
@@ -1577,8 +1565,6 @@ func toAPIErrorCode(err error) (apiErr APIErrorCode) {
|
||||
apiErr = ErrParseUnsupportedSyntax
|
||||
case s3select.ErrParseUnknownOperator:
|
||||
apiErr = ErrParseUnknownOperator
|
||||
case s3select.ErrParseInvalidPathComponent:
|
||||
apiErr = ErrParseInvalidPathComponent
|
||||
case s3select.ErrParseMissingIdentAfterAt:
|
||||
apiErr = ErrParseMissingIdentAfterAt
|
||||
case s3select.ErrParseUnexpectedOperator:
|
||||
@@ -1651,8 +1637,6 @@ func toAPIErrorCode(err error) (apiErr APIErrorCode) {
|
||||
apiErr = ErrEvaluatorInvalidTimestampFormatPatternToken
|
||||
case s3select.ErrEvaluatorInvalidTimestampFormatPatternSymbol:
|
||||
apiErr = ErrEvaluatorInvalidTimestampFormatPatternSymbol
|
||||
case s3select.ErrInvalidColumnIndex:
|
||||
apiErr = ErrInvalidColumnIndex
|
||||
case s3select.ErrEvaluatorBindingDoesNotExist:
|
||||
apiErr = ErrEvaluatorBindingDoesNotExist
|
||||
case s3select.ErrMissingHeaders:
|
||||
|
||||
@@ -70,12 +70,6 @@ func setHeadGetRespHeaders(w http.ResponseWriter, reqParams url.Values) {
|
||||
}
|
||||
}
|
||||
|
||||
// cleanExpr rewrites every double quote (") and single quote (') in expr to a
// backtick (`), which is the quoting form the select parser is fed.
func cleanExpr(expr string) string {
	const backtick = "`"
	// The replacement never contains a quote character, so two sequential
	// passes give the same result as one combined pass.
	noDouble := strings.Replace(expr, "\"", backtick, -1)
	return strings.Replace(noDouble, "'", backtick, -1)
}
|
||||
|
||||
// SelectObjectContentHandler - GET Object?select
|
||||
// ----------
|
||||
// This implementation of the GET operation retrieves object content based
|
||||
@@ -149,8 +143,7 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
|
||||
writeErrorResponse(w, ErrEmptyRequestBody, r.URL)
|
||||
return
|
||||
}
|
||||
|
||||
var selectReq ObjectSelectRequest
|
||||
var selectReq s3select.ObjectSelectRequest
|
||||
if err := xmlDecoder(r.Body, &selectReq, r.ContentLength); err != nil {
|
||||
writeErrorResponse(w, ErrMalformedXML, r.URL)
|
||||
return
|
||||
@@ -179,22 +172,21 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
|
||||
|
||||
objInfo := gr.ObjInfo
|
||||
|
||||
if selectReq.InputSerialization.CompressionType == SelectCompressionGZIP {
|
||||
if selectReq.InputSerialization.CompressionType == s3select.SelectCompressionGZIP {
|
||||
if !strings.Contains(objInfo.ContentType, "gzip") {
|
||||
writeErrorResponse(w, ErrInvalidDataSource, r.URL)
|
||||
return
|
||||
}
|
||||
}
|
||||
if selectReq.InputSerialization.CompressionType == SelectCompressionBZIP {
|
||||
if selectReq.InputSerialization.CompressionType == s3select.SelectCompressionBZIP {
|
||||
if !strings.Contains(objInfo.ContentType, "bzip") {
|
||||
writeErrorResponse(w, ErrInvalidDataSource, r.URL)
|
||||
return
|
||||
}
|
||||
}
|
||||
if selectReq.InputSerialization.CompressionType == SelectCompressionNONE ||
|
||||
selectReq.InputSerialization.CompressionType == "" {
|
||||
selectReq.InputSerialization.CompressionType = SelectCompressionNONE
|
||||
if !strings.Contains(objInfo.ContentType, "text/csv") {
|
||||
if selectReq.InputSerialization.CompressionType == "" {
|
||||
selectReq.InputSerialization.CompressionType = s3select.SelectCompressionNONE
|
||||
if !strings.Contains(objInfo.ContentType, "text/csv") && !strings.Contains(objInfo.ContentType, "application/json") {
|
||||
writeErrorResponse(w, ErrInvalidDataSource, r.URL)
|
||||
return
|
||||
}
|
||||
@@ -207,28 +199,45 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
|
||||
writeErrorResponse(w, ErrExpressionTooLong, r.URL)
|
||||
return
|
||||
}
|
||||
if selectReq.InputSerialization.CSV == nil || selectReq.OutputSerialization.CSV == nil {
|
||||
if selectReq.InputSerialization.CSV == nil && selectReq.InputSerialization.JSON == nil {
|
||||
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL)
|
||||
return
|
||||
}
|
||||
if selectReq.InputSerialization.CSV.FileHeaderInfo != CSVFileHeaderInfoUse &&
|
||||
selectReq.InputSerialization.CSV.FileHeaderInfo != CSVFileHeaderInfoNone &&
|
||||
selectReq.InputSerialization.CSV.FileHeaderInfo != CSVFileHeaderInfoIgnore &&
|
||||
selectReq.InputSerialization.CSV.FileHeaderInfo != "" {
|
||||
writeErrorResponse(w, ErrInvalidFileHeaderInfo, r.URL)
|
||||
return
|
||||
}
|
||||
if selectReq.OutputSerialization.CSV.QuoteFields != CSVQuoteFieldsAlways &&
|
||||
selectReq.OutputSerialization.CSV.QuoteFields != CSVQuoteFieldsAsNeeded &&
|
||||
selectReq.OutputSerialization.CSV.QuoteFields != "" {
|
||||
writeErrorResponse(w, ErrInvalidQuoteFields, r.URL)
|
||||
return
|
||||
}
|
||||
if len(selectReq.InputSerialization.CSV.RecordDelimiter) > 2 {
|
||||
if selectReq.OutputSerialization.CSV == nil && selectReq.OutputSerialization.JSON == nil {
|
||||
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL)
|
||||
return
|
||||
}
|
||||
|
||||
if selectReq.InputSerialization.CSV != nil {
|
||||
if selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoUse &&
|
||||
selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoNone &&
|
||||
selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoIgnore &&
|
||||
selectReq.InputSerialization.CSV.FileHeaderInfo != "" {
|
||||
writeErrorResponse(w, ErrInvalidFileHeaderInfo, r.URL)
|
||||
return
|
||||
}
|
||||
if selectReq.OutputSerialization.CSV.QuoteFields != s3select.CSVQuoteFieldsAlways &&
|
||||
selectReq.OutputSerialization.CSV.QuoteFields != s3select.CSVQuoteFieldsAsNeeded &&
|
||||
selectReq.OutputSerialization.CSV.QuoteFields != "" {
|
||||
writeErrorResponse(w, ErrInvalidQuoteFields, r.URL)
|
||||
return
|
||||
}
|
||||
if len(selectReq.InputSerialization.CSV.RecordDelimiter) > 2 {
|
||||
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL)
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
if selectReq.InputSerialization.JSON != nil {
|
||||
if selectReq.InputSerialization.JSON.Type != s3select.JSONTypeDocument &&
|
||||
selectReq.InputSerialization.JSON.Type != s3select.JSONLinesType &&
|
||||
selectReq.InputSerialization.JSON.Type != "" {
|
||||
writeErrorResponse(w, ErrInvalidJSONType, r.URL)
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Set encryption response headers
|
||||
if objectAPI.IsEncryptionSupported() {
|
||||
if crypto.IsEncrypted(objInfo.UserDefined) {
|
||||
@@ -242,44 +251,23 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
|
||||
}
|
||||
}
|
||||
|
||||
//s3select //Options
|
||||
if selectReq.OutputSerialization.CSV.FieldDelimiter == "" {
|
||||
selectReq.OutputSerialization.CSV.FieldDelimiter = ","
|
||||
s3s, err := s3select.New(gr, objInfo.Size, selectReq)
|
||||
if err != nil {
|
||||
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
|
||||
return
|
||||
}
|
||||
if selectReq.InputSerialization.CSV.FileHeaderInfo == "" {
|
||||
selectReq.InputSerialization.CSV.FileHeaderInfo = CSVFileHeaderInfoNone
|
||||
|
||||
// Parses the select query and checks for an error
|
||||
_, _, _, _, _, _, err = s3select.ParseSelect(s3s)
|
||||
if err != nil {
|
||||
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
|
||||
return
|
||||
}
|
||||
if selectReq.InputSerialization.CSV.RecordDelimiter == "" {
|
||||
selectReq.InputSerialization.CSV.RecordDelimiter = "\n"
|
||||
}
|
||||
if selectReq.InputSerialization.CSV != nil {
|
||||
options := &s3select.Options{
|
||||
HasHeader: selectReq.InputSerialization.CSV.FileHeaderInfo != CSVFileHeaderInfoNone,
|
||||
RecordDelimiter: selectReq.InputSerialization.CSV.RecordDelimiter,
|
||||
FieldDelimiter: selectReq.InputSerialization.CSV.FieldDelimiter,
|
||||
Comments: selectReq.InputSerialization.CSV.Comments,
|
||||
Name: "S3Object", // Default table name for all objects
|
||||
ReadFrom: gr,
|
||||
Compressed: string(selectReq.InputSerialization.CompressionType),
|
||||
Expression: cleanExpr(selectReq.Expression),
|
||||
OutputFieldDelimiter: selectReq.OutputSerialization.CSV.FieldDelimiter,
|
||||
StreamSize: objInfo.Size,
|
||||
HeaderOpt: selectReq.InputSerialization.CSV.FileHeaderInfo == CSVFileHeaderInfoUse,
|
||||
Progress: selectReq.RequestProgress.Enabled,
|
||||
}
|
||||
s3s, err := s3select.NewInput(options)
|
||||
if err != nil {
|
||||
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
|
||||
return
|
||||
}
|
||||
_, _, _, _, _, _, err = s3s.ParseSelect(options.Expression)
|
||||
if err != nil {
|
||||
writeErrorResponse(w, toAPIErrorCode(err), r.URL)
|
||||
return
|
||||
}
|
||||
if err = s3s.Execute(w); err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
}
|
||||
|
||||
// Executes the query on data-set
|
||||
if err = s3select.Execute(w, s3s); err != nil {
|
||||
logger.LogIf(ctx, err)
|
||||
|
||||
}
|
||||
|
||||
for k, v := range objInfo.UserDefined {
|
||||
|
||||
Reference in New Issue
Block a user