mirror of
https://github.com/minio/minio.git
synced 2025-11-07 21:02:58 -05:00
Refactor s3select to support parquet. (#7023)
Also handle pretty formatted JSON documents.
This commit is contained in:
@@ -28,8 +28,6 @@ import (
|
||||
"github.com/minio/minio/pkg/dns"
|
||||
"github.com/minio/minio/pkg/event"
|
||||
"github.com/minio/minio/pkg/hash"
|
||||
"github.com/minio/minio/pkg/s3select"
|
||||
"github.com/minio/minio/pkg/s3select/format"
|
||||
)
|
||||
|
||||
// APIError structure
|
||||
@@ -1512,168 +1510,6 @@ func toAPIErrorCode(ctx context.Context, err error) (apiErr APIErrorCode) {
|
||||
case errOperationTimedOut, context.Canceled, context.DeadlineExceeded:
|
||||
apiErr = ErrOperationTimedOut
|
||||
}
|
||||
switch err {
|
||||
case s3select.ErrBusy:
|
||||
apiErr = ErrBusy
|
||||
case s3select.ErrUnauthorizedAccess:
|
||||
apiErr = ErrUnauthorizedAccess
|
||||
case s3select.ErrExpressionTooLong:
|
||||
apiErr = ErrExpressionTooLong
|
||||
case s3select.ErrIllegalSQLFunctionArgument:
|
||||
apiErr = ErrIllegalSQLFunctionArgument
|
||||
case s3select.ErrInvalidKeyPath:
|
||||
apiErr = ErrInvalidKeyPath
|
||||
case s3select.ErrInvalidCompressionFormat:
|
||||
apiErr = ErrInvalidCompressionFormat
|
||||
case s3select.ErrInvalidFileHeaderInfo:
|
||||
apiErr = ErrInvalidFileHeaderInfo
|
||||
case s3select.ErrInvalidJSONType:
|
||||
apiErr = ErrInvalidJSONType
|
||||
case s3select.ErrInvalidQuoteFields:
|
||||
apiErr = ErrInvalidQuoteFields
|
||||
case s3select.ErrInvalidRequestParameter:
|
||||
apiErr = ErrInvalidRequestParameter
|
||||
case s3select.ErrInvalidDataType:
|
||||
apiErr = ErrInvalidDataType
|
||||
case s3select.ErrInvalidTextEncoding:
|
||||
apiErr = ErrInvalidTextEncoding
|
||||
case s3select.ErrInvalidTableAlias:
|
||||
apiErr = ErrInvalidTableAlias
|
||||
case s3select.ErrMissingRequiredParameter:
|
||||
apiErr = ErrMissingRequiredParameter
|
||||
case s3select.ErrObjectSerializationConflict:
|
||||
apiErr = ErrObjectSerializationConflict
|
||||
case s3select.ErrUnsupportedSQLOperation:
|
||||
apiErr = ErrUnsupportedSQLOperation
|
||||
case s3select.ErrUnsupportedSQLStructure:
|
||||
apiErr = ErrUnsupportedSQLStructure
|
||||
case s3select.ErrUnsupportedSyntax:
|
||||
apiErr = ErrUnsupportedSyntax
|
||||
case s3select.ErrUnsupportedRangeHeader:
|
||||
apiErr = ErrUnsupportedRangeHeader
|
||||
case s3select.ErrLexerInvalidChar:
|
||||
apiErr = ErrLexerInvalidChar
|
||||
case s3select.ErrLexerInvalidOperator:
|
||||
apiErr = ErrLexerInvalidOperator
|
||||
case s3select.ErrLexerInvalidLiteral:
|
||||
apiErr = ErrLexerInvalidLiteral
|
||||
case s3select.ErrLexerInvalidIONLiteral:
|
||||
apiErr = ErrLexerInvalidIONLiteral
|
||||
case s3select.ErrParseExpectedDatePart:
|
||||
apiErr = ErrParseExpectedDatePart
|
||||
case s3select.ErrParseExpectedKeyword:
|
||||
apiErr = ErrParseExpectedKeyword
|
||||
case s3select.ErrParseExpectedTokenType:
|
||||
apiErr = ErrParseExpectedTokenType
|
||||
case s3select.ErrParseExpected2TokenTypes:
|
||||
apiErr = ErrParseExpected2TokenTypes
|
||||
case s3select.ErrParseExpectedNumber:
|
||||
apiErr = ErrParseExpectedNumber
|
||||
case s3select.ErrParseExpectedRightParenBuiltinFunctionCall:
|
||||
apiErr = ErrParseExpectedRightParenBuiltinFunctionCall
|
||||
case s3select.ErrParseExpectedTypeName:
|
||||
apiErr = ErrParseExpectedTypeName
|
||||
case s3select.ErrParseExpectedWhenClause:
|
||||
apiErr = ErrParseExpectedWhenClause
|
||||
case s3select.ErrParseUnsupportedToken:
|
||||
apiErr = ErrParseUnsupportedToken
|
||||
case s3select.ErrParseUnsupportedLiteralsGroupBy:
|
||||
apiErr = ErrParseUnsupportedLiteralsGroupBy
|
||||
case s3select.ErrParseExpectedMember:
|
||||
apiErr = ErrParseExpectedMember
|
||||
case s3select.ErrParseUnsupportedSelect:
|
||||
apiErr = ErrParseUnsupportedSelect
|
||||
case s3select.ErrParseUnsupportedCase:
|
||||
apiErr = ErrParseUnsupportedCase
|
||||
case s3select.ErrParseUnsupportedCaseClause:
|
||||
apiErr = ErrParseUnsupportedCaseClause
|
||||
case s3select.ErrParseUnsupportedAlias:
|
||||
apiErr = ErrParseUnsupportedAlias
|
||||
case s3select.ErrParseUnsupportedSyntax:
|
||||
apiErr = ErrParseUnsupportedSyntax
|
||||
case s3select.ErrParseUnknownOperator:
|
||||
apiErr = ErrParseUnknownOperator
|
||||
case s3select.ErrParseMissingIdentAfterAt:
|
||||
apiErr = ErrParseMissingIdentAfterAt
|
||||
case s3select.ErrParseUnexpectedOperator:
|
||||
apiErr = ErrParseUnexpectedOperator
|
||||
case s3select.ErrParseUnexpectedTerm:
|
||||
apiErr = ErrParseUnexpectedTerm
|
||||
case s3select.ErrParseUnexpectedToken:
|
||||
apiErr = ErrParseUnexpectedToken
|
||||
case s3select.ErrParseUnexpectedKeyword:
|
||||
apiErr = ErrParseUnexpectedKeyword
|
||||
case s3select.ErrParseExpectedExpression:
|
||||
apiErr = ErrParseExpectedExpression
|
||||
case s3select.ErrParseExpectedLeftParenAfterCast:
|
||||
apiErr = ErrParseExpectedLeftParenAfterCast
|
||||
case s3select.ErrParseExpectedLeftParenValueConstructor:
|
||||
apiErr = ErrParseExpectedLeftParenValueConstructor
|
||||
case s3select.ErrParseExpectedLeftParenBuiltinFunctionCall:
|
||||
apiErr = ErrParseExpectedLeftParenBuiltinFunctionCall
|
||||
case s3select.ErrParseExpectedArgumentDelimiter:
|
||||
apiErr = ErrParseExpectedArgumentDelimiter
|
||||
case s3select.ErrParseCastArity:
|
||||
apiErr = ErrParseCastArity
|
||||
case s3select.ErrParseInvalidTypeParam:
|
||||
apiErr = ErrParseInvalidTypeParam
|
||||
case s3select.ErrParseEmptySelect:
|
||||
apiErr = ErrParseEmptySelect
|
||||
case s3select.ErrParseSelectMissingFrom:
|
||||
apiErr = ErrParseSelectMissingFrom
|
||||
case s3select.ErrParseExpectedIdentForGroupName:
|
||||
apiErr = ErrParseExpectedIdentForGroupName
|
||||
case s3select.ErrParseExpectedIdentForAlias:
|
||||
apiErr = ErrParseExpectedIdentForAlias
|
||||
case s3select.ErrParseUnsupportedCallWithStar:
|
||||
apiErr = ErrParseUnsupportedCallWithStar
|
||||
case s3select.ErrParseNonUnaryAgregateFunctionCall:
|
||||
apiErr = ErrParseNonUnaryAgregateFunctionCall
|
||||
case s3select.ErrParseMalformedJoin:
|
||||
apiErr = ErrParseMalformedJoin
|
||||
case s3select.ErrParseExpectedIdentForAt:
|
||||
apiErr = ErrParseExpectedIdentForAt
|
||||
case s3select.ErrParseAsteriskIsNotAloneInSelectList:
|
||||
apiErr = ErrParseAsteriskIsNotAloneInSelectList
|
||||
case s3select.ErrParseCannotMixSqbAndWildcardInSelectList:
|
||||
apiErr = ErrParseCannotMixSqbAndWildcardInSelectList
|
||||
case s3select.ErrParseInvalidContextForWildcardInSelectList:
|
||||
apiErr = ErrParseInvalidContextForWildcardInSelectList
|
||||
case s3select.ErrIncorrectSQLFunctionArgumentType:
|
||||
apiErr = ErrIncorrectSQLFunctionArgumentType
|
||||
case s3select.ErrValueParseFailure:
|
||||
apiErr = ErrValueParseFailure
|
||||
case s3select.ErrIntegerOverflow:
|
||||
apiErr = ErrIntegerOverflow
|
||||
case s3select.ErrLikeInvalidInputs:
|
||||
apiErr = ErrLikeInvalidInputs
|
||||
case s3select.ErrCastFailed:
|
||||
apiErr = ErrCastFailed
|
||||
case s3select.ErrInvalidCast:
|
||||
apiErr = ErrInvalidCast
|
||||
case s3select.ErrEvaluatorInvalidTimestampFormatPattern:
|
||||
apiErr = ErrEvaluatorInvalidTimestampFormatPattern
|
||||
case s3select.ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing:
|
||||
apiErr = ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing
|
||||
case s3select.ErrEvaluatorTimestampFormatPatternDuplicateFields:
|
||||
apiErr = ErrEvaluatorTimestampFormatPatternDuplicateFields
|
||||
case s3select.ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch:
|
||||
apiErr = ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch
|
||||
case s3select.ErrEvaluatorUnterminatedTimestampFormatPatternToken:
|
||||
apiErr = ErrEvaluatorUnterminatedTimestampFormatPatternToken
|
||||
case s3select.ErrEvaluatorInvalidTimestampFormatPatternToken:
|
||||
apiErr = ErrEvaluatorInvalidTimestampFormatPatternToken
|
||||
case s3select.ErrEvaluatorInvalidTimestampFormatPatternSymbol:
|
||||
apiErr = ErrEvaluatorInvalidTimestampFormatPatternSymbol
|
||||
case s3select.ErrEvaluatorBindingDoesNotExist:
|
||||
apiErr = ErrEvaluatorBindingDoesNotExist
|
||||
case s3select.ErrMissingHeaders:
|
||||
apiErr = ErrMissingHeaders
|
||||
case format.ErrParseInvalidPathComponent:
|
||||
apiErr = ErrMissingHeaders
|
||||
case format.ErrInvalidColumnIndex:
|
||||
apiErr = ErrInvalidColumnIndex
|
||||
}
|
||||
|
||||
// Compression errors
|
||||
switch err {
|
||||
|
||||
@@ -33,7 +33,6 @@ import (
|
||||
|
||||
snappy "github.com/golang/snappy"
|
||||
"github.com/gorilla/mux"
|
||||
"github.com/klauspost/readahead"
|
||||
miniogo "github.com/minio/minio-go"
|
||||
"github.com/minio/minio-go/pkg/encrypt"
|
||||
"github.com/minio/minio/cmd/crypto"
|
||||
@@ -106,6 +105,12 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
|
||||
writeErrorResponseHeadersOnly(w, toAPIErrorCode(ctx, err))
|
||||
return
|
||||
}
|
||||
|
||||
getObjectInfo := objectAPI.GetObjectInfo
|
||||
if api.CacheAPI() != nil {
|
||||
getObjectInfo = api.CacheAPI().GetObjectInfo
|
||||
}
|
||||
|
||||
// Check for auth type to return S3 compatible error.
|
||||
// type to return the correct error (NoSuchKey vs AccessDenied)
|
||||
if s3Error := checkRequestAuthType(ctx, r, policy.GetObjectAction, bucket, object); s3Error != ErrNone {
|
||||
@@ -129,11 +134,6 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
|
||||
ConditionValues: getConditionValues(r, ""),
|
||||
IsOwner: false,
|
||||
}) {
|
||||
getObjectInfo := objectAPI.GetObjectInfo
|
||||
if api.CacheAPI() != nil {
|
||||
getObjectInfo = api.CacheAPI().GetObjectInfo
|
||||
}
|
||||
|
||||
_, err = getObjectInfo(ctx, bucket, object, opts)
|
||||
if toAPIErrorCode(ctx, err) == ErrNoSuchKey {
|
||||
s3Error = ErrNoSuchKey
|
||||
@@ -156,18 +156,14 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
|
||||
return
|
||||
}
|
||||
|
||||
var selectReq s3select.ObjectSelectRequest
|
||||
if err := xmlDecoder(r.Body, &selectReq, r.ContentLength); err != nil {
|
||||
writeErrorResponse(w, ErrMalformedXML, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
|
||||
if !strings.EqualFold(string(selectReq.ExpressionType), "SQL") {
|
||||
writeErrorResponse(w, ErrInvalidExpressionType, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
if len(selectReq.Expression) >= s3select.MaxExpressionLength {
|
||||
writeErrorResponse(w, ErrExpressionTooLong, r.URL, guessIsBrowserReq(r))
|
||||
s3Select, err := s3select.NewS3Select(r.Body)
|
||||
if err != nil {
|
||||
if serr, ok := err.(s3select.SelectError); ok {
|
||||
w.WriteHeader(serr.HTTPStatusCode())
|
||||
w.Write(s3select.NewErrorMessage(serr.ErrorCode(), serr.ErrorMessage()))
|
||||
} else {
|
||||
writeErrorResponse(w, ErrInternalError, r.URL, guessIsBrowserReq(r))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -175,123 +171,38 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
|
||||
if api.CacheAPI() != nil {
|
||||
getObjectNInfo = api.CacheAPI().GetObjectNInfo
|
||||
}
|
||||
getObject := func(offset, length int64) (rc io.ReadCloser, err error) {
|
||||
isSuffixLength := false
|
||||
if offset < 0 {
|
||||
isSuffixLength = true
|
||||
}
|
||||
rs := &HTTPRangeSpec{
|
||||
IsSuffixLength: isSuffixLength,
|
||||
Start: offset,
|
||||
End: length,
|
||||
}
|
||||
|
||||
gr, err := getObjectNInfo(ctx, bucket, object, nil, r.Header, readLock, opts)
|
||||
return getObjectNInfo(ctx, bucket, object, rs, r.Header, readLock, ObjectOptions{})
|
||||
}
|
||||
|
||||
if err = s3Select.Open(getObject); err != nil {
|
||||
if serr, ok := err.(s3select.SelectError); ok {
|
||||
w.WriteHeader(serr.HTTPStatusCode())
|
||||
w.Write(s3select.NewErrorMessage(serr.ErrorCode(), serr.ErrorMessage()))
|
||||
} else {
|
||||
writeErrorResponse(w, ErrInternalError, r.URL, guessIsBrowserReq(r))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
s3Select.Evaluate(w)
|
||||
s3Select.Close()
|
||||
|
||||
objInfo, err := getObjectInfo(ctx, bucket, object, opts)
|
||||
if err != nil {
|
||||
writeErrorResponse(w, toAPIErrorCode(ctx, err), r.URL, guessIsBrowserReq(r))
|
||||
logger.LogIf(ctx, err)
|
||||
return
|
||||
}
|
||||
defer gr.Close()
|
||||
|
||||
objInfo := gr.ObjInfo
|
||||
|
||||
if selectReq.InputSerialization.CompressionType == s3select.SelectCompressionGZIP {
|
||||
if !strings.Contains(objInfo.ContentType, "gzip") {
|
||||
writeErrorResponse(w, ErrInvalidDataSource, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
}
|
||||
if selectReq.InputSerialization.CompressionType == s3select.SelectCompressionBZIP {
|
||||
if !strings.Contains(objInfo.ContentType, "bzip") {
|
||||
writeErrorResponse(w, ErrInvalidDataSource, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
}
|
||||
if selectReq.InputSerialization.CompressionType == "" {
|
||||
selectReq.InputSerialization.CompressionType = s3select.SelectCompressionNONE
|
||||
if !strings.Contains(objInfo.ContentType, "text/csv") && !strings.Contains(objInfo.ContentType, "application/json") {
|
||||
writeErrorResponse(w, ErrInvalidDataSource, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
}
|
||||
if !strings.EqualFold(string(selectReq.ExpressionType), "SQL") {
|
||||
writeErrorResponse(w, ErrInvalidExpressionType, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
if len(selectReq.Expression) >= s3select.MaxExpressionLength {
|
||||
writeErrorResponse(w, ErrExpressionTooLong, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
if selectReq.InputSerialization.CSV == nil && selectReq.InputSerialization.JSON == nil {
|
||||
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
if selectReq.OutputSerialization.CSV == nil && selectReq.OutputSerialization.JSON == nil {
|
||||
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
|
||||
if selectReq.InputSerialization.CSV != nil {
|
||||
if selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoUse &&
|
||||
selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoNone &&
|
||||
selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoIgnore &&
|
||||
selectReq.InputSerialization.CSV.FileHeaderInfo != "" {
|
||||
writeErrorResponse(w, ErrInvalidFileHeaderInfo, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
if selectReq.OutputSerialization.CSV != nil {
|
||||
if selectReq.OutputSerialization.CSV.QuoteFields != s3select.CSVQuoteFieldsAlways &&
|
||||
selectReq.OutputSerialization.CSV.QuoteFields != s3select.CSVQuoteFieldsAsNeeded &&
|
||||
selectReq.OutputSerialization.CSV.QuoteFields != "" {
|
||||
writeErrorResponse(w, ErrInvalidQuoteFields, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
}
|
||||
if len(selectReq.InputSerialization.CSV.RecordDelimiter) > 2 {
|
||||
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
if selectReq.InputSerialization.JSON != nil {
|
||||
if selectReq.InputSerialization.JSON.Type != s3select.JSONLinesType {
|
||||
writeErrorResponse(w, ErrInvalidJSONType, r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Set encryption response headers
|
||||
if objectAPI.IsEncryptionSupported() {
|
||||
objInfo.UserDefined = CleanMinioInternalMetadataKeys(objInfo.UserDefined)
|
||||
if crypto.IsEncrypted(objInfo.UserDefined) {
|
||||
switch {
|
||||
case crypto.S3.IsEncrypted(objInfo.UserDefined):
|
||||
w.Header().Set(crypto.SSEHeader, crypto.SSEAlgorithmAES256)
|
||||
case crypto.SSEC.IsEncrypted(objInfo.UserDefined):
|
||||
w.Header().Set(crypto.SSECAlgorithm, r.Header.Get(crypto.SSECAlgorithm))
|
||||
w.Header().Set(crypto.SSECKeyMD5, r.Header.Get(crypto.SSECKeyMD5))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
reader := readahead.NewReader(gr)
|
||||
defer reader.Close()
|
||||
|
||||
size := objInfo.Size
|
||||
if objInfo.IsCompressed() {
|
||||
size = objInfo.GetActualSize()
|
||||
if size < 0 {
|
||||
writeErrorResponse(w, toAPIErrorCode(ctx, errInvalidDecompressedSize), r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
s3s, err := s3select.New(reader, size, selectReq)
|
||||
if err != nil {
|
||||
writeErrorResponse(w, toAPIErrorCode(ctx, err), r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
|
||||
// Parses the select query and checks for an error
|
||||
_, _, _, _, _, _, err = s3select.ParseSelect(s3s)
|
||||
if err != nil {
|
||||
writeErrorResponse(w, toAPIErrorCode(ctx, err), r.URL, guessIsBrowserReq(r))
|
||||
return
|
||||
}
|
||||
|
||||
// Executes the query on data-set
|
||||
s3select.Execute(w, s3s)
|
||||
|
||||
// Get host and port from Request.RemoteAddr.
|
||||
host, port, err := net.SplitHostPort(handlers.GetSourceIP(r))
|
||||
|
||||
Reference in New Issue
Block a user