Refactor s3select to support parquet. (#7023)

Also handle pretty formatted JSON documents.
This commit is contained in:
Bala FA
2019-01-09 06:23:04 +05:30
committed by kannappanr
parent e98d89274f
commit b0deea27df
124 changed files with 27376 additions and 4152 deletions

View File

@@ -28,8 +28,6 @@ import (
"github.com/minio/minio/pkg/dns"
"github.com/minio/minio/pkg/event"
"github.com/minio/minio/pkg/hash"
"github.com/minio/minio/pkg/s3select"
"github.com/minio/minio/pkg/s3select/format"
)
// APIError structure
@@ -1512,168 +1510,6 @@ func toAPIErrorCode(ctx context.Context, err error) (apiErr APIErrorCode) {
case errOperationTimedOut, context.Canceled, context.DeadlineExceeded:
apiErr = ErrOperationTimedOut
}
switch err {
case s3select.ErrBusy:
apiErr = ErrBusy
case s3select.ErrUnauthorizedAccess:
apiErr = ErrUnauthorizedAccess
case s3select.ErrExpressionTooLong:
apiErr = ErrExpressionTooLong
case s3select.ErrIllegalSQLFunctionArgument:
apiErr = ErrIllegalSQLFunctionArgument
case s3select.ErrInvalidKeyPath:
apiErr = ErrInvalidKeyPath
case s3select.ErrInvalidCompressionFormat:
apiErr = ErrInvalidCompressionFormat
case s3select.ErrInvalidFileHeaderInfo:
apiErr = ErrInvalidFileHeaderInfo
case s3select.ErrInvalidJSONType:
apiErr = ErrInvalidJSONType
case s3select.ErrInvalidQuoteFields:
apiErr = ErrInvalidQuoteFields
case s3select.ErrInvalidRequestParameter:
apiErr = ErrInvalidRequestParameter
case s3select.ErrInvalidDataType:
apiErr = ErrInvalidDataType
case s3select.ErrInvalidTextEncoding:
apiErr = ErrInvalidTextEncoding
case s3select.ErrInvalidTableAlias:
apiErr = ErrInvalidTableAlias
case s3select.ErrMissingRequiredParameter:
apiErr = ErrMissingRequiredParameter
case s3select.ErrObjectSerializationConflict:
apiErr = ErrObjectSerializationConflict
case s3select.ErrUnsupportedSQLOperation:
apiErr = ErrUnsupportedSQLOperation
case s3select.ErrUnsupportedSQLStructure:
apiErr = ErrUnsupportedSQLStructure
case s3select.ErrUnsupportedSyntax:
apiErr = ErrUnsupportedSyntax
case s3select.ErrUnsupportedRangeHeader:
apiErr = ErrUnsupportedRangeHeader
case s3select.ErrLexerInvalidChar:
apiErr = ErrLexerInvalidChar
case s3select.ErrLexerInvalidOperator:
apiErr = ErrLexerInvalidOperator
case s3select.ErrLexerInvalidLiteral:
apiErr = ErrLexerInvalidLiteral
case s3select.ErrLexerInvalidIONLiteral:
apiErr = ErrLexerInvalidIONLiteral
case s3select.ErrParseExpectedDatePart:
apiErr = ErrParseExpectedDatePart
case s3select.ErrParseExpectedKeyword:
apiErr = ErrParseExpectedKeyword
case s3select.ErrParseExpectedTokenType:
apiErr = ErrParseExpectedTokenType
case s3select.ErrParseExpected2TokenTypes:
apiErr = ErrParseExpected2TokenTypes
case s3select.ErrParseExpectedNumber:
apiErr = ErrParseExpectedNumber
case s3select.ErrParseExpectedRightParenBuiltinFunctionCall:
apiErr = ErrParseExpectedRightParenBuiltinFunctionCall
case s3select.ErrParseExpectedTypeName:
apiErr = ErrParseExpectedTypeName
case s3select.ErrParseExpectedWhenClause:
apiErr = ErrParseExpectedWhenClause
case s3select.ErrParseUnsupportedToken:
apiErr = ErrParseUnsupportedToken
case s3select.ErrParseUnsupportedLiteralsGroupBy:
apiErr = ErrParseUnsupportedLiteralsGroupBy
case s3select.ErrParseExpectedMember:
apiErr = ErrParseExpectedMember
case s3select.ErrParseUnsupportedSelect:
apiErr = ErrParseUnsupportedSelect
case s3select.ErrParseUnsupportedCase:
apiErr = ErrParseUnsupportedCase
case s3select.ErrParseUnsupportedCaseClause:
apiErr = ErrParseUnsupportedCaseClause
case s3select.ErrParseUnsupportedAlias:
apiErr = ErrParseUnsupportedAlias
case s3select.ErrParseUnsupportedSyntax:
apiErr = ErrParseUnsupportedSyntax
case s3select.ErrParseUnknownOperator:
apiErr = ErrParseUnknownOperator
case s3select.ErrParseMissingIdentAfterAt:
apiErr = ErrParseMissingIdentAfterAt
case s3select.ErrParseUnexpectedOperator:
apiErr = ErrParseUnexpectedOperator
case s3select.ErrParseUnexpectedTerm:
apiErr = ErrParseUnexpectedTerm
case s3select.ErrParseUnexpectedToken:
apiErr = ErrParseUnexpectedToken
case s3select.ErrParseUnexpectedKeyword:
apiErr = ErrParseUnexpectedKeyword
case s3select.ErrParseExpectedExpression:
apiErr = ErrParseExpectedExpression
case s3select.ErrParseExpectedLeftParenAfterCast:
apiErr = ErrParseExpectedLeftParenAfterCast
case s3select.ErrParseExpectedLeftParenValueConstructor:
apiErr = ErrParseExpectedLeftParenValueConstructor
case s3select.ErrParseExpectedLeftParenBuiltinFunctionCall:
apiErr = ErrParseExpectedLeftParenBuiltinFunctionCall
case s3select.ErrParseExpectedArgumentDelimiter:
apiErr = ErrParseExpectedArgumentDelimiter
case s3select.ErrParseCastArity:
apiErr = ErrParseCastArity
case s3select.ErrParseInvalidTypeParam:
apiErr = ErrParseInvalidTypeParam
case s3select.ErrParseEmptySelect:
apiErr = ErrParseEmptySelect
case s3select.ErrParseSelectMissingFrom:
apiErr = ErrParseSelectMissingFrom
case s3select.ErrParseExpectedIdentForGroupName:
apiErr = ErrParseExpectedIdentForGroupName
case s3select.ErrParseExpectedIdentForAlias:
apiErr = ErrParseExpectedIdentForAlias
case s3select.ErrParseUnsupportedCallWithStar:
apiErr = ErrParseUnsupportedCallWithStar
case s3select.ErrParseNonUnaryAgregateFunctionCall:
apiErr = ErrParseNonUnaryAgregateFunctionCall
case s3select.ErrParseMalformedJoin:
apiErr = ErrParseMalformedJoin
case s3select.ErrParseExpectedIdentForAt:
apiErr = ErrParseExpectedIdentForAt
case s3select.ErrParseAsteriskIsNotAloneInSelectList:
apiErr = ErrParseAsteriskIsNotAloneInSelectList
case s3select.ErrParseCannotMixSqbAndWildcardInSelectList:
apiErr = ErrParseCannotMixSqbAndWildcardInSelectList
case s3select.ErrParseInvalidContextForWildcardInSelectList:
apiErr = ErrParseInvalidContextForWildcardInSelectList
case s3select.ErrIncorrectSQLFunctionArgumentType:
apiErr = ErrIncorrectSQLFunctionArgumentType
case s3select.ErrValueParseFailure:
apiErr = ErrValueParseFailure
case s3select.ErrIntegerOverflow:
apiErr = ErrIntegerOverflow
case s3select.ErrLikeInvalidInputs:
apiErr = ErrLikeInvalidInputs
case s3select.ErrCastFailed:
apiErr = ErrCastFailed
case s3select.ErrInvalidCast:
apiErr = ErrInvalidCast
case s3select.ErrEvaluatorInvalidTimestampFormatPattern:
apiErr = ErrEvaluatorInvalidTimestampFormatPattern
case s3select.ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing:
apiErr = ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing
case s3select.ErrEvaluatorTimestampFormatPatternDuplicateFields:
apiErr = ErrEvaluatorTimestampFormatPatternDuplicateFields
case s3select.ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch:
apiErr = ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch
case s3select.ErrEvaluatorUnterminatedTimestampFormatPatternToken:
apiErr = ErrEvaluatorUnterminatedTimestampFormatPatternToken
case s3select.ErrEvaluatorInvalidTimestampFormatPatternToken:
apiErr = ErrEvaluatorInvalidTimestampFormatPatternToken
case s3select.ErrEvaluatorInvalidTimestampFormatPatternSymbol:
apiErr = ErrEvaluatorInvalidTimestampFormatPatternSymbol
case s3select.ErrEvaluatorBindingDoesNotExist:
apiErr = ErrEvaluatorBindingDoesNotExist
case s3select.ErrMissingHeaders:
apiErr = ErrMissingHeaders
case format.ErrParseInvalidPathComponent:
apiErr = ErrMissingHeaders
case format.ErrInvalidColumnIndex:
apiErr = ErrInvalidColumnIndex
}
// Compression errors
switch err {

View File

@@ -33,7 +33,6 @@ import (
snappy "github.com/golang/snappy"
"github.com/gorilla/mux"
"github.com/klauspost/readahead"
miniogo "github.com/minio/minio-go"
"github.com/minio/minio-go/pkg/encrypt"
"github.com/minio/minio/cmd/crypto"
@@ -106,6 +105,12 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
writeErrorResponseHeadersOnly(w, toAPIErrorCode(ctx, err))
return
}
getObjectInfo := objectAPI.GetObjectInfo
if api.CacheAPI() != nil {
getObjectInfo = api.CacheAPI().GetObjectInfo
}
// Check for auth type to return S3 compatible error.
// type to return the correct error (NoSuchKey vs AccessDenied)
if s3Error := checkRequestAuthType(ctx, r, policy.GetObjectAction, bucket, object); s3Error != ErrNone {
@@ -129,11 +134,6 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
ConditionValues: getConditionValues(r, ""),
IsOwner: false,
}) {
getObjectInfo := objectAPI.GetObjectInfo
if api.CacheAPI() != nil {
getObjectInfo = api.CacheAPI().GetObjectInfo
}
_, err = getObjectInfo(ctx, bucket, object, opts)
if toAPIErrorCode(ctx, err) == ErrNoSuchKey {
s3Error = ErrNoSuchKey
@@ -156,18 +156,14 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
return
}
var selectReq s3select.ObjectSelectRequest
if err := xmlDecoder(r.Body, &selectReq, r.ContentLength); err != nil {
writeErrorResponse(w, ErrMalformedXML, r.URL, guessIsBrowserReq(r))
return
}
if !strings.EqualFold(string(selectReq.ExpressionType), "SQL") {
writeErrorResponse(w, ErrInvalidExpressionType, r.URL, guessIsBrowserReq(r))
return
}
if len(selectReq.Expression) >= s3select.MaxExpressionLength {
writeErrorResponse(w, ErrExpressionTooLong, r.URL, guessIsBrowserReq(r))
s3Select, err := s3select.NewS3Select(r.Body)
if err != nil {
if serr, ok := err.(s3select.SelectError); ok {
w.WriteHeader(serr.HTTPStatusCode())
w.Write(s3select.NewErrorMessage(serr.ErrorCode(), serr.ErrorMessage()))
} else {
writeErrorResponse(w, ErrInternalError, r.URL, guessIsBrowserReq(r))
}
return
}
@@ -175,123 +171,38 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
if api.CacheAPI() != nil {
getObjectNInfo = api.CacheAPI().GetObjectNInfo
}
getObject := func(offset, length int64) (rc io.ReadCloser, err error) {
isSuffixLength := false
if offset < 0 {
isSuffixLength = true
}
rs := &HTTPRangeSpec{
IsSuffixLength: isSuffixLength,
Start: offset,
End: length,
}
gr, err := getObjectNInfo(ctx, bucket, object, nil, r.Header, readLock, opts)
return getObjectNInfo(ctx, bucket, object, rs, r.Header, readLock, ObjectOptions{})
}
if err = s3Select.Open(getObject); err != nil {
if serr, ok := err.(s3select.SelectError); ok {
w.WriteHeader(serr.HTTPStatusCode())
w.Write(s3select.NewErrorMessage(serr.ErrorCode(), serr.ErrorMessage()))
} else {
writeErrorResponse(w, ErrInternalError, r.URL, guessIsBrowserReq(r))
}
return
}
s3Select.Evaluate(w)
s3Select.Close()
objInfo, err := getObjectInfo(ctx, bucket, object, opts)
if err != nil {
writeErrorResponse(w, toAPIErrorCode(ctx, err), r.URL, guessIsBrowserReq(r))
logger.LogIf(ctx, err)
return
}
defer gr.Close()
objInfo := gr.ObjInfo
if selectReq.InputSerialization.CompressionType == s3select.SelectCompressionGZIP {
if !strings.Contains(objInfo.ContentType, "gzip") {
writeErrorResponse(w, ErrInvalidDataSource, r.URL, guessIsBrowserReq(r))
return
}
}
if selectReq.InputSerialization.CompressionType == s3select.SelectCompressionBZIP {
if !strings.Contains(objInfo.ContentType, "bzip") {
writeErrorResponse(w, ErrInvalidDataSource, r.URL, guessIsBrowserReq(r))
return
}
}
if selectReq.InputSerialization.CompressionType == "" {
selectReq.InputSerialization.CompressionType = s3select.SelectCompressionNONE
if !strings.Contains(objInfo.ContentType, "text/csv") && !strings.Contains(objInfo.ContentType, "application/json") {
writeErrorResponse(w, ErrInvalidDataSource, r.URL, guessIsBrowserReq(r))
return
}
}
if !strings.EqualFold(string(selectReq.ExpressionType), "SQL") {
writeErrorResponse(w, ErrInvalidExpressionType, r.URL, guessIsBrowserReq(r))
return
}
if len(selectReq.Expression) >= s3select.MaxExpressionLength {
writeErrorResponse(w, ErrExpressionTooLong, r.URL, guessIsBrowserReq(r))
return
}
if selectReq.InputSerialization.CSV == nil && selectReq.InputSerialization.JSON == nil {
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL, guessIsBrowserReq(r))
return
}
if selectReq.OutputSerialization.CSV == nil && selectReq.OutputSerialization.JSON == nil {
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL, guessIsBrowserReq(r))
return
}
if selectReq.InputSerialization.CSV != nil {
if selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoUse &&
selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoNone &&
selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoIgnore &&
selectReq.InputSerialization.CSV.FileHeaderInfo != "" {
writeErrorResponse(w, ErrInvalidFileHeaderInfo, r.URL, guessIsBrowserReq(r))
return
}
if selectReq.OutputSerialization.CSV != nil {
if selectReq.OutputSerialization.CSV.QuoteFields != s3select.CSVQuoteFieldsAlways &&
selectReq.OutputSerialization.CSV.QuoteFields != s3select.CSVQuoteFieldsAsNeeded &&
selectReq.OutputSerialization.CSV.QuoteFields != "" {
writeErrorResponse(w, ErrInvalidQuoteFields, r.URL, guessIsBrowserReq(r))
return
}
}
if len(selectReq.InputSerialization.CSV.RecordDelimiter) > 2 {
writeErrorResponse(w, ErrInvalidRequestParameter, r.URL, guessIsBrowserReq(r))
return
}
}
if selectReq.InputSerialization.JSON != nil {
if selectReq.InputSerialization.JSON.Type != s3select.JSONLinesType {
writeErrorResponse(w, ErrInvalidJSONType, r.URL, guessIsBrowserReq(r))
return
}
}
// Set encryption response headers
if objectAPI.IsEncryptionSupported() {
objInfo.UserDefined = CleanMinioInternalMetadataKeys(objInfo.UserDefined)
if crypto.IsEncrypted(objInfo.UserDefined) {
switch {
case crypto.S3.IsEncrypted(objInfo.UserDefined):
w.Header().Set(crypto.SSEHeader, crypto.SSEAlgorithmAES256)
case crypto.SSEC.IsEncrypted(objInfo.UserDefined):
w.Header().Set(crypto.SSECAlgorithm, r.Header.Get(crypto.SSECAlgorithm))
w.Header().Set(crypto.SSECKeyMD5, r.Header.Get(crypto.SSECKeyMD5))
}
}
}
reader := readahead.NewReader(gr)
defer reader.Close()
size := objInfo.Size
if objInfo.IsCompressed() {
size = objInfo.GetActualSize()
if size < 0 {
writeErrorResponse(w, toAPIErrorCode(ctx, errInvalidDecompressedSize), r.URL, guessIsBrowserReq(r))
return
}
}
s3s, err := s3select.New(reader, size, selectReq)
if err != nil {
writeErrorResponse(w, toAPIErrorCode(ctx, err), r.URL, guessIsBrowserReq(r))
return
}
// Parses the select query and checks for an error
_, _, _, _, _, _, err = s3select.ParseSelect(s3s)
if err != nil {
writeErrorResponse(w, toAPIErrorCode(ctx, err), r.URL, guessIsBrowserReq(r))
return
}
// Executes the query on data-set
s3select.Execute(w, s3s)
// Get host and port from Request.RemoteAddr.
host, port, err := net.SplitHostPort(handlers.GetSourceIP(r))