diff --git a/.travis.yml b/.travis.yml index f572829e1..d2127be83 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,22 +16,24 @@ matrix: sudo: required env: - ARCH=x86_64 + - CGO_ENABLED=0 go: 1.11.4 script: - make - diff -au <(gofmt -s -d cmd) <(printf "") - diff -au <(gofmt -s -d pkg) <(printf "") - - for d in $(go list ./... | grep -v browser); do go test -v -race --timeout 15m "$d"; done + - for d in $(go list ./... | grep -v browser); do CGO_ENABLED=1 go test -v -race --timeout 15m "$d"; done - make verify - make coverage - cd browser && yarn && yarn test && cd .. - os: windows env: - ARCH=x86_64 + - CGO_ENABLED=0 go: 1.11.4 script: - go build --ldflags="$(go run buildscripts/gen-ldflags.go)" -o %GOPATH%\bin\minio.exe - - for d in $(go list ./... | grep -v browser); do go test -v -race --timeout 20m "$d"; done + - for d in $(go list ./... | grep -v browser); do CGO_ENABLED=1 go test -v -race --timeout 20m "$d"; done - bash buildscripts/go-coverage.sh before_script: diff --git a/Makefile b/Makefile index 16214a543..0be4b01c1 100644 --- a/Makefile +++ b/Makefile @@ -60,7 +60,7 @@ spelling: check: test test: verifiers build @echo "Running unit tests" - @go test -tags kqueue ./... + @CGO_ENABLED=0 go test -tags kqueue ./... verify: build @echo "Verifying build" diff --git a/buildscripts/go-coverage.sh b/buildscripts/go-coverage.sh index 1167a7558..f948ee543 100755 --- a/buildscripts/go-coverage.sh +++ b/buildscripts/go-coverage.sh @@ -4,7 +4,7 @@ set -e echo "" > coverage.txt for d in $(go list ./... 
| grep -v browser); do - go test -v -coverprofile=profile.out -covermode=atomic "$d" + CGO_ENABLED=0 go test -v -coverprofile=profile.out -covermode=atomic "$d" if [ -f profile.out ]; then cat profile.out >> coverage.txt rm profile.out diff --git a/cmd/api-errors.go b/cmd/api-errors.go index cafa75bbd..174a22fdc 100644 --- a/cmd/api-errors.go +++ b/cmd/api-errors.go @@ -28,8 +28,6 @@ import ( "github.com/minio/minio/pkg/dns" "github.com/minio/minio/pkg/event" "github.com/minio/minio/pkg/hash" - "github.com/minio/minio/pkg/s3select" - "github.com/minio/minio/pkg/s3select/format" ) // APIError structure @@ -1512,168 +1510,6 @@ func toAPIErrorCode(ctx context.Context, err error) (apiErr APIErrorCode) { case errOperationTimedOut, context.Canceled, context.DeadlineExceeded: apiErr = ErrOperationTimedOut } - switch err { - case s3select.ErrBusy: - apiErr = ErrBusy - case s3select.ErrUnauthorizedAccess: - apiErr = ErrUnauthorizedAccess - case s3select.ErrExpressionTooLong: - apiErr = ErrExpressionTooLong - case s3select.ErrIllegalSQLFunctionArgument: - apiErr = ErrIllegalSQLFunctionArgument - case s3select.ErrInvalidKeyPath: - apiErr = ErrInvalidKeyPath - case s3select.ErrInvalidCompressionFormat: - apiErr = ErrInvalidCompressionFormat - case s3select.ErrInvalidFileHeaderInfo: - apiErr = ErrInvalidFileHeaderInfo - case s3select.ErrInvalidJSONType: - apiErr = ErrInvalidJSONType - case s3select.ErrInvalidQuoteFields: - apiErr = ErrInvalidQuoteFields - case s3select.ErrInvalidRequestParameter: - apiErr = ErrInvalidRequestParameter - case s3select.ErrInvalidDataType: - apiErr = ErrInvalidDataType - case s3select.ErrInvalidTextEncoding: - apiErr = ErrInvalidTextEncoding - case s3select.ErrInvalidTableAlias: - apiErr = ErrInvalidTableAlias - case s3select.ErrMissingRequiredParameter: - apiErr = ErrMissingRequiredParameter - case s3select.ErrObjectSerializationConflict: - apiErr = ErrObjectSerializationConflict - case s3select.ErrUnsupportedSQLOperation: - apiErr = 
ErrUnsupportedSQLOperation - case s3select.ErrUnsupportedSQLStructure: - apiErr = ErrUnsupportedSQLStructure - case s3select.ErrUnsupportedSyntax: - apiErr = ErrUnsupportedSyntax - case s3select.ErrUnsupportedRangeHeader: - apiErr = ErrUnsupportedRangeHeader - case s3select.ErrLexerInvalidChar: - apiErr = ErrLexerInvalidChar - case s3select.ErrLexerInvalidOperator: - apiErr = ErrLexerInvalidOperator - case s3select.ErrLexerInvalidLiteral: - apiErr = ErrLexerInvalidLiteral - case s3select.ErrLexerInvalidIONLiteral: - apiErr = ErrLexerInvalidIONLiteral - case s3select.ErrParseExpectedDatePart: - apiErr = ErrParseExpectedDatePart - case s3select.ErrParseExpectedKeyword: - apiErr = ErrParseExpectedKeyword - case s3select.ErrParseExpectedTokenType: - apiErr = ErrParseExpectedTokenType - case s3select.ErrParseExpected2TokenTypes: - apiErr = ErrParseExpected2TokenTypes - case s3select.ErrParseExpectedNumber: - apiErr = ErrParseExpectedNumber - case s3select.ErrParseExpectedRightParenBuiltinFunctionCall: - apiErr = ErrParseExpectedRightParenBuiltinFunctionCall - case s3select.ErrParseExpectedTypeName: - apiErr = ErrParseExpectedTypeName - case s3select.ErrParseExpectedWhenClause: - apiErr = ErrParseExpectedWhenClause - case s3select.ErrParseUnsupportedToken: - apiErr = ErrParseUnsupportedToken - case s3select.ErrParseUnsupportedLiteralsGroupBy: - apiErr = ErrParseUnsupportedLiteralsGroupBy - case s3select.ErrParseExpectedMember: - apiErr = ErrParseExpectedMember - case s3select.ErrParseUnsupportedSelect: - apiErr = ErrParseUnsupportedSelect - case s3select.ErrParseUnsupportedCase: - apiErr = ErrParseUnsupportedCase - case s3select.ErrParseUnsupportedCaseClause: - apiErr = ErrParseUnsupportedCaseClause - case s3select.ErrParseUnsupportedAlias: - apiErr = ErrParseUnsupportedAlias - case s3select.ErrParseUnsupportedSyntax: - apiErr = ErrParseUnsupportedSyntax - case s3select.ErrParseUnknownOperator: - apiErr = ErrParseUnknownOperator - case 
s3select.ErrParseMissingIdentAfterAt: - apiErr = ErrParseMissingIdentAfterAt - case s3select.ErrParseUnexpectedOperator: - apiErr = ErrParseUnexpectedOperator - case s3select.ErrParseUnexpectedTerm: - apiErr = ErrParseUnexpectedTerm - case s3select.ErrParseUnexpectedToken: - apiErr = ErrParseUnexpectedToken - case s3select.ErrParseUnexpectedKeyword: - apiErr = ErrParseUnexpectedKeyword - case s3select.ErrParseExpectedExpression: - apiErr = ErrParseExpectedExpression - case s3select.ErrParseExpectedLeftParenAfterCast: - apiErr = ErrParseExpectedLeftParenAfterCast - case s3select.ErrParseExpectedLeftParenValueConstructor: - apiErr = ErrParseExpectedLeftParenValueConstructor - case s3select.ErrParseExpectedLeftParenBuiltinFunctionCall: - apiErr = ErrParseExpectedLeftParenBuiltinFunctionCall - case s3select.ErrParseExpectedArgumentDelimiter: - apiErr = ErrParseExpectedArgumentDelimiter - case s3select.ErrParseCastArity: - apiErr = ErrParseCastArity - case s3select.ErrParseInvalidTypeParam: - apiErr = ErrParseInvalidTypeParam - case s3select.ErrParseEmptySelect: - apiErr = ErrParseEmptySelect - case s3select.ErrParseSelectMissingFrom: - apiErr = ErrParseSelectMissingFrom - case s3select.ErrParseExpectedIdentForGroupName: - apiErr = ErrParseExpectedIdentForGroupName - case s3select.ErrParseExpectedIdentForAlias: - apiErr = ErrParseExpectedIdentForAlias - case s3select.ErrParseUnsupportedCallWithStar: - apiErr = ErrParseUnsupportedCallWithStar - case s3select.ErrParseNonUnaryAgregateFunctionCall: - apiErr = ErrParseNonUnaryAgregateFunctionCall - case s3select.ErrParseMalformedJoin: - apiErr = ErrParseMalformedJoin - case s3select.ErrParseExpectedIdentForAt: - apiErr = ErrParseExpectedIdentForAt - case s3select.ErrParseAsteriskIsNotAloneInSelectList: - apiErr = ErrParseAsteriskIsNotAloneInSelectList - case s3select.ErrParseCannotMixSqbAndWildcardInSelectList: - apiErr = ErrParseCannotMixSqbAndWildcardInSelectList - case 
s3select.ErrParseInvalidContextForWildcardInSelectList: - apiErr = ErrParseInvalidContextForWildcardInSelectList - case s3select.ErrIncorrectSQLFunctionArgumentType: - apiErr = ErrIncorrectSQLFunctionArgumentType - case s3select.ErrValueParseFailure: - apiErr = ErrValueParseFailure - case s3select.ErrIntegerOverflow: - apiErr = ErrIntegerOverflow - case s3select.ErrLikeInvalidInputs: - apiErr = ErrLikeInvalidInputs - case s3select.ErrCastFailed: - apiErr = ErrCastFailed - case s3select.ErrInvalidCast: - apiErr = ErrInvalidCast - case s3select.ErrEvaluatorInvalidTimestampFormatPattern: - apiErr = ErrEvaluatorInvalidTimestampFormatPattern - case s3select.ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing: - apiErr = ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing - case s3select.ErrEvaluatorTimestampFormatPatternDuplicateFields: - apiErr = ErrEvaluatorTimestampFormatPatternDuplicateFields - case s3select.ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch: - apiErr = ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch - case s3select.ErrEvaluatorUnterminatedTimestampFormatPatternToken: - apiErr = ErrEvaluatorUnterminatedTimestampFormatPatternToken - case s3select.ErrEvaluatorInvalidTimestampFormatPatternToken: - apiErr = ErrEvaluatorInvalidTimestampFormatPatternToken - case s3select.ErrEvaluatorInvalidTimestampFormatPatternSymbol: - apiErr = ErrEvaluatorInvalidTimestampFormatPatternSymbol - case s3select.ErrEvaluatorBindingDoesNotExist: - apiErr = ErrEvaluatorBindingDoesNotExist - case s3select.ErrMissingHeaders: - apiErr = ErrMissingHeaders - case format.ErrParseInvalidPathComponent: - apiErr = ErrMissingHeaders - case format.ErrInvalidColumnIndex: - apiErr = ErrInvalidColumnIndex - } // Compression errors switch err { diff --git a/cmd/object-handlers.go b/cmd/object-handlers.go index b7bc1610e..8e85ff7cd 100644 --- a/cmd/object-handlers.go +++ b/cmd/object-handlers.go @@ -33,7 +33,6 @@ import ( snappy "github.com/golang/snappy" 
"github.com/gorilla/mux" - "github.com/klauspost/readahead" miniogo "github.com/minio/minio-go" "github.com/minio/minio-go/pkg/encrypt" "github.com/minio/minio/cmd/crypto" @@ -106,6 +105,12 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r writeErrorResponseHeadersOnly(w, toAPIErrorCode(ctx, err)) return } + + getObjectInfo := objectAPI.GetObjectInfo + if api.CacheAPI() != nil { + getObjectInfo = api.CacheAPI().GetObjectInfo + } + // Check for auth type to return S3 compatible error. // type to return the correct error (NoSuchKey vs AccessDenied) if s3Error := checkRequestAuthType(ctx, r, policy.GetObjectAction, bucket, object); s3Error != ErrNone { @@ -129,11 +134,6 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r ConditionValues: getConditionValues(r, ""), IsOwner: false, }) { - getObjectInfo := objectAPI.GetObjectInfo - if api.CacheAPI() != nil { - getObjectInfo = api.CacheAPI().GetObjectInfo - } - _, err = getObjectInfo(ctx, bucket, object, opts) if toAPIErrorCode(ctx, err) == ErrNoSuchKey { s3Error = ErrNoSuchKey @@ -156,18 +156,14 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r return } - var selectReq s3select.ObjectSelectRequest - if err := xmlDecoder(r.Body, &selectReq, r.ContentLength); err != nil { - writeErrorResponse(w, ErrMalformedXML, r.URL, guessIsBrowserReq(r)) - return - } - - if !strings.EqualFold(string(selectReq.ExpressionType), "SQL") { - writeErrorResponse(w, ErrInvalidExpressionType, r.URL, guessIsBrowserReq(r)) - return - } - if len(selectReq.Expression) >= s3select.MaxExpressionLength { - writeErrorResponse(w, ErrExpressionTooLong, r.URL, guessIsBrowserReq(r)) + s3Select, err := s3select.NewS3Select(r.Body) + if err != nil { + if serr, ok := err.(s3select.SelectError); ok { + w.WriteHeader(serr.HTTPStatusCode()) + w.Write(s3select.NewErrorMessage(serr.ErrorCode(), serr.ErrorMessage())) + } else { + writeErrorResponse(w, 
ErrInternalError, r.URL, guessIsBrowserReq(r)) + } return } @@ -175,123 +171,38 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r if api.CacheAPI() != nil { getObjectNInfo = api.CacheAPI().GetObjectNInfo } + getObject := func(offset, length int64) (rc io.ReadCloser, err error) { + isSuffixLength := false + if offset < 0 { + isSuffixLength = true + } + rs := &HTTPRangeSpec{ + IsSuffixLength: isSuffixLength, + Start: offset, + End: length, + } - gr, err := getObjectNInfo(ctx, bucket, object, nil, r.Header, readLock, opts) + return getObjectNInfo(ctx, bucket, object, rs, r.Header, readLock, ObjectOptions{}) + } + + if err = s3Select.Open(getObject); err != nil { + if serr, ok := err.(s3select.SelectError); ok { + w.WriteHeader(serr.HTTPStatusCode()) + w.Write(s3select.NewErrorMessage(serr.ErrorCode(), serr.ErrorMessage())) + } else { + writeErrorResponse(w, ErrInternalError, r.URL, guessIsBrowserReq(r)) + } + return + } + + s3Select.Evaluate(w) + s3Select.Close() + + objInfo, err := getObjectInfo(ctx, bucket, object, opts) if err != nil { - writeErrorResponse(w, toAPIErrorCode(ctx, err), r.URL, guessIsBrowserReq(r)) + logger.LogIf(ctx, err) return } - defer gr.Close() - - objInfo := gr.ObjInfo - - if selectReq.InputSerialization.CompressionType == s3select.SelectCompressionGZIP { - if !strings.Contains(objInfo.ContentType, "gzip") { - writeErrorResponse(w, ErrInvalidDataSource, r.URL, guessIsBrowserReq(r)) - return - } - } - if selectReq.InputSerialization.CompressionType == s3select.SelectCompressionBZIP { - if !strings.Contains(objInfo.ContentType, "bzip") { - writeErrorResponse(w, ErrInvalidDataSource, r.URL, guessIsBrowserReq(r)) - return - } - } - if selectReq.InputSerialization.CompressionType == "" { - selectReq.InputSerialization.CompressionType = s3select.SelectCompressionNONE - if !strings.Contains(objInfo.ContentType, "text/csv") && !strings.Contains(objInfo.ContentType, "application/json") { - writeErrorResponse(w, 
ErrInvalidDataSource, r.URL, guessIsBrowserReq(r)) - return - } - } - if !strings.EqualFold(string(selectReq.ExpressionType), "SQL") { - writeErrorResponse(w, ErrInvalidExpressionType, r.URL, guessIsBrowserReq(r)) - return - } - if len(selectReq.Expression) >= s3select.MaxExpressionLength { - writeErrorResponse(w, ErrExpressionTooLong, r.URL, guessIsBrowserReq(r)) - return - } - if selectReq.InputSerialization.CSV == nil && selectReq.InputSerialization.JSON == nil { - writeErrorResponse(w, ErrInvalidRequestParameter, r.URL, guessIsBrowserReq(r)) - return - } - if selectReq.OutputSerialization.CSV == nil && selectReq.OutputSerialization.JSON == nil { - writeErrorResponse(w, ErrInvalidRequestParameter, r.URL, guessIsBrowserReq(r)) - return - } - - if selectReq.InputSerialization.CSV != nil { - if selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoUse && - selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoNone && - selectReq.InputSerialization.CSV.FileHeaderInfo != s3select.CSVFileHeaderInfoIgnore && - selectReq.InputSerialization.CSV.FileHeaderInfo != "" { - writeErrorResponse(w, ErrInvalidFileHeaderInfo, r.URL, guessIsBrowserReq(r)) - return - } - if selectReq.OutputSerialization.CSV != nil { - if selectReq.OutputSerialization.CSV.QuoteFields != s3select.CSVQuoteFieldsAlways && - selectReq.OutputSerialization.CSV.QuoteFields != s3select.CSVQuoteFieldsAsNeeded && - selectReq.OutputSerialization.CSV.QuoteFields != "" { - writeErrorResponse(w, ErrInvalidQuoteFields, r.URL, guessIsBrowserReq(r)) - return - } - } - if len(selectReq.InputSerialization.CSV.RecordDelimiter) > 2 { - writeErrorResponse(w, ErrInvalidRequestParameter, r.URL, guessIsBrowserReq(r)) - return - } - - } - if selectReq.InputSerialization.JSON != nil { - if selectReq.InputSerialization.JSON.Type != s3select.JSONLinesType { - writeErrorResponse(w, ErrInvalidJSONType, r.URL, guessIsBrowserReq(r)) - return - } - - } - - // Set encryption response 
headers - if objectAPI.IsEncryptionSupported() { - objInfo.UserDefined = CleanMinioInternalMetadataKeys(objInfo.UserDefined) - if crypto.IsEncrypted(objInfo.UserDefined) { - switch { - case crypto.S3.IsEncrypted(objInfo.UserDefined): - w.Header().Set(crypto.SSEHeader, crypto.SSEAlgorithmAES256) - case crypto.SSEC.IsEncrypted(objInfo.UserDefined): - w.Header().Set(crypto.SSECAlgorithm, r.Header.Get(crypto.SSECAlgorithm)) - w.Header().Set(crypto.SSECKeyMD5, r.Header.Get(crypto.SSECKeyMD5)) - } - } - } - - reader := readahead.NewReader(gr) - defer reader.Close() - - size := objInfo.Size - if objInfo.IsCompressed() { - size = objInfo.GetActualSize() - if size < 0 { - writeErrorResponse(w, toAPIErrorCode(ctx, errInvalidDecompressedSize), r.URL, guessIsBrowserReq(r)) - return - } - } - - s3s, err := s3select.New(reader, size, selectReq) - if err != nil { - writeErrorResponse(w, toAPIErrorCode(ctx, err), r.URL, guessIsBrowserReq(r)) - return - } - - // Parses the select query and checks for an error - _, _, _, _, _, _, err = s3select.ParseSelect(s3s) - if err != nil { - writeErrorResponse(w, toAPIErrorCode(ctx, err), r.URL, guessIsBrowserReq(r)) - return - } - - // Executes the query on data-set - s3select.Execute(w, s3s) // Get host and port from Request.RemoteAddr. host, port, err := net.SplitHostPort(handlers.GetSourceIP(r)) diff --git a/pkg/ioutil/delimited-reader.go b/pkg/ioutil/delimited-reader.go deleted file mode 100644 index 71b02d7be..000000000 --- a/pkg/ioutil/delimited-reader.go +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package ioutil - -import ( - "bufio" - "io" -) - -var ( - nByte byte = 10 // the byte that corresponds to the '\n' rune. - rByte byte = 13 // the byte that corresponds to the '\r' rune. -) - -// DelimitedReader reduces the custom delimiter to `\n`. -type DelimitedReader struct { - r *bufio.Reader - delimiter []rune // Select can have upto 2 characters as delimiter. - assignEmpty bool // Decides whether the next read byte should be discarded. -} - -// NewDelimitedReader detects the custom delimiter and replaces with `\n`. -func NewDelimitedReader(r io.Reader, delimiter []rune) *DelimitedReader { - return &DelimitedReader{r: bufio.NewReader(r), delimiter: delimiter, assignEmpty: false} -} - -// Reads and replaces the custom delimiter with `\n`. -func (r *DelimitedReader) Read(p []byte) (n int, err error) { - n, err = r.r.Read(p) - if err != nil { - return - } - for i, b := range p { - if r.assignEmpty { - swapAndNullify(p, i) - r.assignEmpty = false - continue - } - if b == rByte && rune(b) != r.delimiter[0] { - // Replace the carriage returns with `\n`. - // Mac styled csv will have `\r` as their record delimiter. - p[i] = nByte - } else if rune(b) == r.delimiter[0] { // Eg, `\r\n`,`ab`,`a` are valid delimiters - if i+1 == len(p) && len(r.delimiter) > 1 { - // If the first delimiter match falls on the boundary, - // Peek the next byte and if it matches, discard it in the next byte read. - if nextByte, nerr := r.r.Peek(1); nerr == nil { - if rune(nextByte[0]) == r.delimiter[1] { - p[i] = nByte - // To Discard in the next read. 
- r.assignEmpty = true - } - } - } else if len(r.delimiter) > 1 && rune(p[i+1]) == r.delimiter[1] { - // The second delimiter falls in the same chunk. - p[i] = nByte - r.assignEmpty = true - } else if len(r.delimiter) == 1 { - // Replace with `\n` incase of single charecter delimiter match. - p[i] = nByte - } - } - } - return -} - -// Occupy the first byte space and nullify the last byte. -func swapAndNullify(p []byte, n int) { - for i := n; i < len(p)-1; i++ { - p[i] = p[i+1] - } - p[len(p)-1] = 0 -} diff --git a/pkg/ioutil/delimited-reader_test.go b/pkg/ioutil/delimited-reader_test.go deleted file mode 100644 index 452fc5dfd..000000000 --- a/pkg/ioutil/delimited-reader_test.go +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2016, 2017, 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package ioutil - -import ( - "bytes" - "io" - "strings" - "testing" -) - -// Test for DelimitedCSVReader. -func TestDelimitedReader(t *testing.T) { - expected := "username,age\nbanana,12\ncarrot,23\napple,34\nbrinjal,90\nraddish,45" - - inputs := []struct { - inputcsv string - delimiter string - chunkSize int - }{ - // case 1 - with default `\n` delimiter. - {"username,age\nbanana,12\ncarrot,23\napple,34\nbrinjal,90\nraddish,45", "\n", 10}, - // case 2 - with carriage return `\r` which should be replaced with `\n` by default. 
- {"username,age\rbanana,12\rcarrot,23\rapple,34\rbrinjal,90\rraddish,45", "\n", 10}, - // case 3 - with a double character delimiter (octals). - {"username,age\r\nbanana,12\r\ncarrot,23\r\napple,34\r\nbrinjal,90\r\nraddish,45", "\r\n", 10}, - // case 4 - with a double character delimiter. - {"username,agexvbanana,12xvcarrot,23xvapple,34xvbrinjal,90xvraddish,45", "xv", 10}, - // case 5 - with a double character delimiter `\t ` - {"username,age\t banana,12\t carrot,23\t apple,34\t brinjal,90\t raddish,45", "\t ", 10}, - // case 6 - This is a special case where the first delimiter match falls in the 13'th byte space - // ie, the last byte space of the read chunk, In this case the reader should peek in the next byte - // and replace with `\n`. - {"username,agexxbanana,12xxcarrot,23xxapple,34xxbrinjal,90xxraddish,45", "xx", 13}, - } - - for c, input := range inputs { - var readcsv []byte - var err error - delimitedReader := NewDelimitedReader(strings.NewReader(input.inputcsv), []rune(input.delimiter)) - for err == nil { - chunk := make([]byte, input.chunkSize) - _, err = delimitedReader.Read(chunk) - readcsv = append(readcsv, chunk...) - } - if err != io.EOF { - t.Fatalf("Case %d: Error in delimited read", c+1) - } - expected := []byte(expected) - cleanCsv := removeNulls(readcsv) - if !bytes.Equal(cleanCsv, expected) { - t.Fatalf("Case %d: Expected the delimited csv to be `%s`, but instead found `%s`", c+1, string(expected), string(cleanCsv)) - } - } - -} - -// Removes all the tailing nulls in chunks. -// Null chunks will be assigned if there is a reduction -// Eg, When `xv` is reduced to `\n`, the last byte is nullified. 
-func removeNulls(csv []byte) []byte { - cleanCsv := []byte{} - for _, p := range csv { - if p != 0 { - cleanCsv = append(cleanCsv, p) - } - } - return cleanCsv -} diff --git a/pkg/s3select/csv/args.go b/pkg/s3select/csv/args.go new file mode 100644 index 000000000..f935b998c --- /dev/null +++ b/pkg/s3select/csv/args.go @@ -0,0 +1,190 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package csv + +import ( + "encoding/xml" + "fmt" + "strings" +) + +const ( + none = "none" + use = "use" + ignore = "ignore" + + defaultRecordDelimiter = "\n" + defaultFieldDelimiter = "," + defaultQuoteCharacter = `"` + defaultQuoteEscapeCharacter = `"` + defaultCommentCharacter = "#" + + always = "always" + asneeded = "asneeded" +) + +// ReaderArgs - represents elements inside in request XML. +type ReaderArgs struct { + FileHeaderInfo string `xml:"FileHeaderInfo"` + RecordDelimiter string `xml:"RecordDelimiter"` + FieldDelimiter string `xml:"FieldDelimiter"` + QuoteCharacter string `xml:"QuoteCharacter"` + QuoteEscapeCharacter string `xml:"QuoteEscapeCharacter"` + CommentCharacter string `xml:"Comments"` + AllowQuotedRecordDelimiter bool `xml:"AllowQuotedRecordDelimiter"` + unmarshaled bool +} + +// IsEmpty - returns whether reader args is empty or not. +func (args *ReaderArgs) IsEmpty() bool { + return !args.unmarshaled +} + +// UnmarshalXML - decodes XML data. 
+func (args *ReaderArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + // Make subtype to avoid recursive UnmarshalXML(). + type subReaderArgs ReaderArgs + parsedArgs := subReaderArgs{} + if err := d.DecodeElement(&parsedArgs, &start); err != nil { + return err + } + + parsedArgs.FileHeaderInfo = strings.ToLower(parsedArgs.FileHeaderInfo) + switch parsedArgs.FileHeaderInfo { + case none, use, ignore: + default: + return errInvalidFileHeaderInfo(fmt.Errorf("invalid FileHeaderInfo '%v'", parsedArgs.FileHeaderInfo)) + } + + switch len(parsedArgs.RecordDelimiter) { + case 0: + parsedArgs.RecordDelimiter = defaultRecordDelimiter + case 1, 2: + default: + return fmt.Errorf("invalid RecordDelimiter '%v'", parsedArgs.RecordDelimiter) + } + + switch len(parsedArgs.FieldDelimiter) { + case 0: + parsedArgs.FieldDelimiter = defaultFieldDelimiter + case 1: + default: + return fmt.Errorf("invalid FieldDelimiter '%v'", parsedArgs.FieldDelimiter) + } + + switch parsedArgs.QuoteCharacter { + case "": + parsedArgs.QuoteCharacter = defaultQuoteCharacter + case defaultQuoteCharacter: + default: + return fmt.Errorf("unsupported QuoteCharacter '%v'", parsedArgs.QuoteCharacter) + } + + switch parsedArgs.QuoteEscapeCharacter { + case "": + parsedArgs.QuoteEscapeCharacter = defaultQuoteEscapeCharacter + case defaultQuoteEscapeCharacter: + default: + return fmt.Errorf("unsupported QuoteEscapeCharacter '%v'", parsedArgs.QuoteEscapeCharacter) + } + + switch parsedArgs.CommentCharacter { + case "": + parsedArgs.CommentCharacter = defaultCommentCharacter + case defaultCommentCharacter: + default: + return fmt.Errorf("unsupported Comments '%v'", parsedArgs.CommentCharacter) + } + + if parsedArgs.AllowQuotedRecordDelimiter { + return fmt.Errorf("flag AllowQuotedRecordDelimiter is unsupported at the moment") + } + + *args = ReaderArgs(parsedArgs) + args.unmarshaled = true + return nil +} + +// WriterArgs - represents elements inside in request XML. 
+type WriterArgs struct { + QuoteFields string `xml:"QuoteFields"` + RecordDelimiter string `xml:"RecordDelimiter"` + FieldDelimiter string `xml:"FieldDelimiter"` + QuoteCharacter string `xml:"QuoteCharacter"` + QuoteEscapeCharacter string `xml:"QuoteEscapeCharacter"` + unmarshaled bool +} + +// IsEmpty - returns whether writer args is empty or not. +func (args *WriterArgs) IsEmpty() bool { + return !args.unmarshaled +} + +// UnmarshalXML - decodes XML data. +func (args *WriterArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + // Make subtype to avoid recursive UnmarshalXML(). + type subWriterArgs WriterArgs + parsedArgs := subWriterArgs{} + if err := d.DecodeElement(&parsedArgs, &start); err != nil { + return err + } + + parsedArgs.QuoteFields = strings.ToLower(parsedArgs.QuoteFields) + switch parsedArgs.QuoteFields { + case "": + parsedArgs.QuoteFields = asneeded + case always, asneeded: + default: + return errInvalidQuoteFields(fmt.Errorf("invalid QuoteFields '%v'", parsedArgs.QuoteFields)) + } + + switch len(parsedArgs.RecordDelimiter) { + case 0: + parsedArgs.RecordDelimiter = defaultRecordDelimiter + case 1, 2: + default: + return fmt.Errorf("invalid RecordDelimiter '%v'", parsedArgs.RecordDelimiter) + } + + switch len(parsedArgs.FieldDelimiter) { + case 0: + parsedArgs.FieldDelimiter = defaultFieldDelimiter + case 1: + default: + return fmt.Errorf("invalid FieldDelimiter '%v'", parsedArgs.FieldDelimiter) + } + + switch parsedArgs.QuoteCharacter { + case "": + parsedArgs.QuoteCharacter = defaultQuoteCharacter + case defaultQuoteCharacter: + default: + return fmt.Errorf("unsupported QuoteCharacter '%v'", parsedArgs.QuoteCharacter) + } + + switch parsedArgs.QuoteEscapeCharacter { + case "": + parsedArgs.QuoteEscapeCharacter = defaultQuoteEscapeCharacter + case defaultQuoteEscapeCharacter: + default: + return fmt.Errorf("unsupported QuoteEscapeCharacter '%v'", parsedArgs.QuoteEscapeCharacter) + } + + *args = WriterArgs(parsedArgs) + 
args.unmarshaled = true + return nil +} diff --git a/pkg/s3select/csv/errors.go b/pkg/s3select/csv/errors.go new file mode 100644 index 000000000..269f34c53 --- /dev/null +++ b/pkg/s3select/csv/errors.go @@ -0,0 +1,71 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package csv + +type s3Error struct { + code string + message string + statusCode int + cause error +} + +func (err *s3Error) Cause() error { + return err.cause +} + +func (err *s3Error) ErrorCode() string { + return err.code +} + +func (err *s3Error) ErrorMessage() string { + return err.message +} + +func (err *s3Error) HTTPStatusCode() int { + return err.statusCode +} + +func (err *s3Error) Error() string { + return err.message +} + +func errInvalidFileHeaderInfo(err error) *s3Error { + return &s3Error{ + code: "InvalidFileHeaderInfo", + message: "The FileHeaderInfo is invalid. Only NONE, USE, and IGNORE are supported.", + statusCode: 400, + cause: err, + } +} + +func errInvalidQuoteFields(err error) *s3Error { + return &s3Error{ + code: "InvalidQuoteFields", + message: "The QuoteFields is invalid. Only ALWAYS and ASNEEDED are supported.", + statusCode: 400, + cause: err, + } +} + +func errCSVParsingError(err error) *s3Error { + return &s3Error{ + code: "CSVParsingError", + message: "Encountered an error parsing the CSV file. 
Check the file and try again.", + statusCode: 400, + cause: err, + } +} diff --git a/pkg/s3select/csv/reader.go b/pkg/s3select/csv/reader.go new file mode 100644 index 000000000..858fa5a54 --- /dev/null +++ b/pkg/s3select/csv/reader.go @@ -0,0 +1,166 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package csv + +import ( + "bytes" + "encoding/csv" + "fmt" + "io" + + "github.com/minio/minio/pkg/s3select/sql" +) + +type recordReader struct { + reader io.Reader + recordDelimiter []byte + oneByte []byte + useOneByte bool +} + +func (rr *recordReader) Read(p []byte) (n int, err error) { + if rr.useOneByte { + p[0] = rr.oneByte[0] + rr.useOneByte = false + n, err = rr.reader.Read(p[1:]) + n++ + } else { + n, err = rr.reader.Read(p) + } + + if err != nil { + return 0, err + } + + if string(rr.recordDelimiter) == "\n" { + return n, nil + } + + for { + i := bytes.Index(p, rr.recordDelimiter) + if i < 0 { + break + } + + p[i] = '\n' + if len(rr.recordDelimiter) > 1 { + p = append(p[:i+1], p[i+len(rr.recordDelimiter):]...) + } + } + + n = len(p) + if len(rr.recordDelimiter) == 1 || p[n-1] != rr.recordDelimiter[0] { + return n, nil + } + + if _, err = rr.reader.Read(rr.oneByte); err != nil { + return 0, err + } + + if rr.oneByte[0] == rr.recordDelimiter[1] { + p[n-1] = '\n' + return n, nil + } + + rr.useOneByte = true + return n, nil +} + +// Reader - CSV record reader for S3Select. 
+type Reader struct { + args *ReaderArgs + readCloser io.ReadCloser + csvReader *csv.Reader + columnNames []string +} + +// Read - reads single record. +func (r *Reader) Read() (sql.Record, error) { + csvRecord, err := r.csvReader.Read() + if err != nil { + if err != io.EOF { + return nil, errCSVParsingError(err) + } + + return nil, err + } + + columnNames := r.columnNames + if columnNames == nil { + columnNames = make([]string, len(csvRecord)) + for i := range csvRecord { + columnNames[i] = fmt.Sprintf("_%v", i+1) + } + } + + nameIndexMap := make(map[string]int64) + for i := range columnNames { + nameIndexMap[columnNames[i]] = int64(i) + } + + return &Record{ + columnNames: columnNames, + csvRecord: csvRecord, + nameIndexMap: nameIndexMap, + }, nil +} + +// Close - closes underlaying reader. +func (r *Reader) Close() error { + return r.readCloser.Close() +} + +// NewReader - creates new CSV reader using readCloser. +func NewReader(readCloser io.ReadCloser, args *ReaderArgs) (*Reader, error) { + if args == nil || args.IsEmpty() { + panic(fmt.Errorf("empty args passed %v", args)) + } + + csvReader := csv.NewReader(&recordReader{ + reader: readCloser, + recordDelimiter: []byte(args.RecordDelimiter), + oneByte: []byte{0}, + }) + csvReader.Comma = []rune(args.FieldDelimiter)[0] + csvReader.Comment = []rune(args.CommentCharacter)[0] + csvReader.FieldsPerRecord = -1 + + r := &Reader{ + args: args, + readCloser: readCloser, + csvReader: csvReader, + } + + if args.FileHeaderInfo == none { + return r, nil + } + + record, err := csvReader.Read() + if err != nil { + if err != io.EOF { + return nil, errCSVParsingError(err) + } + + return nil, err + } + + if args.FileHeaderInfo == use { + r.columnNames = record + } + + return r, nil +} diff --git a/pkg/s3select/csv/record.go b/pkg/s3select/csv/record.go new file mode 100644 index 000000000..f58d48aa0 --- /dev/null +++ b/pkg/s3select/csv/record.go @@ -0,0 +1,95 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package csv + +import ( + "bytes" + "encoding/csv" + "fmt" + + "github.com/minio/minio/pkg/s3select/sql" + "github.com/tidwall/sjson" +) + +// Record - is CSV record. +type Record struct { + columnNames []string + csvRecord []string + nameIndexMap map[string]int64 +} + +// Get - gets the value for a column name. +func (r *Record) Get(name string) (*sql.Value, error) { + index, found := r.nameIndexMap[name] + if !found { + return nil, fmt.Errorf("column %v not found", name) + } + + if index >= int64(len(r.csvRecord)) { + // No value found for column 'name', hence return empty string for compatibility. + return sql.NewString(""), nil + } + + return sql.NewString(r.csvRecord[index]), nil +} + +// Set - sets the value for a column name. +func (r *Record) Set(name string, value *sql.Value) error { + r.columnNames = append(r.columnNames, name) + r.csvRecord = append(r.csvRecord, value.CSVString()) + return nil +} + +// MarshalCSV - encodes to CSV data. +func (r *Record) MarshalCSV(fieldDelimiter rune) ([]byte, error) { + buf := new(bytes.Buffer) + w := csv.NewWriter(buf) + w.Comma = fieldDelimiter + if err := w.Write(r.csvRecord); err != nil { + return nil, err + } + w.Flush() + if err := w.Error(); err != nil { + return nil, err + } + + data := buf.Bytes() + return data[:len(data)-1], nil +} + +// MarshalJSON - encodes to JSON data. 
+func (r *Record) MarshalJSON() ([]byte, error) { + data := "{}" + + var err error + for i := len(r.columnNames) - 1; i >= 0; i-- { + if i >= len(r.csvRecord) { + continue + } + + if data, err = sjson.Set(data, r.columnNames[i], r.csvRecord[i]); err != nil { + return nil, err + } + } + + return []byte(data), nil +} + +// NewRecord - creates new CSV record. +func NewRecord() *Record { + return &Record{} +} diff --git a/pkg/s3select/datatypes.go b/pkg/s3select/datatypes.go deleted file mode 100644 index a7834044f..000000000 --- a/pkg/s3select/datatypes.go +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package s3select - -import ( - "encoding/xml" -) - -// CSVFileHeaderInfo -Can be either USE IGNORE OR NONE, defines what to do with -// the first row -type CSVFileHeaderInfo string - -// Constants for file header info. -const ( - CSVFileHeaderInfoNone CSVFileHeaderInfo = "NONE" - CSVFileHeaderInfoIgnore = "IGNORE" - CSVFileHeaderInfoUse = "USE" -) - -// The maximum character per record is set to be 1 MB. -const ( - MaxCharsPerRecord = 1000000 -) - -// SelectCompressionType - ONLY GZIP is supported -type SelectCompressionType string - -// JSONType determines json input serialization type. -type JSONType string - -// Constants for compression types under select API. 
-const ( - SelectCompressionNONE SelectCompressionType = "NONE" - SelectCompressionGZIP = "GZIP" - SelectCompressionBZIP = "BZIP2" -) - -// CSVQuoteFields - Can be either Always or AsNeeded -type CSVQuoteFields string - -// Constants for csv quote styles. -const ( - CSVQuoteFieldsAlways CSVQuoteFields = "Always" - CSVQuoteFieldsAsNeeded = "AsNeeded" -) - -// QueryExpressionType - Currently can only be SQL -type QueryExpressionType string - -// Constants for expression type. -const ( - QueryExpressionTypeSQL QueryExpressionType = "SQL" -) - -// Constants for JSONTypes. -const ( - JSONTypeDocument JSONType = "DOCUMENT" - JSONLinesType = "LINES" -) - -// ObjectSelectRequest - represents the input select body -type ObjectSelectRequest struct { - XMLName xml.Name `xml:"SelectObjectContentRequest" json:"-"` - Expression string - ExpressionType QueryExpressionType - InputSerialization struct { - CompressionType SelectCompressionType - Parquet *struct{} - CSV *struct { - FileHeaderInfo CSVFileHeaderInfo - RecordDelimiter string - FieldDelimiter string - QuoteCharacter string - QuoteEscapeCharacter string - Comments string - } - JSON *struct { - Type JSONType - } - } - OutputSerialization struct { - CSV *struct { - QuoteFields CSVQuoteFields - RecordDelimiter string - FieldDelimiter string - QuoteCharacter string - QuoteEscapeCharacter string - } - JSON *struct { - RecordDelimiter string - } - } - RequestProgress struct { - Enabled bool - } -} diff --git a/pkg/s3select/errors.go b/pkg/s3select/errors.go index c9ec7028b..d3887f447 100644 --- a/pkg/s3select/errors.go +++ b/pkg/s3select/errors.go @@ -1,5 +1,5 @@ /* - * Minio Cloud Storage, (C) 2018 Minio, Inc. + * Minio Cloud Storage, (C) 2019 Minio, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,456 +16,111 @@ package s3select -import ( - "errors" - - "github.com/minio/minio/pkg/s3select/format" -) - -//S3 errors below - -// ErrBusy is an error if the service is too busy. -var ErrBusy = errors.New("The service is unavailable. Please retry") - -// ErrUnauthorizedAccess is an error if you lack the appropriate credentials to -// access the object. -var ErrUnauthorizedAccess = errors.New("You are not authorized to perform this operation") - -// ErrExpressionTooLong is an error if your SQL expression too long for -// processing. -var ErrExpressionTooLong = errors.New("The SQL expression is too long: The maximum byte-length for the SQL expression is 256 KB") - -// ErrIllegalSQLFunctionArgument is an error if you provide an illegal argument -// in the SQL function. -var ErrIllegalSQLFunctionArgument = errors.New("Illegal argument was used in the SQL function") - -// ErrInvalidKeyPath is an error if you provide a key in the SQL expression that -// is invalid. -var ErrInvalidKeyPath = errors.New("Key path in the SQL expression is invalid") - -// ErrColumnTooLong is an error if your query results in a column that is -// greater than the max amount of characters per column of 1mb -var ErrColumnTooLong = errors.New("The length of a column in the result is greater than maxCharsPerColumn of 1 MB") - -// ErrOverMaxColumn is an error if the number of columns from the resulting -// query is greater than 1Mb. -var ErrOverMaxColumn = errors.New("The number of columns in the result is greater than maxColumnNumber of 1 MB") - -// ErrOverMaxRecordSize is an error if the length of a record in the result is -// greater than 1 Mb. -var ErrOverMaxRecordSize = errors.New("The length of a record in the result is greater than maxCharsPerRecord of 1 MB") - -// ErrMissingHeaders is an error if some of the headers that are requested in -// the Select Query are not present in the file. -var ErrMissingHeaders = errors.New("Some headers in the query are missing from the file. 
Check the file and try again") - -// ErrInvalidCompressionFormat is an error if an unsupported compression type is -// utilized with the select object query. -var ErrInvalidCompressionFormat = errors.New("The file is not in a supported compression format. Only GZIP is supported at this time") - -// ErrInvalidFileHeaderInfo is an error if the argument provided to the -// FileHeader Argument is incorrect. -var ErrInvalidFileHeaderInfo = errors.New("The FileHeaderInfo is invalid. Only NONE, USE, and IGNORE are supported") - -// ErrInvalidJSONType is an error if the json format provided as an argument is -// invalid. -var ErrInvalidJSONType = errors.New("The JsonType is invalid. Only DOCUMENT and LINES are supported at this time") - -// ErrInvalidQuoteFields is an error if the arguments provided to the -// QuoteFields options are not valid. -var ErrInvalidQuoteFields = errors.New("The QuoteFields is invalid. Only ALWAYS and ASNEEDED are supported") - -// ErrInvalidRequestParameter is an error if the value of a parameter in the -// request element is not valid. -var ErrInvalidRequestParameter = errors.New("The value of a parameter in Request element is invalid. Check the service API documentation and try again") - -// ErrExternalEvalException is an error that arises if the query can not be -// evaluated. -var ErrExternalEvalException = errors.New("The query cannot be evaluated. Check the file and try again") - -// ErrInvalidDataType is an error that occurs if the SQL expression contains an -// invalid data type. -var ErrInvalidDataType = errors.New("The SQL expression contains an invalid data type") - -// ErrUnrecognizedFormatException is an error that arises if there is an invalid -// record type. -var ErrUnrecognizedFormatException = errors.New("Encountered an invalid record type") - -// ErrInvalidTextEncoding is an error if the text encoding is not valid. -var ErrInvalidTextEncoding = errors.New("Invalid encoding type. 
Only UTF-8 encoding is supported at this time") - -// ErrInvalidTableAlias is an error that arises if the table alias provided in -// the SQL expression is invalid. -var ErrInvalidTableAlias = errors.New("The SQL expression contains an invalid table alias") - -// ErrMultipleDataSourcesUnsupported is an error that arises if multiple data -// sources are provided. -var ErrMultipleDataSourcesUnsupported = errors.New("Multiple data sources are not supported") - -// ErrMissingRequiredParameter is an error that arises if a required argument -// is omitted from the Request. -var ErrMissingRequiredParameter = errors.New("The Request entity is missing a required parameter. Check the service documentation and try again") - -// ErrObjectSerializationConflict is an error that arises if an unsupported -// output seralization is provided. -var ErrObjectSerializationConflict = errors.New("The Request entity can only contain one of CSV or JSON. Check the service documentation and try again") - -// ErrUnsupportedSQLOperation is an error that arises if an unsupported SQL -// operation is used. -var ErrUnsupportedSQLOperation = errors.New("Encountered an unsupported SQL operation") - -// ErrUnsupportedSQLStructure is an error that occurs if an unsupported SQL -// structure is used. -var ErrUnsupportedSQLStructure = errors.New("Encountered an unsupported SQL structure. Check the SQL Reference") - -// ErrUnsupportedStorageClass is an error that occurs if an invalid storace -// class is present. -var ErrUnsupportedStorageClass = errors.New("Encountered an invalid storage class. Only STANDARD, STANDARD_IA, and ONEZONE_IA storage classes are supported at this time") - -// ErrUnsupportedSyntax is an error that occurs if invalid syntax is present in -// the query. -var ErrUnsupportedSyntax = errors.New("Encountered invalid syntax") - -// ErrUnsupportedRangeHeader is an error that occurs if a range header is -// provided. 
-var ErrUnsupportedRangeHeader = errors.New("Range header is not supported for this operation") - -// ErrLexerInvalidChar is an error that occurs if the SQL expression contains an -// invalid character. -var ErrLexerInvalidChar = errors.New("The SQL expression contains an invalid character") - -// ErrLexerInvalidOperator is an error that occurs if an invalid operator is -// used. -var ErrLexerInvalidOperator = errors.New("The SQL expression contains an invalid operator") - -// ErrLexerInvalidLiteral is an error that occurs if an invalid literal is used. -var ErrLexerInvalidLiteral = errors.New("The SQL expression contains an invalid literal") - -// ErrLexerInvalidIONLiteral is an error that occurs if an invalid operator is -// used -var ErrLexerInvalidIONLiteral = errors.New("The SQL expression contains an invalid operator") - -// ErrParseExpectedDatePart is an error that occurs if the date part is not -// found in the SQL expression. -var ErrParseExpectedDatePart = errors.New("Did not find the expected date part in the SQL expression") - -// ErrParseExpectedKeyword is an error that occurs if the expected keyword was -// not found in the expression. -var ErrParseExpectedKeyword = errors.New("Did not find the expected keyword in the SQL expression") - -// ErrParseExpectedTokenType is an error that occurs if the expected token is -// not found in the SQL expression. -var ErrParseExpectedTokenType = errors.New("Did not find the expected token in the SQL expression") - -// ErrParseExpected2TokenTypes is an error that occurs if 2 token types are not -// found. -var ErrParseExpected2TokenTypes = errors.New("Did not find the expected token in the SQL expression") - -// ErrParseExpectedNumber is an error that occurs if a number is expected but -// not found in the expression. 
-var ErrParseExpectedNumber = errors.New("Did not find the expected number in the SQL expression") - -// ErrParseExpectedRightParenBuiltinFunctionCall is an error that occurs if a -// right parenthesis is missing. -var ErrParseExpectedRightParenBuiltinFunctionCall = errors.New("Did not find the expected right parenthesis character in the SQL expression") - -// ErrParseExpectedTypeName is an error that occurs if a type name is expected -// but not found. -var ErrParseExpectedTypeName = errors.New("Did not find the expected type name in the SQL expression") - -// ErrParseExpectedWhenClause is an error that occurs if a When clause is -// expected but not found. -var ErrParseExpectedWhenClause = errors.New("Did not find the expected WHEN clause in the SQL expression. CASE is not supported") - -// ErrParseUnsupportedToken is an error that occurs if the SQL expression -// contains an unsupported token. -var ErrParseUnsupportedToken = errors.New("The SQL expression contains an unsupported token") - -// ErrParseUnsupportedLiteralsGroupBy is an error that occurs if the SQL -// expression has an unsupported use of Group By. -var ErrParseUnsupportedLiteralsGroupBy = errors.New("The SQL expression contains an unsupported use of GROUP BY") - -// ErrParseExpectedMember is an error that occurs if there is an unsupported use -// of member in the SQL expression. -var ErrParseExpectedMember = errors.New("The SQL expression contains an unsupported use of MEMBER") - -// ErrParseUnsupportedSelect is an error that occurs if there is an unsupported -// use of Select. -var ErrParseUnsupportedSelect = errors.New("The SQL expression contains an unsupported use of SELECT") - -// ErrParseUnsupportedCase is an error that occurs if there is an unsupported -// use of case. -var ErrParseUnsupportedCase = errors.New("The SQL expression contains an unsupported use of CASE") - -// ErrParseUnsupportedCaseClause is an error that occurs if there is an -// unsupported use of case. 
-var ErrParseUnsupportedCaseClause = errors.New("The SQL expression contains an unsupported use of CASE") - -// ErrParseUnsupportedAlias is an error that occurs if there is an unsupported -// use of Alias. -var ErrParseUnsupportedAlias = errors.New("The SQL expression contains an unsupported use of ALIAS") - -// ErrParseUnsupportedSyntax is an error that occurs if there is an -// UnsupportedSyntax in the SQL expression. -var ErrParseUnsupportedSyntax = errors.New("The SQL expression contains unsupported syntax") - -// ErrParseUnknownOperator is an error that occurs if there is an invalid -// operator present in the SQL expression. -var ErrParseUnknownOperator = errors.New("The SQL expression contains an invalid operator") - -// ErrParseMissingIdentAfterAt is an error that occurs if the wrong symbol -// follows the "@" symbol in the SQL expression. -var ErrParseMissingIdentAfterAt = errors.New("Did not find the expected identifier after the @ symbol in the SQL expression") - -// ErrParseUnexpectedOperator is an error that occurs if the SQL expression -// contains an unexpected operator. -var ErrParseUnexpectedOperator = errors.New("The SQL expression contains an unexpected operator") - -// ErrParseUnexpectedTerm is an error that occurs if the SQL expression contains -// an unexpected term. -var ErrParseUnexpectedTerm = errors.New("The SQL expression contains an unexpected term") - -// ErrParseUnexpectedToken is an error that occurs if the SQL expression -// contains an unexpected token. -var ErrParseUnexpectedToken = errors.New("The SQL expression contains an unexpected token") - -// ErrParseUnexpectedKeyword is an error that occurs if the SQL expression -// contains an unexpected keyword. -var ErrParseUnexpectedKeyword = errors.New("The SQL expression contains an unexpected keyword") - -// ErrParseExpectedExpression is an error that occurs if the SQL expression is -// not found. 
-var ErrParseExpectedExpression = errors.New("Did not find the expected SQL expression") - -// ErrParseExpectedLeftParenAfterCast is an error that occurs if the left -// parenthesis is missing after a cast in the SQL expression. -var ErrParseExpectedLeftParenAfterCast = errors.New("Did not find the expected left parenthesis after CAST in the SQL expression") - -// ErrParseExpectedLeftParenValueConstructor is an error that occurs if the left -// parenthesis is not found in the SQL expression. -var ErrParseExpectedLeftParenValueConstructor = errors.New("Did not find expected the left parenthesis in the SQL expression") - -// ErrParseExpectedLeftParenBuiltinFunctionCall is an error that occurs if the -// left parenthesis is not found in the SQL expression function call. -var ErrParseExpectedLeftParenBuiltinFunctionCall = errors.New("Did not find the expected left parenthesis in the SQL expression") - -// ErrParseExpectedArgumentDelimiter is an error that occurs if the argument -// delimiter for the SQL expression is not provided. -var ErrParseExpectedArgumentDelimiter = errors.New("Did not find the expected argument delimiter in the SQL expression") - -// ErrParseCastArity is an error that occurs because the CAST has incorrect -// arity. -var ErrParseCastArity = errors.New("The SQL expression CAST has incorrect arity") - -// ErrParseInvalidTypeParam is an error that occurs because there is an invalid -// parameter value. -var ErrParseInvalidTypeParam = errors.New("The SQL expression contains an invalid parameter value") - -// ErrParseEmptySelect is an error that occurs because the SQL expression -// contains an empty Select -var ErrParseEmptySelect = errors.New("The SQL expression contains an empty SELECT") - -// ErrParseSelectMissingFrom is an error that occurs because there is a missing -// From after the Select List. 
-var ErrParseSelectMissingFrom = errors.New("The SQL expression contains a missing FROM after SELECT list") - -// ErrParseExpectedIdentForGroupName is an error that occurs because Group is -// not supported in the SQL expression. -var ErrParseExpectedIdentForGroupName = errors.New("GROUP is not supported in the SQL expression") - -// ErrParseExpectedIdentForAlias is an error that occurs if expected identifier -// for alias is not in the SQL expression. -var ErrParseExpectedIdentForAlias = errors.New("Did not find the expected identifier for the alias in the SQL expression") - -// ErrParseUnsupportedCallWithStar is an error that occurs if COUNT is used with -// an argument other than "*". -var ErrParseUnsupportedCallWithStar = errors.New("Only COUNT with (*) as a parameter is supported in the SQL expression") - -// ErrParseNonUnaryAgregateFunctionCall is an error that occurs if more than one -// argument is provided as an argument for aggregation functions. -var ErrParseNonUnaryAgregateFunctionCall = errors.New("Only one argument is supported for aggregate functions in the SQL expression") - -// ErrParseMalformedJoin is an error that occurs if a "join" operation is -// attempted in the SQL expression as this is not supported. -var ErrParseMalformedJoin = errors.New("JOIN is not supported in the SQL expression") - -// ErrParseExpectedIdentForAt is an error that occurs if after "AT" an Alias -// identifier is not provided. -var ErrParseExpectedIdentForAt = errors.New("Did not find the expected identifier for AT name in the SQL expression") - -// ErrParseAsteriskIsNotAloneInSelectList is an error that occurs if in addition -// to an asterix, more column names are provided as arguments in the SQL -// expression. 
-var ErrParseAsteriskIsNotAloneInSelectList = errors.New("Other expressions are not allowed in the SELECT list when '*' is used without dot notation in the SQL expression") - -// ErrParseCannotMixSqbAndWildcardInSelectList is an error that occurs if list -// indexing and an asterix are mixed in the SQL expression. -var ErrParseCannotMixSqbAndWildcardInSelectList = errors.New("Cannot mix [] and * in the same expression in a SELECT list in SQL expression") - -// ErrParseInvalidContextForWildcardInSelectList is an error that occurs if the -// asterix is used improperly within the SQL expression. -var ErrParseInvalidContextForWildcardInSelectList = errors.New("Invalid use of * in SELECT list in the SQL expression") - -// ErrEvaluatorBindingDoesNotExist is an error that occurs if a column name or -// path provided in the expression does not exist. -var ErrEvaluatorBindingDoesNotExist = errors.New("A column name or a path provided does not exist in the SQL expression") - -// ErrIncorrectSQLFunctionArgumentType is an error that occurs if the wrong -// argument is provided to a SQL function. -var ErrIncorrectSQLFunctionArgumentType = errors.New("Incorrect type of arguments in function call in the SQL expression") - -// ErrAmbiguousFieldName is an error that occurs if the column name which is not -// case sensitive, is not descriptive enough to retrieve a singular column. -var ErrAmbiguousFieldName = errors.New("Field name matches to multiple fields in the file. Check the SQL expression and the file, and try again") - -// ErrEvaluatorInvalidArguments is an error that occurs if there are not the -// correct number of arguments in a functional call to a SQL expression. -var ErrEvaluatorInvalidArguments = errors.New("Incorrect number of arguments in the function call in the SQL expression") - -// ErrValueParseFailure is an error that occurs if the Time Stamp is not parsed -// correctly in the SQL expression. 
-var ErrValueParseFailure = errors.New("Time stamp parse failure in the SQL expression") - -// ErrIntegerOverflow is an error that occurs if there is an IntegerOverflow or -// IntegerUnderFlow in the SQL expression. -var ErrIntegerOverflow = errors.New("Int overflow or underflow in the SQL expression") - -// ErrLikeInvalidInputs is an error that occurs if invalid inputs are provided -// to the argument LIKE Clause. -var ErrLikeInvalidInputs = errors.New("Invalid argument given to the LIKE clause in the SQL expression") - -// ErrCastFailed occurs if the attempt to convert data types in the cast is not -// done correctly. -var ErrCastFailed = errors.New("Attempt to convert from one data type to another using CAST failed in the SQL expression") - -// ErrInvalidCast is an error that occurs if the attempt to convert data types -// failed and was done in an improper fashion. -var ErrInvalidCast = errors.New("Attempt to convert from one data type to another using CAST failed in the SQL expression") - -// ErrEvaluatorInvalidTimestampFormatPattern is an error that occurs if the Time -// Stamp Format needs more additional fields to be filled. -var ErrEvaluatorInvalidTimestampFormatPattern = errors.New("Time stamp format pattern requires additional fields in the SQL expression") - -// ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing is an error that -// occurs if the format of the time stamp can not be parsed. -var ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing = errors.New("Time stamp format pattern contains a valid format symbol that cannot be applied to time stamp parsing in the SQL expression") - -// ErrEvaluatorTimestampFormatPatternDuplicateFields is an error that occurs if -// the time stamp format pattern contains multiple format specifications which -// can not be clearly resolved. 
-var ErrEvaluatorTimestampFormatPatternDuplicateFields = errors.New("Time stamp format pattern contains multiple format specifiers representing the time stamp field in the SQL expression") - -//ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch is an error that -//occurs if the time stamp format pattern contains a 12 hour day of format but -//does not have an AM/PM field. -var ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch = errors.New("Time stamp format pattern contains a 12-hour hour of day format symbol but doesn't also contain an AM/PM field, or it contains a 24-hour hour of day format specifier and contains an AM/PM field in the SQL expression") - -// ErrEvaluatorUnterminatedTimestampFormatPatternToken is an error that occurs -// if there is an unterminated token in the SQL expression for time stamp -// format. -var ErrEvaluatorUnterminatedTimestampFormatPatternToken = errors.New("Time stamp format pattern contains unterminated token in the SQL expression") - -// ErrEvaluatorInvalidTimestampFormatPatternToken is an error that occurs if -// there is an invalid token in the time stamp format within the SQL expression. -var ErrEvaluatorInvalidTimestampFormatPatternToken = errors.New("Time stamp format pattern contains an invalid token in the SQL expression") - -// ErrEvaluatorInvalidTimestampFormatPatternSymbol is an error that occurs if -// the time stamp format pattern has an invalid symbol within the SQL -// expression. -var ErrEvaluatorInvalidTimestampFormatPatternSymbol = errors.New("Time stamp format pattern contains an invalid symbol in the SQL expression") - -// S3 select API errors - TODO fix the errors. 
-var errorCodeResponse = map[error]string{ - ErrBusy: "Busy", - ErrUnauthorizedAccess: "UnauthorizedAccess", - ErrExpressionTooLong: "ExpressionTooLong", - ErrIllegalSQLFunctionArgument: "IllegalSqlFunctionArgument", - format.ErrInvalidColumnIndex: "InvalidColumnIndex", - ErrInvalidKeyPath: "InvalidKeyPath", - ErrColumnTooLong: "ColumnTooLong", - ErrOverMaxColumn: "OverMaxColumn", - ErrOverMaxRecordSize: "OverMaxRecordSize", - ErrMissingHeaders: "MissingHeaders", - ErrInvalidCompressionFormat: "InvalidCompressionFormat", - format.ErrTruncatedInput: "TruncatedInput", - ErrInvalidFileHeaderInfo: "InvalidFileHeaderInfo", - ErrInvalidJSONType: "InvalidJsonType", - ErrInvalidQuoteFields: "InvalidQuoteFields", - ErrInvalidRequestParameter: "InvalidRequestParameter", - format.ErrCSVParsingError: "CSVParsingError", - format.ErrJSONParsingError: "JSONParsingError", - ErrExternalEvalException: "ExternalEvalException", - ErrInvalidDataType: "InvalidDataType", - ErrUnrecognizedFormatException: "UnrecognizedFormatException", - ErrInvalidTextEncoding: "InvalidTextEncoding", - ErrInvalidTableAlias: "InvalidTableAlias", - ErrMultipleDataSourcesUnsupported: "MultipleDataSourcesUnsupported", - ErrMissingRequiredParameter: "MissingRequiredParameter", - ErrObjectSerializationConflict: "ObjectSerializationConflict", - ErrUnsupportedSQLOperation: "UnsupportedSqlOperation", - ErrUnsupportedSQLStructure: "UnsupportedSqlStructure", - ErrUnsupportedStorageClass: "UnsupportedStorageClass", - ErrUnsupportedSyntax: "UnsupportedSyntax", - ErrUnsupportedRangeHeader: "UnsupportedRangeHeader", - ErrLexerInvalidChar: "LexerInvalidChar", - ErrLexerInvalidOperator: "LexerInvalidOperator", - ErrLexerInvalidLiteral: "LexerInvalidLiteral", - ErrLexerInvalidIONLiteral: "LexerInvalidIONLiteral", - ErrParseExpectedDatePart: "ParseExpectedDatePart", - ErrParseExpectedKeyword: "ParseExpectedKeyword", - ErrParseExpectedTokenType: "ParseExpectedTokenType", - ErrParseExpected2TokenTypes: 
"ParseExpected2TokenTypes", - ErrParseExpectedNumber: "ParseExpectedNumber", - ErrParseExpectedRightParenBuiltinFunctionCall: "ParseExpectedRightParenBuiltinFunctionCall", - ErrParseExpectedTypeName: "ParseExpectedTypeName", - ErrParseExpectedWhenClause: "ParseExpectedWhenClause", - ErrParseUnsupportedToken: "ParseUnsupportedToken", - ErrParseUnsupportedLiteralsGroupBy: "ParseUnsupportedLiteralsGroupBy", - ErrParseExpectedMember: "ParseExpectedMember", - ErrParseUnsupportedSelect: "ParseUnsupportedSelect", - ErrParseUnsupportedCase: "ParseUnsupportedCase:", - ErrParseUnsupportedCaseClause: "ParseUnsupportedCaseClause", - ErrParseUnsupportedAlias: "ParseUnsupportedAlias", - ErrParseUnsupportedSyntax: "ParseUnsupportedSyntax", - ErrParseUnknownOperator: "ParseUnknownOperator", - format.ErrParseInvalidPathComponent: "ParseInvalidPathComponent", - ErrParseMissingIdentAfterAt: "ParseMissingIdentAfterAt", - ErrParseUnexpectedOperator: "ParseUnexpectedOperator", - ErrParseUnexpectedTerm: "ParseUnexpectedTerm", - ErrParseUnexpectedToken: "ParseUnexpectedToken", - ErrParseUnexpectedKeyword: "ParseUnexpectedKeyword", - ErrParseExpectedExpression: "ParseExpectedExpression", - ErrParseExpectedLeftParenAfterCast: "ParseExpectedLeftParenAfterCast", - ErrParseExpectedLeftParenValueConstructor: "ParseExpectedLeftParenValueConstructor", - ErrParseExpectedLeftParenBuiltinFunctionCall: "ParseExpectedLeftParenBuiltinFunctionCall", - ErrParseExpectedArgumentDelimiter: "ParseExpectedArgumentDelimiter", - ErrParseCastArity: "ParseCastArity", - ErrParseInvalidTypeParam: "ParseInvalidTypeParam", - ErrParseEmptySelect: "ParseEmptySelect", - ErrParseSelectMissingFrom: "ParseSelectMissingFrom", - ErrParseExpectedIdentForGroupName: "ParseExpectedIdentForGroupName", - ErrParseExpectedIdentForAlias: "ParseExpectedIdentForAlias", - ErrParseUnsupportedCallWithStar: "ParseUnsupportedCallWithStar", - ErrParseNonUnaryAgregateFunctionCall: "ParseNonUnaryAgregateFunctionCall", - ErrParseMalformedJoin: 
"ParseMalformedJoin", - ErrParseExpectedIdentForAt: "ParseExpectedIdentForAt", - ErrParseAsteriskIsNotAloneInSelectList: "ParseAsteriskIsNotAloneInSelectList", - ErrParseCannotMixSqbAndWildcardInSelectList: "ParseCannotMixSqbAndWildcardInSelectList", - ErrParseInvalidContextForWildcardInSelectList: "ParseInvalidContextForWildcardInSelectList", - ErrEvaluatorBindingDoesNotExist: "EvaluatorBindingDoesNotExist", - ErrIncorrectSQLFunctionArgumentType: "IncorrectSqlFunctionArgumentType", - ErrAmbiguousFieldName: "AmbiguousFieldName", - ErrEvaluatorInvalidArguments: "EvaluatorInvalidArguments", - ErrValueParseFailure: "ValueParseFailure", - ErrIntegerOverflow: "IntegerOverflow", - ErrLikeInvalidInputs: "LikeInvalidInputs", - ErrCastFailed: "CastFailed", - ErrInvalidCast: "Attempt to convert from one data type to another using CAST failed in the SQL expression.", - ErrEvaluatorInvalidTimestampFormatPattern: "EvaluatorInvalidTimestampFormatPattern", - ErrEvaluatorInvalidTimestampFormatPatternSymbolForParsing: "EvaluatorInvalidTimestampFormatPatternSymbolForParsing", - ErrEvaluatorTimestampFormatPatternDuplicateFields: "EvaluatorTimestampFormatPatternDuplicateFields", - ErrEvaluatorTimestampFormatPatternHourClockAmPmMismatch: "EvaluatorTimestampFormatPatternHourClockAmPmMismatch", - ErrEvaluatorUnterminatedTimestampFormatPatternToken: "EvaluatorUnterminatedTimestampFormatPatternToken", - ErrEvaluatorInvalidTimestampFormatPatternToken: "EvaluatorInvalidTimestampFormatPatternToken", - ErrEvaluatorInvalidTimestampFormatPatternSymbol: "EvaluatorInvalidTimestampFormatPatternSymbol", +// SelectError - represents s3 select error specified in +// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html#RESTObjectSELECTContent-responses-special-errors. 
+type SelectError interface { + Cause() error + ErrorCode() string + ErrorMessage() string + HTTPStatusCode() int + Error() string +} + +type s3Error struct { + code string + message string + statusCode int + cause error +} + +func (err *s3Error) Cause() error { + return err.cause +} + +func (err *s3Error) ErrorCode() string { + return err.code +} + +func (err *s3Error) ErrorMessage() string { + return err.message +} + +func (err *s3Error) HTTPStatusCode() int { + return err.statusCode +} + +func (err *s3Error) Error() string { + return err.message +} + +func errMalformedXML(err error) *s3Error { + return &s3Error{ + code: "MalformedXML", + message: "The XML provided was not well-formed or did not validate against our published schema. Check the service documentation and try again.", + statusCode: 400, + cause: err, + } +} + +func errInvalidCompressionFormat(err error) *s3Error { + return &s3Error{ + code: "InvalidCompressionFormat", + message: "The file is not in a supported compression format. Only GZIP and BZIP2 are supported.", + statusCode: 400, + cause: err, + } +} + +func errInvalidDataSource(err error) *s3Error { + return &s3Error{ + code: "InvalidDataSource", + message: "Invalid data source type. Only CSV, JSON, and Parquet are supported.", + statusCode: 400, + cause: err, + } +} + +func errInvalidRequestParameter(err error) *s3Error { + return &s3Error{ + code: "InvalidRequestParameter", + message: "The value of a parameter in SelectRequest element is invalid. Check the service API documentation and try again.", + statusCode: 400, + cause: err, + } +} + +func errObjectSerializationConflict(err error) *s3Error { + return &s3Error{ + code: "ObjectSerializationConflict", + message: "InputSerialization specifies more than one format (CSV, JSON, or Parquet), or OutputSerialization specifies more than one format (CSV or JSON). 
InputSerialization and OutputSerialization can only specify one format each.", + statusCode: 400, + cause: err, + } +} + +func errInvalidExpressionType(err error) *s3Error { + return &s3Error{ + code: "InvalidExpressionType", + message: "The ExpressionType is invalid. Only SQL expressions are supported.", + statusCode: 400, + cause: err, + } +} + +func errMissingRequiredParameter(err error) *s3Error { + return &s3Error{ + code: "MissingRequiredParameter", + message: "The SelectRequest entity is missing a required parameter. Check the service documentation and try again.", + statusCode: 400, + cause: err, + } +} + +func errTruncatedInput(err error) *s3Error { + return &s3Error{ + code: "TruncatedInput", + message: "Object decompression failed. Check that the object is properly compressed using the format specified in the request.", + statusCode: 400, + cause: err, + } } diff --git a/pkg/s3select/evaluate.go b/pkg/s3select/evaluate.go deleted file mode 100644 index ca27b272e..000000000 --- a/pkg/s3select/evaluate.go +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package s3select - -import ( - "strings" - - "github.com/tidwall/gjson" - "github.com/xwb1989/sqlparser" - - "github.com/minio/minio/pkg/s3select/format" -) - -// stringOps is a function which handles the case in a clause -// if there is a need to perform a string function -func stringOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string { - var value string - funcName := myFunc.Name.CompliantName() - switch tempArg := myFunc.Exprs[0].(type) { - case *sqlparser.AliasedExpr: - switch col := tempArg.Expr.(type) { - case *sqlparser.FuncExpr: - // myReturnVal is actually the tail recursive value being used in the eval func. - return applyStrFunc(gjson.Parse(myReturnVal), funcName) - case *sqlparser.ColName: - value = applyStrFunc(gjson.GetBytes(record, col.Name.CompliantName()), funcName) - case *sqlparser.SQLVal: - value = applyStrFunc(gjson.ParseBytes(col.Val), funcName) - } - } - return value -} - -// coalOps is a function which decomposes a COALESCE func expr into its struct. -func coalOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string { - myArgs := make([]string, len(myFunc.Exprs)) - - for i, expr := range myFunc.Exprs { - switch tempArg := expr.(type) { - case *sqlparser.AliasedExpr: - switch col := tempArg.Expr.(type) { - case *sqlparser.FuncExpr: - // myReturnVal is actually the tail recursive value being used in the eval func. - return myReturnVal - case *sqlparser.ColName: - myArgs[i] = gjson.GetBytes(record, col.Name.CompliantName()).String() - case *sqlparser.SQLVal: - myArgs[i] = string(col.Val) - } - } - } - return processCoalNoIndex(myArgs) -} - -// nullOps is a function which decomposes a NullIf func expr into its struct. 
-func nullOps(myFunc *sqlparser.FuncExpr, record []byte, myReturnVal string) string { - myArgs := make([]string, 2) - - for i, expr := range myFunc.Exprs { - switch tempArg := expr.(type) { - case *sqlparser.AliasedExpr: - switch col := tempArg.Expr.(type) { - case *sqlparser.FuncExpr: - return myReturnVal - case *sqlparser.ColName: - myArgs[i] = gjson.GetBytes(record, col.Name.CompliantName()).String() - case *sqlparser.SQLVal: - myArgs[i] = string(col.Val) - } - } - } - if myArgs[0] == myArgs[1] { - return "" - } - return myArgs[0] -} - -// isValidString is a function that ensures the -// current index is one with a StrFunc -func isValidFunc(myList []int, index int) bool { - if myList == nil { - return false - } - for _, i := range myList { - if i == index { - return true - } - } - return false -} - -// processCoalNoIndex is a function which evaluates a given COALESCE clause. -func processCoalNoIndex(coalStore []string) string { - for _, coal := range coalStore { - if coal != "null" && coal != "missing" && coal != "" { - return coal - } - } - return "null" -} - -// evaluateFuncExpr is a function that allows for tail recursive evaluation of -// nested function expressions -func evaluateFuncExpr(myVal *sqlparser.FuncExpr, myReturnVal string, record []byte) string { - if myVal == nil { - return myReturnVal - } - // retrieve all the relevant arguments of the function - var mySubFunc []*sqlparser.FuncExpr - mySubFunc = make([]*sqlparser.FuncExpr, len(myVal.Exprs)) - for i, expr := range myVal.Exprs { - switch col := expr.(type) { - case *sqlparser.AliasedExpr: - switch temp := col.Expr.(type) { - case *sqlparser.FuncExpr: - mySubFunc[i] = temp - } - } - } - // Need to do tree recursion so as to explore all possible directions of the - // nested function recursion - for i := 0; i < len(mySubFunc); i++ { - if supportedString(myVal.Name.CompliantName()) { - if mySubFunc != nil { - return stringOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record)) - } - 
return stringOps(myVal, record, myReturnVal) - } else if strings.ToUpper(myVal.Name.CompliantName()) == "NULLIF" { - if mySubFunc != nil { - return nullOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record)) - } - return nullOps(myVal, record, myReturnVal) - } else if strings.ToUpper(myVal.Name.CompliantName()) == "COALESCE" { - if mySubFunc != nil { - return coalOps(myVal, record, evaluateFuncExpr(mySubFunc[i], myReturnVal, record)) - } - return coalOps(myVal, record, myReturnVal) - } - } - return "" -} - -// evaluateFuncErr is a function that flags errors in nested functions. -func evaluateFuncErr(myVal *sqlparser.FuncExpr, reader format.Select) error { - if myVal == nil { - return nil - } - if !supportedFunc(myVal.Name.CompliantName()) { - return ErrUnsupportedSQLOperation - } - for _, expr := range myVal.Exprs { - switch tempArg := expr.(type) { - case *sqlparser.StarExpr: - return ErrParseUnsupportedCallWithStar - case *sqlparser.AliasedExpr: - switch col := tempArg.Expr.(type) { - case *sqlparser.FuncExpr: - if err := evaluateFuncErr(col, reader); err != nil { - return err - } - case *sqlparser.ColName: - if err := reader.ColNameErrs([]string{col.Name.CompliantName()}); err != nil { - return err - } - } - } - } - return nil -} - -// evaluateIsExpr is a function for evaluating expressions of the form "column is ...." 
-func evaluateIsExpr(myFunc *sqlparser.IsExpr, row []byte, alias string) (bool, error) { - getMyVal := func() (myVal string) { - switch myIs := myFunc.Expr.(type) { - // case for literal val - case *sqlparser.SQLVal: - myVal = string(myIs.Val) - // case for nested func val - case *sqlparser.FuncExpr: - myVal = evaluateFuncExpr(myIs, "", row) - // case for col val - case *sqlparser.ColName: - myVal = gjson.GetBytes(row, myIs.Name.CompliantName()).String() - } - return myVal - } - - operator := strings.ToLower(myFunc.Operator) - switch operator { - case "is null": - return getMyVal() == "", nil - case "is not null": - return getMyVal() != "", nil - default: - return false, ErrUnsupportedSQLOperation - } -} - -// supportedString is a function that checks whether the function is a supported -// string one -func supportedString(strFunc string) bool { - return format.StringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER"}) -} - -// supportedFunc is a function that checks whether the function is a supported -// S3 one. -func supportedFunc(strFunc string) bool { - return format.StringInSlice(strings.ToUpper(strFunc), []string{"TRIM", "SUBSTRING", "CHAR_LENGTH", "CHARACTER_LENGTH", "LOWER", "UPPER", "COALESCE", "NULLIF"}) -} diff --git a/pkg/s3select/format/csv/csv.go b/pkg/s3select/format/csv/csv.go deleted file mode 100644 index 2e2f14e09..000000000 --- a/pkg/s3select/format/csv/csv.go +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package csv - -import ( - "encoding/csv" - "encoding/xml" - "io" - "strconv" - "strings" - - "github.com/tidwall/sjson" - - "github.com/minio/minio/pkg/ioutil" - "github.com/minio/minio/pkg/s3select/format" -) - -// Options options are passed to the underlying encoding/csv reader. -type Options struct { - // HasHeader when true, will treat the first row as a header row. - HasHeader bool - - // RecordDelimiter is the string that records are delimited by. - RecordDelimiter string - - // FieldDelimiter is the string that fields are delimited by. - FieldDelimiter string - - // Comments is the string the first character of a line of - // text matches the comment character. - Comments string - - // Name of the table that is used for querying - Name string - - // ReadFrom is where the data will be read from. - ReadFrom io.Reader - - // If true then we need to add gzip or bzip reader. - // to extract the csv. - Compressed string - - // SQL expression meant to be evaluated. - Expression string - - // Output CSV will be delimited by. - OutputFieldDelimiter string - - // Output CSV record will be delimited by. - OutputRecordDelimiter string - - // Size of incoming object - StreamSize int64 - - // Whether Header is "USE" or another - HeaderOpt bool - - // Progress enabled, enable/disable progress messages. - Progress bool - - // Output format type, supported values are CSV and JSON - OutputType format.Type -} - -// cinput represents a record producing input from a formatted object. 
-type cinput struct { - options *Options - reader *csv.Reader - firstRow []string - header []string - minOutputLength int - stats struct { - BytesScanned int64 - BytesReturned int64 - BytesProcessed int64 - } -} - -// New sets up a new Input, the first row is read when this is run. -// If there is a problem with reading the first row, the error is returned. -// Otherwise, the returned reader can be reliably consumed with Read(). -// until Read() return err. -func New(opts *Options) (format.Select, error) { - // DelimitedReader treats custom record delimiter like `\r\n`,`\r`,`ab` etc and replaces it with `\n`. - normalizedReader := ioutil.NewDelimitedReader(opts.ReadFrom, []rune(opts.RecordDelimiter)) - reader := &cinput{ - options: opts, - reader: csv.NewReader(normalizedReader), - } - reader.stats.BytesScanned = opts.StreamSize - reader.stats.BytesProcessed = 0 - reader.stats.BytesReturned = 0 - reader.firstRow = nil - - reader.reader.FieldsPerRecord = -1 - if reader.options.FieldDelimiter != "" { - reader.reader.Comma = rune(reader.options.FieldDelimiter[0]) - } - - if reader.options.Comments != "" { - reader.reader.Comment = rune(reader.options.Comments[0]) - } - - // QuoteCharacter - " (defaulted currently) - reader.reader.LazyQuotes = true - - if err := reader.readHeader(); err != nil { - return nil, err - } - - return reader, nil -} - -// Replace the spaces in columnnames with underscores -func cleanHeader(columns []string) []string { - for i := range columns { - // Even if header name is specified, some CSV's - // might have column header names might be empty - // and non-empty. In such a scenario we prepare - // indexed value. 
- if columns[i] == "" { - columns[i] = "_" + strconv.Itoa(i) - } - columns[i] = strings.Replace(columns[i], " ", "_", -1) - } - return columns -} - -// readHeader reads the header into the header variable if the header is present -// as the first row of the csv -func (reader *cinput) readHeader() error { - var readErr error - if reader.options.HasHeader { - reader.firstRow, readErr = reader.reader.Read() - if readErr != nil { - return format.ErrCSVParsingError - } - reader.header = cleanHeader(reader.firstRow) - reader.firstRow = nil - } else { - reader.firstRow, readErr = reader.reader.Read() - if readErr != nil { - return format.ErrCSVParsingError - } - reader.header = make([]string, len(reader.firstRow)) - for i := range reader.firstRow { - reader.header[i] = "_" + strconv.Itoa(i) - } - } - reader.minOutputLength = len(reader.header) - return nil -} - -// Progress - return true if progress was requested. -func (reader *cinput) Progress() bool { - return reader.options.Progress -} - -// UpdateBytesProcessed - populates the bytes Processed -func (reader *cinput) UpdateBytesProcessed(size int64) { - reader.stats.BytesProcessed += size - -} - -// Read returns byte sequence -func (reader *cinput) Read() ([]byte, error) { - dec := reader.readRecord() - if dec != nil { - var data []byte - var err error - // Navigate column values in reverse order to preserve - // the input order for AWS S3 compatibility, because - // sjson adds json key/value pairs in first in last out - // fashion. This should be fixed in sjson ideally. Following - // work around is needed to circumvent this issue for now. - for i := len(dec) - 1; i >= 0; i-- { - data, err = sjson.SetBytes(data, reader.header[i], dec[i]) - if err != nil { - return nil, err - } - } - return data, nil - } - return nil, nil -} - -// OutputFieldDelimiter - returns the requested output field delimiter. 
-func (reader *cinput) OutputFieldDelimiter() string { - return reader.options.OutputFieldDelimiter -} - -// OutputRecordDelimiter - returns the requested output record delimiter. -func (reader *cinput) OutputRecordDelimiter() string { - return reader.options.OutputRecordDelimiter -} - -// HasHeader - returns true or false depending upon the header. -func (reader *cinput) HasHeader() bool { - return reader.options.HasHeader -} - -// Expression - return the Select Expression for -func (reader *cinput) Expression() string { - return reader.options.Expression -} - -// UpdateBytesReturned - updates the Bytes returned for -func (reader *cinput) UpdateBytesReturned(size int64) { - reader.stats.BytesReturned += size -} - -// Header returns the header of the reader. Either the first row if a header -// set in the options, or c#, where # is the column number, starting with 0. -func (reader *cinput) Header() []string { - return reader.header -} - -// readRecord reads a single record from the stream and it always returns successfully. -// If the record is empty, an empty []string is returned. -// Record expand to match the current row size, adding blank fields as needed. -// Records never return less then the number of fields in the first row. -// Returns nil on EOF -// In the event of a parse error due to an invalid record, it is logged, and -// an empty []string is returned with the number of fields in the first row, -// as if the record were empty. -// -// In general, this is a very tolerant of problems reader. 
-func (reader *cinput) readRecord() []string { - var row []string - var fileErr error - - if reader.firstRow != nil { - row = reader.firstRow - reader.firstRow = nil - return row - } - - row, fileErr = reader.reader.Read() - emptysToAppend := reader.minOutputLength - len(row) - if fileErr == io.EOF || fileErr == io.ErrClosedPipe { - return nil - } else if _, ok := fileErr.(*csv.ParseError); ok { - emptysToAppend = reader.minOutputLength - } - - if emptysToAppend > 0 { - for counter := 0; counter < emptysToAppend; counter++ { - row = append(row, "") - } - } - - return row -} - -// CreateStatXML is the function which does the marshaling from the stat -// structs into XML so that the progress and stat message can be sent -func (reader *cinput) CreateStatXML() (string, error) { - if reader.options.Compressed == "NONE" { - reader.stats.BytesProcessed = reader.options.StreamSize - reader.stats.BytesScanned = reader.stats.BytesProcessed - } - out, err := xml.Marshal(&format.Stats{ - BytesScanned: reader.stats.BytesScanned, - BytesProcessed: reader.stats.BytesProcessed, - BytesReturned: reader.stats.BytesReturned, - }) - if err != nil { - return "", err - } - return xml.Header + string(out), nil -} - -// CreateProgressXML is the function which does the marshaling from the progress -// structs into XML so that the progress and stat message can be sent -func (reader *cinput) CreateProgressXML() (string, error) { - if reader.options.HasHeader { - reader.stats.BytesProcessed += format.ProcessSize(reader.header) - } - if reader.options.Compressed == "NONE" { - reader.stats.BytesScanned = reader.stats.BytesProcessed - } - out, err := xml.Marshal(&format.Progress{ - BytesScanned: reader.stats.BytesScanned, - BytesProcessed: reader.stats.BytesProcessed, - BytesReturned: reader.stats.BytesReturned, - }) - if err != nil { - return "", err - } - return xml.Header + string(out), nil -} - -// Type - return the data format type -func (reader *cinput) Type() format.Type { - return 
format.CSV -} - -// OutputType - return the data format type -func (reader *cinput) OutputType() format.Type { - return reader.options.OutputType -} - -// ColNameErrs is a function which makes sure that the headers are requested are -// present in the file otherwise it throws an error. -func (reader *cinput) ColNameErrs(columnNames []string) error { - for i := 0; i < len(columnNames); i++ { - if columnNames[i] == "" { - continue - } - if !format.IsInt(columnNames[i]) && !reader.options.HeaderOpt { - return format.ErrInvalidColumnIndex - } - if format.IsInt(columnNames[i]) { - tempInt, _ := strconv.Atoi(columnNames[i]) - if tempInt > len(reader.Header()) || tempInt == 0 { - return format.ErrInvalidColumnIndex - } - } else { - if reader.options.HeaderOpt && !format.StringInSlice(columnNames[i], reader.Header()) { - return format.ErrParseInvalidPathComponent - } - } - } - return nil -} diff --git a/pkg/s3select/format/errors.go b/pkg/s3select/format/errors.go deleted file mode 100644 index 598a0efa5..000000000 --- a/pkg/s3select/format/errors.go +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package format - -import "errors" - -// ErrTruncatedInput is an error if the object is not compressed properly and an -// error occurs during decompression. -var ErrTruncatedInput = errors.New("Object decompression failed. 
Check that the object is properly compressed using the format specified in the request") - -// ErrCSVParsingError is an error if the CSV presents an error while being -// parsed. -var ErrCSVParsingError = errors.New("Encountered an Error parsing the CSV file. Check the file and try again") - -// ErrInvalidColumnIndex is an error if you provide a column index which is not -// valid. -var ErrInvalidColumnIndex = errors.New("Column index in the SQL expression is invalid") - -// ErrParseInvalidPathComponent is an error that occurs if there is an invalid -// path component. -var ErrParseInvalidPathComponent = errors.New("The SQL expression contains an invalid path component") - -// ErrJSONParsingError is an error if while parsing the JSON an error arises. -var ErrJSONParsingError = errors.New("Encountered an error parsing the JSON file. Check the file and try again") diff --git a/pkg/s3select/format/helpers.go b/pkg/s3select/format/helpers.go deleted file mode 100644 index a4a1ecde7..000000000 --- a/pkg/s3select/format/helpers.go +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package format - -import "strconv" - -// IsInt - returns a true or false, whether a string can -// be represented as an int. 
-func IsInt(s string) bool { - _, err := strconv.Atoi(s) - return err == nil -} - -// StringInSlice - this function finds whether a string is in a list -func StringInSlice(x string, list []string) bool { - for _, y := range list { - if x == y { - return true - } - } - return false -} - -// ProcessSize - this function processes size so that we can calculate bytes BytesProcessed. -func ProcessSize(myrecord []string) int64 { - if len(myrecord) > 0 { - var size int64 - size = int64(len(myrecord)-1) + 1 - for i := range myrecord { - size += int64(len(myrecord[i])) - } - - return size - } - return 0 -} diff --git a/pkg/s3select/format/json/json.go b/pkg/s3select/format/json/json.go deleted file mode 100644 index f9958a87d..000000000 --- a/pkg/s3select/format/json/json.go +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package json - -import ( - "bufio" - "encoding/xml" - "io" - - "github.com/minio/minio/pkg/s3select/format" - "github.com/tidwall/gjson" -) - -// Options options are passed to the underlying encoding/json reader. -type Options struct { - - // Name of the table that is used for querying - Name string - - // ReadFrom is where the data will be read from. - ReadFrom io.Reader - - // If true then we need to add gzip or bzip reader. - // to extract the csv. - Compressed string - - // SQL expression meant to be evaluated. 
- Expression string - - // Input record delimiter. - RecordDelimiter string - - // Output CSV will be delimited by. - OutputFieldDelimiter string - - // Output record delimiter. - OutputRecordDelimiter string - - // Size of incoming object - StreamSize int64 - - // True if DocumentType is DOCUMENTS - DocumentType bool - - // Progress enabled, enable/disable progress messages. - Progress bool - - // Output format type, supported values are CSV and JSON - OutputType format.Type -} - -// jinput represents a record producing input from a formatted file or pipe. -type jinput struct { - options *Options - reader *bufio.Reader - header []string - minOutputLength int - stats struct { - BytesScanned int64 - BytesReturned int64 - BytesProcessed int64 - } -} - -// New sets up a new, the first Json is read when this is run. -// If there is a problem with reading the first Json, the error is returned. -// Otherwise, the returned reader can be reliably consumed with jsonRead() -// until jsonRead() returns nil. -func New(opts *Options) (format.Select, error) { - reader := &jinput{ - options: opts, - reader: bufio.NewReader(opts.ReadFrom), - } - reader.stats.BytesScanned = opts.StreamSize - reader.stats.BytesProcessed = 0 - reader.stats.BytesReturned = 0 - return reader, nil -} - -// Progress - return true if progress was requested. 
-func (reader *jinput) Progress() bool { - return reader.options.Progress -} - -// UpdateBytesProcessed - populates the bytes Processed -func (reader *jinput) UpdateBytesProcessed(size int64) { - reader.stats.BytesProcessed += size -} - -// Read the file and returns -func (reader *jinput) Read() ([]byte, error) { - data, _, err := reader.reader.ReadLine() - if err != nil { - if err == io.EOF || err == io.ErrClosedPipe { - err = nil - } else { - err = format.ErrJSONParsingError - } - } - if err == nil { - var header []string - gjson.ParseBytes(data).ForEach(func(key, value gjson.Result) bool { - header = append(header, key.String()) - return true - }) - reader.header = header - } - return data, err -} - -// OutputFieldDelimiter - returns the delimiter specified in input request, -// for JSON output this value is empty, but does have a value when -// output type is CSV. -func (reader *jinput) OutputFieldDelimiter() string { - return reader.options.OutputFieldDelimiter -} - -// OutputRecordDelimiter - returns the delimiter specified in input request, after each JSON record. -func (reader *jinput) OutputRecordDelimiter() string { - return reader.options.OutputRecordDelimiter -} - -// HasHeader - returns true or false depending upon the header. 
-func (reader *jinput) HasHeader() bool { - return true -} - -// Expression - return the Select Expression for -func (reader *jinput) Expression() string { - return reader.options.Expression -} - -// UpdateBytesReturned - updates the Bytes returned for -func (reader *jinput) UpdateBytesReturned(size int64) { - reader.stats.BytesReturned += size -} - -// Header returns a nil in case of -func (reader *jinput) Header() []string { - return reader.header -} - -// CreateStatXML is the function which does the marshaling from the stat -// structs into XML so that the progress and stat message can be sent -func (reader *jinput) CreateStatXML() (string, error) { - if reader.options.Compressed == "NONE" { - reader.stats.BytesProcessed = reader.options.StreamSize - reader.stats.BytesScanned = reader.stats.BytesProcessed - } - out, err := xml.Marshal(&format.Stats{ - BytesScanned: reader.stats.BytesScanned, - BytesProcessed: reader.stats.BytesProcessed, - BytesReturned: reader.stats.BytesReturned, - }) - if err != nil { - return "", err - } - return xml.Header + string(out), nil -} - -// CreateProgressXML is the function which does the marshaling from the progress -// structs into XML so that the progress and stat message can be sent -func (reader *jinput) CreateProgressXML() (string, error) { - if !(reader.options.Compressed != "NONE") { - reader.stats.BytesScanned = reader.stats.BytesProcessed - } - out, err := xml.Marshal(&format.Progress{ - BytesScanned: reader.stats.BytesScanned, - BytesProcessed: reader.stats.BytesProcessed, - BytesReturned: reader.stats.BytesReturned, - }) - if err != nil { - return "", err - } - return xml.Header + string(out), nil -} - -// Type - return the data format type { -func (reader *jinput) Type() format.Type { - return format.JSON -} - -// OutputType - return the data format type { -func (reader *jinput) OutputType() format.Type { - return reader.options.OutputType -} - -// ColNameErrs - this is a dummy function for JSON input type. 
-func (reader *jinput) ColNameErrs(columnNames []string) error { - return nil -} diff --git a/pkg/s3select/format/select.go b/pkg/s3select/format/select.go deleted file mode 100644 index a2a0ca47b..000000000 --- a/pkg/s3select/format/select.go +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package format - -import "encoding/xml" - -// Select Interface helper methods, implementing features needed for -// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html -type Select interface { - Type() Type - OutputType() Type - Read() ([]byte, error) - Header() []string - HasHeader() bool - OutputFieldDelimiter() string - OutputRecordDelimiter() string - UpdateBytesProcessed(int64) - Expression() string - UpdateBytesReturned(int64) - CreateStatXML() (string, error) - CreateProgressXML() (string, error) - ColNameErrs(columnNames []string) error - Progress() bool -} - -// Progress represents a struct that represents the format for XML of the -// progress messages -type Progress struct { - XMLName xml.Name `xml:"Progress" json:"-"` - BytesScanned int64 `xml:"BytesScanned"` - BytesProcessed int64 `xml:"BytesProcessed"` - BytesReturned int64 `xml:"BytesReturned"` -} - -// Stats represents a struct that represents the format for XML of the stat -// messages -type Stats struct { - XMLName xml.Name `xml:"Stats" json:"-"` - BytesScanned int64 `xml:"BytesScanned"` - 
BytesProcessed int64 `xml:"BytesProcessed"` - BytesReturned int64 `xml:"BytesReturned"` -} - -// Type different types of support data format types. -type Type string - -// Different data format types. -const ( - JSON Type = "json" - CSV Type = "csv" -) diff --git a/pkg/s3select/genmessage.go b/pkg/s3select/genmessage.go new file mode 100644 index 000000000..f5009c320 --- /dev/null +++ b/pkg/s3select/genmessage.go @@ -0,0 +1,182 @@ +// +build ignore + +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package s3select + +import ( + "bytes" + "encoding/binary" + "fmt" + "hash/crc32" +) + +func genRecordsHeader() { + buf := new(bytes.Buffer) + + buf.WriteByte(13) + buf.WriteString(":message-type") + buf.WriteByte(7) + buf.Write([]byte{0, 5}) + buf.WriteString("event") + + buf.WriteByte(13) + buf.WriteString(":content-type") + buf.WriteByte(7) + buf.Write([]byte{0, 24}) + buf.WriteString("application/octet-stream") + + buf.WriteByte(11) + buf.WriteString(":event-type") + buf.WriteByte(7) + buf.Write([]byte{0, 7}) + buf.WriteString("Records") + + fmt.Println(buf.Bytes()) +} + +// Continuation Message +// ==================== +// Header specification +// -------------------- +// Continuation messages contain two headers, as follows: +// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-cont.png +// +// Payload specification +// --------------------- +// Continuation messages have no payload. +func genContinuationMessage() { + buf := new(bytes.Buffer) + + buf.WriteByte(13) + buf.WriteString(":message-type") + buf.WriteByte(7) + buf.Write([]byte{0, 5}) + buf.WriteString("event") + + buf.WriteByte(11) + buf.WriteString(":event-type") + buf.WriteByte(7) + buf.Write([]byte{0, 4}) + buf.WriteString("Cont") + + header := buf.Bytes() + headerLength := len(header) + payloadLength := 0 + totalLength := totalByteLength(headerLength, payloadLength) + + buf = new(bytes.Buffer) + binary.Write(buf, binary.BigEndian, uint32(totalLength)) + binary.Write(buf, binary.BigEndian, uint32(headerLength)) + prelude := buf.Bytes() + binary.Write(buf, binary.BigEndian, crc32.ChecksumIEEE(prelude)) + buf.Write(header) + message := buf.Bytes() + binary.Write(buf, binary.BigEndian, crc32.ChecksumIEEE(message)) + + fmt.Println(buf.Bytes()) +} + +func genProgressHeader() { + buf := new(bytes.Buffer) + + buf.WriteByte(13) + buf.WriteString(":message-type") + buf.WriteByte(7) + buf.Write([]byte{0, 5}) + buf.WriteString("event") + + buf.WriteByte(13) + 
buf.WriteString(":content-type") + buf.WriteByte(7) + buf.Write([]byte{0, 8}) + buf.WriteString("text/xml") + + buf.WriteByte(11) + buf.WriteString(":event-type") + buf.WriteByte(7) + buf.Write([]byte{0, 8}) + buf.WriteString("Progress") + + fmt.Println(buf.Bytes()) +} + +func genStatsHeader() { + buf := new(bytes.Buffer) + + buf.WriteByte(13) + buf.WriteString(":message-type") + buf.WriteByte(7) + buf.Write([]byte{0, 5}) + buf.WriteString("event") + + buf.WriteByte(13) + buf.WriteString(":content-type") + buf.WriteByte(7) + buf.Write([]byte{0, 8}) + buf.WriteString("text/xml") + + buf.WriteByte(11) + buf.WriteString(":event-type") + buf.WriteByte(7) + buf.Write([]byte{0, 5}) + buf.WriteString("Stats") + + fmt.Println(buf.Bytes()) +} + +// End Message +// =========== +// Header specification +// -------------------- +// End messages contain two headers, as follows: +// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-end.png +// +// Payload specification +// --------------------- +// End messages have no payload. 
+func genEndMessage() { + buf := new(bytes.Buffer) + + buf.WriteByte(13) + buf.WriteString(":message-type") + buf.WriteByte(7) + buf.Write([]byte{0, 5}) + buf.WriteString("event") + + buf.WriteByte(11) + buf.WriteString(":event-type") + buf.WriteByte(7) + buf.Write([]byte{0, 3}) + buf.WriteString("End") + + header := buf.Bytes() + headerLength := len(header) + payloadLength := 0 + totalLength := totalByteLength(headerLength, payloadLength) + + buf = new(bytes.Buffer) + binary.Write(buf, binary.BigEndian, uint32(totalLength)) + binary.Write(buf, binary.BigEndian, uint32(headerLength)) + prelude := buf.Bytes() + binary.Write(buf, binary.BigEndian, crc32.ChecksumIEEE(prelude)) + buf.Write(header) + message := buf.Bytes() + binary.Write(buf, binary.BigEndian, crc32.ChecksumIEEE(message)) + + fmt.Println(buf.Bytes()) +} diff --git a/pkg/s3select/helpers.go b/pkg/s3select/helpers.go deleted file mode 100644 index 89831d242..000000000 --- a/pkg/s3select/helpers.go +++ /dev/null @@ -1,563 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package s3select - -import ( - "fmt" - "math" - "strconv" - "strings" - - "github.com/minio/minio/pkg/s3select/format" - "github.com/tidwall/gjson" - "github.com/xwb1989/sqlparser" -) - -// MaxExpressionLength - 256KiB -const MaxExpressionLength = 256 * 1024 - -// matchesMyWhereClause takes []byte, process the where clause and returns true if the row suffices -func matchesMyWhereClause(record []byte, alias string, whereClause sqlparser.Expr) (bool, error) { - var conversionColumn string - var operator string - var operand gjson.Result - if fmt.Sprintf("%v", whereClause) == "false" { - return false, nil - } - switch expr := whereClause.(type) { - case *sqlparser.IsExpr: - return evaluateIsExpr(expr, record, alias) - case *sqlparser.RangeCond: - operator = expr.Operator - if operator != "between" && operator != "not between" { - return false, ErrUnsupportedSQLOperation - } - result, err := evaluateBetween(expr, alias, record) - if err != nil { - return false, err - } - if operator == "not between" { - return !result, nil - } - return result, nil - case *sqlparser.ComparisonExpr: - operator = expr.Operator - switch right := expr.Right.(type) { - case *sqlparser.FuncExpr: - operand = gjson.Parse(evaluateFuncExpr(right, "", record)) - case *sqlparser.SQLVal: - operand = gjson.ParseBytes(right.Val) - } - var myVal string - switch left := expr.Left.(type) { - case *sqlparser.FuncExpr: - myVal = evaluateFuncExpr(left, "", record) - conversionColumn = "" - case *sqlparser.ColName: - conversionColumn = left.Name.CompliantName() - } - if myVal != "" { - return evaluateOperator(gjson.Parse(myVal), operator, operand) - } - return evaluateOperator(gjson.GetBytes(record, conversionColumn), operator, operand) - case *sqlparser.AndExpr: - var leftVal bool - var rightVal bool - switch left := expr.Left.(type) { - case *sqlparser.ComparisonExpr: - temp, err := matchesMyWhereClause(record, alias, left) - if err != nil { - return false, err - } - leftVal = temp - } - switch 
right := expr.Right.(type) { - case *sqlparser.ComparisonExpr: - temp, err := matchesMyWhereClause(record, alias, right) - if err != nil { - return false, err - } - rightVal = temp - } - return (rightVal && leftVal), nil - case *sqlparser.OrExpr: - var leftVal bool - var rightVal bool - switch left := expr.Left.(type) { - case *sqlparser.ComparisonExpr: - leftVal, _ = matchesMyWhereClause(record, alias, left) - - } - switch right := expr.Right.(type) { - case *sqlparser.ComparisonExpr: - rightVal, _ = matchesMyWhereClause(record, alias, right) - } - return (rightVal || leftVal), nil - } - return true, nil -} - -func applyStrFunc(rawArg gjson.Result, funcName string) string { - switch strings.ToUpper(funcName) { - case "TRIM": - // parser has an issue which does not allow it to support - // Trim with other arguments - return strings.Trim(rawArg.String(), " ") - case "SUBSTRING": - // TODO: parser has an issue which does not support substring - return rawArg.String() - case "CHAR_LENGTH": - return strconv.Itoa(len(rawArg.String())) - case "CHARACTER_LENGTH": - return strconv.Itoa(len(rawArg.String())) - case "LOWER": - return strings.ToLower(rawArg.String()) - case "UPPER": - return strings.ToUpper(rawArg.String()) - } - return rawArg.String() - -} - -// evaluateBetween is a function which evaluates a Between Clause. 
-func evaluateBetween(betweenExpr *sqlparser.RangeCond, alias string, record []byte) (bool, error) { - var colToVal gjson.Result - var colFromVal gjson.Result - var conversionColumn string - var funcName string - switch colTo := betweenExpr.To.(type) { - case sqlparser.Expr: - switch colToMyVal := colTo.(type) { - case *sqlparser.FuncExpr: - colToVal = gjson.Parse(stringOps(colToMyVal, record, "")) - case *sqlparser.SQLVal: - colToVal = gjson.ParseBytes(colToMyVal.Val) - } - } - switch colFrom := betweenExpr.From.(type) { - case sqlparser.Expr: - switch colFromMyVal := colFrom.(type) { - case *sqlparser.FuncExpr: - colFromVal = gjson.Parse(stringOps(colFromMyVal, record, "")) - case *sqlparser.SQLVal: - colFromVal = gjson.ParseBytes(colFromMyVal.Val) - } - } - var myFuncVal string - switch left := betweenExpr.Left.(type) { - case *sqlparser.FuncExpr: - myFuncVal = evaluateFuncExpr(left, "", record) - conversionColumn = "" - case *sqlparser.ColName: - conversionColumn = cleanCol(left.Name.CompliantName(), alias) - } - toGreater, err := evaluateOperator(colToVal, ">", colFromVal) - if err != nil { - return false, err - } - if toGreater { - return evalBetweenGreater(conversionColumn, record, funcName, colFromVal, colToVal, myFuncVal) - } - return evalBetweenLess(conversionColumn, record, funcName, colFromVal, colToVal, myFuncVal) -} - -func evalBetween(conversionColumn string, record []byte, funcName string, colFromVal gjson.Result, colToVal gjson.Result, myColVal string, operator string) (bool, error) { - if format.IsInt(conversionColumn) { - myVal, err := evaluateOperator(gjson.GetBytes(record, "_"+conversionColumn), operator, colFromVal) - if err != nil { - return false, err - } - var myOtherVal bool - myOtherVal, err = evaluateOperator(colToVal, operator, gjson.GetBytes(record, "_"+conversionColumn)) - if err != nil { - return false, err - } - return (myVal && myOtherVal), nil - } - if myColVal != "" { - myVal, err := evaluateOperator(gjson.Parse(myColVal), 
operator, colFromVal) - if err != nil { - return false, err - } - var myOtherVal bool - myOtherVal, err = evaluateOperator(colToVal, operator, gjson.Parse(myColVal)) - if err != nil { - return false, err - } - return (myVal && myOtherVal), nil - } - myVal, err := evaluateOperator(gjson.GetBytes(record, conversionColumn), operator, colFromVal) - if err != nil { - return false, err - } - var myOtherVal bool - myOtherVal, err = evaluateOperator(colToVal, operator, gjson.GetBytes(record, conversionColumn)) - if err != nil { - return false, err - } - return (myVal && myOtherVal), nil -} - -// evalBetweenGreater is a function which evaluates the between given that the -// TO is > than the FROM. -func evalBetweenGreater(conversionColumn string, record []byte, funcName string, colFromVal gjson.Result, colToVal gjson.Result, myColVal string) (bool, error) { - return evalBetween(conversionColumn, record, funcName, colFromVal, colToVal, myColVal, ">=") -} - -// evalBetweenLess is a function which evaluates the between given that the -// FROM is > than the TO. -func evalBetweenLess(conversionColumn string, record []byte, funcName string, colFromVal gjson.Result, colToVal gjson.Result, myColVal string) (bool, error) { - return evalBetween(conversionColumn, record, funcName, colFromVal, colToVal, myColVal, "<=") -} - -// This is a really important function it actually evaluates the boolean -// statement and therefore actually returns a bool, it functions as the lowest -// level of the state machine. 
-func evaluateOperator(myTblVal gjson.Result, operator string, operand gjson.Result) (bool, error) { - if err := checkValidOperator(operator); err != nil { - return false, err - } - if !myTblVal.Exists() { - return false, nil - } - switch { - case operand.Type == gjson.String || operand.Type == gjson.Null: - return stringEval(myTblVal.String(), operator, operand.String()) - case operand.Type == gjson.Number: - opInt := format.IsInt(operand.Raw) - tblValInt := format.IsInt(strings.Trim(myTblVal.Raw, "\"")) - if opInt && tblValInt { - return intEval(int64(myTblVal.Float()), operator, operand.Int()) - } - if !opInt && !tblValInt { - return floatEval(myTblVal.Float(), operator, operand.Float()) - } - switch operator { - case "!=": - return true, nil - } - return false, nil - case myTblVal.Type != operand.Type: - return false, nil - default: - return false, ErrUnsupportedSyntax - } -} - -// checkValidOperator ensures that the current operator is supported -func checkValidOperator(operator string) error { - listOfOps := []string{">", "<", "=", "<=", ">=", "!=", "like"} - for i := range listOfOps { - if operator == listOfOps[i] { - return nil - } - } - return ErrParseUnknownOperator -} - -// stringEval is for evaluating the state of string comparison. -func stringEval(myRecordVal string, operator string, myOperand string) (bool, error) { - switch operator { - case ">": - return myRecordVal > myOperand, nil - case "<": - return myRecordVal < myOperand, nil - case "=": - return myRecordVal == myOperand, nil - case "<=": - return myRecordVal <= myOperand, nil - case ">=": - return myRecordVal >= myOperand, nil - case "!=": - return myRecordVal != myOperand, nil - case "like": - return likeConvert(myOperand, myRecordVal) - } - return false, ErrUnsupportedSyntax -} - -// intEval is for evaluating integer comparisons. 
-func intEval(myRecordVal int64, operator string, myOperand int64) (bool, error) { - - switch operator { - case ">": - return myRecordVal > myOperand, nil - case "<": - return myRecordVal < myOperand, nil - case "=": - return myRecordVal == myOperand, nil - case "<=": - return myRecordVal <= myOperand, nil - case ">=": - return myRecordVal >= myOperand, nil - case "!=": - return myRecordVal != myOperand, nil - } - return false, ErrUnsupportedSyntax -} - -// floatEval is for evaluating the comparison of floats. -func floatEval(myRecordVal float64, operator string, myOperand float64) (bool, error) { - // Basically need some logic thats like, if the types dont match check for a cast - switch operator { - case ">": - return myRecordVal > myOperand, nil - case "<": - return myRecordVal < myOperand, nil - case "=": - return myRecordVal == myOperand, nil - case "<=": - return myRecordVal <= myOperand, nil - case ">=": - return myRecordVal >= myOperand, nil - case "!=": - return myRecordVal != myOperand, nil - } - return false, ErrUnsupportedSyntax -} - -// prefixMatch allows for matching a prefix only like query e.g a% -func prefixMatch(pattern string, record string) bool { - for i := 0; i < len(pattern)-1; i++ { - if pattern[i] != record[i] && pattern[i] != byte('_') { - return false - } - } - return true -} - -// suffixMatch allows for matching a suffix only like query e.g %an -func suffixMatch(pattern string, record string) bool { - for i := len(pattern) - 1; i > 0; i-- { - if pattern[i] != record[len(record)-(len(pattern)-i)] && pattern[i] != byte('_') { - return false - } - } - return true -} - -// This function is for evaluating select statements which are case sensitive -func likeConvert(pattern string, record string) (bool, error) { - // If pattern is empty just return false - if pattern == "" || record == "" { - return false, nil - } - // for suffix match queries e.g %a - if len(pattern) >= 2 && pattern[0] == byte('%') && strings.Count(pattern, "%") == 1 { - 
return suffixMatch(pattern, record), nil - } - // for prefix match queries e.g a% - if len(pattern) >= 2 && pattern[len(pattern)-1] == byte('%') && strings.Count(pattern, "%") == 1 { - return prefixMatch(pattern, record), nil - } - charCount := 0 - currPos := 0 - // Loop through the pattern so that a boolean can be returned - for i := 0; i < len(pattern); i++ { - if pattern[i] == byte('_') { - // if its an underscore it can be anything so shift current position for - // pattern and string - charCount++ - // if there have been more characters in the pattern than record, clearly - // there should be a return - if i != len(pattern)-1 { - if pattern[i+1] != byte('%') && pattern[i+1] != byte('_') { - if currPos != len(record)-1 && pattern[i+1] != record[currPos+1] { - return false, nil - } - } - } - if charCount > len(record) { - return false, nil - } - // if the pattern has been fully evaluated, then just return. - if len(pattern) == i+1 { - return true, nil - } - i++ - currPos++ - } - if pattern[i] == byte('%') || pattern[i] == byte('*') { - // if there is a wildcard then want to return true if its last and flag it. - if currPos == len(record) { - return false, nil - } - if i+1 == len(pattern) { - return true, nil - } - } else { - charCount++ - matched := false - // iterate through the pattern and check if there is a match for the - // character - for currPos < len(record) { - if record[currPos] == pattern[i] || pattern[i] == byte('_') { - matched = true - break - } - currPos++ - } - currPos++ - // if the character did not match then return should occur. - if !matched { - return false, nil - } - } - } - if charCount > len(record) { - return false, nil - } - if currPos < len(record) { - return false, nil - } - return true, nil -} - -// cleanCol cleans a column name from the parser so that the name is returned to -// original. 
-func cleanCol(myCol string, alias string) string { - if len(myCol) <= 0 { - return myCol - } - if !strings.HasPrefix(myCol, alias) && myCol[0] == '_' { - myCol = alias + myCol - } - - if strings.Contains(myCol, ".") { - myCol = strings.Replace(myCol, alias+"._", "", len(myCol)) - } - myCol = strings.Replace(myCol, alias+"_", "", len(myCol)) - return myCol -} - -// whereClauseNameErrs is a function which returns an error if there is a column -// in the where clause which does not exist. -func whereClauseNameErrs(whereClause interface{}, alias string, f format.Select) error { - var conversionColumn string - switch expr := whereClause.(type) { - // case for checking errors within a clause of the form "col_name is ..." - case *sqlparser.IsExpr: - switch myCol := expr.Expr.(type) { - case *sqlparser.FuncExpr: - if err := evaluateFuncErr(myCol, f); err != nil { - return err - } - case *sqlparser.ColName: - conversionColumn = cleanCol(myCol.Name.CompliantName(), alias) - } - case *sqlparser.RangeCond: - switch left := expr.Left.(type) { - case *sqlparser.FuncExpr: - if err := evaluateFuncErr(left, f); err != nil { - return err - } - case *sqlparser.ColName: - conversionColumn = cleanCol(left.Name.CompliantName(), alias) - } - case *sqlparser.ComparisonExpr: - switch left := expr.Left.(type) { - case *sqlparser.FuncExpr: - if err := evaluateFuncErr(left, f); err != nil { - return err - } - case *sqlparser.ColName: - conversionColumn = cleanCol(left.Name.CompliantName(), alias) - } - case *sqlparser.AndExpr: - switch left := expr.Left.(type) { - case *sqlparser.ComparisonExpr: - return whereClauseNameErrs(left, alias, f) - } - switch right := expr.Right.(type) { - case *sqlparser.ComparisonExpr: - return whereClauseNameErrs(right, alias, f) - } - case *sqlparser.OrExpr: - switch left := expr.Left.(type) { - case *sqlparser.ComparisonExpr: - return whereClauseNameErrs(left, alias, f) - } - switch right := expr.Right.(type) { - case *sqlparser.ComparisonExpr: - return 
whereClauseNameErrs(right, alias, f) - } - } - if conversionColumn != "" { - return f.ColNameErrs([]string{conversionColumn}) - } - return nil -} - -// aggFuncToStr converts an array of floats into a properly formatted string. -func aggFuncToStr(aggVals []float64, f format.Select) string { - // Define a number formatting function - numToStr := func(f float64) string { - if f == math.Trunc(f) { - return strconv.FormatInt(int64(f), 10) - } - return strconv.FormatFloat(f, 'f', 6, 64) - } - - // Display all whole numbers in aggVals as integers - vals := make([]string, len(aggVals)) - for i, v := range aggVals { - vals[i] = numToStr(v) - } - - // Intersperse field delimiter - return strings.Join(vals, f.OutputFieldDelimiter()) -} - -// checkForDuplicates ensures we do not have an ambigious column name. -func checkForDuplicates(columns []string, columnsMap map[string]int) error { - for i, column := range columns { - columns[i] = strings.Replace(column, " ", "_", len(column)) - if _, exist := columnsMap[columns[i]]; exist { - return ErrAmbiguousFieldName - } - columnsMap[columns[i]] = i - } - return nil -} - -// parseErrs is the function which handles all the errors that could occur -// through use of function arguments such as column names in NULLIF -func parseErrs(columnNames []string, whereClause interface{}, alias string, myFuncs SelectFuncs, f format.Select) error { - // Below code cleans up column names. - processColumnNames(columnNames, alias, f) - if columnNames[0] != "*" { - if err := f.ColNameErrs(columnNames); err != nil { - return err - } - } - // Below code ensures the whereClause has no errors. 
- if whereClause != nil { - tempClause := whereClause - if err := whereClauseNameErrs(tempClause, alias, f); err != nil { - return err - } - } - for i := 0; i < len(myFuncs.funcExpr); i++ { - if myFuncs.funcExpr[i] == nil { - continue - } - if err := evaluateFuncErr(myFuncs.funcExpr[i], f); err != nil { - return err - } - } - return nil -} diff --git a/pkg/s3select/input.go b/pkg/s3select/input.go deleted file mode 100644 index 1cfb9f1a2..000000000 --- a/pkg/s3select/input.go +++ /dev/null @@ -1,224 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package s3select - -import ( - "bytes" - "compress/bzip2" - "io" - "net/http" - "strings" - "time" - - humanize "github.com/dustin/go-humanize" - "github.com/klauspost/pgzip" - - "github.com/minio/minio/pkg/s3select/format" - "github.com/minio/minio/pkg/s3select/format/csv" - "github.com/minio/minio/pkg/s3select/format/json" -) - -const ( - // progressTime is the time interval for which a progress message is sent. - progressTime time.Duration = 60 * time.Second - // continuationTime is the time interval for which a continuation message is - // sent. - continuationTime time.Duration = 5 * time.Second -) - -// Row is a Struct for keeping track of key aspects of a row. 
-type Row struct { - record string - err error -} - -// This function replaces "",'' with `` for the select parser -func cleanExpr(expr string) string { - r := strings.NewReplacer("\"", "`") - return r.Replace(expr) -} - -// New - initialize new select format -func New(reader io.Reader, size int64, req ObjectSelectRequest) (s3s format.Select, err error) { - switch req.InputSerialization.CompressionType { - case SelectCompressionGZIP: - if reader, err = pgzip.NewReader(reader); err != nil { - return nil, format.ErrTruncatedInput - } - case SelectCompressionBZIP: - reader = bzip2.NewReader(reader) - } - - // Initializating options for CSV - if req.InputSerialization.CSV != nil { - if req.InputSerialization.CSV.FileHeaderInfo == "" { - req.InputSerialization.CSV.FileHeaderInfo = CSVFileHeaderInfoNone - } - if req.InputSerialization.CSV.RecordDelimiter == "" { - req.InputSerialization.CSV.RecordDelimiter = "\n" - } - options := &csv.Options{ - Name: "S3Object", // Default table name for all objects - HasHeader: req.InputSerialization.CSV.FileHeaderInfo == CSVFileHeaderInfoUse, - RecordDelimiter: req.InputSerialization.CSV.RecordDelimiter, - FieldDelimiter: req.InputSerialization.CSV.FieldDelimiter, - Comments: req.InputSerialization.CSV.Comments, - ReadFrom: reader, - Compressed: string(req.InputSerialization.CompressionType), - Expression: cleanExpr(req.Expression), - StreamSize: size, - HeaderOpt: req.InputSerialization.CSV.FileHeaderInfo == CSVFileHeaderInfoUse, - Progress: req.RequestProgress.Enabled, - } - if req.OutputSerialization.CSV != nil { - if req.OutputSerialization.CSV.FieldDelimiter == "" { - req.OutputSerialization.CSV.FieldDelimiter = "," - } - options.OutputFieldDelimiter = req.OutputSerialization.CSV.FieldDelimiter - options.OutputRecordDelimiter = req.OutputSerialization.CSV.RecordDelimiter - options.OutputType = format.CSV - } - if req.OutputSerialization.JSON != nil { - options.OutputRecordDelimiter = req.OutputSerialization.JSON.RecordDelimiter - 
options.OutputType = format.JSON - } - // Initialize CSV input type - s3s, err = csv.New(options) - } else if req.InputSerialization.JSON != nil { - options := &json.Options{ - Name: "S3Object", // Default table name for all objects - ReadFrom: reader, - Compressed: string(req.InputSerialization.CompressionType), - Expression: cleanExpr(req.Expression), - StreamSize: size, - DocumentType: req.InputSerialization.JSON.Type == JSONTypeDocument, - Progress: req.RequestProgress.Enabled, - } - if req.OutputSerialization.JSON != nil { - options.OutputRecordDelimiter = req.OutputSerialization.JSON.RecordDelimiter - options.OutputType = format.JSON - } - if req.OutputSerialization.CSV != nil { - options.OutputFieldDelimiter = req.OutputSerialization.CSV.FieldDelimiter - options.OutputRecordDelimiter = req.OutputSerialization.CSV.RecordDelimiter - options.OutputType = format.CSV - } - // Initialize JSON input type - s3s, err = json.New(options) - } - return s3s, err -} - -// Execute is the function where all the blocking occurs, It writes to the HTTP -// response writer in a streaming fashion so that the client can actively use -// the results before the query is finally finished executing. 
The -func Execute(writer io.Writer, f format.Select) error { - rowCh := make(chan Row) - curBuf := bytes.NewBuffer(make([]byte, humanize.MiByte)) - curBuf.Reset() - progressTicker := time.NewTicker(progressTime) - continuationTimer := time.NewTimer(continuationTime) - defer progressTicker.Stop() - defer continuationTimer.Stop() - - go runSelectParser(f, rowCh) - for { - select { - case row, ok := <-rowCh: - if ok && row.err != nil { - _, err := writeErrorMessage(row.err, curBuf).WriteTo(writer) - flusher, okFlush := writer.(http.Flusher) - if okFlush { - flusher.Flush() - } - if err != nil { - return err - } - curBuf.Reset() - close(rowCh) - return nil - } else if ok { - _, err := writeRecordMessage(row.record, curBuf).WriteTo(writer) - flusher, okFlush := writer.(http.Flusher) - if okFlush { - flusher.Flush() - } - if err != nil { - return err - } - curBuf.Reset() - f.UpdateBytesReturned(int64(len(row.record))) - if !continuationTimer.Stop() { - <-continuationTimer.C - } - continuationTimer.Reset(continuationTime) - } else if !ok { - statPayload, err := f.CreateStatXML() - if err != nil { - return err - } - _, err = writeStatMessage(statPayload, curBuf).WriteTo(writer) - flusher, ok := writer.(http.Flusher) - if ok { - flusher.Flush() - } - if err != nil { - return err - } - curBuf.Reset() - _, err = writeEndMessage(curBuf).WriteTo(writer) - flusher, ok = writer.(http.Flusher) - if ok { - flusher.Flush() - } - if err != nil { - return err - } - return nil - } - - case <-progressTicker.C: - // Send progress messages only if requested by client. 
- if f.Progress() { - progressPayload, err := f.CreateProgressXML() - if err != nil { - return err - } - _, err = writeProgressMessage(progressPayload, curBuf).WriteTo(writer) - flusher, ok := writer.(http.Flusher) - if ok { - flusher.Flush() - } - if err != nil { - return err - } - curBuf.Reset() - } - case <-continuationTimer.C: - _, err := writeContinuationMessage(curBuf).WriteTo(writer) - flusher, ok := writer.(http.Flusher) - if ok { - flusher.Flush() - } - if err != nil { - return err - } - curBuf.Reset() - continuationTimer.Reset(continuationTime) - } - } -} diff --git a/pkg/s3select/json/args.go b/pkg/s3select/json/args.go new file mode 100644 index 000000000..cc06d6362 --- /dev/null +++ b/pkg/s3select/json/args.go @@ -0,0 +1,95 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package json + +import ( + "encoding/xml" + "fmt" + "strings" +) + +const ( + document = "document" + lines = "lines" + + defaultRecordDelimiter = "\n" +) + +// ReaderArgs - represents elements inside in request XML. +type ReaderArgs struct { + ContentType string `xml:"Type"` + unmarshaled bool +} + +// IsEmpty - returns whether reader args is empty or not. +func (args *ReaderArgs) IsEmpty() bool { + return !args.unmarshaled +} + +// UnmarshalXML - decodes XML data. +func (args *ReaderArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + // Make subtype to avoid recursive UnmarshalXML(). 
+ type subReaderArgs ReaderArgs + parsedArgs := subReaderArgs{} + if err := d.DecodeElement(&parsedArgs, &start); err != nil { + return err + } + + parsedArgs.ContentType = strings.ToLower(parsedArgs.ContentType) + switch parsedArgs.ContentType { + case document, lines: + default: + return errInvalidJSONType(fmt.Errorf("invalid ContentType '%v'", parsedArgs.ContentType)) + } + + *args = ReaderArgs(parsedArgs) + args.unmarshaled = true + return nil +} + +// WriterArgs - represents elements inside in request XML. +type WriterArgs struct { + RecordDelimiter string `xml:"RecordDelimiter"` + unmarshaled bool +} + +// IsEmpty - returns whether writer args is empty or not. +func (args *WriterArgs) IsEmpty() bool { + return !args.unmarshaled +} + +// UnmarshalXML - decodes XML data. +func (args *WriterArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + // Make subtype to avoid recursive UnmarshalXML(). + type subWriterArgs WriterArgs + parsedArgs := subWriterArgs{} + if err := d.DecodeElement(&parsedArgs, &start); err != nil { + return err + } + + switch len(parsedArgs.RecordDelimiter) { + case 0: + parsedArgs.RecordDelimiter = defaultRecordDelimiter + case 1, 2: + default: + return fmt.Errorf("invalid RecordDelimiter '%v'", parsedArgs.RecordDelimiter) + } + + *args = WriterArgs(parsedArgs) + args.unmarshaled = true + return nil +} diff --git a/pkg/s3select/json/errors.go b/pkg/s3select/json/errors.go new file mode 100644 index 000000000..22aaf697d --- /dev/null +++ b/pkg/s3select/json/errors.go @@ -0,0 +1,62 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package json + +type s3Error struct { + code string + message string + statusCode int + cause error +} + +func (err *s3Error) Cause() error { + return err.cause +} + +func (err *s3Error) ErrorCode() string { + return err.code +} + +func (err *s3Error) ErrorMessage() string { + return err.message +} + +func (err *s3Error) HTTPStatusCode() int { + return err.statusCode +} + +func (err *s3Error) Error() string { + return err.message +} + +func errInvalidJSONType(err error) *s3Error { + return &s3Error{ + code: "InvalidJsonType", + message: "The JsonType is invalid. Only DOCUMENT and LINES are supported.", + statusCode: 400, + cause: err, + } +} + +func errJSONParsingError(err error) *s3Error { + return &s3Error{ + code: "JSONParsingError", + message: "Encountered an error parsing the JSON file. Check the file and try again.", + statusCode: 400, + cause: err, + } +} diff --git a/pkg/s3select/json/reader.go b/pkg/s3select/json/reader.go new file mode 100644 index 000000000..fb4e15ba2 --- /dev/null +++ b/pkg/s3select/json/reader.go @@ -0,0 +1,217 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package json + +import ( + "bytes" + "io" + "io/ioutil" + "strconv" + + "github.com/minio/minio/pkg/s3select/sql" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +func toSingleLineJSON(input string, currentKey string, result gjson.Result) (output string, err error) { + switch { + case result.IsObject(): + result.ForEach(func(key, value gjson.Result) bool { + jsonKey := key.String() + if currentKey != "" { + jsonKey = currentKey + "." + key.String() + } + output, err = toSingleLineJSON(input, jsonKey, value) + input = output + return err == nil + }) + case result.IsArray(): + i := 0 + result.ForEach(func(key, value gjson.Result) bool { + if currentKey == "" { + panic("currentKey is empty") + } + + indexKey := currentKey + "." 
+ strconv.Itoa(i) + output, err = toSingleLineJSON(input, indexKey, value) + input = output + i++ + return err == nil + }) + default: + output, err = sjson.Set(input, currentKey, result.Value()) + } + + return output, err +} + +type objectReader struct { + reader io.Reader + err error + + p []byte + start int + end int + + escaped bool + quoteOpened bool + curlyCount uint64 + endOfObject bool +} + +func (or *objectReader) objectEndIndex(p []byte, length int) int { + for i := 0; i < length; i++ { + if p[i] == '\\' { + or.escaped = !or.escaped + continue + } + + if p[i] == '"' && !or.escaped { + or.quoteOpened = !or.quoteOpened + } + + or.escaped = false + + switch p[i] { + case '{': + if !or.quoteOpened { + or.curlyCount++ + } + case '}': + if or.quoteOpened || or.curlyCount == 0 { + break + } + + if or.curlyCount--; or.curlyCount == 0 { + return i + 1 + } + } + } + + return -1 +} + +func (or *objectReader) Read(p []byte) (n int, err error) { + if or.endOfObject { + return 0, io.EOF + } + + if or.p != nil { + n = copy(p, or.p[or.start:or.end]) + or.start += n + if or.start == or.end { + // made full copy. + or.p = nil + or.start = 0 + or.end = 0 + } + } else { + if or.err != nil { + return 0, or.err + } + + n, err = or.reader.Read(p) + or.err = err + switch err { + case nil: + case io.EOF, io.ErrUnexpectedEOF, io.ErrClosedPipe: + or.err = io.EOF + default: + return 0, err + } + } + + index := or.objectEndIndex(p, n) + if index == -1 || index == n { + return n, nil + } + + or.endOfObject = true + if or.p == nil { + or.p = p + or.start = index + or.end = n + } else { + or.start -= index + } + + return index, nil +} + +func (or *objectReader) Reset() error { + or.endOfObject = false + + if or.p != nil { + return nil + } + + return or.err +} + +// Reader - JSON record reader for S3Select. +type Reader struct { + args *ReaderArgs + objectReader *objectReader + readCloser io.ReadCloser +} + +// Read - reads single record. 
+func (r *Reader) Read() (sql.Record, error) { + if err := r.objectReader.Reset(); err != nil { + return nil, err + } + + data, err := ioutil.ReadAll(r.objectReader) + if err != nil { + return nil, errJSONParsingError(err) + } + + data = bytes.TrimSpace(data) + if len(data) == 0 { + return nil, io.EOF + } + + if !gjson.ValidBytes(data) { + return nil, errJSONParsingError(err) + } + + if bytes.Count(data, []byte("\n")) > 0 { + var s string + if s, err = toSingleLineJSON("", "", gjson.ParseBytes(data)); err != nil { + return nil, errJSONParsingError(err) + } + data = []byte(s) + } + + return &Record{ + data: data, + }, nil +} + +// Close - closes underlaying reader. +func (r *Reader) Close() error { + return r.readCloser.Close() +} + +// NewReader - creates new JSON reader using readCloser. +func NewReader(readCloser io.ReadCloser, args *ReaderArgs) *Reader { + return &Reader{ + args: args, + objectReader: &objectReader{reader: readCloser}, + readCloser: readCloser, + } +} diff --git a/pkg/s3select/json/record.go b/pkg/s3select/json/record.go new file mode 100644 index 000000000..8af69cc1f --- /dev/null +++ b/pkg/s3select/json/record.go @@ -0,0 +1,107 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package json + +import ( + "bytes" + "encoding/csv" + "fmt" + "strings" + + "github.com/minio/minio/pkg/s3select/sql" + "github.com/tidwall/gjson" + "github.com/tidwall/sjson" +) + +// Record - is JSON record. +type Record struct { + data []byte +} + +// Get - gets the value for a column name. +func (r *Record) Get(name string) (*sql.Value, error) { + result := gjson.GetBytes(r.data, name) + switch result.Type { + case gjson.False: + return sql.NewBool(false), nil + case gjson.Number: + return sql.NewFloat(result.Float()), nil + case gjson.String: + return sql.NewString(result.String()), nil + case gjson.True: + return sql.NewBool(true), nil + } + + return nil, fmt.Errorf("unsupported gjson value %v; %v", result, result.Type) +} + +// Set - sets the value for a column name. +func (r *Record) Set(name string, value *sql.Value) (err error) { + var v interface{} + switch value.Type() { + case sql.Bool: + v = value.BoolValue() + case sql.Int: + v = value.IntValue() + case sql.Float: + v = value.FloatValue() + case sql.String: + v = value.StringValue() + default: + return fmt.Errorf("unsupported sql value %v and type %v", value, value.Type()) + } + + name = strings.Replace(name, "*", "__ALL__", -1) + r.data, err = sjson.SetBytes(r.data, name, v) + return err +} + +// MarshalCSV - encodes to CSV data. +func (r *Record) MarshalCSV(fieldDelimiter rune) ([]byte, error) { + var csvRecord []string + result := gjson.ParseBytes(r.data) + result.ForEach(func(key, value gjson.Result) bool { + csvRecord = append(csvRecord, value.String()) + return true + }) + + buf := new(bytes.Buffer) + w := csv.NewWriter(buf) + w.Comma = fieldDelimiter + if err := w.Write(csvRecord); err != nil { + return nil, err + } + w.Flush() + if err := w.Error(); err != nil { + return nil, err + } + + data := buf.Bytes() + return data[:len(data)-1], nil +} + +// MarshalJSON - encodes to JSON data. 
+func (r *Record) MarshalJSON() ([]byte, error) { + return r.data, nil +} + +// NewRecord - creates new empty JSON record. +func NewRecord() *Record { + return &Record{ + data: []byte("{}"), + } +} diff --git a/pkg/s3select/message.go b/pkg/s3select/message.go new file mode 100644 index 000000000..9cfd2f90d --- /dev/null +++ b/pkg/s3select/message.go @@ -0,0 +1,384 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package s3select + +import ( + "bytes" + "encoding/binary" + "fmt" + "hash/crc32" + "net/http" + "strconv" + "sync/atomic" + "time" +) + +// A message is in the format specified in +// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-frame-overview.png +// hence the calculation is made accordingly. 
+func totalByteLength(headerLength, payloadLength int) int { + return 4 + 4 + 4 + headerLength + payloadLength + 4 +} + +func genMessage(header, payload []byte) []byte { + headerLength := len(header) + payloadLength := len(payload) + totalLength := totalByteLength(headerLength, payloadLength) + + buf := new(bytes.Buffer) + binary.Write(buf, binary.BigEndian, uint32(totalLength)) + binary.Write(buf, binary.BigEndian, uint32(headerLength)) + prelude := buf.Bytes() + binary.Write(buf, binary.BigEndian, crc32.ChecksumIEEE(prelude)) + buf.Write(header) + if payload != nil { + buf.Write(payload) + } + message := buf.Bytes() + binary.Write(buf, binary.BigEndian, crc32.ChecksumIEEE(message)) + + return buf.Bytes() +} + +// Refer genRecordsHeader(). +var recordsHeader = []byte{ + 13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't', + 13, ':', 'c', 'o', 'n', 't', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 24, 'a', 'p', 'p', 'l', 'i', 'c', 'a', 't', 'i', 'o', 'n', '/', 'o', 'c', 't', 'e', 't', '-', 's', 't', 'r', 'e', 'a', 'm', + 11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 7, 'R', 'e', 'c', 'o', 'r', 'd', 's', +} + +// newRecordsMessage - creates new Records Message which can contain a single record, partial records, +// or multiple records. Depending on the size of the result, a response can contain one or more of these messages. +// +// Header specification +// Records messages contain three headers, as follows: +// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-record.png +// +// Payload specification +// Records message payloads can contain a single record, partial records, or multiple records. +func newRecordsMessage(payload []byte) []byte { + return genMessage(recordsHeader, payload) +} + +// continuationMessage - S3 periodically sends this message to keep the TCP connection open. +// These messages appear in responses at random. 
The client must detect the message type and process accordingly. +// +// Header specification: +// Continuation messages contain two headers, as follows: +// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-cont.png +// +// Payload specification: +// Continuation messages have no payload. +var continuationMessage = []byte{ + 0, 0, 0, 57, // total byte-length. + 0, 0, 0, 41, // headers byte-length. + 139, 161, 157, 242, // prelude crc. + 13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't', // headers. + 11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 4, 'C', 'o', 'n', 't', // headers. + 156, 134, 74, 13, // message crc. +} + +// Refer genProgressHeader(). +var progressHeader = []byte{ + 13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't', + 13, ':', 'c', 'o', 'n', 't', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 8, 't', 'e', 'x', 't', '/', 'x', 'm', 'l', + 11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 8, 'P', 'r', 'o', 'g', 'r', 'e', 's', 's', +} + +// newProgressMessage - creates new Progress Message. S3 periodically sends this message, if requested. +// It contains information about the progress of a query that has started but has not yet completed. +// +// Header specification: +// Progress messages contain three headers, as follows: +// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-progress.png +// +// Payload specification: +// Progress message payload is an XML document containing information about the progress of a request. +// * BytesScanned => Number of bytes that have been processed before being uncompressed (if the file is compressed). +// * BytesProcessed => Number of bytes that have been processed after being uncompressed (if the file is compressed). +// * BytesReturned => Current number of bytes of records payload data returned by S3. 
+// +// For uncompressed files, BytesScanned and BytesProcessed are equal. +// +// Example: +// +// +// +// 512 +// 1024 +// 1024 +// +// +func newProgressMessage(bytesScanned, bytesProcessed, bytesReturned int64) []byte { + payload := []byte(`` + + strconv.FormatInt(bytesScanned, 10) + `` + + strconv.FormatInt(bytesProcessed, 10) + `` + + strconv.FormatInt(bytesReturned, 10) + ``) + return genMessage(progressHeader, payload) +} + +// Refer genStatsHeader(). +var statsHeader = []byte{ + 13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't', + 13, ':', 'c', 'o', 'n', 't', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 8, 't', 'e', 'x', 't', '/', 'x', 'm', 'l', + 11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'S', 't', 'a', 't', 's', +} + +// newStatsMessage - creates new Stats Message. S3 sends this message at the end of the request. +// It contains statistics about the query. +// +// Header specification: +// Stats messages contain three headers, as follows: +// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-stats.png +// +// Payload specification: +// Stats message payload is an XML document containing information about a request's stats when processing is complete. +// * BytesScanned => Number of bytes that have been processed before being uncompressed (if the file is compressed). +// * BytesProcessed => Number of bytes that have been processed after being uncompressed (if the file is compressed). +// * BytesReturned => Total number of bytes of records payload data returned by S3. +// +// For uncompressed files, BytesScanned and BytesProcessed are equal. 
+// +// Example: +// +// +// +// 512 +// 1024 +// 1024 +// +func newStatsMessage(bytesScanned, bytesProcessed, bytesReturned int64) []byte { + payload := []byte(`` + + strconv.FormatInt(bytesScanned, 10) + `` + + strconv.FormatInt(bytesProcessed, 10) + `` + + strconv.FormatInt(bytesReturned, 10) + ``) + return genMessage(statsHeader, payload) +} + +// endMessage - indicates that the request is complete, and no more messages will be sent. +// You should not assume that the request is complete until the client receives an End message. +// +// Header specification: +// End messages contain two headers, as follows: +// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-end.png +// +// Payload specification: +// End messages have no payload. +var endMessage = []byte{ + 0, 0, 0, 56, // total byte-length. + 0, 0, 0, 40, // headers byte-length. + 193, 198, 132, 212, // prelude crc. + 13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'v', 'e', 'n', 't', // headers. + 11, ':', 'e', 'v', 'e', 'n', 't', '-', 't', 'y', 'p', 'e', 7, 0, 3, 'E', 'n', 'd', // headers. + 207, 151, 211, 146, // message crc. +} + +// newErrorMessage - creates new Request Level Error Message. S3 sends this message if the request failed for any reason. +// It contains the error code and error message for the failure. If S3 sends a RequestLevelError message, +// it doesn't send an End message. +// +// Header specification: +// Request-level error messages contain three headers, as follows: +// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-error.png +// +// Payload specification: +// Request-level error messages have no payload. 
+func newErrorMessage(errorCode, errorMessage []byte) []byte { + buf := new(bytes.Buffer) + + buf.Write([]byte{13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'r', 'r', 'o', 'r'}) + + buf.Write([]byte{14, ':', 'e', 'r', 'r', 'o', 'r', '-', 'm', 'e', 's', 's', 'a', 'g', 'e', 7}) + binary.Write(buf, binary.BigEndian, uint16(len(errorMessage))) + buf.Write(errorMessage) + + buf.Write([]byte{11, ':', 'e', 'r', 'r', 'o', 'r', '-', 'c', 'o', 'd', 'e', 7}) + binary.Write(buf, binary.BigEndian, uint16(len(errorCode))) + buf.Write(errorCode) + + return genMessage(buf.Bytes(), nil) +} + +// NewErrorMessage - creates new Request Level Error Message specified in +// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html. +func NewErrorMessage(errorCode, errorMessage string) []byte { + return newErrorMessage([]byte(errorCode), []byte(errorMessage)) +} + +type messageWriter struct { + writer http.ResponseWriter + getProgressFunc func() (int64, int64) + bytesReturned int64 + + dataCh chan []byte + doneCh chan struct{} + closeCh chan struct{} + stopped uint32 + closed uint32 +} + +func (writer *messageWriter) write(data []byte) bool { + if _, err := writer.writer.Write(data); err != nil { + return false + } + + writer.writer.(http.Flusher).Flush() + return true +} + +func (writer *messageWriter) start() { + keepAliveTicker := time.NewTicker(1 * time.Second) + var progressTicker *time.Ticker + if writer.getProgressFunc != nil { + progressTicker = time.NewTicker(1 * time.Minute) + } + + go func() { + quitFlag := 0 + for quitFlag == 0 { + if progressTicker == nil { + select { + case data, ok := <-writer.dataCh: + if !ok { + quitFlag = 1 + break + } + if !writer.write(data) { + quitFlag = 1 + } + case <-writer.doneCh: + quitFlag = 2 + case <-keepAliveTicker.C: + if !writer.write(continuationMessage) { + quitFlag = 1 + } + } + } else { + select { + case data, ok := <-writer.dataCh: + if !ok { + quitFlag = 1 + break + } + if 
!writer.write(data) { + quitFlag = 1 + } + case <-writer.doneCh: + quitFlag = 2 + case <-keepAliveTicker.C: + if !writer.write(continuationMessage) { + quitFlag = 1 + } + case <-progressTicker.C: + bytesScanned, bytesProcessed := writer.getProgressFunc() + bytesReturned := atomic.LoadInt64(&writer.bytesReturned) + if !writer.write(newProgressMessage(bytesScanned, bytesProcessed, bytesReturned)) { + quitFlag = 1 + } + } + } + } + + atomic.StoreUint32(&writer.stopped, 1) + close(writer.closeCh) + + keepAliveTicker.Stop() + if progressTicker != nil { + progressTicker.Stop() + } + + if quitFlag == 2 { + for data := range writer.dataCh { + if _, err := writer.writer.Write(data); err != nil { + break + } + } + } + }() +} + +func (writer *messageWriter) close() { + if atomic.SwapUint32(&writer.closed, 1) == 0 { + close(writer.doneCh) + for range writer.closeCh { + close(writer.dataCh) + } + } +} + +func (writer *messageWriter) send(data []byte) error { + err := func() error { + if atomic.LoadUint32(&writer.stopped) == 1 { + return fmt.Errorf("writer already closed") + } + + select { + case writer.dataCh <- data: + case <-writer.doneCh: + return fmt.Errorf("closed writer") + } + + return nil + }() + + if err != nil { + writer.close() + } + + return err +} + +func (writer *messageWriter) SendRecords(payload []byte) error { + err := writer.send(newRecordsMessage(payload)) + if err == nil { + atomic.AddInt64(&writer.bytesReturned, int64(len(payload))) + } + return err +} + +func (writer *messageWriter) SendStats(bytesScanned, bytesProcessed int64) error { + bytesReturned := atomic.LoadInt64(&writer.bytesReturned) + err := writer.send(newStatsMessage(bytesScanned, bytesProcessed, bytesReturned)) + if err != nil { + return err + } + + err = writer.send(endMessage) + writer.close() + return err +} + +func (writer *messageWriter) SendError(errorCode, errorMessage string) error { + err := writer.send(newErrorMessage([]byte(errorCode), []byte(errorMessage))) + if err == nil { + 
writer.close() + } + return err +} + +func newMessageWriter(w http.ResponseWriter, getProgressFunc func() (bytesScanned, bytesProcessed int64)) *messageWriter { + writer := &messageWriter{ + writer: w, + getProgressFunc: getProgressFunc, + + dataCh: make(chan []byte), + doneCh: make(chan struct{}), + closeCh: make(chan struct{}), + } + writer.start() + return writer +} diff --git a/pkg/s3select/output.go b/pkg/s3select/output.go deleted file mode 100644 index b5e323c28..000000000 --- a/pkg/s3select/output.go +++ /dev/null @@ -1,460 +0,0 @@ -/* - * Minio Cloud Storage, (C) 2018 Minio, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -// DO NOT EDIT THIS PACKAGE DIRECTLY: This follows the protocol defined by -// AmazonS3 found at -// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html -// Consult the Spec before making direct edits. 
- -package s3select - -import ( - "bytes" - "encoding/binary" - "hash/crc32" -) - -// Record Headers -// -11 -event type - 7 - 7 "Records" -// -13 -content-type -7 -24 "application/octet-stream" -// -13 -message-type -7 5 "event" -// This is predefined from AMZ protocol found here: -// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html -var recordHeaders []byte - -// End Headers -// -13 -message-type -7 -5 "event" -// -11 -:event-type -7 -3 "End" -// This is predefined from AMZ protocol found here: -// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html -var endHeaders []byte - -// Continuation Headers -// -13 -message-type -7 -5 "event" -// -11 -:event-type -7 -4 "Cont" -// This is predefined from AMZ protocol found here: -// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html -var contHeaders []byte - -// Stat Headers -// -11 -event type - 7 - 5 "Stat" -20 -// -13 -content-type -7 -8 "text/xml" -25 -// -13 -message-type -7 -5 "event" -22 -// This is predefined from AMZ protocol found here: -// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html -var statHeaders []byte - -// Progress Headers -// -11 -event type - 7 - 8 "Progress" -23 -// -13 -content-type -7 -8 "text/xml" -25 -// -13 -message-type -7 -5 "event" -22 -// This is predefined from AMZ protocol found here: -// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html -var progressHeaders []byte - -// The length of the nonvariable portion of the ErrHeaders -// The below are the specifications of the header for a "error" event -// -11 -error-code - 7 - DEFINED "DEFINED" -// -14 -error-message -7 -DEFINED "DEFINED" -// -13 -message-type -7 -5 "error" -// This is predefined from AMZ protocol found here: -// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html -var errHdrLen int - -func init() { - recordHeaders = writeRecordHeader() - endHeaders = writeEndHeader() - 
contHeaders = writeContHeader() - statHeaders = writeStatHeader() - progressHeaders = writeProgressHeader() - errHdrLen = 55 - -} - -// encodeString encodes a string in a []byte, lenBytes is the number of bytes -// used to encode the length of the string. -func encodeHeaderStringValue(s string) []byte { - n := uint16(len(s)) - lenSlice := make([]byte, 2) - binary.BigEndian.PutUint16(lenSlice[0:], n) - return append(lenSlice, []byte(s)...) -} -func encodeHeaderStringName(s string) []byte { - lenSlice := make([]byte, 1) - lenSlice[0] = byte(len(s)) - return append(lenSlice, []byte(s)...) -} - -// encodeNumber encodes a number in a []byte, lenBytes is the number of bytes -// used to encode the length of the string. -func encodeNumber(n byte, lenBytes int) []byte { - lenSlice := make([]byte, lenBytes) - lenSlice[0] = n - return lenSlice -} - -// writePayloadSize writes the 4byte payload size portion of the protocol. -func writePayloadSize(payloadSize int, headerLength int) []byte { - totalByteLen := make([]byte, 4) - totalMsgLen := uint32(payloadSize + headerLength + 16) - binary.BigEndian.PutUint32(totalByteLen, totalMsgLen) - return totalByteLen -} - -// writeHeaderSize writes the 4byte header size portion of the protocol. -func writeHeaderSize(headerLength int) []byte { - totalHeaderLen := make([]byte, 4) - totalLen := uint32(headerLength) - binary.BigEndian.PutUint32(totalHeaderLen, totalLen) - return totalHeaderLen -} - -// writeCRC writes the CRC for both the prelude and and the end of the protocol. -func writeCRC(buffer []byte) []byte { - // Calculate the CRC here: - crc := make([]byte, 4) - cksum := crc32.ChecksumIEEE(buffer) - binary.BigEndian.PutUint32(crc, cksum) - return crc -} - -// writePayload writes the Payload for those protocols which the Payload is -// necessary. 
-func writePayload(myPayload string) []byte { - convertedPayload := []byte(myPayload) - payloadStore := make([]byte, len(convertedPayload)) - copy(payloadStore[0:], myPayload) - return payloadStore -} - -// writeRecordHeader is a function which writes the headers for the continuation -// Message -func writeRecordHeader() []byte { - // This is predefined from AMZ protocol found here: - // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - var currentMessage = &bytes.Buffer{} - // 11 -event type - 7 - 7 "Records" - // header name - currentMessage.Write(encodeHeaderStringName(":event-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("Records")) - // Creation of the Header for Content-Type // 13 -content-type -7 -24 - // "application/octet-stream" - // header name - currentMessage.Write(encodeHeaderStringName(":content-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("application/octet-stream")) - // Creation of the Header for message-type 13 -message-type -7 5 "event" - // header name - currentMessage.Write(encodeHeaderStringName(":message-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("event")) - return currentMessage.Bytes() -} - -// writeEndHeader is a function which writes the headers for the continuation -// Message -func writeEndHeader() []byte { - // This is predefined from AMZ protocol found here: - // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - var currentMessage = &bytes.Buffer{} - // header name - currentMessage.Write(encodeHeaderStringName(":event-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - 
currentMessage.Write(encodeHeaderStringValue("End")) - - // Creation of the Header for message-type 13 -message-type -7 5 "event" - // header name - currentMessage.Write(encodeHeaderStringName(":message-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("event")) - return currentMessage.Bytes() -} - -// writeContHeader is a function which writes the headers for the continuation -// Message -func writeContHeader() []byte { - // This is predefined from AMZ protocol found here: - // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - var currentMessage = &bytes.Buffer{} - // header name - currentMessage.Write(encodeHeaderStringName(":event-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("Cont")) - - // Creation of the Header for message-type 13 -message-type -7 5 "event" - // header name - currentMessage.Write(encodeHeaderStringName(":message-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("event")) - return currentMessage.Bytes() - -} - -// writeStatHeader is a function which writes the headers for the Stat -// Message -func writeStatHeader() []byte { - // This is predefined from AMZ protocol found here: - // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - var currentMessage = &bytes.Buffer{} - // header name - currentMessage.Write(encodeHeaderStringName(":event-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("Stats")) - // Creation of the Header for Content-Type // 13 -content-type -7 -8 - // "text/xml" - // header name - currentMessage.Write(encodeHeaderStringName(":content-type")) - 
// header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("text/xml")) - - // Creation of the Header for message-type 13 -message-type -7 5 "event" - currentMessage.Write(encodeHeaderStringName(":message-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("event")) - return currentMessage.Bytes() - -} - -// writeProgressHeader is a function which writes the headers for the Progress -// Message -func writeProgressHeader() []byte { - // This is predefined from AMZ protocol found here: - // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - var currentMessage = &bytes.Buffer{} - // header name - currentMessage.Write(encodeHeaderStringName(":event-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("Progress")) - // Creation of the Header for Content-Type // 13 -content-type -7 -8 - // "text/xml" - // header name - currentMessage.Write(encodeHeaderStringName(":content-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("text/xml")) - - // Creation of the Header for message-type 13 -message-type -7 5 "event" - // header name - currentMessage.Write(encodeHeaderStringName(":message-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("event")) - return currentMessage.Bytes() - -} - -// writeRecordMessage is the function which constructs the binary message for a -// record message to be sent. 
-func writeRecordMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer { - // The below are the specifications of the header for a "record" event - // 11 -event type - 7 - 7 "Records" - // 13 -content-type -7 -24 "application/octet-stream" - // 13 -message-type -7 5 "event" - // This is predefined from AMZ protocol found here: - // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - headerLen := len(recordHeaders) - // Writes the total size of the message. - currentMessage.Write(writePayloadSize(len(payload), headerLen)) - // Writes the total size of the header. - currentMessage.Write(writeHeaderSize(headerLen)) - // Writes the CRC of the Prelude - currentMessage.Write(writeCRC(currentMessage.Bytes())) - currentMessage.Write(recordHeaders) - - // This part is where the payload is written, this will be only one row, since - // we're sending one message at a types - currentMessage.Write(writePayload(payload)) - - // Now we do a CRC check on the entire messages - currentMessage.Write(writeCRC(currentMessage.Bytes())) - return currentMessage - -} - -// writeContinuationMessage is the function which constructs the binary message -// for a continuation message to be sent. 
-func writeContinuationMessage(currentMessage *bytes.Buffer) *bytes.Buffer { - // 11 -event type - 7 - 4 "Cont" - // 13 -message-type -7 5 "event" - // This is predefined from AMZ protocol found here: - // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - headerLen := len(contHeaders) - currentMessage.Write(writePayloadSize(0, headerLen)) - - currentMessage.Write(writeHeaderSize(headerLen)) - - // Calculate the Prelude CRC here: - currentMessage.Write(writeCRC(currentMessage.Bytes())) - - currentMessage.Write(contHeaders) - - //Now we do a CRC check on the entire messages - currentMessage.Write(writeCRC(currentMessage.Bytes())) - return currentMessage - -} - -// writeEndMessage is the function which constructs the binary message -// for a end message to be sent. -func writeEndMessage(currentMessage *bytes.Buffer) *bytes.Buffer { - // 11 -event type - 7 - 3 "End" - // 13 -message-type -7 5 "event" - // This is predefined from AMZ protocol found here: - // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - headerLen := len(endHeaders) - currentMessage.Write(writePayloadSize(0, headerLen)) - - currentMessage.Write(writeHeaderSize(headerLen)) - - //Calculate the Prelude CRC here: - currentMessage.Write(writeCRC(currentMessage.Bytes())) - - currentMessage.Write(endHeaders) - - // Now we do a CRC check on the entire messages - currentMessage.Write(writeCRC(currentMessage.Bytes())) - return currentMessage - -} - -// writeStateMessage is the function which constructs the binary message for a -// state message to be sent. 
-func writeStatMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer { - // 11 -event type - 7 - 5 "Stat" 20 - // 13 -content-type -7 -8 "text/xml" 25 - // 13 -message-type -7 5 "event" 22 - // This is predefined from AMZ protocol found here: - // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - headerLen := len(statHeaders) - - currentMessage.Write(writePayloadSize(len(payload), headerLen)) - - currentMessage.Write(writeHeaderSize(headerLen)) - - currentMessage.Write(writeCRC(currentMessage.Bytes())) - - currentMessage.Write(statHeaders) - - // This part is where the payload is written, this will be only one row, since - // we're sending one message at a types - currentMessage.Write(writePayload(payload)) - - // Now we do a CRC check on the entire messages - currentMessage.Write(writeCRC(currentMessage.Bytes())) - return currentMessage - -} - -// writeProgressMessage is the function which constructs the binary message for -// a progress message to be sent. 
-func writeProgressMessage(payload string, currentMessage *bytes.Buffer) *bytes.Buffer { - // The below are the specifications of the header for a "Progress" event - // 11 -event type - 7 - 8 "Progress" 23 - // 13 -content-type -7 -8 "text/xml" 25 - // 13 -message-type -7 5 "event" 22 - // This is predefined from AMZ protocol found here: - // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - headerLen := len(progressHeaders) - - currentMessage.Write(writePayloadSize(len(payload), headerLen)) - - currentMessage.Write(writeHeaderSize(headerLen)) - - currentMessage.Write(writeCRC(currentMessage.Bytes())) - - currentMessage.Write(progressHeaders) - - // This part is where the payload is written, this will be only one row, since - // we're sending one message at a types - currentMessage.Write(writePayload(payload)) - - // Now we do a CRC check on the entire messages - currentMessage.Write(writeCRC(currentMessage.Bytes())) - return currentMessage - -} - -// writeErrorMessage is the function which constructs the binary message for a -// error message to be sent. 
-func writeErrorMessage(errorMessage error, currentMessage *bytes.Buffer) *bytes.Buffer { - - // The below are the specifications of the header for a "error" event - // 11 -error-code - 7 - DEFINED "DEFINED" - // 14 -error-message -7 -DEFINED "DEFINED" - // 13 -message-type -7 5 "error" - // This is predefined from AMZ protocol found here: - // https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html - sizeOfErrorCode := len(errorCodeResponse[errorMessage]) - sizeOfErrorMessage := len(errorMessage.Error()) - headerLen := errHdrLen + sizeOfErrorCode + sizeOfErrorMessage - - currentMessage.Write(writePayloadSize(0, headerLen)) - - currentMessage.Write(writeHeaderSize(headerLen)) - - currentMessage.Write(writeCRC(currentMessage.Bytes())) - // header name - currentMessage.Write(encodeHeaderStringName(":error-code")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue(errorCodeResponse[errorMessage])) - - // 14 -error-message -7 -DEFINED "DEFINED" - - // header name - currentMessage.Write(encodeHeaderStringName(":error-message")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue(errorMessage.Error())) - // Creation of the Header for message-type 13 -message-type -7 5 "error" - // header name - currentMessage.Write(encodeHeaderStringName(":message-type")) - // header type - currentMessage.Write(encodeNumber(7, 1)) - // header value and header value length - currentMessage.Write(encodeHeaderStringValue("error")) - - // Now we do a CRC check on the entire messages - currentMessage.Write(writeCRC(currentMessage.Bytes())) - return currentMessage - -} diff --git a/pkg/s3select/parquet/args.go b/pkg/s3select/parquet/args.go new file mode 100644 index 000000000..10b01f3c6 --- /dev/null +++ b/pkg/s3select/parquet/args.go @@ -0,0 +1,42 @@ +/* + * Minio Cloud Storage, 
(C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package parquet + +import "encoding/xml" + +// ReaderArgs - represents elements inside in request XML. +type ReaderArgs struct { + unmarshaled bool +} + +// IsEmpty - returns whether reader args is empty or not. +func (args *ReaderArgs) IsEmpty() bool { + return !args.unmarshaled +} + +// UnmarshalXML - decodes XML data. +func (args *ReaderArgs) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + // Make subtype to avoid recursive UnmarshalXML(). + type subReaderArgs ReaderArgs + parsedArgs := subReaderArgs{} + if err := d.DecodeElement(&parsedArgs, &start); err != nil { + return err + } + + args.unmarshaled = true + return nil +} diff --git a/pkg/s3select/parquet/errors.go b/pkg/s3select/parquet/errors.go new file mode 100644 index 000000000..193850d3c --- /dev/null +++ b/pkg/s3select/parquet/errors.go @@ -0,0 +1,53 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package parquet + +type s3Error struct { + code string + message string + statusCode int + cause error +} + +func (err *s3Error) Cause() error { + return err.cause +} + +func (err *s3Error) ErrorCode() string { + return err.code +} + +func (err *s3Error) ErrorMessage() string { + return err.message +} + +func (err *s3Error) HTTPStatusCode() int { + return err.statusCode +} + +func (err *s3Error) Error() string { + return err.message +} + +func errParquetParsingError(err error) *s3Error { + return &s3Error{ + code: "ParquetParsingError", + message: "Error parsing Parquet file. Please check the file and try again.", + statusCode: 400, + cause: err, + } +} diff --git a/pkg/s3select/parquet/reader.go b/pkg/s3select/parquet/reader.go new file mode 100644 index 000000000..a25892f44 --- /dev/null +++ b/pkg/s3select/parquet/reader.go @@ -0,0 +1,93 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package parquet + +import ( + "io" + + "github.com/minio/minio/pkg/s3select/json" + "github.com/minio/minio/pkg/s3select/sql" + parquetgo "github.com/minio/parquet-go" + parquetgen "github.com/minio/parquet-go/gen-go/parquet" +) + +// Reader - Parquet record reader for S3Select. +type Reader struct { + args *ReaderArgs + file *parquetgo.File +} + +// Read - reads single record. 
+func (r *Reader) Read() (sql.Record, error) { + parquetRecord, err := r.file.Read() + if err != nil { + if err != io.EOF { + return nil, errParquetParsingError(err) + } + + return nil, err + } + + record := json.NewRecord() + for name, v := range parquetRecord { + var value *sql.Value + switch v.Type { + case parquetgen.Type_BOOLEAN: + value = sql.NewBool(v.Value.(bool)) + case parquetgen.Type_INT32: + value = sql.NewInt(int64(v.Value.(int32))) + case parquetgen.Type_INT64: + value = sql.NewInt(v.Value.(int64)) + case parquetgen.Type_FLOAT: + value = sql.NewFloat(float64(v.Value.(float32))) + case parquetgen.Type_DOUBLE: + value = sql.NewFloat(v.Value.(float64)) + case parquetgen.Type_INT96, parquetgen.Type_BYTE_ARRAY, parquetgen.Type_FIXED_LEN_BYTE_ARRAY: + value = sql.NewString(string(v.Value.([]byte))) + default: + return nil, errParquetParsingError(nil) + } + + if err = record.Set(name, value); err != nil { + return nil, errParquetParsingError(err) + } + } + + return record, nil +} + +// Close - closes underlying readers. +func (r *Reader) Close() error { + return r.file.Close() +} + +// NewReader - creates new Parquet reader using readerFunc callback. +func NewReader(getReaderFunc func(offset, length int64) (io.ReadCloser, error), args *ReaderArgs) (*Reader, error) { + file, err := parquetgo.Open(getReaderFunc, nil) + if err != nil { + if err != io.EOF { + return nil, errParquetParsingError(err) + } + + return nil, err + } + + return &Reader{ + args: args, + file: file, + }, nil +} diff --git a/pkg/s3select/progress.go b/pkg/s3select/progress.go new file mode 100644 index 000000000..1ca93c49b --- /dev/null +++ b/pkg/s3select/progress.go @@ -0,0 +1,90 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package s3select + +import ( + "compress/bzip2" + "fmt" + "io" + "sync/atomic" + + gzip "github.com/klauspost/pgzip" +) + +type countUpReader struct { + reader io.Reader + bytesRead int64 +} + +func (r *countUpReader) Read(p []byte) (n int, err error) { + n, err = r.reader.Read(p) + atomic.AddInt64(&r.bytesRead, int64(n)) + return n, err +} + +func (r *countUpReader) BytesRead() int64 { + return atomic.LoadInt64(&r.bytesRead) +} + +func newCountUpReader(reader io.Reader) *countUpReader { + return &countUpReader{ + reader: reader, + } +} + +type progressReader struct { + rc io.ReadCloser + scannedReader *countUpReader + processedReader *countUpReader +} + +func (pr *progressReader) Read(p []byte) (n int, err error) { + return pr.processedReader.Read(p) +} + +func (pr *progressReader) Close() error { + return pr.rc.Close() +} + +func (pr *progressReader) Stats() (bytesScanned, bytesProcessed int64) { + return pr.scannedReader.BytesRead(), pr.processedReader.BytesRead() +} + +func newProgressReader(rc io.ReadCloser, compType CompressionType) (*progressReader, error) { + scannedReader := newCountUpReader(rc) + var r io.Reader + var err error + + switch compType { + case noneType: + r = scannedReader + case gzipType: + if r, err = gzip.NewReader(scannedReader); err != nil { + return nil, errTruncatedInput(err) + } + case bzip2Type: + r = bzip2.NewReader(scannedReader) + default: + return nil, errInvalidCompressionFormat(fmt.Errorf("unknown compression type '%v'", compType)) + } + + return &progressReader{ + rc: rc, + scannedReader: 
scannedReader, + processedReader: newCountUpReader(r), + }, nil +} diff --git a/pkg/s3select/select.go b/pkg/s3select/select.go index c371db3db..98b1ac40e 100644 --- a/pkg/s3select/select.go +++ b/pkg/s3select/select.go @@ -1,5 +1,5 @@ /* - * Minio Cloud Storage, (C) 2018 Minio, Inc. + * Minio Cloud Storage, (C) 2019 Minio, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,436 +17,377 @@ package s3select import ( - "math" - "sort" - "strconv" + "encoding/xml" + "fmt" + "io" + "net/http" "strings" - "github.com/minio/minio/pkg/s3select/format" - "github.com/tidwall/gjson" - "github.com/xwb1989/sqlparser" + "github.com/minio/minio/pkg/s3select/csv" + "github.com/minio/minio/pkg/s3select/json" + "github.com/minio/minio/pkg/s3select/parquet" + "github.com/minio/minio/pkg/s3select/sql" ) -// SelectFuncs contains the relevant values from the parser for S3 Select -// Functions -type SelectFuncs struct { - funcExpr []*sqlparser.FuncExpr - index []int +type recordReader interface { + Read() (sql.Record, error) + Close() error } -// RunSqlParser allows us to easily bundle all the functions from above and run -// them in the appropriate order. -func runSelectParser(f format.Select, rowCh chan Row) { - reqCols, alias, limit, wc, aggFunctionNames, fns, err := ParseSelect(f) +const ( + csvFormat = "csv" + jsonFormat = "json" + parquetFormat = "parquet" +) + +// CompressionType - represents value inside in request XML. +type CompressionType string + +const ( + noneType CompressionType = "none" + gzipType CompressionType = "gzip" + bzip2Type CompressionType = "bzip2" +) + +// UnmarshalXML - decodes XML data. 
+func (c *CompressionType) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + var s string + if err := d.DecodeElement(&s, &start); err != nil { + return errMalformedXML(err) + } + + parsedType := CompressionType(strings.ToLower(s)) + if s == "" { + parsedType = noneType + } + + switch parsedType { + case noneType, gzipType, bzip2Type: + default: + return errInvalidCompressionFormat(fmt.Errorf("invalid compression format '%v'", s)) + } + + *c = parsedType + return nil +} + +// InputSerialization - represents elements inside in request XML. +type InputSerialization struct { + CompressionType CompressionType `xml:"CompressionType"` + CSVArgs csv.ReaderArgs `xml:"CSV"` + JSONArgs json.ReaderArgs `xml:"JSON"` + ParquetArgs parquet.ReaderArgs `xml:"Parquet"` + unmarshaled bool + format string +} + +// IsEmpty - returns whether input serialization is empty or not. +func (input *InputSerialization) IsEmpty() bool { + return !input.unmarshaled +} + +// UnmarshalXML - decodes XML data. +func (input *InputSerialization) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + // Make subtype to avoid recursive UnmarshalXML(). + type subInputSerialization InputSerialization + parsedInput := subInputSerialization{} + if err := d.DecodeElement(&parsedInput, &start); err != nil { + return errMalformedXML(err) + } + + found := 0 + if !parsedInput.CSVArgs.IsEmpty() { + parsedInput.format = csvFormat + found++ + } + if !parsedInput.JSONArgs.IsEmpty() { + parsedInput.format = jsonFormat + found++ + } + if !parsedInput.ParquetArgs.IsEmpty() { + if parsedInput.CompressionType != noneType { + return errInvalidRequestParameter(fmt.Errorf("CompressionType must be NONE for Parquet format")) + } + + parsedInput.format = parquetFormat + found++ + } + + if found != 1 { + return errInvalidDataSource(nil) + } + + *input = InputSerialization(parsedInput) + input.unmarshaled = true + return nil +} + +// OutputSerialization - represents elements inside in request XML. 
+type OutputSerialization struct { + CSVArgs csv.WriterArgs `xml:"CSV"` + JSONArgs json.WriterArgs `xml:"JSON"` + unmarshaled bool + format string +} + +// IsEmpty - returns whether output serialization is empty or not. +func (output *OutputSerialization) IsEmpty() bool { + return !output.unmarshaled +} + +// UnmarshalXML - decodes XML data. +func (output *OutputSerialization) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + // Make subtype to avoid recursive UnmarshalXML(). + type subOutputSerialization OutputSerialization + parsedOutput := subOutputSerialization{} + if err := d.DecodeElement(&parsedOutput, &start); err != nil { + return errMalformedXML(err) + } + + found := 0 + if !parsedOutput.CSVArgs.IsEmpty() { + parsedOutput.format = csvFormat + found++ + } + if !parsedOutput.JSONArgs.IsEmpty() { + parsedOutput.format = jsonFormat + found++ + } + if found != 1 { + return errObjectSerializationConflict(fmt.Errorf("either CSV or JSON should be present in OutputSerialization")) + } + + *output = OutputSerialization(parsedOutput) + output.unmarshaled = true + return nil +} + +// RequestProgress - represents elements inside in request XML. +type RequestProgress struct { + Enabled bool `xml:"Enabled"` +} + +// S3Select - filters the contents on a simple structured query language (SQL) statement. It +// represents elements inside in request XML specified in detail at +// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html. +type S3Select struct { + XMLName xml.Name `xml:"SelectObjectContentRequest"` + Expression string `xml:"Expression"` + ExpressionType string `xml:"ExpressionType"` + Input InputSerialization `xml:"InputSerialization"` + Output OutputSerialization `xml:"OutputSerialization"` + Progress RequestProgress `xml:"RequestProgress"` + + statement *sql.Select + progressReader *progressReader + recordReader recordReader +} + +// UnmarshalXML - decodes XML data. 
+func (s3Select *S3Select) UnmarshalXML(d *xml.Decoder, start xml.StartElement) error { + // Make subtype to avoid recursive UnmarshalXML(). + type subS3Select S3Select + parsedS3Select := subS3Select{} + if err := d.DecodeElement(&parsedS3Select, &start); err != nil { + if _, ok := err.(*s3Error); ok { + return err + } + + return errMalformedXML(err) + } + + parsedS3Select.ExpressionType = strings.ToLower(parsedS3Select.ExpressionType) + if parsedS3Select.ExpressionType != "sql" { + return errInvalidExpressionType(fmt.Errorf("invalid expression type '%v'", parsedS3Select.ExpressionType)) + } + + if parsedS3Select.Input.IsEmpty() { + return errMissingRequiredParameter(fmt.Errorf("InputSerialization must be provided")) + } + + if parsedS3Select.Output.IsEmpty() { + return errMissingRequiredParameter(fmt.Errorf("OutputSerialization must be provided")) + } + + statement, err := sql.NewSelect(parsedS3Select.Expression) if err != nil { - rowCh <- Row{ - err: err, - } - return - } - processSelectReq(reqCols, alias, wc, limit, aggFunctionNames, rowCh, fns, f) -} - -// ParseSelect parses the SELECT expression, and effectively tokenizes it into -// its separate parts. It returns the requested column names,alias,limit of -// records, and the where clause. 
-func ParseSelect(f format.Select) ([]string, string, int64, sqlparser.Expr, []string, SelectFuncs, error) { - var sFuncs = SelectFuncs{} - var whereClause sqlparser.Expr - var alias string - var limit int64 - - stmt, err := sqlparser.Parse(f.Expression()) - // TODO: Maybe can parse their errors a bit to return some more of the s3 errors - if err != nil { - return nil, "", 0, nil, nil, sFuncs, ErrLexerInvalidChar + return err } - switch stmt := stmt.(type) { - case *sqlparser.Select: - // evaluates the where clause - fnNames := make([]string, len(stmt.SelectExprs)) - columnNames := make([]string, len(stmt.SelectExprs)) + parsedS3Select.statement = statement - if stmt.Where != nil { - whereClause = stmt.Where.Expr - } - for i, sexpr := range stmt.SelectExprs { - switch expr := sexpr.(type) { - case *sqlparser.StarExpr: - columnNames[0] = "*" - case *sqlparser.AliasedExpr: - switch smallerexpr := expr.Expr.(type) { - case *sqlparser.FuncExpr: - if smallerexpr.IsAggregate() { - fnNames[i] = smallerexpr.Name.CompliantName() - // Will return function name - // Case to deal with if we have functions and not an asterix - switch tempagg := smallerexpr.Exprs[0].(type) { - case *sqlparser.StarExpr: - columnNames[0] = "*" - if smallerexpr.Name.CompliantName() != "count" { - return nil, "", 0, nil, nil, sFuncs, ErrParseUnsupportedCallWithStar - } - case *sqlparser.AliasedExpr: - switch col := tempagg.Expr.(type) { - case *sqlparser.BinaryExpr: - return nil, "", 0, nil, nil, sFuncs, ErrParseNonUnaryAgregateFunctionCall - case *sqlparser.ColName: - columnNames[i] = col.Name.CompliantName() - } - } - // Case to deal with if COALESCE was used.. 
- } else if supportedFunc(smallerexpr.Name.CompliantName()) { - if sFuncs.funcExpr == nil { - sFuncs.funcExpr = make([]*sqlparser.FuncExpr, len(stmt.SelectExprs)) - sFuncs.index = make([]int, len(stmt.SelectExprs)) - } - sFuncs.funcExpr[i] = smallerexpr - sFuncs.index[i] = i - } else { - return nil, "", 0, nil, nil, sFuncs, ErrUnsupportedSQLOperation - } - case *sqlparser.ColName: - columnNames[i] = smallerexpr.Name.CompliantName() - } - } - } + *s3Select = S3Select(parsedS3Select) + return nil +} - // This code retrieves the alias and makes sure it is set to the correct - // value, if not it sets it to the tablename - for _, fexpr := range stmt.From { - switch smallerexpr := fexpr.(type) { - case *sqlparser.JoinTableExpr: - return nil, "", 0, nil, nil, sFuncs, ErrParseMalformedJoin - case *sqlparser.AliasedTableExpr: - alias = smallerexpr.As.CompliantName() - if alias == "" { - alias = sqlparser.GetTableName(smallerexpr.Expr).CompliantName() - } - } - } - if stmt.Limit != nil { - switch expr := stmt.Limit.Rowcount.(type) { - case *sqlparser.SQLVal: - // The Value of how many rows we're going to limit by - parsedLimit, _ := strconv.Atoi(string(expr.Val[:])) - limit = int64(parsedLimit) - } - } - if stmt.GroupBy != nil { - return nil, "", 0, nil, nil, sFuncs, ErrParseUnsupportedLiteralsGroupBy - } - if stmt.OrderBy != nil { - return nil, "", 0, nil, nil, sFuncs, ErrParseUnsupportedToken - } - if err := parseErrs(columnNames, whereClause, alias, sFuncs, f); err != nil { - return nil, "", 0, nil, nil, sFuncs, err - } - return columnNames, alias, limit, whereClause, fnNames, sFuncs, nil +func (s3Select *S3Select) outputRecord() sql.Record { + switch s3Select.Output.format { + case csvFormat: + return csv.NewRecord() + case jsonFormat: + return json.NewRecord() } - return nil, "", 0, nil, nil, sFuncs, nil + + panic(fmt.Errorf("unknown output format '%v'", s3Select.Output.format)) } -type columnKv struct { - Key string - Value int +func (s3Select *S3Select) getProgress() 
(bytesScanned, bytesProcessed int64) { + if s3Select.progressReader != nil { + return s3Select.progressReader.Stats() + } + + return -1, -1 } -func columnsIndex(reqColNames []string, f format.Select) ([]columnKv, error) { - var ( - columnsKv []columnKv - columnsMap = make(map[string]int) - columns = f.Header() - ) - if f.HasHeader() { - err := checkForDuplicates(columns, columnsMap) - if format.IsInt(reqColNames[0]) { - err = ErrMissingHeaders +// Open - opens S3 object by using callback for SQL selection query. +// Currently CSV, JSON and Apache Parquet formats are supported. +func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadCloser, error)) error { + switch s3Select.Input.format { + case csvFormat: + rc, err := getReader(0, -1) + if err != nil { + return err } + + s3Select.progressReader, err = newProgressReader(rc, s3Select.Input.CompressionType) + if err != nil { + return err + } + + s3Select.recordReader, err = csv.NewReader(s3Select.progressReader, &s3Select.Input.CSVArgs) + if err != nil { + return err + } + + return nil + case jsonFormat: + rc, err := getReader(0, -1) + if err != nil { + return err + } + + s3Select.progressReader, err = newProgressReader(rc, s3Select.Input.CompressionType) + if err != nil { + return err + } + + s3Select.recordReader = json.NewReader(s3Select.progressReader, &s3Select.Input.JSONArgs) + return nil + case parquetFormat: + var err error + s3Select.recordReader, err = parquet.NewReader(getReader, &s3Select.Input.ParquetArgs) + return err + } + + panic(fmt.Errorf("unknown input format '%v'", s3Select.Input.format)) +} + +func (s3Select *S3Select) marshal(record sql.Record) ([]byte, error) { + switch s3Select.Output.format { + case csvFormat: + data, err := record.MarshalCSV([]rune(s3Select.Output.CSVArgs.FieldDelimiter)[0]) if err != nil { return nil, err } - for k, v := range columnsMap { - columnsKv = append(columnsKv, columnKv{ - Key: k, - Value: v, - }) - } - } else { - for i := range columns { - 
columnsKv = append(columnsKv, columnKv{ - Key: "_" + strconv.Itoa(i), - Value: i, - }) + + return append(data, []byte(s3Select.Output.CSVArgs.RecordDelimiter)...), nil + case jsonFormat: + data, err := record.MarshalJSON() + if err != nil { + return nil, err } + + return append(data, []byte(s3Select.Output.JSONArgs.RecordDelimiter)...), nil } - sort.Slice(columnsKv, func(i, j int) bool { - return columnsKv[i].Value < columnsKv[j].Value - }) - return columnsKv, nil + + panic(fmt.Errorf("unknown output format '%v'", s3Select.Output.format)) } -// This is the main function, It goes row by row and for records which validate -// the where clause it currently prints the appropriate row given the requested -// columns. -func processSelectReq(reqColNames []string, alias string, wc sqlparser.Expr, lrecords int64, fnNames []string, rowCh chan Row, fn SelectFuncs, f format.Select) { - counter := -1 - filtrCount := 0 - functionFlag := false - - // Values used to store our aggregation values. - aggVals := make([]float64, len(reqColNames)) - if lrecords == 0 { - lrecords = math.MaxInt64 +// Evaluate - filters and sends records read from opened reader as per select statement to http response writer. +func (s3Select *S3Select) Evaluate(w http.ResponseWriter) { + getProgressFunc := s3Select.getProgress + if !s3Select.Progress.Enabled { + getProgressFunc = nil } + writer := newMessageWriter(w, getProgressFunc) - var results []string - var columnsKv []columnKv - if f.Type() == format.CSV { - var err error - columnsKv, err = columnsIndex(reqColNames, f) - if err != nil { - rowCh <- Row{ - err: err, - } - return + var inputRecord sql.Record + var outputRecord sql.Record + var err error + var data []byte + sendRecord := func() bool { + if outputRecord == nil { + return true } - results = make([]string, len(columnsKv)) + + if data, err = s3Select.marshal(outputRecord); err != nil { + return false + } + + if err = writer.SendRecords(data); err != nil { + // FIXME: log this error. 
+ err = nil + return false + } + + return true } for { - record, err := f.Read() - if err != nil { - rowCh <- Row{ - err: err, + if inputRecord, err = s3Select.recordReader.Read(); err != nil { + if err != io.EOF { + break } - return - } - if record == nil { - if functionFlag { - rowCh <- Row{ - record: aggFuncToStr(aggVals, f) + "\n", + + if s3Select.statement.IsAggregated() { + outputRecord = s3Select.outputRecord() + if err = s3Select.statement.AggregateResult(outputRecord); err != nil { + break + } + + if !sendRecord() { + break } } - close(rowCh) - return - } - // For JSON multi-line input type columns needs - // to be handled for each record. - if f.Type() == format.JSON { - columnsKv, err = columnsIndex(reqColNames, f) - if err != nil { - rowCh <- Row{ - err: err, - } - return + if err = writer.SendStats(s3Select.getProgress()); err != nil { + // FIXME: log this error. + err = nil } - results = make([]string, len(columnsKv)) + + break } - f.UpdateBytesProcessed(int64(len(record))) - - // Return in case the number of record reaches the LIMIT - // defined in select query - if int64(filtrCount) == lrecords { - close(rowCh) - return + outputRecord = s3Select.outputRecord() + if outputRecord, err = s3Select.statement.Eval(inputRecord, outputRecord); err != nil { + break } - // The call to the where function clause, ensures that - // the rows we print match our where clause. 
- condition, err := matchesMyWhereClause(record, alias, wc) - if err != nil { - rowCh <- Row{ - err: err, + if !s3Select.statement.IsAggregated() { + if !sendRecord() { + break } - return } + } - if condition { - // if its an asterix we just print everything in the row - if reqColNames[0] == "*" && fnNames[0] == "" { - switch f.OutputType() { - case format.CSV: - for i, kv := range columnsKv { - results[i] = gjson.GetBytes(record, kv.Key).String() - } - rowCh <- Row{ - record: strings.Join(results, f.OutputFieldDelimiter()) + f.OutputRecordDelimiter(), - } - case format.JSON: - rowCh <- Row{ - record: string(record) + f.OutputRecordDelimiter(), - } - } - } else if alias != "" { - // This is for dealing with the case of if we have to deal with a - // request for a column with an index e.g A_1. - if format.IsInt(reqColNames[0]) { - // This checks whether any aggregation function was called as now we - // no longer will go through printing each row, and only print at the end - if len(fnNames) > 0 && fnNames[0] != "" { - functionFlag = true - aggregationFns(counter, filtrCount, aggVals, reqColNames, fnNames, record) - } else { - // The code below finds the appropriate columns of the row given the - // indicies provided in the SQL request. - var rowStr string - rowStr, err = processColNameIndex(record, reqColNames, f) - if err != nil { - rowCh <- Row{ - err: err, - } - return - } - rowCh <- Row{ - record: rowStr + "\n", - } - } - } else { - // This code does aggregation if we were provided column names in the - // form of actual names rather an indices. - if len(fnNames) > 0 && fnNames[0] != "" { - functionFlag = true - aggregationFns(counter, filtrCount, aggVals, reqColNames, fnNames, record) - } else { - // This code prints the appropriate part of the row given the filter - // and select request, if the select request was based on column - // names rather than indices. 
- var rowStr string - rowStr, err = processColNameLiteral(record, reqColNames, fn, f) - if err != nil { - rowCh <- Row{ - err: err, - } - return - } - rowCh <- Row{ - record: rowStr + "\n", - } - } - } - } - filtrCount++ + if err != nil { + if serr := writer.SendError("InternalError", err.Error()); serr != nil { + // FIXME: log errors. } - counter++ } } -// processColumnNames is a function which allows for cleaning of column names. -func processColumnNames(reqColNames []string, alias string, f format.Select) error { - switch f.Type() { - case format.CSV: - for i := range reqColNames { - // The code below basically cleans the column name of its alias and other - // syntax, so that we can extract its pure name. - reqColNames[i] = cleanCol(reqColNames[i], alias) - } - case format.JSON: - // JSON doesnt have columns so no cleaning required - } - - return nil +// Close - closes opened S3 object. +func (s3Select *S3Select) Close() error { + return s3Select.recordReader.Close() } -// processColNameIndex is the function which creates the row for an index based query. -func processColNameIndex(record []byte, reqColNames []string, f format.Select) (string, error) { - var row []string - for _, colName := range reqColNames { - // COALESCE AND NULLIF do not support index based access. - if reqColNames[0] == "0" { - return "", format.ErrInvalidColumnIndex - } - cindex, err := strconv.Atoi(colName) - if err != nil { - return "", ErrMissingHeaders - } - if cindex > len(f.Header()) { - return "", format.ErrInvalidColumnIndex - } +// NewS3Select - creates new S3Select by given request XML reader. +func NewS3Select(r io.Reader) (*S3Select, error) { + s3Select := &S3Select{} + if err := xml.NewDecoder(r).Decode(s3Select); err != nil { + return nil, err + } - // Subtract 1 because SELECT indexing is not 0 based, it - // starts at 1 generating the key like "_1". 
- row = append(row, gjson.GetBytes(record, string("_"+strconv.Itoa(cindex-1))).String()) - } - rowStr := strings.Join(row, f.OutputFieldDelimiter()) - if len(rowStr) > MaxCharsPerRecord { - return "", ErrOverMaxRecordSize - } - return rowStr, nil -} - -// processColNameLiteral is the function which creates the row for an name based query. -func processColNameLiteral(record []byte, reqColNames []string, fn SelectFuncs, f format.Select) (string, error) { - row := make([]string, len(reqColNames)) - for i, colName := range reqColNames { - // this is the case to deal with COALESCE. - if colName == "" && isValidFunc(fn.index, i) { - row[i] = evaluateFuncExpr(fn.funcExpr[i], "", record) - continue - } - row[i] = gjson.GetBytes(record, colName).String() - } - rowStr := strings.Join(row, f.OutputFieldDelimiter()) - if len(rowStr) > MaxCharsPerRecord { - return "", ErrOverMaxRecordSize - } - return rowStr, nil -} - -// aggregationFns is a function which performs the actual aggregation -// methods on the given row, it uses an array defined in the main parsing -// function to keep track of values. -func aggregationFns(counter int, filtrCount int, aggVals []float64, storeReqCols []string, storeFns []string, record []byte) error { - for i, storeFn := range storeFns { - switch storeFn { - case "": - continue - case "count": - aggVals[i]++ - default: - // Column names are provided as an index it'll use - // this if statement instead. - var convAggFloat float64 - if format.IsInt(storeReqCols[i]) { - index, _ := strconv.Atoi(storeReqCols[i]) - convAggFloat = gjson.GetBytes(record, "_"+strconv.Itoa(index)).Float() - } else { - // Named columns rather than indices. - convAggFloat = gjson.GetBytes(record, storeReqCols[i]).Float() - } - switch storeFn { - case "min": - if counter == -1 { - aggVals[i] = math.MaxFloat64 - } - if convAggFloat < aggVals[i] { - aggVals[i] = convAggFloat - } - case "max": - // Calculate the max. 
- if counter == -1 { - aggVals[i] = math.SmallestNonzeroFloat64 - } - if convAggFloat > aggVals[i] { - aggVals[i] = convAggFloat - } - case "sum": - // Calculate the sum. - aggVals[i] += convAggFloat - case "avg": - // Calculating the average. - if filtrCount == 0 { - aggVals[i] = convAggFloat - } else { - aggVals[i] = (convAggFloat + (aggVals[i] * float64(filtrCount))) / float64((filtrCount + 1)) - } - default: - return ErrParseNonUnaryAgregateFunctionCall - } - } - } - return nil + return s3Select, nil } diff --git a/pkg/s3select/select_benchmark_test.go b/pkg/s3select/select_benchmark_test.go new file mode 100644 index 000000000..6ccc61d2f --- /dev/null +++ b/pkg/s3select/select_benchmark_test.go @@ -0,0 +1,170 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package s3select + +import ( + "bytes" + "encoding/csv" + "io" + "io/ioutil" + "math/rand" + "net/http" + "strconv" + "testing" + "time" + + humanize "github.com/dustin/go-humanize" +) + +var randSrc = rand.New(rand.NewSource(time.Now().UnixNano())) + +const charset = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + +func newRandString(length int) string { + b := make([]byte, length) + for i := range b { + b[i] = charset[randSrc.Intn(len(charset))] + } + return string(b) +} + +func genSampleCSVData(count int) []byte { + buf := &bytes.Buffer{} + csvWriter := csv.NewWriter(buf) + csvWriter.Write([]string{"id", "name", "age", "city"}) + + for i := 0; i < count; i++ { + csvWriter.Write([]string{ + strconv.Itoa(i), + newRandString(10), + newRandString(5), + newRandString(10), + }) + } + + csvWriter.Flush() + return buf.Bytes() +} + +type nullResponseWriter struct { +} + +func (w *nullResponseWriter) Header() http.Header { + return nil +} + +func (w *nullResponseWriter) Write(p []byte) (int, error) { + return len(p), nil +} + +func (w *nullResponseWriter) WriteHeader(statusCode int) { +} + +func (w *nullResponseWriter) Flush() { +} + +func benchmarkSelect(b *testing.B, count int, query string) { + var requestXML = []byte(` + + + ` + query + ` + SQL + + NONE + + USE + + + + + + + + FALSE + + +`) + + s3Select, err := NewS3Select(bytes.NewReader(requestXML)) + if err != nil { + b.Fatal(err) + } + + csvData := genSampleCSVData(count) + + b.ResetTimer() + b.ReportAllocs() + + for i := 0; i < b.N; i++ { + if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) { + return ioutil.NopCloser(bytes.NewReader(csvData)), nil + }); err != nil { + b.Fatal(err) + } + + s3Select.Evaluate(&nullResponseWriter{}) + s3Select.Close() + } +} + +func benchmarkSelectAll(b *testing.B, count int) { + benchmarkSelect(b, count, "select * from S3Object") +} + +// BenchmarkSelectAll_100K - benchmark * function with 100k records. 
+func BenchmarkSelectAll_100K(b *testing.B) { + benchmarkSelectAll(b, 100*humanize.KiByte) +} + +// BenchmarkSelectAll_1M - benchmark * function with 1m records. +func BenchmarkSelectAll_1M(b *testing.B) { + benchmarkSelectAll(b, 1*humanize.MiByte) +} + +// BenchmarkSelectAll_2M - benchmark * function with 2m records. +func BenchmarkSelectAll_2M(b *testing.B) { + benchmarkSelectAll(b, 2*humanize.MiByte) +} + +// BenchmarkSelectAll_10M - benchmark * function with 10m records. +func BenchmarkSelectAll_10M(b *testing.B) { + benchmarkSelectAll(b, 10*humanize.MiByte) +} + +func benchmarkAggregateCount(b *testing.B, count int) { + benchmarkSelect(b, count, "select count(*) from S3Object") +} + +// BenchmarkAggregateCount_100K - benchmark count(*) function with 100k records. +func BenchmarkAggregateCount_100K(b *testing.B) { + benchmarkAggregateCount(b, 100*humanize.KiByte) +} + +// BenchmarkAggregateCount_1M - benchmark count(*) function with 1m records. +func BenchmarkAggregateCount_1M(b *testing.B) { + benchmarkAggregateCount(b, 1*humanize.MiByte) +} + +// BenchmarkAggregateCount_2M - benchmark count(*) function with 2m records. +func BenchmarkAggregateCount_2M(b *testing.B) { + benchmarkAggregateCount(b, 2*humanize.MiByte) +} + +// BenchmarkAggregateCount_10M - benchmark count(*) function with 10m records. +func BenchmarkAggregateCount_10M(b *testing.B) { + benchmarkAggregateCount(b, 10*humanize.MiByte) +} diff --git a/pkg/s3select/select_test.go b/pkg/s3select/select_test.go index a708d710c..f1c6d36e0 100644 --- a/pkg/s3select/select_test.go +++ b/pkg/s3select/select_test.go @@ -1,5 +1,5 @@ /* - * Minio Cloud Storage, (C) 2018 Minio, Inc. + * Minio Cloud Storage, (C) 2019 Minio, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,594 +18,200 @@ package s3select import ( "bytes" - "encoding/csv" - "fmt" - "math/rand" - "strconv" + "go/build" + "io" + "io/ioutil" + "net/http" + "os" + "path" + "reflect" "testing" - "time" - - humanize "github.com/dustin/go-humanize" - "github.com/tidwall/gjson" - - "github.com/minio/minio/pkg/s3select/format" ) -// This function returns the index of a string in a list -func stringIndex(a string, list []string) int { - for i, v := range list { - if v == a { - return i - } - } - return -1 +type testResponseWriter struct { + statusCode int + response []byte } -// TestHelperFunctions is a unit test which tests some -// small helper string functions. -func TestHelperFunctions(t *testing.T) { - tables := []struct { - myReq string - myList []string - myIndex int - expected bool - }{ - {"test1", []string{"test1", "test2", "test3", "test4", "test5"}, 0, true}, - {"random", []string{"test1", "test2", "test3", "test4", "test5"}, -1, false}, - {"test3", []string{"test1", "test2", "test3", "test4", "test5"}, 2, true}, - } - for _, table := range tables { - if format.StringInSlice(table.myReq, table.myList) != table.expected { - t.Error() - } - if stringIndex(table.myReq, table.myList) != table.myIndex { - t.Error() - } - } +func (w *testResponseWriter) Header() http.Header { + return nil } -// TestStateMachine is a unit test which ensures that the lowest level of the -// interpreter is converting properly. 
-func TestStateMachine(t *testing.T) { - tables := []struct { - operand string - operator string - leftArg string - err error - expected bool - }{ - {"", ">", "2012", nil, true}, - {"2005", ">", "2012", nil, true}, - {"2005", ">", "2012", nil, true}, - {"2012.0000", ">", "2014.000", nil, true}, - {"2012", "!=", "2014.000", nil, true}, - {"NA", ">", "2014.000", nil, true}, - {"2012", ">", "2014.000", nil, false}, - {"2012.0000", ">", "2014", nil, false}, - {"", "<", "2012", nil, false}, - {"2012.0000", "<", "2014.000", nil, false}, - {"2014", ">", "Random", nil, false}, - {"test3", ">", "aandom", nil, false}, - {"true", ">", "true", ErrUnsupportedSyntax, false}, - } - for i, table := range tables { - val, err := evaluateOperator(gjson.Parse(table.leftArg), table.operator, gjson.Parse(table.operand)) - if err != table.err { - t.Errorf("Test %d: expected %v, got %v", i+1, table.err, err) - } - if val != table.expected { - t.Errorf("Test %d: expected %t, got %t", i+1, table.expected, val) - } - } +func (w *testResponseWriter) Write(p []byte) (int, error) { + w.response = append(w.response, p...) + return len(p), nil } -// TestOperators is a unit test which ensures that the appropriate values are -// being returned from the operators functions. -func TestOperators(t *testing.T) { - tables := []struct { - operator string - err error - }{ - {">", nil}, - {"%", ErrParseUnknownOperator}, - } - for _, table := range tables { - err := checkValidOperator(table.operator) - if err != table.err { - t.Error() - } - } +func (w *testResponseWriter) WriteHeader(statusCode int) { + w.statusCode = statusCode } -// Unit tests for the main function that performs aggreggation. 
-func TestAggregationFunc(t *testing.T) { - columnsMap := make(map[string]int) - columnsMap["Col1"] = 0 - columnsMap["Col2"] = 1 - tables := []struct { - counter int - filtrCount int - myAggVals []float64 - columnsMap map[string]int - storeReqCols []string - storeFunctions []string - record []byte - err error - expectedVal float64 - }{ - {10, 5, []float64{10, 11, 12, 13, 14}, columnsMap, []string{"Col1"}, []string{"count"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 11}, - {10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"min"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 1}, - {10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"max"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 10}, - {10, 5, []float64{10}, columnsMap, []string{"Col1"}, []string{"sum"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 11}, - {1, 1, []float64{10}, columnsMap, []string{"Col1"}, []string{"avg"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 5.500}, - {10, 5, []float64{0.0000}, columnsMap, []string{"Col1"}, []string{"random"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), - ErrParseNonUnaryAgregateFunctionCall, 0}, - {0, 5, []float64{0}, columnsMap, []string{"0"}, []string{"count"}, []byte("{\"Col1\":\"1\",\"Col2\":\"2\"}"), nil, 1}, - {10, 5, []float64{10}, columnsMap, []string{"1"}, []string{"min"}, []byte("{\"_1\":\"1\",\"_2\":\"2\"}"), nil, 1}, - } - - for _, table := range tables { - err := aggregationFns(table.counter, table.filtrCount, table.myAggVals, table.storeReqCols, table.storeFunctions, table.record) - if table.err != err { - t.Error() - } - if table.myAggVals[0] != table.expectedVal { - t.Error() - } - - } +func (w *testResponseWriter) Flush() { } -// TestStringComparator is a unit test which ensures that the appropriate -// values are being compared for strings. 
-func TestStringComparator(t *testing.T) { - tables := []struct { - operand string - operator string - myVal string - expected bool - err error - }{ - {"random", ">", "myName", "random" > "myName", nil}, - {"12", "!=", "myName", "12" != "myName", nil}, - {"12", "=", "myName", "12" == "myName", nil}, - {"12", "<=", "myName", "12" <= "myName", nil}, - {"12", ">=", "myName", "12" >= "myName", nil}, - {"12", "<", "myName", "12" < "myName", nil}, - {"name", "like", "_x%", false, nil}, - {"12", "randomoperator", "myName", false, ErrUnsupportedSyntax}, - } - for _, table := range tables { - myVal, err := stringEval(table.operand, table.operator, table.myVal) - if err != table.err { - t.Error() - } - if myVal != table.expected { - t.Error() - } - } -} +func TestCSVINput(t *testing.T) { + var requestXML = []byte(` + + + SELECT one, two, three from S3Object + SQL + + NONE + + USE + + + + + + + + FALSE + + +`) -// TestFloatComparator is a unit test which ensures that the appropriate -// values are being compared for floats. -func TestFloatComparator(t *testing.T) { - tables := []struct { - operand float64 - operator string - myVal float64 - expected bool - err error - }{ - {12.000, ">", 13.000, 12.000 > 13.000, nil}, - {1000.000, "!=", 1000.000, 1000.000 != 1000.000, nil}, - {1000.000, "<", 1000.000, 1000.000 < 1000.000, nil}, - {1000.000, "<=", 1000.000, 1000.000 <= 1000.000, nil}, - {1000.000, ">=", 1000.000, 1000.000 >= 1000.000, nil}, - {1000.000, "=", 1000.000, 1000.000 == 1000.000, nil}, - {17.000, "randomoperator", 0.0, false, ErrUnsupportedSyntax}, - } - for _, table := range tables { - myVal, err := floatEval(table.operand, table.operator, table.myVal) - if err != table.err { - t.Error() - } - if myVal != table.expected { - t.Error() - } - } -} + var csvData = []byte(`one,two,three +10,true,"foo" +-3,false,"bar baz" +`) -// TestIntComparator is a unit test which ensures that the appropriate values -// are being compared for ints. 
-func TestIntComparator(t *testing.T) { - tables := []struct { - operand int64 - operator string - myVal int64 - expected bool - err error - }{ - {12, ">", 13, 12.000 > 13.000, nil}, - {1000, "!=", 1000, 1000.000 != 1000.000, nil}, - {1000, "<", 1000, 1000.000 < 1000.000, nil}, - {1000, "<=", 1000, 1000.000 <= 1000.000, nil}, - {1000, ">=", 1000, 1000.000 >= 1000.000, nil}, - {1000, "=", 1000, 1000.000 >= 1000.000, nil}, - {17, "randomoperator", 0, false, ErrUnsupportedSyntax}, - } - for _, table := range tables { - myVal, err := intEval(table.operand, table.operator, table.myVal) - if err != table.err { - t.Error() - } - if myVal != table.expected { - t.Error() - } - } -} - -// TestSizeFunction is a function which provides unit testing for the function -// which calculates size. -func TestSizeFunction(t *testing.T) { - tables := []struct { - myRecord []string - expected int64 - }{ - {[]string{"test1", "test2", "test3", "test4", "test5"}, 30}, - } - for _, table := range tables { - if format.ProcessSize(table.myRecord) != table.expected { - t.Error() - } - - } -} - -func TestMatch(t *testing.T) { - testCases := []struct { - pattern string - text string - matched bool - }{ - // Test case - 1. - // Test case so that the match occurs on the opening letter. - { - pattern: "a%", - text: "apple", - matched: true, - }, - // Test case - 2. - // Test case so that the ending letter is true. - { - pattern: "%m", - text: "random", - matched: true, - }, - // Test case - 3. - // Test case so that a character is at the appropriate position. - { - pattern: "_d%", - text: "adam", - matched: true, - }, - // Test case - 4. - // Test case so that a character is at the appropriate position. - { - pattern: "_d%", - text: "apple", - matched: false, - }, - // Test case - 5. 
- // Test case with checking that it is at least 3 in length - { - pattern: "a_%_%", - text: "ap", - matched: false, - }, - { - pattern: "a_%_%", - text: "apple", - matched: true, - }, - { - pattern: "%or%", - text: "orphan", - matched: true, - }, - { - pattern: "%or%", - text: "dolphin", - matched: false, - }, - { - pattern: "%or%", - text: "dorlphin", - matched: true, - }, - { - pattern: "2__3", - text: "2003", - matched: true, - }, - { - pattern: "_YYYY_", - text: "aYYYYa", - matched: true, - }, - { - pattern: "C%", - text: "CA", - matched: true, - }, - { - pattern: "C%", - text: "SC", - matched: false, - }, - { - pattern: "%C", - text: "SC", - matched: true, - }, - { - pattern: "%C", - text: "CA", - matched: false, - }, - { - pattern: "%C", - text: "ACCC", - matched: true, - }, - { - pattern: "C%", - text: "CCC", - matched: true, - }, - { - pattern: "j%", - text: "mejri", - matched: false, - }, - { - pattern: "a%o", - text: "ando", - matched: true, - }, - { - pattern: "%j", - text: "mejri", - matched: false, - }, - { - pattern: "%ja", - text: "mejrija", - matched: true, - }, - { - pattern: "ja%", - text: "jamal", - matched: true, - }, - { - pattern: "a%o", - text: "andp", - matched: false, - }, - { - pattern: "_r%", - text: "arpa", - matched: true, - }, - { - pattern: "_r%", - text: "apra", - matched: false, - }, - { - pattern: "a_%_%", - text: "appple", - matched: true, - }, - { - pattern: "l_b%", - text: "lebron", - matched: true, - }, - { - pattern: "leb%", - text: "Dalembert", - matched: false, - }, - { - pattern: "leb%", - text: "Landesberg", - matched: false, - }, - { - pattern: "leb%", - text: "Mccalebb", - matched: false, - }, - { - pattern: "%lebb", - text: "Mccalebb", - matched: true, - }, - } - // Iterating over the test cases, call the function under test and asert the output. 
- for i, testCase := range testCases { - actualResult, err := likeConvert(testCase.pattern, testCase.text) - if err != nil { - t.Error() - } - if testCase.matched != actualResult { - fmt.Println("Expected Pattern", testCase.pattern, "Expected Text", testCase.text) - t.Errorf("Test %d: Expected the result to be `%v`, but instead found it to be `%v`", i+1, testCase.matched, actualResult) - } - } -} - -// TestFuncProcessing is a unit test which ensures that the appropriate values are -// being returned from the Processing... functions. -func TestFuncProcessing(t *testing.T) { - tables := []struct { - myString string - coalList []string - myValString string - myValCoal string - myValNull string - stringFunc string - }{ - {"lower", []string{"random", "hello", "random"}, "LOWER", "random", "", "UPPER"}, - {"LOWER", []string{"missing", "hello", "random"}, "lower", "hello", "null", "LOWER"}, - } - for _, table := range tables { - if table.coalList != nil { - myVal := processCoalNoIndex(table.coalList) - if myVal != table.myValCoal { - t.Error() - } - } - myVal := applyStrFunc(gjson.Result{ - Type: gjson.String, - Str: table.myString, - }, table.stringFunc) - if myVal != table.myValString { - t.Error() - } - - } -} - -const charset = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" - -var seededRand = rand.New(rand.NewSource(time.Now().UnixNano())) - -func StringWithCharset(length int, charset string) string { - b := make([]byte, length) - for i := range b { - b[i] = charset[seededRand.Intn(len(charset))] - } - return string(b) -} - -func String(length int) string { - return StringWithCharset(length, charset) -} - -func genCSV(b *bytes.Buffer, records int) error { - b.Reset() - w := csv.NewWriter(b) - w.Write([]string{"id", "name", "age", "city"}) - - for i := 0; i < records; i++ { - w.Write([]string{ - strconv.Itoa(i), - String(10), - String(5), - String(10), - }) + var expectedResult = []byte{ + 0, 0, 0, 113, 0, 0, 0, 85, 186, 145, 179, 109, 13, 58, 
109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 13, 58, 99, 111, 110, 116, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 24, 97, 112, 112, 108, 105, 99, 97, 116, 105, 111, 110, 47, 111, 99, 116, 101, 116, 45, 115, 116, 114, 101, 97, 109, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 7, 82, 101, 99, 111, 114, 100, 115, 49, 48, 44, 116, 114, 117, 101, 44, 102, 111, 111, 10, 225, 160, 249, 157, 0, 0, 0, 118, 0, 0, 0, 85, 8, 177, 111, 125, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 13, 58, 99, 111, 110, 116, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 24, 97, 112, 112, 108, 105, 99, 97, 116, 105, 111, 110, 47, 111, 99, 116, 101, 116, 45, 115, 116, 114, 101, 97, 109, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 7, 82, 101, 99, 111, 114, 100, 115, 45, 51, 44, 102, 97, 108, 115, 101, 44, 98, 97, 114, 32, 98, 97, 122, 10, 120, 72, 77, 126, 0, 0, 0, 235, 0, 0, 0, 67, 213, 243, 57, 141, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 13, 58, 99, 111, 110, 116, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 8, 116, 101, 120, 116, 47, 120, 109, 108, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 5, 83, 116, 97, 116, 115, 60, 63, 120, 109, 108, 32, 118, 101, 114, 115, 105, 111, 110, 61, 34, 49, 46, 48, 34, 32, 101, 110, 99, 111, 100, 105, 110, 103, 61, 34, 85, 84, 70, 45, 56, 34, 63, 62, 60, 83, 116, 97, 116, 115, 62, 60, 66, 121, 116, 101, 115, 83, 99, 97, 110, 110, 101, 100, 62, 52, 55, 60, 47, 66, 121, 116, 101, 115, 83, 99, 97, 110, 110, 101, 100, 62, 60, 66, 121, 116, 101, 115, 80, 114, 111, 99, 101, 115, 115, 101, 100, 62, 52, 55, 60, 47, 66, 121, 116, 101, 115, 80, 114, 111, 99, 101, 115, 115, 101, 100, 62, 60, 66, 121, 116, 101, 115, 82, 101, 116, 117, 114, 110, 101, 100, 62, 50, 57, 60, 47, 66, 121, 116, 101, 115, 82, 101, 116, 117, 114, 110, 101, 100, 62, 60, 47, 83, 
116, 97, 116, 115, 62, 214, 225, 163, 199, 0, 0, 0, 56, 0, 0, 0, 40, 193, 198, 132, 212, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 3, 69, 110, 100, 207, 151, 211, 146, } - // Write any buffered data to the underlying writer (standard output). - w.Flush() - - return w.Error() -} - -func benchmarkSQLAll(b *testing.B, records int) { - benchmarkSQL(b, records, "select * from S3Object") -} - -func benchmarkSQLAggregate(b *testing.B, records int) { - benchmarkSQL(b, records, "select count(*) from S3Object") -} - -func benchmarkSQL(b *testing.B, records int, query string) { - var ( - buf bytes.Buffer - output bytes.Buffer - ) - genCSV(&buf, records) - - b.ResetTimer() - b.ReportAllocs() - - sreq := ObjectSelectRequest{} - sreq.Expression = query - sreq.ExpressionType = QueryExpressionTypeSQL - sreq.InputSerialization.CSV = &struct { - FileHeaderInfo CSVFileHeaderInfo - RecordDelimiter string - FieldDelimiter string - QuoteCharacter string - QuoteEscapeCharacter string - Comments string - }{} - sreq.InputSerialization.CSV.FileHeaderInfo = CSVFileHeaderInfoUse - sreq.InputSerialization.CSV.RecordDelimiter = "\n" - sreq.InputSerialization.CSV.FieldDelimiter = "," - - sreq.OutputSerialization.CSV = &struct { - QuoteFields CSVQuoteFields - RecordDelimiter string - FieldDelimiter string - QuoteCharacter string - QuoteEscapeCharacter string - }{} - sreq.OutputSerialization.CSV.RecordDelimiter = "\n" - sreq.OutputSerialization.CSV.FieldDelimiter = "," - - s3s, err := New(&buf, int64(buf.Len()), sreq) + s3Select, err := NewS3Select(bytes.NewReader(requestXML)) if err != nil { - b.Fatal(err) + t.Fatal(err) } - for i := 0; i < b.N; i++ { - output.Reset() - if err = Execute(&output, s3s); err != nil { - b.Fatal(err) + if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) { + return ioutil.NopCloser(bytes.NewReader(csvData)), nil + }); err != nil 
{ + t.Fatal(err) + } + + w := &testResponseWriter{} + s3Select.Evaluate(w) + s3Select.Close() + + if !reflect.DeepEqual(w.response, expectedResult) { + t.Fatalf("received response does not match with expected reply") + } +} + +func TestJSONInput(t *testing.T) { + var requestXML = []byte(` + + + SELECT one, two, three from S3Object + SQL + + NONE + + DOCUMENT + + + + + + + + FALSE + + +`) + + var jsonData = []byte(`{"one":10,"two":true,"three":"foo"} +{"one":-3,"two":true,"three":"bar baz"} +`) + + var expectedResult = []byte{ + 0, 0, 0, 113, 0, 0, 0, 85, 186, 145, 179, 109, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 13, 58, 99, 111, 110, 116, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 24, 97, 112, 112, 108, 105, 99, 97, 116, 105, 111, 110, 47, 111, 99, 116, 101, 116, 45, 115, 116, 114, 101, 97, 109, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 7, 82, 101, 99, 111, 114, 100, 115, 49, 48, 44, 116, 114, 117, 101, 44, 102, 111, 111, 10, 225, 160, 249, 157, 0, 0, 0, 117, 0, 0, 0, 85, 79, 17, 21, 173, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 13, 58, 99, 111, 110, 116, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 24, 97, 112, 112, 108, 105, 99, 97, 116, 105, 111, 110, 47, 111, 99, 116, 101, 116, 45, 115, 116, 114, 101, 97, 109, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 7, 82, 101, 99, 111, 114, 100, 115, 45, 51, 44, 116, 114, 117, 101, 44, 98, 97, 114, 32, 98, 97, 122, 10, 34, 12, 125, 218, 0, 0, 0, 235, 0, 0, 0, 67, 213, 243, 57, 141, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 13, 58, 99, 111, 110, 116, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 8, 116, 101, 120, 116, 47, 120, 109, 108, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 5, 83, 116, 97, 116, 115, 60, 63, 120, 109, 108, 32, 118, 101, 114, 115, 105, 111, 110, 61, 34, 49, 46, 48, 34, 
32, 101, 110, 99, 111, 100, 105, 110, 103, 61, 34, 85, 84, 70, 45, 56, 34, 63, 62, 60, 83, 116, 97, 116, 115, 62, 60, 66, 121, 116, 101, 115, 83, 99, 97, 110, 110, 101, 100, 62, 55, 54, 60, 47, 66, 121, 116, 101, 115, 83, 99, 97, 110, 110, 101, 100, 62, 60, 66, 121, 116, 101, 115, 80, 114, 111, 99, 101, 115, 115, 101, 100, 62, 55, 54, 60, 47, 66, 121, 116, 101, 115, 80, 114, 111, 99, 101, 115, 115, 101, 100, 62, 60, 66, 121, 116, 101, 115, 82, 101, 116, 117, 114, 110, 101, 100, 62, 50, 56, 60, 47, 66, 121, 116, 101, 115, 82, 101, 116, 117, 114, 110, 101, 100, 62, 60, 47, 83, 116, 97, 116, 115, 62, 124, 107, 174, 242, 0, 0, 0, 56, 0, 0, 0, 40, 193, 198, 132, 212, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 3, 69, 110, 100, 207, 151, 211, 146, + } + + s3Select, err := NewS3Select(bytes.NewReader(requestXML)) + if err != nil { + t.Fatal(err) + } + + if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) { + return ioutil.NopCloser(bytes.NewReader(jsonData)), nil + }); err != nil { + t.Fatal(err) + } + + w := &testResponseWriter{} + s3Select.Evaluate(w) + s3Select.Close() + + if !reflect.DeepEqual(w.response, expectedResult) { + t.Fatalf("received response does not match with expected reply") + } +} + +func TestParquetInput(t *testing.T) { + var requestXML = []byte(` + + + SELECT one, two, three from S3Object + SQL + + NONE + + + + + + + + + FALSE + + +`) + + getReader := func(offset int64, length int64) (io.ReadCloser, error) { + testdataFile := path.Join(build.Default.GOPATH, "src/github.com/minio/minio/pkg/s3select/testdata.parquet") + file, err := os.Open(testdataFile) + if err != nil { + return nil, err } + + fi, err := file.Stat() + if err != nil { + return nil, err + } + + if offset < 0 { + offset = fi.Size() + offset + } + + if _, err = file.Seek(offset, os.SEEK_SET); err != nil { + return nil, err + } + + return file, nil + } 
+ + var expectedResult = []byte{ + 0, 0, 0, 114, 0, 0, 0, 85, 253, 49, 201, 189, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 13, 58, 99, 111, 110, 116, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 24, 97, 112, 112, 108, 105, 99, 97, 116, 105, 111, 110, 47, 111, 99, 116, 101, 116, 45, 115, 116, 114, 101, 97, 109, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 7, 82, 101, 99, 111, 114, 100, 115, 50, 46, 53, 44, 102, 111, 111, 44, 116, 114, 117, 101, 10, 209, 8, 249, 77, 0, 0, 0, 114, 0, 0, 0, 85, 253, 49, 201, 189, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 13, 58, 99, 111, 110, 116, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 24, 97, 112, 112, 108, 105, 99, 97, 116, 105, 111, 110, 47, 111, 99, 116, 101, 116, 45, 115, 116, 114, 101, 97, 109, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 7, 82, 101, 99, 111, 114, 100, 115, 45, 49, 44, 98, 97, 114, 44, 102, 97, 108, 115, 101, 10, 45, 143, 126, 67, 0, 0, 0, 113, 0, 0, 0, 85, 186, 145, 179, 109, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 13, 58, 99, 111, 110, 116, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 24, 97, 112, 112, 108, 105, 99, 97, 116, 105, 111, 110, 47, 111, 99, 116, 101, 116, 45, 115, 116, 114, 101, 97, 109, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 7, 82, 101, 99, 111, 114, 100, 115, 45, 49, 44, 98, 97, 122, 44, 116, 114, 117, 101, 10, 230, 139, 42, 176, 0, 0, 0, 235, 0, 0, 0, 67, 213, 243, 57, 141, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 13, 58, 99, 111, 110, 116, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 8, 116, 101, 120, 116, 47, 120, 109, 108, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 5, 83, 116, 97, 116, 115, 60, 63, 120, 109, 108, 32, 118, 101, 114, 115, 105, 111, 110, 61, 34, 49, 46, 48, 34, 32, 
101, 110, 99, 111, 100, 105, 110, 103, 61, 34, 85, 84, 70, 45, 56, 34, 63, 62, 60, 83, 116, 97, 116, 115, 62, 60, 66, 121, 116, 101, 115, 83, 99, 97, 110, 110, 101, 100, 62, 45, 49, 60, 47, 66, 121, 116, 101, 115, 83, 99, 97, 110, 110, 101, 100, 62, 60, 66, 121, 116, 101, 115, 80, 114, 111, 99, 101, 115, 115, 101, 100, 62, 45, 49, 60, 47, 66, 121, 116, 101, 115, 80, 114, 111, 99, 101, 115, 115, 101, 100, 62, 60, 66, 121, 116, 101, 115, 82, 101, 116, 117, 114, 110, 101, 100, 62, 51, 56, 60, 47, 66, 121, 116, 101, 115, 82, 101, 116, 117, 114, 110, 101, 100, 62, 60, 47, 83, 116, 97, 116, 115, 62, 199, 176, 2, 83, 0, 0, 0, 56, 0, 0, 0, 40, 193, 198, 132, 212, 13, 58, 109, 101, 115, 115, 97, 103, 101, 45, 116, 121, 112, 101, 7, 0, 5, 101, 118, 101, 110, 116, 11, 58, 101, 118, 101, 110, 116, 45, 116, 121, 112, 101, 7, 0, 3, 69, 110, 100, 207, 151, 211, 146, + } + + s3Select, err := NewS3Select(bytes.NewReader(requestXML)) + if err != nil { + t.Fatal(err) + } + + if err = s3Select.Open(getReader); err != nil { + t.Fatal(err) + } + + w := &testResponseWriter{} + s3Select.Evaluate(w) + s3Select.Close() + + if !reflect.DeepEqual(w.response, expectedResult) { + t.Fatalf("received response does not match with expected reply") } } - -// BenchmarkSQLAggregate_100K - benchmark count(*) function with 100k records. -func BenchmarkSQLAggregate_100K(b *testing.B) { - benchmarkSQLAggregate(b, humanize.KiByte*100) -} - -// BenchmarkSQLAggregate_1M - benchmark count(*) function with 1m records. -func BenchmarkSQLAggregate_1M(b *testing.B) { - benchmarkSQLAggregate(b, humanize.MiByte) -} - -// BenchmarkSQLAggregate_2M - benchmark count(*) function with 2m records. -func BenchmarkSQLAggregate_2M(b *testing.B) { - benchmarkSQLAggregate(b, 2*humanize.MiByte) -} - -// BenchmarkSQLAggregate_10M - benchmark count(*) function with 10m records. 
-func BenchmarkSQLAggregate_10M(b *testing.B) { - benchmarkSQLAggregate(b, 10*humanize.MiByte) -} - -// BenchmarkSQLAll_100K - benchmark * function with 100k records. -func BenchmarkSQLAll_100K(b *testing.B) { - benchmarkSQLAll(b, humanize.KiByte*100) -} - -// BenchmarkSQLAll_1M - benchmark * function with 1m records. -func BenchmarkSQLAll_1M(b *testing.B) { - benchmarkSQLAll(b, humanize.MiByte) -} - -// BenchmarkSQLAll_2M - benchmark * function with 2m records. -func BenchmarkSQLAll_2M(b *testing.B) { - benchmarkSQLAll(b, 2*humanize.MiByte) -} - -// BenchmarkSQLAll_10M - benchmark * function with 10m records. -func BenchmarkSQLAll_10M(b *testing.B) { - benchmarkSQLAll(b, 10*humanize.MiByte) -} diff --git a/pkg/s3select/sql/arithexpr.go b/pkg/s3select/sql/arithexpr.go new file mode 100644 index 000000000..e611ba54c --- /dev/null +++ b/pkg/s3select/sql/arithexpr.go @@ -0,0 +1,175 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sql + +import "fmt" + +// ArithOperator - arithmetic operator. +type ArithOperator string + +const ( + // Add operator '+'. + Add ArithOperator = "+" + + // Subtract operator '-'. + Subtract ArithOperator = "-" + + // Multiply operator '*'. + Multiply ArithOperator = "*" + + // Divide operator '/'. + Divide ArithOperator = "/" + + // Modulo operator '%'. + Modulo ArithOperator = "%" +) + +// arithExpr - arithmetic function. 
+type arithExpr struct { + left Expr + right Expr + operator ArithOperator + funcType Type +} + +// String - returns string representation of this function. +func (f *arithExpr) String() string { + return fmt.Sprintf("(%v %v %v)", f.left, f.operator, f.right) +} + +func (f *arithExpr) compute(lv, rv *Value) (*Value, error) { + leftValueType := lv.Type() + rightValueType := rv.Type() + if !leftValueType.isNumber() { + err := fmt.Errorf("%v: left side expression evaluated to %v; not to number", f, leftValueType) + return nil, errExternalEvalException(err) + } + if !rightValueType.isNumber() { + err := fmt.Errorf("%v: right side expression evaluated to %v; not to number", f, rightValueType) + return nil, errExternalEvalException(err) + } + + leftValue := lv.FloatValue() + rightValue := rv.FloatValue() + + var result float64 + switch f.operator { + case Add: + result = leftValue + rightValue + case Subtract: + result = leftValue - rightValue + case Multiply: + result = leftValue * rightValue + case Divide: + result = leftValue / rightValue + case Modulo: + result = float64(int64(leftValue) % int64(rightValue)) + } + + if leftValueType == Float || rightValueType == Float { + return NewFloat(result), nil + } + + return NewInt(int64(result)), nil +} + +// Call - evaluates this function for given arg values and returns result as Value. +func (f *arithExpr) Eval(record Record) (*Value, error) { + leftValue, err := f.left.Eval(record) + if err != nil { + return nil, err + } + + rightValue, err := f.right.Eval(record) + if err != nil { + return nil, err + } + + if f.funcType == aggregateFunction { + return nil, nil + } + + return f.compute(leftValue, rightValue) +} + +// AggregateValue - returns aggregated value. 
+func (f *arithExpr) AggregateValue() (*Value, error) { + if f.funcType != aggregateFunction { + err := fmt.Errorf("%v is not aggreate expression", f) + return nil, errExternalEvalException(err) + } + + lv, err := f.left.AggregateValue() + if err != nil { + return nil, err + } + + rv, err := f.right.AggregateValue() + if err != nil { + return nil, err + } + + return f.compute(lv, rv) +} + +// Type - returns arithmeticFunction or aggregateFunction type. +func (f *arithExpr) Type() Type { + return f.funcType +} + +// ReturnType - returns Float as return type. +func (f *arithExpr) ReturnType() Type { + return Float +} + +// newArithExpr - creates new arithmetic function. +func newArithExpr(operator ArithOperator, left, right Expr) (*arithExpr, error) { + if !left.ReturnType().isNumberKind() { + err := fmt.Errorf("operator %v: left side expression %v evaluate to %v, not number", operator, left, left.ReturnType()) + return nil, errInvalidDataType(err) + } + + if !right.ReturnType().isNumberKind() { + err := fmt.Errorf("operator %v: right side expression %v evaluate to %v; not number", operator, right, right.ReturnType()) + return nil, errInvalidDataType(err) + } + + funcType := arithmeticFunction + if left.Type() == aggregateFunction || right.Type() == aggregateFunction { + funcType = aggregateFunction + switch left.Type() { + case Int, Float, aggregateFunction: + default: + err := fmt.Errorf("operator %v: left side expression %v return type %v is incompatible for aggregate evaluation", operator, left, left.Type()) + return nil, errUnsupportedSQLOperation(err) + } + + switch right.Type() { + case Int, Float, aggregateFunction: + default: + err := fmt.Errorf("operator %v: right side expression %v return type %v is incompatible for aggregate evaluation", operator, right, right.Type()) + return nil, errUnsupportedSQLOperation(err) + } + } + + return &arithExpr{ + left: left, + right: right, + operator: operator, + funcType: funcType, + }, nil +} diff --git 
a/pkg/s3select/sql/compexpr.go b/pkg/s3select/sql/compexpr.go new file mode 100644 index 000000000..113894f02 --- /dev/null +++ b/pkg/s3select/sql/compexpr.go @@ -0,0 +1,636 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sql + +import ( + "fmt" + "regexp" + "strings" +) + +// ComparisonOperator - comparison operator. +type ComparisonOperator string + +const ( + // Equal operator '='. + Equal ComparisonOperator = "=" + + // NotEqual operator '!=' or '<>'. + NotEqual ComparisonOperator = "!=" + + // LessThan operator '<'. + LessThan ComparisonOperator = "<" + + // GreaterThan operator '>'. + GreaterThan ComparisonOperator = ">" + + // LessThanEqual operator '<='. + LessThanEqual ComparisonOperator = "<=" + + // GreaterThanEqual operator '>='. 
+ GreaterThanEqual ComparisonOperator = ">=" + + // Between operator 'BETWEEN' + Between ComparisonOperator = "between" + + // In operator 'IN' + In ComparisonOperator = "in" + + // Like operator 'LIKE' + Like ComparisonOperator = "like" + + // NotBetween operator 'NOT BETWEEN' + NotBetween ComparisonOperator = "not between" + + // NotIn operator 'NOT IN' + NotIn ComparisonOperator = "not in" + + // NotLike operator 'NOT LIKE' + NotLike ComparisonOperator = "not like" + + // IsNull operator 'IS NULL' + IsNull ComparisonOperator = "is null" + + // IsNotNull operator 'IS NOT NULL' + IsNotNull ComparisonOperator = "is not null" +) + +// String - returns string representation of this operator. +func (operator ComparisonOperator) String() string { + return strings.ToUpper((string(operator))) +} + +func equal(leftValue, rightValue *Value) (bool, error) { + switch { + case leftValue.Type() == Null && rightValue.Type() == Null: + return true, nil + case leftValue.Type() == Bool && rightValue.Type() == Bool: + return leftValue.BoolValue() == rightValue.BoolValue(), nil + case (leftValue.Type() == Int || leftValue.Type() == Float) && + (rightValue.Type() == Int || rightValue.Type() == Float): + return leftValue.FloatValue() == rightValue.FloatValue(), nil + case leftValue.Type() == String && rightValue.Type() == String: + return leftValue.StringValue() == rightValue.StringValue(), nil + case leftValue.Type() == Timestamp && rightValue.Type() == Timestamp: + return leftValue.TimeValue() == rightValue.TimeValue(), nil + } + + return false, fmt.Errorf("left value type %v and right value type %v are incompatible for equality check", leftValue.Type(), rightValue.Type()) +} + +// comparisonExpr - comparison function. +type comparisonExpr struct { + left Expr + right Expr + to Expr + operator ComparisonOperator + funcType Type +} + +// String - returns string representation of this function. 
+func (f *comparisonExpr) String() string { + switch f.operator { + case Equal, NotEqual, LessThan, GreaterThan, LessThanEqual, GreaterThanEqual, In, Like, NotIn, NotLike: + return fmt.Sprintf("(%v %v %v)", f.left, f.operator, f.right) + case Between, NotBetween: + return fmt.Sprintf("(%v %v %v AND %v)", f.left, f.operator, f.right, f.to) + } + + return fmt.Sprintf("(%v %v %v %v)", f.left, f.right, f.to, f.operator) +} + +func (f *comparisonExpr) equal(leftValue, rightValue *Value) (*Value, error) { + result, err := equal(leftValue, rightValue) + if err != nil { + err = fmt.Errorf("%v: %v", f, err) + return nil, errExternalEvalException(err) + } + + return NewBool(result), nil +} + +func (f *comparisonExpr) notEqual(leftValue, rightValue *Value) (*Value, error) { + result, err := equal(leftValue, rightValue) + if err != nil { + err = fmt.Errorf("%v: %v", f, err) + return nil, errExternalEvalException(err) + } + + return NewBool(!result), nil +} + +func (f *comparisonExpr) lessThan(leftValue, rightValue *Value) (*Value, error) { + if !leftValue.Type().isNumber() { + err := fmt.Errorf("%v: left side expression evaluated to %v; not to number", f, leftValue.Type()) + return nil, errExternalEvalException(err) + } + if !rightValue.Type().isNumber() { + err := fmt.Errorf("%v: right side expression evaluated to %v; not to number", f, rightValue.Type()) + return nil, errExternalEvalException(err) + } + + return NewBool(leftValue.FloatValue() < rightValue.FloatValue()), nil +} + +func (f *comparisonExpr) greaterThan(leftValue, rightValue *Value) (*Value, error) { + if !leftValue.Type().isNumber() { + err := fmt.Errorf("%v: left side expression evaluated to %v; not to number", f, leftValue.Type()) + return nil, errExternalEvalException(err) + } + if !rightValue.Type().isNumber() { + err := fmt.Errorf("%v: right side expression evaluated to %v; not to number", f, rightValue.Type()) + return nil, errExternalEvalException(err) + } + + return NewBool(leftValue.FloatValue() > 
rightValue.FloatValue()), nil +} + +func (f *comparisonExpr) lessThanEqual(leftValue, rightValue *Value) (*Value, error) { + if !leftValue.Type().isNumber() { + err := fmt.Errorf("%v: left side expression evaluated to %v; not to number", f, leftValue.Type()) + return nil, errExternalEvalException(err) + } + if !rightValue.Type().isNumber() { + err := fmt.Errorf("%v: right side expression evaluated to %v; not to number", f, rightValue.Type()) + return nil, errExternalEvalException(err) + } + + return NewBool(leftValue.FloatValue() <= rightValue.FloatValue()), nil +} + +func (f *comparisonExpr) greaterThanEqual(leftValue, rightValue *Value) (*Value, error) { + if !leftValue.Type().isNumber() { + err := fmt.Errorf("%v: left side expression evaluated to %v; not to number", f, leftValue.Type()) + return nil, errExternalEvalException(err) + } + if !rightValue.Type().isNumber() { + err := fmt.Errorf("%v: right side expression evaluated to %v; not to number", f, rightValue.Type()) + return nil, errExternalEvalException(err) + } + + return NewBool(leftValue.FloatValue() >= rightValue.FloatValue()), nil +} + +func (f *comparisonExpr) computeBetween(leftValue, fromValue, toValue *Value) (bool, error) { + if !leftValue.Type().isNumber() { + err := fmt.Errorf("%v: left side expression evaluated to %v; not to number", f, leftValue.Type()) + return false, errExternalEvalException(err) + } + if !fromValue.Type().isNumber() { + err := fmt.Errorf("%v: from side expression evaluated to %v; not to number", f, fromValue.Type()) + return false, errExternalEvalException(err) + } + if !toValue.Type().isNumber() { + err := fmt.Errorf("%v: to side expression evaluated to %v; not to number", f, toValue.Type()) + return false, errExternalEvalException(err) + } + + return leftValue.FloatValue() >= fromValue.FloatValue() && + leftValue.FloatValue() <= toValue.FloatValue(), nil +} + +func (f *comparisonExpr) between(leftValue, fromValue, toValue *Value) (*Value, error) { + result, err := 
f.computeBetween(leftValue, fromValue, toValue) + if err != nil { + return nil, err + } + + return NewBool(result), nil +} + +func (f *comparisonExpr) notBetween(leftValue, fromValue, toValue *Value) (*Value, error) { + result, err := f.computeBetween(leftValue, fromValue, toValue) + if err != nil { + return nil, err + } + + return NewBool(!result), nil +} + +func (f *comparisonExpr) computeIn(leftValue, rightValue *Value) (found bool, err error) { + if rightValue.Type() != Array { + err := fmt.Errorf("%v: right side expression evaluated to %v; not to Array", f, rightValue.Type()) + return false, errExternalEvalException(err) + } + + values := rightValue.ArrayValue() + + for i := range values { + found, err = equal(leftValue, values[i]) + if err != nil { + return false, err + } + + if found { + return true, nil + } + } + + return false, nil +} + +func (f *comparisonExpr) in(leftValue, rightValue *Value) (*Value, error) { + result, err := f.computeIn(leftValue, rightValue) + if err != nil { + err = fmt.Errorf("%v: %v", f, err) + return nil, errExternalEvalException(err) + } + + return NewBool(result), nil +} + +func (f *comparisonExpr) notIn(leftValue, rightValue *Value) (*Value, error) { + result, err := f.computeIn(leftValue, rightValue) + if err != nil { + err = fmt.Errorf("%v: %v", f, err) + return nil, errExternalEvalException(err) + } + + return NewBool(!result), nil +} + +func (f *comparisonExpr) computeLike(leftValue, rightValue *Value) (matched bool, err error) { + if leftValue.Type() != String { + err := fmt.Errorf("%v: left side expression evaluated to %v; not to string", f, leftValue.Type()) + return false, errExternalEvalException(err) + } + if rightValue.Type() != String { + err := fmt.Errorf("%v: right side expression evaluated to %v; not to string", f, rightValue.Type()) + return false, errExternalEvalException(err) + } + + matched, err = regexp.MatchString(rightValue.StringValue(), leftValue.StringValue()) + if err != nil { + err = fmt.Errorf("%v: 
%v", f, err) + return false, errExternalEvalException(err) + } + + return matched, nil +} + +func (f *comparisonExpr) like(leftValue, rightValue *Value) (*Value, error) { + result, err := f.computeLike(leftValue, rightValue) + if err != nil { + return nil, err + } + + return NewBool(result), nil +} + +func (f *comparisonExpr) notLike(leftValue, rightValue *Value) (*Value, error) { + result, err := f.computeLike(leftValue, rightValue) + if err != nil { + return nil, err + } + + return NewBool(!result), nil +} + +func (f *comparisonExpr) compute(leftValue, rightValue, toValue *Value) (*Value, error) { + switch f.operator { + case Equal: + return f.equal(leftValue, rightValue) + case NotEqual: + return f.notEqual(leftValue, rightValue) + case LessThan: + return f.lessThan(leftValue, rightValue) + case GreaterThan: + return f.greaterThan(leftValue, rightValue) + case LessThanEqual: + return f.lessThanEqual(leftValue, rightValue) + case GreaterThanEqual: + return f.greaterThanEqual(leftValue, rightValue) + case Between: + return f.between(leftValue, rightValue, toValue) + case In: + return f.in(leftValue, rightValue) + case Like: + return f.like(leftValue, rightValue) + case NotBetween: + return f.notBetween(leftValue, rightValue, toValue) + case NotIn: + return f.notIn(leftValue, rightValue) + case NotLike: + return f.notLike(leftValue, rightValue) + } + + panic(fmt.Errorf("unexpected expression %v", f)) +} + +// Call - evaluates this function for given arg values and returns result as Value. 
+func (f *comparisonExpr) Eval(record Record) (*Value, error) { + leftValue, err := f.left.Eval(record) + if err != nil { + return nil, err + } + + rightValue, err := f.right.Eval(record) + if err != nil { + return nil, err + } + + var toValue *Value + if f.to != nil { + toValue, err = f.to.Eval(record) + if err != nil { + return nil, err + } + } + + if f.funcType == aggregateFunction { + return nil, nil + } + + return f.compute(leftValue, rightValue, toValue) +} + +// AggregateValue - returns aggregated value. +func (f *comparisonExpr) AggregateValue() (*Value, error) { + if f.funcType != aggregateFunction { + err := fmt.Errorf("%v is not aggreate expression", f) + return nil, errExternalEvalException(err) + } + + leftValue, err := f.left.AggregateValue() + if err != nil { + return nil, err + } + + rightValue, err := f.right.AggregateValue() + if err != nil { + return nil, err + } + + var toValue *Value + if f.to != nil { + toValue, err = f.to.AggregateValue() + if err != nil { + return nil, err + } + } + + return f.compute(leftValue, rightValue, toValue) +} + +// Type - returns comparisonFunction or aggregateFunction type. +func (f *comparisonExpr) Type() Type { + return f.funcType +} + +// ReturnType - returns Bool as return type. +func (f *comparisonExpr) ReturnType() Type { + return Bool +} + +// newComparisonExpr - creates new comparison function. 
+func newComparisonExpr(operator ComparisonOperator, funcs ...Expr) (*comparisonExpr, error) { + funcType := comparisonFunction + switch operator { + case Equal, NotEqual: + if len(funcs) != 2 { + panic(fmt.Sprintf("exactly two arguments are expected, but found %v", len(funcs))) + } + + left := funcs[0] + if !left.ReturnType().isBaseKind() { + err := fmt.Errorf("operator %v: left side expression %v evaluate to %v is incompatible for equality check", operator, left, left.ReturnType()) + return nil, errInvalidDataType(err) + } + + right := funcs[1] + if !right.ReturnType().isBaseKind() { + err := fmt.Errorf("operator %v: right side expression %v evaluate to %v is incompatible for equality check", operator, right, right.ReturnType()) + return nil, errInvalidDataType(err) + } + + if left.Type() == aggregateFunction || right.Type() == aggregateFunction { + funcType = aggregateFunction + switch left.Type() { + case column, Array, function, arithmeticFunction, comparisonFunction, logicalFunction, record: + err := fmt.Errorf("operator %v: left side expression %v return type %v is incompatible for equality check", operator, left, left.Type()) + return nil, errUnsupportedSQLOperation(err) + } + switch right.Type() { + case column, Array, function, arithmeticFunction, comparisonFunction, logicalFunction, record: + err := fmt.Errorf("operator %v: right side expression %v return type %v is incompatible for equality check", operator, right, right.Type()) + return nil, errUnsupportedSQLOperation(err) + } + } + + return &comparisonExpr{ + left: left, + right: right, + operator: operator, + funcType: funcType, + }, nil + + case LessThan, GreaterThan, LessThanEqual, GreaterThanEqual: + if len(funcs) != 2 { + panic(fmt.Sprintf("exactly two arguments are expected, but found %v", len(funcs))) + } + + left := funcs[0] + if !left.ReturnType().isNumberKind() { + err := fmt.Errorf("operator %v: left side expression %v evaluate to %v, not number", operator, left, left.ReturnType()) + return 
nil, errInvalidDataType(err) + } + + right := funcs[1] + if !right.ReturnType().isNumberKind() { + err := fmt.Errorf("operator %v: right side expression %v evaluate to %v; not number", operator, right, right.ReturnType()) + return nil, errInvalidDataType(err) + } + + if left.Type() == aggregateFunction || right.Type() == aggregateFunction { + funcType = aggregateFunction + switch left.Type() { + case Int, Float, aggregateFunction: + default: + err := fmt.Errorf("operator %v: left side expression %v return type %v is incompatible for aggregate evaluation", operator, left, left.Type()) + return nil, errUnsupportedSQLOperation(err) + } + + switch right.Type() { + case Int, Float, aggregateFunction: + default: + err := fmt.Errorf("operator %v: right side expression %v return type %v is incompatible for aggregate evaluation", operator, right, right.Type()) + return nil, errUnsupportedSQLOperation(err) + } + } + + return &comparisonExpr{ + left: left, + right: right, + operator: operator, + funcType: funcType, + }, nil + + case In, NotIn: + if len(funcs) != 2 { + panic(fmt.Sprintf("exactly two arguments are expected, but found %v", len(funcs))) + } + + left := funcs[0] + if !left.ReturnType().isBaseKind() { + err := fmt.Errorf("operator %v: left side expression %v evaluate to %v is incompatible for equality check", operator, left, left.ReturnType()) + return nil, errInvalidDataType(err) + } + + right := funcs[1] + if right.ReturnType() != Array { + err := fmt.Errorf("operator %v: right side expression %v evaluate to %v is incompatible for equality check", operator, right, right.ReturnType()) + return nil, errInvalidDataType(err) + } + + if left.Type() == aggregateFunction || right.Type() == aggregateFunction { + funcType = aggregateFunction + switch left.Type() { + case column, Array, function, arithmeticFunction, comparisonFunction, logicalFunction, record: + err := fmt.Errorf("operator %v: left side expression %v return type %v is incompatible for aggregate 
evaluation", operator, left, left.Type()) + return nil, errUnsupportedSQLOperation(err) + } + switch right.Type() { + case Array, aggregateFunction: + default: + err := fmt.Errorf("operator %v: right side expression %v return type %v is incompatible for aggregate evaluation", operator, right, right.Type()) + return nil, errUnsupportedSQLOperation(err) + } + } + + return &comparisonExpr{ + left: left, + right: right, + operator: operator, + funcType: funcType, + }, nil + + case Like, NotLike: + if len(funcs) != 2 { + panic(fmt.Sprintf("exactly two arguments are expected, but found %v", len(funcs))) + } + + left := funcs[0] + if !left.ReturnType().isStringKind() { + err := fmt.Errorf("operator %v: left side expression %v evaluate to %v, not string", operator, left, left.ReturnType()) + return nil, errLikeInvalidInputs(err) + } + + right := funcs[1] + if !right.ReturnType().isStringKind() { + err := fmt.Errorf("operator %v: right side expression %v evaluate to %v, not string", operator, right, right.ReturnType()) + return nil, errLikeInvalidInputs(err) + } + + if left.Type() == aggregateFunction || right.Type() == aggregateFunction { + funcType = aggregateFunction + switch left.Type() { + case String, aggregateFunction: + default: + err := fmt.Errorf("operator %v: left side expression %v return type %v is incompatible for aggregate evaluation", operator, left, left.Type()) + return nil, errUnsupportedSQLOperation(err) + } + switch right.Type() { + case String, aggregateFunction: + default: + err := fmt.Errorf("operator %v: right side expression %v return type %v is incompatible for aggregate evaluation", operator, right, right.Type()) + return nil, errUnsupportedSQLOperation(err) + } + } + + return &comparisonExpr{ + left: left, + right: right, + operator: operator, + funcType: funcType, + }, nil + case Between, NotBetween: + if len(funcs) != 3 { + panic(fmt.Sprintf("too many values in funcs %v", funcs)) + } + + left := funcs[0] + if !left.ReturnType().isNumberKind() 
{ + err := fmt.Errorf("operator %v: left side expression %v evaluate to %v, not number", operator, left, left.ReturnType()) + return nil, errInvalidDataType(err) + } + + from := funcs[1] + if !from.ReturnType().isNumberKind() { + err := fmt.Errorf("operator %v: from expression %v evaluate to %v, not number", operator, from, from.ReturnType()) + return nil, errInvalidDataType(err) + } + + to := funcs[2] + if !to.ReturnType().isNumberKind() { + err := fmt.Errorf("operator %v: to expression %v evaluate to %v, not number", operator, to, to.ReturnType()) + return nil, errInvalidDataType(err) + } + + if left.Type() == aggregateFunction || from.Type() == aggregateFunction || to.Type() == aggregateFunction { + funcType = aggregateFunction + switch left.Type() { + case Int, Float, aggregateFunction: + default: + err := fmt.Errorf("operator %v: left side expression %v return type %v is incompatible for aggregate evaluation", operator, left, left.Type()) + return nil, errUnsupportedSQLOperation(err) + } + switch from.Type() { + case Int, Float, aggregateFunction: + default: + err := fmt.Errorf("operator %v: from expression %v return type %v is incompatible for aggregate evaluation", operator, from, from.Type()) + return nil, errUnsupportedSQLOperation(err) + } + switch to.Type() { + case Int, Float, aggregateFunction: + default: + err := fmt.Errorf("operator %v: to expression %v return type %v is incompatible for aggregate evaluation", operator, to, to.Type()) + return nil, errUnsupportedSQLOperation(err) + } + } + + return &comparisonExpr{ + left: left, + right: from, + to: to, + operator: operator, + funcType: funcType, + }, nil + case IsNull, IsNotNull: + if len(funcs) != 1 { + panic(fmt.Sprintf("too many values in funcs %v", funcs)) + } + + if funcs[0].Type() == aggregateFunction { + funcType = aggregateFunction + } + + if operator == IsNull { + operator = Equal + } else { + operator = NotEqual + } + + return &comparisonExpr{ + left: funcs[0], + right: 
newValueExpr(NewNull()), + operator: operator, + funcType: funcType, + }, nil + } + + return nil, errParseUnknownOperator(fmt.Errorf("unknown operator %v", operator)) +} diff --git a/pkg/s3select/sql/errors.go b/pkg/s3select/sql/errors.go new file mode 100644 index 000000000..f97444a27 --- /dev/null +++ b/pkg/s3select/sql/errors.go @@ -0,0 +1,215 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sql + +type s3Error struct { + code string + message string + statusCode int + cause error +} + +func (err *s3Error) Cause() error { + return err.cause +} + +func (err *s3Error) ErrorCode() string { + return err.code +} + +func (err *s3Error) ErrorMessage() string { + return err.message +} + +func (err *s3Error) HTTPStatusCode() int { + return err.statusCode +} + +func (err *s3Error) Error() string { + return err.message +} + +func errUnsupportedSQLStructure(err error) *s3Error { + return &s3Error{ + code: "UnsupportedSqlStructure", + message: "Encountered an unsupported SQL structure. 
Check the SQL Reference.", + statusCode: 400, + cause: err, + } +} + +func errParseUnsupportedSelect(err error) *s3Error { + return &s3Error{ + code: "ParseUnsupportedSelect", + message: "The SQL expression contains an unsupported use of SELECT.", + statusCode: 400, + cause: err, + } +} + +func errParseAsteriskIsNotAloneInSelectList(err error) *s3Error { + return &s3Error{ + code: "ParseAsteriskIsNotAloneInSelectList", + message: "Other expressions are not allowed in the SELECT list when '*' is used without dot notation in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseInvalidContextForWildcardInSelectList(err error) *s3Error { + return &s3Error{ + code: "ParseInvalidContextForWildcardInSelectList", + message: "Invalid use of * in SELECT list in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errInvalidDataType(err error) *s3Error { + return &s3Error{ + code: "InvalidDataType", + message: "The SQL expression contains an invalid data type.", + statusCode: 400, + cause: err, + } +} + +func errUnsupportedFunction(err error) *s3Error { + return &s3Error{ + code: "UnsupportedFunction", + message: "Encountered an unsupported SQL function.", + statusCode: 400, + cause: err, + } +} + +func errParseNonUnaryAgregateFunctionCall(err error) *s3Error { + return &s3Error{ + code: "ParseNonUnaryAgregateFunctionCall", + message: "Only one argument is supported for aggregate functions in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errIncorrectSQLFunctionArgumentType(err error) *s3Error { + return &s3Error{ + code: "IncorrectSqlFunctionArgumentType", + message: "Incorrect type of arguments in function call in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errEvaluatorInvalidArguments(err error) *s3Error { + return &s3Error{ + code: "EvaluatorInvalidArguments", + message: "Incorrect number of arguments in the function call in the SQL expression.", + statusCode: 400, + cause: err, + } 
+} + +func errUnsupportedSQLOperation(err error) *s3Error { + return &s3Error{ + code: "UnsupportedSqlOperation", + message: "Encountered an unsupported SQL operation.", + statusCode: 400, + cause: err, + } +} + +func errParseUnknownOperator(err error) *s3Error { + return &s3Error{ + code: "ParseUnknownOperator", + message: "The SQL expression contains an invalid operator.", + statusCode: 400, + cause: err, + } +} + +func errLikeInvalidInputs(err error) *s3Error { + return &s3Error{ + code: "LikeInvalidInputs", + message: "Invalid argument given to the LIKE clause in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errExternalEvalException(err error) *s3Error { + return &s3Error{ + code: "ExternalEvalException", + message: "The query cannot be evaluated. Check the file and try again.", + statusCode: 400, + cause: err, + } +} + +func errValueParseFailure(err error) *s3Error { + return &s3Error{ + code: "ValueParseFailure", + message: "Time stamp parse failure in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errEvaluatorBindingDoesNotExist(err error) *s3Error { + return &s3Error{ + code: "EvaluatorBindingDoesNotExist", + message: "A column name or a path provided does not exist in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errInternalError(err error) *s3Error { + return &s3Error{ + code: "InternalError", + message: "Encountered an internal error.", + statusCode: 500, + cause: err, + } +} + +func errParseInvalidTypeParam(err error) *s3Error { + return &s3Error{ + code: "ParseInvalidTypeParam", + message: "The SQL expression contains an invalid parameter value.", + statusCode: 400, + cause: err, + } +} + +func errParseUnsupportedSyntax(err error) *s3Error { + return &s3Error{ + code: "ParseUnsupportedSyntax", + message: "The SQL expression contains unsupported syntax.", + statusCode: 400, + cause: err, + } +} + +func errInvalidKeyPath(err error) *s3Error { + return &s3Error{ + code: 
"InvalidKeyPath", + message: "Key path in the SQL expression is invalid.", + statusCode: 400, + cause: err, + } +} diff --git a/pkg/s3select/sql/expr.go b/pkg/s3select/sql/expr.go new file mode 100644 index 000000000..4258f8dfd --- /dev/null +++ b/pkg/s3select/sql/expr.go @@ -0,0 +1,160 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sql + +import ( + "fmt" +) + +// Expr - a SQL expression type. +type Expr interface { + AggregateValue() (*Value, error) + Eval(record Record) (*Value, error) + ReturnType() Type + Type() Type +} + +// aliasExpr - aliases expression by alias. +type aliasExpr struct { + alias string + expr Expr +} + +// String - returns string representation of this expression. +func (expr *aliasExpr) String() string { + return fmt.Sprintf("(%v AS %v)", expr.expr, expr.alias) +} + +// Eval - evaluates underlaying expression for given record and returns evaluated result. +func (expr *aliasExpr) Eval(record Record) (*Value, error) { + return expr.expr.Eval(record) +} + +// AggregateValue - returns aggregated value from underlaying expression. +func (expr *aliasExpr) AggregateValue() (*Value, error) { + return expr.expr.AggregateValue() +} + +// Type - returns underlaying expression type. +func (expr *aliasExpr) Type() Type { + return expr.expr.Type() +} + +// ReturnType - returns underlaying expression's return type. 
+func (expr *aliasExpr) ReturnType() Type { + return expr.expr.ReturnType() +} + +// newAliasExpr - creates new alias expression. +func newAliasExpr(alias string, expr Expr) *aliasExpr { + return &aliasExpr{alias, expr} +} + +// starExpr - asterisk (*) expression. +type starExpr struct { +} + +// String - returns string representation of this expression. +func (expr *starExpr) String() string { + return "*" +} + +// Eval - returns given args as map value. +func (expr *starExpr) Eval(record Record) (*Value, error) { + return newRecordValue(record), nil +} + +// AggregateValue - returns nil value. +func (expr *starExpr) AggregateValue() (*Value, error) { + return nil, nil +} + +// Type - returns record type. +func (expr *starExpr) Type() Type { + return record +} + +// ReturnType - returns record as return type. +func (expr *starExpr) ReturnType() Type { + return record +} + +// newStarExpr - returns new asterisk (*) expression. +func newStarExpr() *starExpr { + return &starExpr{} +} + +type valueExpr struct { + value *Value +} + +func (expr *valueExpr) String() string { + return expr.value.String() +} + +func (expr *valueExpr) Eval(record Record) (*Value, error) { + return expr.value, nil +} + +func (expr *valueExpr) AggregateValue() (*Value, error) { + return expr.value, nil +} + +func (expr *valueExpr) Type() Type { + return expr.value.Type() +} + +func (expr *valueExpr) ReturnType() Type { + return expr.value.Type() +} + +func newValueExpr(value *Value) *valueExpr { + return &valueExpr{value: value} +} + +type columnExpr struct { + name string +} + +func (expr *columnExpr) String() string { + return expr.name +} + +func (expr *columnExpr) Eval(record Record) (*Value, error) { + value, err := record.Get(expr.name) + if err != nil { + return nil, errEvaluatorBindingDoesNotExist(err) + } + + return value, nil +} + +func (expr *columnExpr) AggregateValue() (*Value, error) { + return nil, nil +} + +func (expr *columnExpr) Type() Type { + return column +} + +func (expr 
*columnExpr) ReturnType() Type { + return column +} + +func newColumnExpr(columnName string) *columnExpr { + return &columnExpr{name: columnName} +} diff --git a/pkg/s3select/sql/funcexpr.go b/pkg/s3select/sql/funcexpr.go new file mode 100644 index 000000000..9ea1ffed0 --- /dev/null +++ b/pkg/s3select/sql/funcexpr.go @@ -0,0 +1,550 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sql + +import ( + "fmt" + "strings" + "time" +) + +// FuncName - SQL function name. +type FuncName string + +const ( + // Avg - aggregate SQL function AVG(). + Avg FuncName = "AVG" + + // Count - aggregate SQL function COUNT(). + Count FuncName = "COUNT" + + // Max - aggregate SQL function MAX(). + Max FuncName = "MAX" + + // Min - aggregate SQL function MIN(). + Min FuncName = "MIN" + + // Sum - aggregate SQL function SUM(). + Sum FuncName = "SUM" + + // Coalesce - conditional SQL function COALESCE(). + Coalesce FuncName = "COALESCE" + + // NullIf - conditional SQL function NULLIF(). + NullIf FuncName = "NULLIF" + + // ToTimestamp - conversion SQL function TO_TIMESTAMP(). + ToTimestamp FuncName = "TO_TIMESTAMP" + + // UTCNow - date SQL function UTCNOW(). + UTCNow FuncName = "UTCNOW" + + // CharLength - string SQL function CHAR_LENGTH(). + CharLength FuncName = "CHAR_LENGTH" + + // CharacterLength - string SQL function CHARACTER_LENGTH() same as CHAR_LENGTH(). 
+ CharacterLength FuncName = "CHARACTER_LENGTH" + + // Lower - string SQL function LOWER(). + Lower FuncName = "LOWER" + + // Substring - string SQL function SUBSTRING(). + Substring FuncName = "SUBSTRING" + + // Trim - string SQL function TRIM(). + Trim FuncName = "TRIM" + + // Upper - string SQL function UPPER(). + Upper FuncName = "UPPER" + + // DateAdd FuncName = "DATE_ADD" + // DateDiff FuncName = "DATE_DIFF" + // Extract FuncName = "EXTRACT" + // ToString FuncName = "TO_STRING" + // Cast FuncName = "CAST" // CAST('2007-04-05T14:30Z' AS TIMESTAMP) +) + +func isAggregateFuncName(s string) bool { + switch FuncName(s) { + case Avg, Count, Max, Min, Sum: + return true + } + + return false +} + +func callForNumber(f Expr, record Record) (*Value, error) { + value, err := f.Eval(record) + if err != nil { + return nil, err + } + + if !value.Type().isNumber() { + err := fmt.Errorf("%v evaluated to %v; not to number", f, value.Type()) + return nil, errExternalEvalException(err) + } + + return value, nil +} + +func callForInt(f Expr, record Record) (*Value, error) { + value, err := f.Eval(record) + if err != nil { + return nil, err + } + + if value.Type() != Int { + err := fmt.Errorf("%v evaluated to %v; not to int", f, value.Type()) + return nil, errExternalEvalException(err) + } + + return value, nil +} + +func callForString(f Expr, record Record) (*Value, error) { + value, err := f.Eval(record) + if err != nil { + return nil, err + } + + if value.Type() != String { + err := fmt.Errorf("%v evaluated to %v; not to string", f, value.Type()) + return nil, errExternalEvalException(err) + } + + return value, nil +} + +// funcExpr - SQL function. +type funcExpr struct { + args []Expr + name FuncName + + sumValue float64 + countValue int64 + maxValue float64 + minValue float64 +} + +// String - returns string representation of this function. 
+func (f *funcExpr) String() string { + var argStrings []string + for _, arg := range f.args { + argStrings = append(argStrings, fmt.Sprintf("%v", arg)) + } + + return fmt.Sprintf("%v(%v)", f.name, strings.Join(argStrings, ",")) +} + +func (f *funcExpr) sum(record Record) (*Value, error) { + value, err := callForNumber(f.args[0], record) + if err != nil { + return nil, err + } + + f.sumValue += value.FloatValue() + f.countValue++ + return nil, nil +} + +func (f *funcExpr) count(record Record) (*Value, error) { + value, err := f.args[0].Eval(record) + if err != nil { + return nil, err + } + + if value.valueType != Null { + f.countValue++ + } + + return nil, nil +} + +func (f *funcExpr) max(record Record) (*Value, error) { + value, err := callForNumber(f.args[0], record) + if err != nil { + return nil, err + } + + v := value.FloatValue() + if v > f.maxValue { + f.maxValue = v + } + + return nil, nil +} + +func (f *funcExpr) min(record Record) (*Value, error) { + value, err := callForNumber(f.args[0], record) + if err != nil { + return nil, err + } + + v := value.FloatValue() + if v < f.minValue { + f.minValue = v + } + return nil, nil +} + +func (f *funcExpr) charLength(record Record) (*Value, error) { + value, err := callForString(f.args[0], record) + if err != nil { + return nil, err + } + + return NewInt(int64(len(value.StringValue()))), nil +} + +func (f *funcExpr) trim(record Record) (*Value, error) { + value, err := callForString(f.args[0], record) + if err != nil { + return nil, err + } + + return NewString(strings.TrimSpace(value.StringValue())), nil +} + +func (f *funcExpr) lower(record Record) (*Value, error) { + value, err := callForString(f.args[0], record) + if err != nil { + return nil, err + } + + return NewString(strings.ToLower(value.StringValue())), nil +} + +func (f *funcExpr) upper(record Record) (*Value, error) { + value, err := callForString(f.args[0], record) + if err != nil { + return nil, err + } + + return 
NewString(strings.ToUpper(value.StringValue())), nil +} + +func (f *funcExpr) substring(record Record) (*Value, error) { + stringValue, err := callForString(f.args[0], record) + if err != nil { + return nil, err + } + + offsetValue, err := callForInt(f.args[1], record) + if err != nil { + return nil, err + } + + var lengthValue *Value + if len(f.args) == 3 { + lengthValue, err = callForInt(f.args[2], record) + if err != nil { + return nil, err + } + } + + value := stringValue.StringValue() + offset := int(offsetValue.FloatValue()) + if offset < 0 || offset > len(value) { + offset = 0 + } + length := len(value) + if lengthValue != nil { + length = int(lengthValue.FloatValue()) + if length < 0 || length > len(value) { + length = len(value) + } + } + + return NewString(value[offset:length]), nil +} + +func (f *funcExpr) coalesce(record Record) (*Value, error) { + values := make([]*Value, len(f.args)) + var err error + for i := range f.args { + values[i], err = f.args[i].Eval(record) + if err != nil { + return nil, err + } + } + + for i := range values { + if values[i].Type() != Null { + return values[i], nil + } + } + + return values[0], nil +} + +func (f *funcExpr) nullIf(record Record) (*Value, error) { + value1, err := f.args[0].Eval(record) + if err != nil { + return nil, err + } + + value2, err := f.args[1].Eval(record) + if err != nil { + return nil, err + } + + result, err := equal(value1, value2) + if err != nil { + return nil, err + } + + if result { + return NewNull(), nil + } + + return value1, nil +} + +func (f *funcExpr) toTimeStamp(record Record) (*Value, error) { + value, err := callForString(f.args[0], record) + if err != nil { + return nil, err + } + + t, err := time.Parse(time.RFC3339, value.StringValue()) + if err != nil { + err := fmt.Errorf("%v: value '%v': %v", f, value, err) + return nil, errValueParseFailure(err) + } + + return NewTime(t), nil +} + +func (f *funcExpr) utcNow(record Record) (*Value, error) { + return NewTime(time.Now().UTC()), 
nil +} + +// Call - evaluates this function for given arg values and returns result as Value. +func (f *funcExpr) Eval(record Record) (*Value, error) { + switch f.name { + case Avg, Sum: + return f.sum(record) + case Count: + return f.count(record) + case Max: + return f.max(record) + case Min: + return f.min(record) + case Coalesce: + return f.coalesce(record) + case NullIf: + return f.nullIf(record) + case ToTimestamp: + return f.toTimeStamp(record) + case UTCNow: + return f.utcNow(record) + case Substring: + return f.substring(record) + case CharLength, CharacterLength: + return f.charLength(record) + case Trim: + return f.trim(record) + case Lower: + return f.lower(record) + case Upper: + return f.upper(record) + } + + panic(fmt.Sprintf("unsupported aggregate function %v", f.name)) +} + +// AggregateValue - returns aggregated value. +func (f *funcExpr) AggregateValue() (*Value, error) { + switch f.name { + case Avg: + return NewFloat(f.sumValue / float64(f.countValue)), nil + case Count: + return NewInt(f.countValue), nil + case Max: + return NewFloat(f.maxValue), nil + case Min: + return NewFloat(f.minValue), nil + case Sum: + return NewFloat(f.sumValue), nil + } + + err := fmt.Errorf("%v is not aggreate function", f) + return nil, errExternalEvalException(err) +} + +// Type - returns Function or aggregateFunction type. +func (f *funcExpr) Type() Type { + switch f.name { + case Avg, Count, Max, Min, Sum: + return aggregateFunction + } + + return function +} + +// ReturnType - returns respective primitive type depending on SQL function. +func (f *funcExpr) ReturnType() Type { + switch f.name { + case Avg, Max, Min, Sum: + return Float + case Count: + return Int + case CharLength, CharacterLength, Trim, Lower, Upper, Substring: + return String + case ToTimestamp, UTCNow: + return Timestamp + case Coalesce, NullIf: + return column + } + + return function +} + +// newFuncExpr - creates new SQL function. 
+func newFuncExpr(funcName FuncName, funcs ...Expr) (*funcExpr, error) { + switch funcName { + case Avg, Max, Min, Sum: + if len(funcs) != 1 { + err := fmt.Errorf("%v(): exactly one argument expected; got %v", funcName, len(funcs)) + return nil, errParseNonUnaryAgregateFunctionCall(err) + } + + if !funcs[0].ReturnType().isNumberKind() { + err := fmt.Errorf("%v(): argument %v evaluate to %v, not number", funcName, funcs[0], funcs[0].ReturnType()) + return nil, errIncorrectSQLFunctionArgumentType(err) + } + + return &funcExpr{ + args: funcs, + name: funcName, + }, nil + + case Count: + if len(funcs) != 1 { + err := fmt.Errorf("%v(): exactly one argument expected; got %v", funcName, len(funcs)) + return nil, errParseNonUnaryAgregateFunctionCall(err) + } + + switch funcs[0].ReturnType() { + case Null, Bool, Int, Float, String, Timestamp, column, record: + default: + err := fmt.Errorf("%v(): argument %v evaluate to %v is incompatible", funcName, funcs[0], funcs[0].ReturnType()) + return nil, errIncorrectSQLFunctionArgumentType(err) + } + + return &funcExpr{ + args: funcs, + name: funcName, + }, nil + + case CharLength, CharacterLength, Trim, Lower, Upper, ToTimestamp: + if len(funcs) != 1 { + err := fmt.Errorf("%v(): exactly one argument expected; got %v", funcName, len(funcs)) + return nil, errEvaluatorInvalidArguments(err) + } + + if !funcs[0].ReturnType().isStringKind() { + err := fmt.Errorf("%v(): argument %v evaluate to %v, not string", funcName, funcs[0], funcs[0].ReturnType()) + return nil, errIncorrectSQLFunctionArgumentType(err) + } + + return &funcExpr{ + args: funcs, + name: funcName, + }, nil + + case Coalesce: + if len(funcs) < 1 { + err := fmt.Errorf("%v(): one or more argument expected; got %v", funcName, len(funcs)) + return nil, errEvaluatorInvalidArguments(err) + } + + for i := range funcs { + if !funcs[i].ReturnType().isBaseKind() { + err := fmt.Errorf("%v(): argument-%v %v evaluate to %v is incompatible", funcName, i+1, funcs[i], 
funcs[i].ReturnType()) + return nil, errIncorrectSQLFunctionArgumentType(err) + } + } + + return &funcExpr{ + args: funcs, + name: funcName, + }, nil + + case NullIf: + if len(funcs) != 2 { + err := fmt.Errorf("%v(): exactly two arguments expected; got %v", funcName, len(funcs)) + return nil, errEvaluatorInvalidArguments(err) + } + + if !funcs[0].ReturnType().isBaseKind() { + err := fmt.Errorf("%v(): argument-1 %v evaluate to %v is incompatible", funcName, funcs[0], funcs[0].ReturnType()) + return nil, errIncorrectSQLFunctionArgumentType(err) + } + + if !funcs[1].ReturnType().isBaseKind() { + err := fmt.Errorf("%v(): argument-2 %v evaluate to %v is incompatible", funcName, funcs[1], funcs[1].ReturnType()) + return nil, errIncorrectSQLFunctionArgumentType(err) + } + + return &funcExpr{ + args: funcs, + name: funcName, + }, nil + + case UTCNow: + if len(funcs) != 0 { + err := fmt.Errorf("%v(): no argument expected; got %v", funcName, len(funcs)) + return nil, errEvaluatorInvalidArguments(err) + } + + return &funcExpr{ + args: funcs, + name: funcName, + }, nil + + case Substring: + if len(funcs) < 2 || len(funcs) > 3 { + err := fmt.Errorf("%v(): exactly two or three arguments expected; got %v", funcName, len(funcs)) + return nil, errEvaluatorInvalidArguments(err) + } + + if !funcs[0].ReturnType().isStringKind() { + err := fmt.Errorf("%v(): argument-1 %v evaluate to %v, not string", funcName, funcs[0], funcs[0].ReturnType()) + return nil, errIncorrectSQLFunctionArgumentType(err) + } + + if !funcs[1].ReturnType().isIntKind() { + err := fmt.Errorf("%v(): argument-2 %v evaluate to %v, not int", funcName, funcs[1], funcs[1].ReturnType()) + return nil, errIncorrectSQLFunctionArgumentType(err) + } + + if len(funcs) > 2 { + if !funcs[2].ReturnType().isIntKind() { + err := fmt.Errorf("%v(): argument-3 %v evaluate to %v, not int", funcName, funcs[2], funcs[2].ReturnType()) + return nil, errIncorrectSQLFunctionArgumentType(err) + } + } + + return &funcExpr{ + args: funcs, + 
name: funcName, + }, nil + } + + return nil, errUnsupportedFunction(fmt.Errorf("unknown function name %v", funcName)) +} diff --git a/pkg/s3select/sql/logicalexpr.go b/pkg/s3select/sql/logicalexpr.go new file mode 100644 index 000000000..d307200f7 --- /dev/null +++ b/pkg/s3select/sql/logicalexpr.go @@ -0,0 +1,336 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sql + +import "fmt" + +// andExpr - logical AND function. +type andExpr struct { + left Expr + right Expr + funcType Type +} + +// String - returns string representation of this function. +func (f *andExpr) String() string { + return fmt.Sprintf("(%v AND %v)", f.left, f.right) +} + +// Call - evaluates this function for given arg values and returns result as Value. 
// Eval - evaluates this AND expression for the given record.
// In aggregate mode both sides are evaluated only to accumulate their
// aggregation state and (nil, nil) is returned; otherwise the logical
// AND short-circuits on a false left side.
func (f *andExpr) Eval(record Record) (*Value, error) {
	leftValue, err := f.left.Eval(record)
	if err != nil {
		return nil, err
	}

	if f.funcType == aggregateFunction {
		// Aggregate mode: the right side must still run so it can
		// accumulate; no boolean result is produced per record.
		_, err = f.right.Eval(record)
		return nil, err
	}

	if leftValue.Type() != Bool {
		err := fmt.Errorf("%v: left side expression evaluated to %v; not to bool", f, leftValue.Type())
		return nil, errExternalEvalException(err)
	}

	// Short-circuit: false AND anything is false.
	if !leftValue.BoolValue() {
		return leftValue, nil
	}

	rightValue, err := f.right.Eval(record)
	if err != nil {
		return nil, err
	}
	if rightValue.Type() != Bool {
		err := fmt.Errorf("%v: right side expression evaluated to %v; not to bool", f, rightValue.Type())
		return nil, errExternalEvalException(err)
	}

	return rightValue, nil
}

// AggregateValue - returns the AND of both sides' aggregated boolean
// values; only valid when this expression was built in aggregate mode.
func (f *andExpr) AggregateValue() (*Value, error) {
	if f.funcType != aggregateFunction {
		err := fmt.Errorf("%v is not aggreate expression", f)
		return nil, errExternalEvalException(err)
	}

	leftValue, err := f.left.AggregateValue()
	if err != nil {
		return nil, err
	}
	if leftValue.Type() != Bool {
		err := fmt.Errorf("%v: left side expression evaluated to %v; not to bool", f, leftValue.Type())
		return nil, errExternalEvalException(err)
	}

	// Short-circuit on a false left aggregate, mirroring Eval().
	if !leftValue.BoolValue() {
		return leftValue, nil
	}

	rightValue, err := f.right.AggregateValue()
	if err != nil {
		return nil, err
	}
	if rightValue.Type() != Bool {
		err := fmt.Errorf("%v: right side expression evaluated to %v; not to bool", f, rightValue.Type())
		return nil, errExternalEvalException(err)
	}

	return rightValue, nil
}

// Type - returns logicalFunction or aggregateFunction type, as decided
// by newAndExpr() from the operand types.
func (f *andExpr) Type() Type {
	return f.funcType
}

// ReturnType - returns Bool as return type.
func (f *andExpr) ReturnType() Type {
	return Bool
}

// newAndExpr - creates new AND logical function.
+func newAndExpr(left, right Expr) (*andExpr, error) { + if !left.ReturnType().isBoolKind() { + err := fmt.Errorf("operator AND: left side expression %v evaluate to %v, not bool", left, left.ReturnType()) + return nil, errInvalidDataType(err) + } + + if !right.ReturnType().isBoolKind() { + err := fmt.Errorf("operator AND: right side expression %v evaluate to %v; not bool", right, right.ReturnType()) + return nil, errInvalidDataType(err) + } + + funcType := logicalFunction + if left.Type() == aggregateFunction || right.Type() == aggregateFunction { + funcType = aggregateFunction + if left.Type() == column { + err := fmt.Errorf("operator AND: left side expression %v return type %v is incompatible for aggregate evaluation", left, left.Type()) + return nil, errUnsupportedSQLOperation(err) + } + + if right.Type() == column { + err := fmt.Errorf("operator AND: right side expression %v return type %v is incompatible for aggregate evaluation", right, right.Type()) + return nil, errUnsupportedSQLOperation(err) + } + } + + return &andExpr{ + left: left, + right: right, + funcType: funcType, + }, nil +} + +// orExpr - logical OR function. +type orExpr struct { + left Expr + right Expr + funcType Type +} + +// String - returns string representation of this function. +func (f *orExpr) String() string { + return fmt.Sprintf("(%v OR %v)", f.left, f.right) +} + +// Call - evaluates this function for given arg values and returns result as Value. 
// Eval - evaluates this OR expression for the given record.
// In aggregate mode both sides are evaluated only to accumulate their
// aggregation state and (nil, nil) is returned; otherwise the logical
// OR short-circuits on a true left side.
func (f *orExpr) Eval(record Record) (*Value, error) {
	leftValue, err := f.left.Eval(record)
	if err != nil {
		return nil, err
	}

	if f.funcType == aggregateFunction {
		// Aggregate mode: the right side must still run so it can
		// accumulate; no boolean result is produced per record.
		_, err = f.right.Eval(record)
		return nil, err
	}

	if leftValue.Type() != Bool {
		err := fmt.Errorf("%v: left side expression evaluated to %v; not to bool", f, leftValue.Type())
		return nil, errExternalEvalException(err)
	}

	// Short-circuit: true OR anything is true.
	if leftValue.BoolValue() {
		return leftValue, nil
	}

	rightValue, err := f.right.Eval(record)
	if err != nil {
		return nil, err
	}
	if rightValue.Type() != Bool {
		err := fmt.Errorf("%v: right side expression evaluated to %v; not to bool", f, rightValue.Type())
		return nil, errExternalEvalException(err)
	}

	return rightValue, nil
}

// AggregateValue - returns the OR of both sides' aggregated boolean
// values; only valid when this expression was built in aggregate mode.
func (f *orExpr) AggregateValue() (*Value, error) {
	if f.funcType != aggregateFunction {
		err := fmt.Errorf("%v is not aggreate expression", f)
		return nil, errExternalEvalException(err)
	}

	leftValue, err := f.left.AggregateValue()
	if err != nil {
		return nil, err
	}
	if leftValue.Type() != Bool {
		err := fmt.Errorf("%v: left side expression evaluated to %v; not to bool", f, leftValue.Type())
		return nil, errExternalEvalException(err)
	}

	// Short-circuit on a true left aggregate, mirroring Eval().
	if leftValue.BoolValue() {
		return leftValue, nil
	}

	rightValue, err := f.right.AggregateValue()
	if err != nil {
		return nil, err
	}
	if rightValue.Type() != Bool {
		err := fmt.Errorf("%v: right side expression evaluated to %v; not to bool", f, rightValue.Type())
		return nil, errExternalEvalException(err)
	}

	return rightValue, nil
}

// Type - returns logicalFunction or aggregateFunction type, as decided
// by newOrExpr() from the operand types.
func (f *orExpr) Type() Type {
	return f.funcType
}

// ReturnType - returns Bool as return type.
func (f *orExpr) ReturnType() Type {
	return Bool
}

// newOrExpr - creates new OR logical function.
+func newOrExpr(left, right Expr) (*orExpr, error) { + if !left.ReturnType().isBoolKind() { + err := fmt.Errorf("operator OR: left side expression %v evaluate to %v, not bool", left, left.ReturnType()) + return nil, errInvalidDataType(err) + } + + if !right.ReturnType().isBoolKind() { + err := fmt.Errorf("operator OR: right side expression %v evaluate to %v; not bool", right, right.ReturnType()) + return nil, errInvalidDataType(err) + } + + funcType := logicalFunction + if left.Type() == aggregateFunction || right.Type() == aggregateFunction { + funcType = aggregateFunction + if left.Type() == column { + err := fmt.Errorf("operator OR: left side expression %v return type %v is incompatible for aggregate evaluation", left, left.Type()) + return nil, errUnsupportedSQLOperation(err) + } + + if right.Type() == column { + err := fmt.Errorf("operator OR: right side expression %v return type %v is incompatible for aggregate evaluation", right, right.Type()) + return nil, errUnsupportedSQLOperation(err) + } + } + + return &orExpr{ + left: left, + right: right, + funcType: funcType, + }, nil +} + +// notExpr - logical NOT function. +type notExpr struct { + right Expr + funcType Type +} + +// String - returns string representation of this function. +func (f *notExpr) String() string { + return fmt.Sprintf("(%v)", f.right) +} + +// Call - evaluates this function for given arg values and returns result as Value. +func (f *notExpr) Eval(record Record) (*Value, error) { + rightValue, err := f.right.Eval(record) + if err != nil { + return nil, err + } + + if f.funcType == aggregateFunction { + return nil, nil + } + + if rightValue.Type() != Bool { + err := fmt.Errorf("%v: right side expression evaluated to %v; not to bool", f, rightValue.Type()) + return nil, errExternalEvalException(err) + } + + return NewBool(!rightValue.BoolValue()), nil +} + +// AggregateValue - returns aggregated value. 
+func (f *notExpr) AggregateValue() (*Value, error) { + if f.funcType != aggregateFunction { + err := fmt.Errorf("%v is not aggreate expression", f) + return nil, errExternalEvalException(err) + } + + rightValue, err := f.right.AggregateValue() + if err != nil { + return nil, err + } + if rightValue.Type() != Bool { + err := fmt.Errorf("%v: right side expression evaluated to %v; not to bool", f, rightValue.Type()) + return nil, errExternalEvalException(err) + } + + return NewBool(!rightValue.BoolValue()), nil +} + +// Type - returns logicalFunction or aggregateFunction type. +func (f *notExpr) Type() Type { + return f.funcType +} + +// ReturnType - returns Bool as return type. +func (f *notExpr) ReturnType() Type { + return Bool +} + +// newNotExpr - creates new NOT logical function. +func newNotExpr(right Expr) (*notExpr, error) { + if !right.ReturnType().isBoolKind() { + err := fmt.Errorf("operator NOT: right side expression %v evaluate to %v; not bool", right, right.ReturnType()) + return nil, errInvalidDataType(err) + } + + funcType := logicalFunction + if right.Type() == aggregateFunction { + funcType = aggregateFunction + } + + return ¬Expr{ + right: right, + funcType: funcType, + }, nil +} diff --git a/pkg/s3select/sql/record.go b/pkg/s3select/sql/record.go new file mode 100644 index 000000000..76e61235b --- /dev/null +++ b/pkg/s3select/sql/record.go @@ -0,0 +1,25 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sql + +// Record - is a type containing columns and their values. +type Record interface { + Get(name string) (*Value, error) + Set(name string, value *Value) error + MarshalCSV(fieldDelimiter rune) ([]byte, error) + MarshalJSON() ([]byte, error) +} diff --git a/pkg/s3select/sql/sql.go b/pkg/s3select/sql/sql.go new file mode 100644 index 000000000..11b44b357 --- /dev/null +++ b/pkg/s3select/sql/sql.go @@ -0,0 +1,529 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sql + +import ( + "fmt" + "strings" + + "github.com/xwb1989/sqlparser" +) + +func getColumnName(colName *sqlparser.ColName) string { + columnName := colName.Qualifier.Name.String() + if qualifier := colName.Qualifier.Qualifier.String(); qualifier != "" { + columnName = qualifier + "." + columnName + } + + if columnName == "" { + columnName = colName.Name.String() + } else { + columnName = columnName + "." 
+ colName.Name.String() + } + + return columnName +} + +func newLiteralExpr(parserExpr sqlparser.Expr, tableAlias string) (Expr, error) { + switch parserExpr.(type) { + case *sqlparser.NullVal: + return newValueExpr(NewNull()), nil + case sqlparser.BoolVal: + return newValueExpr(NewBool((bool(parserExpr.(sqlparser.BoolVal))))), nil + case *sqlparser.SQLVal: + sqlValue := parserExpr.(*sqlparser.SQLVal) + value, err := NewValue(sqlValue) + if err != nil { + return nil, err + } + return newValueExpr(value), nil + case *sqlparser.ColName: + columnName := getColumnName(parserExpr.(*sqlparser.ColName)) + if tableAlias != "" { + if !strings.HasPrefix(columnName, tableAlias+".") { + err := fmt.Errorf("column name %v does not start with table alias %v", columnName, tableAlias) + return nil, errInvalidKeyPath(err) + } + columnName = strings.TrimPrefix(columnName, tableAlias+".") + } + + return newColumnExpr(columnName), nil + case sqlparser.ValTuple: + var valueType Type + var values []*Value + for i, valExpr := range parserExpr.(sqlparser.ValTuple) { + sqlVal, ok := valExpr.(*sqlparser.SQLVal) + if !ok { + return nil, errParseInvalidTypeParam(fmt.Errorf("value %v in Tuple should be primitive value", i+1)) + } + + val, err := NewValue(sqlVal) + if err != nil { + return nil, err + } + + if i == 0 { + valueType = val.Type() + } else if valueType != val.Type() { + return nil, errParseInvalidTypeParam(fmt.Errorf("mixed value type is not allowed in Tuple")) + } + + values = append(values, val) + } + + return newValueExpr(NewArray(values)), nil + } + + return nil, nil +} + +func isExprToComparisonExpr(parserExpr *sqlparser.IsExpr, tableAlias string, isSelectExpr bool) (Expr, error) { + leftExpr, err := newExpr(parserExpr.Expr, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + f, err := newComparisonExpr(ComparisonOperator(parserExpr.Operator), leftExpr) + if err != nil { + return nil, err + } + + if !leftExpr.Type().isBase() { + return f, nil + } + + value, err 
:= f.Eval(nil) + if err != nil { + return nil, err + } + + return newValueExpr(value), nil +} + +func rangeCondToComparisonFunc(parserExpr *sqlparser.RangeCond, tableAlias string, isSelectExpr bool) (Expr, error) { + leftExpr, err := newExpr(parserExpr.Left, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + fromExpr, err := newExpr(parserExpr.From, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + toExpr, err := newExpr(parserExpr.To, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + f, err := newComparisonExpr(ComparisonOperator(parserExpr.Operator), leftExpr, fromExpr, toExpr) + if err != nil { + return nil, err + } + + if !leftExpr.Type().isBase() || !fromExpr.Type().isBase() || !toExpr.Type().isBase() { + return f, nil + } + + value, err := f.Eval(nil) + if err != nil { + return nil, err + } + + return newValueExpr(value), nil +} + +func toComparisonExpr(parserExpr *sqlparser.ComparisonExpr, tableAlias string, isSelectExpr bool) (Expr, error) { + leftExpr, err := newExpr(parserExpr.Left, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + rightExpr, err := newExpr(parserExpr.Right, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + f, err := newComparisonExpr(ComparisonOperator(parserExpr.Operator), leftExpr, rightExpr) + if err != nil { + return nil, err + } + + if !leftExpr.Type().isBase() || !rightExpr.Type().isBase() { + return f, nil + } + + value, err := f.Eval(nil) + if err != nil { + return nil, err + } + + return newValueExpr(value), nil +} + +func toArithExpr(parserExpr *sqlparser.BinaryExpr, tableAlias string, isSelectExpr bool) (Expr, error) { + leftExpr, err := newExpr(parserExpr.Left, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + rightExpr, err := newExpr(parserExpr.Right, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + f, err := newArithExpr(ArithOperator(parserExpr.Operator), leftExpr, rightExpr) + if err 
!= nil { + return nil, err + } + + if !leftExpr.Type().isBase() || !rightExpr.Type().isBase() { + return f, nil + } + + value, err := f.Eval(nil) + if err != nil { + return nil, err + } + + return newValueExpr(value), nil +} + +func toFuncExpr(parserExpr *sqlparser.FuncExpr, tableAlias string, isSelectExpr bool) (Expr, error) { + funcName := strings.ToUpper(parserExpr.Name.String()) + if !isSelectExpr && isAggregateFuncName(funcName) { + return nil, errUnsupportedSQLOperation(fmt.Errorf("%v() must be used in select expression", funcName)) + } + funcs, aggregatedExprFound, err := newSelectExprs(parserExpr.Exprs, tableAlias) + if err != nil { + return nil, err + } + + if aggregatedExprFound { + return nil, errIncorrectSQLFunctionArgumentType(fmt.Errorf("%v(): aggregated expression must not be used as argument", funcName)) + } + + return newFuncExpr(FuncName(funcName), funcs...) +} + +func toAndExpr(parserExpr *sqlparser.AndExpr, tableAlias string, isSelectExpr bool) (Expr, error) { + leftExpr, err := newExpr(parserExpr.Left, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + rightExpr, err := newExpr(parserExpr.Right, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + f, err := newAndExpr(leftExpr, rightExpr) + if err != nil { + return nil, err + } + + if leftExpr.Type() != Bool || rightExpr.Type() != Bool { + return f, nil + } + + value, err := f.Eval(nil) + if err != nil { + return nil, err + } + + return newValueExpr(value), nil +} + +func toOrExpr(parserExpr *sqlparser.OrExpr, tableAlias string, isSelectExpr bool) (Expr, error) { + leftExpr, err := newExpr(parserExpr.Left, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + rightExpr, err := newExpr(parserExpr.Right, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + f, err := newOrExpr(leftExpr, rightExpr) + if err != nil { + return nil, err + } + + if leftExpr.Type() != Bool || rightExpr.Type() != Bool { + return f, nil + } + + value, 
err := f.Eval(nil) + if err != nil { + return nil, err + } + + return newValueExpr(value), nil +} + +func toNotExpr(parserExpr *sqlparser.NotExpr, tableAlias string, isSelectExpr bool) (Expr, error) { + rightExpr, err := newExpr(parserExpr.Expr, tableAlias, isSelectExpr) + if err != nil { + return nil, err + } + + f, err := newNotExpr(rightExpr) + if err != nil { + return nil, err + } + + if rightExpr.Type() != Bool { + return f, nil + } + + value, err := f.Eval(nil) + if err != nil { + return nil, err + } + + return newValueExpr(value), nil +} + +func newExpr(parserExpr sqlparser.Expr, tableAlias string, isSelectExpr bool) (Expr, error) { + f, err := newLiteralExpr(parserExpr, tableAlias) + if err != nil { + return nil, err + } + + if f != nil { + return f, nil + } + + switch parserExpr.(type) { + case *sqlparser.ParenExpr: + return newExpr(parserExpr.(*sqlparser.ParenExpr).Expr, tableAlias, isSelectExpr) + case *sqlparser.IsExpr: + return isExprToComparisonExpr(parserExpr.(*sqlparser.IsExpr), tableAlias, isSelectExpr) + case *sqlparser.RangeCond: + return rangeCondToComparisonFunc(parserExpr.(*sqlparser.RangeCond), tableAlias, isSelectExpr) + case *sqlparser.ComparisonExpr: + return toComparisonExpr(parserExpr.(*sqlparser.ComparisonExpr), tableAlias, isSelectExpr) + case *sqlparser.BinaryExpr: + return toArithExpr(parserExpr.(*sqlparser.BinaryExpr), tableAlias, isSelectExpr) + case *sqlparser.FuncExpr: + return toFuncExpr(parserExpr.(*sqlparser.FuncExpr), tableAlias, isSelectExpr) + case *sqlparser.AndExpr: + return toAndExpr(parserExpr.(*sqlparser.AndExpr), tableAlias, isSelectExpr) + case *sqlparser.OrExpr: + return toOrExpr(parserExpr.(*sqlparser.OrExpr), tableAlias, isSelectExpr) + case *sqlparser.NotExpr: + return toNotExpr(parserExpr.(*sqlparser.NotExpr), tableAlias, isSelectExpr) + } + + return nil, errParseUnsupportedSyntax(fmt.Errorf("unknown expression type %T; %v", parserExpr, parserExpr)) +} + +func newSelectExprs(parserSelectExprs 
[]sqlparser.SelectExpr, tableAlias string) ([]Expr, bool, error) { + var funcs []Expr + starExprFound := false + aggregatedExprFound := false + + for _, selectExpr := range parserSelectExprs { + switch selectExpr.(type) { + case *sqlparser.AliasedExpr: + if starExprFound { + return nil, false, errParseAsteriskIsNotAloneInSelectList(nil) + } + + aliasedExpr := selectExpr.(*sqlparser.AliasedExpr) + f, err := newExpr(aliasedExpr.Expr, tableAlias, true) + if err != nil { + return nil, false, err + } + + if f.Type() == aggregateFunction { + if !aggregatedExprFound { + aggregatedExprFound = true + if len(funcs) > 0 { + return nil, false, errParseUnsupportedSyntax(fmt.Errorf("expression must not mixed with aggregated expression")) + } + } + } else if aggregatedExprFound { + return nil, false, errParseUnsupportedSyntax(fmt.Errorf("expression must not mixed with aggregated expression")) + } + + alias := aliasedExpr.As.String() + if alias != "" { + f = newAliasExpr(alias, f) + } + + funcs = append(funcs, f) + case *sqlparser.StarExpr: + if starExprFound { + err := fmt.Errorf("only single star expression allowed") + return nil, false, errParseInvalidContextForWildcardInSelectList(err) + } + starExprFound = true + funcs = append(funcs, newStarExpr()) + default: + return nil, false, errParseUnsupportedSyntax(fmt.Errorf("unknown select expression %v", selectExpr)) + } + } + + return funcs, aggregatedExprFound, nil +} + +// Select - SQL Select statement. +type Select struct { + tableName string + tableAlias string + selectExprs []Expr + aggregatedExprFound bool + whereExpr Expr +} + +// TableAlias - returns table alias name. +func (statement *Select) TableAlias() string { + return statement.tableAlias +} + +// IsSelectAll - returns whether '*' is used in select expression or not. 
+func (statement *Select) IsSelectAll() bool { + if len(statement.selectExprs) == 1 { + _, ok := statement.selectExprs[0].(*starExpr) + return ok + } + + return false +} + +// IsAggregated - returns whether aggregated functions are used in select expression or not. +func (statement *Select) IsAggregated() bool { + return statement.aggregatedExprFound +} + +// AggregateResult - returns aggregate result as record. +func (statement *Select) AggregateResult(output Record) error { + if !statement.aggregatedExprFound { + return nil + } + + for i, expr := range statement.selectExprs { + value, err := expr.AggregateValue() + if err != nil { + return err + } + if value == nil { + return errInternalError(fmt.Errorf("%v returns for AggregateValue()", expr)) + } + + name := fmt.Sprintf("_%v", i+1) + if _, ok := expr.(*aliasExpr); ok { + name = expr.(*aliasExpr).alias + } + + if err = output.Set(name, value); err != nil { + return errInternalError(fmt.Errorf("error occurred to store value %v for %v; %v", value, name, err)) + } + } + + return nil +} + +// Eval - evaluates this Select expressions for given record. 
+func (statement *Select) Eval(input, output Record) (Record, error) { + if statement.whereExpr != nil { + value, err := statement.whereExpr.Eval(input) + if err != nil { + return nil, err + } + + if value == nil || value.valueType != Bool { + err = fmt.Errorf("WHERE expression %v returns invalid bool value %v", statement.whereExpr, value) + return nil, errInternalError(err) + } + + if !value.BoolValue() { + return nil, nil + } + } + + // Call selectExprs + for i, expr := range statement.selectExprs { + value, err := expr.Eval(input) + if err != nil { + return nil, err + } + + if statement.aggregatedExprFound { + continue + } + + name := fmt.Sprintf("_%v", i+1) + switch expr.(type) { + case *starExpr: + return value.recordValue(), nil + case *aliasExpr: + name = expr.(*aliasExpr).alias + case *columnExpr: + name = expr.(*columnExpr).name + } + + if err = output.Set(name, value); err != nil { + return nil, errInternalError(fmt.Errorf("error occurred to store value %v for %v; %v", value, name, err)) + } + } + + return output, nil +} + +// NewSelect - creates new Select by parsing sql. 
+func NewSelect(sql string) (*Select, error) { + stmt, err := sqlparser.Parse(sql) + if err != nil { + return nil, errUnsupportedSQLStructure(err) + } + + selectStmt, ok := stmt.(*sqlparser.Select) + if !ok { + return nil, errParseUnsupportedSelect(fmt.Errorf("unsupported SQL statement %v", sql)) + } + + var tableName, tableAlias string + for _, fromExpr := range selectStmt.From { + tableExpr := fromExpr.(*sqlparser.AliasedTableExpr) + tableName = tableExpr.Expr.(sqlparser.TableName).Name.String() + tableAlias = tableExpr.As.String() + } + + selectExprs, aggregatedExprFound, err := newSelectExprs(selectStmt.SelectExprs, tableAlias) + if err != nil { + return nil, err + } + + var whereExpr Expr + if selectStmt.Where != nil { + whereExpr, err = newExpr(selectStmt.Where.Expr, tableAlias, false) + if err != nil { + return nil, err + } + } + + return &Select{ + tableName: tableName, + tableAlias: tableAlias, + selectExprs: selectExprs, + aggregatedExprFound: aggregatedExprFound, + whereExpr: whereExpr, + }, nil +} diff --git a/pkg/s3select/sql/type.go b/pkg/s3select/sql/type.go new file mode 100644 index 000000000..cb591ab85 --- /dev/null +++ b/pkg/s3select/sql/type.go @@ -0,0 +1,118 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sql + +// Type - value type. +type Type string + +const ( + // Null - represents NULL value type. + Null Type = "null" + + // Bool - represents boolean value type. 
+ Bool Type = "bool" + + // Int - represents integer value type. + Int Type = "int" + + // Float - represents floating point value type. + Float Type = "float" + + // String - represents string value type. + String Type = "string" + + // Timestamp - represents time value type. + Timestamp Type = "timestamp" + + // Array - represents array of values where each value type is one of above. + Array Type = "array" + + column Type = "column" + record Type = "record" + function Type = "function" + aggregateFunction Type = "aggregatefunction" + arithmeticFunction Type = "arithmeticfunction" + comparisonFunction Type = "comparisonfunction" + logicalFunction Type = "logicalfunction" + + // Integer Type = "integer" // Same as Int + // Decimal Type = "decimal" // Same as Float + // Numeric Type = "numeric" // Same as Float +) + +func (t Type) isBase() bool { + switch t { + case Null, Bool, Int, Float, String, Timestamp: + return true + } + + return false +} + +func (t Type) isBaseKind() bool { + switch t { + case Null, Bool, Int, Float, String, Timestamp, column: + return true + } + + return false +} + +func (t Type) isNumber() bool { + switch t { + case Int, Float: + return true + } + + return false +} + +func (t Type) isNumberKind() bool { + switch t { + case Int, Float, column: + return true + } + + return false +} + +func (t Type) isIntKind() bool { + switch t { + case Int, column: + return true + } + + return false +} + +func (t Type) isBoolKind() bool { + switch t { + case Bool, column: + return true + } + + return false +} + +func (t Type) isStringKind() bool { + switch t { + case String, column: + return true + } + + return false +} diff --git a/pkg/s3select/sql/value.go b/pkg/s3select/sql/value.go new file mode 100644 index 000000000..365178792 --- /dev/null +++ b/pkg/s3select/sql/value.go @@ -0,0 +1,223 @@ +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sql + +import ( + "encoding/json" + "fmt" + "strconv" + "strings" + "time" + + "github.com/xwb1989/sqlparser" +) + +// Value - represents any primitive value of bool, int, float, string and time. +type Value struct { + value interface{} + valueType Type +} + +// String - represents value as string. +func (value *Value) String() string { + if value.value == nil { + if value.valueType == Null { + return "NULL" + } + + return "" + } + + switch value.valueType { + case String: + return fmt.Sprintf("'%v'", value.value) + case Array: + var valueStrings []string + for _, v := range value.value.([]*Value) { + valueStrings = append(valueStrings, fmt.Sprintf("%v", v)) + } + return fmt.Sprintf("(%v)", strings.Join(valueStrings, ",")) + } + + return fmt.Sprintf("%v", value.value) +} + +// CSVString - encodes to CSV string. +func (value *Value) CSVString() string { + return fmt.Sprintf("%v", value.value) +} + +// MarshalJSON - encodes to JSON data. +func (value *Value) MarshalJSON() ([]byte, error) { + return json.Marshal(value.value) +} + +// BoolValue - returns underlying bool value. It panics if value is not Bool type. +func (value *Value) BoolValue() bool { + if value.valueType == Bool { + return value.value.(bool) + } + + panic(fmt.Sprintf("requested bool value but found %T type", value.value)) +} + +// IntValue - returns underlying int value. It panics if value is not Int type. 
+func (value *Value) IntValue() int64 { + if value.valueType == Int { + return value.value.(int64) + } + + panic(fmt.Sprintf("requested int value but found %T type", value.value)) +} + +// FloatValue - returns underlying int/float value as float64. It panics if value is not Int/Float type. +func (value *Value) FloatValue() float64 { + switch value.valueType { + case Int: + return float64(value.value.(int64)) + case Float: + return value.value.(float64) + } + + panic(fmt.Sprintf("requested float value but found %T type", value.value)) +} + +// StringValue - returns underlying string value. It panics if value is not String type. +func (value *Value) StringValue() string { + if value.valueType == String { + return value.value.(string) + } + + panic(fmt.Sprintf("requested string value but found %T type", value.value)) +} + +// TimeValue - returns underlying time value. It panics if value is not Timestamp type. +func (value *Value) TimeValue() time.Time { + if value.valueType == Timestamp { + return value.value.(time.Time) + } + + panic(fmt.Sprintf("requested time value but found %T type", value.value)) +} + +// ArrayValue - returns underlying value array. It panics if value is not Array type. +func (value *Value) ArrayValue() []*Value { + if value.valueType == Array { + return value.value.([]*Value) + } + + panic(fmt.Sprintf("requested array value but found %T type", value.value)) +} + +func (value *Value) recordValue() Record { + if value.valueType == record { + return value.value.(Record) + } + + panic(fmt.Sprintf("requested record value but found %T type", value.value)) +} + +// Type - returns value type. +func (value *Value) Type() Type { + return value.valueType +} + +// Value - returns underneath value interface. +func (value *Value) Value() interface{} { + return value.value +} + +// NewNull - creates new null value. +func NewNull() *Value { + return &Value{nil, Null} +} + +// NewBool - creates new Bool value of b. 
+func NewBool(b bool) *Value { + return &Value{b, Bool} +} + +// NewInt - creates new Int value of i. +func NewInt(i int64) *Value { + return &Value{i, Int} +} + +// NewFloat - creates new Float value of f. +func NewFloat(f float64) *Value { + return &Value{f, Float} +} + +// NewString - creates new String value of s. +func NewString(s string) *Value { + return &Value{s, String} +} + +// NewTime - creates new Time value of t. +func NewTime(t time.Time) *Value { + return &Value{t, Timestamp} +} + +// NewArray - creates new Array value of values. +func NewArray(values []*Value) *Value { + return &Value{values, Array} +} + +func newRecordValue(r Record) *Value { + return &Value{r, record} +} + +// NewValue - creates new Value from SQLVal v. +func NewValue(v *sqlparser.SQLVal) (*Value, error) { + switch v.Type { + case sqlparser.StrVal: + return NewString(string(v.Val)), nil + case sqlparser.IntVal: + i64, err := strconv.ParseInt(string(v.Val), 10, 64) + if err != nil { + return nil, err + } + return NewInt(i64), nil + case sqlparser.FloatVal: + f64, err := strconv.ParseFloat(string(v.Val), 64) + if err != nil { + return nil, err + } + return NewFloat(f64), nil + case sqlparser.HexNum: // represented as 0xDD + i64, err := strconv.ParseInt(string(v.Val), 16, 64) + if err != nil { + return nil, err + } + return NewInt(i64), nil + case sqlparser.HexVal: // represented as X'0DD' + i64, err := strconv.ParseInt(string(v.Val), 16, 64) + if err != nil { + return nil, err + } + return NewInt(i64), nil + case sqlparser.BitVal: // represented as B'00' + i64, err := strconv.ParseInt(string(v.Val), 2, 64) + if err != nil { + return nil, err + } + return NewInt(i64), nil + case sqlparser.ValArg: + // FIXME: the format is unknown and not sure how to handle it. 
+ } + + return nil, fmt.Errorf("unknown SQL value %v; %v ", v, v.Type) +} diff --git a/pkg/s3select/testdata.parquet b/pkg/s3select/testdata.parquet new file mode 100644 index 000000000..0128ad1aa Binary files /dev/null and b/pkg/s3select/testdata.parquet differ diff --git a/pkg/s3select/unused-errors.go b/pkg/s3select/unused-errors.go new file mode 100644 index 000000000..9f6f0efa3 --- /dev/null +++ b/pkg/s3select/unused-errors.go @@ -0,0 +1,642 @@ +// +build ignore + +/* + * Minio Cloud Storage, (C) 2019 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package s3select + +/////////////////////////////////////////////////////////////////////// +// +// Validation errors. 
+// +/////////////////////////////////////////////////////////////////////// +func errExpressionTooLong(err error) *s3Error { + return &s3Error{ + code: "ExpressionTooLong", + message: "The SQL expression is too long: The maximum byte-length for the SQL expression is 256 KB.", + statusCode: 400, + cause: err, + } +} + +func errColumnTooLong(err error) *s3Error { + return &s3Error{ + code: "ColumnTooLong", + message: "The length of a column in the result is greater than maxCharsPerColumn of 1 MB.", + statusCode: 400, + cause: err, + } +} + +func errOverMaxColumn(err error) *s3Error { + return &s3Error{ + code: "OverMaxColumn", + message: "The number of columns in the result is greater than the maximum allowable number of columns.", + statusCode: 400, + cause: err, + } +} + +func errOverMaxRecordSize(err error) *s3Error { + return &s3Error{ + code: "OverMaxRecordSize", + message: "The length of a record in the input or result is greater than maxCharsPerRecord of 1 MB.", + statusCode: 400, + cause: err, + } +} + +func errInvalidColumnIndex(err error) *s3Error { + return &s3Error{ + code: "InvalidColumnIndex", + message: "Column index in the SQL expression is invalid.", + statusCode: 400, + cause: err, + } +} + +func errInvalidTextEncoding(err error) *s3Error { + return &s3Error{ + code: "InvalidTextEncoding", + message: "Invalid encoding type. Only UTF-8 encoding is supported.", + statusCode: 400, + cause: err, + } +} + +func errInvalidTableAlias(err error) *s3Error { + return &s3Error{ + code: "InvalidTableAlias", + message: "The SQL expression contains an invalid table alias.", + statusCode: 400, + cause: err, + } +} + +func errUnsupportedSyntax(err error) *s3Error { + return &s3Error{ + code: "UnsupportedSyntax", + message: "Encountered invalid syntax.", + statusCode: 400, + cause: err, + } +} + +func errAmbiguousFieldName(err error) *s3Error { + return &s3Error{ + code: "AmbiguousFieldName", + message: "Field name matches to multiple fields in the file. 
Check the SQL expression and the file, and try again.", + statusCode: 400, + cause: err, + } +} + +func errIntegerOverflow(err error) *s3Error { + return &s3Error{ + code: "IntegerOverflow", + message: "Integer overflow or underflow in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errIllegalSQLFunctionArgument(err error) *s3Error { + return &s3Error{ + code: "IllegalSqlFunctionArgument", + message: "Illegal argument was used in the SQL function.", + statusCode: 400, + cause: err, + } +} + +func errMultipleDataSourcesUnsupported(err error) *s3Error { + return &s3Error{ + code: "MultipleDataSourcesUnsupported", + message: "Multiple data sources are not supported.", + statusCode: 400, + cause: err, + } +} + +func errMissingHeaders(err error) *s3Error { + return &s3Error{ + code: "MissingHeaders", + message: "Some headers in the query are missing from the file. Check the file and try again.", + statusCode: 400, + cause: err, + } +} + +func errUnrecognizedFormatException(err error) *s3Error { + return &s3Error{ + code: "UnrecognizedFormatException", + message: "Encountered an invalid record type.", + statusCode: 400, + cause: err, + } +} + +////////////////////////////////////////////////////////////////////////////////////// +// +// SQL parsing errors. 
+// +////////////////////////////////////////////////////////////////////////////////////// +func errLexerInvalidChar(err error) *s3Error { + return &s3Error{ + code: "LexerInvalidChar", + message: "The SQL expression contains an invalid character.", + statusCode: 400, + cause: err, + } +} + +func errLexerInvalidOperator(err error) *s3Error { + return &s3Error{ + code: "LexerInvalidOperator", + message: "The SQL expression contains an invalid operator.", + statusCode: 400, + cause: err, + } +} + +func errLexerInvalidLiteral(err error) *s3Error { + return &s3Error{ + code: "LexerInvalidLiteral", + message: "The SQL expression contains an invalid literal.", + statusCode: 400, + cause: err, + } +} + +func errLexerInvalidIONLiteral(err error) *s3Error { + return &s3Error{ + code: "LexerInvalidIONLiteral", + message: "The SQL expression contains an invalid ION literal.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedDatePart(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedDatePart", + message: "Did not find the expected date part in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedKeyword(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedKeyword", + message: "Did not find the expected keyword in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedTokenType(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedTokenType", + message: "Did not find the expected token in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpected2TokenTypes(err error) *s3Error { + return &s3Error{ + code: "ParseExpected2TokenTypes", + message: "Did not find the expected token in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedNumber(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedNumber", + message: "Did not find the expected number in the SQL expression.", + statusCode: 400, + cause: err, + 
} +} + +func errParseExpectedRightParenBuiltinFunctionCall(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedRightParenBuiltinFunctionCall", + message: "Did not find the expected right parenthesis character in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedTypeName(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedTypeName", + message: "Did not find the expected type name in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedWhenClause(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedWhenClause", + message: "Did not find the expected WHEN clause in the SQL expression. CASE is not supported.", + statusCode: 400, + cause: err, + } +} + +func errParseUnsupportedToken(err error) *s3Error { + return &s3Error{ + code: "ParseUnsupportedToken", + message: "The SQL expression contains an unsupported token.", + statusCode: 400, + cause: err, + } +} + +func errParseUnsupportedLiteralsGroupBy(err error) *s3Error { + return &s3Error{ + code: "ParseUnsupportedLiteralsGroupBy", + message: "The SQL expression contains an unsupported use of GROUP BY.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedMember(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedMember", + message: "The SQL expression contains an unsupported use of MEMBER.", + statusCode: 400, + cause: err, + } +} + +func errParseUnsupportedCase(err error) *s3Error { + return &s3Error{ + code: "ParseUnsupportedCase", + message: "The SQL expression contains an unsupported use of CASE.", + statusCode: 400, + cause: err, + } +} + +func errParseUnsupportedCaseClause(err error) *s3Error { + return &s3Error{ + code: "ParseUnsupportedCaseClause", + message: "The SQL expression contains an unsupported use of CASE.", + statusCode: 400, + cause: err, + } +} + +func errParseUnsupportedAlias(err error) *s3Error { + return &s3Error{ + code: "ParseUnsupportedAlias", + message: "The SQL 
expression contains an unsupported use of ALIAS.", + statusCode: 400, + cause: err, + } +} + +func errParseInvalidPathComponent(err error) *s3Error { + return &s3Error{ + code: "ParseInvalidPathComponent", + message: "The SQL expression contains an invalid path component.", + statusCode: 400, + cause: err, + } +} + +func errParseMissingIdentAfterAt(err error) *s3Error { + return &s3Error{ + code: "ParseMissingIdentAfterAt", + message: "Did not find the expected identifier after the @ symbol in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseUnexpectedOperator(err error) *s3Error { + return &s3Error{ + code: "ParseUnexpectedOperator", + message: "The SQL expression contains an unexpected operator.", + statusCode: 400, + cause: err, + } +} + +func errParseUnexpectedTerm(err error) *s3Error { + return &s3Error{ + code: "ParseUnexpectedTerm", + message: "The SQL expression contains an unexpected term.", + statusCode: 400, + cause: err, + } +} + +func errParseUnexpectedToken(err error) *s3Error { + return &s3Error{ + code: "ParseUnexpectedToken", + message: "The SQL expression contains an unexpected token.", + statusCode: 400, + cause: err, + } +} + +func errParseUnExpectedKeyword(err error) *s3Error { + return &s3Error{ + code: "ParseUnExpectedKeyword", + message: "The SQL expression contains an unexpected keyword.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedExpression(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedExpression", + message: "Did not find the expected SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedLeftParenAfterCast(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedLeftParenAfterCast", + message: "Did not find the expected left parenthesis after CAST in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedLeftParenValueConstructor(err error) *s3Error { + return &s3Error{ + code: 
"ParseExpectedLeftParenValueConstructor", + message: "Did not find expected the left parenthesis in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedLeftParenBuiltinFunctionCall(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedLeftParenBuiltinFunctionCall", + message: "Did not find the expected left parenthesis in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedArgumentDelimiter(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedArgumentDelimiter", + message: "Did not find the expected argument delimiter in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseCastArity(err error) *s3Error { + return &s3Error{ + code: "ParseCastArity", + message: "The SQL expression CAST has incorrect arity.", + statusCode: 400, + cause: err, + } +} + +func errParseEmptySelect(err error) *s3Error { + return &s3Error{ + code: "ParseEmptySelect", + message: "The SQL expression contains an empty SELECT.", + statusCode: 400, + cause: err, + } +} + +func errParseSelectMissingFrom(err error) *s3Error { + return &s3Error{ + code: "ParseSelectMissingFrom", + message: "The SQL expression contains a missing FROM after SELECT list.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedIdentForGroupName(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedIdentForGroupName", + message: "GROUP is not supported in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedIdentForAlias(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedIdentForAlias", + message: "Did not find the expected identifier for the alias in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseUnsupportedCallWithStar(err error) *s3Error { + return &s3Error{ + code: "ParseUnsupportedCallWithStar", + message: "Only COUNT with (*) as a parameter is supported in the SQL expression.", + statusCode: 400, + cause: err, + 
} +} + +func errParseMalformedJoin(err error) *s3Error { + return &s3Error{ + code: "ParseMalformedJoin", + message: "JOIN is not supported in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseExpectedIdentForAt(err error) *s3Error { + return &s3Error{ + code: "ParseExpectedIdentForAt", + message: "Did not find the expected identifier for AT name in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errParseCannotMixSqbAndWildcardInSelectList(err error) *s3Error { + return &s3Error{ + code: "ParseCannotMixSqbAndWildcardInSelectList", + message: "Cannot mix [] and * in the same expression in a SELECT list in SQL expression.", + statusCode: 400, + cause: err, + } +} + +////////////////////////////////////////////////////////////////////////////////////// +// +// CAST() related errors. +// +////////////////////////////////////////////////////////////////////////////////////// +func errCastFailed(err error) *s3Error { + return &s3Error{ + code: "CastFailed", + message: "Attempt to convert from one data type to another using CAST failed in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errInvalidCast(err error) *s3Error { + return &s3Error{ + code: "InvalidCast", + message: "Attempt to convert from one data type to another using CAST failed in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errEvaluatorInvalidTimestampFormatPattern(err error) *s3Error { + return &s3Error{ + code: "EvaluatorInvalidTimestampFormatPattern", + message: "Invalid time stamp format string in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errEvaluatorInvalidTimestampFormatPatternAdditionalFieldsRequired(err error) *s3Error { + return &s3Error{ + code: "EvaluatorInvalidTimestampFormatPattern", + message: "Time stamp format pattern requires additional fields in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func 
errEvaluatorInvalidTimestampFormatPatternSymbolForParsing(err error) *s3Error { + return &s3Error{ + code: "EvaluatorInvalidTimestampFormatPatternSymbolForParsing", + message: "Time stamp format pattern contains a valid format symbol that cannot be applied to time stamp parsing in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errEvaluatorTimestampFormatPatternDuplicateFields(err error) *s3Error { + return &s3Error{ + code: "EvaluatorTimestampFormatPatternDuplicateFields", + message: "Time stamp format pattern contains multiple format specifiers representing the time stamp field in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errEvaluatorTimestampFormatPatternHourClockAmPmMismatch(err error) *s3Error { + return &s3Error{ + code: "EvaluatorTimestampFormatPatternHourClockAmPmMismatch", + message: "Time stamp format pattern contains a 12-hour hour of day format symbol but doesn't also contain an AM/PM field, or it contains a 24-hour hour of day format specifier and contains an AM/PM field in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errEvaluatorUnterminatedTimestampFormatPatternToken(err error) *s3Error { + return &s3Error{ + code: "EvaluatorUnterminatedTimestampFormatPatternToken", + message: "Time stamp format pattern contains unterminated token in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errEvaluatorInvalidTimestampFormatPatternToken(err error) *s3Error { + return &s3Error{ + code: "EvaluatorInvalidTimestampFormatPatternToken", + message: "Time stamp format pattern contains an invalid token in the SQL expression.", + statusCode: 400, + cause: err, + } +} + +func errEvaluatorInvalidTimestampFormatPatternSymbol(err error) *s3Error { + return &s3Error{ + code: "EvaluatorInvalidTimestampFormatPatternSymbol", + message: "Time stamp format pattern contains an invalid symbol in the SQL expression.", + statusCode: 400, + cause: err, + } +} + 
+//////////////////////////////////////////////////////////////////////// +// +// Generic S3 HTTP handler errors. +// +//////////////////////////////////////////////////////////////////////// +func errBusy(err error) *s3Error { + return &s3Error{ + code: "Busy", + message: "The service is unavailable. Please retry.", + statusCode: 503, + cause: err, + } +} + +func errUnauthorizedAccess(err error) *s3Error { + return &s3Error{ + code: "UnauthorizedAccess", + message: "You are not authorized to perform this operation", + statusCode: 401, + cause: err, + } +} + +func errEmptyRequestBody(err error) *s3Error { + return &s3Error{ + code: "EmptyRequestBody", + message: "Request body cannot be empty.", + statusCode: 400, + cause: err, + } +} + +func errUnsupportedRangeHeader(err error) *s3Error { + return &s3Error{ + code: "UnsupportedRangeHeader", + message: "Range header is not supported for this operation.", + statusCode: 400, + cause: err, + } +} + +func errUnsupportedStorageClass(err error) *s3Error { + return &s3Error{ + code: "UnsupportedStorageClass", + message: "Encountered an invalid storage class. Only STANDARD, STANDARD_IA, and ONEZONE_IA storage classes are supported.", + statusCode: 400, + cause: err, + } +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/application_exception.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/application_exception.go new file mode 100644 index 000000000..b9d7eedcd --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/application_exception.go @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +const ( + UNKNOWN_APPLICATION_EXCEPTION = 0 + UNKNOWN_METHOD = 1 + INVALID_MESSAGE_TYPE_EXCEPTION = 2 + WRONG_METHOD_NAME = 3 + BAD_SEQUENCE_ID = 4 + MISSING_RESULT = 5 + INTERNAL_ERROR = 6 + PROTOCOL_ERROR = 7 +) + +var defaultApplicationExceptionMessage = map[int32]string{ + UNKNOWN_APPLICATION_EXCEPTION: "unknown application exception", + UNKNOWN_METHOD: "unknown method", + INVALID_MESSAGE_TYPE_EXCEPTION: "invalid message type", + WRONG_METHOD_NAME: "wrong method name", + BAD_SEQUENCE_ID: "bad sequence ID", + MISSING_RESULT: "missing result", + INTERNAL_ERROR: "unknown internal error", + PROTOCOL_ERROR: "unknown protocol error", +} + +// Application level Thrift exception +type TApplicationException interface { + TException + TypeId() int32 + Read(iprot TProtocol) error + Write(oprot TProtocol) error +} + +type tApplicationException struct { + message string + type_ int32 +} + +func (e tApplicationException) Error() string { + if e.message != "" { + return e.message + } + return defaultApplicationExceptionMessage[e.type_] +} + +func NewTApplicationException(type_ int32, message string) TApplicationException { + return &tApplicationException{message, type_} +} + +func (p *tApplicationException) TypeId() int32 { + return p.type_ +} + +func (p *tApplicationException) Read(iprot TProtocol) error { + // TODO: this should really be generated by the compiler + _, err := iprot.ReadStructBegin() + if err != nil { + return err + } + + message := "" + type_ := int32(UNKNOWN_APPLICATION_EXCEPTION) + + for { + _, ttype, id, err := 
iprot.ReadFieldBegin() + if err != nil { + return err + } + if ttype == STOP { + break + } + switch id { + case 1: + if ttype == STRING { + if message, err = iprot.ReadString(); err != nil { + return err + } + } else { + if err = SkipDefaultDepth(iprot, ttype); err != nil { + return err + } + } + case 2: + if ttype == I32 { + if type_, err = iprot.ReadI32(); err != nil { + return err + } + } else { + if err = SkipDefaultDepth(iprot, ttype); err != nil { + return err + } + } + default: + if err = SkipDefaultDepth(iprot, ttype); err != nil { + return err + } + } + if err = iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return err + } + + p.message = message + p.type_ = type_ + + return nil +} + +func (p *tApplicationException) Write(oprot TProtocol) (err error) { + err = oprot.WriteStructBegin("TApplicationException") + if len(p.Error()) > 0 { + err = oprot.WriteFieldBegin("message", STRING, 1) + if err != nil { + return + } + err = oprot.WriteString(p.Error()) + if err != nil { + return + } + err = oprot.WriteFieldEnd() + if err != nil { + return + } + } + err = oprot.WriteFieldBegin("type", I32, 2) + if err != nil { + return + } + err = oprot.WriteI32(p.type_) + if err != nil { + return + } + err = oprot.WriteFieldEnd() + if err != nil { + return + } + err = oprot.WriteFieldStop() + if err != nil { + return + } + err = oprot.WriteStructEnd() + return +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/binary_protocol.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/binary_protocol.go new file mode 100644 index 000000000..1f90bf435 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/binary_protocol.go @@ -0,0 +1,509 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "bytes" + "context" + "encoding/binary" + "errors" + "fmt" + "io" + "math" +) + +type TBinaryProtocol struct { + trans TRichTransport + origTransport TTransport + reader io.Reader + writer io.Writer + strictRead bool + strictWrite bool + buffer [64]byte +} + +type TBinaryProtocolFactory struct { + strictRead bool + strictWrite bool +} + +func NewTBinaryProtocolTransport(t TTransport) *TBinaryProtocol { + return NewTBinaryProtocol(t, false, true) +} + +func NewTBinaryProtocol(t TTransport, strictRead, strictWrite bool) *TBinaryProtocol { + p := &TBinaryProtocol{origTransport: t, strictRead: strictRead, strictWrite: strictWrite} + if et, ok := t.(TRichTransport); ok { + p.trans = et + } else { + p.trans = NewTRichTransport(t) + } + p.reader = p.trans + p.writer = p.trans + return p +} + +func NewTBinaryProtocolFactoryDefault() *TBinaryProtocolFactory { + return NewTBinaryProtocolFactory(false, true) +} + +func NewTBinaryProtocolFactory(strictRead, strictWrite bool) *TBinaryProtocolFactory { + return &TBinaryProtocolFactory{strictRead: strictRead, strictWrite: strictWrite} +} + +func (p *TBinaryProtocolFactory) GetProtocol(t TTransport) TProtocol { + return NewTBinaryProtocol(t, p.strictRead, p.strictWrite) +} + +/** + * Writing Methods + */ + +func (p *TBinaryProtocol) WriteMessageBegin(name string, typeId TMessageType, seqId int32) error { + if 
p.strictWrite { + version := uint32(VERSION_1) | uint32(typeId) + e := p.WriteI32(int32(version)) + if e != nil { + return e + } + e = p.WriteString(name) + if e != nil { + return e + } + e = p.WriteI32(seqId) + return e + } else { + e := p.WriteString(name) + if e != nil { + return e + } + e = p.WriteByte(int8(typeId)) + if e != nil { + return e + } + e = p.WriteI32(seqId) + return e + } + return nil +} + +func (p *TBinaryProtocol) WriteMessageEnd() error { + return nil +} + +func (p *TBinaryProtocol) WriteStructBegin(name string) error { + return nil +} + +func (p *TBinaryProtocol) WriteStructEnd() error { + return nil +} + +func (p *TBinaryProtocol) WriteFieldBegin(name string, typeId TType, id int16) error { + e := p.WriteByte(int8(typeId)) + if e != nil { + return e + } + e = p.WriteI16(id) + return e +} + +func (p *TBinaryProtocol) WriteFieldEnd() error { + return nil +} + +func (p *TBinaryProtocol) WriteFieldStop() error { + e := p.WriteByte(STOP) + return e +} + +func (p *TBinaryProtocol) WriteMapBegin(keyType TType, valueType TType, size int) error { + e := p.WriteByte(int8(keyType)) + if e != nil { + return e + } + e = p.WriteByte(int8(valueType)) + if e != nil { + return e + } + e = p.WriteI32(int32(size)) + return e +} + +func (p *TBinaryProtocol) WriteMapEnd() error { + return nil +} + +func (p *TBinaryProtocol) WriteListBegin(elemType TType, size int) error { + e := p.WriteByte(int8(elemType)) + if e != nil { + return e + } + e = p.WriteI32(int32(size)) + return e +} + +func (p *TBinaryProtocol) WriteListEnd() error { + return nil +} + +func (p *TBinaryProtocol) WriteSetBegin(elemType TType, size int) error { + e := p.WriteByte(int8(elemType)) + if e != nil { + return e + } + e = p.WriteI32(int32(size)) + return e +} + +func (p *TBinaryProtocol) WriteSetEnd() error { + return nil +} + +func (p *TBinaryProtocol) WriteBool(value bool) error { + if value { + return p.WriteByte(1) + } + return p.WriteByte(0) +} + +func (p *TBinaryProtocol) WriteByte(value 
int8) error { + e := p.trans.WriteByte(byte(value)) + return NewTProtocolException(e) +} + +func (p *TBinaryProtocol) WriteI16(value int16) error { + v := p.buffer[0:2] + binary.BigEndian.PutUint16(v, uint16(value)) + _, e := p.writer.Write(v) + return NewTProtocolException(e) +} + +func (p *TBinaryProtocol) WriteI32(value int32) error { + v := p.buffer[0:4] + binary.BigEndian.PutUint32(v, uint32(value)) + _, e := p.writer.Write(v) + return NewTProtocolException(e) +} + +func (p *TBinaryProtocol) WriteI64(value int64) error { + v := p.buffer[0:8] + binary.BigEndian.PutUint64(v, uint64(value)) + _, err := p.writer.Write(v) + return NewTProtocolException(err) +} + +func (p *TBinaryProtocol) WriteDouble(value float64) error { + return p.WriteI64(int64(math.Float64bits(value))) +} + +func (p *TBinaryProtocol) WriteString(value string) error { + e := p.WriteI32(int32(len(value))) + if e != nil { + return e + } + _, err := p.trans.WriteString(value) + return NewTProtocolException(err) +} + +func (p *TBinaryProtocol) WriteBinary(value []byte) error { + e := p.WriteI32(int32(len(value))) + if e != nil { + return e + } + _, err := p.writer.Write(value) + return NewTProtocolException(err) +} + +/** + * Reading methods + */ + +func (p *TBinaryProtocol) ReadMessageBegin() (name string, typeId TMessageType, seqId int32, err error) { + size, e := p.ReadI32() + if e != nil { + return "", typeId, 0, NewTProtocolException(e) + } + if size < 0 { + typeId = TMessageType(size & 0x0ff) + version := int64(int64(size) & VERSION_MASK) + if version != VERSION_1 { + return name, typeId, seqId, NewTProtocolExceptionWithType(BAD_VERSION, fmt.Errorf("Bad version in ReadMessageBegin")) + } + name, e = p.ReadString() + if e != nil { + return name, typeId, seqId, NewTProtocolException(e) + } + seqId, e = p.ReadI32() + if e != nil { + return name, typeId, seqId, NewTProtocolException(e) + } + return name, typeId, seqId, nil + } + if p.strictRead { + return name, typeId, seqId, 
NewTProtocolExceptionWithType(BAD_VERSION, fmt.Errorf("Missing version in ReadMessageBegin")) + } + name, e2 := p.readStringBody(size) + if e2 != nil { + return name, typeId, seqId, e2 + } + b, e3 := p.ReadByte() + if e3 != nil { + return name, typeId, seqId, e3 + } + typeId = TMessageType(b) + seqId, e4 := p.ReadI32() + if e4 != nil { + return name, typeId, seqId, e4 + } + return name, typeId, seqId, nil +} + +func (p *TBinaryProtocol) ReadMessageEnd() error { + return nil +} + +func (p *TBinaryProtocol) ReadStructBegin() (name string, err error) { + return +} + +func (p *TBinaryProtocol) ReadStructEnd() error { + return nil +} + +func (p *TBinaryProtocol) ReadFieldBegin() (name string, typeId TType, seqId int16, err error) { + t, err := p.ReadByte() + typeId = TType(t) + if err != nil { + return name, typeId, seqId, err + } + if t != STOP { + seqId, err = p.ReadI16() + } + return name, typeId, seqId, err +} + +func (p *TBinaryProtocol) ReadFieldEnd() error { + return nil +} + +var invalidDataLength = NewTProtocolExceptionWithType(INVALID_DATA, errors.New("Invalid data length")) + +func (p *TBinaryProtocol) ReadMapBegin() (kType, vType TType, size int, err error) { + k, e := p.ReadByte() + if e != nil { + err = NewTProtocolException(e) + return + } + kType = TType(k) + v, e := p.ReadByte() + if e != nil { + err = NewTProtocolException(e) + return + } + vType = TType(v) + size32, e := p.ReadI32() + if e != nil { + err = NewTProtocolException(e) + return + } + if size32 < 0 { + err = invalidDataLength + return + } + size = int(size32) + return kType, vType, size, nil +} + +func (p *TBinaryProtocol) ReadMapEnd() error { + return nil +} + +func (p *TBinaryProtocol) ReadListBegin() (elemType TType, size int, err error) { + b, e := p.ReadByte() + if e != nil { + err = NewTProtocolException(e) + return + } + elemType = TType(b) + size32, e := p.ReadI32() + if e != nil { + err = NewTProtocolException(e) + return + } + if size32 < 0 { + err = invalidDataLength + return + } 
+ size = int(size32) + + return +} + +func (p *TBinaryProtocol) ReadListEnd() error { + return nil +} + +func (p *TBinaryProtocol) ReadSetBegin() (elemType TType, size int, err error) { + b, e := p.ReadByte() + if e != nil { + err = NewTProtocolException(e) + return + } + elemType = TType(b) + size32, e := p.ReadI32() + if e != nil { + err = NewTProtocolException(e) + return + } + if size32 < 0 { + err = invalidDataLength + return + } + size = int(size32) + return elemType, size, nil +} + +func (p *TBinaryProtocol) ReadSetEnd() error { + return nil +} + +func (p *TBinaryProtocol) ReadBool() (bool, error) { + b, e := p.ReadByte() + v := true + if b != 1 { + v = false + } + return v, e +} + +func (p *TBinaryProtocol) ReadByte() (int8, error) { + v, err := p.trans.ReadByte() + return int8(v), err +} + +func (p *TBinaryProtocol) ReadI16() (value int16, err error) { + buf := p.buffer[0:2] + err = p.readAll(buf) + value = int16(binary.BigEndian.Uint16(buf)) + return value, err +} + +func (p *TBinaryProtocol) ReadI32() (value int32, err error) { + buf := p.buffer[0:4] + err = p.readAll(buf) + value = int32(binary.BigEndian.Uint32(buf)) + return value, err +} + +func (p *TBinaryProtocol) ReadI64() (value int64, err error) { + buf := p.buffer[0:8] + err = p.readAll(buf) + value = int64(binary.BigEndian.Uint64(buf)) + return value, err +} + +func (p *TBinaryProtocol) ReadDouble() (value float64, err error) { + buf := p.buffer[0:8] + err = p.readAll(buf) + value = math.Float64frombits(binary.BigEndian.Uint64(buf)) + return value, err +} + +func (p *TBinaryProtocol) ReadString() (value string, err error) { + size, e := p.ReadI32() + if e != nil { + return "", e + } + if size < 0 { + err = invalidDataLength + return + } + + return p.readStringBody(size) +} + +func (p *TBinaryProtocol) ReadBinary() ([]byte, error) { + size, e := p.ReadI32() + if e != nil { + return nil, e + } + if size < 0 { + return nil, invalidDataLength + } + + isize := int(size) + buf := make([]byte, isize) 
+ _, err := io.ReadFull(p.trans, buf) + return buf, NewTProtocolException(err) +} + +func (p *TBinaryProtocol) Flush(ctx context.Context) (err error) { + return NewTProtocolException(p.trans.Flush(ctx)) +} + +func (p *TBinaryProtocol) Skip(fieldType TType) (err error) { + return SkipDefaultDepth(p, fieldType) +} + +func (p *TBinaryProtocol) Transport() TTransport { + return p.origTransport +} + +func (p *TBinaryProtocol) readAll(buf []byte) error { + _, err := io.ReadFull(p.reader, buf) + return NewTProtocolException(err) +} + +const readLimit = 32768 + +func (p *TBinaryProtocol) readStringBody(size int32) (value string, err error) { + if size < 0 { + return "", nil + } + + var ( + buf bytes.Buffer + e error + b []byte + ) + + switch { + case int(size) <= len(p.buffer): + b = p.buffer[:size] // avoids allocation for small reads + case int(size) < readLimit: + b = make([]byte, size) + default: + b = make([]byte, readLimit) + } + + for size > 0 { + _, e = io.ReadFull(p.trans, b) + buf.Write(b) + if e != nil { + break + } + size -= readLimit + if size < readLimit && size > 0 { + b = b[:size] + } + } + return buf.String(), NewTProtocolException(e) +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/buffered_transport.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/buffered_transport.go new file mode 100644 index 000000000..96702061b --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/buffered_transport.go @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "bufio" + "context" +) + +type TBufferedTransportFactory struct { + size int +} + +type TBufferedTransport struct { + bufio.ReadWriter + tp TTransport +} + +func (p *TBufferedTransportFactory) GetTransport(trans TTransport) (TTransport, error) { + return NewTBufferedTransport(trans, p.size), nil +} + +func NewTBufferedTransportFactory(bufferSize int) *TBufferedTransportFactory { + return &TBufferedTransportFactory{size: bufferSize} +} + +func NewTBufferedTransport(trans TTransport, bufferSize int) *TBufferedTransport { + return &TBufferedTransport{ + ReadWriter: bufio.ReadWriter{ + Reader: bufio.NewReaderSize(trans, bufferSize), + Writer: bufio.NewWriterSize(trans, bufferSize), + }, + tp: trans, + } +} + +func (p *TBufferedTransport) IsOpen() bool { + return p.tp.IsOpen() +} + +func (p *TBufferedTransport) Open() (err error) { + return p.tp.Open() +} + +func (p *TBufferedTransport) Close() (err error) { + return p.tp.Close() +} + +func (p *TBufferedTransport) Read(b []byte) (int, error) { + n, err := p.ReadWriter.Read(b) + if err != nil { + p.ReadWriter.Reader.Reset(p.tp) + } + return n, err +} + +func (p *TBufferedTransport) Write(b []byte) (int, error) { + n, err := p.ReadWriter.Write(b) + if err != nil { + p.ReadWriter.Writer.Reset(p.tp) + } + return n, err +} + +func (p *TBufferedTransport) Flush(ctx context.Context) error { + if err := p.ReadWriter.Flush(); err != nil { + p.ReadWriter.Writer.Reset(p.tp) + return err + } + return p.tp.Flush(ctx) +} + +func (p *TBufferedTransport) RemainingBytes() (num_bytes 
uint64) { + return p.tp.RemainingBytes() +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/client.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/client.go new file mode 100644 index 000000000..28791ccd0 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/client.go @@ -0,0 +1,85 @@ +package thrift + +import ( + "context" + "fmt" +) + +type TClient interface { + Call(ctx context.Context, method string, args, result TStruct) error +} + +type TStandardClient struct { + seqId int32 + iprot, oprot TProtocol +} + +// TStandardClient implements TClient, and uses the standard message format for Thrift. +// It is not safe for concurrent use. +func NewTStandardClient(inputProtocol, outputProtocol TProtocol) *TStandardClient { + return &TStandardClient{ + iprot: inputProtocol, + oprot: outputProtocol, + } +} + +func (p *TStandardClient) Send(ctx context.Context, oprot TProtocol, seqId int32, method string, args TStruct) error { + if err := oprot.WriteMessageBegin(method, CALL, seqId); err != nil { + return err + } + if err := args.Write(oprot); err != nil { + return err + } + if err := oprot.WriteMessageEnd(); err != nil { + return err + } + return oprot.Flush(ctx) +} + +func (p *TStandardClient) Recv(iprot TProtocol, seqId int32, method string, result TStruct) error { + rMethod, rTypeId, rSeqId, err := iprot.ReadMessageBegin() + if err != nil { + return err + } + + if method != rMethod { + return NewTApplicationException(WRONG_METHOD_NAME, fmt.Sprintf("%s: wrong method name", method)) + } else if seqId != rSeqId { + return NewTApplicationException(BAD_SEQUENCE_ID, fmt.Sprintf("%s: out of order sequence response", method)) + } else if rTypeId == EXCEPTION { + var exception tApplicationException + if err := exception.Read(iprot); err != nil { + return err + } + + if err := iprot.ReadMessageEnd(); err != nil { + return err + } + + return &exception + } else if rTypeId != REPLY { + return NewTApplicationException(INVALID_MESSAGE_TYPE_EXCEPTION, 
fmt.Sprintf("%s: invalid message type", method)) + } + + if err := result.Read(iprot); err != nil { + return err + } + + return iprot.ReadMessageEnd() +} + +func (p *TStandardClient) Call(ctx context.Context, method string, args, result TStruct) error { + p.seqId++ + seqId := p.seqId + + if err := p.Send(ctx, p.oprot, seqId, method, args); err != nil { + return err + } + + // method is oneway + if result == nil { + return nil + } + + return p.Recv(p.iprot, seqId, method, result) +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/compact_protocol.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/compact_protocol.go new file mode 100644 index 000000000..1900d50c3 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/compact_protocol.go @@ -0,0 +1,810 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package thrift + +import ( + "context" + "encoding/binary" + "fmt" + "io" + "math" +) + +const ( + COMPACT_PROTOCOL_ID = 0x082 + COMPACT_VERSION = 1 + COMPACT_VERSION_MASK = 0x1f + COMPACT_TYPE_MASK = 0x0E0 + COMPACT_TYPE_BITS = 0x07 + COMPACT_TYPE_SHIFT_AMOUNT = 5 +) + +type tCompactType byte + +const ( + COMPACT_BOOLEAN_TRUE = 0x01 + COMPACT_BOOLEAN_FALSE = 0x02 + COMPACT_BYTE = 0x03 + COMPACT_I16 = 0x04 + COMPACT_I32 = 0x05 + COMPACT_I64 = 0x06 + COMPACT_DOUBLE = 0x07 + COMPACT_BINARY = 0x08 + COMPACT_LIST = 0x09 + COMPACT_SET = 0x0A + COMPACT_MAP = 0x0B + COMPACT_STRUCT = 0x0C +) + +var ( + ttypeToCompactType map[TType]tCompactType +) + +func init() { + ttypeToCompactType = map[TType]tCompactType{ + STOP: STOP, + BOOL: COMPACT_BOOLEAN_TRUE, + BYTE: COMPACT_BYTE, + I16: COMPACT_I16, + I32: COMPACT_I32, + I64: COMPACT_I64, + DOUBLE: COMPACT_DOUBLE, + STRING: COMPACT_BINARY, + LIST: COMPACT_LIST, + SET: COMPACT_SET, + MAP: COMPACT_MAP, + STRUCT: COMPACT_STRUCT, + } +} + +type TCompactProtocolFactory struct{} + +func NewTCompactProtocolFactory() *TCompactProtocolFactory { + return &TCompactProtocolFactory{} +} + +func (p *TCompactProtocolFactory) GetProtocol(trans TTransport) TProtocol { + return NewTCompactProtocol(trans) +} + +type TCompactProtocol struct { + trans TRichTransport + origTransport TTransport + + // Used to keep track of the last field for the current and previous structs, + // so we can do the delta stuff. + lastField []int + lastFieldId int + + // If we encounter a boolean field begin, save the TField here so it can + // have the value incorporated. + booleanFieldName string + booleanFieldId int16 + booleanFieldPending bool + + // If we read a field header, and it's a boolean field, save the boolean + // value here so that readBool can use it. 
+ boolValue bool + boolValueIsNotNull bool + buffer [64]byte +} + +// Create a TCompactProtocol given a TTransport +func NewTCompactProtocol(trans TTransport) *TCompactProtocol { + p := &TCompactProtocol{origTransport: trans, lastField: []int{}} + if et, ok := trans.(TRichTransport); ok { + p.trans = et + } else { + p.trans = NewTRichTransport(trans) + } + + return p + +} + +// +// Public Writing methods. +// + +// Write a message header to the wire. Compact Protocol messages contain the +// protocol version so we can migrate forwards in the future if need be. +func (p *TCompactProtocol) WriteMessageBegin(name string, typeId TMessageType, seqid int32) error { + err := p.writeByteDirect(COMPACT_PROTOCOL_ID) + if err != nil { + return NewTProtocolException(err) + } + err = p.writeByteDirect((COMPACT_VERSION & COMPACT_VERSION_MASK) | ((byte(typeId) << COMPACT_TYPE_SHIFT_AMOUNT) & COMPACT_TYPE_MASK)) + if err != nil { + return NewTProtocolException(err) + } + _, err = p.writeVarint32(seqid) + if err != nil { + return NewTProtocolException(err) + } + e := p.WriteString(name) + return e + +} + +func (p *TCompactProtocol) WriteMessageEnd() error { return nil } + +// Write a struct begin. This doesn't actually put anything on the wire. We +// use it as an opportunity to put special placeholder markers on the field +// stack so we can get the field id deltas correct. +func (p *TCompactProtocol) WriteStructBegin(name string) error { + p.lastField = append(p.lastField, p.lastFieldId) + p.lastFieldId = 0 + return nil +} + +// Write a struct end. This doesn't actually put anything on the wire. We use +// this as an opportunity to pop the last field from the current struct off +// of the field stack. 
+func (p *TCompactProtocol) WriteStructEnd() error { + p.lastFieldId = p.lastField[len(p.lastField)-1] + p.lastField = p.lastField[:len(p.lastField)-1] + return nil +} + +func (p *TCompactProtocol) WriteFieldBegin(name string, typeId TType, id int16) error { + if typeId == BOOL { + // we want to possibly include the value, so we'll wait. + p.booleanFieldName, p.booleanFieldId, p.booleanFieldPending = name, id, true + return nil + } + _, err := p.writeFieldBeginInternal(name, typeId, id, 0xFF) + return NewTProtocolException(err) +} + +// The workhorse of writeFieldBegin. It has the option of doing a +// 'type override' of the type header. This is used specifically in the +// boolean field case. +func (p *TCompactProtocol) writeFieldBeginInternal(name string, typeId TType, id int16, typeOverride byte) (int, error) { + // short lastField = lastField_.pop(); + + // if there's a type override, use that. + var typeToWrite byte + if typeOverride == 0xFF { + typeToWrite = byte(p.getCompactType(typeId)) + } else { + typeToWrite = typeOverride + } + // check if we can use delta encoding for the field id + fieldId := int(id) + written := 0 + if fieldId > p.lastFieldId && fieldId-p.lastFieldId <= 15 { + // write them together + err := p.writeByteDirect(byte((fieldId-p.lastFieldId)<<4) | typeToWrite) + if err != nil { + return 0, err + } + } else { + // write them separate + err := p.writeByteDirect(typeToWrite) + if err != nil { + return 0, err + } + err = p.WriteI16(id) + written = 1 + 2 + if err != nil { + return 0, err + } + } + + p.lastFieldId = fieldId + // p.lastField.Push(field.id); + return written, nil +} + +func (p *TCompactProtocol) WriteFieldEnd() error { return nil } + +func (p *TCompactProtocol) WriteFieldStop() error { + err := p.writeByteDirect(STOP) + return NewTProtocolException(err) +} + +func (p *TCompactProtocol) WriteMapBegin(keyType TType, valueType TType, size int) error { + if size == 0 { + err := p.writeByteDirect(0) + return 
NewTProtocolException(err) + } + _, err := p.writeVarint32(int32(size)) + if err != nil { + return NewTProtocolException(err) + } + err = p.writeByteDirect(byte(p.getCompactType(keyType))<<4 | byte(p.getCompactType(valueType))) + return NewTProtocolException(err) +} + +func (p *TCompactProtocol) WriteMapEnd() error { return nil } + +// Write a list header. +func (p *TCompactProtocol) WriteListBegin(elemType TType, size int) error { + _, err := p.writeCollectionBegin(elemType, size) + return NewTProtocolException(err) +} + +func (p *TCompactProtocol) WriteListEnd() error { return nil } + +// Write a set header. +func (p *TCompactProtocol) WriteSetBegin(elemType TType, size int) error { + _, err := p.writeCollectionBegin(elemType, size) + return NewTProtocolException(err) +} + +func (p *TCompactProtocol) WriteSetEnd() error { return nil } + +func (p *TCompactProtocol) WriteBool(value bool) error { + v := byte(COMPACT_BOOLEAN_FALSE) + if value { + v = byte(COMPACT_BOOLEAN_TRUE) + } + if p.booleanFieldPending { + // we haven't written the field header yet + _, err := p.writeFieldBeginInternal(p.booleanFieldName, BOOL, p.booleanFieldId, v) + p.booleanFieldPending = false + return NewTProtocolException(err) + } + // we're not part of a field, so just write the value. + err := p.writeByteDirect(v) + return NewTProtocolException(err) +} + +// Write a byte. Nothing to see here! +func (p *TCompactProtocol) WriteByte(value int8) error { + err := p.writeByteDirect(byte(value)) + return NewTProtocolException(err) +} + +// Write an I16 as a zigzag varint. +func (p *TCompactProtocol) WriteI16(value int16) error { + _, err := p.writeVarint32(p.int32ToZigzag(int32(value))) + return NewTProtocolException(err) +} + +// Write an i32 as a zigzag varint. +func (p *TCompactProtocol) WriteI32(value int32) error { + _, err := p.writeVarint32(p.int32ToZigzag(value)) + return NewTProtocolException(err) +} + +// Write an i64 as a zigzag varint. 
+func (p *TCompactProtocol) WriteI64(value int64) error { + _, err := p.writeVarint64(p.int64ToZigzag(value)) + return NewTProtocolException(err) +} + +// Write a double to the wire as 8 bytes. +func (p *TCompactProtocol) WriteDouble(value float64) error { + buf := p.buffer[0:8] + binary.LittleEndian.PutUint64(buf, math.Float64bits(value)) + _, err := p.trans.Write(buf) + return NewTProtocolException(err) +} + +// Write a string to the wire with a varint size preceding. +func (p *TCompactProtocol) WriteString(value string) error { + _, e := p.writeVarint32(int32(len(value))) + if e != nil { + return NewTProtocolException(e) + } + if len(value) > 0 { + } + _, e = p.trans.WriteString(value) + return e +} + +// Write a byte array, using a varint for the size. +func (p *TCompactProtocol) WriteBinary(bin []byte) error { + _, e := p.writeVarint32(int32(len(bin))) + if e != nil { + return NewTProtocolException(e) + } + if len(bin) > 0 { + _, e = p.trans.Write(bin) + return NewTProtocolException(e) + } + return nil +} + +// +// Reading methods. +// + +// Read a message header. 
+func (p *TCompactProtocol) ReadMessageBegin() (name string, typeId TMessageType, seqId int32, err error) { + + protocolId, err := p.readByteDirect() + if err != nil { + return + } + + if protocolId != COMPACT_PROTOCOL_ID { + e := fmt.Errorf("Expected protocol id %02x but got %02x", COMPACT_PROTOCOL_ID, protocolId) + return "", typeId, seqId, NewTProtocolExceptionWithType(BAD_VERSION, e) + } + + versionAndType, err := p.readByteDirect() + if err != nil { + return + } + + version := versionAndType & COMPACT_VERSION_MASK + typeId = TMessageType((versionAndType >> COMPACT_TYPE_SHIFT_AMOUNT) & COMPACT_TYPE_BITS) + if version != COMPACT_VERSION { + e := fmt.Errorf("Expected version %02x but got %02x", COMPACT_VERSION, version) + err = NewTProtocolExceptionWithType(BAD_VERSION, e) + return + } + seqId, e := p.readVarint32() + if e != nil { + err = NewTProtocolException(e) + return + } + name, err = p.ReadString() + return +} + +func (p *TCompactProtocol) ReadMessageEnd() error { return nil } + +// Read a struct begin. There's nothing on the wire for this, but it is our +// opportunity to push a new struct begin marker onto the field stack. +func (p *TCompactProtocol) ReadStructBegin() (name string, err error) { + p.lastField = append(p.lastField, p.lastFieldId) + p.lastFieldId = 0 + return +} + +// Doesn't actually consume any wire data, just removes the last field for +// this struct from the field stack. +func (p *TCompactProtocol) ReadStructEnd() error { + // consume the last field we read off the wire. + p.lastFieldId = p.lastField[len(p.lastField)-1] + p.lastField = p.lastField[:len(p.lastField)-1] + return nil +} + +// Read a field header off the wire. +func (p *TCompactProtocol) ReadFieldBegin() (name string, typeId TType, id int16, err error) { + t, err := p.readByteDirect() + if err != nil { + return + } + + // if it's a stop, then we can return immediately, as the struct is over. 
+ if (t & 0x0f) == STOP { + return "", STOP, 0, nil + } + + // mask off the 4 MSB of the type header. it could contain a field id delta. + modifier := int16((t & 0xf0) >> 4) + if modifier == 0 { + // not a delta. look ahead for the zigzag varint field id. + id, err = p.ReadI16() + if err != nil { + return + } + } else { + // has a delta. add the delta to the last read field id. + id = int16(p.lastFieldId) + modifier + } + typeId, e := p.getTType(tCompactType(t & 0x0f)) + if e != nil { + err = NewTProtocolException(e) + return + } + + // if this happens to be a boolean field, the value is encoded in the type + if p.isBoolType(t) { + // save the boolean value in a special instance variable. + p.boolValue = (byte(t)&0x0f == COMPACT_BOOLEAN_TRUE) + p.boolValueIsNotNull = true + } + + // push the new field onto the field stack so we can keep the deltas going. + p.lastFieldId = int(id) + return +} + +func (p *TCompactProtocol) ReadFieldEnd() error { return nil } + +// Read a map header off the wire. If the size is zero, skip reading the key +// and value type. This means that 0-length maps will yield TMaps without the +// "correct" types. +func (p *TCompactProtocol) ReadMapBegin() (keyType TType, valueType TType, size int, err error) { + size32, e := p.readVarint32() + if e != nil { + err = NewTProtocolException(e) + return + } + if size32 < 0 { + err = invalidDataLength + return + } + size = int(size32) + + keyAndValueType := byte(STOP) + if size != 0 { + keyAndValueType, err = p.readByteDirect() + if err != nil { + return + } + } + keyType, _ = p.getTType(tCompactType(keyAndValueType >> 4)) + valueType, _ = p.getTType(tCompactType(keyAndValueType & 0xf)) + return +} + +func (p *TCompactProtocol) ReadMapEnd() error { return nil } + +// Read a list header off the wire. If the list size is 0-14, the size will +// be packed into the element type header. 
If it's a longer list, the 4 MSB +// of the element type header will be 0xF, and a varint will follow with the +// true size. +func (p *TCompactProtocol) ReadListBegin() (elemType TType, size int, err error) { + size_and_type, err := p.readByteDirect() + if err != nil { + return + } + size = int((size_and_type >> 4) & 0x0f) + if size == 15 { + size2, e := p.readVarint32() + if e != nil { + err = NewTProtocolException(e) + return + } + if size2 < 0 { + err = invalidDataLength + return + } + size = int(size2) + } + elemType, e := p.getTType(tCompactType(size_and_type)) + if e != nil { + err = NewTProtocolException(e) + return + } + return +} + +func (p *TCompactProtocol) ReadListEnd() error { return nil } + +// Read a set header off the wire. If the set size is 0-14, the size will +// be packed into the element type header. If it's a longer set, the 4 MSB +// of the element type header will be 0xF, and a varint will follow with the +// true size. +func (p *TCompactProtocol) ReadSetBegin() (elemType TType, size int, err error) { + return p.ReadListBegin() +} + +func (p *TCompactProtocol) ReadSetEnd() error { return nil } + +// Read a boolean off the wire. If this is a boolean field, the value should +// already have been read during readFieldBegin, so we'll just consume the +// pre-stored value. Otherwise, read a byte. +func (p *TCompactProtocol) ReadBool() (value bool, err error) { + if p.boolValueIsNotNull { + p.boolValueIsNotNull = false + return p.boolValue, nil + } + v, err := p.readByteDirect() + return v == COMPACT_BOOLEAN_TRUE, err +} + +// Read a single byte off the wire. Nothing interesting here. +func (p *TCompactProtocol) ReadByte() (int8, error) { + v, err := p.readByteDirect() + if err != nil { + return 0, NewTProtocolException(err) + } + return int8(v), err +} + +// Read an i16 from the wire as a zigzag varint. 
+func (p *TCompactProtocol) ReadI16() (value int16, err error) { + v, err := p.ReadI32() + return int16(v), err +} + +// Read an i32 from the wire as a zigzag varint. +func (p *TCompactProtocol) ReadI32() (value int32, err error) { + v, e := p.readVarint32() + if e != nil { + return 0, NewTProtocolException(e) + } + value = p.zigzagToInt32(v) + return value, nil +} + +// Read an i64 from the wire as a zigzag varint. +func (p *TCompactProtocol) ReadI64() (value int64, err error) { + v, e := p.readVarint64() + if e != nil { + return 0, NewTProtocolException(e) + } + value = p.zigzagToInt64(v) + return value, nil +} + +// No magic here - just read a double off the wire. +func (p *TCompactProtocol) ReadDouble() (value float64, err error) { + longBits := p.buffer[0:8] + _, e := io.ReadFull(p.trans, longBits) + if e != nil { + return 0.0, NewTProtocolException(e) + } + return math.Float64frombits(p.bytesToUint64(longBits)), nil +} + +// Reads a []byte (via readBinary), and then UTF-8 decodes it. +func (p *TCompactProtocol) ReadString() (value string, err error) { + length, e := p.readVarint32() + if e != nil { + return "", NewTProtocolException(e) + } + if length < 0 { + return "", invalidDataLength + } + + if length == 0 { + return "", nil + } + var buf []byte + if length <= int32(len(p.buffer)) { + buf = p.buffer[0:length] + } else { + buf = make([]byte, length) + } + _, e = io.ReadFull(p.trans, buf) + return string(buf), NewTProtocolException(e) +} + +// Read a []byte from the wire. 
+func (p *TCompactProtocol) ReadBinary() (value []byte, err error) { + length, e := p.readVarint32() + if e != nil { + return nil, NewTProtocolException(e) + } + if length == 0 { + return []byte{}, nil + } + if length < 0 { + return nil, invalidDataLength + } + + buf := make([]byte, length) + _, e = io.ReadFull(p.trans, buf) + return buf, NewTProtocolException(e) +} + +func (p *TCompactProtocol) Flush(ctx context.Context) (err error) { + return NewTProtocolException(p.trans.Flush(ctx)) +} + +func (p *TCompactProtocol) Skip(fieldType TType) (err error) { + return SkipDefaultDepth(p, fieldType) +} + +func (p *TCompactProtocol) Transport() TTransport { + return p.origTransport +} + +// +// Internal writing methods +// + +// Abstract method for writing the start of lists and sets. List and sets on +// the wire differ only by the type indicator. +func (p *TCompactProtocol) writeCollectionBegin(elemType TType, size int) (int, error) { + if size <= 14 { + return 1, p.writeByteDirect(byte(int32(size<<4) | int32(p.getCompactType(elemType)))) + } + err := p.writeByteDirect(0xf0 | byte(p.getCompactType(elemType))) + if err != nil { + return 0, err + } + m, err := p.writeVarint32(int32(size)) + return 1 + m, err +} + +// Write an i32 as a varint. Results in 1-5 bytes on the wire. +// TODO(pomack): make a permanent buffer like writeVarint64? +func (p *TCompactProtocol) writeVarint32(n int32) (int, error) { + i32buf := p.buffer[0:5] + idx := 0 + for { + if (n & ^0x7F) == 0 { + i32buf[idx] = byte(n) + idx++ + // p.writeByteDirect(byte(n)); + break + // return; + } else { + i32buf[idx] = byte((n & 0x7F) | 0x80) + idx++ + // p.writeByteDirect(byte(((n & 0x7F) | 0x80))); + u := uint32(n) + n = int32(u >> 7) + } + } + return p.trans.Write(i32buf[0:idx]) +} + +// Write an i64 as a varint. Results in 1-10 bytes on the wire. 
+func (p *TCompactProtocol) writeVarint64(n int64) (int, error) { + varint64out := p.buffer[0:10] + idx := 0 + for { + if (n & ^0x7F) == 0 { + varint64out[idx] = byte(n) + idx++ + break + } else { + varint64out[idx] = byte((n & 0x7F) | 0x80) + idx++ + u := uint64(n) + n = int64(u >> 7) + } + } + return p.trans.Write(varint64out[0:idx]) +} + +// Convert l into a zigzag long. This allows negative numbers to be +// represented compactly as a varint. +func (p *TCompactProtocol) int64ToZigzag(l int64) int64 { + return (l << 1) ^ (l >> 63) +} + +// Convert l into a zigzag long. This allows negative numbers to be +// represented compactly as a varint. +func (p *TCompactProtocol) int32ToZigzag(n int32) int32 { + return (n << 1) ^ (n >> 31) +} + +func (p *TCompactProtocol) fixedUint64ToBytes(n uint64, buf []byte) { + binary.LittleEndian.PutUint64(buf, n) +} + +func (p *TCompactProtocol) fixedInt64ToBytes(n int64, buf []byte) { + binary.LittleEndian.PutUint64(buf, uint64(n)) +} + +// Writes a byte without any possibility of all that field header nonsense. +// Used internally by other writing methods that know they need to write a byte. +func (p *TCompactProtocol) writeByteDirect(b byte) error { + return p.trans.WriteByte(b) +} + +// Writes a byte without any possibility of all that field header nonsense. +func (p *TCompactProtocol) writeIntAsByteDirect(n int) (int, error) { + return 1, p.writeByteDirect(byte(n)) +} + +// +// Internal reading methods +// + +// Read an i32 from the wire as a varint. The MSB of each byte is set +// if there is another byte to follow. This can read up to 5 bytes. +func (p *TCompactProtocol) readVarint32() (int32, error) { + // if the wire contains the right stuff, this will just truncate the i64 we + // read and get us the right sign. + v, err := p.readVarint64() + return int32(v), err +} + +// Read an i64 from the wire as a proper varint. The MSB of each byte is set +// if there is another byte to follow. This can read up to 10 bytes. 
+func (p *TCompactProtocol) readVarint64() (int64, error) { + shift := uint(0) + result := int64(0) + for { + b, err := p.readByteDirect() + if err != nil { + return 0, err + } + result |= int64(b&0x7f) << shift + if (b & 0x80) != 0x80 { + break + } + shift += 7 + } + return result, nil +} + +// Read a byte, unlike ReadByte that reads Thrift-byte that is i8. +func (p *TCompactProtocol) readByteDirect() (byte, error) { + return p.trans.ReadByte() +} + +// +// encoding helpers +// + +// Convert from zigzag int to int. +func (p *TCompactProtocol) zigzagToInt32(n int32) int32 { + u := uint32(n) + return int32(u>>1) ^ -(n & 1) +} + +// Convert from zigzag long to long. +func (p *TCompactProtocol) zigzagToInt64(n int64) int64 { + u := uint64(n) + return int64(u>>1) ^ -(n & 1) +} + +// Note that it's important that the mask bytes are long literals, +// otherwise they'll default to ints, and when you shift an int left 56 bits, +// you just get a messed up int. +func (p *TCompactProtocol) bytesToInt64(b []byte) int64 { + return int64(binary.LittleEndian.Uint64(b)) +} + +// Note that it's important that the mask bytes are long literals, +// otherwise they'll default to ints, and when you shift an int left 56 bits, +// you just get a messed up int. +func (p *TCompactProtocol) bytesToUint64(b []byte) uint64 { + return binary.LittleEndian.Uint64(b) +} + +// +// type testing and converting +// + +func (p *TCompactProtocol) isBoolType(b byte) bool { + return (b&0x0f) == COMPACT_BOOLEAN_TRUE || (b&0x0f) == COMPACT_BOOLEAN_FALSE +} + +// Given a tCompactType constant, convert it to its corresponding +// TType value. 
+func (p *TCompactProtocol) getTType(t tCompactType) (TType, error) { + switch byte(t) & 0x0f { + case STOP: + return STOP, nil + case COMPACT_BOOLEAN_FALSE, COMPACT_BOOLEAN_TRUE: + return BOOL, nil + case COMPACT_BYTE: + return BYTE, nil + case COMPACT_I16: + return I16, nil + case COMPACT_I32: + return I32, nil + case COMPACT_I64: + return I64, nil + case COMPACT_DOUBLE: + return DOUBLE, nil + case COMPACT_BINARY: + return STRING, nil + case COMPACT_LIST: + return LIST, nil + case COMPACT_SET: + return SET, nil + case COMPACT_MAP: + return MAP, nil + case COMPACT_STRUCT: + return STRUCT, nil + } + return STOP, TException(fmt.Errorf("don't know what type: %v", t&0x0f)) +} + +// Given a TType value, find the appropriate TCompactProtocol.Types constant. +func (p *TCompactProtocol) getCompactType(t TType) tCompactType { + return ttypeToCompactType[t] +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/context.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/context.go new file mode 100644 index 000000000..d15c1bcf8 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/context.go @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package thrift + +import "context" + +var defaultCtx = context.Background() diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/debug_protocol.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/debug_protocol.go new file mode 100644 index 000000000..57943e0f3 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/debug_protocol.go @@ -0,0 +1,270 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package thrift + +import ( + "context" + "log" +) + +type TDebugProtocol struct { + Delegate TProtocol + LogPrefix string +} + +type TDebugProtocolFactory struct { + Underlying TProtocolFactory + LogPrefix string +} + +func NewTDebugProtocolFactory(underlying TProtocolFactory, logPrefix string) *TDebugProtocolFactory { + return &TDebugProtocolFactory{ + Underlying: underlying, + LogPrefix: logPrefix, + } +} + +func (t *TDebugProtocolFactory) GetProtocol(trans TTransport) TProtocol { + return &TDebugProtocol{ + Delegate: t.Underlying.GetProtocol(trans), + LogPrefix: t.LogPrefix, + } +} + +func (tdp *TDebugProtocol) WriteMessageBegin(name string, typeId TMessageType, seqid int32) error { + err := tdp.Delegate.WriteMessageBegin(name, typeId, seqid) + log.Printf("%sWriteMessageBegin(name=%#v, typeId=%#v, seqid=%#v) => %#v", tdp.LogPrefix, name, typeId, seqid, err) + return err +} +func (tdp *TDebugProtocol) WriteMessageEnd() error { + err := tdp.Delegate.WriteMessageEnd() + log.Printf("%sWriteMessageEnd() => %#v", tdp.LogPrefix, err) + return err +} +func (tdp *TDebugProtocol) WriteStructBegin(name string) error { + err := tdp.Delegate.WriteStructBegin(name) + log.Printf("%sWriteStructBegin(name=%#v) => %#v", tdp.LogPrefix, name, err) + return err +} +func (tdp *TDebugProtocol) WriteStructEnd() error { + err := tdp.Delegate.WriteStructEnd() + log.Printf("%sWriteStructEnd() => %#v", tdp.LogPrefix, err) + return err +} +func (tdp *TDebugProtocol) WriteFieldBegin(name string, typeId TType, id int16) error { + err := tdp.Delegate.WriteFieldBegin(name, typeId, id) + log.Printf("%sWriteFieldBegin(name=%#v, typeId=%#v, id%#v) => %#v", tdp.LogPrefix, name, typeId, id, err) + return err +} +func (tdp *TDebugProtocol) WriteFieldEnd() error { + err := tdp.Delegate.WriteFieldEnd() + log.Printf("%sWriteFieldEnd() => %#v", tdp.LogPrefix, err) + return err +} +func (tdp *TDebugProtocol) WriteFieldStop() error { + err := tdp.Delegate.WriteFieldStop() + 
log.Printf("%sWriteFieldStop() => %#v", tdp.LogPrefix, err) + return err +} +func (tdp *TDebugProtocol) WriteMapBegin(keyType TType, valueType TType, size int) error { + err := tdp.Delegate.WriteMapBegin(keyType, valueType, size) + log.Printf("%sWriteMapBegin(keyType=%#v, valueType=%#v, size=%#v) => %#v", tdp.LogPrefix, keyType, valueType, size, err) + return err +} +func (tdp *TDebugProtocol) WriteMapEnd() error { + err := tdp.Delegate.WriteMapEnd() + log.Printf("%sWriteMapEnd() => %#v", tdp.LogPrefix, err) + return err +} +func (tdp *TDebugProtocol) WriteListBegin(elemType TType, size int) error { + err := tdp.Delegate.WriteListBegin(elemType, size) + log.Printf("%sWriteListBegin(elemType=%#v, size=%#v) => %#v", tdp.LogPrefix, elemType, size, err) + return err +} +func (tdp *TDebugProtocol) WriteListEnd() error { + err := tdp.Delegate.WriteListEnd() + log.Printf("%sWriteListEnd() => %#v", tdp.LogPrefix, err) + return err +} +func (tdp *TDebugProtocol) WriteSetBegin(elemType TType, size int) error { + err := tdp.Delegate.WriteSetBegin(elemType, size) + log.Printf("%sWriteSetBegin(elemType=%#v, size=%#v) => %#v", tdp.LogPrefix, elemType, size, err) + return err +} +func (tdp *TDebugProtocol) WriteSetEnd() error { + err := tdp.Delegate.WriteSetEnd() + log.Printf("%sWriteSetEnd() => %#v", tdp.LogPrefix, err) + return err +} +func (tdp *TDebugProtocol) WriteBool(value bool) error { + err := tdp.Delegate.WriteBool(value) + log.Printf("%sWriteBool(value=%#v) => %#v", tdp.LogPrefix, value, err) + return err +} +func (tdp *TDebugProtocol) WriteByte(value int8) error { + err := tdp.Delegate.WriteByte(value) + log.Printf("%sWriteByte(value=%#v) => %#v", tdp.LogPrefix, value, err) + return err +} +func (tdp *TDebugProtocol) WriteI16(value int16) error { + err := tdp.Delegate.WriteI16(value) + log.Printf("%sWriteI16(value=%#v) => %#v", tdp.LogPrefix, value, err) + return err +} +func (tdp *TDebugProtocol) WriteI32(value int32) error { + err := tdp.Delegate.WriteI32(value) + 
log.Printf("%sWriteI32(value=%#v) => %#v", tdp.LogPrefix, value, err) + return err +} +func (tdp *TDebugProtocol) WriteI64(value int64) error { + err := tdp.Delegate.WriteI64(value) + log.Printf("%sWriteI64(value=%#v) => %#v", tdp.LogPrefix, value, err) + return err +} +func (tdp *TDebugProtocol) WriteDouble(value float64) error { + err := tdp.Delegate.WriteDouble(value) + log.Printf("%sWriteDouble(value=%#v) => %#v", tdp.LogPrefix, value, err) + return err +} +func (tdp *TDebugProtocol) WriteString(value string) error { + err := tdp.Delegate.WriteString(value) + log.Printf("%sWriteString(value=%#v) => %#v", tdp.LogPrefix, value, err) + return err +} +func (tdp *TDebugProtocol) WriteBinary(value []byte) error { + err := tdp.Delegate.WriteBinary(value) + log.Printf("%sWriteBinary(value=%#v) => %#v", tdp.LogPrefix, value, err) + return err +} + +func (tdp *TDebugProtocol) ReadMessageBegin() (name string, typeId TMessageType, seqid int32, err error) { + name, typeId, seqid, err = tdp.Delegate.ReadMessageBegin() + log.Printf("%sReadMessageBegin() (name=%#v, typeId=%#v, seqid=%#v, err=%#v)", tdp.LogPrefix, name, typeId, seqid, err) + return +} +func (tdp *TDebugProtocol) ReadMessageEnd() (err error) { + err = tdp.Delegate.ReadMessageEnd() + log.Printf("%sReadMessageEnd() err=%#v", tdp.LogPrefix, err) + return +} +func (tdp *TDebugProtocol) ReadStructBegin() (name string, err error) { + name, err = tdp.Delegate.ReadStructBegin() + log.Printf("%sReadStructBegin() (name%#v, err=%#v)", tdp.LogPrefix, name, err) + return +} +func (tdp *TDebugProtocol) ReadStructEnd() (err error) { + err = tdp.Delegate.ReadStructEnd() + log.Printf("%sReadStructEnd() err=%#v", tdp.LogPrefix, err) + return +} +func (tdp *TDebugProtocol) ReadFieldBegin() (name string, typeId TType, id int16, err error) { + name, typeId, id, err = tdp.Delegate.ReadFieldBegin() + log.Printf("%sReadFieldBegin() (name=%#v, typeId=%#v, id=%#v, err=%#v)", tdp.LogPrefix, name, typeId, id, err) + return +} +func (tdp 
*TDebugProtocol) ReadFieldEnd() (err error) { + err = tdp.Delegate.ReadFieldEnd() + log.Printf("%sReadFieldEnd() err=%#v", tdp.LogPrefix, err) + return +} +func (tdp *TDebugProtocol) ReadMapBegin() (keyType TType, valueType TType, size int, err error) { + keyType, valueType, size, err = tdp.Delegate.ReadMapBegin() + log.Printf("%sReadMapBegin() (keyType=%#v, valueType=%#v, size=%#v, err=%#v)", tdp.LogPrefix, keyType, valueType, size, err) + return +} +func (tdp *TDebugProtocol) ReadMapEnd() (err error) { + err = tdp.Delegate.ReadMapEnd() + log.Printf("%sReadMapEnd() err=%#v", tdp.LogPrefix, err) + return +} +func (tdp *TDebugProtocol) ReadListBegin() (elemType TType, size int, err error) { + elemType, size, err = tdp.Delegate.ReadListBegin() + log.Printf("%sReadListBegin() (elemType=%#v, size=%#v, err=%#v)", tdp.LogPrefix, elemType, size, err) + return +} +func (tdp *TDebugProtocol) ReadListEnd() (err error) { + err = tdp.Delegate.ReadListEnd() + log.Printf("%sReadListEnd() err=%#v", tdp.LogPrefix, err) + return +} +func (tdp *TDebugProtocol) ReadSetBegin() (elemType TType, size int, err error) { + elemType, size, err = tdp.Delegate.ReadSetBegin() + log.Printf("%sReadSetBegin() (elemType=%#v, size=%#v, err=%#v)", tdp.LogPrefix, elemType, size, err) + return +} +func (tdp *TDebugProtocol) ReadSetEnd() (err error) { + err = tdp.Delegate.ReadSetEnd() + log.Printf("%sReadSetEnd() err=%#v", tdp.LogPrefix, err) + return +} +func (tdp *TDebugProtocol) ReadBool() (value bool, err error) { + value, err = tdp.Delegate.ReadBool() + log.Printf("%sReadBool() (value=%#v, err=%#v)", tdp.LogPrefix, value, err) + return +} +func (tdp *TDebugProtocol) ReadByte() (value int8, err error) { + value, err = tdp.Delegate.ReadByte() + log.Printf("%sReadByte() (value=%#v, err=%#v)", tdp.LogPrefix, value, err) + return +} +func (tdp *TDebugProtocol) ReadI16() (value int16, err error) { + value, err = tdp.Delegate.ReadI16() + log.Printf("%sReadI16() (value=%#v, err=%#v)", tdp.LogPrefix, 
value, err) + return +} +func (tdp *TDebugProtocol) ReadI32() (value int32, err error) { + value, err = tdp.Delegate.ReadI32() + log.Printf("%sReadI32() (value=%#v, err=%#v)", tdp.LogPrefix, value, err) + return +} +func (tdp *TDebugProtocol) ReadI64() (value int64, err error) { + value, err = tdp.Delegate.ReadI64() + log.Printf("%sReadI64() (value=%#v, err=%#v)", tdp.LogPrefix, value, err) + return +} +func (tdp *TDebugProtocol) ReadDouble() (value float64, err error) { + value, err = tdp.Delegate.ReadDouble() + log.Printf("%sReadDouble() (value=%#v, err=%#v)", tdp.LogPrefix, value, err) + return +} +func (tdp *TDebugProtocol) ReadString() (value string, err error) { + value, err = tdp.Delegate.ReadString() + log.Printf("%sReadString() (value=%#v, err=%#v)", tdp.LogPrefix, value, err) + return +} +func (tdp *TDebugProtocol) ReadBinary() (value []byte, err error) { + value, err = tdp.Delegate.ReadBinary() + log.Printf("%sReadBinary() (value=%#v, err=%#v)", tdp.LogPrefix, value, err) + return +} +func (tdp *TDebugProtocol) Skip(fieldType TType) (err error) { + err = tdp.Delegate.Skip(fieldType) + log.Printf("%sSkip(fieldType=%#v) (err=%#v)", tdp.LogPrefix, fieldType, err) + return +} +func (tdp *TDebugProtocol) Flush(ctx context.Context) (err error) { + err = tdp.Delegate.Flush(ctx) + log.Printf("%sFlush() (err=%#v)", tdp.LogPrefix, err) + return +} + +func (tdp *TDebugProtocol) Transport() TTransport { + return tdp.Delegate.Transport() +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/deserializer.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/deserializer.go new file mode 100644 index 000000000..91a0983a4 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/deserializer.go @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +type TDeserializer struct { + Transport TTransport + Protocol TProtocol +} + +func NewTDeserializer() *TDeserializer { + var transport TTransport + transport = NewTMemoryBufferLen(1024) + + protocol := NewTBinaryProtocolFactoryDefault().GetProtocol(transport) + + return &TDeserializer{ + transport, + protocol} +} + +func (t *TDeserializer) ReadString(msg TStruct, s string) (err error) { + err = nil + if _, err = t.Transport.Write([]byte(s)); err != nil { + return + } + if err = msg.Read(t.Protocol); err != nil { + return + } + return +} + +func (t *TDeserializer) Read(msg TStruct, b []byte) (err error) { + err = nil + if _, err = t.Transport.Write(b); err != nil { + return + } + if err = msg.Read(t.Protocol); err != nil { + return + } + return +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/exception.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/exception.go new file mode 100644 index 000000000..ea8d6f661 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/exception.go @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "errors" +) + +// Generic Thrift exception +type TException interface { + error +} + +// Prepends additional information to an error without losing the Thrift exception interface +func PrependError(prepend string, err error) error { + if t, ok := err.(TTransportException); ok { + return NewTTransportException(t.TypeId(), prepend+t.Error()) + } + if t, ok := err.(TProtocolException); ok { + return NewTProtocolExceptionWithType(t.TypeId(), errors.New(prepend+err.Error())) + } + if t, ok := err.(TApplicationException); ok { + return NewTApplicationException(t.TypeId(), prepend+t.Error()) + } + + return errors.New(prepend + err.Error()) +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/field.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/field.go new file mode 100644 index 000000000..9d6652550 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/field.go @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +// Helper class that encapsulates field metadata. +type field struct { + name string + typeId TType + id int +} + +func newField(n string, t TType, i int) *field { + return &field{name: n, typeId: t, id: i} +} + +func (p *field) Name() string { + if p == nil { + return "" + } + return p.name +} + +func (p *field) TypeId() TType { + if p == nil { + return TType(VOID) + } + return p.typeId +} + +func (p *field) Id() int { + if p == nil { + return -1 + } + return p.id +} + +func (p *field) String() string { + if p == nil { + return "" + } + return "" +} + +var ANONYMOUS_FIELD *field + +type fieldSlice []field + +func (p fieldSlice) Len() int { + return len(p) +} + +func (p fieldSlice) Less(i, j int) bool { + return p[i].Id() < p[j].Id() +} + +func (p fieldSlice) Swap(i, j int) { + p[i], p[j] = p[j], p[i] +} + +func init() { + ANONYMOUS_FIELD = newField("", STOP, 0) +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/framed_transport.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/framed_transport.go new file mode 100644 index 000000000..81fa65aaa --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/framed_transport.go @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "bufio" + "bytes" + "context" + "encoding/binary" + "fmt" + "io" +) + +const DEFAULT_MAX_LENGTH = 16384000 + +type TFramedTransport struct { + transport TTransport + buf bytes.Buffer + reader *bufio.Reader + frameSize uint32 //Current remaining size of the frame. if ==0 read next frame header + buffer [4]byte + maxLength uint32 +} + +type tFramedTransportFactory struct { + factory TTransportFactory + maxLength uint32 +} + +func NewTFramedTransportFactory(factory TTransportFactory) TTransportFactory { + return &tFramedTransportFactory{factory: factory, maxLength: DEFAULT_MAX_LENGTH} +} + +func NewTFramedTransportFactoryMaxLength(factory TTransportFactory, maxLength uint32) TTransportFactory { + return &tFramedTransportFactory{factory: factory, maxLength: maxLength} +} + +func (p *tFramedTransportFactory) GetTransport(base TTransport) (TTransport, error) { + tt, err := p.factory.GetTransport(base) + if err != nil { + return nil, err + } + return NewTFramedTransportMaxLength(tt, p.maxLength), nil +} + +func NewTFramedTransport(transport TTransport) *TFramedTransport { + return &TFramedTransport{transport: transport, reader: bufio.NewReader(transport), maxLength: DEFAULT_MAX_LENGTH} +} + +func NewTFramedTransportMaxLength(transport TTransport, maxLength uint32) *TFramedTransport { + return &TFramedTransport{transport: transport, reader: 
bufio.NewReader(transport), maxLength: maxLength} +} + +func (p *TFramedTransport) Open() error { + return p.transport.Open() +} + +func (p *TFramedTransport) IsOpen() bool { + return p.transport.IsOpen() +} + +func (p *TFramedTransport) Close() error { + return p.transport.Close() +} + +func (p *TFramedTransport) Read(buf []byte) (l int, err error) { + if p.frameSize == 0 { + p.frameSize, err = p.readFrameHeader() + if err != nil { + return + } + } + if p.frameSize < uint32(len(buf)) { + frameSize := p.frameSize + tmp := make([]byte, p.frameSize) + l, err = p.Read(tmp) + copy(buf, tmp) + if err == nil { + err = NewTTransportExceptionFromError(fmt.Errorf("Not enough frame size %d to read %d bytes", frameSize, len(buf))) + return + } + } + got, err := p.reader.Read(buf) + p.frameSize = p.frameSize - uint32(got) + //sanity check + if p.frameSize < 0 { + return 0, NewTTransportException(UNKNOWN_TRANSPORT_EXCEPTION, "Negative frame size") + } + return got, NewTTransportExceptionFromError(err) +} + +func (p *TFramedTransport) ReadByte() (c byte, err error) { + if p.frameSize == 0 { + p.frameSize, err = p.readFrameHeader() + if err != nil { + return + } + } + if p.frameSize < 1 { + return 0, NewTTransportExceptionFromError(fmt.Errorf("Not enough frame size %d to read %d bytes", p.frameSize, 1)) + } + c, err = p.reader.ReadByte() + if err == nil { + p.frameSize-- + } + return +} + +func (p *TFramedTransport) Write(buf []byte) (int, error) { + n, err := p.buf.Write(buf) + return n, NewTTransportExceptionFromError(err) +} + +func (p *TFramedTransport) WriteByte(c byte) error { + return p.buf.WriteByte(c) +} + +func (p *TFramedTransport) WriteString(s string) (n int, err error) { + return p.buf.WriteString(s) +} + +func (p *TFramedTransport) Flush(ctx context.Context) error { + size := p.buf.Len() + buf := p.buffer[:4] + binary.BigEndian.PutUint32(buf, uint32(size)) + _, err := p.transport.Write(buf) + if err != nil { + p.buf.Truncate(0) + return 
NewTTransportExceptionFromError(err) + } + if size > 0 { + if n, err := p.buf.WriteTo(p.transport); err != nil { + print("Error while flushing write buffer of size ", size, " to transport, only wrote ", n, " bytes: ", err.Error(), "\n") + p.buf.Truncate(0) + return NewTTransportExceptionFromError(err) + } + } + err = p.transport.Flush(ctx) + return NewTTransportExceptionFromError(err) +} + +func (p *TFramedTransport) readFrameHeader() (uint32, error) { + buf := p.buffer[:4] + if _, err := io.ReadFull(p.reader, buf); err != nil { + return 0, err + } + size := binary.BigEndian.Uint32(buf) + if size < 0 || size > p.maxLength { + return 0, NewTTransportException(UNKNOWN_TRANSPORT_EXCEPTION, fmt.Sprintf("Incorrect frame size (%d)", size)) + } + return size, nil +} + +func (p *TFramedTransport) RemainingBytes() (num_bytes uint64) { + return uint64(p.frameSize) +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/http_client.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/http_client.go new file mode 100644 index 000000000..5c82bf538 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/http_client.go @@ -0,0 +1,242 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package thrift + +import ( + "bytes" + "context" + "io" + "io/ioutil" + "net/http" + "net/url" + "strconv" +) + +// Default to using the shared http client. Library users are +// free to change this global client or specify one through +// THttpClientOptions. +var DefaultHttpClient *http.Client = http.DefaultClient + +type THttpClient struct { + client *http.Client + response *http.Response + url *url.URL + requestBuffer *bytes.Buffer + header http.Header + nsecConnectTimeout int64 + nsecReadTimeout int64 +} + +type THttpClientTransportFactory struct { + options THttpClientOptions + url string +} + +func (p *THttpClientTransportFactory) GetTransport(trans TTransport) (TTransport, error) { + if trans != nil { + t, ok := trans.(*THttpClient) + if ok && t.url != nil { + return NewTHttpClientWithOptions(t.url.String(), p.options) + } + } + return NewTHttpClientWithOptions(p.url, p.options) +} + +type THttpClientOptions struct { + // If nil, DefaultHttpClient is used + Client *http.Client +} + +func NewTHttpClientTransportFactory(url string) *THttpClientTransportFactory { + return NewTHttpClientTransportFactoryWithOptions(url, THttpClientOptions{}) +} + +func NewTHttpClientTransportFactoryWithOptions(url string, options THttpClientOptions) *THttpClientTransportFactory { + return &THttpClientTransportFactory{url: url, options: options} +} + +func NewTHttpClientWithOptions(urlstr string, options THttpClientOptions) (TTransport, error) { + parsedURL, err := url.Parse(urlstr) + if err != nil { + return nil, err + } + buf := make([]byte, 0, 1024) + client := options.Client + if client == nil { + client = DefaultHttpClient + } + httpHeader := map[string][]string{"Content-Type": {"application/x-thrift"}} + return &THttpClient{client: client, url: parsedURL, requestBuffer: bytes.NewBuffer(buf), header: httpHeader}, nil +} + +func NewTHttpClient(urlstr string) (TTransport, error) { + return NewTHttpClientWithOptions(urlstr, THttpClientOptions{}) +} + +// Set the HTTP 
Header for this specific Thrift Transport +// It is important that you first assert the TTransport as a THttpClient type +// like so: +// +// httpTrans := trans.(THttpClient) +// httpTrans.SetHeader("User-Agent","Thrift Client 1.0") +func (p *THttpClient) SetHeader(key string, value string) { + p.header.Add(key, value) +} + +// Get the HTTP Header represented by the supplied Header Key for this specific Thrift Transport +// It is important that you first assert the TTransport as a THttpClient type +// like so: +// +// httpTrans := trans.(THttpClient) +// hdrValue := httpTrans.GetHeader("User-Agent") +func (p *THttpClient) GetHeader(key string) string { + return p.header.Get(key) +} + +// Deletes the HTTP Header given a Header Key for this specific Thrift Transport +// It is important that you first assert the TTransport as a THttpClient type +// like so: +// +// httpTrans := trans.(THttpClient) +// httpTrans.DelHeader("User-Agent") +func (p *THttpClient) DelHeader(key string) { + p.header.Del(key) +} + +func (p *THttpClient) Open() error { + // do nothing + return nil +} + +func (p *THttpClient) IsOpen() bool { + return p.response != nil || p.requestBuffer != nil +} + +func (p *THttpClient) closeResponse() error { + var err error + if p.response != nil && p.response.Body != nil { + // The docs specify that if keepalive is enabled and the response body is not + // read to completion the connection will never be returned to the pool and + // reused. Errors are being ignored here because if the connection is invalid + // and this fails for some reason, the Close() method will do any remaining + // cleanup. 
+ io.Copy(ioutil.Discard, p.response.Body) + + err = p.response.Body.Close() + } + + p.response = nil + return err +} + +func (p *THttpClient) Close() error { + if p.requestBuffer != nil { + p.requestBuffer.Reset() + p.requestBuffer = nil + } + return p.closeResponse() +} + +func (p *THttpClient) Read(buf []byte) (int, error) { + if p.response == nil { + return 0, NewTTransportException(NOT_OPEN, "Response buffer is empty, no request.") + } + n, err := p.response.Body.Read(buf) + if n > 0 && (err == nil || err == io.EOF) { + return n, nil + } + return n, NewTTransportExceptionFromError(err) +} + +func (p *THttpClient) ReadByte() (c byte, err error) { + return readByte(p.response.Body) +} + +func (p *THttpClient) Write(buf []byte) (int, error) { + n, err := p.requestBuffer.Write(buf) + return n, err +} + +func (p *THttpClient) WriteByte(c byte) error { + return p.requestBuffer.WriteByte(c) +} + +func (p *THttpClient) WriteString(s string) (n int, err error) { + return p.requestBuffer.WriteString(s) +} + +func (p *THttpClient) Flush(ctx context.Context) error { + // Close any previous response body to avoid leaking connections. + p.closeResponse() + + req, err := http.NewRequest("POST", p.url.String(), p.requestBuffer) + if err != nil { + return NewTTransportExceptionFromError(err) + } + req.Header = p.header + if ctx != nil { + req = req.WithContext(ctx) + } + response, err := p.client.Do(req) + if err != nil { + return NewTTransportExceptionFromError(err) + } + if response.StatusCode != http.StatusOK { + // Close the response to avoid leaking file descriptors. closeResponse does + // more than just call Close(), so temporarily assign it and reuse the logic. 
+ p.response = response + p.closeResponse() + + // TODO(pomack) log bad response + return NewTTransportException(UNKNOWN_TRANSPORT_EXCEPTION, "HTTP Response code: "+strconv.Itoa(response.StatusCode)) + } + p.response = response + return nil +} + +func (p *THttpClient) RemainingBytes() (num_bytes uint64) { + len := p.response.ContentLength + if len >= 0 { + return uint64(len) + } + + const maxSize = ^uint64(0) + return maxSize // the truth is, we just don't know unless framed is used +} + +// Deprecated: Use NewTHttpClientTransportFactory instead. +func NewTHttpPostClientTransportFactory(url string) *THttpClientTransportFactory { + return NewTHttpClientTransportFactoryWithOptions(url, THttpClientOptions{}) +} + +// Deprecated: Use NewTHttpClientTransportFactoryWithOptions instead. +func NewTHttpPostClientTransportFactoryWithOptions(url string, options THttpClientOptions) *THttpClientTransportFactory { + return NewTHttpClientTransportFactoryWithOptions(url, options) +} + +// Deprecated: Use NewTHttpClientWithOptions instead. +func NewTHttpPostClientWithOptions(urlstr string, options THttpClientOptions) (TTransport, error) { + return NewTHttpClientWithOptions(urlstr, options) +} + +// Deprecated: Use NewTHttpClient instead. +func NewTHttpPostClient(urlstr string) (TTransport, error) { + return NewTHttpClientWithOptions(urlstr, THttpClientOptions{}) +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/http_transport.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/http_transport.go new file mode 100644 index 000000000..66f0f388a --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/http_transport.go @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "compress/gzip" + "io" + "net/http" + "strings" +) + +// NewThriftHandlerFunc is a function that creates a ready-to-use Apache Thrift Handler function +func NewThriftHandlerFunc(processor TProcessor, + inPfactory, outPfactory TProtocolFactory) func(w http.ResponseWriter, r *http.Request) { + + return gz(func(w http.ResponseWriter, r *http.Request) { + w.Header().Add("Content-Type", "application/x-thrift") + + transport := NewStreamTransport(r.Body, w) + processor.Process(r.Context(), inPfactory.GetProtocol(transport), outPfactory.GetProtocol(transport)) + }) +} + +// gz transparently compresses the HTTP response if the client supports it.
+func gz(handler http.HandlerFunc) http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + if !strings.Contains(r.Header.Get("Accept-Encoding"), "gzip") { + handler(w, r) + return + } + w.Header().Set("Content-Encoding", "gzip") + gz := gzip.NewWriter(w) + defer gz.Close() + gzw := gzipResponseWriter{Writer: gz, ResponseWriter: w} + handler(gzw, r) + } +} + +type gzipResponseWriter struct { + io.Writer + http.ResponseWriter +} + +func (w gzipResponseWriter) Write(b []byte) (int, error) { + return w.Writer.Write(b) +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/iostream_transport.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/iostream_transport.go new file mode 100644 index 000000000..fea93bcef --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/iostream_transport.go @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package thrift + +import ( + "bufio" + "context" + "io" +) + +// StreamTransport is a Transport made of an io.Reader and/or an io.Writer +type StreamTransport struct { + io.Reader + io.Writer + isReadWriter bool + closed bool +} + +type StreamTransportFactory struct { + Reader io.Reader + Writer io.Writer + isReadWriter bool +} + +func (p *StreamTransportFactory) GetTransport(trans TTransport) (TTransport, error) { + if trans != nil { + t, ok := trans.(*StreamTransport) + if ok { + if t.isReadWriter { + return NewStreamTransportRW(t.Reader.(io.ReadWriter)), nil + } + if t.Reader != nil && t.Writer != nil { + return NewStreamTransport(t.Reader, t.Writer), nil + } + if t.Reader != nil && t.Writer == nil { + return NewStreamTransportR(t.Reader), nil + } + if t.Reader == nil && t.Writer != nil { + return NewStreamTransportW(t.Writer), nil + } + return &StreamTransport{}, nil + } + } + if p.isReadWriter { + return NewStreamTransportRW(p.Reader.(io.ReadWriter)), nil + } + if p.Reader != nil && p.Writer != nil { + return NewStreamTransport(p.Reader, p.Writer), nil + } + if p.Reader != nil && p.Writer == nil { + return NewStreamTransportR(p.Reader), nil + } + if p.Reader == nil && p.Writer != nil { + return NewStreamTransportW(p.Writer), nil + } + return &StreamTransport{}, nil +} + +func NewStreamTransportFactory(reader io.Reader, writer io.Writer, isReadWriter bool) *StreamTransportFactory { + return &StreamTransportFactory{Reader: reader, Writer: writer, isReadWriter: isReadWriter} +} + +func NewStreamTransport(r io.Reader, w io.Writer) *StreamTransport { + return &StreamTransport{Reader: bufio.NewReader(r), Writer: bufio.NewWriter(w)} +} + +func NewStreamTransportR(r io.Reader) *StreamTransport { + return &StreamTransport{Reader: bufio.NewReader(r)} +} + +func NewStreamTransportW(w io.Writer) *StreamTransport { + return &StreamTransport{Writer: bufio.NewWriter(w)} +} + +func NewStreamTransportRW(rw io.ReadWriter) *StreamTransport { + bufrw := 
bufio.NewReadWriter(bufio.NewReader(rw), bufio.NewWriter(rw)) + return &StreamTransport{Reader: bufrw, Writer: bufrw, isReadWriter: true} +} + +func (p *StreamTransport) IsOpen() bool { + return !p.closed +} + +// implicitly opened on creation, can't be reopened once closed +func (p *StreamTransport) Open() error { + if !p.closed { + return NewTTransportException(ALREADY_OPEN, "StreamTransport already open.") + } else { + return NewTTransportException(NOT_OPEN, "cannot reopen StreamTransport.") + } +} + +// Closes both the input and output streams. +func (p *StreamTransport) Close() error { + if p.closed { + return NewTTransportException(NOT_OPEN, "StreamTransport already closed.") + } + p.closed = true + closedReader := false + if p.Reader != nil { + c, ok := p.Reader.(io.Closer) + if ok { + e := c.Close() + closedReader = true + if e != nil { + return e + } + } + p.Reader = nil + } + if p.Writer != nil && (!closedReader || !p.isReadWriter) { + c, ok := p.Writer.(io.Closer) + if ok { + e := c.Close() + if e != nil { + return e + } + } + p.Writer = nil + } + return nil +} + +// Flushes the underlying output stream if not null. 
+func (p *StreamTransport) Flush(ctx context.Context) error { + if p.Writer == nil { + return NewTTransportException(NOT_OPEN, "Cannot flush null outputStream") + } + f, ok := p.Writer.(Flusher) + if ok { + err := f.Flush() + if err != nil { + return NewTTransportExceptionFromError(err) + } + } + return nil +} + +func (p *StreamTransport) Read(c []byte) (n int, err error) { + n, err = p.Reader.Read(c) + if err != nil { + err = NewTTransportExceptionFromError(err) + } + return +} + +func (p *StreamTransport) ReadByte() (c byte, err error) { + f, ok := p.Reader.(io.ByteReader) + if ok { + c, err = f.ReadByte() + } else { + c, err = readByte(p.Reader) + } + if err != nil { + err = NewTTransportExceptionFromError(err) + } + return +} + +func (p *StreamTransport) Write(c []byte) (n int, err error) { + n, err = p.Writer.Write(c) + if err != nil { + err = NewTTransportExceptionFromError(err) + } + return +} + +func (p *StreamTransport) WriteByte(c byte) (err error) { + f, ok := p.Writer.(io.ByteWriter) + if ok { + err = f.WriteByte(c) + } else { + err = writeByte(p.Writer, c) + } + if err != nil { + err = NewTTransportExceptionFromError(err) + } + return +} + +func (p *StreamTransport) WriteString(s string) (n int, err error) { + f, ok := p.Writer.(stringWriter) + if ok { + n, err = f.WriteString(s) + } else { + n, err = p.Writer.Write([]byte(s)) + } + if err != nil { + err = NewTTransportExceptionFromError(err) + } + return +} + +func (p *StreamTransport) RemainingBytes() (num_bytes uint64) { + const maxSize = ^uint64(0) + return maxSize // the truth is, we just don't know unless framed is used +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/json_protocol.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/json_protocol.go new file mode 100644 index 000000000..7be685d43 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/json_protocol.go @@ -0,0 +1,584 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "context" + "encoding/base64" + "fmt" +) + +const ( + THRIFT_JSON_PROTOCOL_VERSION = 1 +) + +// for references to _ParseContext see tsimplejson_protocol.go + +// JSON protocol implementation for thrift. +// +// This protocol produces/consumes a simple output format +// suitable for parsing by scripting languages. It should not be +// confused with the full-featured TJSONProtocol. 
+// +type TJSONProtocol struct { + *TSimpleJSONProtocol +} + +// Constructor +func NewTJSONProtocol(t TTransport) *TJSONProtocol { + v := &TJSONProtocol{TSimpleJSONProtocol: NewTSimpleJSONProtocol(t)} + v.parseContextStack = append(v.parseContextStack, int(_CONTEXT_IN_TOPLEVEL)) + v.dumpContext = append(v.dumpContext, int(_CONTEXT_IN_TOPLEVEL)) + return v +} + +// Factory +type TJSONProtocolFactory struct{} + +func (p *TJSONProtocolFactory) GetProtocol(trans TTransport) TProtocol { + return NewTJSONProtocol(trans) +} + +func NewTJSONProtocolFactory() *TJSONProtocolFactory { + return &TJSONProtocolFactory{} +} + +func (p *TJSONProtocol) WriteMessageBegin(name string, typeId TMessageType, seqId int32) error { + p.resetContextStack() // THRIFT-3735 + if e := p.OutputListBegin(); e != nil { + return e + } + if e := p.WriteI32(THRIFT_JSON_PROTOCOL_VERSION); e != nil { + return e + } + if e := p.WriteString(name); e != nil { + return e + } + if e := p.WriteByte(int8(typeId)); e != nil { + return e + } + if e := p.WriteI32(seqId); e != nil { + return e + } + return nil +} + +func (p *TJSONProtocol) WriteMessageEnd() error { + return p.OutputListEnd() +} + +func (p *TJSONProtocol) WriteStructBegin(name string) error { + if e := p.OutputObjectBegin(); e != nil { + return e + } + return nil +} + +func (p *TJSONProtocol) WriteStructEnd() error { + return p.OutputObjectEnd() +} + +func (p *TJSONProtocol) WriteFieldBegin(name string, typeId TType, id int16) error { + if e := p.WriteI16(id); e != nil { + return e + } + if e := p.OutputObjectBegin(); e != nil { + return e + } + s, e1 := p.TypeIdToString(typeId) + if e1 != nil { + return e1 + } + if e := p.WriteString(s); e != nil { + return e + } + return nil +} + +func (p *TJSONProtocol) WriteFieldEnd() error { + return p.OutputObjectEnd() +} + +func (p *TJSONProtocol) WriteFieldStop() error { return nil } + +func (p *TJSONProtocol) WriteMapBegin(keyType TType, valueType TType, size int) error { + if e := p.OutputListBegin(); e 
!= nil { + return e + } + s, e1 := p.TypeIdToString(keyType) + if e1 != nil { + return e1 + } + if e := p.WriteString(s); e != nil { + return e + } + s, e1 = p.TypeIdToString(valueType) + if e1 != nil { + return e1 + } + if e := p.WriteString(s); e != nil { + return e + } + if e := p.WriteI64(int64(size)); e != nil { + return e + } + return p.OutputObjectBegin() +} + +func (p *TJSONProtocol) WriteMapEnd() error { + if e := p.OutputObjectEnd(); e != nil { + return e + } + return p.OutputListEnd() +} + +func (p *TJSONProtocol) WriteListBegin(elemType TType, size int) error { + return p.OutputElemListBegin(elemType, size) +} + +func (p *TJSONProtocol) WriteListEnd() error { + return p.OutputListEnd() +} + +func (p *TJSONProtocol) WriteSetBegin(elemType TType, size int) error { + return p.OutputElemListBegin(elemType, size) +} + +func (p *TJSONProtocol) WriteSetEnd() error { + return p.OutputListEnd() +} + +func (p *TJSONProtocol) WriteBool(b bool) error { + if b { + return p.WriteI32(1) + } + return p.WriteI32(0) +} + +func (p *TJSONProtocol) WriteByte(b int8) error { + return p.WriteI32(int32(b)) +} + +func (p *TJSONProtocol) WriteI16(v int16) error { + return p.WriteI32(int32(v)) +} + +func (p *TJSONProtocol) WriteI32(v int32) error { + return p.OutputI64(int64(v)) +} + +func (p *TJSONProtocol) WriteI64(v int64) error { + return p.OutputI64(int64(v)) +} + +func (p *TJSONProtocol) WriteDouble(v float64) error { + return p.OutputF64(v) +} + +func (p *TJSONProtocol) WriteString(v string) error { + return p.OutputString(v) +} + +func (p *TJSONProtocol) WriteBinary(v []byte) error { + // JSON library only takes in a string, + // not an arbitrary byte array, to ensure bytes are transmitted + // efficiently we must convert this into a valid JSON string + // therefore we use base64 encoding to avoid excessive escaping/quoting + if e := p.OutputPreValue(); e != nil { + return e + } + if _, e := p.write(JSON_QUOTE_BYTES); e != nil { + return NewTProtocolException(e) + } + 
writer := base64.NewEncoder(base64.StdEncoding, p.writer) + if _, e := writer.Write(v); e != nil { + p.writer.Reset(p.trans) // THRIFT-3735 + return NewTProtocolException(e) + } + if e := writer.Close(); e != nil { + return NewTProtocolException(e) + } + if _, e := p.write(JSON_QUOTE_BYTES); e != nil { + return NewTProtocolException(e) + } + return p.OutputPostValue() +} + +// Reading methods. +func (p *TJSONProtocol) ReadMessageBegin() (name string, typeId TMessageType, seqId int32, err error) { + p.resetContextStack() // THRIFT-3735 + if isNull, err := p.ParseListBegin(); isNull || err != nil { + return name, typeId, seqId, err + } + version, err := p.ReadI32() + if err != nil { + return name, typeId, seqId, err + } + if version != THRIFT_JSON_PROTOCOL_VERSION { + e := fmt.Errorf("Unknown Protocol version %d, expected version %d", version, THRIFT_JSON_PROTOCOL_VERSION) + return name, typeId, seqId, NewTProtocolExceptionWithType(INVALID_DATA, e) + + } + if name, err = p.ReadString(); err != nil { + return name, typeId, seqId, err + } + bTypeId, err := p.ReadByte() + typeId = TMessageType(bTypeId) + if err != nil { + return name, typeId, seqId, err + } + if seqId, err = p.ReadI32(); err != nil { + return name, typeId, seqId, err + } + return name, typeId, seqId, nil +} + +func (p *TJSONProtocol) ReadMessageEnd() error { + err := p.ParseListEnd() + return err +} + +func (p *TJSONProtocol) ReadStructBegin() (name string, err error) { + _, err = p.ParseObjectStart() + return "", err +} + +func (p *TJSONProtocol) ReadStructEnd() error { + return p.ParseObjectEnd() +} + +func (p *TJSONProtocol) ReadFieldBegin() (string, TType, int16, error) { + b, _ := p.reader.Peek(1) + if len(b) < 1 || b[0] == JSON_RBRACE[0] || b[0] == JSON_RBRACKET[0] { + return "", STOP, -1, nil + } + fieldId, err := p.ReadI16() + if err != nil { + return "", STOP, fieldId, err + } + if _, err = p.ParseObjectStart(); err != nil { + return "", STOP, fieldId, err + } + sType, err := p.ReadString() + 
if err != nil { + return "", STOP, fieldId, err + } + fType, err := p.StringToTypeId(sType) + return "", fType, fieldId, err +} + +func (p *TJSONProtocol) ReadFieldEnd() error { + return p.ParseObjectEnd() +} + +func (p *TJSONProtocol) ReadMapBegin() (keyType TType, valueType TType, size int, e error) { + if isNull, e := p.ParseListBegin(); isNull || e != nil { + return VOID, VOID, 0, e + } + + // read keyType + sKeyType, e := p.ReadString() + if e != nil { + return keyType, valueType, size, e + } + keyType, e = p.StringToTypeId(sKeyType) + if e != nil { + return keyType, valueType, size, e + } + + // read valueType + sValueType, e := p.ReadString() + if e != nil { + return keyType, valueType, size, e + } + valueType, e = p.StringToTypeId(sValueType) + if e != nil { + return keyType, valueType, size, e + } + + // read size + iSize, e := p.ReadI64() + if e != nil { + return keyType, valueType, size, e + } + size = int(iSize) + + _, e = p.ParseObjectStart() + return keyType, valueType, size, e +} + +func (p *TJSONProtocol) ReadMapEnd() error { + e := p.ParseObjectEnd() + if e != nil { + return e + } + return p.ParseListEnd() +} + +func (p *TJSONProtocol) ReadListBegin() (elemType TType, size int, e error) { + return p.ParseElemListBegin() +} + +func (p *TJSONProtocol) ReadListEnd() error { + return p.ParseListEnd() +} + +func (p *TJSONProtocol) ReadSetBegin() (elemType TType, size int, e error) { + return p.ParseElemListBegin() +} + +func (p *TJSONProtocol) ReadSetEnd() error { + return p.ParseListEnd() +} + +func (p *TJSONProtocol) ReadBool() (bool, error) { + value, err := p.ReadI32() + return (value != 0), err +} + +func (p *TJSONProtocol) ReadByte() (int8, error) { + v, err := p.ReadI64() + return int8(v), err +} + +func (p *TJSONProtocol) ReadI16() (int16, error) { + v, err := p.ReadI64() + return int16(v), err +} + +func (p *TJSONProtocol) ReadI32() (int32, error) { + v, err := p.ReadI64() + return int32(v), err +} + +func (p *TJSONProtocol) ReadI64() (int64, 
error) { + v, _, err := p.ParseI64() + return v, err +} + +func (p *TJSONProtocol) ReadDouble() (float64, error) { + v, _, err := p.ParseF64() + return v, err +} + +func (p *TJSONProtocol) ReadString() (string, error) { + var v string + if err := p.ParsePreValue(); err != nil { + return v, err + } + f, _ := p.reader.Peek(1) + if len(f) > 0 && f[0] == JSON_QUOTE { + p.reader.ReadByte() + value, err := p.ParseStringBody() + v = value + if err != nil { + return v, err + } + } else if len(f) > 0 && f[0] == JSON_NULL[0] { + b := make([]byte, len(JSON_NULL)) + _, err := p.reader.Read(b) + if err != nil { + return v, NewTProtocolException(err) + } + if string(b) != string(JSON_NULL) { + e := fmt.Errorf("Expected a JSON string, found unquoted data started with %s", string(b)) + return v, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + } else { + e := fmt.Errorf("Expected a JSON string, found unquoted data started with %s", string(f)) + return v, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + return v, p.ParsePostValue() +} + +func (p *TJSONProtocol) ReadBinary() ([]byte, error) { + var v []byte + if err := p.ParsePreValue(); err != nil { + return nil, err + } + f, _ := p.reader.Peek(1) + if len(f) > 0 && f[0] == JSON_QUOTE { + p.reader.ReadByte() + value, err := p.ParseBase64EncodedBody() + v = value + if err != nil { + return v, err + } + } else if len(f) > 0 && f[0] == JSON_NULL[0] { + b := make([]byte, len(JSON_NULL)) + _, err := p.reader.Read(b) + if err != nil { + return v, NewTProtocolException(err) + } + if string(b) != string(JSON_NULL) { + e := fmt.Errorf("Expected a JSON string, found unquoted data started with %s", string(b)) + return v, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + } else { + e := fmt.Errorf("Expected a JSON string, found unquoted data started with %s", string(f)) + return v, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + + return v, p.ParsePostValue() +} + +func (p *TJSONProtocol) Flush(ctx context.Context) (err error) 
{ + err = p.writer.Flush() + if err == nil { + err = p.trans.Flush(ctx) + } + return NewTProtocolException(err) +} + +func (p *TJSONProtocol) Skip(fieldType TType) (err error) { + return SkipDefaultDepth(p, fieldType) +} + +func (p *TJSONProtocol) Transport() TTransport { + return p.trans +} + +func (p *TJSONProtocol) OutputElemListBegin(elemType TType, size int) error { + if e := p.OutputListBegin(); e != nil { + return e + } + s, e1 := p.TypeIdToString(elemType) + if e1 != nil { + return e1 + } + if e := p.WriteString(s); e != nil { + return e + } + if e := p.WriteI64(int64(size)); e != nil { + return e + } + return nil +} + +func (p *TJSONProtocol) ParseElemListBegin() (elemType TType, size int, e error) { + if isNull, e := p.ParseListBegin(); isNull || e != nil { + return VOID, 0, e + } + sElemType, err := p.ReadString() + if err != nil { + return VOID, size, err + } + elemType, err = p.StringToTypeId(sElemType) + if err != nil { + return elemType, size, err + } + nSize, err2 := p.ReadI64() + size = int(nSize) + return elemType, size, err2 +} + +func (p *TJSONProtocol) readElemListBegin() (elemType TType, size int, e error) { + if isNull, e := p.ParseListBegin(); isNull || e != nil { + return VOID, 0, e + } + sElemType, err := p.ReadString() + if err != nil { + return VOID, size, err + } + elemType, err = p.StringToTypeId(sElemType) + if err != nil { + return elemType, size, err + } + nSize, err2 := p.ReadI64() + size = int(nSize) + return elemType, size, err2 +} + +func (p *TJSONProtocol) writeElemListBegin(elemType TType, size int) error { + if e := p.OutputListBegin(); e != nil { + return e + } + s, e1 := p.TypeIdToString(elemType) + if e1 != nil { + return e1 + } + if e := p.OutputString(s); e != nil { + return e + } + if e := p.OutputI64(int64(size)); e != nil { + return e + } + return nil +} + +func (p *TJSONProtocol) TypeIdToString(fieldType TType) (string, error) { + switch byte(fieldType) { + case BOOL: + return "tf", nil + case BYTE: + return "i8", 
nil + case I16: + return "i16", nil + case I32: + return "i32", nil + case I64: + return "i64", nil + case DOUBLE: + return "dbl", nil + case STRING: + return "str", nil + case STRUCT: + return "rec", nil + case MAP: + return "map", nil + case SET: + return "set", nil + case LIST: + return "lst", nil + } + + e := fmt.Errorf("Unknown fieldType: %d", int(fieldType)) + return "", NewTProtocolExceptionWithType(INVALID_DATA, e) +} + +func (p *TJSONProtocol) StringToTypeId(fieldType string) (TType, error) { + switch fieldType { + case "tf": + return TType(BOOL), nil + case "i8": + return TType(BYTE), nil + case "i16": + return TType(I16), nil + case "i32": + return TType(I32), nil + case "i64": + return TType(I64), nil + case "dbl": + return TType(DOUBLE), nil + case "str": + return TType(STRING), nil + case "rec": + return TType(STRUCT), nil + case "map": + return TType(MAP), nil + case "set": + return TType(SET), nil + case "lst": + return TType(LIST), nil + } + + e := fmt.Errorf("Unknown type identifier: %s", fieldType) + return TType(STOP), NewTProtocolExceptionWithType(INVALID_DATA, e) +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/memory_buffer.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/memory_buffer.go new file mode 100644 index 000000000..5936d2730 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/memory_buffer.go @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "bytes" + "context" +) + +// Memory buffer-based implementation of the TTransport interface. +type TMemoryBuffer struct { + *bytes.Buffer + size int +} + +type TMemoryBufferTransportFactory struct { + size int +} + +func (p *TMemoryBufferTransportFactory) GetTransport(trans TTransport) (TTransport, error) { + if trans != nil { + t, ok := trans.(*TMemoryBuffer) + if ok && t.size > 0 { + return NewTMemoryBufferLen(t.size), nil + } + } + return NewTMemoryBufferLen(p.size), nil +} + +func NewTMemoryBufferTransportFactory(size int) *TMemoryBufferTransportFactory { + return &TMemoryBufferTransportFactory{size: size} +} + +func NewTMemoryBuffer() *TMemoryBuffer { + return &TMemoryBuffer{Buffer: &bytes.Buffer{}, size: 0} +} + +func NewTMemoryBufferLen(size int) *TMemoryBuffer { + buf := make([]byte, 0, size) + return &TMemoryBuffer{Buffer: bytes.NewBuffer(buf), size: size} +} + +func (p *TMemoryBuffer) IsOpen() bool { + return true +} + +func (p *TMemoryBuffer) Open() error { + return nil +} + +func (p *TMemoryBuffer) Close() error { + p.Buffer.Reset() + return nil +} + +// Flushing a memory buffer is a no-op +func (p *TMemoryBuffer) Flush(ctx context.Context) error { + return nil +} + +func (p *TMemoryBuffer) RemainingBytes() (num_bytes uint64) { + return uint64(p.Buffer.Len()) +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/messagetype.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/messagetype.go new file mode 100644 index 000000000..25ab2e98a --- /dev/null +++ 
b/vendor/git.apache.org/thrift.git/lib/go/thrift/messagetype.go @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +// Message type constants in the Thrift protocol. +type TMessageType int32 + +const ( + INVALID_TMESSAGE_TYPE TMessageType = 0 + CALL TMessageType = 1 + REPLY TMessageType = 2 + EXCEPTION TMessageType = 3 + ONEWAY TMessageType = 4 +) diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/multiplexed_protocol.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/multiplexed_protocol.go new file mode 100644 index 000000000..d028a30b3 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/multiplexed_protocol.go @@ -0,0 +1,170 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "context" + "fmt" + "strings" +) + +/* +TMultiplexedProtocol is a protocol-independent concrete decorator +that allows a Thrift client to communicate with a multiplexing Thrift server, +by prepending the service name to the function name during function calls. + +NOTE: THIS IS NOT USED BY SERVERS. On the server, use TMultiplexedProcessor to handle request +from a multiplexing client. + +This example uses a single socket transport to invoke two services: + +socket := thrift.NewTSocketFromAddrTimeout(addr, TIMEOUT) +transport := thrift.NewTFramedTransport(socket) +protocol := thrift.NewTBinaryProtocolTransport(transport) + +mp := thrift.NewTMultiplexedProtocol(protocol, "Calculator") +service := Calculator.NewCalculatorClient(mp) + +mp2 := thrift.NewTMultiplexedProtocol(protocol, "WeatherReport") +service2 := WeatherReport.NewWeatherReportClient(mp2) + +err := transport.Open() +if err != nil { + t.Fatal("Unable to open client socket", err) +} + +fmt.Println(service.Add(2,2)) +fmt.Println(service2.GetTemperature()) +*/ + +type TMultiplexedProtocol struct { + TProtocol + serviceName string +} + +const MULTIPLEXED_SEPARATOR = ":" + +func NewTMultiplexedProtocol(protocol TProtocol, serviceName string) *TMultiplexedProtocol { + return &TMultiplexedProtocol{ + TProtocol: protocol, + serviceName: serviceName, + } +} + +func (t *TMultiplexedProtocol) WriteMessageBegin(name string, typeId TMessageType, seqid int32) error { + if typeId == CALL || typeId == ONEWAY { + return 
t.TProtocol.WriteMessageBegin(t.serviceName+MULTIPLEXED_SEPARATOR+name, typeId, seqid) + } else { + return t.TProtocol.WriteMessageBegin(name, typeId, seqid) + } +} + +/* +TMultiplexedProcessor is a TProcessor allowing +a single TServer to provide multiple services. + +To do so, you instantiate the processor and then register additional +processors with it, as shown in the following example: + +var processor = thrift.NewTMultiplexedProcessor() + +firstProcessor := +processor.RegisterProcessor("FirstService", firstProcessor) + +processor.registerProcessor( + "Calculator", + Calculator.NewCalculatorProcessor(&CalculatorHandler{}), +) + +processor.registerProcessor( + "WeatherReport", + WeatherReport.NewWeatherReportProcessor(&WeatherReportHandler{}), +) + +serverTransport, err := thrift.NewTServerSocketTimeout(addr, TIMEOUT) +if err != nil { + t.Fatal("Unable to create server socket", err) +} +server := thrift.NewTSimpleServer2(processor, serverTransport) +server.Serve(); +*/ + +type TMultiplexedProcessor struct { + serviceProcessorMap map[string]TProcessor + DefaultProcessor TProcessor +} + +func NewTMultiplexedProcessor() *TMultiplexedProcessor { + return &TMultiplexedProcessor{ + serviceProcessorMap: make(map[string]TProcessor), + } +} + +func (t *TMultiplexedProcessor) RegisterDefault(processor TProcessor) { + t.DefaultProcessor = processor +} + +func (t *TMultiplexedProcessor) RegisterProcessor(name string, processor TProcessor) { + if t.serviceProcessorMap == nil { + t.serviceProcessorMap = make(map[string]TProcessor) + } + t.serviceProcessorMap[name] = processor +} + +func (t *TMultiplexedProcessor) Process(ctx context.Context, in, out TProtocol) (bool, TException) { + name, typeId, seqid, err := in.ReadMessageBegin() + if err != nil { + return false, err + } + if typeId != CALL && typeId != ONEWAY { + return false, fmt.Errorf("Unexpected message type %v", typeId) + } + //extract the service name + v := strings.SplitN(name, MULTIPLEXED_SEPARATOR, 2) + if 
len(v) != 2 { + if t.DefaultProcessor != nil { + smb := NewStoredMessageProtocol(in, name, typeId, seqid) + return t.DefaultProcessor.Process(ctx, smb, out) + } + return false, fmt.Errorf("Service name not found in message name: %s. Did you forget to use a TMultiplexProtocol in your client?", name) + } + actualProcessor, ok := t.serviceProcessorMap[v[0]] + if !ok { + return false, fmt.Errorf("Service name not found: %s. Did you forget to call registerProcessor()?", v[0]) + } + smb := NewStoredMessageProtocol(in, v[1], typeId, seqid) + return actualProcessor.Process(ctx, smb, out) +} + +//Protocol that use stored message for ReadMessageBegin +type storedMessageProtocol struct { + TProtocol + name string + typeId TMessageType + seqid int32 +} + +func NewStoredMessageProtocol(protocol TProtocol, name string, typeId TMessageType, seqid int32) *storedMessageProtocol { + return &storedMessageProtocol{protocol, name, typeId, seqid} +} + +func (s *storedMessageProtocol) ReadMessageBegin() (name string, typeId TMessageType, seqid int32, err error) { + return s.name, s.typeId, s.seqid, nil +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/numeric.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/numeric.go new file mode 100644 index 000000000..aa8daa9b5 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/numeric.go @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "math" + "strconv" +) + +type Numeric interface { + Int64() int64 + Int32() int32 + Int16() int16 + Byte() byte + Int() int + Float64() float64 + Float32() float32 + String() string + isNull() bool +} + +type numeric struct { + iValue int64 + dValue float64 + sValue string + isNil bool +} + +var ( + INFINITY Numeric + NEGATIVE_INFINITY Numeric + NAN Numeric + ZERO Numeric + NUMERIC_NULL Numeric +) + +func NewNumericFromDouble(dValue float64) Numeric { + if math.IsInf(dValue, 1) { + return INFINITY + } + if math.IsInf(dValue, -1) { + return NEGATIVE_INFINITY + } + if math.IsNaN(dValue) { + return NAN + } + iValue := int64(dValue) + sValue := strconv.FormatFloat(dValue, 'g', 10, 64) + isNil := false + return &numeric{iValue: iValue, dValue: dValue, sValue: sValue, isNil: isNil} +} + +func NewNumericFromI64(iValue int64) Numeric { + dValue := float64(iValue) + sValue := string(iValue) + isNil := false + return &numeric{iValue: iValue, dValue: dValue, sValue: sValue, isNil: isNil} +} + +func NewNumericFromI32(iValue int32) Numeric { + dValue := float64(iValue) + sValue := string(iValue) + isNil := false + return &numeric{iValue: int64(iValue), dValue: dValue, sValue: sValue, isNil: isNil} +} + +func NewNumericFromString(sValue string) Numeric { + if sValue == INFINITY.String() { + return INFINITY + } + if sValue == NEGATIVE_INFINITY.String() { + return NEGATIVE_INFINITY + } + if sValue == NAN.String() { + return NAN + } + iValue, _ := strconv.ParseInt(sValue, 10, 64) + dValue, _ := strconv.ParseFloat(sValue, 64) + 
isNil := len(sValue) == 0 + return &numeric{iValue: iValue, dValue: dValue, sValue: sValue, isNil: isNil} +} + +func NewNumericFromJSONString(sValue string, isNull bool) Numeric { + if isNull { + return NewNullNumeric() + } + if sValue == JSON_INFINITY { + return INFINITY + } + if sValue == JSON_NEGATIVE_INFINITY { + return NEGATIVE_INFINITY + } + if sValue == JSON_NAN { + return NAN + } + iValue, _ := strconv.ParseInt(sValue, 10, 64) + dValue, _ := strconv.ParseFloat(sValue, 64) + return &numeric{iValue: iValue, dValue: dValue, sValue: sValue, isNil: isNull} +} + +func NewNullNumeric() Numeric { + return &numeric{iValue: 0, dValue: 0.0, sValue: "", isNil: true} +} + +func (p *numeric) Int64() int64 { + return p.iValue +} + +func (p *numeric) Int32() int32 { + return int32(p.iValue) +} + +func (p *numeric) Int16() int16 { + return int16(p.iValue) +} + +func (p *numeric) Byte() byte { + return byte(p.iValue) +} + +func (p *numeric) Int() int { + return int(p.iValue) +} + +func (p *numeric) Float64() float64 { + return p.dValue +} + +func (p *numeric) Float32() float32 { + return float32(p.dValue) +} + +func (p *numeric) String() string { + return p.sValue +} + +func (p *numeric) isNull() bool { + return p.isNil +} + +func init() { + INFINITY = &numeric{iValue: 0, dValue: math.Inf(1), sValue: "Infinity", isNil: false} + NEGATIVE_INFINITY = &numeric{iValue: 0, dValue: math.Inf(-1), sValue: "-Infinity", isNil: false} + NAN = &numeric{iValue: 0, dValue: math.NaN(), sValue: "NaN", isNil: false} + ZERO = &numeric{iValue: 0, dValue: 0, sValue: "0", isNil: false} + NUMERIC_NULL = &numeric{iValue: 0, dValue: 0, sValue: "0", isNil: true} +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/pointerize.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/pointerize.go new file mode 100644 index 000000000..8d6b2c215 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/pointerize.go @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) 
under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +/////////////////////////////////////////////////////////////////////////////// +// This file is home to helpers that convert from various base types to +// respective pointer types. This is necessary because Go does not permit +// references to constants, nor can a pointer type to base type be allocated +// and initialized in a single expression. +// +// E.g., this is not allowed: +// +// var ip *int = &5 +// +// But this *is* allowed: +// +// func IntPtr(i int) *int { return &i } +// var ip *int = IntPtr(5) +// +// Since pointers to base types are commonplace as [optional] fields in +// exported thrift structs, we factor such helpers here. 
+/////////////////////////////////////////////////////////////////////////////// + +func Float32Ptr(v float32) *float32 { return &v } +func Float64Ptr(v float64) *float64 { return &v } +func IntPtr(v int) *int { return &v } +func Int32Ptr(v int32) *int32 { return &v } +func Int64Ptr(v int64) *int64 { return &v } +func StringPtr(v string) *string { return &v } +func Uint32Ptr(v uint32) *uint32 { return &v } +func Uint64Ptr(v uint64) *uint64 { return &v } +func BoolPtr(v bool) *bool { return &v } +func ByteSlicePtr(v []byte) *[]byte { return &v } diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/processor_factory.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/processor_factory.go new file mode 100644 index 000000000..e4b132b30 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/processor_factory.go @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import "context" + +// A processor is a generic object which operates upon an input stream and +// writes to some output stream. 
+type TProcessor interface { + Process(ctx context.Context, in, out TProtocol) (bool, TException) +} + +type TProcessorFunction interface { + Process(ctx context.Context, seqId int32, in, out TProtocol) (bool, TException) +} + +// The default processor factory just returns a singleton +// instance. +type TProcessorFactory interface { + GetProcessor(trans TTransport) TProcessor +} + +type tProcessorFactory struct { + processor TProcessor +} + +func NewTProcessorFactory(p TProcessor) TProcessorFactory { + return &tProcessorFactory{processor: p} +} + +func (p *tProcessorFactory) GetProcessor(trans TTransport) TProcessor { + return p.processor +} + +/** + * The default processor factory just returns a singleton + * instance. + */ +type TProcessorFunctionFactory interface { + GetProcessorFunction(trans TTransport) TProcessorFunction +} + +type tProcessorFunctionFactory struct { + processor TProcessorFunction +} + +func NewTProcessorFunctionFactory(p TProcessorFunction) TProcessorFunctionFactory { + return &tProcessorFunctionFactory{processor: p} +} + +func (p *tProcessorFunctionFactory) GetProcessorFunction(trans TTransport) TProcessorFunction { + return p.processor +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/protocol.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/protocol.go new file mode 100644 index 000000000..615b7a4a8 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/protocol.go @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "context" + "errors" + "fmt" +) + +const ( + VERSION_MASK = 0xffff0000 + VERSION_1 = 0x80010000 +) + +type TProtocol interface { + WriteMessageBegin(name string, typeId TMessageType, seqid int32) error + WriteMessageEnd() error + WriteStructBegin(name string) error + WriteStructEnd() error + WriteFieldBegin(name string, typeId TType, id int16) error + WriteFieldEnd() error + WriteFieldStop() error + WriteMapBegin(keyType TType, valueType TType, size int) error + WriteMapEnd() error + WriteListBegin(elemType TType, size int) error + WriteListEnd() error + WriteSetBegin(elemType TType, size int) error + WriteSetEnd() error + WriteBool(value bool) error + WriteByte(value int8) error + WriteI16(value int16) error + WriteI32(value int32) error + WriteI64(value int64) error + WriteDouble(value float64) error + WriteString(value string) error + WriteBinary(value []byte) error + + ReadMessageBegin() (name string, typeId TMessageType, seqid int32, err error) + ReadMessageEnd() error + ReadStructBegin() (name string, err error) + ReadStructEnd() error + ReadFieldBegin() (name string, typeId TType, id int16, err error) + ReadFieldEnd() error + ReadMapBegin() (keyType TType, valueType TType, size int, err error) + ReadMapEnd() error + ReadListBegin() (elemType TType, size int, err error) + ReadListEnd() error + ReadSetBegin() (elemType TType, size int, err error) + ReadSetEnd() error + ReadBool() (value bool, err error) + ReadByte() (value int8, err error) + ReadI16() (value int16, err error) + ReadI32() (value int32, err 
error) + ReadI64() (value int64, err error) + ReadDouble() (value float64, err error) + ReadString() (value string, err error) + ReadBinary() (value []byte, err error) + + Skip(fieldType TType) (err error) + Flush(ctx context.Context) (err error) + + Transport() TTransport +} + +// The maximum recursive depth the skip() function will traverse +const DEFAULT_RECURSION_DEPTH = 64 + +// Skips over the next data element from the provided input TProtocol object. +func SkipDefaultDepth(prot TProtocol, typeId TType) (err error) { + return Skip(prot, typeId, DEFAULT_RECURSION_DEPTH) +} + +// Skips over the next data element from the provided input TProtocol object. +func Skip(self TProtocol, fieldType TType, maxDepth int) (err error) { + + if maxDepth <= 0 { + return NewTProtocolExceptionWithType(DEPTH_LIMIT, errors.New("Depth limit exceeded")) + } + + switch fieldType { + case STOP: + return + case BOOL: + _, err = self.ReadBool() + return + case BYTE: + _, err = self.ReadByte() + return + case I16: + _, err = self.ReadI16() + return + case I32: + _, err = self.ReadI32() + return + case I64: + _, err = self.ReadI64() + return + case DOUBLE: + _, err = self.ReadDouble() + return + case STRING: + _, err = self.ReadString() + return + case STRUCT: + if _, err = self.ReadStructBegin(); err != nil { + return err + } + for { + _, typeId, _, _ := self.ReadFieldBegin() + if typeId == STOP { + break + } + err := Skip(self, typeId, maxDepth-1) + if err != nil { + return err + } + self.ReadFieldEnd() + } + return self.ReadStructEnd() + case MAP: + keyType, valueType, size, err := self.ReadMapBegin() + if err != nil { + return err + } + for i := 0; i < size; i++ { + err := Skip(self, keyType, maxDepth-1) + if err != nil { + return err + } + self.Skip(valueType) + } + return self.ReadMapEnd() + case SET: + elemType, size, err := self.ReadSetBegin() + if err != nil { + return err + } + for i := 0; i < size; i++ { + err := Skip(self, elemType, maxDepth-1) + if err != nil { + return err 
+ } + } + return self.ReadSetEnd() + case LIST: + elemType, size, err := self.ReadListBegin() + if err != nil { + return err + } + for i := 0; i < size; i++ { + err := Skip(self, elemType, maxDepth-1) + if err != nil { + return err + } + } + return self.ReadListEnd() + default: + return NewTProtocolExceptionWithType(INVALID_DATA, errors.New(fmt.Sprintf("Unknown data type %d", fieldType))) + } + return nil +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/protocol_exception.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/protocol_exception.go new file mode 100644 index 000000000..29ab75d92 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/protocol_exception.go @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package thrift + +import ( + "encoding/base64" +) + +// Thrift Protocol exception +type TProtocolException interface { + TException + TypeId() int +} + +const ( + UNKNOWN_PROTOCOL_EXCEPTION = 0 + INVALID_DATA = 1 + NEGATIVE_SIZE = 2 + SIZE_LIMIT = 3 + BAD_VERSION = 4 + NOT_IMPLEMENTED = 5 + DEPTH_LIMIT = 6 +) + +type tProtocolException struct { + typeId int + message string +} + +func (p *tProtocolException) TypeId() int { + return p.typeId +} + +func (p *tProtocolException) String() string { + return p.message +} + +func (p *tProtocolException) Error() string { + return p.message +} + +func NewTProtocolException(err error) TProtocolException { + if err == nil { + return nil + } + if e, ok := err.(TProtocolException); ok { + return e + } + if _, ok := err.(base64.CorruptInputError); ok { + return &tProtocolException{INVALID_DATA, err.Error()} + } + return &tProtocolException{UNKNOWN_PROTOCOL_EXCEPTION, err.Error()} +} + +func NewTProtocolExceptionWithType(errType int, err error) TProtocolException { + if err == nil { + return nil + } + return &tProtocolException{errType, err.Error()} +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/protocol_factory.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/protocol_factory.go new file mode 100644 index 000000000..c40f796d8 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/protocol_factory.go @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +// Factory interface for constructing protocol instances. +type TProtocolFactory interface { + GetProtocol(trans TTransport) TProtocol +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/rich_transport.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/rich_transport.go new file mode 100644 index 000000000..4025bebea --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/rich_transport.go @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package thrift + +import "io" + +type RichTransport struct { + TTransport +} + +// Wraps Transport to provide TRichTransport interface +func NewTRichTransport(trans TTransport) *RichTransport { + return &RichTransport{trans} +} + +func (r *RichTransport) ReadByte() (c byte, err error) { + return readByte(r.TTransport) +} + +func (r *RichTransport) WriteByte(c byte) error { + return writeByte(r.TTransport, c) +} + +func (r *RichTransport) WriteString(s string) (n int, err error) { + return r.Write([]byte(s)) +} + +func (r *RichTransport) RemainingBytes() (num_bytes uint64) { + return r.TTransport.RemainingBytes() +} + +func readByte(r io.Reader) (c byte, err error) { + v := [1]byte{0} + n, err := r.Read(v[0:1]) + if n > 0 && (err == nil || err == io.EOF) { + return v[0], nil + } + if n > 0 && err != nil { + return v[0], err + } + if err != nil { + return 0, err + } + return v[0], nil +} + +func writeByte(w io.Writer, c byte) error { + v := [1]byte{c} + _, err := w.Write(v[0:1]) + return err +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/serializer.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/serializer.go new file mode 100644 index 000000000..1ff4d3754 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/serializer.go @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "context" +) + +type TSerializer struct { + Transport *TMemoryBuffer + Protocol TProtocol +} + +type TStruct interface { + Write(p TProtocol) error + Read(p TProtocol) error +} + +func NewTSerializer() *TSerializer { + transport := NewTMemoryBufferLen(1024) + protocol := NewTBinaryProtocolFactoryDefault().GetProtocol(transport) + + return &TSerializer{ + transport, + protocol} +} + +func (t *TSerializer) WriteString(ctx context.Context, msg TStruct) (s string, err error) { + t.Transport.Reset() + + if err = msg.Write(t.Protocol); err != nil { + return + } + + if err = t.Protocol.Flush(ctx); err != nil { + return + } + if err = t.Transport.Flush(ctx); err != nil { + return + } + + return t.Transport.String(), nil +} + +func (t *TSerializer) Write(ctx context.Context, msg TStruct) (b []byte, err error) { + t.Transport.Reset() + + if err = msg.Write(t.Protocol); err != nil { + return + } + + if err = t.Protocol.Flush(ctx); err != nil { + return + } + + if err = t.Transport.Flush(ctx); err != nil { + return + } + + b = append(b, t.Transport.Bytes()...) + return +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/server.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/server.go new file mode 100644 index 000000000..f813fa353 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/server.go @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +type TServer interface { + ProcessorFactory() TProcessorFactory + ServerTransport() TServerTransport + InputTransportFactory() TTransportFactory + OutputTransportFactory() TTransportFactory + InputProtocolFactory() TProtocolFactory + OutputProtocolFactory() TProtocolFactory + + // Starts the server + Serve() error + // Stops the server. This is optional on a per-implementation basis. Not + // all servers are required to be cleanly stoppable. + Stop() error +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/server_socket.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/server_socket.go new file mode 100644 index 000000000..80313c4be --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/server_socket.go @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "net" + "sync" + "time" +) + +type TServerSocket struct { + listener net.Listener + addr net.Addr + clientTimeout time.Duration + + // Protects the interrupted value to make it thread safe. + mu sync.RWMutex + interrupted bool +} + +func NewTServerSocket(listenAddr string) (*TServerSocket, error) { + return NewTServerSocketTimeout(listenAddr, 0) +} + +func NewTServerSocketTimeout(listenAddr string, clientTimeout time.Duration) (*TServerSocket, error) { + addr, err := net.ResolveTCPAddr("tcp", listenAddr) + if err != nil { + return nil, err + } + return &TServerSocket{addr: addr, clientTimeout: clientTimeout}, nil +} + +// Creates a TServerSocket from a net.Addr +func NewTServerSocketFromAddrTimeout(addr net.Addr, clientTimeout time.Duration) *TServerSocket { + return &TServerSocket{addr: addr, clientTimeout: clientTimeout} +} + +func (p *TServerSocket) Listen() error { + p.mu.Lock() + defer p.mu.Unlock() + if p.IsListening() { + return nil + } + l, err := net.Listen(p.addr.Network(), p.addr.String()) + if err != nil { + return err + } + p.listener = l + return nil +} + +func (p *TServerSocket) Accept() (TTransport, error) { + p.mu.RLock() + interrupted := p.interrupted + p.mu.RUnlock() + + if interrupted { + return nil, errTransportInterrupted + } + + listener := p.listener + if listener == nil { + return nil, NewTTransportException(NOT_OPEN, "No underlying server socket") + } + + conn, err := listener.Accept() + if err != nil { + return nil, NewTTransportExceptionFromError(err) + } + return 
NewTSocketFromConnTimeout(conn, p.clientTimeout), nil +} + +// Checks whether the socket is listening. +func (p *TServerSocket) IsListening() bool { + return p.listener != nil +} + +// Connects the socket, creating a new socket object if necessary. +func (p *TServerSocket) Open() error { + p.mu.Lock() + defer p.mu.Unlock() + if p.IsListening() { + return NewTTransportException(ALREADY_OPEN, "Server socket already open") + } + if l, err := net.Listen(p.addr.Network(), p.addr.String()); err != nil { + return err + } else { + p.listener = l + } + return nil +} + +func (p *TServerSocket) Addr() net.Addr { + if p.listener != nil { + return p.listener.Addr() + } + return p.addr +} + +func (p *TServerSocket) Close() error { + defer func() { + p.listener = nil + }() + if p.IsListening() { + return p.listener.Close() + } + return nil +} + +func (p *TServerSocket) Interrupt() error { + p.mu.Lock() + defer p.mu.Unlock() + p.interrupted = true + p.Close() + + return nil +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/server_transport.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/server_transport.go new file mode 100644 index 000000000..51c40b64a --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/server_transport.go @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +// Server transport. Object which provides client transports. +type TServerTransport interface { + Listen() error + Accept() (TTransport, error) + Close() error + + // Optional method implementation. This signals to the server transport + // that it should break out of any accept() or listen() that it is currently + // blocked on. This method, if implemented, MUST be thread safe, as it may + // be called from a different thread context than the other TServerTransport + // methods. + Interrupt() error +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/simple_json_protocol.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/simple_json_protocol.go new file mode 100644 index 000000000..2e8a71112 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/simple_json_protocol.go @@ -0,0 +1,1338 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package thrift + +import ( + "bufio" + "bytes" + "context" + "encoding/base64" + "encoding/json" + "fmt" + "io" + "math" + "strconv" +) + +type _ParseContext int + +const ( + _CONTEXT_IN_TOPLEVEL _ParseContext = 1 + _CONTEXT_IN_LIST_FIRST _ParseContext = 2 + _CONTEXT_IN_LIST _ParseContext = 3 + _CONTEXT_IN_OBJECT_FIRST _ParseContext = 4 + _CONTEXT_IN_OBJECT_NEXT_KEY _ParseContext = 5 + _CONTEXT_IN_OBJECT_NEXT_VALUE _ParseContext = 6 +) + +func (p _ParseContext) String() string { + switch p { + case _CONTEXT_IN_TOPLEVEL: + return "TOPLEVEL" + case _CONTEXT_IN_LIST_FIRST: + return "LIST-FIRST" + case _CONTEXT_IN_LIST: + return "LIST" + case _CONTEXT_IN_OBJECT_FIRST: + return "OBJECT-FIRST" + case _CONTEXT_IN_OBJECT_NEXT_KEY: + return "OBJECT-NEXT-KEY" + case _CONTEXT_IN_OBJECT_NEXT_VALUE: + return "OBJECT-NEXT-VALUE" + } + return "UNKNOWN-PARSE-CONTEXT" +} + +// JSON protocol implementation for thrift. +// +// This protocol produces/consumes a simple output format +// suitable for parsing by scripting languages. It should not be +// confused with the full-featured TJSONProtocol. 
+// +type TSimpleJSONProtocol struct { + trans TTransport + + parseContextStack []int + dumpContext []int + + writer *bufio.Writer + reader *bufio.Reader +} + +// Constructor +func NewTSimpleJSONProtocol(t TTransport) *TSimpleJSONProtocol { + v := &TSimpleJSONProtocol{trans: t, + writer: bufio.NewWriter(t), + reader: bufio.NewReader(t), + } + v.parseContextStack = append(v.parseContextStack, int(_CONTEXT_IN_TOPLEVEL)) + v.dumpContext = append(v.dumpContext, int(_CONTEXT_IN_TOPLEVEL)) + return v +} + +// Factory +type TSimpleJSONProtocolFactory struct{} + +func (p *TSimpleJSONProtocolFactory) GetProtocol(trans TTransport) TProtocol { + return NewTSimpleJSONProtocol(trans) +} + +func NewTSimpleJSONProtocolFactory() *TSimpleJSONProtocolFactory { + return &TSimpleJSONProtocolFactory{} +} + +var ( + JSON_COMMA []byte + JSON_COLON []byte + JSON_LBRACE []byte + JSON_RBRACE []byte + JSON_LBRACKET []byte + JSON_RBRACKET []byte + JSON_QUOTE byte + JSON_QUOTE_BYTES []byte + JSON_NULL []byte + JSON_TRUE []byte + JSON_FALSE []byte + JSON_INFINITY string + JSON_NEGATIVE_INFINITY string + JSON_NAN string + JSON_INFINITY_BYTES []byte + JSON_NEGATIVE_INFINITY_BYTES []byte + JSON_NAN_BYTES []byte + json_nonbase_map_elem_bytes []byte +) + +func init() { + JSON_COMMA = []byte{','} + JSON_COLON = []byte{':'} + JSON_LBRACE = []byte{'{'} + JSON_RBRACE = []byte{'}'} + JSON_LBRACKET = []byte{'['} + JSON_RBRACKET = []byte{']'} + JSON_QUOTE = '"' + JSON_QUOTE_BYTES = []byte{'"'} + JSON_NULL = []byte{'n', 'u', 'l', 'l'} + JSON_TRUE = []byte{'t', 'r', 'u', 'e'} + JSON_FALSE = []byte{'f', 'a', 'l', 's', 'e'} + JSON_INFINITY = "Infinity" + JSON_NEGATIVE_INFINITY = "-Infinity" + JSON_NAN = "NaN" + JSON_INFINITY_BYTES = []byte{'I', 'n', 'f', 'i', 'n', 'i', 't', 'y'} + JSON_NEGATIVE_INFINITY_BYTES = []byte{'-', 'I', 'n', 'f', 'i', 'n', 'i', 't', 'y'} + JSON_NAN_BYTES = []byte{'N', 'a', 'N'} + json_nonbase_map_elem_bytes = []byte{']', ',', '['} +} + +func jsonQuote(s string) string { + b, _ := 
json.Marshal(s) + s1 := string(b) + return s1 +} + +func jsonUnquote(s string) (string, bool) { + s1 := new(string) + err := json.Unmarshal([]byte(s), s1) + return *s1, err == nil +} + +func mismatch(expected, actual string) error { + return fmt.Errorf("Expected '%s' but found '%s' while parsing JSON.", expected, actual) +} + +func (p *TSimpleJSONProtocol) WriteMessageBegin(name string, typeId TMessageType, seqId int32) error { + p.resetContextStack() // THRIFT-3735 + if e := p.OutputListBegin(); e != nil { + return e + } + if e := p.WriteString(name); e != nil { + return e + } + if e := p.WriteByte(int8(typeId)); e != nil { + return e + } + if e := p.WriteI32(seqId); e != nil { + return e + } + return nil +} + +func (p *TSimpleJSONProtocol) WriteMessageEnd() error { + return p.OutputListEnd() +} + +func (p *TSimpleJSONProtocol) WriteStructBegin(name string) error { + if e := p.OutputObjectBegin(); e != nil { + return e + } + return nil +} + +func (p *TSimpleJSONProtocol) WriteStructEnd() error { + return p.OutputObjectEnd() +} + +func (p *TSimpleJSONProtocol) WriteFieldBegin(name string, typeId TType, id int16) error { + if e := p.WriteString(name); e != nil { + return e + } + return nil +} + +func (p *TSimpleJSONProtocol) WriteFieldEnd() error { + //return p.OutputListEnd() + return nil +} + +func (p *TSimpleJSONProtocol) WriteFieldStop() error { return nil } + +func (p *TSimpleJSONProtocol) WriteMapBegin(keyType TType, valueType TType, size int) error { + if e := p.OutputListBegin(); e != nil { + return e + } + if e := p.WriteByte(int8(keyType)); e != nil { + return e + } + if e := p.WriteByte(int8(valueType)); e != nil { + return e + } + return p.WriteI32(int32(size)) +} + +func (p *TSimpleJSONProtocol) WriteMapEnd() error { + return p.OutputListEnd() +} + +func (p *TSimpleJSONProtocol) WriteListBegin(elemType TType, size int) error { + return p.OutputElemListBegin(elemType, size) +} + +func (p *TSimpleJSONProtocol) WriteListEnd() error { + return 
p.OutputListEnd() +} + +func (p *TSimpleJSONProtocol) WriteSetBegin(elemType TType, size int) error { + return p.OutputElemListBegin(elemType, size) +} + +func (p *TSimpleJSONProtocol) WriteSetEnd() error { + return p.OutputListEnd() +} + +func (p *TSimpleJSONProtocol) WriteBool(b bool) error { + return p.OutputBool(b) +} + +func (p *TSimpleJSONProtocol) WriteByte(b int8) error { + return p.WriteI32(int32(b)) +} + +func (p *TSimpleJSONProtocol) WriteI16(v int16) error { + return p.WriteI32(int32(v)) +} + +func (p *TSimpleJSONProtocol) WriteI32(v int32) error { + return p.OutputI64(int64(v)) +} + +func (p *TSimpleJSONProtocol) WriteI64(v int64) error { + return p.OutputI64(int64(v)) +} + +func (p *TSimpleJSONProtocol) WriteDouble(v float64) error { + return p.OutputF64(v) +} + +func (p *TSimpleJSONProtocol) WriteString(v string) error { + return p.OutputString(v) +} + +func (p *TSimpleJSONProtocol) WriteBinary(v []byte) error { + // JSON library only takes in a string, + // not an arbitrary byte array, to ensure bytes are transmitted + // efficiently we must convert this into a valid JSON string + // therefore we use base64 encoding to avoid excessive escaping/quoting + if e := p.OutputPreValue(); e != nil { + return e + } + if _, e := p.write(JSON_QUOTE_BYTES); e != nil { + return NewTProtocolException(e) + } + writer := base64.NewEncoder(base64.StdEncoding, p.writer) + if _, e := writer.Write(v); e != nil { + p.writer.Reset(p.trans) // THRIFT-3735 + return NewTProtocolException(e) + } + if e := writer.Close(); e != nil { + return NewTProtocolException(e) + } + if _, e := p.write(JSON_QUOTE_BYTES); e != nil { + return NewTProtocolException(e) + } + return p.OutputPostValue() +} + +// Reading methods. 
+func (p *TSimpleJSONProtocol) ReadMessageBegin() (name string, typeId TMessageType, seqId int32, err error) { + p.resetContextStack() // THRIFT-3735 + if isNull, err := p.ParseListBegin(); isNull || err != nil { + return name, typeId, seqId, err + } + if name, err = p.ReadString(); err != nil { + return name, typeId, seqId, err + } + bTypeId, err := p.ReadByte() + typeId = TMessageType(bTypeId) + if err != nil { + return name, typeId, seqId, err + } + if seqId, err = p.ReadI32(); err != nil { + return name, typeId, seqId, err + } + return name, typeId, seqId, nil +} + +func (p *TSimpleJSONProtocol) ReadMessageEnd() error { + return p.ParseListEnd() +} + +func (p *TSimpleJSONProtocol) ReadStructBegin() (name string, err error) { + _, err = p.ParseObjectStart() + return "", err +} + +func (p *TSimpleJSONProtocol) ReadStructEnd() error { + return p.ParseObjectEnd() +} + +func (p *TSimpleJSONProtocol) ReadFieldBegin() (string, TType, int16, error) { + if err := p.ParsePreValue(); err != nil { + return "", STOP, 0, err + } + b, _ := p.reader.Peek(1) + if len(b) > 0 { + switch b[0] { + case JSON_RBRACE[0]: + return "", STOP, 0, nil + case JSON_QUOTE: + p.reader.ReadByte() + name, err := p.ParseStringBody() + // simplejson is not meant to be read back into thrift + // - see http://wiki.apache.org/thrift/ThriftUsageJava + // - use JSON instead + if err != nil { + return name, STOP, 0, err + } + return name, STOP, -1, p.ParsePostValue() + /* + if err = p.ParsePostValue(); err != nil { + return name, STOP, 0, err + } + if isNull, err := p.ParseListBegin(); isNull || err != nil { + return name, STOP, 0, err + } + bType, err := p.ReadByte() + thetype := TType(bType) + if err != nil { + return name, thetype, 0, err + } + id, err := p.ReadI16() + return name, thetype, id, err + */ + } + e := fmt.Errorf("Expected \"}\" or '\"', but found: '%s'", string(b)) + return "", STOP, 0, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + return "", STOP, 0, NewTProtocolException(io.EOF) 
+} + +func (p *TSimpleJSONProtocol) ReadFieldEnd() error { + return nil + //return p.ParseListEnd() +} + +func (p *TSimpleJSONProtocol) ReadMapBegin() (keyType TType, valueType TType, size int, e error) { + if isNull, e := p.ParseListBegin(); isNull || e != nil { + return VOID, VOID, 0, e + } + + // read keyType + bKeyType, e := p.ReadByte() + keyType = TType(bKeyType) + if e != nil { + return keyType, valueType, size, e + } + + // read valueType + bValueType, e := p.ReadByte() + valueType = TType(bValueType) + if e != nil { + return keyType, valueType, size, e + } + + // read size + iSize, err := p.ReadI64() + size = int(iSize) + return keyType, valueType, size, err +} + +func (p *TSimpleJSONProtocol) ReadMapEnd() error { + return p.ParseListEnd() +} + +func (p *TSimpleJSONProtocol) ReadListBegin() (elemType TType, size int, e error) { + return p.ParseElemListBegin() +} + +func (p *TSimpleJSONProtocol) ReadListEnd() error { + return p.ParseListEnd() +} + +func (p *TSimpleJSONProtocol) ReadSetBegin() (elemType TType, size int, e error) { + return p.ParseElemListBegin() +} + +func (p *TSimpleJSONProtocol) ReadSetEnd() error { + return p.ParseListEnd() +} + +func (p *TSimpleJSONProtocol) ReadBool() (bool, error) { + var value bool + + if err := p.ParsePreValue(); err != nil { + return value, err + } + f, _ := p.reader.Peek(1) + if len(f) > 0 { + switch f[0] { + case JSON_TRUE[0]: + b := make([]byte, len(JSON_TRUE)) + _, err := p.reader.Read(b) + if err != nil { + return false, NewTProtocolException(err) + } + if string(b) == string(JSON_TRUE) { + value = true + } else { + e := fmt.Errorf("Expected \"true\" but found: %s", string(b)) + return value, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + break + case JSON_FALSE[0]: + b := make([]byte, len(JSON_FALSE)) + _, err := p.reader.Read(b) + if err != nil { + return false, NewTProtocolException(err) + } + if string(b) == string(JSON_FALSE) { + value = false + } else { + e := fmt.Errorf("Expected \"false\" but 
found: %s", string(b)) + return value, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + break + case JSON_NULL[0]: + b := make([]byte, len(JSON_NULL)) + _, err := p.reader.Read(b) + if err != nil { + return false, NewTProtocolException(err) + } + if string(b) == string(JSON_NULL) { + value = false + } else { + e := fmt.Errorf("Expected \"null\" but found: %s", string(b)) + return value, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + default: + e := fmt.Errorf("Expected \"true\", \"false\", or \"null\" but found: %s", string(f)) + return value, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + } + return value, p.ParsePostValue() +} + +func (p *TSimpleJSONProtocol) ReadByte() (int8, error) { + v, err := p.ReadI64() + return int8(v), err +} + +func (p *TSimpleJSONProtocol) ReadI16() (int16, error) { + v, err := p.ReadI64() + return int16(v), err +} + +func (p *TSimpleJSONProtocol) ReadI32() (int32, error) { + v, err := p.ReadI64() + return int32(v), err +} + +func (p *TSimpleJSONProtocol) ReadI64() (int64, error) { + v, _, err := p.ParseI64() + return v, err +} + +func (p *TSimpleJSONProtocol) ReadDouble() (float64, error) { + v, _, err := p.ParseF64() + return v, err +} + +func (p *TSimpleJSONProtocol) ReadString() (string, error) { + var v string + if err := p.ParsePreValue(); err != nil { + return v, err + } + f, _ := p.reader.Peek(1) + if len(f) > 0 && f[0] == JSON_QUOTE { + p.reader.ReadByte() + value, err := p.ParseStringBody() + v = value + if err != nil { + return v, err + } + } else if len(f) > 0 && f[0] == JSON_NULL[0] { + b := make([]byte, len(JSON_NULL)) + _, err := p.reader.Read(b) + if err != nil { + return v, NewTProtocolException(err) + } + if string(b) != string(JSON_NULL) { + e := fmt.Errorf("Expected a JSON string, found unquoted data started with %s", string(b)) + return v, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + } else { + e := fmt.Errorf("Expected a JSON string, found unquoted data started with %s", string(f)) + return 
v, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + return v, p.ParsePostValue() +} + +func (p *TSimpleJSONProtocol) ReadBinary() ([]byte, error) { + var v []byte + if err := p.ParsePreValue(); err != nil { + return nil, err + } + f, _ := p.reader.Peek(1) + if len(f) > 0 && f[0] == JSON_QUOTE { + p.reader.ReadByte() + value, err := p.ParseBase64EncodedBody() + v = value + if err != nil { + return v, err + } + } else if len(f) > 0 && f[0] == JSON_NULL[0] { + b := make([]byte, len(JSON_NULL)) + _, err := p.reader.Read(b) + if err != nil { + return v, NewTProtocolException(err) + } + if string(b) != string(JSON_NULL) { + e := fmt.Errorf("Expected a JSON string, found unquoted data started with %s", string(b)) + return v, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + } else { + e := fmt.Errorf("Expected a JSON string, found unquoted data started with %s", string(f)) + return v, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + + return v, p.ParsePostValue() +} + +func (p *TSimpleJSONProtocol) Flush(ctx context.Context) (err error) { + return NewTProtocolException(p.writer.Flush()) +} + +func (p *TSimpleJSONProtocol) Skip(fieldType TType) (err error) { + return SkipDefaultDepth(p, fieldType) +} + +func (p *TSimpleJSONProtocol) Transport() TTransport { + return p.trans +} + +func (p *TSimpleJSONProtocol) OutputPreValue() error { + cxt := _ParseContext(p.dumpContext[len(p.dumpContext)-1]) + switch cxt { + case _CONTEXT_IN_LIST, _CONTEXT_IN_OBJECT_NEXT_KEY: + if _, e := p.write(JSON_COMMA); e != nil { + return NewTProtocolException(e) + } + break + case _CONTEXT_IN_OBJECT_NEXT_VALUE: + if _, e := p.write(JSON_COLON); e != nil { + return NewTProtocolException(e) + } + break + } + return nil +} + +func (p *TSimpleJSONProtocol) OutputPostValue() error { + cxt := _ParseContext(p.dumpContext[len(p.dumpContext)-1]) + switch cxt { + case _CONTEXT_IN_LIST_FIRST: + p.dumpContext = p.dumpContext[:len(p.dumpContext)-1] + p.dumpContext = append(p.dumpContext, 
int(_CONTEXT_IN_LIST)) + break + case _CONTEXT_IN_OBJECT_FIRST: + p.dumpContext = p.dumpContext[:len(p.dumpContext)-1] + p.dumpContext = append(p.dumpContext, int(_CONTEXT_IN_OBJECT_NEXT_VALUE)) + break + case _CONTEXT_IN_OBJECT_NEXT_KEY: + p.dumpContext = p.dumpContext[:len(p.dumpContext)-1] + p.dumpContext = append(p.dumpContext, int(_CONTEXT_IN_OBJECT_NEXT_VALUE)) + break + case _CONTEXT_IN_OBJECT_NEXT_VALUE: + p.dumpContext = p.dumpContext[:len(p.dumpContext)-1] + p.dumpContext = append(p.dumpContext, int(_CONTEXT_IN_OBJECT_NEXT_KEY)) + break + } + return nil +} + +func (p *TSimpleJSONProtocol) OutputBool(value bool) error { + if e := p.OutputPreValue(); e != nil { + return e + } + var v string + if value { + v = string(JSON_TRUE) + } else { + v = string(JSON_FALSE) + } + switch _ParseContext(p.dumpContext[len(p.dumpContext)-1]) { + case _CONTEXT_IN_OBJECT_FIRST, _CONTEXT_IN_OBJECT_NEXT_KEY: + v = jsonQuote(v) + default: + } + if e := p.OutputStringData(v); e != nil { + return e + } + return p.OutputPostValue() +} + +func (p *TSimpleJSONProtocol) OutputNull() error { + if e := p.OutputPreValue(); e != nil { + return e + } + if _, e := p.write(JSON_NULL); e != nil { + return NewTProtocolException(e) + } + return p.OutputPostValue() +} + +func (p *TSimpleJSONProtocol) OutputF64(value float64) error { + if e := p.OutputPreValue(); e != nil { + return e + } + var v string + if math.IsNaN(value) { + v = string(JSON_QUOTE) + JSON_NAN + string(JSON_QUOTE) + } else if math.IsInf(value, 1) { + v = string(JSON_QUOTE) + JSON_INFINITY + string(JSON_QUOTE) + } else if math.IsInf(value, -1) { + v = string(JSON_QUOTE) + JSON_NEGATIVE_INFINITY + string(JSON_QUOTE) + } else { + v = strconv.FormatFloat(value, 'g', -1, 64) + switch _ParseContext(p.dumpContext[len(p.dumpContext)-1]) { + case _CONTEXT_IN_OBJECT_FIRST, _CONTEXT_IN_OBJECT_NEXT_KEY: + v = string(JSON_QUOTE) + v + string(JSON_QUOTE) + default: + } + } + if e := p.OutputStringData(v); e != nil { + return e + } + return 
p.OutputPostValue() +} + +func (p *TSimpleJSONProtocol) OutputI64(value int64) error { + if e := p.OutputPreValue(); e != nil { + return e + } + v := strconv.FormatInt(value, 10) + switch _ParseContext(p.dumpContext[len(p.dumpContext)-1]) { + case _CONTEXT_IN_OBJECT_FIRST, _CONTEXT_IN_OBJECT_NEXT_KEY: + v = jsonQuote(v) + default: + } + if e := p.OutputStringData(v); e != nil { + return e + } + return p.OutputPostValue() +} + +func (p *TSimpleJSONProtocol) OutputString(s string) error { + if e := p.OutputPreValue(); e != nil { + return e + } + if e := p.OutputStringData(jsonQuote(s)); e != nil { + return e + } + return p.OutputPostValue() +} + +func (p *TSimpleJSONProtocol) OutputStringData(s string) error { + _, e := p.write([]byte(s)) + return NewTProtocolException(e) +} + +func (p *TSimpleJSONProtocol) OutputObjectBegin() error { + if e := p.OutputPreValue(); e != nil { + return e + } + if _, e := p.write(JSON_LBRACE); e != nil { + return NewTProtocolException(e) + } + p.dumpContext = append(p.dumpContext, int(_CONTEXT_IN_OBJECT_FIRST)) + return nil +} + +func (p *TSimpleJSONProtocol) OutputObjectEnd() error { + if _, e := p.write(JSON_RBRACE); e != nil { + return NewTProtocolException(e) + } + p.dumpContext = p.dumpContext[:len(p.dumpContext)-1] + if e := p.OutputPostValue(); e != nil { + return e + } + return nil +} + +func (p *TSimpleJSONProtocol) OutputListBegin() error { + if e := p.OutputPreValue(); e != nil { + return e + } + if _, e := p.write(JSON_LBRACKET); e != nil { + return NewTProtocolException(e) + } + p.dumpContext = append(p.dumpContext, int(_CONTEXT_IN_LIST_FIRST)) + return nil +} + +func (p *TSimpleJSONProtocol) OutputListEnd() error { + if _, e := p.write(JSON_RBRACKET); e != nil { + return NewTProtocolException(e) + } + p.dumpContext = p.dumpContext[:len(p.dumpContext)-1] + if e := p.OutputPostValue(); e != nil { + return e + } + return nil +} + +func (p *TSimpleJSONProtocol) OutputElemListBegin(elemType TType, size int) error { + if e := 
p.OutputListBegin(); e != nil { + return e + } + if e := p.WriteByte(int8(elemType)); e != nil { + return e + } + if e := p.WriteI64(int64(size)); e != nil { + return e + } + return nil +} + +func (p *TSimpleJSONProtocol) ParsePreValue() error { + if e := p.readNonSignificantWhitespace(); e != nil { + return NewTProtocolException(e) + } + cxt := _ParseContext(p.parseContextStack[len(p.parseContextStack)-1]) + b, _ := p.reader.Peek(1) + switch cxt { + case _CONTEXT_IN_LIST: + if len(b) > 0 { + switch b[0] { + case JSON_RBRACKET[0]: + return nil + case JSON_COMMA[0]: + p.reader.ReadByte() + if e := p.readNonSignificantWhitespace(); e != nil { + return NewTProtocolException(e) + } + return nil + default: + e := fmt.Errorf("Expected \"]\" or \",\" in list context, but found \"%s\"", string(b)) + return NewTProtocolExceptionWithType(INVALID_DATA, e) + } + } + break + case _CONTEXT_IN_OBJECT_NEXT_KEY: + if len(b) > 0 { + switch b[0] { + case JSON_RBRACE[0]: + return nil + case JSON_COMMA[0]: + p.reader.ReadByte() + if e := p.readNonSignificantWhitespace(); e != nil { + return NewTProtocolException(e) + } + return nil + default: + e := fmt.Errorf("Expected \"}\" or \",\" in object context, but found \"%s\"", string(b)) + return NewTProtocolExceptionWithType(INVALID_DATA, e) + } + } + break + case _CONTEXT_IN_OBJECT_NEXT_VALUE: + if len(b) > 0 { + switch b[0] { + case JSON_COLON[0]: + p.reader.ReadByte() + if e := p.readNonSignificantWhitespace(); e != nil { + return NewTProtocolException(e) + } + return nil + default: + e := fmt.Errorf("Expected \":\" in object context, but found \"%s\"", string(b)) + return NewTProtocolExceptionWithType(INVALID_DATA, e) + } + } + break + } + return nil +} + +func (p *TSimpleJSONProtocol) ParsePostValue() error { + if e := p.readNonSignificantWhitespace(); e != nil { + return NewTProtocolException(e) + } + cxt := _ParseContext(p.parseContextStack[len(p.parseContextStack)-1]) + switch cxt { + case _CONTEXT_IN_LIST_FIRST: + 
p.parseContextStack = p.parseContextStack[:len(p.parseContextStack)-1] + p.parseContextStack = append(p.parseContextStack, int(_CONTEXT_IN_LIST)) + break + case _CONTEXT_IN_OBJECT_FIRST, _CONTEXT_IN_OBJECT_NEXT_KEY: + p.parseContextStack = p.parseContextStack[:len(p.parseContextStack)-1] + p.parseContextStack = append(p.parseContextStack, int(_CONTEXT_IN_OBJECT_NEXT_VALUE)) + break + case _CONTEXT_IN_OBJECT_NEXT_VALUE: + p.parseContextStack = p.parseContextStack[:len(p.parseContextStack)-1] + p.parseContextStack = append(p.parseContextStack, int(_CONTEXT_IN_OBJECT_NEXT_KEY)) + break + } + return nil +} + +func (p *TSimpleJSONProtocol) readNonSignificantWhitespace() error { + for { + b, _ := p.reader.Peek(1) + if len(b) < 1 { + return nil + } + switch b[0] { + case ' ', '\r', '\n', '\t': + p.reader.ReadByte() + continue + default: + break + } + break + } + return nil +} + +func (p *TSimpleJSONProtocol) ParseStringBody() (string, error) { + line, err := p.reader.ReadString(JSON_QUOTE) + if err != nil { + return "", NewTProtocolException(err) + } + l := len(line) + // count number of escapes to see if we need to keep going + i := 1 + for ; i < l; i++ { + if line[l-i-1] != '\\' { + break + } + } + if i&0x01 == 1 { + v, ok := jsonUnquote(string(JSON_QUOTE) + line) + if !ok { + return "", NewTProtocolException(err) + } + return v, nil + } + s, err := p.ParseQuotedStringBody() + if err != nil { + return "", NewTProtocolException(err) + } + str := string(JSON_QUOTE) + line + s + v, ok := jsonUnquote(str) + if !ok { + e := fmt.Errorf("Unable to parse as JSON string %s", str) + return "", NewTProtocolExceptionWithType(INVALID_DATA, e) + } + return v, nil +} + +func (p *TSimpleJSONProtocol) ParseQuotedStringBody() (string, error) { + line, err := p.reader.ReadString(JSON_QUOTE) + if err != nil { + return "", NewTProtocolException(err) + } + l := len(line) + // count number of escapes to see if we need to keep going + i := 1 + for ; i < l; i++ { + if line[l-i-1] != '\\' { + 
break + } + } + if i&0x01 == 1 { + return line, nil + } + s, err := p.ParseQuotedStringBody() + if err != nil { + return "", NewTProtocolException(err) + } + v := line + s + return v, nil +} + +func (p *TSimpleJSONProtocol) ParseBase64EncodedBody() ([]byte, error) { + line, err := p.reader.ReadBytes(JSON_QUOTE) + if err != nil { + return line, NewTProtocolException(err) + } + line2 := line[0 : len(line)-1] + l := len(line2) + if (l % 4) != 0 { + pad := 4 - (l % 4) + fill := [...]byte{'=', '=', '='} + line2 = append(line2, fill[:pad]...) + l = len(line2) + } + output := make([]byte, base64.StdEncoding.DecodedLen(l)) + n, err := base64.StdEncoding.Decode(output, line2) + return output[0:n], NewTProtocolException(err) +} + +func (p *TSimpleJSONProtocol) ParseI64() (int64, bool, error) { + if err := p.ParsePreValue(); err != nil { + return 0, false, err + } + var value int64 + var isnull bool + if p.safePeekContains(JSON_NULL) { + p.reader.Read(make([]byte, len(JSON_NULL))) + isnull = true + } else { + num, err := p.readNumeric() + isnull = (num == nil) + if !isnull { + value = num.Int64() + } + if err != nil { + return value, isnull, err + } + } + return value, isnull, p.ParsePostValue() +} + +func (p *TSimpleJSONProtocol) ParseF64() (float64, bool, error) { + if err := p.ParsePreValue(); err != nil { + return 0, false, err + } + var value float64 + var isnull bool + if p.safePeekContains(JSON_NULL) { + p.reader.Read(make([]byte, len(JSON_NULL))) + isnull = true + } else { + num, err := p.readNumeric() + isnull = (num == nil) + if !isnull { + value = num.Float64() + } + if err != nil { + return value, isnull, err + } + } + return value, isnull, p.ParsePostValue() +} + +func (p *TSimpleJSONProtocol) ParseObjectStart() (bool, error) { + if err := p.ParsePreValue(); err != nil { + return false, err + } + var b []byte + b, err := p.reader.Peek(1) + if err != nil { + return false, err + } + if len(b) > 0 && b[0] == JSON_LBRACE[0] { + p.reader.ReadByte() + 
p.parseContextStack = append(p.parseContextStack, int(_CONTEXT_IN_OBJECT_FIRST)) + return false, nil + } else if p.safePeekContains(JSON_NULL) { + return true, nil + } + e := fmt.Errorf("Expected '{' or null, but found '%s'", string(b)) + return false, NewTProtocolExceptionWithType(INVALID_DATA, e) +} + +func (p *TSimpleJSONProtocol) ParseObjectEnd() error { + if isNull, err := p.readIfNull(); isNull || err != nil { + return err + } + cxt := _ParseContext(p.parseContextStack[len(p.parseContextStack)-1]) + if (cxt != _CONTEXT_IN_OBJECT_FIRST) && (cxt != _CONTEXT_IN_OBJECT_NEXT_KEY) { + e := fmt.Errorf("Expected to be in the Object Context, but not in Object Context (%d)", cxt) + return NewTProtocolExceptionWithType(INVALID_DATA, e) + } + line, err := p.reader.ReadString(JSON_RBRACE[0]) + if err != nil { + return NewTProtocolException(err) + } + for _, char := range line { + switch char { + default: + e := fmt.Errorf("Expecting end of object \"}\", but found: \"%s\"", line) + return NewTProtocolExceptionWithType(INVALID_DATA, e) + case ' ', '\n', '\r', '\t', '}': + break + } + } + p.parseContextStack = p.parseContextStack[:len(p.parseContextStack)-1] + return p.ParsePostValue() +} + +func (p *TSimpleJSONProtocol) ParseListBegin() (isNull bool, err error) { + if e := p.ParsePreValue(); e != nil { + return false, e + } + var b []byte + b, err = p.reader.Peek(1) + if err != nil { + return false, err + } + if len(b) >= 1 && b[0] == JSON_LBRACKET[0] { + p.parseContextStack = append(p.parseContextStack, int(_CONTEXT_IN_LIST_FIRST)) + p.reader.ReadByte() + isNull = false + } else if p.safePeekContains(JSON_NULL) { + isNull = true + } else { + err = fmt.Errorf("Expected \"null\" or \"[\", received %q", b) + } + return isNull, NewTProtocolExceptionWithType(INVALID_DATA, err) +} + +func (p *TSimpleJSONProtocol) ParseElemListBegin() (elemType TType, size int, e error) { + if isNull, e := p.ParseListBegin(); isNull || e != nil { + return VOID, 0, e + } + bElemType, err := 
p.ReadByte() + elemType = TType(bElemType) + if err != nil { + return elemType, size, err + } + nSize, err2 := p.ReadI64() + size = int(nSize) + return elemType, size, err2 +} + +func (p *TSimpleJSONProtocol) ParseListEnd() error { + if isNull, err := p.readIfNull(); isNull || err != nil { + return err + } + cxt := _ParseContext(p.parseContextStack[len(p.parseContextStack)-1]) + if cxt != _CONTEXT_IN_LIST { + e := fmt.Errorf("Expected to be in the List Context, but not in List Context (%d)", cxt) + return NewTProtocolExceptionWithType(INVALID_DATA, e) + } + line, err := p.reader.ReadString(JSON_RBRACKET[0]) + if err != nil { + return NewTProtocolException(err) + } + for _, char := range line { + switch char { + default: + e := fmt.Errorf("Expecting end of list \"]\", but found: \"%v\"", line) + return NewTProtocolExceptionWithType(INVALID_DATA, e) + case ' ', '\n', '\r', '\t', rune(JSON_RBRACKET[0]): + break + } + } + p.parseContextStack = p.parseContextStack[:len(p.parseContextStack)-1] + if _ParseContext(p.parseContextStack[len(p.parseContextStack)-1]) == _CONTEXT_IN_TOPLEVEL { + return nil + } + return p.ParsePostValue() +} + +func (p *TSimpleJSONProtocol) readSingleValue() (interface{}, TType, error) { + e := p.readNonSignificantWhitespace() + if e != nil { + return nil, VOID, NewTProtocolException(e) + } + b, e := p.reader.Peek(1) + if len(b) > 0 { + c := b[0] + switch c { + case JSON_NULL[0]: + buf := make([]byte, len(JSON_NULL)) + _, e := p.reader.Read(buf) + if e != nil { + return nil, VOID, NewTProtocolException(e) + } + if string(JSON_NULL) != string(buf) { + e = mismatch(string(JSON_NULL), string(buf)) + return nil, VOID, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + return nil, VOID, nil + case JSON_QUOTE: + p.reader.ReadByte() + v, e := p.ParseStringBody() + if e != nil { + return v, UTF8, NewTProtocolException(e) + } + if v == JSON_INFINITY { + return INFINITY, DOUBLE, nil + } else if v == JSON_NEGATIVE_INFINITY { + return NEGATIVE_INFINITY, 
DOUBLE, nil + } else if v == JSON_NAN { + return NAN, DOUBLE, nil + } + return v, UTF8, nil + case JSON_TRUE[0]: + buf := make([]byte, len(JSON_TRUE)) + _, e := p.reader.Read(buf) + if e != nil { + return true, BOOL, NewTProtocolException(e) + } + if string(JSON_TRUE) != string(buf) { + e := mismatch(string(JSON_TRUE), string(buf)) + return true, BOOL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + return true, BOOL, nil + case JSON_FALSE[0]: + buf := make([]byte, len(JSON_FALSE)) + _, e := p.reader.Read(buf) + if e != nil { + return false, BOOL, NewTProtocolException(e) + } + if string(JSON_FALSE) != string(buf) { + e := mismatch(string(JSON_FALSE), string(buf)) + return false, BOOL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + return false, BOOL, nil + case JSON_LBRACKET[0]: + _, e := p.reader.ReadByte() + return make([]interface{}, 0), LIST, NewTProtocolException(e) + case JSON_LBRACE[0]: + _, e := p.reader.ReadByte() + return make(map[string]interface{}), STRUCT, NewTProtocolException(e) + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'e', 'E', '.', '+', '-', JSON_INFINITY[0], JSON_NAN[0]: + // assume numeric + v, e := p.readNumeric() + return v, DOUBLE, e + default: + e := fmt.Errorf("Expected element in list but found '%s' while parsing JSON.", string(c)) + return nil, VOID, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + } + e = fmt.Errorf("Cannot read a single element while parsing JSON.") + return nil, VOID, NewTProtocolExceptionWithType(INVALID_DATA, e) + +} + +func (p *TSimpleJSONProtocol) readIfNull() (bool, error) { + cont := true + for cont { + b, _ := p.reader.Peek(1) + if len(b) < 1 { + return false, nil + } + switch b[0] { + default: + return false, nil + case JSON_NULL[0]: + cont = false + break + case ' ', '\n', '\r', '\t': + p.reader.ReadByte() + break + } + } + if p.safePeekContains(JSON_NULL) { + p.reader.Read(make([]byte, len(JSON_NULL))) + return true, nil + } + return false, nil +} + +func (p 
*TSimpleJSONProtocol) readQuoteIfNext() { + b, _ := p.reader.Peek(1) + if len(b) > 0 && b[0] == JSON_QUOTE { + p.reader.ReadByte() + } +} + +func (p *TSimpleJSONProtocol) readNumeric() (Numeric, error) { + isNull, err := p.readIfNull() + if isNull || err != nil { + return NUMERIC_NULL, err + } + hasDecimalPoint := false + nextCanBeSign := true + hasE := false + MAX_LEN := 40 + buf := bytes.NewBuffer(make([]byte, 0, MAX_LEN)) + continueFor := true + inQuotes := false + for continueFor { + c, err := p.reader.ReadByte() + if err != nil { + if err == io.EOF { + break + } + return NUMERIC_NULL, NewTProtocolException(err) + } + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + buf.WriteByte(c) + nextCanBeSign = false + case '.': + if hasDecimalPoint { + e := fmt.Errorf("Unable to parse number with multiple decimal points '%s.'", buf.String()) + return NUMERIC_NULL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + if hasE { + e := fmt.Errorf("Unable to parse number with decimal points in the exponent '%s.'", buf.String()) + return NUMERIC_NULL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + buf.WriteByte(c) + hasDecimalPoint, nextCanBeSign = true, false + case 'e', 'E': + if hasE { + e := fmt.Errorf("Unable to parse number with multiple exponents '%s%c'", buf.String(), c) + return NUMERIC_NULL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + buf.WriteByte(c) + hasE, nextCanBeSign = true, true + case '-', '+': + if !nextCanBeSign { + e := fmt.Errorf("Negative sign within number") + return NUMERIC_NULL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + buf.WriteByte(c) + nextCanBeSign = false + case ' ', 0, '\t', '\n', '\r', JSON_RBRACE[0], JSON_RBRACKET[0], JSON_COMMA[0], JSON_COLON[0]: + p.reader.UnreadByte() + continueFor = false + case JSON_NAN[0]: + if buf.Len() == 0 { + buffer := make([]byte, len(JSON_NAN)) + buffer[0] = c + _, e := p.reader.Read(buffer[1:]) + if e != nil { + return NUMERIC_NULL, NewTProtocolException(e) + } + if 
JSON_NAN != string(buffer) { + e := mismatch(JSON_NAN, string(buffer)) + return NUMERIC_NULL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + if inQuotes { + p.readQuoteIfNext() + } + return NAN, nil + } else { + e := fmt.Errorf("Unable to parse number starting with character '%c'", c) + return NUMERIC_NULL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + case JSON_INFINITY[0]: + if buf.Len() == 0 || (buf.Len() == 1 && buf.Bytes()[0] == '+') { + buffer := make([]byte, len(JSON_INFINITY)) + buffer[0] = c + _, e := p.reader.Read(buffer[1:]) + if e != nil { + return NUMERIC_NULL, NewTProtocolException(e) + } + if JSON_INFINITY != string(buffer) { + e := mismatch(JSON_INFINITY, string(buffer)) + return NUMERIC_NULL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + if inQuotes { + p.readQuoteIfNext() + } + return INFINITY, nil + } else if buf.Len() == 1 && buf.Bytes()[0] == JSON_NEGATIVE_INFINITY[0] { + buffer := make([]byte, len(JSON_NEGATIVE_INFINITY)) + buffer[0] = JSON_NEGATIVE_INFINITY[0] + buffer[1] = c + _, e := p.reader.Read(buffer[2:]) + if e != nil { + return NUMERIC_NULL, NewTProtocolException(e) + } + if JSON_NEGATIVE_INFINITY != string(buffer) { + e := mismatch(JSON_NEGATIVE_INFINITY, string(buffer)) + return NUMERIC_NULL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + if inQuotes { + p.readQuoteIfNext() + } + return NEGATIVE_INFINITY, nil + } else { + e := fmt.Errorf("Unable to parse number starting with character '%c' due to existing buffer %s", c, buf.String()) + return NUMERIC_NULL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + case JSON_QUOTE: + if !inQuotes { + inQuotes = true + } else { + break + } + default: + e := fmt.Errorf("Unable to parse number starting with character '%c'", c) + return NUMERIC_NULL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + } + if buf.Len() == 0 { + e := fmt.Errorf("Unable to parse number from empty string ''") + return NUMERIC_NULL, NewTProtocolExceptionWithType(INVALID_DATA, e) + } + 
return NewNumericFromJSONString(buf.String(), false), nil +} + +// Safely peeks into the buffer, reading only what is necessary +func (p *TSimpleJSONProtocol) safePeekContains(b []byte) bool { + for i := 0; i < len(b); i++ { + a, _ := p.reader.Peek(i + 1) + if len(a) == 0 || a[i] != b[i] { + return false + } + } + return true +} + +// Reset the context stack to its initial state. +func (p *TSimpleJSONProtocol) resetContextStack() { + p.parseContextStack = []int{int(_CONTEXT_IN_TOPLEVEL)} + p.dumpContext = []int{int(_CONTEXT_IN_TOPLEVEL)} +} + +func (p *TSimpleJSONProtocol) write(b []byte) (int, error) { + n, err := p.writer.Write(b) + if err != nil { + p.writer.Reset(p.trans) // THRIFT-3735 + } + return n, err +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/simple_server.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/simple_server.go new file mode 100644 index 000000000..603580251 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/simple_server.go @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package thrift + +import ( + "log" + "runtime/debug" + "sync" + "sync/atomic" +) + +/* + * This is not a typical TSimpleServer as it is not blocked after accept a socket. + * It is more like a TThreadedServer that can handle different connections in different goroutines. + * This will work if golang user implements a conn-pool like thing in client side. + */ +type TSimpleServer struct { + closed int32 + wg sync.WaitGroup + mu sync.Mutex + + processorFactory TProcessorFactory + serverTransport TServerTransport + inputTransportFactory TTransportFactory + outputTransportFactory TTransportFactory + inputProtocolFactory TProtocolFactory + outputProtocolFactory TProtocolFactory +} + +func NewTSimpleServer2(processor TProcessor, serverTransport TServerTransport) *TSimpleServer { + return NewTSimpleServerFactory2(NewTProcessorFactory(processor), serverTransport) +} + +func NewTSimpleServer4(processor TProcessor, serverTransport TServerTransport, transportFactory TTransportFactory, protocolFactory TProtocolFactory) *TSimpleServer { + return NewTSimpleServerFactory4(NewTProcessorFactory(processor), + serverTransport, + transportFactory, + protocolFactory, + ) +} + +func NewTSimpleServer6(processor TProcessor, serverTransport TServerTransport, inputTransportFactory TTransportFactory, outputTransportFactory TTransportFactory, inputProtocolFactory TProtocolFactory, outputProtocolFactory TProtocolFactory) *TSimpleServer { + return NewTSimpleServerFactory6(NewTProcessorFactory(processor), + serverTransport, + inputTransportFactory, + outputTransportFactory, + inputProtocolFactory, + outputProtocolFactory, + ) +} + +func NewTSimpleServerFactory2(processorFactory TProcessorFactory, serverTransport TServerTransport) *TSimpleServer { + return NewTSimpleServerFactory6(processorFactory, + serverTransport, + NewTTransportFactory(), + NewTTransportFactory(), + NewTBinaryProtocolFactoryDefault(), + NewTBinaryProtocolFactoryDefault(), + ) +} + +func 
NewTSimpleServerFactory4(processorFactory TProcessorFactory, serverTransport TServerTransport, transportFactory TTransportFactory, protocolFactory TProtocolFactory) *TSimpleServer { + return NewTSimpleServerFactory6(processorFactory, + serverTransport, + transportFactory, + transportFactory, + protocolFactory, + protocolFactory, + ) +} + +func NewTSimpleServerFactory6(processorFactory TProcessorFactory, serverTransport TServerTransport, inputTransportFactory TTransportFactory, outputTransportFactory TTransportFactory, inputProtocolFactory TProtocolFactory, outputProtocolFactory TProtocolFactory) *TSimpleServer { + return &TSimpleServer{ + processorFactory: processorFactory, + serverTransport: serverTransport, + inputTransportFactory: inputTransportFactory, + outputTransportFactory: outputTransportFactory, + inputProtocolFactory: inputProtocolFactory, + outputProtocolFactory: outputProtocolFactory, + } +} + +func (p *TSimpleServer) ProcessorFactory() TProcessorFactory { + return p.processorFactory +} + +func (p *TSimpleServer) ServerTransport() TServerTransport { + return p.serverTransport +} + +func (p *TSimpleServer) InputTransportFactory() TTransportFactory { + return p.inputTransportFactory +} + +func (p *TSimpleServer) OutputTransportFactory() TTransportFactory { + return p.outputTransportFactory +} + +func (p *TSimpleServer) InputProtocolFactory() TProtocolFactory { + return p.inputProtocolFactory +} + +func (p *TSimpleServer) OutputProtocolFactory() TProtocolFactory { + return p.outputProtocolFactory +} + +func (p *TSimpleServer) Listen() error { + return p.serverTransport.Listen() +} + +func (p *TSimpleServer) innerAccept() (int32, error) { + client, err := p.serverTransport.Accept() + p.mu.Lock() + defer p.mu.Unlock() + closed := atomic.LoadInt32(&p.closed) + if closed != 0 { + return closed, nil + } + if err != nil { + return 0, err + } + if client != nil { + p.wg.Add(1) + go func() { + defer p.wg.Done() + if err := p.processRequests(client); err != nil { 
+ log.Println("error processing request:", err) + } + }() + } + return 0, nil +} + +func (p *TSimpleServer) AcceptLoop() error { + for { + closed, err := p.innerAccept() + if err != nil { + return err + } + if closed != 0 { + return nil + } + } +} + +func (p *TSimpleServer) Serve() error { + err := p.Listen() + if err != nil { + return err + } + p.AcceptLoop() + return nil +} + +func (p *TSimpleServer) Stop() error { + p.mu.Lock() + defer p.mu.Unlock() + if atomic.LoadInt32(&p.closed) != 0 { + return nil + } + atomic.StoreInt32(&p.closed, 1) + p.serverTransport.Interrupt() + p.wg.Wait() + return nil +} + +func (p *TSimpleServer) processRequests(client TTransport) error { + processor := p.processorFactory.GetProcessor(client) + inputTransport, err := p.inputTransportFactory.GetTransport(client) + if err != nil { + return err + } + outputTransport, err := p.outputTransportFactory.GetTransport(client) + if err != nil { + return err + } + inputProtocol := p.inputProtocolFactory.GetProtocol(inputTransport) + outputProtocol := p.outputProtocolFactory.GetProtocol(outputTransport) + defer func() { + if e := recover(); e != nil { + log.Printf("panic in processor: %s: %s", e, debug.Stack()) + } + }() + + if inputTransport != nil { + defer inputTransport.Close() + } + if outputTransport != nil { + defer outputTransport.Close() + } + for { + if atomic.LoadInt32(&p.closed) != 0 { + return nil + } + + ok, err := processor.Process(defaultCtx, inputProtocol, outputProtocol) + if err, ok := err.(TTransportException); ok && err.TypeId() == END_OF_FILE { + return nil + } else if err != nil { + return err + } + if err, ok := err.(TApplicationException); ok && err.TypeId() == UNKNOWN_METHOD { + continue + } + if !ok { + break + } + } + return nil +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/socket.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/socket.go new file mode 100644 index 000000000..885427965 --- /dev/null +++ 
b/vendor/git.apache.org/thrift.git/lib/go/thrift/socket.go @@ -0,0 +1,166 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "context" + "net" + "time" +) + +type TSocket struct { + conn net.Conn + addr net.Addr + timeout time.Duration +} + +// NewTSocket creates a net.Conn-backed TTransport, given a host and port +// +// Example: +// trans, err := thrift.NewTSocket("localhost:9090") +func NewTSocket(hostPort string) (*TSocket, error) { + return NewTSocketTimeout(hostPort, 0) +} + +// NewTSocketTimeout creates a net.Conn-backed TTransport, given a host and port +// it also accepts a timeout as a time.Duration +func NewTSocketTimeout(hostPort string, timeout time.Duration) (*TSocket, error) { + //conn, err := net.DialTimeout(network, address, timeout) + addr, err := net.ResolveTCPAddr("tcp", hostPort) + if err != nil { + return nil, err + } + return NewTSocketFromAddrTimeout(addr, timeout), nil +} + +// Creates a TSocket from a net.Addr +func NewTSocketFromAddrTimeout(addr net.Addr, timeout time.Duration) *TSocket { + return &TSocket{addr: addr, timeout: timeout} +} + +// Creates a TSocket from an existing net.Conn +func NewTSocketFromConnTimeout(conn net.Conn, timeout 
time.Duration) *TSocket { + return &TSocket{conn: conn, addr: conn.RemoteAddr(), timeout: timeout} +} + +// Sets the socket timeout +func (p *TSocket) SetTimeout(timeout time.Duration) error { + p.timeout = timeout + return nil +} + +func (p *TSocket) pushDeadline(read, write bool) { + var t time.Time + if p.timeout > 0 { + t = time.Now().Add(time.Duration(p.timeout)) + } + if read && write { + p.conn.SetDeadline(t) + } else if read { + p.conn.SetReadDeadline(t) + } else if write { + p.conn.SetWriteDeadline(t) + } +} + +// Connects the socket, creating a new socket object if necessary. +func (p *TSocket) Open() error { + if p.IsOpen() { + return NewTTransportException(ALREADY_OPEN, "Socket already connected.") + } + if p.addr == nil { + return NewTTransportException(NOT_OPEN, "Cannot open nil address.") + } + if len(p.addr.Network()) == 0 { + return NewTTransportException(NOT_OPEN, "Cannot open bad network name.") + } + if len(p.addr.String()) == 0 { + return NewTTransportException(NOT_OPEN, "Cannot open bad address.") + } + var err error + if p.conn, err = net.DialTimeout(p.addr.Network(), p.addr.String(), p.timeout); err != nil { + return NewTTransportException(NOT_OPEN, err.Error()) + } + return nil +} + +// Retrieve the underlying net.Conn +func (p *TSocket) Conn() net.Conn { + return p.conn +} + +// Returns true if the connection is open +func (p *TSocket) IsOpen() bool { + if p.conn == nil { + return false + } + return true +} + +// Closes the socket. +func (p *TSocket) Close() error { + // Close the socket + if p.conn != nil { + err := p.conn.Close() + if err != nil { + return err + } + p.conn = nil + } + return nil +} + +//Returns the remote address of the socket. 
+func (p *TSocket) Addr() net.Addr { + return p.addr +} + +func (p *TSocket) Read(buf []byte) (int, error) { + if !p.IsOpen() { + return 0, NewTTransportException(NOT_OPEN, "Connection not open") + } + p.pushDeadline(true, false) + n, err := p.conn.Read(buf) + return n, NewTTransportExceptionFromError(err) +} + +func (p *TSocket) Write(buf []byte) (int, error) { + if !p.IsOpen() { + return 0, NewTTransportException(NOT_OPEN, "Connection not open") + } + p.pushDeadline(false, true) + return p.conn.Write(buf) +} + +func (p *TSocket) Flush(ctx context.Context) error { + return nil +} + +func (p *TSocket) Interrupt() error { + if !p.IsOpen() { + return nil + } + return p.conn.Close() +} + +func (p *TSocket) RemainingBytes() (num_bytes uint64) { + const maxSize = ^uint64(0) + return maxSize // the thruth is, we just don't know unless framed is used +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/ssl_server_socket.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/ssl_server_socket.go new file mode 100644 index 000000000..907afca32 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/ssl_server_socket.go @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package thrift + +import ( + "crypto/tls" + "net" + "time" +) + +type TSSLServerSocket struct { + listener net.Listener + addr net.Addr + clientTimeout time.Duration + interrupted bool + cfg *tls.Config +} + +func NewTSSLServerSocket(listenAddr string, cfg *tls.Config) (*TSSLServerSocket, error) { + return NewTSSLServerSocketTimeout(listenAddr, cfg, 0) +} + +func NewTSSLServerSocketTimeout(listenAddr string, cfg *tls.Config, clientTimeout time.Duration) (*TSSLServerSocket, error) { + if cfg.MinVersion == 0 { + cfg.MinVersion = tls.VersionTLS10 + } + addr, err := net.ResolveTCPAddr("tcp", listenAddr) + if err != nil { + return nil, err + } + return &TSSLServerSocket{addr: addr, clientTimeout: clientTimeout, cfg: cfg}, nil +} + +func (p *TSSLServerSocket) Listen() error { + if p.IsListening() { + return nil + } + l, err := tls.Listen(p.addr.Network(), p.addr.String(), p.cfg) + if err != nil { + return err + } + p.listener = l + return nil +} + +func (p *TSSLServerSocket) Accept() (TTransport, error) { + if p.interrupted { + return nil, errTransportInterrupted + } + if p.listener == nil { + return nil, NewTTransportException(NOT_OPEN, "No underlying server socket") + } + conn, err := p.listener.Accept() + if err != nil { + return nil, NewTTransportExceptionFromError(err) + } + return NewTSSLSocketFromConnTimeout(conn, p.cfg, p.clientTimeout), nil +} + +// Checks whether the socket is listening. +func (p *TSSLServerSocket) IsListening() bool { + return p.listener != nil +} + +// Connects the socket, creating a new socket object if necessary. 
+func (p *TSSLServerSocket) Open() error { + if p.IsListening() { + return NewTTransportException(ALREADY_OPEN, "Server socket already open") + } + if l, err := tls.Listen(p.addr.Network(), p.addr.String(), p.cfg); err != nil { + return err + } else { + p.listener = l + } + return nil +} + +func (p *TSSLServerSocket) Addr() net.Addr { + return p.addr +} + +func (p *TSSLServerSocket) Close() error { + defer func() { + p.listener = nil + }() + if p.IsListening() { + return p.listener.Close() + } + return nil +} + +func (p *TSSLServerSocket) Interrupt() error { + p.interrupted = true + return nil +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/ssl_socket.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/ssl_socket.go new file mode 100644 index 000000000..ba6337726 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/ssl_socket.go @@ -0,0 +1,176 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "context" + "crypto/tls" + "net" + "time" +) + +type TSSLSocket struct { + conn net.Conn + // hostPort contains host:port (e.g. "asdf.com:12345"). The field is + // only valid if addr is nil. 
+ hostPort string + // addr is nil when hostPort is not "", and is only used when the + // TSSLSocket is constructed from a net.Addr. + addr net.Addr + timeout time.Duration + cfg *tls.Config +} + +// NewTSSLSocket creates a net.Conn-backed TTransport, given a host and port and tls Configuration +// +// Example: +// trans, err := thrift.NewTSSLSocket("localhost:9090", nil) +func NewTSSLSocket(hostPort string, cfg *tls.Config) (*TSSLSocket, error) { + return NewTSSLSocketTimeout(hostPort, cfg, 0) +} + +// NewTSSLSocketTimeout creates a net.Conn-backed TTransport, given a host and port +// it also accepts a tls Configuration and a timeout as a time.Duration +func NewTSSLSocketTimeout(hostPort string, cfg *tls.Config, timeout time.Duration) (*TSSLSocket, error) { + if cfg.MinVersion == 0 { + cfg.MinVersion = tls.VersionTLS10 + } + return &TSSLSocket{hostPort: hostPort, timeout: timeout, cfg: cfg}, nil +} + +// Creates a TSSLSocket from a net.Addr +func NewTSSLSocketFromAddrTimeout(addr net.Addr, cfg *tls.Config, timeout time.Duration) *TSSLSocket { + return &TSSLSocket{addr: addr, timeout: timeout, cfg: cfg} +} + +// Creates a TSSLSocket from an existing net.Conn +func NewTSSLSocketFromConnTimeout(conn net.Conn, cfg *tls.Config, timeout time.Duration) *TSSLSocket { + return &TSSLSocket{conn: conn, addr: conn.RemoteAddr(), timeout: timeout, cfg: cfg} +} + +// Sets the socket timeout +func (p *TSSLSocket) SetTimeout(timeout time.Duration) error { + p.timeout = timeout + return nil +} + +func (p *TSSLSocket) pushDeadline(read, write bool) { + var t time.Time + if p.timeout > 0 { + t = time.Now().Add(time.Duration(p.timeout)) + } + if read && write { + p.conn.SetDeadline(t) + } else if read { + p.conn.SetReadDeadline(t) + } else if write { + p.conn.SetWriteDeadline(t) + } +} + +// Connects the socket, creating a new socket object if necessary. 
+func (p *TSSLSocket) Open() error { + var err error + // If we have a hostname, we need to pass the hostname to tls.Dial for + // certificate hostname checks. + if p.hostPort != "" { + if p.conn, err = tls.DialWithDialer(&net.Dialer{ + Timeout: p.timeout}, "tcp", p.hostPort, p.cfg); err != nil { + return NewTTransportException(NOT_OPEN, err.Error()) + } + } else { + if p.IsOpen() { + return NewTTransportException(ALREADY_OPEN, "Socket already connected.") + } + if p.addr == nil { + return NewTTransportException(NOT_OPEN, "Cannot open nil address.") + } + if len(p.addr.Network()) == 0 { + return NewTTransportException(NOT_OPEN, "Cannot open bad network name.") + } + if len(p.addr.String()) == 0 { + return NewTTransportException(NOT_OPEN, "Cannot open bad address.") + } + if p.conn, err = tls.DialWithDialer(&net.Dialer{ + Timeout: p.timeout}, p.addr.Network(), p.addr.String(), p.cfg); err != nil { + return NewTTransportException(NOT_OPEN, err.Error()) + } + } + return nil +} + +// Retrieve the underlying net.Conn +func (p *TSSLSocket) Conn() net.Conn { + return p.conn +} + +// Returns true if the connection is open +func (p *TSSLSocket) IsOpen() bool { + if p.conn == nil { + return false + } + return true +} + +// Closes the socket. 
+func (p *TSSLSocket) Close() error { + // Close the socket + if p.conn != nil { + err := p.conn.Close() + if err != nil { + return err + } + p.conn = nil + } + return nil +} + +func (p *TSSLSocket) Read(buf []byte) (int, error) { + if !p.IsOpen() { + return 0, NewTTransportException(NOT_OPEN, "Connection not open") + } + p.pushDeadline(true, false) + n, err := p.conn.Read(buf) + return n, NewTTransportExceptionFromError(err) +} + +func (p *TSSLSocket) Write(buf []byte) (int, error) { + if !p.IsOpen() { + return 0, NewTTransportException(NOT_OPEN, "Connection not open") + } + p.pushDeadline(false, true) + return p.conn.Write(buf) +} + +func (p *TSSLSocket) Flush(ctx context.Context) error { + return nil +} + +func (p *TSSLSocket) Interrupt() error { + if !p.IsOpen() { + return nil + } + return p.conn.Close() +} + +func (p *TSSLSocket) RemainingBytes() (num_bytes uint64) { + const maxSize = ^uint64(0) + return maxSize // the thruth is, we just don't know unless framed is used +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/transport.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/transport.go new file mode 100644 index 000000000..ba2738a8d --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/transport.go @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "context" + "errors" + "io" +) + +var errTransportInterrupted = errors.New("Transport Interrupted") + +type Flusher interface { + Flush() (err error) +} + +type ContextFlusher interface { + Flush(ctx context.Context) (err error) +} + +type ReadSizeProvider interface { + RemainingBytes() (num_bytes uint64) +} + +// Encapsulates the I/O layer +type TTransport interface { + io.ReadWriteCloser + ContextFlusher + ReadSizeProvider + + // Opens the transport for communication + Open() error + + // Returns true if the transport is open + IsOpen() bool +} + +type stringWriter interface { + WriteString(s string) (n int, err error) +} + +// This is "enchanced" transport with extra capabilities. You need to use one of these +// to construct protocol. +// Notably, TSocket does not implement this interface, and it is always a mistake to use +// TSocket directly in protocol. +type TRichTransport interface { + io.ReadWriter + io.ByteReader + io.ByteWriter + stringWriter + ContextFlusher + ReadSizeProvider +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/transport_exception.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/transport_exception.go new file mode 100644 index 000000000..9505b4461 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/transport_exception.go @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +import ( + "errors" + "io" +) + +type timeoutable interface { + Timeout() bool +} + +// Thrift Transport exception +type TTransportException interface { + TException + TypeId() int + Err() error +} + +const ( + UNKNOWN_TRANSPORT_EXCEPTION = 0 + NOT_OPEN = 1 + ALREADY_OPEN = 2 + TIMED_OUT = 3 + END_OF_FILE = 4 +) + +type tTransportException struct { + typeId int + err error +} + +func (p *tTransportException) TypeId() int { + return p.typeId +} + +func (p *tTransportException) Error() string { + return p.err.Error() +} + +func (p *tTransportException) Err() error { + return p.err +} + +func NewTTransportException(t int, e string) TTransportException { + return &tTransportException{typeId: t, err: errors.New(e)} +} + +func NewTTransportExceptionFromError(e error) TTransportException { + if e == nil { + return nil + } + + if t, ok := e.(TTransportException); ok { + return t + } + + switch v := e.(type) { + case TTransportException: + return v + case timeoutable: + if v.Timeout() { + return &tTransportException{typeId: TIMED_OUT, err: e} + } + } + + if e == io.EOF { + return &tTransportException{typeId: END_OF_FILE, err: e} + } + + return &tTransportException{typeId: UNKNOWN_TRANSPORT_EXCEPTION, err: e} +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/transport_factory.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/transport_factory.go new file mode 100644 index 000000000..c80580794 --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/transport_factory.go @@ -0,0 +1,39 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +// Factory class used to create wrapped instance of Transports. +// This is used primarily in servers, which get Transports from +// a ServerTransport and then may want to mutate them (i.e. create +// a BufferedTransport from the underlying base transport) +type TTransportFactory interface { + GetTransport(trans TTransport) (TTransport, error) +} + +type tTransportFactory struct{} + +// Return a wrapped instance of the base Transport. +func (p *tTransportFactory) GetTransport(trans TTransport) (TTransport, error) { + return trans, nil +} + +func NewTTransportFactory() TTransportFactory { + return &tTransportFactory{} +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/type.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/type.go new file mode 100644 index 000000000..4292ffcad --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/type.go @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package thrift + +// Type constants in the Thrift protocol +type TType byte + +const ( + STOP = 0 + VOID = 1 + BOOL = 2 + BYTE = 3 + I08 = 3 + DOUBLE = 4 + I16 = 6 + I32 = 8 + I64 = 10 + STRING = 11 + UTF7 = 11 + STRUCT = 12 + MAP = 13 + SET = 14 + LIST = 15 + UTF8 = 16 + UTF16 = 17 + //BINARY = 18 wrong and unusued +) + +var typeNames = map[int]string{ + STOP: "STOP", + VOID: "VOID", + BOOL: "BOOL", + BYTE: "BYTE", + DOUBLE: "DOUBLE", + I16: "I16", + I32: "I32", + I64: "I64", + STRING: "STRING", + STRUCT: "STRUCT", + MAP: "MAP", + SET: "SET", + LIST: "LIST", + UTF8: "UTF8", + UTF16: "UTF16", +} + +func (p TType) String() string { + if s, ok := typeNames[int(p)]; ok { + return s + } + return "Unknown" +} diff --git a/vendor/git.apache.org/thrift.git/lib/go/thrift/zlib_transport.go b/vendor/git.apache.org/thrift.git/lib/go/thrift/zlib_transport.go new file mode 100644 index 000000000..f3d42673a --- /dev/null +++ b/vendor/git.apache.org/thrift.git/lib/go/thrift/zlib_transport.go @@ -0,0 +1,132 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. 
You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. + */ + +package thrift + +import ( + "compress/zlib" + "context" + "io" + "log" +) + +// TZlibTransportFactory is a factory for TZlibTransport instances +type TZlibTransportFactory struct { + level int + factory TTransportFactory +} + +// TZlibTransport is a TTransport implementation that makes use of zlib compression. +type TZlibTransport struct { + reader io.ReadCloser + transport TTransport + writer *zlib.Writer +} + +// GetTransport constructs a new instance of NewTZlibTransport +func (p *TZlibTransportFactory) GetTransport(trans TTransport) (TTransport, error) { + if p.factory != nil { + // wrap other factory + var err error + trans, err = p.factory.GetTransport(trans) + if err != nil { + return nil, err + } + } + return NewTZlibTransport(trans, p.level) +} + +// NewTZlibTransportFactory constructs a new instance of NewTZlibTransportFactory +func NewTZlibTransportFactory(level int) *TZlibTransportFactory { + return &TZlibTransportFactory{level: level, factory: nil} +} + +// NewTZlibTransportFactory constructs a new instance of TZlibTransportFactory +// as a wrapper over existing transport factory +func NewTZlibTransportFactoryWithFactory(level int, factory TTransportFactory) *TZlibTransportFactory { + return &TZlibTransportFactory{level: level, factory: factory} +} + +// NewTZlibTransport constructs a new instance of TZlibTransport +func NewTZlibTransport(trans TTransport, level int) (*TZlibTransport, error) { + w, err := zlib.NewWriterLevel(trans, level) + if err != nil { + log.Println(err) + return nil, err + } + + return &TZlibTransport{ + writer: 
w, + transport: trans, + }, nil +} + +// Close closes the reader and writer (flushing any unwritten data) and closes +// the underlying transport. +func (z *TZlibTransport) Close() error { + if z.reader != nil { + if err := z.reader.Close(); err != nil { + return err + } + } + if err := z.writer.Close(); err != nil { + return err + } + return z.transport.Close() +} + +// Flush flushes the writer and its underlying transport. +func (z *TZlibTransport) Flush(ctx context.Context) error { + if err := z.writer.Flush(); err != nil { + return err + } + return z.transport.Flush(ctx) +} + +// IsOpen returns true if the transport is open +func (z *TZlibTransport) IsOpen() bool { + return z.transport.IsOpen() +} + +// Open opens the transport for communication +func (z *TZlibTransport) Open() error { + return z.transport.Open() +} + +func (z *TZlibTransport) Read(p []byte) (int, error) { + if z.reader == nil { + r, err := zlib.NewReader(z.transport) + if err != nil { + return 0, NewTTransportExceptionFromError(err) + } + z.reader = r + } + + return z.reader.Read(p) +} + +// RemainingBytes returns the size in bytes of the data that is still to be +// read. +func (z *TZlibTransport) RemainingBytes() uint64 { + return z.transport.RemainingBytes() +} + +func (z *TZlibTransport) Write(p []byte) (int, error) { + return z.writer.Write(p) +} diff --git a/vendor/github.com/minio/parquet-go/LICENSE b/vendor/github.com/minio/parquet-go/LICENSE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/vendor/github.com/minio/parquet-go/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. 
+ + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/minio/parquet-go/Makefile b/vendor/github.com/minio/parquet-go/Makefile new file mode 100644 index 000000000..dc06ae83a --- /dev/null +++ b/vendor/github.com/minio/parquet-go/Makefile @@ -0,0 +1,36 @@ +GOPATH := $(shell go env GOPATH) + +all: check + +getdeps: + @if [ ! -f ${GOPATH}/bin/golint ]; then echo "Installing golint" && go get -u golang.org/x/lint/golint; fi + @if [ ! -f ${GOPATH}/bin/gocyclo ]; then echo "Installing gocyclo" && go get -u github.com/fzipp/gocyclo; fi + @if [ ! -f ${GOPATH}/bin/misspell ]; then echo "Installing misspell" && go get -u github.com/client9/misspell/cmd/misspell; fi + @if [ ! 
-f ${GOPATH}/bin/ineffassign ]; then echo "Installing ineffassign" && go get -u github.com/gordonklaus/ineffassign; fi + +vet: + @echo "Running $@" + @go tool vet -atomic -bool -copylocks -nilfunc -printf -shadow -rangeloops -unreachable -unsafeptr -unusedresult *.go + +fmt: + @echo "Running $@" + @gofmt -d *.go + +lint: + @echo "Running $@" + @${GOPATH}/bin/golint -set_exit_status + +cyclo: + @echo "Running $@" + @${GOPATH}/bin/gocyclo -over 200 . + +spelling: + @${GOPATH}/bin/misspell -locale US -error *.go README.md + +ineffassign: + @echo "Running $@" + @${GOPATH}/bin/ineffassign . + +check: getdeps vet fmt lint cyclo spelling ineffassign + @echo "Running unit tests" + @go test -tags kqueue . diff --git a/vendor/github.com/minio/parquet-go/README.md b/vendor/github.com/minio/parquet-go/README.md new file mode 100644 index 000000000..d04809562 --- /dev/null +++ b/vendor/github.com/minio/parquet-go/README.md @@ -0,0 +1 @@ +# parquet-go diff --git a/vendor/github.com/minio/parquet-go/column.go b/vendor/github.com/minio/parquet-go/column.go new file mode 100644 index 000000000..f5fb13310 --- /dev/null +++ b/vendor/github.com/minio/parquet-go/column.go @@ -0,0 +1,152 @@ +/* + * Minio Cloud Storage, (C) 2018 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package parquet + +import ( + "io" + "strings" + + "git.apache.org/thrift.git/lib/go/thrift" + "github.com/minio/minio-go/pkg/set" + "github.com/minio/parquet-go/gen-go/parquet" +) + +func getColumns( + rowGroup *parquet.RowGroup, + columnNames set.StringSet, + schemaElements []*parquet.SchemaElement, + getReaderFunc GetReaderFunc, +) (nameColumnMap map[string]*column, err error) { + nameIndexMap := make(map[string]int) + for colIndex, columnChunk := range rowGroup.GetColumns() { + meta := columnChunk.GetMetaData() + columnName := strings.Join(meta.GetPathInSchema(), ".") + if columnNames != nil && !columnNames.Contains(columnName) { + continue + } + + // Ignore column spanning into another file. + if columnChunk.GetFilePath() != "" { + continue + } + + offset := meta.GetDataPageOffset() + if meta.DictionaryPageOffset != nil { + offset = meta.GetDictionaryPageOffset() + } + + size := meta.GetTotalCompressedSize() + + rc, err := getReaderFunc(offset, size) + if err != nil { + return nil, err + } + + thriftReader := thrift.NewTBufferedTransport(thrift.NewStreamTransportR(rc), int(size)) + + if nameColumnMap == nil { + nameColumnMap = make(map[string]*column) + } + + nameColumnMap[columnName] = &column{ + name: columnName, + metadata: meta, + schemaElements: schemaElements, + rc: rc, + thriftReader: thriftReader, + valueType: meta.GetType(), + } + + nameIndexMap[columnName] = colIndex + } + + for name := range nameColumnMap { + nameColumnMap[name].nameIndexMap = nameIndexMap + } + + return nameColumnMap, nil +} + +type column struct { + name string + endOfValues bool + valueIndex int + valueType parquet.Type + metadata *parquet.ColumnMetaData + schemaElements []*parquet.SchemaElement + nameIndexMap map[string]int + dictPage *page + dataTable *table + rc io.ReadCloser + thriftReader *thrift.TBufferedTransport +} + +func (column *column) close() (err error) { + if column.rc != nil { + err = column.rc.Close() + column.rc = nil + } + + return err +} + +func 
(column *column) readPage() { + page, _, _, err := readPage( + column.thriftReader, + column.metadata, + column.nameIndexMap, + column.schemaElements, + ) + + if err != nil { + column.endOfValues = true + return + } + + if page.Header.GetType() == parquet.PageType_DICTIONARY_PAGE { + column.dictPage = page + column.readPage() + return + } + + page.decode(column.dictPage) + + if column.dataTable == nil { + column.dataTable = newTableFromTable(page.DataTable) + } + + column.dataTable.Merge(page.DataTable) +} + +func (column *column) read() (value interface{}, valueType parquet.Type) { + if column.dataTable == nil { + column.readPage() + column.valueIndex = 0 + } + + if column.endOfValues { + return nil, column.metadata.GetType() + } + + value = column.dataTable.Values[column.valueIndex] + column.valueIndex++ + if len(column.dataTable.Values) == column.valueIndex { + column.dataTable = nil + } + + return value, column.metadata.GetType() +} diff --git a/vendor/github.com/minio/parquet-go/compression.go b/vendor/github.com/minio/parquet-go/compression.go new file mode 100644 index 000000000..7f9533ac4 --- /dev/null +++ b/vendor/github.com/minio/parquet-go/compression.go @@ -0,0 +1,57 @@ +/* + * Minio Cloud Storage, (C) 2018 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package parquet + +import ( + "bytes" + "compress/gzip" + "fmt" + "io/ioutil" + + "github.com/golang/snappy" + "github.com/minio/parquet-go/gen-go/parquet" + "github.com/pierrec/lz4" + lzo "github.com/rasky/go-lzo" +) + +type compressionCodec parquet.CompressionCodec + +func (c compressionCodec) uncompress(buf []byte) ([]byte, error) { + switch parquet.CompressionCodec(c) { + case parquet.CompressionCodec_UNCOMPRESSED: + return buf, nil + + case parquet.CompressionCodec_SNAPPY: + return snappy.Decode(nil, buf) + + case parquet.CompressionCodec_GZIP: + reader, err := gzip.NewReader(bytes.NewReader(buf)) + if err != nil { + return nil, err + } + defer reader.Close() + return ioutil.ReadAll(reader) + + case parquet.CompressionCodec_LZO: + return lzo.Decompress1X(bytes.NewReader(buf), len(buf), 0) + + case parquet.CompressionCodec_LZ4: + return ioutil.ReadAll(lz4.NewReader(bytes.NewReader(buf))) + } + + return nil, fmt.Errorf("invalid compression codec %v", c) +} diff --git a/vendor/github.com/minio/parquet-go/decode.go b/vendor/github.com/minio/parquet-go/decode.go new file mode 100644 index 000000000..f7ae556e9 --- /dev/null +++ b/vendor/github.com/minio/parquet-go/decode.go @@ -0,0 +1,506 @@ +/* + * Minio Cloud Storage, (C) 2018 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package parquet + +import ( + "bytes" + "encoding/binary" + "fmt" + "math" + + "github.com/minio/parquet-go/gen-go/parquet" +) + +func uint32ToBytes(v uint32) []byte { + buf := make([]byte, 4) + binary.LittleEndian.PutUint32(buf, v) + return buf +} + +func uint64ToBytes(v uint64) []byte { + buf := make([]byte, 8) + binary.LittleEndian.PutUint64(buf, v) + return buf +} + +func bytesToUint32(buf []byte) uint32 { + return binary.LittleEndian.Uint32(buf) +} + +func bytesToUint64(buf []byte) uint64 { + return binary.LittleEndian.Uint64(buf) +} + +func i64sToi32s(i64s []int64) (i32s []int32) { + i32s = make([]int32, len(i64s)) + for i := range i64s { + i32s[i] = int32(i64s[i]) + } + + return i32s +} + +func readBitPacked(reader *bytes.Reader, header, bitWidth uint64) (result []int64, err error) { + count := header * 8 + + if count == 0 { + return result, nil + } + + if bitWidth == 0 { + return make([]int64, count), nil + } + + data := make([]byte, header*bitWidth) + if _, err = reader.Read(data); err != nil { + return nil, err + } + + var val, used, left, b uint64 + + valNeedBits := bitWidth + i := -1 + for { + if left <= 0 { + i++ + if i >= len(data) { + break + } + + b = uint64(data[i]) + left = 8 + used = 0 + } + + if left >= valNeedBits { + val |= ((b >> used) & ((1 << valNeedBits) - 1)) << (bitWidth - valNeedBits) + result = append(result, int64(val)) + val = 0 + left -= valNeedBits + used += valNeedBits + valNeedBits = bitWidth + } else { + val |= (b >> used) << (bitWidth - valNeedBits) + valNeedBits -= left + left = 0 + } + } + + return result, nil +} + +func readBools(reader *bytes.Reader, count uint64) (result []bool, err error) { + i64s, err := readBitPacked(reader, count, 1) + if err != nil { + return nil, err + } + + var i uint64 + for i = 0; i < count; i++ { + result = append(result, i64s[i] > 0) + } + + return result, nil +} + +func readInt32s(reader *bytes.Reader, count uint64) (result []int32, err error) { + buf := make([]byte, 4) + + var i uint64 
+ for i = 0; i < count; i++ { + if _, err = reader.Read(buf); err != nil { + return nil, err + } + + result = append(result, int32(bytesToUint32(buf))) + } + + return result, nil +} + +func readInt64s(reader *bytes.Reader, count uint64) (result []int64, err error) { + buf := make([]byte, 8) + + var i uint64 + for i = 0; i < count; i++ { + if _, err = reader.Read(buf); err != nil { + return nil, err + } + + result = append(result, int64(bytesToUint64(buf))) + } + + return result, nil +} + +func readInt96s(reader *bytes.Reader, count uint64) (result [][]byte, err error) { + var i uint64 + for i = 0; i < count; i++ { + buf := make([]byte, 12) + + if _, err = reader.Read(buf); err != nil { + return nil, err + } + + result = append(result, buf) + } + + return result, nil +} + +func readFloats(reader *bytes.Reader, count uint64) (result []float32, err error) { + buf := make([]byte, 4) + + var i uint64 + for i = 0; i < count; i++ { + if _, err = reader.Read(buf); err != nil { + return nil, err + } + + result = append(result, math.Float32frombits(bytesToUint32(buf))) + } + + return result, nil +} + +func readDoubles(reader *bytes.Reader, count uint64) (result []float64, err error) { + buf := make([]byte, 8) + + var i uint64 + for i = 0; i < count; i++ { + if _, err = reader.Read(buf); err != nil { + return nil, err + } + + result = append(result, math.Float64frombits(bytesToUint64(buf))) + } + + return result, nil +} + +func readByteArrays(reader *bytes.Reader, count uint64) (result [][]byte, err error) { + buf := make([]byte, 4) + + var i uint64 + for i = 0; i < count; i++ { + if _, err = reader.Read(buf); err != nil { + return nil, err + } + + data := make([]byte, bytesToUint32(buf)) + if _, err = reader.Read(data); err != nil { + return nil, err + } + + result = append(result, data) + } + + return result, nil +} + +func readFixedLenByteArrays(reader *bytes.Reader, count, length uint64) (result [][]byte, err error) { + var i uint64 + for i = 0; i < count; i++ { + data := 
make([]byte, length) + if _, err = reader.Read(data); err != nil { + return nil, err + } + + result = append(result, data) + } + + return result, nil +} + +func readValues(reader *bytes.Reader, dataType parquet.Type, count, length uint64) (interface{}, error) { + switch dataType { + case parquet.Type_BOOLEAN: + return readBools(reader, count) + case parquet.Type_INT32: + return readInt32s(reader, count) + case parquet.Type_INT64: + return readInt64s(reader, count) + case parquet.Type_INT96: + return readInt96s(reader, count) + case parquet.Type_FLOAT: + return readFloats(reader, count) + case parquet.Type_DOUBLE: + return readDoubles(reader, count) + case parquet.Type_BYTE_ARRAY: + return readByteArrays(reader, count) + case parquet.Type_FIXED_LEN_BYTE_ARRAY: + return readFixedLenByteArrays(reader, count, length) + } + + return nil, fmt.Errorf("unknown parquet type %v", dataType) +} + +func readUnsignedVarInt(reader *bytes.Reader) (v uint64, err error) { + var b byte + var shift uint64 + + for { + if b, err = reader.ReadByte(); err != nil { + return 0, err + } + + if v |= ((uint64(b) & 0x7F) << shift); b&0x80 == 0 { + break + } + + shift += 7 + } + + return v, nil +} + +func readRLE(reader *bytes.Reader, header, bitWidth uint64) (result []int64, err error) { + width := (bitWidth + 7) / 8 + data := make([]byte, width) + if width > 0 { + if _, err = reader.Read(data); err != nil { + return nil, err + } + } + + if width < 4 { + data = append(data, make([]byte, 4-width)...) 
+ } + + val := int64(bytesToUint32(data)) + + count := header >> 1 + result = make([]int64, count) + for i := range result { + result[i] = val + } + + return result, nil +} + +func readRLEBitPackedHybrid(reader *bytes.Reader, length, bitWidth uint64) (result []int64, err error) { + if length <= 0 { + var i32s []int32 + i32s, err = readInt32s(reader, 1) + if err != nil { + return nil, err + } + length = uint64(i32s[0]) + } + + buf := make([]byte, length) + if _, err = reader.Read(buf); err != nil { + return nil, err + } + + reader = bytes.NewReader(buf) + for reader.Len() > 0 { + header, err := readUnsignedVarInt(reader) + if err != nil { + return nil, err + } + + var i64s []int64 + if header&1 == 0 { + i64s, err = readRLE(reader, header, bitWidth) + } else { + i64s, err = readBitPacked(reader, header>>1, bitWidth) + } + + if err != nil { + return nil, err + } + + result = append(result, i64s...) + } + + return result, nil +} + +func readDeltaBinaryPackedInt(reader *bytes.Reader) (result []int64, err error) { + blockSize, err := readUnsignedVarInt(reader) + if err != nil { + return nil, err + } + + numMiniblocksInBlock, err := readUnsignedVarInt(reader) + if err != nil { + return nil, err + } + + numValues, err := readUnsignedVarInt(reader) + if err != nil { + return nil, err + } + + firstValueZigZag, err := readUnsignedVarInt(reader) + if err != nil { + return nil, err + } + + v := int64(firstValueZigZag>>1) ^ (-int64(firstValueZigZag & 1)) + result = append(result, v) + + numValuesInMiniBlock := blockSize / numMiniblocksInBlock + + bitWidths := make([]uint64, numMiniblocksInBlock) + for uint64(len(result)) < numValues { + minDeltaZigZag, err := readUnsignedVarInt(reader) + if err != nil { + return nil, err + } + + for i := 0; uint64(i) < numMiniblocksInBlock; i++ { + b, err := reader.ReadByte() + if err != nil { + return nil, err + } + bitWidths[i] = uint64(b) + } + + minDelta := int64(minDeltaZigZag>>1) ^ (-int64(minDeltaZigZag & 1)) + for i := 0; uint64(i) < 
numMiniblocksInBlock; i++ { + i64s, err := readBitPacked(reader, numValuesInMiniBlock/8, bitWidths[i]) + if err != nil { + return nil, err + } + + for j := range i64s { + v += i64s[j] + minDelta + result = append(result, v) + } + } + } + + return result[:numValues], nil +} + +func readDeltaLengthByteArrays(reader *bytes.Reader) (result [][]byte, err error) { + i64s, err := readDeltaBinaryPackedInt(reader) + if err != nil { + return nil, err + } + + for i := 0; i < len(i64s); i++ { + arrays, err := readFixedLenByteArrays(reader, 1, uint64(i64s[i])) + if err != nil { + return nil, err + } + + result = append(result, arrays[0]) + } + + return result, nil +} + +func readDeltaByteArrays(reader *bytes.Reader) (result [][]byte, err error) { + i64s, err := readDeltaBinaryPackedInt(reader) + if err != nil { + return nil, err + } + + suffixes, err := readDeltaLengthByteArrays(reader) + if err != nil { + return nil, err + } + + result = append(result, suffixes[0]) + for i := 1; i < len(i64s); i++ { + prefixLength := i64s[i] + val := append([]byte{}, result[i-1][:prefixLength]...) + val = append(val, suffixes[i]...) 
+ result = append(result, val) + } + + return result, nil +} + +func readDataPageValues( + bytesReader *bytes.Reader, + encoding parquet.Encoding, + dataType parquet.Type, + convertedType parquet.ConvertedType, + count, bitWidth uint64, +) (result interface{}, resultDataType parquet.Type, err error) { + switch encoding { + case parquet.Encoding_PLAIN: + result, err = readValues(bytesReader, dataType, count, bitWidth) + return result, dataType, err + + case parquet.Encoding_PLAIN_DICTIONARY: + b, err := bytesReader.ReadByte() + if err != nil { + return nil, -1, err + } + + i64s, err := readRLEBitPackedHybrid(bytesReader, uint64(bytesReader.Len()), uint64(b)) + if err != nil { + return nil, -1, err + } + + return i64s[:count], parquet.Type_INT64, nil + + case parquet.Encoding_RLE: + i64s, err := readRLEBitPackedHybrid(bytesReader, 0, bitWidth) + if err != nil { + return nil, -1, err + } + + i64s = i64s[:count] + + if dataType == parquet.Type_INT32 { + return i64sToi32s(i64s), parquet.Type_INT32, nil + } + + return i64s, parquet.Type_INT64, nil + + case parquet.Encoding_BIT_PACKED: + return nil, -1, fmt.Errorf("deprecated parquet encoding %v", parquet.Encoding_BIT_PACKED) + + case parquet.Encoding_DELTA_BINARY_PACKED: + i64s, err := readDeltaBinaryPackedInt(bytesReader) + if err != nil { + return nil, -1, err + } + + i64s = i64s[:count] + + if dataType == parquet.Type_INT32 { + return i64sToi32s(i64s), parquet.Type_INT32, nil + } + + return i64s, parquet.Type_INT64, nil + + case parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY: + byteSlices, err := readDeltaLengthByteArrays(bytesReader) + if err != nil { + return nil, -1, err + } + + return byteSlices[:count], parquet.Type_FIXED_LEN_BYTE_ARRAY, nil + + case parquet.Encoding_DELTA_BYTE_ARRAY: + byteSlices, err := readDeltaByteArrays(bytesReader) + if err != nil { + return nil, -1, err + } + + return byteSlices[:count], parquet.Type_FIXED_LEN_BYTE_ARRAY, nil + } + + return nil, -1, fmt.Errorf("unsupported parquet encoding %v", 
encoding) +} diff --git a/vendor/github.com/minio/parquet-go/example.parquet b/vendor/github.com/minio/parquet-go/example.parquet new file mode 100644 index 000000000..05cd61aea Binary files /dev/null and b/vendor/github.com/minio/parquet-go/example.parquet differ diff --git a/vendor/github.com/minio/parquet-go/gen-go/parquet/GoUnusedProtection__.go b/vendor/github.com/minio/parquet-go/gen-go/parquet/GoUnusedProtection__.go new file mode 100644 index 000000000..dc8137cff --- /dev/null +++ b/vendor/github.com/minio/parquet-go/gen-go/parquet/GoUnusedProtection__.go @@ -0,0 +1,7 @@ +// Autogenerated by Thrift Compiler (0.10.0) +// DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + +package parquet + +var GoUnusedProtection__ int; + diff --git a/vendor/github.com/minio/parquet-go/gen-go/parquet/parquet-consts.go b/vendor/github.com/minio/parquet-go/gen-go/parquet/parquet-consts.go new file mode 100644 index 000000000..c34042af6 --- /dev/null +++ b/vendor/github.com/minio/parquet-go/gen-go/parquet/parquet-consts.go @@ -0,0 +1,20 @@ +// Autogenerated by Thrift Compiler (0.10.0) +// DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + +package parquet + +import ( + "bytes" + "fmt" + "git.apache.org/thrift.git/lib/go/thrift" +) + +// (needed to ensure safety because of naive import list construction.) 
+var _ = thrift.ZERO +var _ = fmt.Printf +var _ = bytes.Equal + + +func init() { +} + diff --git a/vendor/github.com/minio/parquet-go/gen-go/parquet/parquet.go b/vendor/github.com/minio/parquet-go/gen-go/parquet/parquet.go new file mode 100644 index 000000000..22711db46 --- /dev/null +++ b/vendor/github.com/minio/parquet-go/gen-go/parquet/parquet.go @@ -0,0 +1,8191 @@ +// Autogenerated by Thrift Compiler (0.10.0) +// DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + +package parquet + +import ( + "bytes" + "database/sql/driver" + "errors" + "fmt" + "git.apache.org/thrift.git/lib/go/thrift" +) + +// (needed to ensure safety because of naive import list construction.) +var _ = thrift.ZERO +var _ = fmt.Printf +var _ = bytes.Equal + +//Types supported by Parquet. These types are intended to be used in combination +//with the encodings to control the on disk storage format. +//For example INT16 is not included as a type since a good encoding of INT32 +//would handle this. +type Type int64 + +const ( + Type_BOOLEAN Type = 0 + Type_INT32 Type = 1 + Type_INT64 Type = 2 + Type_INT96 Type = 3 + Type_FLOAT Type = 4 + Type_DOUBLE Type = 5 + Type_BYTE_ARRAY Type = 6 + Type_FIXED_LEN_BYTE_ARRAY Type = 7 +) + +func (p Type) String() string { + switch p { + case Type_BOOLEAN: + return "BOOLEAN" + case Type_INT32: + return "INT32" + case Type_INT64: + return "INT64" + case Type_INT96: + return "INT96" + case Type_FLOAT: + return "FLOAT" + case Type_DOUBLE: + return "DOUBLE" + case Type_BYTE_ARRAY: + return "BYTE_ARRAY" + case Type_FIXED_LEN_BYTE_ARRAY: + return "FIXED_LEN_BYTE_ARRAY" + } + return "" +} + +func TypeFromString(s string) (Type, error) { + switch s { + case "BOOLEAN": + return Type_BOOLEAN, nil + case "INT32": + return Type_INT32, nil + case "INT64": + return Type_INT64, nil + case "INT96": + return Type_INT96, nil + case "FLOAT": + return Type_FLOAT, nil + case "DOUBLE": + return Type_DOUBLE, nil + case "BYTE_ARRAY": + return Type_BYTE_ARRAY, nil + 
case "FIXED_LEN_BYTE_ARRAY": + return Type_FIXED_LEN_BYTE_ARRAY, nil + } + return Type(0), fmt.Errorf("not a valid Type string") +} + +func TypePtr(v Type) *Type { return &v } + +func (p Type) MarshalText() ([]byte, error) { + return []byte(p.String()), nil +} + +func (p *Type) UnmarshalText(text []byte) error { + q, err := TypeFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil +} + +func (p *Type) Scan(value interface{}) error { + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = Type(v) + return nil +} + +func (p *Type) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil +} + +//Common types used by frameworks(e.g. hive, pig) using parquet. This helps map +//between types in those frameworks to the base types in parquet. This is only +//metadata and not needed to read or write the data. +type ConvertedType int64 + +const ( + ConvertedType_UTF8 ConvertedType = 0 + ConvertedType_MAP ConvertedType = 1 + ConvertedType_MAP_KEY_VALUE ConvertedType = 2 + ConvertedType_LIST ConvertedType = 3 + ConvertedType_ENUM ConvertedType = 4 + ConvertedType_DECIMAL ConvertedType = 5 + ConvertedType_DATE ConvertedType = 6 + ConvertedType_TIME_MILLIS ConvertedType = 7 + ConvertedType_TIME_MICROS ConvertedType = 8 + ConvertedType_TIMESTAMP_MILLIS ConvertedType = 9 + ConvertedType_TIMESTAMP_MICROS ConvertedType = 10 + ConvertedType_UINT_8 ConvertedType = 11 + ConvertedType_UINT_16 ConvertedType = 12 + ConvertedType_UINT_32 ConvertedType = 13 + ConvertedType_UINT_64 ConvertedType = 14 + ConvertedType_INT_8 ConvertedType = 15 + ConvertedType_INT_16 ConvertedType = 16 + ConvertedType_INT_32 ConvertedType = 17 + ConvertedType_INT_64 ConvertedType = 18 + ConvertedType_JSON ConvertedType = 19 + ConvertedType_BSON ConvertedType = 20 + ConvertedType_INTERVAL ConvertedType = 21 +) + +func (p ConvertedType) String() string { + switch p { + case ConvertedType_UTF8: + return "UTF8" + 
case ConvertedType_MAP: + return "MAP" + case ConvertedType_MAP_KEY_VALUE: + return "MAP_KEY_VALUE" + case ConvertedType_LIST: + return "LIST" + case ConvertedType_ENUM: + return "ENUM" + case ConvertedType_DECIMAL: + return "DECIMAL" + case ConvertedType_DATE: + return "DATE" + case ConvertedType_TIME_MILLIS: + return "TIME_MILLIS" + case ConvertedType_TIME_MICROS: + return "TIME_MICROS" + case ConvertedType_TIMESTAMP_MILLIS: + return "TIMESTAMP_MILLIS" + case ConvertedType_TIMESTAMP_MICROS: + return "TIMESTAMP_MICROS" + case ConvertedType_UINT_8: + return "UINT_8" + case ConvertedType_UINT_16: + return "UINT_16" + case ConvertedType_UINT_32: + return "UINT_32" + case ConvertedType_UINT_64: + return "UINT_64" + case ConvertedType_INT_8: + return "INT_8" + case ConvertedType_INT_16: + return "INT_16" + case ConvertedType_INT_32: + return "INT_32" + case ConvertedType_INT_64: + return "INT_64" + case ConvertedType_JSON: + return "JSON" + case ConvertedType_BSON: + return "BSON" + case ConvertedType_INTERVAL: + return "INTERVAL" + } + return "" +} + +func ConvertedTypeFromString(s string) (ConvertedType, error) { + switch s { + case "UTF8": + return ConvertedType_UTF8, nil + case "MAP": + return ConvertedType_MAP, nil + case "MAP_KEY_VALUE": + return ConvertedType_MAP_KEY_VALUE, nil + case "LIST": + return ConvertedType_LIST, nil + case "ENUM": + return ConvertedType_ENUM, nil + case "DECIMAL": + return ConvertedType_DECIMAL, nil + case "DATE": + return ConvertedType_DATE, nil + case "TIME_MILLIS": + return ConvertedType_TIME_MILLIS, nil + case "TIME_MICROS": + return ConvertedType_TIME_MICROS, nil + case "TIMESTAMP_MILLIS": + return ConvertedType_TIMESTAMP_MILLIS, nil + case "TIMESTAMP_MICROS": + return ConvertedType_TIMESTAMP_MICROS, nil + case "UINT_8": + return ConvertedType_UINT_8, nil + case "UINT_16": + return ConvertedType_UINT_16, nil + case "UINT_32": + return ConvertedType_UINT_32, nil + case "UINT_64": + return ConvertedType_UINT_64, nil + case "INT_8": + 
return ConvertedType_INT_8, nil + case "INT_16": + return ConvertedType_INT_16, nil + case "INT_32": + return ConvertedType_INT_32, nil + case "INT_64": + return ConvertedType_INT_64, nil + case "JSON": + return ConvertedType_JSON, nil + case "BSON": + return ConvertedType_BSON, nil + case "INTERVAL": + return ConvertedType_INTERVAL, nil + } + return ConvertedType(0), fmt.Errorf("not a valid ConvertedType string") +} + +func ConvertedTypePtr(v ConvertedType) *ConvertedType { return &v } + +func (p ConvertedType) MarshalText() ([]byte, error) { + return []byte(p.String()), nil +} + +func (p *ConvertedType) UnmarshalText(text []byte) error { + q, err := ConvertedTypeFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil +} + +func (p *ConvertedType) Scan(value interface{}) error { + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = ConvertedType(v) + return nil +} + +func (p *ConvertedType) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil +} + +//Representation of Schemas +type FieldRepetitionType int64 + +const ( + FieldRepetitionType_REQUIRED FieldRepetitionType = 0 + FieldRepetitionType_OPTIONAL FieldRepetitionType = 1 + FieldRepetitionType_REPEATED FieldRepetitionType = 2 +) + +func (p FieldRepetitionType) String() string { + switch p { + case FieldRepetitionType_REQUIRED: + return "REQUIRED" + case FieldRepetitionType_OPTIONAL: + return "OPTIONAL" + case FieldRepetitionType_REPEATED: + return "REPEATED" + } + return "" +} + +func FieldRepetitionTypeFromString(s string) (FieldRepetitionType, error) { + switch s { + case "REQUIRED": + return FieldRepetitionType_REQUIRED, nil + case "OPTIONAL": + return FieldRepetitionType_OPTIONAL, nil + case "REPEATED": + return FieldRepetitionType_REPEATED, nil + } + return FieldRepetitionType(0), fmt.Errorf("not a valid FieldRepetitionType string") +} + +func FieldRepetitionTypePtr(v FieldRepetitionType) 
*FieldRepetitionType { return &v } + +func (p FieldRepetitionType) MarshalText() ([]byte, error) { + return []byte(p.String()), nil +} + +func (p *FieldRepetitionType) UnmarshalText(text []byte) error { + q, err := FieldRepetitionTypeFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil +} + +func (p *FieldRepetitionType) Scan(value interface{}) error { + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = FieldRepetitionType(v) + return nil +} + +func (p *FieldRepetitionType) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil +} + +//Encodings supported by Parquet. Not all encodings are valid for all types. These +//enums are also used to specify the encoding of definition and repetition levels. +//See the accompanying doc for the details of the more complicated encodings. +type Encoding int64 + +const ( + Encoding_PLAIN Encoding = 0 + Encoding_PLAIN_DICTIONARY Encoding = 2 + Encoding_RLE Encoding = 3 + Encoding_BIT_PACKED Encoding = 4 + Encoding_DELTA_BINARY_PACKED Encoding = 5 + Encoding_DELTA_LENGTH_BYTE_ARRAY Encoding = 6 + Encoding_DELTA_BYTE_ARRAY Encoding = 7 + Encoding_RLE_DICTIONARY Encoding = 8 +) + +func (p Encoding) String() string { + switch p { + case Encoding_PLAIN: + return "PLAIN" + case Encoding_PLAIN_DICTIONARY: + return "PLAIN_DICTIONARY" + case Encoding_RLE: + return "RLE" + case Encoding_BIT_PACKED: + return "BIT_PACKED" + case Encoding_DELTA_BINARY_PACKED: + return "DELTA_BINARY_PACKED" + case Encoding_DELTA_LENGTH_BYTE_ARRAY: + return "DELTA_LENGTH_BYTE_ARRAY" + case Encoding_DELTA_BYTE_ARRAY: + return "DELTA_BYTE_ARRAY" + case Encoding_RLE_DICTIONARY: + return "RLE_DICTIONARY" + } + return "" +} + +func EncodingFromString(s string) (Encoding, error) { + switch s { + case "PLAIN": + return Encoding_PLAIN, nil + case "PLAIN_DICTIONARY": + return Encoding_PLAIN_DICTIONARY, nil + case "RLE": + return Encoding_RLE, nil + case 
"BIT_PACKED": + return Encoding_BIT_PACKED, nil + case "DELTA_BINARY_PACKED": + return Encoding_DELTA_BINARY_PACKED, nil + case "DELTA_LENGTH_BYTE_ARRAY": + return Encoding_DELTA_LENGTH_BYTE_ARRAY, nil + case "DELTA_BYTE_ARRAY": + return Encoding_DELTA_BYTE_ARRAY, nil + case "RLE_DICTIONARY": + return Encoding_RLE_DICTIONARY, nil + } + return Encoding(0), fmt.Errorf("not a valid Encoding string") +} + +func EncodingPtr(v Encoding) *Encoding { return &v } + +func (p Encoding) MarshalText() ([]byte, error) { + return []byte(p.String()), nil +} + +func (p *Encoding) UnmarshalText(text []byte) error { + q, err := EncodingFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil +} + +func (p *Encoding) Scan(value interface{}) error { + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = Encoding(v) + return nil +} + +func (p *Encoding) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil +} + +//Supported compression algorithms. +// +//Codecs added in 2.4 can be read by readers based on 2.4 and later. +//Codec support may vary between readers based on the format version and +//libraries available at runtime. Gzip, Snappy, and LZ4 codecs are +//widely available, while Zstd and Brotli require additional libraries. 
+type CompressionCodec int64 + +const ( + CompressionCodec_UNCOMPRESSED CompressionCodec = 0 + CompressionCodec_SNAPPY CompressionCodec = 1 + CompressionCodec_GZIP CompressionCodec = 2 + CompressionCodec_LZO CompressionCodec = 3 + CompressionCodec_BROTLI CompressionCodec = 4 + CompressionCodec_LZ4 CompressionCodec = 5 + CompressionCodec_ZSTD CompressionCodec = 6 +) + +func (p CompressionCodec) String() string { + switch p { + case CompressionCodec_UNCOMPRESSED: + return "UNCOMPRESSED" + case CompressionCodec_SNAPPY: + return "SNAPPY" + case CompressionCodec_GZIP: + return "GZIP" + case CompressionCodec_LZO: + return "LZO" + case CompressionCodec_BROTLI: + return "BROTLI" + case CompressionCodec_LZ4: + return "LZ4" + case CompressionCodec_ZSTD: + return "ZSTD" + } + return "" +} + +func CompressionCodecFromString(s string) (CompressionCodec, error) { + switch s { + case "UNCOMPRESSED": + return CompressionCodec_UNCOMPRESSED, nil + case "SNAPPY": + return CompressionCodec_SNAPPY, nil + case "GZIP": + return CompressionCodec_GZIP, nil + case "LZO": + return CompressionCodec_LZO, nil + case "BROTLI": + return CompressionCodec_BROTLI, nil + case "LZ4": + return CompressionCodec_LZ4, nil + case "ZSTD": + return CompressionCodec_ZSTD, nil + } + return CompressionCodec(0), fmt.Errorf("not a valid CompressionCodec string") +} + +func CompressionCodecPtr(v CompressionCodec) *CompressionCodec { return &v } + +func (p CompressionCodec) MarshalText() ([]byte, error) { + return []byte(p.String()), nil +} + +func (p *CompressionCodec) UnmarshalText(text []byte) error { + q, err := CompressionCodecFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil +} + +func (p *CompressionCodec) Scan(value interface{}) error { + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = CompressionCodec(v) + return nil +} + +func (p *CompressionCodec) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return 
int64(*p), nil +} + +type PageType int64 + +const ( + PageType_DATA_PAGE PageType = 0 + PageType_INDEX_PAGE PageType = 1 + PageType_DICTIONARY_PAGE PageType = 2 + PageType_DATA_PAGE_V2 PageType = 3 +) + +func (p PageType) String() string { + switch p { + case PageType_DATA_PAGE: + return "DATA_PAGE" + case PageType_INDEX_PAGE: + return "INDEX_PAGE" + case PageType_DICTIONARY_PAGE: + return "DICTIONARY_PAGE" + case PageType_DATA_PAGE_V2: + return "DATA_PAGE_V2" + } + return "" +} + +func PageTypeFromString(s string) (PageType, error) { + switch s { + case "DATA_PAGE": + return PageType_DATA_PAGE, nil + case "INDEX_PAGE": + return PageType_INDEX_PAGE, nil + case "DICTIONARY_PAGE": + return PageType_DICTIONARY_PAGE, nil + case "DATA_PAGE_V2": + return PageType_DATA_PAGE_V2, nil + } + return PageType(0), fmt.Errorf("not a valid PageType string") +} + +func PageTypePtr(v PageType) *PageType { return &v } + +func (p PageType) MarshalText() ([]byte, error) { + return []byte(p.String()), nil +} + +func (p *PageType) UnmarshalText(text []byte) error { + q, err := PageTypeFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil +} + +func (p *PageType) Scan(value interface{}) error { + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = PageType(v) + return nil +} + +func (p *PageType) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil +} + +//Enum to annotate whether lists of min/max elements inside ColumnIndex +//are ordered and if so, in which direction. 
+type BoundaryOrder int64 + +const ( + BoundaryOrder_UNORDERED BoundaryOrder = 0 + BoundaryOrder_ASCENDING BoundaryOrder = 1 + BoundaryOrder_DESCENDING BoundaryOrder = 2 +) + +func (p BoundaryOrder) String() string { + switch p { + case BoundaryOrder_UNORDERED: + return "UNORDERED" + case BoundaryOrder_ASCENDING: + return "ASCENDING" + case BoundaryOrder_DESCENDING: + return "DESCENDING" + } + return "" +} + +func BoundaryOrderFromString(s string) (BoundaryOrder, error) { + switch s { + case "UNORDERED": + return BoundaryOrder_UNORDERED, nil + case "ASCENDING": + return BoundaryOrder_ASCENDING, nil + case "DESCENDING": + return BoundaryOrder_DESCENDING, nil + } + return BoundaryOrder(0), fmt.Errorf("not a valid BoundaryOrder string") +} + +func BoundaryOrderPtr(v BoundaryOrder) *BoundaryOrder { return &v } + +func (p BoundaryOrder) MarshalText() ([]byte, error) { + return []byte(p.String()), nil +} + +func (p *BoundaryOrder) UnmarshalText(text []byte) error { + q, err := BoundaryOrderFromString(string(text)) + if err != nil { + return err + } + *p = q + return nil +} + +func (p *BoundaryOrder) Scan(value interface{}) error { + v, ok := value.(int64) + if !ok { + return errors.New("Scan value is not int64") + } + *p = BoundaryOrder(v) + return nil +} + +func (p *BoundaryOrder) Value() (driver.Value, error) { + if p == nil { + return nil, nil + } + return int64(*p), nil +} + +// Statistics per row group and per page +// All fields are optional. +// +// Attributes: +// - Max: DEPRECATED: min and max value of the column. Use min_value and max_value. +// +// Values are encoded using PLAIN encoding, except that variable-length byte +// arrays do not include a length prefix. +// +// These fields encode min and max values determined by signed comparison +// only. New files should use the correct order for a column's logical type +// and store the values in the min_value and max_value fields. 
+// +// To support older readers, these may be set when the column order is +// signed. +// - Min +// - NullCount: count of null value in the column +// - DistinctCount: count of distinct values occurring +// - MaxValue: Min and max values for the column, determined by its ColumnOrder. +// +// Values are encoded using PLAIN encoding, except that variable-length byte +// arrays do not include a length prefix. +// - MinValue +type Statistics struct { + Max []byte `thrift:"max,1" db:"max" json:"max,omitempty"` + Min []byte `thrift:"min,2" db:"min" json:"min,omitempty"` + NullCount *int64 `thrift:"null_count,3" db:"null_count" json:"null_count,omitempty"` + DistinctCount *int64 `thrift:"distinct_count,4" db:"distinct_count" json:"distinct_count,omitempty"` + MaxValue []byte `thrift:"max_value,5" db:"max_value" json:"max_value,omitempty"` + MinValue []byte `thrift:"min_value,6" db:"min_value" json:"min_value,omitempty"` +} + +func NewStatistics() *Statistics { + return &Statistics{} +} + +var Statistics_Max_DEFAULT []byte + +func (p *Statistics) GetMax() []byte { + return p.Max +} + +var Statistics_Min_DEFAULT []byte + +func (p *Statistics) GetMin() []byte { + return p.Min +} + +var Statistics_NullCount_DEFAULT int64 + +func (p *Statistics) GetNullCount() int64 { + if !p.IsSetNullCount() { + return Statistics_NullCount_DEFAULT + } + return *p.NullCount +} + +var Statistics_DistinctCount_DEFAULT int64 + +func (p *Statistics) GetDistinctCount() int64 { + if !p.IsSetDistinctCount() { + return Statistics_DistinctCount_DEFAULT + } + return *p.DistinctCount +} + +var Statistics_MaxValue_DEFAULT []byte + +func (p *Statistics) GetMaxValue() []byte { + return p.MaxValue +} + +var Statistics_MinValue_DEFAULT []byte + +func (p *Statistics) GetMinValue() []byte { + return p.MinValue +} +func (p *Statistics) IsSetMax() bool { + return p.Max != nil +} + +func (p *Statistics) IsSetMin() bool { + return p.Min != nil +} + +func (p *Statistics) IsSetNullCount() bool { + return 
p.NullCount != nil +} + +func (p *Statistics) IsSetDistinctCount() bool { + return p.DistinctCount != nil +} + +func (p *Statistics) IsSetMaxValue() bool { + return p.MaxValue != nil +} + +func (p *Statistics) IsSetMinValue() bool { + return p.MinValue != nil +} + +func (p *Statistics) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + case 4: + if err := p.ReadField4(iprot); err != nil { + return err + } + case 5: + if err := p.ReadField5(iprot); err != nil { + return err + } + case 6: + if err := p.ReadField6(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *Statistics) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Max = v + } + return nil +} + +func (p *Statistics) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Min = v + } + return nil +} + +func (p *Statistics) ReadField3(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(); err != nil { + return 
thrift.PrependError("error reading field 3: ", err) + } else { + p.NullCount = &v + } + return nil +} + +func (p *Statistics) ReadField4(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + p.DistinctCount = &v + } + return nil +} + +func (p *Statistics) ReadField5(iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.MaxValue = v + } + return nil +} + +func (p *Statistics) ReadField6(iprot thrift.TProtocol) error { + if v, err := iprot.ReadBinary(); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.MinValue = v + } + return nil +} + +func (p *Statistics) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("Statistics"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + if err := p.writeField4(oprot); err != nil { + return err + } + if err := p.writeField5(oprot); err != nil { + return err + } + if err := p.writeField6(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *Statistics) writeField1(oprot thrift.TProtocol) (err error) { + if p.IsSetMax() { + if err := oprot.WriteFieldBegin("max", thrift.STRING, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:max: ", p), err) + } + if err := oprot.WriteBinary(p.Max); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.max 
(1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:max: ", p), err) + } + } + return err +} + +func (p *Statistics) writeField2(oprot thrift.TProtocol) (err error) { + if p.IsSetMin() { + if err := oprot.WriteFieldBegin("min", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min: ", p), err) + } + if err := oprot.WriteBinary(p.Min); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.min (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min: ", p), err) + } + } + return err +} + +func (p *Statistics) writeField3(oprot thrift.TProtocol) (err error) { + if p.IsSetNullCount() { + if err := oprot.WriteFieldBegin("null_count", thrift.I64, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:null_count: ", p), err) + } + if err := oprot.WriteI64(int64(*p.NullCount)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.null_count (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:null_count: ", p), err) + } + } + return err +} + +func (p *Statistics) writeField4(oprot thrift.TProtocol) (err error) { + if p.IsSetDistinctCount() { + if err := oprot.WriteFieldBegin("distinct_count", thrift.I64, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:distinct_count: ", p), err) + } + if err := oprot.WriteI64(int64(*p.DistinctCount)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.distinct_count (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:distinct_count: ", p), err) + } + } + return err +} + +func (p *Statistics) 
writeField5(oprot thrift.TProtocol) (err error) { + if p.IsSetMaxValue() { + if err := oprot.WriteFieldBegin("max_value", thrift.STRING, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:max_value: ", p), err) + } + if err := oprot.WriteBinary(p.MaxValue); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.max_value (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:max_value: ", p), err) + } + } + return err +} + +func (p *Statistics) writeField6(oprot thrift.TProtocol) (err error) { + if p.IsSetMinValue() { + if err := oprot.WriteFieldBegin("min_value", thrift.STRING, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:min_value: ", p), err) + } + if err := oprot.WriteBinary(p.MinValue); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.min_value (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:min_value: ", p), err) + } + } + return err +} + +func (p *Statistics) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("Statistics(%+v)", *p) +} + +// Empty structs to use as logical type annotations +type StringType struct { +} + +func NewStringType() *StringType { + return &StringType{} +} + +func (p *StringType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := 
iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *StringType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("StringType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *StringType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("StringType(%+v)", *p) +} + +type UUIDType struct { +} + +func NewUUIDType() *UUIDType { + return &UUIDType{} +} + +func (p *UUIDType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *UUIDType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("UUIDType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p 
*UUIDType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("UUIDType(%+v)", *p) +} + +type MapType struct { +} + +func NewMapType() *MapType { + return &MapType{} +} + +func (p *MapType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *MapType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("MapType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *MapType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("MapType(%+v)", *p) +} + +type ListType struct { +} + +func NewListType() *ListType { + return &ListType{} +} + +func (p *ListType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); 
err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *ListType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("ListType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *ListType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("ListType(%+v)", *p) +} + +type EnumType struct { +} + +func NewEnumType() *EnumType { + return &EnumType{} +} + +func (p *EnumType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *EnumType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("EnumType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err 
!= nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *EnumType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("EnumType(%+v)", *p) +} + +type DateType struct { +} + +func NewDateType() *DateType { + return &DateType{} +} + +func (p *DateType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *DateType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("DateType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *DateType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("DateType(%+v)", *p) +} + +// Logical type to annotate a column that is always null. +// +// Sometimes when discovering the schema of existing data, values are always +// null and the physical type can't be determined. This annotation signals +// the case where the physical type was guessed from all null values. 
+type NullType struct { +} + +func NewNullType() *NullType { + return &NullType{} +} + +func (p *NullType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *NullType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("NullType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *NullType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("NullType(%+v)", *p) +} + +// Decimal logical type annotation +// +// To maintain forward-compatibility in v1, implementations using this logical +// type must also set scale and precision on the annotated SchemaElement. 
+// +// Allowed for physical types: INT32, INT64, FIXED, and BINARY +// +// Attributes: +// - Scale +// - Precision +type DecimalType struct { + Scale int32 `thrift:"scale,1,required" db:"scale" json:"scale"` + Precision int32 `thrift:"precision,2,required" db:"precision" json:"precision"` +} + +func NewDecimalType() *DecimalType { + return &DecimalType{} +} + +func (p *DecimalType) GetScale() int32 { + return p.Scale +} + +func (p *DecimalType) GetPrecision() int32 { + return p.Precision +} +func (p *DecimalType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetScale bool = false + var issetPrecision bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetScale = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetPrecision = true + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetScale { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Scale is not set")) + } + if !issetPrecision { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Precision is not set")) + } + return nil +} + +func (p *DecimalType) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Scale = v + } + return nil +} + +func (p 
*DecimalType) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Precision = v + } + return nil +} + +func (p *DecimalType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("DecimalType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *DecimalType) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("scale", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:scale: ", p), err) + } + if err := oprot.WriteI32(int32(p.Scale)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.scale (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:scale: ", p), err) + } + return err +} + +func (p *DecimalType) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("precision", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:precision: ", p), err) + } + if err := oprot.WriteI32(int32(p.Precision)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.precision (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:precision: ", p), err) + } + return err +} + +func (p *DecimalType) String() string { + if p == nil { + return "" 
+ } + return fmt.Sprintf("DecimalType(%+v)", *p) +} + +// Time units for logical types +type MilliSeconds struct { +} + +func NewMilliSeconds() *MilliSeconds { + return &MilliSeconds{} +} + +func (p *MilliSeconds) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *MilliSeconds) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("MilliSeconds"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *MilliSeconds) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("MilliSeconds(%+v)", *p) +} + +type MicroSeconds struct { +} + +func NewMicroSeconds() *MicroSeconds { + return &MicroSeconds{} +} + +func (p *MicroSeconds) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break 
+ } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *MicroSeconds) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("MicroSeconds"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *MicroSeconds) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("MicroSeconds(%+v)", *p) +} + +type NanoSeconds struct { +} + +func NewNanoSeconds() *NanoSeconds { + return &NanoSeconds{} +} + +func (p *NanoSeconds) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *NanoSeconds) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("NanoSeconds"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return 
thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *NanoSeconds) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("NanoSeconds(%+v)", *p) +} + +// Attributes: +// - MILLIS +// - MICROS +// - NANOS +type TimeUnit struct { + MILLIS *MilliSeconds `thrift:"MILLIS,1" db:"MILLIS" json:"MILLIS,omitempty"` + MICROS *MicroSeconds `thrift:"MICROS,2" db:"MICROS" json:"MICROS,omitempty"` + NANOS *NanoSeconds `thrift:"NANOS,3" db:"NANOS" json:"NANOS,omitempty"` +} + +func NewTimeUnit() *TimeUnit { + return &TimeUnit{} +} + +var TimeUnit_MILLIS_DEFAULT *MilliSeconds + +func (p *TimeUnit) GetMILLIS() *MilliSeconds { + if !p.IsSetMILLIS() { + return TimeUnit_MILLIS_DEFAULT + } + return p.MILLIS +} + +var TimeUnit_MICROS_DEFAULT *MicroSeconds + +func (p *TimeUnit) GetMICROS() *MicroSeconds { + if !p.IsSetMICROS() { + return TimeUnit_MICROS_DEFAULT + } + return p.MICROS +} + +var TimeUnit_NANOS_DEFAULT *NanoSeconds + +func (p *TimeUnit) GetNANOS() *NanoSeconds { + if !p.IsSetNANOS() { + return TimeUnit_NANOS_DEFAULT + } + return p.NANOS +} +func (p *TimeUnit) CountSetFieldsTimeUnit() int { + count := 0 + if p.IsSetMILLIS() { + count++ + } + if p.IsSetMICROS() { + count++ + } + if p.IsSetNANOS() { + count++ + } + return count + +} + +func (p *TimeUnit) IsSetMILLIS() bool { + return p.MILLIS != nil +} + +func (p *TimeUnit) IsSetMICROS() bool { + return p.MICROS != nil +} + +func (p *TimeUnit) IsSetNANOS() bool { + return p.NANOS != nil +} + +func (p *TimeUnit) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == 
thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *TimeUnit) ReadField1(iprot thrift.TProtocol) error { + p.MILLIS = &MilliSeconds{} + if err := p.MILLIS.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MILLIS), err) + } + return nil +} + +func (p *TimeUnit) ReadField2(iprot thrift.TProtocol) error { + p.MICROS = &MicroSeconds{} + if err := p.MICROS.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MICROS), err) + } + return nil +} + +func (p *TimeUnit) ReadField3(iprot thrift.TProtocol) error { + p.NANOS = &NanoSeconds{} + if err := p.NANOS.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.NANOS), err) + } + return nil +} + +func (p *TimeUnit) Write(oprot thrift.TProtocol) error { + if c := p.CountSetFieldsTimeUnit(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c) + } + if err := oprot.WriteStructBegin("TimeUnit"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := 
oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *TimeUnit) writeField1(oprot thrift.TProtocol) (err error) { + if p.IsSetMILLIS() { + if err := oprot.WriteFieldBegin("MILLIS", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:MILLIS: ", p), err) + } + if err := p.MILLIS.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MILLIS), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:MILLIS: ", p), err) + } + } + return err +} + +func (p *TimeUnit) writeField2(oprot thrift.TProtocol) (err error) { + if p.IsSetMICROS() { + if err := oprot.WriteFieldBegin("MICROS", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MICROS: ", p), err) + } + if err := p.MICROS.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MICROS), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MICROS: ", p), err) + } + } + return err +} + +func (p *TimeUnit) writeField3(oprot thrift.TProtocol) (err error) { + if p.IsSetNANOS() { + if err := oprot.WriteFieldBegin("NANOS", thrift.STRUCT, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:NANOS: ", p), err) + } + if err := p.NANOS.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.NANOS), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:NANOS: ", p), err) + } + } + return err +} + +func (p *TimeUnit) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("TimeUnit(%+v)", *p) +} + +// Timestamp logical type annotation +// +// Allowed for physical 
types: INT64 +// +// Attributes: +// - IsAdjustedToUTC +// - Unit +type TimestampType struct { + IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` + Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` +} + +func NewTimestampType() *TimestampType { + return &TimestampType{} +} + +func (p *TimestampType) GetIsAdjustedToUTC() bool { + return p.IsAdjustedToUTC +} + +var TimestampType_Unit_DEFAULT *TimeUnit + +func (p *TimestampType) GetUnit() *TimeUnit { + if !p.IsSetUnit() { + return TimestampType_Unit_DEFAULT + } + return p.Unit +} +func (p *TimestampType) IsSetUnit() bool { + return p.Unit != nil +} + +func (p *TimestampType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetIsAdjustedToUTC bool = false + var issetUnit bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetIsAdjustedToUTC = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetUnit = true + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetIsAdjustedToUTC { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")) + } + if !issetUnit { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")) + } + return nil +} + +func (p *TimestampType) 
ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.IsAdjustedToUTC = v + } + return nil +} + +func (p *TimestampType) ReadField2(iprot thrift.TProtocol) error { + p.Unit = &TimeUnit{} + if err := p.Unit.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) + } + return nil +} + +func (p *TimestampType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("TimestampType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *TimestampType) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("isAdjustedToUTC", thrift.BOOL, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) + } + if err := oprot.WriteBool(bool(p.IsAdjustedToUTC)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) + } + return err +} + +func (p *TimestampType) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("unit", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) + } + if err := p.Unit.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error 
writing struct: ", p.Unit), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) + } + return err +} + +func (p *TimestampType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("TimestampType(%+v)", *p) +} + +// Time logical type annotation +// +// Allowed for physical types: INT32 (millis), INT64 (micros, nanos) +// +// Attributes: +// - IsAdjustedToUTC +// - Unit +type TimeType struct { + IsAdjustedToUTC bool `thrift:"isAdjustedToUTC,1,required" db:"isAdjustedToUTC" json:"isAdjustedToUTC"` + Unit *TimeUnit `thrift:"unit,2,required" db:"unit" json:"unit"` +} + +func NewTimeType() *TimeType { + return &TimeType{} +} + +func (p *TimeType) GetIsAdjustedToUTC() bool { + return p.IsAdjustedToUTC +} + +var TimeType_Unit_DEFAULT *TimeUnit + +func (p *TimeType) GetUnit() *TimeUnit { + if !p.IsSetUnit() { + return TimeType_Unit_DEFAULT + } + return p.Unit +} +func (p *TimeType) IsSetUnit() bool { + return p.Unit != nil +} + +func (p *TimeType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetIsAdjustedToUTC bool = false + var issetUnit bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetIsAdjustedToUTC = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetUnit = true + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: 
", p), err) + } + if !issetIsAdjustedToUTC { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsAdjustedToUTC is not set")) + } + if !issetUnit { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Unit is not set")) + } + return nil +} + +func (p *TimeType) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.IsAdjustedToUTC = v + } + return nil +} + +func (p *TimeType) ReadField2(iprot thrift.TProtocol) error { + p.Unit = &TimeUnit{} + if err := p.Unit.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Unit), err) + } + return nil +} + +func (p *TimeType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("TimeType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *TimeType) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("isAdjustedToUTC", thrift.BOOL, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:isAdjustedToUTC: ", p), err) + } + if err := oprot.WriteBool(bool(p.IsAdjustedToUTC)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.isAdjustedToUTC (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:isAdjustedToUTC: ", p), err) + } + return err +} + +func (p 
*TimeType) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("unit", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:unit: ", p), err) + } + if err := p.Unit.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Unit), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:unit: ", p), err) + } + return err +} + +func (p *TimeType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("TimeType(%+v)", *p) +} + +// Integer logical type annotation +// +// bitWidth must be 8, 16, 32, or 64. +// +// Allowed for physical types: INT32, INT64 +// +// Attributes: +// - BitWidth +// - IsSigned +type IntType struct { + BitWidth int8 `thrift:"bitWidth,1,required" db:"bitWidth" json:"bitWidth"` + IsSigned bool `thrift:"isSigned,2,required" db:"isSigned" json:"isSigned"` +} + +func NewIntType() *IntType { + return &IntType{} +} + +func (p *IntType) GetBitWidth() int8 { + return p.BitWidth +} + +func (p *IntType) GetIsSigned() bool { + return p.IsSigned +} +func (p *IntType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetBitWidth bool = false + var issetIsSigned bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetBitWidth = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetIsSigned = true + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return 
err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetBitWidth { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BitWidth is not set")) + } + if !issetIsSigned { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field IsSigned is not set")) + } + return nil +} + +func (p *IntType) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadByte(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := int8(v) + p.BitWidth = temp + } + return nil +} + +func (p *IntType) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.IsSigned = v + } + return nil +} + +func (p *IntType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("IntType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *IntType) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("bitWidth", thrift.BYTE, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:bitWidth: ", p), err) + } + if err := oprot.WriteByte(int8(p.BitWidth)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.bitWidth (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T 
write field end error 1:bitWidth: ", p), err) + } + return err +} + +func (p *IntType) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("isSigned", thrift.BOOL, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:isSigned: ", p), err) + } + if err := oprot.WriteBool(bool(p.IsSigned)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.isSigned (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:isSigned: ", p), err) + } + return err +} + +func (p *IntType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("IntType(%+v)", *p) +} + +// Embedded JSON logical type annotation +// +// Allowed for physical types: BINARY +type JsonType struct { +} + +func NewJsonType() *JsonType { + return &JsonType{} +} + +func (p *JsonType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *JsonType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("JsonType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return 
thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *JsonType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("JsonType(%+v)", *p) +} + +// Embedded BSON logical type annotation +// +// Allowed for physical types: BINARY +type BsonType struct { +} + +func NewBsonType() *BsonType { + return &BsonType{} +} + +func (p *BsonType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *BsonType) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("BsonType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *BsonType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("BsonType(%+v)", *p) +} + +// LogicalType annotations to replace ConvertedType. +// +// To maintain compatibility, implementations using LogicalType for a +// SchemaElement must also set the corresponding ConvertedType from the +// following table. 
+// +// Attributes: +// - STRING +// - MAP +// - LIST +// - ENUM +// - DECIMAL +// - DATE +// - TIME +// - TIMESTAMP +// - INTEGER +// - UNKNOWN +// - JSON +// - BSON +// - UUID +type LogicalType struct { + STRING *StringType `thrift:"STRING,1" db:"STRING" json:"STRING,omitempty"` + MAP *MapType `thrift:"MAP,2" db:"MAP" json:"MAP,omitempty"` + LIST *ListType `thrift:"LIST,3" db:"LIST" json:"LIST,omitempty"` + ENUM *EnumType `thrift:"ENUM,4" db:"ENUM" json:"ENUM,omitempty"` + DECIMAL *DecimalType `thrift:"DECIMAL,5" db:"DECIMAL" json:"DECIMAL,omitempty"` + DATE *DateType `thrift:"DATE,6" db:"DATE" json:"DATE,omitempty"` + TIME *TimeType `thrift:"TIME,7" db:"TIME" json:"TIME,omitempty"` + TIMESTAMP *TimestampType `thrift:"TIMESTAMP,8" db:"TIMESTAMP" json:"TIMESTAMP,omitempty"` + // unused field # 9 + INTEGER *IntType `thrift:"INTEGER,10" db:"INTEGER" json:"INTEGER,omitempty"` + UNKNOWN *NullType `thrift:"UNKNOWN,11" db:"UNKNOWN" json:"UNKNOWN,omitempty"` + JSON *JsonType `thrift:"JSON,12" db:"JSON" json:"JSON,omitempty"` + BSON *BsonType `thrift:"BSON,13" db:"BSON" json:"BSON,omitempty"` + UUID *UUIDType `thrift:"UUID,14" db:"UUID" json:"UUID,omitempty"` +} + +func NewLogicalType() *LogicalType { + return &LogicalType{} +} + +var LogicalType_STRING_DEFAULT *StringType + +func (p *LogicalType) GetSTRING() *StringType { + if !p.IsSetSTRING() { + return LogicalType_STRING_DEFAULT + } + return p.STRING +} + +var LogicalType_MAP_DEFAULT *MapType + +func (p *LogicalType) GetMAP() *MapType { + if !p.IsSetMAP() { + return LogicalType_MAP_DEFAULT + } + return p.MAP +} + +var LogicalType_LIST_DEFAULT *ListType + +func (p *LogicalType) GetLIST() *ListType { + if !p.IsSetLIST() { + return LogicalType_LIST_DEFAULT + } + return p.LIST +} + +var LogicalType_ENUM_DEFAULT *EnumType + +func (p *LogicalType) GetENUM() *EnumType { + if !p.IsSetENUM() { + return LogicalType_ENUM_DEFAULT + } + return p.ENUM +} + +var LogicalType_DECIMAL_DEFAULT *DecimalType + +func (p *LogicalType) 
GetDECIMAL() *DecimalType { + if !p.IsSetDECIMAL() { + return LogicalType_DECIMAL_DEFAULT + } + return p.DECIMAL +} + +var LogicalType_DATE_DEFAULT *DateType + +func (p *LogicalType) GetDATE() *DateType { + if !p.IsSetDATE() { + return LogicalType_DATE_DEFAULT + } + return p.DATE +} + +var LogicalType_TIME_DEFAULT *TimeType + +func (p *LogicalType) GetTIME() *TimeType { + if !p.IsSetTIME() { + return LogicalType_TIME_DEFAULT + } + return p.TIME +} + +var LogicalType_TIMESTAMP_DEFAULT *TimestampType + +func (p *LogicalType) GetTIMESTAMP() *TimestampType { + if !p.IsSetTIMESTAMP() { + return LogicalType_TIMESTAMP_DEFAULT + } + return p.TIMESTAMP +} + +var LogicalType_INTEGER_DEFAULT *IntType + +func (p *LogicalType) GetINTEGER() *IntType { + if !p.IsSetINTEGER() { + return LogicalType_INTEGER_DEFAULT + } + return p.INTEGER +} + +var LogicalType_UNKNOWN_DEFAULT *NullType + +func (p *LogicalType) GetUNKNOWN() *NullType { + if !p.IsSetUNKNOWN() { + return LogicalType_UNKNOWN_DEFAULT + } + return p.UNKNOWN +} + +var LogicalType_JSON_DEFAULT *JsonType + +func (p *LogicalType) GetJSON() *JsonType { + if !p.IsSetJSON() { + return LogicalType_JSON_DEFAULT + } + return p.JSON +} + +var LogicalType_BSON_DEFAULT *BsonType + +func (p *LogicalType) GetBSON() *BsonType { + if !p.IsSetBSON() { + return LogicalType_BSON_DEFAULT + } + return p.BSON +} + +var LogicalType_UUID_DEFAULT *UUIDType + +func (p *LogicalType) GetUUID() *UUIDType { + if !p.IsSetUUID() { + return LogicalType_UUID_DEFAULT + } + return p.UUID +} +func (p *LogicalType) CountSetFieldsLogicalType() int { + count := 0 + if p.IsSetSTRING() { + count++ + } + if p.IsSetMAP() { + count++ + } + if p.IsSetLIST() { + count++ + } + if p.IsSetENUM() { + count++ + } + if p.IsSetDECIMAL() { + count++ + } + if p.IsSetDATE() { + count++ + } + if p.IsSetTIME() { + count++ + } + if p.IsSetTIMESTAMP() { + count++ + } + if p.IsSetINTEGER() { + count++ + } + if p.IsSetUNKNOWN() { + count++ + } + if p.IsSetJSON() { + count++ + } + if 
p.IsSetBSON() { + count++ + } + if p.IsSetUUID() { + count++ + } + return count + +} + +func (p *LogicalType) IsSetSTRING() bool { + return p.STRING != nil +} + +func (p *LogicalType) IsSetMAP() bool { + return p.MAP != nil +} + +func (p *LogicalType) IsSetLIST() bool { + return p.LIST != nil +} + +func (p *LogicalType) IsSetENUM() bool { + return p.ENUM != nil +} + +func (p *LogicalType) IsSetDECIMAL() bool { + return p.DECIMAL != nil +} + +func (p *LogicalType) IsSetDATE() bool { + return p.DATE != nil +} + +func (p *LogicalType) IsSetTIME() bool { + return p.TIME != nil +} + +func (p *LogicalType) IsSetTIMESTAMP() bool { + return p.TIMESTAMP != nil +} + +func (p *LogicalType) IsSetINTEGER() bool { + return p.INTEGER != nil +} + +func (p *LogicalType) IsSetUNKNOWN() bool { + return p.UNKNOWN != nil +} + +func (p *LogicalType) IsSetJSON() bool { + return p.JSON != nil +} + +func (p *LogicalType) IsSetBSON() bool { + return p.BSON != nil +} + +func (p *LogicalType) IsSetUUID() bool { + return p.UUID != nil +} + +func (p *LogicalType) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + case 4: + if err := p.ReadField4(iprot); err != nil { + return err + } + case 5: + if err := p.ReadField5(iprot); err != nil { + return err + } + case 6: + if err := p.ReadField6(iprot); err != nil { + return err + } + case 7: + if err := p.ReadField7(iprot); err != nil { + return err + } + case 8: + if err := 
p.ReadField8(iprot); err != nil { + return err + } + case 10: + if err := p.ReadField10(iprot); err != nil { + return err + } + case 11: + if err := p.ReadField11(iprot); err != nil { + return err + } + case 12: + if err := p.ReadField12(iprot); err != nil { + return err + } + case 13: + if err := p.ReadField13(iprot); err != nil { + return err + } + case 14: + if err := p.ReadField14(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *LogicalType) ReadField1(iprot thrift.TProtocol) error { + p.STRING = &StringType{} + if err := p.STRING.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.STRING), err) + } + return nil +} + +func (p *LogicalType) ReadField2(iprot thrift.TProtocol) error { + p.MAP = &MapType{} + if err := p.MAP.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MAP), err) + } + return nil +} + +func (p *LogicalType) ReadField3(iprot thrift.TProtocol) error { + p.LIST = &ListType{} + if err := p.LIST.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LIST), err) + } + return nil +} + +func (p *LogicalType) ReadField4(iprot thrift.TProtocol) error { + p.ENUM = &EnumType{} + if err := p.ENUM.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.ENUM), err) + } + return nil +} + +func (p *LogicalType) ReadField5(iprot thrift.TProtocol) error { + p.DECIMAL = &DecimalType{} + if err := p.DECIMAL.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DECIMAL), err) + } + return nil +} + +func (p *LogicalType) ReadField6(iprot thrift.TProtocol) 
error { + p.DATE = &DateType{} + if err := p.DATE.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DATE), err) + } + return nil +} + +func (p *LogicalType) ReadField7(iprot thrift.TProtocol) error { + p.TIME = &TimeType{} + if err := p.TIME.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIME), err) + } + return nil +} + +func (p *LogicalType) ReadField8(iprot thrift.TProtocol) error { + p.TIMESTAMP = &TimestampType{} + if err := p.TIMESTAMP.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TIMESTAMP), err) + } + return nil +} + +func (p *LogicalType) ReadField10(iprot thrift.TProtocol) error { + p.INTEGER = &IntType{} + if err := p.INTEGER.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.INTEGER), err) + } + return nil +} + +func (p *LogicalType) ReadField11(iprot thrift.TProtocol) error { + p.UNKNOWN = &NullType{} + if err := p.UNKNOWN.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UNKNOWN), err) + } + return nil +} + +func (p *LogicalType) ReadField12(iprot thrift.TProtocol) error { + p.JSON = &JsonType{} + if err := p.JSON.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.JSON), err) + } + return nil +} + +func (p *LogicalType) ReadField13(iprot thrift.TProtocol) error { + p.BSON = &BsonType{} + if err := p.BSON.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.BSON), err) + } + return nil +} + +func (p *LogicalType) ReadField14(iprot thrift.TProtocol) error { + p.UUID = &UUIDType{} + if err := p.UUID.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.UUID), err) + } + return nil +} + +func (p *LogicalType) Write(oprot thrift.TProtocol) error { + if c := 
p.CountSetFieldsLogicalType(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c) + } + if err := oprot.WriteStructBegin("LogicalType"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + if err := p.writeField4(oprot); err != nil { + return err + } + if err := p.writeField5(oprot); err != nil { + return err + } + if err := p.writeField6(oprot); err != nil { + return err + } + if err := p.writeField7(oprot); err != nil { + return err + } + if err := p.writeField8(oprot); err != nil { + return err + } + if err := p.writeField10(oprot); err != nil { + return err + } + if err := p.writeField11(oprot); err != nil { + return err + } + if err := p.writeField12(oprot); err != nil { + return err + } + if err := p.writeField13(oprot); err != nil { + return err + } + if err := p.writeField14(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *LogicalType) writeField1(oprot thrift.TProtocol) (err error) { + if p.IsSetSTRING() { + if err := oprot.WriteFieldBegin("STRING", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:STRING: ", p), err) + } + if err := p.STRING.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.STRING), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:STRING: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField2(oprot 
thrift.TProtocol) (err error) { + if p.IsSetMAP() { + if err := oprot.WriteFieldBegin("MAP", thrift.STRUCT, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:MAP: ", p), err) + } + if err := p.MAP.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MAP), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:MAP: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField3(oprot thrift.TProtocol) (err error) { + if p.IsSetLIST() { + if err := oprot.WriteFieldBegin("LIST", thrift.STRUCT, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:LIST: ", p), err) + } + if err := p.LIST.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LIST), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:LIST: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField4(oprot thrift.TProtocol) (err error) { + if p.IsSetENUM() { + if err := oprot.WriteFieldBegin("ENUM", thrift.STRUCT, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:ENUM: ", p), err) + } + if err := p.ENUM.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.ENUM), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:ENUM: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField5(oprot thrift.TProtocol) (err error) { + if p.IsSetDECIMAL() { + if err := oprot.WriteFieldBegin("DECIMAL", thrift.STRUCT, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:DECIMAL: ", p), err) + } + if err := p.DECIMAL.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing 
struct: ", p.DECIMAL), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:DECIMAL: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField6(oprot thrift.TProtocol) (err error) { + if p.IsSetDATE() { + if err := oprot.WriteFieldBegin("DATE", thrift.STRUCT, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:DATE: ", p), err) + } + if err := p.DATE.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DATE), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:DATE: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField7(oprot thrift.TProtocol) (err error) { + if p.IsSetTIME() { + if err := oprot.WriteFieldBegin("TIME", thrift.STRUCT, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:TIME: ", p), err) + } + if err := p.TIME.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIME), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:TIME: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField8(oprot thrift.TProtocol) (err error) { + if p.IsSetTIMESTAMP() { + if err := oprot.WriteFieldBegin("TIMESTAMP", thrift.STRUCT, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:TIMESTAMP: ", p), err) + } + if err := p.TIMESTAMP.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TIMESTAMP), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:TIMESTAMP: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField10(oprot thrift.TProtocol) (err error) { + if p.IsSetINTEGER() { + if 
err := oprot.WriteFieldBegin("INTEGER", thrift.STRUCT, 10); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:INTEGER: ", p), err) + } + if err := p.INTEGER.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.INTEGER), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 10:INTEGER: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField11(oprot thrift.TProtocol) (err error) { + if p.IsSetUNKNOWN() { + if err := oprot.WriteFieldBegin("UNKNOWN", thrift.STRUCT, 11); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:UNKNOWN: ", p), err) + } + if err := p.UNKNOWN.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UNKNOWN), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 11:UNKNOWN: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField12(oprot thrift.TProtocol) (err error) { + if p.IsSetJSON() { + if err := oprot.WriteFieldBegin("JSON", thrift.STRUCT, 12); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:JSON: ", p), err) + } + if err := p.JSON.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.JSON), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 12:JSON: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField13(oprot thrift.TProtocol) (err error) { + if p.IsSetBSON() { + if err := oprot.WriteFieldBegin("BSON", thrift.STRUCT, 13); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:BSON: ", p), err) + } + if err := p.BSON.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", 
p.BSON), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 13:BSON: ", p), err) + } + } + return err +} + +func (p *LogicalType) writeField14(oprot thrift.TProtocol) (err error) { + if p.IsSetUUID() { + if err := oprot.WriteFieldBegin("UUID", thrift.STRUCT, 14); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 14:UUID: ", p), err) + } + if err := p.UUID.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.UUID), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 14:UUID: ", p), err) + } + } + return err +} + +func (p *LogicalType) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("LogicalType(%+v)", *p) +} + +// Represents a element inside a schema definition. +// - if it is a group (inner node) then type is undefined and num_children is defined +// - if it is a primitive type (leaf) then type is defined and num_children is undefined +// the nodes are listed in depth first traversal order. +// +// Attributes: +// - Type: Data type for this field. Not set if the current element is a non-leaf node +// - TypeLength: If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the vales. +// Otherwise, if specified, this is the maximum bit length to store any of the values. +// (e.g. a low cardinality INT col could have this set to 3). Note that this is +// in the schema, and therefore fixed for the entire file. +// - RepetitionType: repetition of the field. The root of the schema does not have a repetition_type. +// All other nodes must have one +// - Name: Name of the field in the schema +// - NumChildren: Nested fields. Since thrift does not support nested fields, +// the nesting is flattened to a single list by a depth-first traversal. +// The children count is used to construct the nested relationship. 
+// This field is not set when the element is a primitive type +// - ConvertedType: When the schema is the result of a conversion from another model +// Used to record the original type to help with cross conversion. +// - Scale: Used when this column contains decimal data. +// See the DECIMAL converted type for more details. +// - Precision +// - FieldID: When the original schema supports field ids, this will save the +// original field id in the parquet schema +// - LogicalType: The logical type of this SchemaElement +// +// LogicalType replaces ConvertedType, but ConvertedType is still required +// for some logical types to ensure forward-compatibility in format v1. +type SchemaElement struct { + Type *Type `thrift:"type,1" db:"type" json:"type,omitempty"` + TypeLength *int32 `thrift:"type_length,2" db:"type_length" json:"type_length,omitempty"` + RepetitionType *FieldRepetitionType `thrift:"repetition_type,3" db:"repetition_type" json:"repetition_type,omitempty"` + Name string `thrift:"name,4,required" db:"name" json:"name"` + NumChildren *int32 `thrift:"num_children,5" db:"num_children" json:"num_children,omitempty"` + ConvertedType *ConvertedType `thrift:"converted_type,6" db:"converted_type" json:"converted_type,omitempty"` + Scale *int32 `thrift:"scale,7" db:"scale" json:"scale,omitempty"` + Precision *int32 `thrift:"precision,8" db:"precision" json:"precision,omitempty"` + FieldID *int32 `thrift:"field_id,9" db:"field_id" json:"field_id,omitempty"` + LogicalType *LogicalType `thrift:"logicalType,10" db:"logicalType" json:"logicalType,omitempty"` +} + +func NewSchemaElement() *SchemaElement { + return &SchemaElement{} +} + +var SchemaElement_Type_DEFAULT Type + +func (p *SchemaElement) GetType() Type { + if !p.IsSetType() { + return SchemaElement_Type_DEFAULT + } + return *p.Type +} + +var SchemaElement_TypeLength_DEFAULT int32 + +func (p *SchemaElement) GetTypeLength() int32 { + if !p.IsSetTypeLength() { + return SchemaElement_TypeLength_DEFAULT + } + 
return *p.TypeLength +} + +var SchemaElement_RepetitionType_DEFAULT FieldRepetitionType + +func (p *SchemaElement) GetRepetitionType() FieldRepetitionType { + if !p.IsSetRepetitionType() { + return SchemaElement_RepetitionType_DEFAULT + } + return *p.RepetitionType +} + +func (p *SchemaElement) GetName() string { + return p.Name +} + +var SchemaElement_NumChildren_DEFAULT int32 + +func (p *SchemaElement) GetNumChildren() int32 { + if !p.IsSetNumChildren() { + return SchemaElement_NumChildren_DEFAULT + } + return *p.NumChildren +} + +var SchemaElement_ConvertedType_DEFAULT ConvertedType + +func (p *SchemaElement) GetConvertedType() ConvertedType { + if !p.IsSetConvertedType() { + return SchemaElement_ConvertedType_DEFAULT + } + return *p.ConvertedType +} + +var SchemaElement_Scale_DEFAULT int32 + +func (p *SchemaElement) GetScale() int32 { + if !p.IsSetScale() { + return SchemaElement_Scale_DEFAULT + } + return *p.Scale +} + +var SchemaElement_Precision_DEFAULT int32 + +func (p *SchemaElement) GetPrecision() int32 { + if !p.IsSetPrecision() { + return SchemaElement_Precision_DEFAULT + } + return *p.Precision +} + +var SchemaElement_FieldID_DEFAULT int32 + +func (p *SchemaElement) GetFieldID() int32 { + if !p.IsSetFieldID() { + return SchemaElement_FieldID_DEFAULT + } + return *p.FieldID +} + +var SchemaElement_LogicalType_DEFAULT *LogicalType + +func (p *SchemaElement) GetLogicalType() *LogicalType { + if !p.IsSetLogicalType() { + return SchemaElement_LogicalType_DEFAULT + } + return p.LogicalType +} +func (p *SchemaElement) IsSetType() bool { + return p.Type != nil +} + +func (p *SchemaElement) IsSetTypeLength() bool { + return p.TypeLength != nil +} + +func (p *SchemaElement) IsSetRepetitionType() bool { + return p.RepetitionType != nil +} + +func (p *SchemaElement) IsSetNumChildren() bool { + return p.NumChildren != nil +} + +func (p *SchemaElement) IsSetConvertedType() bool { + return p.ConvertedType != nil +} + +func (p *SchemaElement) IsSetScale() bool { + 
return p.Scale != nil +} + +func (p *SchemaElement) IsSetPrecision() bool { + return p.Precision != nil +} + +func (p *SchemaElement) IsSetFieldID() bool { + return p.FieldID != nil +} + +func (p *SchemaElement) IsSetLogicalType() bool { + return p.LogicalType != nil +} + +func (p *SchemaElement) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetName bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + case 4: + if err := p.ReadField4(iprot); err != nil { + return err + } + issetName = true + case 5: + if err := p.ReadField5(iprot); err != nil { + return err + } + case 6: + if err := p.ReadField6(iprot); err != nil { + return err + } + case 7: + if err := p.ReadField7(iprot); err != nil { + return err + } + case 8: + if err := p.ReadField8(iprot); err != nil { + return err + } + case 9: + if err := p.ReadField9(iprot); err != nil { + return err + } + case 10: + if err := p.ReadField10(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetName { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Name is not set")) + } + return nil +} + +func (p *SchemaElement) ReadField1(iprot thrift.TProtocol) error { + if v, err 
:= iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := Type(v) + p.Type = &temp + } + return nil +} + +func (p *SchemaElement) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.TypeLength = &v + } + return nil +} + +func (p *SchemaElement) ReadField3(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + temp := FieldRepetitionType(v) + p.RepetitionType = &temp + } + return nil +} + +func (p *SchemaElement) ReadField4(iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + p.Name = v + } + return nil +} + +func (p *SchemaElement) ReadField5(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + p.NumChildren = &v + } + return nil +} + +func (p *SchemaElement) ReadField6(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + temp := ConvertedType(v) + p.ConvertedType = &temp + } + return nil +} + +func (p *SchemaElement) ReadField7(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 7: ", err) + } else { + p.Scale = &v + } + return nil +} + +func (p *SchemaElement) ReadField8(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 8: ", err) + } else { + p.Precision = &v + } + return nil +} + +func (p *SchemaElement) ReadField9(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 9: ", err) + } else { 
+ p.FieldID = &v + } + return nil +} + +func (p *SchemaElement) ReadField10(iprot thrift.TProtocol) error { + p.LogicalType = &LogicalType{} + if err := p.LogicalType.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.LogicalType), err) + } + return nil +} + +func (p *SchemaElement) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("SchemaElement"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + if err := p.writeField4(oprot); err != nil { + return err + } + if err := p.writeField5(oprot); err != nil { + return err + } + if err := p.writeField6(oprot); err != nil { + return err + } + if err := p.writeField7(oprot); err != nil { + return err + } + if err := p.writeField8(oprot); err != nil { + return err + } + if err := p.writeField9(oprot); err != nil { + return err + } + if err := p.writeField10(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *SchemaElement) writeField1(oprot thrift.TProtocol) (err error) { + if p.IsSetType() { + if err := oprot.WriteFieldBegin("type", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) + } + if err := oprot.WriteI32(int32(*p.Type)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) + } + } + return err +} + 
+func (p *SchemaElement) writeField2(oprot thrift.TProtocol) (err error) { + if p.IsSetTypeLength() { + if err := oprot.WriteFieldBegin("type_length", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:type_length: ", p), err) + } + if err := oprot.WriteI32(int32(*p.TypeLength)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.type_length (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:type_length: ", p), err) + } + } + return err +} + +func (p *SchemaElement) writeField3(oprot thrift.TProtocol) (err error) { + if p.IsSetRepetitionType() { + if err := oprot.WriteFieldBegin("repetition_type", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:repetition_type: ", p), err) + } + if err := oprot.WriteI32(int32(*p.RepetitionType)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.repetition_type (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:repetition_type: ", p), err) + } + } + return err +} + +func (p *SchemaElement) writeField4(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("name", thrift.STRING, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:name: ", p), err) + } + if err := oprot.WriteString(string(p.Name)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.name (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:name: ", p), err) + } + return err +} + +func (p *SchemaElement) writeField5(oprot thrift.TProtocol) (err error) { + if p.IsSetNumChildren() { + if err := oprot.WriteFieldBegin("num_children", thrift.I32, 5); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_children: ", p), err) + } + if err := oprot.WriteI32(int32(*p.NumChildren)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_children (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_children: ", p), err) + } + } + return err +} + +func (p *SchemaElement) writeField6(oprot thrift.TProtocol) (err error) { + if p.IsSetConvertedType() { + if err := oprot.WriteFieldBegin("converted_type", thrift.I32, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:converted_type: ", p), err) + } + if err := oprot.WriteI32(int32(*p.ConvertedType)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.converted_type (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:converted_type: ", p), err) + } + } + return err +} + +func (p *SchemaElement) writeField7(oprot thrift.TProtocol) (err error) { + if p.IsSetScale() { + if err := oprot.WriteFieldBegin("scale", thrift.I32, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:scale: ", p), err) + } + if err := oprot.WriteI32(int32(*p.Scale)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.scale (7) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:scale: ", p), err) + } + } + return err +} + +func (p *SchemaElement) writeField8(oprot thrift.TProtocol) (err error) { + if p.IsSetPrecision() { + if err := oprot.WriteFieldBegin("precision", thrift.I32, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:precision: ", p), err) + } + if err := oprot.WriteI32(int32(*p.Precision)); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T.precision (8) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:precision: ", p), err) + } + } + return err +} + +func (p *SchemaElement) writeField9(oprot thrift.TProtocol) (err error) { + if p.IsSetFieldID() { + if err := oprot.WriteFieldBegin("field_id", thrift.I32, 9); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:field_id: ", p), err) + } + if err := oprot.WriteI32(int32(*p.FieldID)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.field_id (9) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 9:field_id: ", p), err) + } + } + return err +} + +func (p *SchemaElement) writeField10(oprot thrift.TProtocol) (err error) { + if p.IsSetLogicalType() { + if err := oprot.WriteFieldBegin("logicalType", thrift.STRUCT, 10); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:logicalType: ", p), err) + } + if err := p.LogicalType.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.LogicalType), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 10:logicalType: ", p), err) + } + } + return err +} + +func (p *SchemaElement) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("SchemaElement(%+v)", *p) +} + +// Data page header +// +// Attributes: +// - NumValues: Number of values, including NULLs, in this data page. 
* +// - Encoding: Encoding used for this data page * +// - DefinitionLevelEncoding: Encoding used for definition levels * +// - RepetitionLevelEncoding: Encoding used for repetition levels * +// - Statistics: Optional statistics for the data in this page* +type DataPageHeader struct { + NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` + Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` + DefinitionLevelEncoding Encoding `thrift:"definition_level_encoding,3,required" db:"definition_level_encoding" json:"definition_level_encoding"` + RepetitionLevelEncoding Encoding `thrift:"repetition_level_encoding,4,required" db:"repetition_level_encoding" json:"repetition_level_encoding"` + Statistics *Statistics `thrift:"statistics,5" db:"statistics" json:"statistics,omitempty"` +} + +func NewDataPageHeader() *DataPageHeader { + return &DataPageHeader{} +} + +func (p *DataPageHeader) GetNumValues() int32 { + return p.NumValues +} + +func (p *DataPageHeader) GetEncoding() Encoding { + return p.Encoding +} + +func (p *DataPageHeader) GetDefinitionLevelEncoding() Encoding { + return p.DefinitionLevelEncoding +} + +func (p *DataPageHeader) GetRepetitionLevelEncoding() Encoding { + return p.RepetitionLevelEncoding +} + +var DataPageHeader_Statistics_DEFAULT *Statistics + +func (p *DataPageHeader) GetStatistics() *Statistics { + if !p.IsSetStatistics() { + return DataPageHeader_Statistics_DEFAULT + } + return p.Statistics +} +func (p *DataPageHeader) IsSetStatistics() bool { + return p.Statistics != nil +} + +func (p *DataPageHeader) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNumValues bool = false + var issetEncoding bool = false + var issetDefinitionLevelEncoding bool = false + var issetRepetitionLevelEncoding bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + 
if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetNumValues = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetEncoding = true + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + issetDefinitionLevelEncoding = true + case 4: + if err := p.ReadField4(iprot); err != nil { + return err + } + issetRepetitionLevelEncoding = true + case 5: + if err := p.ReadField5(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNumValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) + } + if !issetEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) + } + if !issetDefinitionLevelEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelEncoding is not set")) + } + if !issetRepetitionLevelEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelEncoding is not set")) + } + return nil +} + +func (p *DataPageHeader) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.NumValues = v + } + return nil +} + +func (p *DataPageHeader) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } 
else { + temp := Encoding(v) + p.Encoding = temp + } + return nil +} + +func (p *DataPageHeader) ReadField3(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + temp := Encoding(v) + p.DefinitionLevelEncoding = temp + } + return nil +} + +func (p *DataPageHeader) ReadField4(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + temp := Encoding(v) + p.RepetitionLevelEncoding = temp + } + return nil +} + +func (p *DataPageHeader) ReadField5(iprot thrift.TProtocol) error { + p.Statistics = &Statistics{} + if err := p.Statistics.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) + } + return nil +} + +func (p *DataPageHeader) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("DataPageHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + if err := p.writeField4(oprot); err != nil { + return err + } + if err := p.writeField5(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *DataPageHeader) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("num_values", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) + } + if err := oprot.WriteI32(int32(p.NumValues)); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) + } + return err +} + +func (p *DataPageHeader) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("encoding", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) + } + if err := oprot.WriteI32(int32(p.Encoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) + } + return err +} + +func (p *DataPageHeader) writeField3(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("definition_level_encoding", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:definition_level_encoding: ", p), err) + } + if err := oprot.WriteI32(int32(p.DefinitionLevelEncoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.definition_level_encoding (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:definition_level_encoding: ", p), err) + } + return err +} + +func (p *DataPageHeader) writeField4(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("repetition_level_encoding", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:repetition_level_encoding: ", p), err) + } + if err := oprot.WriteI32(int32(p.RepetitionLevelEncoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.repetition_level_encoding (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T write field end error 4:repetition_level_encoding: ", p), err) + } + return err +} + +func (p *DataPageHeader) writeField5(oprot thrift.TProtocol) (err error) { + if p.IsSetStatistics() { + if err := oprot.WriteFieldBegin("statistics", thrift.STRUCT, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:statistics: ", p), err) + } + if err := p.Statistics.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:statistics: ", p), err) + } + } + return err +} + +func (p *DataPageHeader) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("DataPageHeader(%+v)", *p) +} + +type IndexPageHeader struct { +} + +func NewIndexPageHeader() *IndexPageHeader { + return &IndexPageHeader{} +} + +func (p *IndexPageHeader) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *IndexPageHeader) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("IndexPageHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: 
", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *IndexPageHeader) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("IndexPageHeader(%+v)", *p) +} + +// TODO: * +// +// Attributes: +// - NumValues: Number of values in the dictionary * +// - Encoding: Encoding using this dictionary page * +// - IsSorted: If true, the entries in the dictionary are sorted in ascending order * +type DictionaryPageHeader struct { + NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` + Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` + IsSorted *bool `thrift:"is_sorted,3" db:"is_sorted" json:"is_sorted,omitempty"` +} + +func NewDictionaryPageHeader() *DictionaryPageHeader { + return &DictionaryPageHeader{} +} + +func (p *DictionaryPageHeader) GetNumValues() int32 { + return p.NumValues +} + +func (p *DictionaryPageHeader) GetEncoding() Encoding { + return p.Encoding +} + +var DictionaryPageHeader_IsSorted_DEFAULT bool + +func (p *DictionaryPageHeader) GetIsSorted() bool { + if !p.IsSetIsSorted() { + return DictionaryPageHeader_IsSorted_DEFAULT + } + return *p.IsSorted +} +func (p *DictionaryPageHeader) IsSetIsSorted() bool { + return p.IsSorted != nil +} + +func (p *DictionaryPageHeader) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNumValues bool = false + var issetEncoding bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetNumValues = true + case 2: + if err := p.ReadField2(iprot); err != nil { 
+ return err + } + issetEncoding = true + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNumValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set")) + } + if !issetEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) + } + return nil +} + +func (p *DictionaryPageHeader) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.NumValues = v + } + return nil +} + +func (p *DictionaryPageHeader) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + temp := Encoding(v) + p.Encoding = temp + } + return nil +} + +func (p *DictionaryPageHeader) ReadField3(iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.IsSorted = &v + } + return nil +} + +func (p *DictionaryPageHeader) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("DictionaryPageHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := 
oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *DictionaryPageHeader) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("num_values", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:num_values: ", p), err) + } + if err := oprot.WriteI32(int32(p.NumValues)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) + } + return err +} + +func (p *DictionaryPageHeader) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("encoding", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) + } + if err := oprot.WriteI32(int32(p.Encoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) + } + return err +} + +func (p *DictionaryPageHeader) writeField3(oprot thrift.TProtocol) (err error) { + if p.IsSetIsSorted() { + if err := oprot.WriteFieldBegin("is_sorted", thrift.BOOL, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:is_sorted: ", p), err) + } + if err := oprot.WriteBool(bool(*p.IsSorted)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.is_sorted (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:is_sorted: ", p), err) + } + } + return err +} + +func (p *DictionaryPageHeader) String() string { + if p == nil { + return "" + } + return 
fmt.Sprintf("DictionaryPageHeader(%+v)", *p) +} + +// New page format allowing reading levels without decompressing the data +// Repetition and definition levels are uncompressed +// The remaining section containing the data is compressed if is_compressed is true +// +// +// Attributes: +// - NumValues: Number of values, including NULLs, in this data page. * +// - NumNulls: Number of NULL values, in this data page. +// Number of non-null = num_values - num_nulls which is also the number of values in the data section * +// - NumRows: Number of rows in this data page. which means pages change on record boundaries (r = 0) * +// - Encoding: Encoding used for data in this page * +// - DefinitionLevelsByteLength: length of the definition levels +// - RepetitionLevelsByteLength: length of the repetition levels +// - IsCompressed: whether the values are compressed. +// Which means the section of the page between +// definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) +// is compressed with the compression_codec. 
+// If missing it is considered compressed +// - Statistics: optional statistics for this column chunk +type DataPageHeaderV2 struct { + NumValues int32 `thrift:"num_values,1,required" db:"num_values" json:"num_values"` + NumNulls int32 `thrift:"num_nulls,2,required" db:"num_nulls" json:"num_nulls"` + NumRows int32 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` + Encoding Encoding `thrift:"encoding,4,required" db:"encoding" json:"encoding"` + DefinitionLevelsByteLength int32 `thrift:"definition_levels_byte_length,5,required" db:"definition_levels_byte_length" json:"definition_levels_byte_length"` + RepetitionLevelsByteLength int32 `thrift:"repetition_levels_byte_length,6,required" db:"repetition_levels_byte_length" json:"repetition_levels_byte_length"` + IsCompressed bool `thrift:"is_compressed,7" db:"is_compressed" json:"is_compressed,omitempty"` + Statistics *Statistics `thrift:"statistics,8" db:"statistics" json:"statistics,omitempty"` +} + +func NewDataPageHeaderV2() *DataPageHeaderV2 { + return &DataPageHeaderV2{ + IsCompressed: true, + } +} + +func (p *DataPageHeaderV2) GetNumValues() int32 { + return p.NumValues +} + +func (p *DataPageHeaderV2) GetNumNulls() int32 { + return p.NumNulls +} + +func (p *DataPageHeaderV2) GetNumRows() int32 { + return p.NumRows +} + +func (p *DataPageHeaderV2) GetEncoding() Encoding { + return p.Encoding +} + +func (p *DataPageHeaderV2) GetDefinitionLevelsByteLength() int32 { + return p.DefinitionLevelsByteLength +} + +func (p *DataPageHeaderV2) GetRepetitionLevelsByteLength() int32 { + return p.RepetitionLevelsByteLength +} + +var DataPageHeaderV2_IsCompressed_DEFAULT bool = true + +func (p *DataPageHeaderV2) GetIsCompressed() bool { + return p.IsCompressed +} + +var DataPageHeaderV2_Statistics_DEFAULT *Statistics + +func (p *DataPageHeaderV2) GetStatistics() *Statistics { + if !p.IsSetStatistics() { + return DataPageHeaderV2_Statistics_DEFAULT + } + return p.Statistics +} +func (p *DataPageHeaderV2) 
IsSetIsCompressed() bool { + return p.IsCompressed != DataPageHeaderV2_IsCompressed_DEFAULT +} + +func (p *DataPageHeaderV2) IsSetStatistics() bool { + return p.Statistics != nil +} + +func (p *DataPageHeaderV2) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNumValues bool = false + var issetNumNulls bool = false + var issetNumRows bool = false + var issetEncoding bool = false + var issetDefinitionLevelsByteLength bool = false + var issetRepetitionLevelsByteLength bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetNumValues = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetNumNulls = true + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + issetNumRows = true + case 4: + if err := p.ReadField4(iprot); err != nil { + return err + } + issetEncoding = true + case 5: + if err := p.ReadField5(iprot); err != nil { + return err + } + issetDefinitionLevelsByteLength = true + case 6: + if err := p.ReadField6(iprot); err != nil { + return err + } + issetRepetitionLevelsByteLength = true + case 7: + if err := p.ReadField7(iprot); err != nil { + return err + } + case 8: + if err := p.ReadField8(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNumValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, 
fmt.Errorf("Required field NumValues is not set")) + } + if !issetNumNulls { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumNulls is not set")) + } + if !issetNumRows { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")) + } + if !issetEncoding { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) + } + if !issetDefinitionLevelsByteLength { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DefinitionLevelsByteLength is not set")) + } + if !issetRepetitionLevelsByteLength { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RepetitionLevelsByteLength is not set")) + } + return nil +} + +func (p *DataPageHeaderV2) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.NumValues = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.NumNulls = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField3(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NumRows = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField4(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + temp := Encoding(v) + p.Encoding = temp + } + return nil +} + +func (p *DataPageHeaderV2) ReadField5(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 5: ", err) + } else { + 
p.DefinitionLevelsByteLength = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField6(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.RepetitionLevelsByteLength = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField7(iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(); err != nil { + return thrift.PrependError("error reading field 7: ", err) + } else { + p.IsCompressed = v + } + return nil +} + +func (p *DataPageHeaderV2) ReadField8(iprot thrift.TProtocol) error { + p.Statistics = &Statistics{} + if err := p.Statistics.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.Statistics), err) + } + return nil +} + +func (p *DataPageHeaderV2) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("DataPageHeaderV2"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + if err := p.writeField4(oprot); err != nil { + return err + } + if err := p.writeField5(oprot); err != nil { + return err + } + if err := p.writeField6(oprot); err != nil { + return err + } + if err := p.writeField7(oprot); err != nil { + return err + } + if err := p.writeField8(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *DataPageHeaderV2) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("num_values", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T 
write field begin error 1:num_values: ", p), err) + } + if err := oprot.WriteI32(int32(p.NumValues)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_values (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:num_values: ", p), err) + } + return err +} + +func (p *DataPageHeaderV2) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("num_nulls", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:num_nulls: ", p), err) + } + if err := oprot.WriteI32(int32(p.NumNulls)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_nulls (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:num_nulls: ", p), err) + } + return err +} + +func (p *DataPageHeaderV2) writeField3(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("num_rows", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) + } + if err := oprot.WriteI32(int32(p.NumRows)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) + } + return err +} + +func (p *DataPageHeaderV2) writeField4(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("encoding", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:encoding: ", p), err) + } + if err := oprot.WriteI32(int32(p.Encoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encoding (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T 
write field end error 4:encoding: ", p), err) + } + return err +} + +func (p *DataPageHeaderV2) writeField5(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("definition_levels_byte_length", thrift.I32, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:definition_levels_byte_length: ", p), err) + } + if err := oprot.WriteI32(int32(p.DefinitionLevelsByteLength)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.definition_levels_byte_length (5) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:definition_levels_byte_length: ", p), err) + } + return err +} + +func (p *DataPageHeaderV2) writeField6(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("repetition_levels_byte_length", thrift.I32, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:repetition_levels_byte_length: ", p), err) + } + if err := oprot.WriteI32(int32(p.RepetitionLevelsByteLength)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.repetition_levels_byte_length (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:repetition_levels_byte_length: ", p), err) + } + return err +} + +func (p *DataPageHeaderV2) writeField7(oprot thrift.TProtocol) (err error) { + if p.IsSetIsCompressed() { + if err := oprot.WriteFieldBegin("is_compressed", thrift.BOOL, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:is_compressed: ", p), err) + } + if err := oprot.WriteBool(bool(p.IsCompressed)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.is_compressed (7) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:is_compressed: ", p), err) + } 
+ } + return err +} + +func (p *DataPageHeaderV2) writeField8(oprot thrift.TProtocol) (err error) { + if p.IsSetStatistics() { + if err := oprot.WriteFieldBegin("statistics", thrift.STRUCT, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:statistics: ", p), err) + } + if err := p.Statistics.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:statistics: ", p), err) + } + } + return err +} + +func (p *DataPageHeaderV2) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("DataPageHeaderV2(%+v)", *p) +} + +// Attributes: +// - Type: the type of the page: indicates which of the *_header fields is set * +// - UncompressedPageSize: Uncompressed page size in bytes (not including this header) * +// - CompressedPageSize: Compressed page size in bytes (not including this header) * +// - Crc: 32bit crc for the data below. 
This allows for disabling checksumming in HDFS +// if only a few pages needs to be read +// +// - DataPageHeader +// - IndexPageHeader +// - DictionaryPageHeader +// - DataPageHeaderV2 +type PageHeader struct { + Type PageType `thrift:"type,1,required" db:"type" json:"type"` + UncompressedPageSize int32 `thrift:"uncompressed_page_size,2,required" db:"uncompressed_page_size" json:"uncompressed_page_size"` + CompressedPageSize int32 `thrift:"compressed_page_size,3,required" db:"compressed_page_size" json:"compressed_page_size"` + Crc *int32 `thrift:"crc,4" db:"crc" json:"crc,omitempty"` + DataPageHeader *DataPageHeader `thrift:"data_page_header,5" db:"data_page_header" json:"data_page_header,omitempty"` + IndexPageHeader *IndexPageHeader `thrift:"index_page_header,6" db:"index_page_header" json:"index_page_header,omitempty"` + DictionaryPageHeader *DictionaryPageHeader `thrift:"dictionary_page_header,7" db:"dictionary_page_header" json:"dictionary_page_header,omitempty"` + DataPageHeaderV2 *DataPageHeaderV2 `thrift:"data_page_header_v2,8" db:"data_page_header_v2" json:"data_page_header_v2,omitempty"` +} + +func NewPageHeader() *PageHeader { + return &PageHeader{} +} + +func (p *PageHeader) GetType() PageType { + return p.Type +} + +func (p *PageHeader) GetUncompressedPageSize() int32 { + return p.UncompressedPageSize +} + +func (p *PageHeader) GetCompressedPageSize() int32 { + return p.CompressedPageSize +} + +var PageHeader_Crc_DEFAULT int32 + +func (p *PageHeader) GetCrc() int32 { + if !p.IsSetCrc() { + return PageHeader_Crc_DEFAULT + } + return *p.Crc +} + +var PageHeader_DataPageHeader_DEFAULT *DataPageHeader + +func (p *PageHeader) GetDataPageHeader() *DataPageHeader { + if !p.IsSetDataPageHeader() { + return PageHeader_DataPageHeader_DEFAULT + } + return p.DataPageHeader +} + +var PageHeader_IndexPageHeader_DEFAULT *IndexPageHeader + +func (p *PageHeader) GetIndexPageHeader() *IndexPageHeader { + if !p.IsSetIndexPageHeader() { + return 
PageHeader_IndexPageHeader_DEFAULT + } + return p.IndexPageHeader +} + +var PageHeader_DictionaryPageHeader_DEFAULT *DictionaryPageHeader + +func (p *PageHeader) GetDictionaryPageHeader() *DictionaryPageHeader { + if !p.IsSetDictionaryPageHeader() { + return PageHeader_DictionaryPageHeader_DEFAULT + } + return p.DictionaryPageHeader +} + +var PageHeader_DataPageHeaderV2_DEFAULT *DataPageHeaderV2 + +func (p *PageHeader) GetDataPageHeaderV2() *DataPageHeaderV2 { + if !p.IsSetDataPageHeaderV2() { + return PageHeader_DataPageHeaderV2_DEFAULT + } + return p.DataPageHeaderV2 +} +func (p *PageHeader) IsSetCrc() bool { + return p.Crc != nil +} + +func (p *PageHeader) IsSetDataPageHeader() bool { + return p.DataPageHeader != nil +} + +func (p *PageHeader) IsSetIndexPageHeader() bool { + return p.IndexPageHeader != nil +} + +func (p *PageHeader) IsSetDictionaryPageHeader() bool { + return p.DictionaryPageHeader != nil +} + +func (p *PageHeader) IsSetDataPageHeaderV2() bool { + return p.DataPageHeaderV2 != nil +} + +func (p *PageHeader) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetType bool = false + var issetUncompressedPageSize bool = false + var issetCompressedPageSize bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetType = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetUncompressedPageSize = true + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + issetCompressedPageSize = true + case 4: + if err := p.ReadField4(iprot); err != nil { + return err + } + case 5: + if err := p.ReadField5(iprot); err != nil { 
+ return err + } + case 6: + if err := p.ReadField6(iprot); err != nil { + return err + } + case 7: + if err := p.ReadField7(iprot); err != nil { + return err + } + case 8: + if err := p.ReadField8(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetType { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set")) + } + if !issetUncompressedPageSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field UncompressedPageSize is not set")) + } + if !issetCompressedPageSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")) + } + return nil +} + +func (p *PageHeader) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := PageType(v) + p.Type = temp + } + return nil +} + +func (p *PageHeader) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.UncompressedPageSize = v + } + return nil +} + +func (p *PageHeader) ReadField3(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.CompressedPageSize = v + } + return nil +} + +func (p *PageHeader) ReadField4(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + p.Crc = &v + } + return nil +} + +func (p *PageHeader) ReadField5(iprot thrift.TProtocol) error { + 
p.DataPageHeader = &DataPageHeader{} + if err := p.DataPageHeader.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeader), err) + } + return nil +} + +func (p *PageHeader) ReadField6(iprot thrift.TProtocol) error { + p.IndexPageHeader = &IndexPageHeader{} + if err := p.IndexPageHeader.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.IndexPageHeader), err) + } + return nil +} + +func (p *PageHeader) ReadField7(iprot thrift.TProtocol) error { + p.DictionaryPageHeader = &DictionaryPageHeader{} + if err := p.DictionaryPageHeader.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DictionaryPageHeader), err) + } + return nil +} + +func (p *PageHeader) ReadField8(iprot thrift.TProtocol) error { + p.DataPageHeaderV2 = &DataPageHeaderV2{ + IsCompressed: true, + } + if err := p.DataPageHeaderV2.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.DataPageHeaderV2), err) + } + return nil +} + +func (p *PageHeader) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("PageHeader"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + if err := p.writeField4(oprot); err != nil { + return err + } + if err := p.writeField5(oprot); err != nil { + return err + } + if err := p.writeField6(oprot); err != nil { + return err + } + if err := p.writeField7(oprot); err != nil { + return err + } + if err := p.writeField8(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { 
+ return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *PageHeader) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("type", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err) + } + if err := oprot.WriteI32(int32(p.Type)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err) + } + return err +} + +func (p *PageHeader) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("uncompressed_page_size", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:uncompressed_page_size: ", p), err) + } + if err := oprot.WriteI32(int32(p.UncompressedPageSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.uncompressed_page_size (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:uncompressed_page_size: ", p), err) + } + return err +} + +func (p *PageHeader) writeField3(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("compressed_page_size", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:compressed_page_size: ", p), err) + } + if err := oprot.WriteI32(int32(p.CompressedPageSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:compressed_page_size: ", p), err) + } + return err +} + +func (p *PageHeader) writeField4(oprot thrift.TProtocol) (err error) { + if p.IsSetCrc() { + if err := oprot.WriteFieldBegin("crc", 
thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:crc: ", p), err) + } + if err := oprot.WriteI32(int32(*p.Crc)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.crc (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:crc: ", p), err) + } + } + return err +} + +func (p *PageHeader) writeField5(oprot thrift.TProtocol) (err error) { + if p.IsSetDataPageHeader() { + if err := oprot.WriteFieldBegin("data_page_header", thrift.STRUCT, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:data_page_header: ", p), err) + } + if err := p.DataPageHeader.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeader), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:data_page_header: ", p), err) + } + } + return err +} + +func (p *PageHeader) writeField6(oprot thrift.TProtocol) (err error) { + if p.IsSetIndexPageHeader() { + if err := oprot.WriteFieldBegin("index_page_header", thrift.STRUCT, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:index_page_header: ", p), err) + } + if err := p.IndexPageHeader.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.IndexPageHeader), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:index_page_header: ", p), err) + } + } + return err +} + +func (p *PageHeader) writeField7(oprot thrift.TProtocol) (err error) { + if p.IsSetDictionaryPageHeader() { + if err := oprot.WriteFieldBegin("dictionary_page_header", thrift.STRUCT, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:dictionary_page_header: ", p), err) + } + if err := 
p.DictionaryPageHeader.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DictionaryPageHeader), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:dictionary_page_header: ", p), err) + } + } + return err +} + +func (p *PageHeader) writeField8(oprot thrift.TProtocol) (err error) { + if p.IsSetDataPageHeaderV2() { + if err := oprot.WriteFieldBegin("data_page_header_v2", thrift.STRUCT, 8); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:data_page_header_v2: ", p), err) + } + if err := p.DataPageHeaderV2.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.DataPageHeaderV2), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 8:data_page_header_v2: ", p), err) + } + } + return err +} + +func (p *PageHeader) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("PageHeader(%+v)", *p) +} + +// Wrapper struct to store key values +// +// Attributes: +// - Key +// - Value +type KeyValue struct { + Key string `thrift:"key,1,required" db:"key" json:"key"` + Value *string `thrift:"value,2" db:"value" json:"value,omitempty"` +} + +func NewKeyValue() *KeyValue { + return &KeyValue{} +} + +func (p *KeyValue) GetKey() string { + return p.Key +} + +var KeyValue_Value_DEFAULT string + +func (p *KeyValue) GetValue() string { + if !p.IsSetValue() { + return KeyValue_Value_DEFAULT + } + return *p.Value +} +func (p *KeyValue) IsSetValue() bool { + return p.Value != nil +} + +func (p *KeyValue) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetKey bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetKey = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetKey { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Key is not set")) + } + return nil +} + +func (p *KeyValue) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Key = v + } + return nil +} + +func (p *KeyValue) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Value = &v + } + return nil +} + +func (p *KeyValue) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("KeyValue"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *KeyValue) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("key", thrift.STRING, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 
1:key: ", p), err) + } + if err := oprot.WriteString(string(p.Key)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.key (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:key: ", p), err) + } + return err +} + +func (p *KeyValue) writeField2(oprot thrift.TProtocol) (err error) { + if p.IsSetValue() { + if err := oprot.WriteFieldBegin("value", thrift.STRING, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:value: ", p), err) + } + if err := oprot.WriteString(string(*p.Value)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.value (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:value: ", p), err) + } + } + return err +} + +func (p *KeyValue) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("KeyValue(%+v)", *p) +} + +// Wrapper struct to specify sort order +// +// Attributes: +// - ColumnIdx: The column index (in this row group) * +// - Descending: If true, indicates this column is sorted in descending order. * +// - NullsFirst: If true, nulls will come before non-null values, otherwise, +// nulls go at the end. 
+type SortingColumn struct { + ColumnIdx int32 `thrift:"column_idx,1,required" db:"column_idx" json:"column_idx"` + Descending bool `thrift:"descending,2,required" db:"descending" json:"descending"` + NullsFirst bool `thrift:"nulls_first,3,required" db:"nulls_first" json:"nulls_first"` +} + +func NewSortingColumn() *SortingColumn { + return &SortingColumn{} +} + +func (p *SortingColumn) GetColumnIdx() int32 { + return p.ColumnIdx +} + +func (p *SortingColumn) GetDescending() bool { + return p.Descending +} + +func (p *SortingColumn) GetNullsFirst() bool { + return p.NullsFirst +} +func (p *SortingColumn) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetColumnIdx bool = false + var issetDescending bool = false + var issetNullsFirst bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetColumnIdx = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetDescending = true + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + issetNullsFirst = true + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetColumnIdx { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field ColumnIdx is not set")) + } + if !issetDescending { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Descending is not set")) + } + if 
!issetNullsFirst { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullsFirst is not set")) + } + return nil +} + +func (p *SortingColumn) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.ColumnIdx = v + } + return nil +} + +func (p *SortingColumn) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.Descending = v + } + return nil +} + +func (p *SortingColumn) ReadField3(iprot thrift.TProtocol) error { + if v, err := iprot.ReadBool(); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NullsFirst = v + } + return nil +} + +func (p *SortingColumn) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("SortingColumn"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *SortingColumn) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("column_idx", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:column_idx: ", p), err) + } + if err := oprot.WriteI32(int32(p.ColumnIdx)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.column_idx (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T write field end error 1:column_idx: ", p), err) + } + return err +} + +func (p *SortingColumn) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("descending", thrift.BOOL, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:descending: ", p), err) + } + if err := oprot.WriteBool(bool(p.Descending)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.descending (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:descending: ", p), err) + } + return err +} + +func (p *SortingColumn) writeField3(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("nulls_first", thrift.BOOL, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:nulls_first: ", p), err) + } + if err := oprot.WriteBool(bool(p.NullsFirst)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.nulls_first (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:nulls_first: ", p), err) + } + return err +} + +func (p *SortingColumn) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("SortingColumn(%+v)", *p) +} + +// statistics of a given page type and encoding +// +// Attributes: +// - PageType: the page type (data/dic/...) 
* +// - Encoding: encoding of the page * +// - Count: number of pages of this type with this encoding * +type PageEncodingStats struct { + PageType PageType `thrift:"page_type,1,required" db:"page_type" json:"page_type"` + Encoding Encoding `thrift:"encoding,2,required" db:"encoding" json:"encoding"` + Count int32 `thrift:"count,3,required" db:"count" json:"count"` +} + +func NewPageEncodingStats() *PageEncodingStats { + return &PageEncodingStats{} +} + +func (p *PageEncodingStats) GetPageType() PageType { + return p.PageType +} + +func (p *PageEncodingStats) GetEncoding() Encoding { + return p.Encoding +} + +func (p *PageEncodingStats) GetCount() int32 { + return p.Count +} +func (p *PageEncodingStats) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetPageType bool = false + var issetEncoding bool = false + var issetCount bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetPageType = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetEncoding = true + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + issetCount = true + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetPageType { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageType is not set")) + } + if !issetEncoding { + return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encoding is not set")) + } + if !issetCount { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Count is not set")) + } + return nil +} + +func (p *PageEncodingStats) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + temp := PageType(v) + p.PageType = temp + } + return nil +} + +func (p *PageEncodingStats) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + temp := Encoding(v) + p.Encoding = temp + } + return nil +} + +func (p *PageEncodingStats) ReadField3(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.Count = v + } + return nil +} + +func (p *PageEncodingStats) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("PageEncodingStats"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *PageEncodingStats) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("page_type", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_type: ", p), err) + } + if err := oprot.WriteI32(int32(p.PageType)); err != nil { + 
return thrift.PrependError(fmt.Sprintf("%T.page_type (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_type: ", p), err) + } + return err +} + +func (p *PageEncodingStats) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("encoding", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encoding: ", p), err) + } + if err := oprot.WriteI32(int32(p.Encoding)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.encoding (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encoding: ", p), err) + } + return err +} + +func (p *PageEncodingStats) writeField3(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("count", thrift.I32, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:count: ", p), err) + } + if err := oprot.WriteI32(int32(p.Count)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.count (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:count: ", p), err) + } + return err +} + +func (p *PageEncodingStats) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("PageEncodingStats(%+v)", *p) +} + +// Description for column metadata +// +// Attributes: +// - Type: Type of this column * +// - Encodings: Set of all encodings used for this column. The purpose is to validate +// whether we can decode those pages. 
// *
//  - PathInSchema: Path in schema *
//  - Codec: Compression codec *
//  - NumValues: Number of values in this column *
//  - TotalUncompressedSize: total byte size of all uncompressed pages in this column chunk (including the headers) *
//  - TotalCompressedSize: total byte size of all compressed pages in this column chunk (including the headers) *
//  - KeyValueMetadata: Optional key/value metadata *
//  - DataPageOffset: Byte offset from beginning of file to first data page *
//  - IndexPageOffset: Byte offset from beginning of file to root index page *
//  - DictionaryPageOffset: Byte offset from the beginning of file to first (only) dictionary page *
//  - Statistics: optional statistics for this column chunk
//  - EncodingStats: Set of all encodings used for pages in this column chunk.
// This information can be used to determine if all data pages are
// dictionary encoded for example *
//
// Fields tagged ",required" must appear on the wire; pointer-typed fields
// are thrift-optional (nil means unset and the field is skipped on write).
type ColumnMetaData struct {
	Type                  Type                 `thrift:"type,1,required" db:"type" json:"type"`
	Encodings             []Encoding           `thrift:"encodings,2,required" db:"encodings" json:"encodings"`
	PathInSchema          []string             `thrift:"path_in_schema,3,required" db:"path_in_schema" json:"path_in_schema"`
	Codec                 CompressionCodec     `thrift:"codec,4,required" db:"codec" json:"codec"`
	NumValues             int64                `thrift:"num_values,5,required" db:"num_values" json:"num_values"`
	TotalUncompressedSize int64                `thrift:"total_uncompressed_size,6,required" db:"total_uncompressed_size" json:"total_uncompressed_size"`
	TotalCompressedSize   int64                `thrift:"total_compressed_size,7,required" db:"total_compressed_size" json:"total_compressed_size"`
	KeyValueMetadata      []*KeyValue          `thrift:"key_value_metadata,8" db:"key_value_metadata" json:"key_value_metadata,omitempty"`
	DataPageOffset        int64                `thrift:"data_page_offset,9,required" db:"data_page_offset" json:"data_page_offset"`
	IndexPageOffset       *int64               `thrift:"index_page_offset,10" db:"index_page_offset" json:"index_page_offset,omitempty"`
	DictionaryPageOffset  *int64               `thrift:"dictionary_page_offset,11" db:"dictionary_page_offset" json:"dictionary_page_offset,omitempty"`
	Statistics            *Statistics          `thrift:"statistics,12" db:"statistics" json:"statistics,omitempty"`
	EncodingStats         []*PageEncodingStats `thrift:"encoding_stats,13" db:"encoding_stats" json:"encoding_stats,omitempty"`
}

// NewColumnMetaData returns a zero-valued ColumnMetaData ready to be
// populated or decoded via Read.
func NewColumnMetaData() *ColumnMetaData {
	return &ColumnMetaData{}
}

// GetType returns the required physical type of the column (field 1).
func (p *ColumnMetaData) GetType() Type {
	return p.Type
}

// GetEncodings returns the required list of encodings used (field 2).
func (p *ColumnMetaData) GetEncodings() []Encoding {
	return p.Encodings
}

// GetPathInSchema returns the required schema path (field 3).
func (p *ColumnMetaData) GetPathInSchema() []string {
	return p.PathInSchema
}

// GetCodec returns the required compression codec (field 4).
func (p *ColumnMetaData) GetCodec() CompressionCodec {
	return p.Codec
}

// GetNumValues returns the required value count (field 5).
func (p *ColumnMetaData) GetNumValues() int64 {
	return p.NumValues
}

// GetTotalUncompressedSize returns the required uncompressed byte size (field 6).
func (p *ColumnMetaData) GetTotalUncompressedSize() int64 {
	return p.TotalUncompressedSize
}

// GetTotalCompressedSize returns the required compressed byte size (field 7).
func (p *ColumnMetaData) GetTotalCompressedSize() int64 {
	return p.TotalCompressedSize
}

// ColumnMetaData_KeyValueMetadata_DEFAULT is the value returned when
// optional field 8 is unset.
var ColumnMetaData_KeyValueMetadata_DEFAULT []*KeyValue

// GetKeyValueMetadata returns optional field 8 (may be nil).
func (p *ColumnMetaData) GetKeyValueMetadata() []*KeyValue {
	return p.KeyValueMetadata
}

// GetDataPageOffset returns the required first-data-page offset (field 9).
func (p *ColumnMetaData) GetDataPageOffset() int64 {
	return p.DataPageOffset
}

// ColumnMetaData_IndexPageOffset_DEFAULT is returned when field 10 is unset.
var ColumnMetaData_IndexPageOffset_DEFAULT int64

// GetIndexPageOffset dereferences optional field 10, falling back to the default.
func (p *ColumnMetaData) GetIndexPageOffset() int64 {
	if !p.IsSetIndexPageOffset() {
		return ColumnMetaData_IndexPageOffset_DEFAULT
	}
	return *p.IndexPageOffset
}

// ColumnMetaData_DictionaryPageOffset_DEFAULT is returned when field 11 is unset.
var ColumnMetaData_DictionaryPageOffset_DEFAULT int64

// GetDictionaryPageOffset dereferences optional field 11, falling back to the default.
func (p *ColumnMetaData) GetDictionaryPageOffset() int64 {
	if !p.IsSetDictionaryPageOffset() {
		return ColumnMetaData_DictionaryPageOffset_DEFAULT
	}
	return *p.DictionaryPageOffset
}

// ColumnMetaData_Statistics_DEFAULT is returned when field 12 is unset.
var ColumnMetaData_Statistics_DEFAULT *Statistics

// GetStatistics returns optional field 12 (nil default when unset).
func (p *ColumnMetaData) GetStatistics() *Statistics {
	if !p.IsSetStatistics() {
		return ColumnMetaData_Statistics_DEFAULT
	}
	return p.Statistics
}

// ColumnMetaData_EncodingStats_DEFAULT is the value returned when
// optional field 13 is unset.
var ColumnMetaData_EncodingStats_DEFAULT []*PageEncodingStats

func (p *ColumnMetaData) 
GetEncodingStats() []*PageEncodingStats {
	return p.EncodingStats
}

// IsSetKeyValueMetadata reports whether optional field 8 is populated.
func (p *ColumnMetaData) IsSetKeyValueMetadata() bool {
	return p.KeyValueMetadata != nil
}

// IsSetIndexPageOffset reports whether optional field 10 is populated.
func (p *ColumnMetaData) IsSetIndexPageOffset() bool {
	return p.IndexPageOffset != nil
}

// IsSetDictionaryPageOffset reports whether optional field 11 is populated.
func (p *ColumnMetaData) IsSetDictionaryPageOffset() bool {
	return p.DictionaryPageOffset != nil
}

// IsSetStatistics reports whether optional field 12 is populated.
func (p *ColumnMetaData) IsSetStatistics() bool {
	return p.Statistics != nil
}

// IsSetEncodingStats reports whether optional field 13 is populated.
func (p *ColumnMetaData) IsSetEncodingStats() bool {
	return p.EncodingStats != nil
}

// Read decodes a ColumnMetaData struct from iprot. Unknown field ids are
// skipped; after the STOP marker it verifies that every required field
// (1-7 and 9) was seen and returns a protocol exception otherwise.
func (p *ColumnMetaData) Read(iprot thrift.TProtocol) error {
	if _, err := iprot.ReadStructBegin(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
	}

	// presence flags for the required fields
	var issetType bool = false
	var issetEncodings bool = false
	var issetPathInSchema bool = false
	var issetCodec bool = false
	var issetNumValues bool = false
	var issetTotalUncompressedSize bool = false
	var issetTotalCompressedSize bool = false
	var issetDataPageOffset bool = false

	for {
		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin()
		if err != nil {
			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
		}
		if fieldTypeId == thrift.STOP {
			break
		}
		switch fieldId {
		case 1:
			if err := p.ReadField1(iprot); err != nil {
				return err
			}
			issetType = true
		case 2:
			if err := p.ReadField2(iprot); err != nil {
				return err
			}
			issetEncodings = true
		case 3:
			if err := p.ReadField3(iprot); err != nil {
				return err
			}
			issetPathInSchema = true
		case 4:
			if err := p.ReadField4(iprot); err != nil {
				return err
			}
			issetCodec = true
		case 5:
			if err := p.ReadField5(iprot); err != nil {
				return err
			}
			issetNumValues = true
		case 6:
			if err := p.ReadField6(iprot); err != nil {
				return err
			}
			issetTotalUncompressedSize = true
		case 7:
			if err := p.ReadField7(iprot); err != nil {
				return err
			}
			issetTotalCompressedSize = true
		case 8:
			if err := p.ReadField8(iprot); err != nil {
				return err
			}
		case 9:
			if err := p.ReadField9(iprot); err != nil {
				return err
			}
			issetDataPageOffset = true
		case 10:
			if err := p.ReadField10(iprot); err != nil {
				return err
			}
		case 11:
			if err := p.ReadField11(iprot); err != nil {
				return err
			}
		case 12:
			if err := p.ReadField12(iprot); err != nil {
				return err
			}
		case 13:
			if err := p.ReadField13(iprot); err != nil {
				return err
			}
		default:
			// unknown field: skip its payload to stay in sync with the stream
			if err := iprot.Skip(fieldTypeId); err != nil {
				return err
			}
		}
		if err := iprot.ReadFieldEnd(); err != nil {
			return err
		}
	}
	if err := iprot.ReadStructEnd(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
	}
	if !issetType {
		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Type is not set"))
	}
	if !issetEncodings {
		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Encodings is not set"))
	}
	if !issetPathInSchema {
		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PathInSchema is not set"))
	}
	if !issetCodec {
		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Codec is not set"))
	}
	if !issetNumValues {
		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumValues is not set"))
	}
	if !issetTotalUncompressedSize {
		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalUncompressedSize is not set"))
	}
	if !issetTotalCompressedSize {
		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalCompressedSize is not set"))
	}
	if !issetDataPageOffset {
		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field DataPageOffset is not set"))
	}
	return nil
}

func (p *ColumnMetaData) ReadField1(iprot 
thrift.TProtocol) error {
	if v, err := iprot.ReadI32(); err != nil {
		return thrift.PrependError("error reading field 1: ", err)
	} else {
		temp := Type(v)
		p.Type = temp
	}
	return nil
}

// ReadField2 decodes the required "encodings" list of i32 enum values (field 2).
func (p *ColumnMetaData) ReadField2(iprot thrift.TProtocol) error {
	_, size, err := iprot.ReadListBegin()
	if err != nil {
		return thrift.PrependError("error reading list begin: ", err)
	}
	tSlice := make([]Encoding, 0, size)
	p.Encodings = tSlice
	for i := 0; i < size; i++ {
		var _elem0 Encoding
		if v, err := iprot.ReadI32(); err != nil {
			return thrift.PrependError("error reading field 0: ", err)
		} else {
			temp := Encoding(v)
			_elem0 = temp
		}
		p.Encodings = append(p.Encodings, _elem0)
	}
	if err := iprot.ReadListEnd(); err != nil {
		return thrift.PrependError("error reading list end: ", err)
	}
	return nil
}

// ReadField3 decodes the required "path_in_schema" list of strings (field 3).
func (p *ColumnMetaData) ReadField3(iprot thrift.TProtocol) error {
	_, size, err := iprot.ReadListBegin()
	if err != nil {
		return thrift.PrependError("error reading list begin: ", err)
	}
	tSlice := make([]string, 0, size)
	p.PathInSchema = tSlice
	for i := 0; i < size; i++ {
		var _elem1 string
		if v, err := iprot.ReadString(); err != nil {
			return thrift.PrependError("error reading field 0: ", err)
		} else {
			_elem1 = v
		}
		p.PathInSchema = append(p.PathInSchema, _elem1)
	}
	if err := iprot.ReadListEnd(); err != nil {
		return thrift.PrependError("error reading list end: ", err)
	}
	return nil
}

// ReadField4 decodes the required "codec" enum (field 4).
func (p *ColumnMetaData) ReadField4(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI32(); err != nil {
		return thrift.PrependError("error reading field 4: ", err)
	} else {
		temp := CompressionCodec(v)
		p.Codec = temp
	}
	return nil
}

// ReadField5 decodes the required "num_values" i64 (field 5).
func (p *ColumnMetaData) ReadField5(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI64(); err != nil {
		return thrift.PrependError("error reading field 5: ", err)
	} else {
		p.NumValues = v
	}
	return nil
}

// ReadField6 decodes the required "total_uncompressed_size" i64 (field 6).
func (p *ColumnMetaData) ReadField6(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI64(); err != nil {
		return thrift.PrependError("error reading field 6: ", err)
	} else {
		p.TotalUncompressedSize = v
	}
	return nil
}

// ReadField7 decodes the required "total_compressed_size" i64 (field 7).
func (p *ColumnMetaData) ReadField7(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI64(); err != nil {
		return thrift.PrependError("error reading field 7: ", err)
	} else {
		p.TotalCompressedSize = v
	}
	return nil
}

// ReadField8 decodes the optional "key_value_metadata" list of KeyValue structs (field 8).
func (p *ColumnMetaData) ReadField8(iprot thrift.TProtocol) error {
	_, size, err := iprot.ReadListBegin()
	if err != nil {
		return thrift.PrependError("error reading list begin: ", err)
	}
	tSlice := make([]*KeyValue, 0, size)
	p.KeyValueMetadata = tSlice
	for i := 0; i < size; i++ {
		_elem2 := &KeyValue{}
		if err := _elem2.Read(iprot); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem2), err)
		}
		p.KeyValueMetadata = append(p.KeyValueMetadata, _elem2)
	}
	if err := iprot.ReadListEnd(); err != nil {
		return thrift.PrependError("error reading list end: ", err)
	}
	return nil
}

// ReadField9 decodes the required "data_page_offset" i64 (field 9).
func (p *ColumnMetaData) ReadField9(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI64(); err != nil {
		return thrift.PrependError("error reading field 9: ", err)
	} else {
		p.DataPageOffset = v
	}
	return nil
}

// ReadField10 decodes optional "index_page_offset" (field 10); presence is
// recorded by storing a pointer.
func (p *ColumnMetaData) ReadField10(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI64(); err != nil {
		return thrift.PrependError("error reading field 10: ", err)
	} else {
		p.IndexPageOffset = &v
	}
	return nil
}

// ReadField11 decodes optional "dictionary_page_offset" (field 11).
func (p *ColumnMetaData) ReadField11(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI64(); err != nil {
		return thrift.PrependError("error reading field 11: ", err)
	} else {
		p.DictionaryPageOffset = &v
	}
	return nil
}

// ReadField12 decodes the optional "statistics" struct (field 12).
func (p *ColumnMetaData) ReadField12(iprot thrift.TProtocol) error {
	p.Statistics = &Statistics{}
	if err := p.Statistics.Read(iprot); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T error reading struct: 
", p.Statistics), err)
	}
	return nil
}

// ReadField13 decodes the optional "encoding_stats" list of PageEncodingStats (field 13).
func (p *ColumnMetaData) ReadField13(iprot thrift.TProtocol) error {
	_, size, err := iprot.ReadListBegin()
	if err != nil {
		return thrift.PrependError("error reading list begin: ", err)
	}
	tSlice := make([]*PageEncodingStats, 0, size)
	p.EncodingStats = tSlice
	for i := 0; i < size; i++ {
		_elem3 := &PageEncodingStats{}
		if err := _elem3.Read(iprot); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem3), err)
		}
		p.EncodingStats = append(p.EncodingStats, _elem3)
	}
	if err := iprot.ReadListEnd(); err != nil {
		return thrift.PrependError("error reading list end: ", err)
	}
	return nil
}

// Write encodes the struct to oprot, emitting fields 1-13 in order followed
// by a field-stop marker. A nil receiver writes an empty struct.
func (p *ColumnMetaData) Write(oprot thrift.TProtocol) error {
	if err := oprot.WriteStructBegin("ColumnMetaData"); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
	}
	if p != nil {
		if err := p.writeField1(oprot); err != nil {
			return err
		}
		if err := p.writeField2(oprot); err != nil {
			return err
		}
		if err := p.writeField3(oprot); err != nil {
			return err
		}
		if err := p.writeField4(oprot); err != nil {
			return err
		}
		if err := p.writeField5(oprot); err != nil {
			return err
		}
		if err := p.writeField6(oprot); err != nil {
			return err
		}
		if err := p.writeField7(oprot); err != nil {
			return err
		}
		if err := p.writeField8(oprot); err != nil {
			return err
		}
		if err := p.writeField9(oprot); err != nil {
			return err
		}
		if err := p.writeField10(oprot); err != nil {
			return err
		}
		if err := p.writeField11(oprot); err != nil {
			return err
		}
		if err := p.writeField12(oprot); err != nil {
			return err
		}
		if err := p.writeField13(oprot); err != nil {
			return err
		}
	}
	if err := oprot.WriteFieldStop(); err != nil {
		return thrift.PrependError("write field stop error: ", err)
	}
	if err := oprot.WriteStructEnd(); err != nil {
		return thrift.PrependError("write struct stop error: ", err)
	}
	return nil
}

// writeField1 emits the required "type" enum (field 1).
func (p *ColumnMetaData) writeField1(oprot thrift.TProtocol) (err error) {
	if err := oprot.WriteFieldBegin("type", thrift.I32, 1); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:type: ", p), err)
	}
	if err := oprot.WriteI32(int32(p.Type)); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T.type (1) field write error: ", p), err)
	}
	if err := oprot.WriteFieldEnd(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field end error 1:type: ", p), err)
	}
	return err
}

// writeField2 emits the required "encodings" list of i32 enum values (field 2).
func (p *ColumnMetaData) writeField2(oprot thrift.TProtocol) (err error) {
	if err := oprot.WriteFieldBegin("encodings", thrift.LIST, 2); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:encodings: ", p), err)
	}
	if err := oprot.WriteListBegin(thrift.I32, len(p.Encodings)); err != nil {
		return thrift.PrependError("error writing list begin: ", err)
	}
	for _, v := range p.Encodings {
		if err := oprot.WriteI32(int32(v)); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err)
		}
	}
	if err := oprot.WriteListEnd(); err != nil {
		return thrift.PrependError("error writing list end: ", err)
	}
	if err := oprot.WriteFieldEnd(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field end error 2:encodings: ", p), err)
	}
	return err
}

// writeField3 emits the required "path_in_schema" list of strings (field 3).
func (p *ColumnMetaData) writeField3(oprot thrift.TProtocol) (err error) {
	if err := oprot.WriteFieldBegin("path_in_schema", thrift.LIST, 3); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:path_in_schema: ", p), err)
	}
	if err := oprot.WriteListBegin(thrift.STRING, len(p.PathInSchema)); err != nil {
		return thrift.PrependError("error writing list begin: ", err)
	}
	for _, v := range p.PathInSchema {
		if err := oprot.WriteString(string(v)); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err)
		}
	}
	if err := oprot.WriteListEnd(); err != nil {
		return thrift.PrependError("error writing list end: ", err)
	}
	if err := oprot.WriteFieldEnd(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field end error 3:path_in_schema: ", p), err)
	}
	return err
}

// writeField4 emits the required "codec" enum (field 4).
func (p *ColumnMetaData) writeField4(oprot thrift.TProtocol) (err error) {
	if err := oprot.WriteFieldBegin("codec", thrift.I32, 4); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:codec: ", p), err)
	}
	if err := oprot.WriteI32(int32(p.Codec)); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T.codec (4) field write error: ", p), err)
	}
	if err := oprot.WriteFieldEnd(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field end error 4:codec: ", p), err)
	}
	return err
}

// writeField5 emits the required "num_values" i64 (field 5).
func (p *ColumnMetaData) writeField5(oprot thrift.TProtocol) (err error) {
	if err := oprot.WriteFieldBegin("num_values", thrift.I64, 5); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:num_values: ", p), err)
	}
	if err := oprot.WriteI64(int64(p.NumValues)); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T.num_values (5) field write error: ", p), err)
	}
	if err := oprot.WriteFieldEnd(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field end error 5:num_values: ", p), err)
	}
	return err
}

// writeField6 emits the required "total_uncompressed_size" i64 (field 6).
func (p *ColumnMetaData) writeField6(oprot thrift.TProtocol) (err error) {
	if err := oprot.WriteFieldBegin("total_uncompressed_size", thrift.I64, 6); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:total_uncompressed_size: ", p), err)
	}
	if err := oprot.WriteI64(int64(p.TotalUncompressedSize)); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T.total_uncompressed_size (6) field write error: ", p), err)
	}
	if err := oprot.WriteFieldEnd(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field end error 6:total_uncompressed_size: ", p), err)
	}
	return err
}

// writeField7 emits the required "total_compressed_size" i64 (field 7).
func (p *ColumnMetaData) writeField7(oprot thrift.TProtocol) (err error) {
	if err := oprot.WriteFieldBegin("total_compressed_size", thrift.I64, 7); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:total_compressed_size: ", p), err)
	}
	if err := oprot.WriteI64(int64(p.TotalCompressedSize)); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T.total_compressed_size (7) field write error: ", p), err)
	}
	if err := oprot.WriteFieldEnd(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field end error 7:total_compressed_size: ", p), err)
	}
	return err
}

// writeField8 emits optional "key_value_metadata" (field 8) only when set.
func (p *ColumnMetaData) writeField8(oprot thrift.TProtocol) (err error) {
	if p.IsSetKeyValueMetadata() {
		if err := oprot.WriteFieldBegin("key_value_metadata", thrift.LIST, 8); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field begin error 8:key_value_metadata: ", p), err)
		}
		if err := oprot.WriteListBegin(thrift.STRUCT, len(p.KeyValueMetadata)); err != nil {
			return thrift.PrependError("error writing list begin: ", err)
		}
		for _, v := range p.KeyValueMetadata {
			if err := v.Write(oprot); err != nil {
				return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
			}
		}
		if err := oprot.WriteListEnd(); err != nil {
			return thrift.PrependError("error writing list end: ", err)
		}
		if err := oprot.WriteFieldEnd(); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field end error 8:key_value_metadata: ", p), err)
		}
	}
	return err
}

// writeField9 emits the required "data_page_offset" i64 (field 9).
func (p *ColumnMetaData) writeField9(oprot thrift.TProtocol) (err error) {
	if err := oprot.WriteFieldBegin("data_page_offset", thrift.I64, 9); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field begin error 9:data_page_offset: ", p), err)
	}
	if err := oprot.WriteI64(int64(p.DataPageOffset)); err != nil {
		return 
thrift.PrependError(fmt.Sprintf("%T.data_page_offset (9) field write error: ", p), err)
	}
	if err := oprot.WriteFieldEnd(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field end error 9:data_page_offset: ", p), err)
	}
	return err
}

// writeField10 emits optional "index_page_offset" (field 10) only when set.
func (p *ColumnMetaData) writeField10(oprot thrift.TProtocol) (err error) {
	if p.IsSetIndexPageOffset() {
		if err := oprot.WriteFieldBegin("index_page_offset", thrift.I64, 10); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:index_page_offset: ", p), err)
		}
		if err := oprot.WriteI64(int64(*p.IndexPageOffset)); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T.index_page_offset (10) field write error: ", p), err)
		}
		if err := oprot.WriteFieldEnd(); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field end error 10:index_page_offset: ", p), err)
		}
	}
	return err
}

// writeField11 emits optional "dictionary_page_offset" (field 11) only when set.
func (p *ColumnMetaData) writeField11(oprot thrift.TProtocol) (err error) {
	if p.IsSetDictionaryPageOffset() {
		if err := oprot.WriteFieldBegin("dictionary_page_offset", thrift.I64, 11); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:dictionary_page_offset: ", p), err)
		}
		if err := oprot.WriteI64(int64(*p.DictionaryPageOffset)); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T.dictionary_page_offset (11) field write error: ", p), err)
		}
		if err := oprot.WriteFieldEnd(); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field end error 11:dictionary_page_offset: ", p), err)
		}
	}
	return err
}

// writeField12 emits the optional "statistics" struct (field 12) only when set.
func (p *ColumnMetaData) writeField12(oprot thrift.TProtocol) (err error) {
	if p.IsSetStatistics() {
		if err := oprot.WriteFieldBegin("statistics", thrift.STRUCT, 12); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field begin error 12:statistics: ", p), err)
		}
		if err := p.Statistics.Write(oprot); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.Statistics), err)
		}
		if err := oprot.WriteFieldEnd(); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field end error 12:statistics: ", p), err)
		}
	}
	return err
}

// writeField13 emits the optional "encoding_stats" list (field 13) only when set.
func (p *ColumnMetaData) writeField13(oprot thrift.TProtocol) (err error) {
	if p.IsSetEncodingStats() {
		if err := oprot.WriteFieldBegin("encoding_stats", thrift.LIST, 13); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field begin error 13:encoding_stats: ", p), err)
		}
		if err := oprot.WriteListBegin(thrift.STRUCT, len(p.EncodingStats)); err != nil {
			return thrift.PrependError("error writing list begin: ", err)
		}
		for _, v := range p.EncodingStats {
			if err := v.Write(oprot); err != nil {
				return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err)
			}
		}
		if err := oprot.WriteListEnd(); err != nil {
			return thrift.PrependError("error writing list end: ", err)
		}
		if err := oprot.WriteFieldEnd(); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field end error 13:encoding_stats: ", p), err)
		}
	}
	return err
}

// String renders the struct for debugging; returns "" on a nil receiver.
func (p *ColumnMetaData) String() string {
	if p == nil {
		return ""
	}
	return fmt.Sprintf("ColumnMetaData(%+v)", *p)
}

// Attributes:
//  - FilePath: File where column data is stored. If not set, assumed to be same file as
// metadata. This path is relative to the current file.
//
//  - FileOffset: Byte offset in file_path to the ColumnMetaData *
//  - MetaData: Column metadata for this chunk. This is the same content as what is at
// file_path/file_offset. Having it here has it replicated in the file
// metadata. 
//
//  - OffsetIndexOffset: File offset of ColumnChunk's OffsetIndex *
//  - OffsetIndexLength: Size of ColumnChunk's OffsetIndex, in bytes *
//  - ColumnIndexOffset: File offset of ColumnChunk's ColumnIndex *
//  - ColumnIndexLength: Size of ColumnChunk's ColumnIndex, in bytes *
//
// Only FileOffset (field 2) is required; all pointer-typed fields are
// thrift-optional (nil means unset).
type ColumnChunk struct {
	FilePath          *string         `thrift:"file_path,1" db:"file_path" json:"file_path,omitempty"`
	FileOffset        int64           `thrift:"file_offset,2,required" db:"file_offset" json:"file_offset"`
	MetaData          *ColumnMetaData `thrift:"meta_data,3" db:"meta_data" json:"meta_data,omitempty"`
	OffsetIndexOffset *int64          `thrift:"offset_index_offset,4" db:"offset_index_offset" json:"offset_index_offset,omitempty"`
	OffsetIndexLength *int32          `thrift:"offset_index_length,5" db:"offset_index_length" json:"offset_index_length,omitempty"`
	ColumnIndexOffset *int64          `thrift:"column_index_offset,6" db:"column_index_offset" json:"column_index_offset,omitempty"`
	ColumnIndexLength *int32          `thrift:"column_index_length,7" db:"column_index_length" json:"column_index_length,omitempty"`
}

// NewColumnChunk returns a zero-valued ColumnChunk ready to be populated
// or decoded via Read.
func NewColumnChunk() *ColumnChunk {
	return &ColumnChunk{}
}

// ColumnChunk_FilePath_DEFAULT is returned when optional field 1 is unset.
var ColumnChunk_FilePath_DEFAULT string

// GetFilePath dereferences optional field 1, falling back to the default.
func (p *ColumnChunk) GetFilePath() string {
	if !p.IsSetFilePath() {
		return ColumnChunk_FilePath_DEFAULT
	}
	return *p.FilePath
}

// GetFileOffset returns the required file offset (field 2).
func (p *ColumnChunk) GetFileOffset() int64 {
	return p.FileOffset
}

// ColumnChunk_MetaData_DEFAULT is returned when optional field 3 is unset.
var ColumnChunk_MetaData_DEFAULT *ColumnMetaData

// GetMetaData returns optional field 3 (nil default when unset).
func (p *ColumnChunk) GetMetaData() *ColumnMetaData {
	if !p.IsSetMetaData() {
		return ColumnChunk_MetaData_DEFAULT
	}
	return p.MetaData
}

// ColumnChunk_OffsetIndexOffset_DEFAULT is returned when field 4 is unset.
var ColumnChunk_OffsetIndexOffset_DEFAULT int64

// GetOffsetIndexOffset dereferences optional field 4, falling back to the default.
func (p *ColumnChunk) GetOffsetIndexOffset() int64 {
	if !p.IsSetOffsetIndexOffset() {
		return ColumnChunk_OffsetIndexOffset_DEFAULT
	}
	return *p.OffsetIndexOffset
}

// ColumnChunk_OffsetIndexLength_DEFAULT is returned when field 5 is unset.
var ColumnChunk_OffsetIndexLength_DEFAULT int32

// GetOffsetIndexLength dereferences optional field 5, falling back to the default.
func (p *ColumnChunk) GetOffsetIndexLength() int32 {
	if !p.IsSetOffsetIndexLength() {
		return ColumnChunk_OffsetIndexLength_DEFAULT
	}
	return *p.OffsetIndexLength
}

// ColumnChunk_ColumnIndexOffset_DEFAULT is returned when field 6 is unset.
var ColumnChunk_ColumnIndexOffset_DEFAULT int64

// GetColumnIndexOffset dereferences optional field 6, falling back to the default.
func (p *ColumnChunk) GetColumnIndexOffset() int64 {
	if !p.IsSetColumnIndexOffset() {
		return ColumnChunk_ColumnIndexOffset_DEFAULT
	}
	return *p.ColumnIndexOffset
}

// ColumnChunk_ColumnIndexLength_DEFAULT is returned when field 7 is unset.
var ColumnChunk_ColumnIndexLength_DEFAULT int32

// GetColumnIndexLength dereferences optional field 7, falling back to the default.
func (p *ColumnChunk) GetColumnIndexLength() int32 {
	if !p.IsSetColumnIndexLength() {
		return ColumnChunk_ColumnIndexLength_DEFAULT
	}
	return *p.ColumnIndexLength
}

// IsSetFilePath reports whether optional field 1 is populated.
func (p *ColumnChunk) IsSetFilePath() bool {
	return p.FilePath != nil
}

// IsSetMetaData reports whether optional field 3 is populated.
func (p *ColumnChunk) IsSetMetaData() bool {
	return p.MetaData != nil
}

// IsSetOffsetIndexOffset reports whether optional field 4 is populated.
func (p *ColumnChunk) IsSetOffsetIndexOffset() bool {
	return p.OffsetIndexOffset != nil
}

// IsSetOffsetIndexLength reports whether optional field 5 is populated.
func (p *ColumnChunk) IsSetOffsetIndexLength() bool {
	return p.OffsetIndexLength != nil
}

// IsSetColumnIndexOffset reports whether optional field 6 is populated.
func (p *ColumnChunk) IsSetColumnIndexOffset() bool {
	return p.ColumnIndexOffset != nil
}

// IsSetColumnIndexLength reports whether optional field 7 is populated.
func (p *ColumnChunk) IsSetColumnIndexLength() bool {
	return p.ColumnIndexLength != nil
}

// Read decodes a ColumnChunk from iprot, skipping unknown fields and
// validating that required field 2 (file_offset) was present.
func (p *ColumnChunk) Read(iprot thrift.TProtocol) error {
	if _, err := iprot.ReadStructBegin(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err)
	}

	var issetFileOffset bool = false

	for {
		_, fieldTypeId, fieldId, err := iprot.ReadFieldBegin()
		if err != nil {
			return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err)
		}
		if fieldTypeId == thrift.STOP {
			break
		}
		switch fieldId {
		case 1:
			if err := p.ReadField1(iprot); err != nil {
				return err
			}
		case 2:
			if err := p.ReadField2(iprot); err != nil {
				return err
			}
			issetFileOffset = true
		case 3:
			if err := p.ReadField3(iprot); err != nil {
				return err
			}
		case 4:
			if err := p.ReadField4(iprot); err != nil {
				return err
			}
		case 5:
			if err := p.ReadField5(iprot); err != nil {
				return err
			}
		case 6:
			if err := p.ReadField6(iprot); err != nil {
				return err
			}
		
case 7:
			if err := p.ReadField7(iprot); err != nil {
				return err
			}
		default:
			// unknown field: skip its payload to stay in sync with the stream
			if err := iprot.Skip(fieldTypeId); err != nil {
				return err
			}
		}
		if err := iprot.ReadFieldEnd(); err != nil {
			return err
		}
	}
	if err := iprot.ReadStructEnd(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err)
	}
	if !issetFileOffset {
		return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FileOffset is not set"))
	}
	return nil
}

// ReadField1 decodes optional "file_path" (field 1); presence is recorded
// by storing a pointer.
func (p *ColumnChunk) ReadField1(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadString(); err != nil {
		return thrift.PrependError("error reading field 1: ", err)
	} else {
		p.FilePath = &v
	}
	return nil
}

// ReadField2 decodes the required "file_offset" i64 (field 2).
func (p *ColumnChunk) ReadField2(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI64(); err != nil {
		return thrift.PrependError("error reading field 2: ", err)
	} else {
		p.FileOffset = v
	}
	return nil
}

// ReadField3 decodes the optional nested ColumnMetaData struct (field 3).
func (p *ColumnChunk) ReadField3(iprot thrift.TProtocol) error {
	p.MetaData = &ColumnMetaData{}
	if err := p.MetaData.Read(iprot); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.MetaData), err)
	}
	return nil
}

// ReadField4 decodes optional "offset_index_offset" (field 4).
func (p *ColumnChunk) ReadField4(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI64(); err != nil {
		return thrift.PrependError("error reading field 4: ", err)
	} else {
		p.OffsetIndexOffset = &v
	}
	return nil
}

// ReadField5 decodes optional "offset_index_length" (field 5).
func (p *ColumnChunk) ReadField5(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI32(); err != nil {
		return thrift.PrependError("error reading field 5: ", err)
	} else {
		p.OffsetIndexLength = &v
	}
	return nil
}

// ReadField6 decodes optional "column_index_offset" (field 6).
func (p *ColumnChunk) ReadField6(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI64(); err != nil {
		return thrift.PrependError("error reading field 6: ", err)
	} else {
		p.ColumnIndexOffset = &v
	}
	return nil
}

// ReadField7 decodes optional "column_index_length" (field 7).
func (p *ColumnChunk) ReadField7(iprot thrift.TProtocol) error {
	if v, err := iprot.ReadI32(); err != nil {
		return thrift.PrependError("error reading field 7: ", err)
	} else {
		p.ColumnIndexLength = &v
	}
	return nil
}

// Write encodes the struct to oprot, emitting fields 1-7 in order followed
// by a field-stop marker. A nil receiver writes an empty struct.
func (p *ColumnChunk) Write(oprot thrift.TProtocol) error {
	if err := oprot.WriteStructBegin("ColumnChunk"); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
	}
	if p != nil {
		if err := p.writeField1(oprot); err != nil {
			return err
		}
		if err := p.writeField2(oprot); err != nil {
			return err
		}
		if err := p.writeField3(oprot); err != nil {
			return err
		}
		if err := p.writeField4(oprot); err != nil {
			return err
		}
		if err := p.writeField5(oprot); err != nil {
			return err
		}
		if err := p.writeField6(oprot); err != nil {
			return err
		}
		if err := p.writeField7(oprot); err != nil {
			return err
		}
	}
	if err := oprot.WriteFieldStop(); err != nil {
		return thrift.PrependError("write field stop error: ", err)
	}
	if err := oprot.WriteStructEnd(); err != nil {
		return thrift.PrependError("write struct stop error: ", err)
	}
	return nil
}

// writeField1 emits optional "file_path" (field 1) only when set.
func (p *ColumnChunk) writeField1(oprot thrift.TProtocol) (err error) {
	if p.IsSetFilePath() {
		if err := oprot.WriteFieldBegin("file_path", thrift.STRING, 1); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:file_path: ", p), err)
		}
		if err := oprot.WriteString(string(*p.FilePath)); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T.file_path (1) field write error: ", p), err)
		}
		if err := oprot.WriteFieldEnd(); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field end error 1:file_path: ", p), err)
		}
	}
	return err
}

// writeField2 emits the required "file_offset" i64 (field 2).
func (p *ColumnChunk) writeField2(oprot thrift.TProtocol) (err error) {
	if err := oprot.WriteFieldBegin("file_offset", thrift.I64, 2); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:file_offset: ", p), err)
	}
	if err := oprot.WriteI64(int64(p.FileOffset)); err != nil {
		
return thrift.PrependError(fmt.Sprintf("%T.file_offset (2) field write error: ", p), err)
	}
	if err := oprot.WriteFieldEnd(); err != nil {
		return thrift.PrependError(fmt.Sprintf("%T write field end error 2:file_offset: ", p), err)
	}
	return err
}

// writeField3 emits the optional nested "meta_data" struct (field 3) only when set.
func (p *ColumnChunk) writeField3(oprot thrift.TProtocol) (err error) {
	if p.IsSetMetaData() {
		if err := oprot.WriteFieldBegin("meta_data", thrift.STRUCT, 3); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:meta_data: ", p), err)
		}
		if err := p.MetaData.Write(oprot); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.MetaData), err)
		}
		if err := oprot.WriteFieldEnd(); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field end error 3:meta_data: ", p), err)
		}
	}
	return err
}

// writeField4 emits optional "offset_index_offset" (field 4) only when set.
func (p *ColumnChunk) writeField4(oprot thrift.TProtocol) (err error) {
	if p.IsSetOffsetIndexOffset() {
		if err := oprot.WriteFieldBegin("offset_index_offset", thrift.I64, 4); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:offset_index_offset: ", p), err)
		}
		if err := oprot.WriteI64(int64(*p.OffsetIndexOffset)); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T.offset_index_offset (4) field write error: ", p), err)
		}
		if err := oprot.WriteFieldEnd(); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field end error 4:offset_index_offset: ", p), err)
		}
	}
	return err
}

// writeField5 emits optional "offset_index_length" (field 5) only when set.
func (p *ColumnChunk) writeField5(oprot thrift.TProtocol) (err error) {
	if p.IsSetOffsetIndexLength() {
		if err := oprot.WriteFieldBegin("offset_index_length", thrift.I32, 5); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:offset_index_length: ", p), err)
		}
		if err := oprot.WriteI32(int32(*p.OffsetIndexLength)); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T.offset_index_length (5) field write error: ", p), err)
		}
		if err := oprot.WriteFieldEnd(); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field end error 5:offset_index_length: ", p), err)
		}
	}
	return err
}

// writeField6 emits optional "column_index_offset" (field 6) only when set.
func (p *ColumnChunk) writeField6(oprot thrift.TProtocol) (err error) {
	if p.IsSetColumnIndexOffset() {
		if err := oprot.WriteFieldBegin("column_index_offset", thrift.I64, 6); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:column_index_offset: ", p), err)
		}
		if err := oprot.WriteI64(int64(*p.ColumnIndexOffset)); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T.column_index_offset (6) field write error: ", p), err)
		}
		if err := oprot.WriteFieldEnd(); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field end error 6:column_index_offset: ", p), err)
		}
	}
	return err
}

// writeField7 emits optional "column_index_length" (field 7) only when set.
func (p *ColumnChunk) writeField7(oprot thrift.TProtocol) (err error) {
	if p.IsSetColumnIndexLength() {
		if err := oprot.WriteFieldBegin("column_index_length", thrift.I32, 7); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_index_length: ", p), err)
		}
		if err := oprot.WriteI32(int32(*p.ColumnIndexLength)); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T.column_index_length (7) field write error: ", p), err)
		}
		if err := oprot.WriteFieldEnd(); err != nil {
			return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_index_length: ", p), err)
		}
	}
	return err
}

// String renders the struct for debugging; returns "" on a nil receiver.
func (p *ColumnChunk) String() string {
	if p == nil {
		return ""
	}
	return fmt.Sprintf("ColumnChunk(%+v)", *p)
}

// Attributes:
//  - Columns: Metadata for each column chunk in this row group.
// This list must have the same order as the SchemaElement list in FileMetaData.
//
//  - TotalByteSize: Total byte size of all the uncompressed column data in this row group *
//  - NumRows: Number of rows in this row group *
//  - SortingColumns: If set, specifies a sort ordering of the rows in this RowGroup. 
+// The sorting columns can be a subset of all the columns. +type RowGroup struct { + Columns []*ColumnChunk `thrift:"columns,1,required" db:"columns" json:"columns"` + TotalByteSize int64 `thrift:"total_byte_size,2,required" db:"total_byte_size" json:"total_byte_size"` + NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` + SortingColumns []*SortingColumn `thrift:"sorting_columns,4" db:"sorting_columns" json:"sorting_columns,omitempty"` +} + +func NewRowGroup() *RowGroup { + return &RowGroup{} +} + +func (p *RowGroup) GetColumns() []*ColumnChunk { + return p.Columns +} + +func (p *RowGroup) GetTotalByteSize() int64 { + return p.TotalByteSize +} + +func (p *RowGroup) GetNumRows() int64 { + return p.NumRows +} + +var RowGroup_SortingColumns_DEFAULT []*SortingColumn + +func (p *RowGroup) GetSortingColumns() []*SortingColumn { + return p.SortingColumns +} +func (p *RowGroup) IsSetSortingColumns() bool { + return p.SortingColumns != nil +} + +func (p *RowGroup) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetColumns bool = false + var issetTotalByteSize bool = false + var issetNumRows bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetColumns = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetTotalByteSize = true + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + issetNumRows = true + case 4: + if err := p.ReadField4(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + 
return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetColumns { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Columns is not set")) + } + if !issetTotalByteSize { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field TotalByteSize is not set")) + } + if !issetNumRows { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")) + } + return nil +} + +func (p *RowGroup) ReadField1(iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin() + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*ColumnChunk, 0, size) + p.Columns = tSlice + for i := 0; i < size; i++ { + _elem4 := &ColumnChunk{} + if err := _elem4.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem4), err) + } + p.Columns = append(p.Columns, _elem4) + } + if err := iprot.ReadListEnd(); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *RowGroup) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.TotalByteSize = v + } + return nil +} + +func (p *RowGroup) ReadField3(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NumRows = v + } + return nil +} + +func (p *RowGroup) ReadField4(iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin() + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*SortingColumn, 0, size) + p.SortingColumns = tSlice + for i := 0; i < size; i++ { + _elem5 := &SortingColumn{} + if err := 
_elem5.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem5), err) + } + p.SortingColumns = append(p.SortingColumns, _elem5) + } + if err := iprot.ReadListEnd(); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *RowGroup) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("RowGroup"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + if err := p.writeField4(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *RowGroup) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("columns", thrift.LIST, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:columns: ", p), err) + } + if err := oprot.WriteListBegin(thrift.STRUCT, len(p.Columns)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.Columns { + if err := v.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:columns: ", p), err) + } + return err +} + +func (p *RowGroup) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("total_byte_size", thrift.I64, 2); 
err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:total_byte_size: ", p), err) + } + if err := oprot.WriteI64(int64(p.TotalByteSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.total_byte_size (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:total_byte_size: ", p), err) + } + return err +} + +func (p *RowGroup) writeField3(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("num_rows", thrift.I64, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) + } + if err := oprot.WriteI64(int64(p.NumRows)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) + } + return err +} + +func (p *RowGroup) writeField4(oprot thrift.TProtocol) (err error) { + if p.IsSetSortingColumns() { + if err := oprot.WriteFieldBegin("sorting_columns", thrift.LIST, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:sorting_columns: ", p), err) + } + if err := oprot.WriteListBegin(thrift.STRUCT, len(p.SortingColumns)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.SortingColumns { + if err := v.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:sorting_columns: ", p), err) + } + } + return err +} + +func (p *RowGroup) String() string { + if p == nil { + return "" + } + return 
fmt.Sprintf("RowGroup(%+v)", *p) +} + +// Empty struct to signal the order defined by the physical or logical type +type TypeDefinedOrder struct { +} + +func NewTypeDefinedOrder() *TypeDefinedOrder { + return &TypeDefinedOrder{} +} + +func (p *TypeDefinedOrder) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *TypeDefinedOrder) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("TypeDefinedOrder"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *TypeDefinedOrder) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("TypeDefinedOrder(%+v)", *p) +} + +// Union to specify the order used for the min_value and max_value fields for a +// column. This union takes the role of an enhanced enum that allows rich +// elements (which will be needed for a collation-based ordering in the future). +// +// Possible values are: +// * TypeDefinedOrder - the column uses the order defined by its logical or +// physical type (if there is no logical type). 
+// +// If the reader does not support the value of this union, min and max stats +// for this column should be ignored. +// +// Attributes: +// - TYPE_ORDER: The sort orders for logical types are: +// UTF8 - unsigned byte-wise comparison +// INT8 - signed comparison +// INT16 - signed comparison +// INT32 - signed comparison +// INT64 - signed comparison +// UINT8 - unsigned comparison +// UINT16 - unsigned comparison +// UINT32 - unsigned comparison +// UINT64 - unsigned comparison +// DECIMAL - signed comparison of the represented value +// DATE - signed comparison +// TIME_MILLIS - signed comparison +// TIME_MICROS - signed comparison +// TIMESTAMP_MILLIS - signed comparison +// TIMESTAMP_MICROS - signed comparison +// INTERVAL - unsigned comparison +// JSON - unsigned byte-wise comparison +// BSON - unsigned byte-wise comparison +// ENUM - unsigned byte-wise comparison +// LIST - undefined +// MAP - undefined +// +// In the absence of logical types, the sort order is determined by the physical type: +// BOOLEAN - false, true +// INT32 - signed comparison +// INT64 - signed comparison +// INT96 (only used for legacy timestamps) - undefined +// FLOAT - signed comparison of the represented value (*) +// DOUBLE - signed comparison of the represented value (*) +// BYTE_ARRAY - unsigned byte-wise comparison +// FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison +// +// (*) Because the sorting order is not specified properly for floating +// point values (relations vs. total ordering) the following +// compatibility rules should be applied when reading statistics: +// - If the min is a NaN, it should be ignored. +// - If the max is a NaN, it should be ignored. +// - If the min is +0, the row group may contain -0 values as well. +// - If the max is -0, the row group may contain +0 values as well. +// - When looking for NaN values, min and max should be ignored. 
+type ColumnOrder struct { + TYPE_ORDER *TypeDefinedOrder `thrift:"TYPE_ORDER,1" db:"TYPE_ORDER" json:"TYPE_ORDER,omitempty"` +} + +func NewColumnOrder() *ColumnOrder { + return &ColumnOrder{} +} + +var ColumnOrder_TYPE_ORDER_DEFAULT *TypeDefinedOrder + +func (p *ColumnOrder) GetTYPE_ORDER() *TypeDefinedOrder { + if !p.IsSetTYPE_ORDER() { + return ColumnOrder_TYPE_ORDER_DEFAULT + } + return p.TYPE_ORDER +} +func (p *ColumnOrder) CountSetFieldsColumnOrder() int { + count := 0 + if p.IsSetTYPE_ORDER() { + count++ + } + return count + +} + +func (p *ColumnOrder) IsSetTYPE_ORDER() bool { + return p.TYPE_ORDER != nil +} + +func (p *ColumnOrder) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + return nil +} + +func (p *ColumnOrder) ReadField1(iprot thrift.TProtocol) error { + p.TYPE_ORDER = &TypeDefinedOrder{} + if err := p.TYPE_ORDER.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", p.TYPE_ORDER), err) + } + return nil +} + +func (p *ColumnOrder) Write(oprot thrift.TProtocol) error { + if c := p.CountSetFieldsColumnOrder(); c != 1 { + return fmt.Errorf("%T write union: exactly one field must be set (%d set).", p, c) + } + if err := oprot.WriteStructBegin("ColumnOrder"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T 
write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *ColumnOrder) writeField1(oprot thrift.TProtocol) (err error) { + if p.IsSetTYPE_ORDER() { + if err := oprot.WriteFieldBegin("TYPE_ORDER", thrift.STRUCT, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:TYPE_ORDER: ", p), err) + } + if err := p.TYPE_ORDER.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", p.TYPE_ORDER), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:TYPE_ORDER: ", p), err) + } + } + return err +} + +func (p *ColumnOrder) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("ColumnOrder(%+v)", *p) +} + +// Attributes: +// - Offset: Offset of the page in the file * +// - CompressedPageSize: Size of the page, including header. Sum of compressed_page_size and header +// length +// - FirstRowIndex: Index within the RowGroup of the first row of the page; this means pages +// change on record boundaries (r = 0). 
+type PageLocation struct { + Offset int64 `thrift:"offset,1,required" db:"offset" json:"offset"` + CompressedPageSize int32 `thrift:"compressed_page_size,2,required" db:"compressed_page_size" json:"compressed_page_size"` + FirstRowIndex int64 `thrift:"first_row_index,3,required" db:"first_row_index" json:"first_row_index"` +} + +func NewPageLocation() *PageLocation { + return &PageLocation{} +} + +func (p *PageLocation) GetOffset() int64 { + return p.Offset +} + +func (p *PageLocation) GetCompressedPageSize() int32 { + return p.CompressedPageSize +} + +func (p *PageLocation) GetFirstRowIndex() int64 { + return p.FirstRowIndex +} +func (p *PageLocation) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetOffset bool = false + var issetCompressedPageSize bool = false + var issetFirstRowIndex bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetOffset = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetCompressedPageSize = true + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + issetFirstRowIndex = true + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetOffset { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Offset is not set")) + } + if !issetCompressedPageSize { + return 
thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field CompressedPageSize is not set")) + } + if !issetFirstRowIndex { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field FirstRowIndex is not set")) + } + return nil +} + +func (p *PageLocation) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + p.Offset = v + } + return nil +} + +func (p *PageLocation) ReadField2(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 2: ", err) + } else { + p.CompressedPageSize = v + } + return nil +} + +func (p *PageLocation) ReadField3(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.FirstRowIndex = v + } + return nil +} + +func (p *PageLocation) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("PageLocation"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *PageLocation) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("offset", thrift.I64, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:offset: ", p), err) + } + if err := oprot.WriteI64(int64(p.Offset)); err != nil { + return 
thrift.PrependError(fmt.Sprintf("%T.offset (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:offset: ", p), err) + } + return err +} + +func (p *PageLocation) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("compressed_page_size", thrift.I32, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:compressed_page_size: ", p), err) + } + if err := oprot.WriteI32(int32(p.CompressedPageSize)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.compressed_page_size (2) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:compressed_page_size: ", p), err) + } + return err +} + +func (p *PageLocation) writeField3(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("first_row_index", thrift.I64, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:first_row_index: ", p), err) + } + if err := oprot.WriteI64(int64(p.FirstRowIndex)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.first_row_index (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:first_row_index: ", p), err) + } + return err +} + +func (p *PageLocation) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("PageLocation(%+v)", *p) +} + +// Attributes: +// - PageLocations: PageLocations, ordered by increasing PageLocation.offset. It is required +// that page_locations[i].first_row_index < page_locations[i+1].first_row_index. 
+type OffsetIndex struct { + PageLocations []*PageLocation `thrift:"page_locations,1,required" db:"page_locations" json:"page_locations"` +} + +func NewOffsetIndex() *OffsetIndex { + return &OffsetIndex{} +} + +func (p *OffsetIndex) GetPageLocations() []*PageLocation { + return p.PageLocations +} +func (p *OffsetIndex) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetPageLocations bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetPageLocations = true + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetPageLocations { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field PageLocations is not set")) + } + return nil +} + +func (p *OffsetIndex) ReadField1(iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin() + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*PageLocation, 0, size) + p.PageLocations = tSlice + for i := 0; i < size; i++ { + _elem6 := &PageLocation{} + if err := _elem6.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem6), err) + } + p.PageLocations = append(p.PageLocations, _elem6) + } + if err := iprot.ReadListEnd(); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *OffsetIndex) Write(oprot 
thrift.TProtocol) error { + if err := oprot.WriteStructBegin("OffsetIndex"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *OffsetIndex) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("page_locations", thrift.LIST, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:page_locations: ", p), err) + } + if err := oprot.WriteListBegin(thrift.STRUCT, len(p.PageLocations)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.PageLocations { + if err := v.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:page_locations: ", p), err) + } + return err +} + +func (p *OffsetIndex) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("OffsetIndex(%+v)", *p) +} + +// Description for ColumnIndex. +// Each [i] refers to the page at OffsetIndex.page_locations[i] +// +// Attributes: +// - NullPages: A list of Boolean values to determine the validity of the corresponding +// min and max values. If true, a page contains only null values, and writers +// have to set the corresponding entries in min_values and max_values to +// byte[0], so that all lists have the same length. If false, the +// corresponding entries in min_values and max_values must be valid. 
+// - MinValues: Two lists containing lower and upper bounds for the values of each page. +// These may be the actual minimum and maximum values found on a page, but +// can also be (more compact) values that do not exist on a page. For +// example, instead of storing ""Blart Versenwald III", a writer may set +// min_values[i]="B", max_values[i]="C". Such more compact values must still +// be valid values within the column's logical type. Readers must make sure +// that list entries are populated before using them by inspecting null_pages. +// - MaxValues +// - BoundaryOrder: Stores whether both min_values and max_values are orderd and if so, in +// which direction. This allows readers to perform binary searches in both +// lists. Readers cannot assume that max_values[i] <= min_values[i+1], even +// if the lists are ordered. +// - NullCounts: A list containing the number of null values for each page * +type ColumnIndex struct { + NullPages []bool `thrift:"null_pages,1,required" db:"null_pages" json:"null_pages"` + MinValues [][]byte `thrift:"min_values,2,required" db:"min_values" json:"min_values"` + MaxValues [][]byte `thrift:"max_values,3,required" db:"max_values" json:"max_values"` + BoundaryOrder BoundaryOrder `thrift:"boundary_order,4,required" db:"boundary_order" json:"boundary_order"` + NullCounts []int64 `thrift:"null_counts,5" db:"null_counts" json:"null_counts,omitempty"` +} + +func NewColumnIndex() *ColumnIndex { + return &ColumnIndex{} +} + +func (p *ColumnIndex) GetNullPages() []bool { + return p.NullPages +} + +func (p *ColumnIndex) GetMinValues() [][]byte { + return p.MinValues +} + +func (p *ColumnIndex) GetMaxValues() [][]byte { + return p.MaxValues +} + +func (p *ColumnIndex) GetBoundaryOrder() BoundaryOrder { + return p.BoundaryOrder +} + +var ColumnIndex_NullCounts_DEFAULT []int64 + +func (p *ColumnIndex) GetNullCounts() []int64 { + return p.NullCounts +} +func (p *ColumnIndex) IsSetNullCounts() bool { + return p.NullCounts != nil +} + +func (p 
*ColumnIndex) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetNullPages bool = false + var issetMinValues bool = false + var issetMaxValues bool = false + var issetBoundaryOrder bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetNullPages = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetMinValues = true + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + issetMaxValues = true + case 4: + if err := p.ReadField4(iprot); err != nil { + return err + } + issetBoundaryOrder = true + case 5: + if err := p.ReadField5(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetNullPages { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NullPages is not set")) + } + if !issetMinValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MinValues is not set")) + } + if !issetMaxValues { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field MaxValues is not set")) + } + if !issetBoundaryOrder { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field BoundaryOrder is not set")) + } + return nil +} + +func (p *ColumnIndex) ReadField1(iprot thrift.TProtocol) error { + _, size, err := 
iprot.ReadListBegin() + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]bool, 0, size) + p.NullPages = tSlice + for i := 0; i < size; i++ { + var _elem7 bool + if v, err := iprot.ReadBool(); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem7 = v + } + p.NullPages = append(p.NullPages, _elem7) + } + if err := iprot.ReadListEnd(); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnIndex) ReadField2(iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin() + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([][]byte, 0, size) + p.MinValues = tSlice + for i := 0; i < size; i++ { + var _elem8 []byte + if v, err := iprot.ReadBinary(); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem8 = v + } + p.MinValues = append(p.MinValues, _elem8) + } + if err := iprot.ReadListEnd(); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnIndex) ReadField3(iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin() + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([][]byte, 0, size) + p.MaxValues = tSlice + for i := 0; i < size; i++ { + var _elem9 []byte + if v, err := iprot.ReadBinary(); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem9 = v + } + p.MaxValues = append(p.MaxValues, _elem9) + } + if err := iprot.ReadListEnd(); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnIndex) ReadField4(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 4: ", err) + } else { + temp := BoundaryOrder(v) + p.BoundaryOrder = temp + } + 
return nil +} + +func (p *ColumnIndex) ReadField5(iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin() + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]int64, 0, size) + p.NullCounts = tSlice + for i := 0; i < size; i++ { + var _elem10 int64 + if v, err := iprot.ReadI64(); err != nil { + return thrift.PrependError("error reading field 0: ", err) + } else { + _elem10 = v + } + p.NullCounts = append(p.NullCounts, _elem10) + } + if err := iprot.ReadListEnd(); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *ColumnIndex) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("ColumnIndex"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + if err := p.writeField4(oprot); err != nil { + return err + } + if err := p.writeField5(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *ColumnIndex) writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("null_pages", thrift.LIST, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:null_pages: ", p), err) + } + if err := oprot.WriteListBegin(thrift.BOOL, len(p.NullPages)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.NullPages { + if err := oprot.WriteBool(bool(v)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:null_pages: ", p), err) + } + return err +} + +func (p *ColumnIndex) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("min_values", thrift.LIST, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:min_values: ", p), err) + } + if err := oprot.WriteListBegin(thrift.STRING, len(p.MinValues)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.MinValues { + if err := oprot.WriteBinary(v); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:min_values: ", p), err) + } + return err +} + +func (p *ColumnIndex) writeField3(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("max_values", thrift.LIST, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:max_values: ", p), err) + } + if err := oprot.WriteListBegin(thrift.STRING, len(p.MaxValues)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.MaxValues { + if err := oprot.WriteBinary(v); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:max_values: ", p), err) + } + return err +} + +func (p *ColumnIndex) writeField4(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("boundary_order", thrift.I32, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:boundary_order: ", p), err) + } + if err := oprot.WriteI32(int32(p.BoundaryOrder)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.boundary_order (4) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:boundary_order: ", p), err) + } + return err +} + +func (p *ColumnIndex) writeField5(oprot thrift.TProtocol) (err error) { + if p.IsSetNullCounts() { + if err := oprot.WriteFieldBegin("null_counts", thrift.LIST, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:null_counts: ", p), err) + } + if err := oprot.WriteListBegin(thrift.I64, len(p.NullCounts)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.NullCounts { + if err := oprot.WriteI64(int64(v)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T. 
(0) field write error: ", p), err) + } + } + if err := oprot.WriteListEnd(); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:null_counts: ", p), err) + } + } + return err +} + +func (p *ColumnIndex) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("ColumnIndex(%+v)", *p) +} + +// Description for file metadata +// +// Attributes: +// - Version: Version of this file * +// - Schema: Parquet schema for this file. This schema contains metadata for all the columns. +// The schema is represented as a tree with a single root. The nodes of the tree +// are flattened to a list by doing a depth-first traversal. +// The column metadata contains the path in the schema for that column which can be +// used to map columns to nodes in the schema. +// The first element is the root * +// - NumRows: Number of rows in this file * +// - RowGroups: Row groups in this file * +// - KeyValueMetadata: Optional key/value metadata * +// - CreatedBy: String for application that wrote this file. This should be in the format +// version (build ). +// e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) +// +// - ColumnOrders: Sort order used for the min_value and max_value fields of each column in +// this file. Each sort order corresponds to one column, determined by its +// position in the list, matching the position of the column in the schema. +// +// Without column_orders, the meaning of the min_value and max_value fields is +// undefined. To ensure well-defined behaviour, if min_value and max_value are +// written to a Parquet file, column_orders must be written as well. +// +// The obsolete min and max fields are always sorted by signed comparison +// regardless of column_orders. 
+type FileMetaData struct { + Version int32 `thrift:"version,1,required" db:"version" json:"version"` + Schema []*SchemaElement `thrift:"schema,2,required" db:"schema" json:"schema"` + NumRows int64 `thrift:"num_rows,3,required" db:"num_rows" json:"num_rows"` + RowGroups []*RowGroup `thrift:"row_groups,4,required" db:"row_groups" json:"row_groups"` + KeyValueMetadata []*KeyValue `thrift:"key_value_metadata,5" db:"key_value_metadata" json:"key_value_metadata,omitempty"` + CreatedBy *string `thrift:"created_by,6" db:"created_by" json:"created_by,omitempty"` + ColumnOrders []*ColumnOrder `thrift:"column_orders,7" db:"column_orders" json:"column_orders,omitempty"` +} + +func NewFileMetaData() *FileMetaData { + return &FileMetaData{} +} + +func (p *FileMetaData) GetVersion() int32 { + return p.Version +} + +func (p *FileMetaData) GetSchema() []*SchemaElement { + return p.Schema +} + +func (p *FileMetaData) GetNumRows() int64 { + return p.NumRows +} + +func (p *FileMetaData) GetRowGroups() []*RowGroup { + return p.RowGroups +} + +var FileMetaData_KeyValueMetadata_DEFAULT []*KeyValue + +func (p *FileMetaData) GetKeyValueMetadata() []*KeyValue { + return p.KeyValueMetadata +} + +var FileMetaData_CreatedBy_DEFAULT string + +func (p *FileMetaData) GetCreatedBy() string { + if !p.IsSetCreatedBy() { + return FileMetaData_CreatedBy_DEFAULT + } + return *p.CreatedBy +} + +var FileMetaData_ColumnOrders_DEFAULT []*ColumnOrder + +func (p *FileMetaData) GetColumnOrders() []*ColumnOrder { + return p.ColumnOrders +} +func (p *FileMetaData) IsSetKeyValueMetadata() bool { + return p.KeyValueMetadata != nil +} + +func (p *FileMetaData) IsSetCreatedBy() bool { + return p.CreatedBy != nil +} + +func (p *FileMetaData) IsSetColumnOrders() bool { + return p.ColumnOrders != nil +} + +func (p *FileMetaData) Read(iprot thrift.TProtocol) error { + if _, err := iprot.ReadStructBegin(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read error: ", p), err) + } + + var issetVersion bool = 
false + var issetSchema bool = false + var issetNumRows bool = false + var issetRowGroups bool = false + + for { + _, fieldTypeId, fieldId, err := iprot.ReadFieldBegin() + if err != nil { + return thrift.PrependError(fmt.Sprintf("%T field %d read error: ", p, fieldId), err) + } + if fieldTypeId == thrift.STOP { + break + } + switch fieldId { + case 1: + if err := p.ReadField1(iprot); err != nil { + return err + } + issetVersion = true + case 2: + if err := p.ReadField2(iprot); err != nil { + return err + } + issetSchema = true + case 3: + if err := p.ReadField3(iprot); err != nil { + return err + } + issetNumRows = true + case 4: + if err := p.ReadField4(iprot); err != nil { + return err + } + issetRowGroups = true + case 5: + if err := p.ReadField5(iprot); err != nil { + return err + } + case 6: + if err := p.ReadField6(iprot); err != nil { + return err + } + case 7: + if err := p.ReadField7(iprot); err != nil { + return err + } + default: + if err := iprot.Skip(fieldTypeId); err != nil { + return err + } + } + if err := iprot.ReadFieldEnd(); err != nil { + return err + } + } + if err := iprot.ReadStructEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T read struct end error: ", p), err) + } + if !issetVersion { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Version is not set")) + } + if !issetSchema { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field Schema is not set")) + } + if !issetNumRows { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field NumRows is not set")) + } + if !issetRowGroups { + return thrift.NewTProtocolExceptionWithType(thrift.INVALID_DATA, fmt.Errorf("Required field RowGroups is not set")) + } + return nil +} + +func (p *FileMetaData) ReadField1(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI32(); err != nil { + return thrift.PrependError("error reading field 1: ", err) + } else { + 
p.Version = v + } + return nil +} + +func (p *FileMetaData) ReadField2(iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin() + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*SchemaElement, 0, size) + p.Schema = tSlice + for i := 0; i < size; i++ { + _elem11 := &SchemaElement{} + if err := _elem11.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem11), err) + } + p.Schema = append(p.Schema, _elem11) + } + if err := iprot.ReadListEnd(); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *FileMetaData) ReadField3(iprot thrift.TProtocol) error { + if v, err := iprot.ReadI64(); err != nil { + return thrift.PrependError("error reading field 3: ", err) + } else { + p.NumRows = v + } + return nil +} + +func (p *FileMetaData) ReadField4(iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin() + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*RowGroup, 0, size) + p.RowGroups = tSlice + for i := 0; i < size; i++ { + _elem12 := &RowGroup{} + if err := _elem12.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem12), err) + } + p.RowGroups = append(p.RowGroups, _elem12) + } + if err := iprot.ReadListEnd(); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *FileMetaData) ReadField5(iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin() + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*KeyValue, 0, size) + p.KeyValueMetadata = tSlice + for i := 0; i < size; i++ { + _elem13 := &KeyValue{} + if err := _elem13.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem13), err) + } + p.KeyValueMetadata = 
append(p.KeyValueMetadata, _elem13) + } + if err := iprot.ReadListEnd(); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *FileMetaData) ReadField6(iprot thrift.TProtocol) error { + if v, err := iprot.ReadString(); err != nil { + return thrift.PrependError("error reading field 6: ", err) + } else { + p.CreatedBy = &v + } + return nil +} + +func (p *FileMetaData) ReadField7(iprot thrift.TProtocol) error { + _, size, err := iprot.ReadListBegin() + if err != nil { + return thrift.PrependError("error reading list begin: ", err) + } + tSlice := make([]*ColumnOrder, 0, size) + p.ColumnOrders = tSlice + for i := 0; i < size; i++ { + _elem14 := &ColumnOrder{} + if err := _elem14.Read(iprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error reading struct: ", _elem14), err) + } + p.ColumnOrders = append(p.ColumnOrders, _elem14) + } + if err := iprot.ReadListEnd(); err != nil { + return thrift.PrependError("error reading list end: ", err) + } + return nil +} + +func (p *FileMetaData) Write(oprot thrift.TProtocol) error { + if err := oprot.WriteStructBegin("FileMetaData"); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) + } + if p != nil { + if err := p.writeField1(oprot); err != nil { + return err + } + if err := p.writeField2(oprot); err != nil { + return err + } + if err := p.writeField3(oprot); err != nil { + return err + } + if err := p.writeField4(oprot); err != nil { + return err + } + if err := p.writeField5(oprot); err != nil { + return err + } + if err := p.writeField6(oprot); err != nil { + return err + } + if err := p.writeField7(oprot); err != nil { + return err + } + } + if err := oprot.WriteFieldStop(); err != nil { + return thrift.PrependError("write field stop error: ", err) + } + if err := oprot.WriteStructEnd(); err != nil { + return thrift.PrependError("write struct stop error: ", err) + } + return nil +} + +func (p *FileMetaData) 
writeField1(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("version", thrift.I32, 1); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 1:version: ", p), err) + } + if err := oprot.WriteI32(int32(p.Version)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.version (1) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 1:version: ", p), err) + } + return err +} + +func (p *FileMetaData) writeField2(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("schema", thrift.LIST, 2); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 2:schema: ", p), err) + } + if err := oprot.WriteListBegin(thrift.STRUCT, len(p.Schema)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.Schema { + if err := v.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 2:schema: ", p), err) + } + return err +} + +func (p *FileMetaData) writeField3(oprot thrift.TProtocol) (err error) { + if err := oprot.WriteFieldBegin("num_rows", thrift.I64, 3); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 3:num_rows: ", p), err) + } + if err := oprot.WriteI64(int64(p.NumRows)); err != nil { + return thrift.PrependError(fmt.Sprintf("%T.num_rows (3) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 3:num_rows: ", p), err) + } + return err +} + +func (p *FileMetaData) writeField4(oprot thrift.TProtocol) (err error) { + if err := 
oprot.WriteFieldBegin("row_groups", thrift.LIST, 4); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 4:row_groups: ", p), err) + } + if err := oprot.WriteListBegin(thrift.STRUCT, len(p.RowGroups)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.RowGroups { + if err := v.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 4:row_groups: ", p), err) + } + return err +} + +func (p *FileMetaData) writeField5(oprot thrift.TProtocol) (err error) { + if p.IsSetKeyValueMetadata() { + if err := oprot.WriteFieldBegin("key_value_metadata", thrift.LIST, 5); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 5:key_value_metadata: ", p), err) + } + if err := oprot.WriteListBegin(thrift.STRUCT, len(p.KeyValueMetadata)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.KeyValueMetadata { + if err := v.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 5:key_value_metadata: ", p), err) + } + } + return err +} + +func (p *FileMetaData) writeField6(oprot thrift.TProtocol) (err error) { + if p.IsSetCreatedBy() { + if err := oprot.WriteFieldBegin("created_by", thrift.STRING, 6); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 6:created_by: ", p), err) + } + if err := oprot.WriteString(string(*p.CreatedBy)); err 
!= nil { + return thrift.PrependError(fmt.Sprintf("%T.created_by (6) field write error: ", p), err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 6:created_by: ", p), err) + } + } + return err +} + +func (p *FileMetaData) writeField7(oprot thrift.TProtocol) (err error) { + if p.IsSetColumnOrders() { + if err := oprot.WriteFieldBegin("column_orders", thrift.LIST, 7); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field begin error 7:column_orders: ", p), err) + } + if err := oprot.WriteListBegin(thrift.STRUCT, len(p.ColumnOrders)); err != nil { + return thrift.PrependError("error writing list begin: ", err) + } + for _, v := range p.ColumnOrders { + if err := v.Write(oprot); err != nil { + return thrift.PrependError(fmt.Sprintf("%T error writing struct: ", v), err) + } + } + if err := oprot.WriteListEnd(); err != nil { + return thrift.PrependError("error writing list end: ", err) + } + if err := oprot.WriteFieldEnd(); err != nil { + return thrift.PrependError(fmt.Sprintf("%T write field end error 7:column_orders: ", p), err) + } + } + return err +} + +func (p *FileMetaData) String() string { + if p == nil { + return "" + } + return fmt.Sprintf("FileMetaData(%+v)", *p) +} diff --git a/vendor/github.com/minio/parquet-go/gen-parquet-format-pkg.sh b/vendor/github.com/minio/parquet-go/gen-parquet-format-pkg.sh new file mode 100755 index 000000000..56bd950f7 --- /dev/null +++ b/vendor/github.com/minio/parquet-go/gen-parquet-format-pkg.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# +# Minio Cloud Storage, (C) 2018 Minio, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +set -e + +rm -f parquet.thrift +wget -q https://github.com/apache/parquet-format/raw/df6132b94f273521a418a74442085fdd5a0aa009/src/main/thrift/parquet.thrift +thrift --gen go parquet.thrift diff --git a/vendor/github.com/minio/parquet-go/page.go b/vendor/github.com/minio/parquet-go/page.go new file mode 100644 index 000000000..fdee8f351 --- /dev/null +++ b/vendor/github.com/minio/parquet-go/page.go @@ -0,0 +1,531 @@ +/* + * Minio Cloud Storage, (C) 2018 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package parquet + +import ( + "bytes" + "fmt" + "strings" + + "git.apache.org/thrift.git/lib/go/thrift" + "github.com/minio/parquet-go/gen-go/parquet" +) + +// getBitWidth - returns bits required to place num e.g. +// +// num | width +// -----|------- +// 0 | 0 +// 1 | 1 +// 2 | 2 +// 3 | 2 +// 4 | 3 +// 5 | 3 +// ... | ... +// ... | ... +// +func getBitWidth(num uint64) (width uint64) { + for ; num != 0; num >>= 1 { + width++ + } + + return width +} + +// getMaxDefLevel - get maximum definition level. 
+func getMaxDefLevel(nameIndexMap map[string]int, schemaElements []*parquet.SchemaElement, path []string) (v int) { + for i := 1; i <= len(path); i++ { + name := strings.Join(path[:i], ".") + if index, ok := nameIndexMap[name]; ok { + if schemaElements[index].GetRepetitionType() != parquet.FieldRepetitionType_REQUIRED { + v++ + } + } + } + + return v +} + +// getMaxRepLevel - get maximum repetition level. +func getMaxRepLevel(nameIndexMap map[string]int, schemaElements []*parquet.SchemaElement, path []string) (v int) { + for i := 1; i <= len(path); i++ { + name := strings.Join(path[:i], ".") + if index, ok := nameIndexMap[name]; ok { + if schemaElements[index].GetRepetitionType() == parquet.FieldRepetitionType_REPEATED { + v++ + } + } + } + + return v +} + +func readPageHeader(reader *thrift.TBufferedTransport) (*parquet.PageHeader, error) { + pageHeader := parquet.NewPageHeader() + if err := pageHeader.Read(thrift.NewTCompactProtocol(reader)); err != nil { + return nil, err + } + + return pageHeader, nil +} + +func readPageRawData(thriftReader *thrift.TBufferedTransport, metadata *parquet.ColumnMetaData) (page *page, err error) { + pageHeader, err := readPageHeader(thriftReader) + if err != nil { + return nil, err + } + + switch pageType := pageHeader.GetType(); pageType { + case parquet.PageType_DICTIONARY_PAGE: + page = newDictPage() + case parquet.PageType_DATA_PAGE, parquet.PageType_DATA_PAGE_V2: + page = newDataPage() + default: + return nil, fmt.Errorf("unsupported page type %v", pageType) + } + + compressedPageSize := pageHeader.GetCompressedPageSize() + buf := make([]byte, compressedPageSize) + if _, err := thriftReader.Read(buf); err != nil { + return nil, err + } + + page.Header = pageHeader + page.CompressType = metadata.GetCodec() + page.RawData = buf + page.Path = append([]string{}, metadata.GetPathInSchema()...) 
+ page.DataType = metadata.GetType() + + return page, nil +} + +func readPage( + thriftReader *thrift.TBufferedTransport, + metadata *parquet.ColumnMetaData, + columnNameIndexMap map[string]int, + schemaElements []*parquet.SchemaElement, +) (page *page, definitionLevels, numRows int64, err error) { + + pageHeader, err := readPageHeader(thriftReader) + if err != nil { + return nil, 0, 0, err + } + + read := func() (data []byte, err error) { + var repLevelsLen, defLevelsLen int32 + var repLevelsBuf, defLevelsBuf []byte + + if pageHeader.GetType() == parquet.PageType_DATA_PAGE_V2 { + repLevelsLen = pageHeader.DataPageHeaderV2.GetRepetitionLevelsByteLength() + repLevelsBuf = make([]byte, repLevelsLen) + if _, err = thriftReader.Read(repLevelsBuf); err != nil { + return nil, err + } + + defLevelsLen = pageHeader.DataPageHeaderV2.GetDefinitionLevelsByteLength() + defLevelsBuf = make([]byte, defLevelsLen) + if _, err = thriftReader.Read(defLevelsBuf); err != nil { + return nil, err + } + } + + dataBuf := make([]byte, pageHeader.GetCompressedPageSize()-repLevelsLen-defLevelsLen) + if _, err = thriftReader.Read(dataBuf); err != nil { + return nil, err + } + + if dataBuf, err = compressionCodec(metadata.GetCodec()).uncompress(dataBuf); err != nil { + return nil, err + } + + if repLevelsLen == 0 && defLevelsLen == 0 { + return dataBuf, nil + } + + if repLevelsLen > 0 { + data = append(data, uint32ToBytes(uint32(repLevelsLen))...) + data = append(data, repLevelsBuf...) + } + + if defLevelsLen > 0 { + data = append(data, uint32ToBytes(uint32(defLevelsLen))...) + data = append(data, defLevelsBuf...) + } + + data = append(data, dataBuf...) + + return data, nil + } + + buf, err := read() + if err != nil { + return nil, 0, 0, err + } + + path := append([]string{}, metadata.GetPathInSchema()...) 
+ + bytesReader := bytes.NewReader(buf) + pageType := pageHeader.GetType() + switch pageType { + case parquet.PageType_INDEX_PAGE: + return nil, 0, 0, fmt.Errorf("page type %v is not supported", parquet.PageType_INDEX_PAGE) + + case parquet.PageType_DICTIONARY_PAGE: + page = newDictPage() + page.Header = pageHeader + table := new(table) + table.Path = path + values, err := readValues(bytesReader, metadata.GetType(), + uint64(pageHeader.DictionaryPageHeader.GetNumValues()), 0) + if err != nil { + return nil, 0, 0, err + } + table.Values = getTableValues(values, metadata.GetType()) + page.DataTable = table + + return page, 0, 0, nil + + case parquet.PageType_DATA_PAGE, parquet.PageType_DATA_PAGE_V2: + name := strings.Join(path, ".") + + page = newDataPage() + page.Header = pageHeader + + maxDefinitionLevel := getMaxDefLevel(columnNameIndexMap, schemaElements, path) + maxRepetitionLevel := getMaxRepLevel(columnNameIndexMap, schemaElements, path) + + var numValues uint64 + var encodingType parquet.Encoding + + if pageHeader.GetType() == parquet.PageType_DATA_PAGE { + numValues = uint64(pageHeader.DataPageHeader.GetNumValues()) + encodingType = pageHeader.DataPageHeader.GetEncoding() + } else { + numValues = uint64(pageHeader.DataPageHeaderV2.GetNumValues()) + encodingType = pageHeader.DataPageHeaderV2.GetEncoding() + } + + var repetitionLevels []int64 + if maxRepetitionLevel > 0 { + values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, + -1, numValues, getBitWidth(uint64(maxRepetitionLevel))) + if err != nil { + return nil, 0, 0, err + } + + if repetitionLevels = values.([]int64); uint64(len(repetitionLevels)) > numValues { + repetitionLevels = repetitionLevels[:numValues] + } + } else { + repetitionLevels = make([]int64, numValues) + } + + var definitionLevels []int64 + if maxDefinitionLevel > 0 { + values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, + -1, numValues, 
getBitWidth(uint64(maxDefinitionLevel))) + if err != nil { + return nil, 0, 0, err + } + if definitionLevels = values.([]int64); uint64(len(definitionLevels)) > numValues { + definitionLevels = definitionLevels[:numValues] + } + } else { + definitionLevels = make([]int64, numValues) + } + + var numNulls uint64 + for i := 0; i < len(definitionLevels); i++ { + if definitionLevels[i] != int64(maxDefinitionLevel) { + numNulls++ + } + } + + var convertedType parquet.ConvertedType = -1 + if schemaElements[columnNameIndexMap[name]].IsSetConvertedType() { + convertedType = schemaElements[columnNameIndexMap[name]].GetConvertedType() + } + values, valueType, err := readDataPageValues(bytesReader, encodingType, metadata.GetType(), + convertedType, uint64(len(definitionLevels))-numNulls, + uint64(schemaElements[columnNameIndexMap[name]].GetTypeLength())) + if err != nil { + return nil, 0, 0, err + } + tableValues := getTableValues(values, valueType) + + table := new(table) + table.Path = path + table.RepetitionType = schemaElements[columnNameIndexMap[name]].GetRepetitionType() + table.MaxRepetitionLevel = int32(maxRepetitionLevel) + table.MaxDefinitionLevel = int32(maxDefinitionLevel) + table.Values = make([]interface{}, len(definitionLevels)) + table.RepetitionLevels = make([]int32, len(definitionLevels)) + table.DefinitionLevels = make([]int32, len(definitionLevels)) + + j := 0 + numRows := int64(0) + for i := 0; i < len(definitionLevels); i++ { + table.RepetitionLevels[i] = int32(repetitionLevels[i]) + table.DefinitionLevels[i] = int32(definitionLevels[i]) + if int(table.DefinitionLevels[i]) == maxDefinitionLevel { + table.Values[i] = tableValues[j] + j++ + } + if table.RepetitionLevels[i] == 0 { + numRows++ + } + } + page.DataTable = table + + return page, int64(len(definitionLevels)), numRows, nil + } + + return nil, 0, 0, fmt.Errorf("unknown page type %v", pageType) +} + +type page struct { + Header *parquet.PageHeader // Header of a page + DataTable *table // Table to 
store values + RawData []byte // Compressed data of the page, which is written in parquet file + CompressType parquet.CompressionCodec // Compress type: gzip/snappy/none + DataType parquet.Type // Parquet type of the values in the page + Path []string // Path in schema(include the root) + MaxVal interface{} // Maximum of the values + MinVal interface{} // Minimum of the values + PageSize int32 +} + +func newPage() *page { + return &page{ + Header: parquet.NewPageHeader(), + PageSize: 8 * 1024, + } +} + +func newDictPage() *page { + page := newPage() + page.Header.DictionaryPageHeader = parquet.NewDictionaryPageHeader() + return page +} + +func newDataPage() *page { + page := newPage() + page.Header.DataPageHeader = parquet.NewDataPageHeader() + return page +} + +func (page *page) decode(dictPage *page) { + if dictPage == nil || page == nil || page.Header.DataPageHeader == nil || + (page.Header.DataPageHeader.Encoding != parquet.Encoding_RLE_DICTIONARY && + page.Header.DataPageHeader.Encoding != parquet.Encoding_PLAIN_DICTIONARY) { + return + } + + for i := 0; i < len(page.DataTable.Values); i++ { + if page.DataTable.Values[i] != nil { + index := page.DataTable.Values[i].(int64) + page.DataTable.Values[i] = dictPage.DataTable.Values[index] + } + } +} + +// Get RepetitionLevels and Definitions from RawData +func (page *page) getRLDLFromRawData(columnNameIndexMap map[string]int, schemaElements []*parquet.SchemaElement) (numValues int64, numRows int64, err error) { + bytesReader := bytes.NewReader(page.RawData) + + pageType := page.Header.GetType() + + var buf []byte + if pageType == parquet.PageType_DATA_PAGE_V2 { + var repLevelsLen, defLevelsLen int32 + var repLevelsBuf, defLevelsBuf []byte + + repLevelsLen = page.Header.DataPageHeaderV2.GetRepetitionLevelsByteLength() + repLevelsBuf = make([]byte, repLevelsLen) + if _, err = bytesReader.Read(repLevelsBuf); err != nil { + return 0, 0, err + } + + defLevelsLen = 
page.Header.DataPageHeaderV2.GetDefinitionLevelsByteLength() + defLevelsBuf = make([]byte, defLevelsLen) + if _, err = bytesReader.Read(defLevelsBuf); err != nil { + return 0, 0, err + } + + dataBuf := make([]byte, len(page.RawData)-int(repLevelsLen)-int(defLevelsLen)) + if _, err = bytesReader.Read(dataBuf); err != nil { + return 0, 0, err + } + + if repLevelsLen == 0 && defLevelsLen == 0 { + buf = dataBuf + } else { + if repLevelsLen > 0 { + buf = append(buf, uint32ToBytes(uint32(repLevelsLen))...) + buf = append(buf, repLevelsBuf...) + } + + if defLevelsLen > 0 { + buf = append(buf, uint32ToBytes(uint32(defLevelsLen))...) + buf = append(buf, defLevelsBuf...) + } + + buf = append(buf, dataBuf...) + } + } else { + if buf, err = compressionCodec(page.CompressType).uncompress(page.RawData); err != nil { + return 0, 0, err + } + } + + bytesReader = bytes.NewReader(buf) + + switch pageType { + case parquet.PageType_DICTIONARY_PAGE: + table := new(table) + table.Path = page.Path + page.DataTable = table + return 0, 0, nil + + case parquet.PageType_DATA_PAGE, parquet.PageType_DATA_PAGE_V2: + var numValues uint64 + if pageType == parquet.PageType_DATA_PAGE { + numValues = uint64(page.Header.DataPageHeader.GetNumValues()) + } else { + numValues = uint64(page.Header.DataPageHeaderV2.GetNumValues()) + } + + maxDefinitionLevel := getMaxDefLevel(columnNameIndexMap, schemaElements, page.Path) + maxRepetitionLevel := getMaxRepLevel(columnNameIndexMap, schemaElements, page.Path) + + var repetitionLevels []int64 + if maxRepetitionLevel > 0 { + values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, + -1, numValues, getBitWidth(uint64(maxRepetitionLevel))) + if err != nil { + return 0, 0, err + } + + if repetitionLevels = values.([]int64); uint64(len(repetitionLevels)) > numValues { + repetitionLevels = repetitionLevels[:numValues] + } + } else { + repetitionLevels = make([]int64, numValues) + } + + var definitionLevels []int64 + if 
maxDefinitionLevel > 0 { + values, _, err := readDataPageValues(bytesReader, parquet.Encoding_RLE, parquet.Type_INT64, + -1, numValues, getBitWidth(uint64(maxDefinitionLevel))) + if err != nil { + return 0, 0, err + } + if definitionLevels = values.([]int64); uint64(len(definitionLevels)) > numValues { + definitionLevels = definitionLevels[:numValues] + } + } else { + definitionLevels = make([]int64, numValues) + } + + table := new(table) + table.Path = page.Path + name := strings.Join(page.Path, ".") + table.RepetitionType = schemaElements[columnNameIndexMap[name]].GetRepetitionType() + table.MaxRepetitionLevel = int32(maxRepetitionLevel) + table.MaxDefinitionLevel = int32(maxDefinitionLevel) + table.Values = make([]interface{}, len(definitionLevels)) + table.RepetitionLevels = make([]int32, len(definitionLevels)) + table.DefinitionLevels = make([]int32, len(definitionLevels)) + + numRows := int64(0) + for i := 0; i < len(definitionLevels); i++ { + table.RepetitionLevels[i] = int32(repetitionLevels[i]) + table.DefinitionLevels[i] = int32(definitionLevels[i]) + if table.RepetitionLevels[i] == 0 { + numRows++ + } + } + page.DataTable = table + page.RawData = buf[len(buf)-bytesReader.Len():] + + return int64(numValues), numRows, nil + } + + return 0, 0, fmt.Errorf("Unsupported page type %v", pageType) +} + +func (page *page) getValueFromRawData(columnNameIndexMap map[string]int, schemaElements []*parquet.SchemaElement) (err error) { + pageType := page.Header.GetType() + switch pageType { + case parquet.PageType_DICTIONARY_PAGE: + bytesReader := bytes.NewReader(page.RawData) + var values interface{} + values, err = readValues(bytesReader, page.DataType, + uint64(page.Header.DictionaryPageHeader.GetNumValues()), 0) + if err != nil { + return err + } + + page.DataTable.Values = getTableValues(values, page.DataType) + return nil + + case parquet.PageType_DATA_PAGE_V2: + if page.RawData, err = compressionCodec(page.CompressType).uncompress(page.RawData); err != nil { + 
return err + } + fallthrough + case parquet.PageType_DATA_PAGE: + encodingType := page.Header.DataPageHeader.GetEncoding() + bytesReader := bytes.NewReader(page.RawData) + + var numNulls uint64 + for i := 0; i < len(page.DataTable.DefinitionLevels); i++ { + if page.DataTable.DefinitionLevels[i] != page.DataTable.MaxDefinitionLevel { + numNulls++ + } + } + + name := strings.Join(page.DataTable.Path, ".") + var convertedType parquet.ConvertedType = -1 + + if schemaElements[columnNameIndexMap[name]].IsSetConvertedType() { + convertedType = schemaElements[columnNameIndexMap[name]].GetConvertedType() + } + + values, _, err := readDataPageValues(bytesReader, encodingType, page.DataType, + convertedType, uint64(len(page.DataTable.DefinitionLevels))-numNulls, + uint64(schemaElements[columnNameIndexMap[name]].GetTypeLength())) + if err != nil { + return err + } + + tableValues := getTableValues(values, page.DataType) + + j := 0 + for i := 0; i < len(page.DataTable.DefinitionLevels); i++ { + if page.DataTable.DefinitionLevels[i] == page.DataTable.MaxDefinitionLevel { + page.DataTable.Values[i] = tableValues[j] + j++ + } + } + + page.RawData = []byte{} + return nil + } + + return fmt.Errorf("unsupported page type %v", pageType) +} diff --git a/vendor/github.com/minio/parquet-go/parquet.go b/vendor/github.com/minio/parquet-go/parquet.go new file mode 100644 index 000000000..d9719cfee --- /dev/null +++ b/vendor/github.com/minio/parquet-go/parquet.go @@ -0,0 +1,162 @@ +/* + * Minio Cloud Storage, (C) 2018 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package parquet + +import ( + "encoding/binary" + "encoding/json" + "io" + + "git.apache.org/thrift.git/lib/go/thrift" + "github.com/minio/minio-go/pkg/set" + "github.com/minio/parquet-go/gen-go/parquet" +) + +// GetReaderFunc - function type returning io.ReadCloser for requested offset/length. +type GetReaderFunc func(offset, length int64) (io.ReadCloser, error) + +func footerSize(getReaderFunc GetReaderFunc) (size int64, err error) { + rc, err := getReaderFunc(-8, 4) + if err != nil { + return 0, err + } + defer rc.Close() + + buf := make([]byte, 4) + if _, err = io.ReadFull(rc, buf); err != nil { + return 0, err + } + + size = int64(binary.LittleEndian.Uint32(buf)) + + return size, nil +} + +func fileMetadata(getReaderFunc GetReaderFunc) (*parquet.FileMetaData, error) { + size, err := footerSize(getReaderFunc) + if err != nil { + return nil, err + } + + rc, err := getReaderFunc(-(8 + size), size) + if err != nil { + return nil, err + } + defer rc.Close() + + fileMeta := parquet.NewFileMetaData() + + pf := thrift.NewTCompactProtocolFactory() + protocol := pf.GetProtocol(thrift.NewStreamTransportR(rc)) + err = fileMeta.Read(protocol) + if err != nil { + return nil, err + } + + return fileMeta, nil +} + +// Value - denotes column value +type Value struct { + Value interface{} + Type parquet.Type +} + +// MarshalJSON - encodes to JSON data +func (value Value) MarshalJSON() (data []byte, err error) { + return json.Marshal(value.Value) +} + +// File - denotes parquet file. +type File struct { + getReaderFunc GetReaderFunc + schemaElements []*parquet.SchemaElement + rowGroups []*parquet.RowGroup + rowGroupIndex int + + columnNames set.StringSet + columns map[string]*column + rowIndex int64 +} + +// Open - opens parquet file with given column names. 
+func Open(getReaderFunc GetReaderFunc, columnNames set.StringSet) (*File, error) { + fileMeta, err := fileMetadata(getReaderFunc) + if err != nil { + return nil, err + } + + return &File{ + getReaderFunc: getReaderFunc, + rowGroups: fileMeta.GetRowGroups(), + schemaElements: fileMeta.GetSchema(), + columnNames: columnNames, + }, nil +} + +// Read - reads single record. +func (file *File) Read() (record map[string]Value, err error) { + if file.rowGroupIndex >= len(file.rowGroups) { + return nil, io.EOF + } + + if file.columns == nil { + file.columns, err = getColumns( + file.rowGroups[file.rowGroupIndex], + file.columnNames, + file.schemaElements, + file.getReaderFunc, + ) + if err != nil { + return nil, err + } + + file.rowIndex = 0 + } + + if file.rowIndex >= file.rowGroups[file.rowGroupIndex].GetNumRows() { + file.rowGroupIndex++ + file.Close() + return file.Read() + } + + record = make(map[string]Value) + for name := range file.columns { + value, valueType := file.columns[name].read() + record[name] = Value{value, valueType} + } + + file.rowIndex++ + + return record, nil +} + +// Close - closes underneath readers. +func (file *File) Close() (err error) { + if file.columns != nil { + return nil + } + + for _, column := range file.columns { + column.close() + } + + file.columns = nil + file.rowIndex = 0 + + return nil +} diff --git a/vendor/github.com/minio/parquet-go/parquet.thrift b/vendor/github.com/minio/parquet-go/parquet.thrift new file mode 100644 index 000000000..6c9011b9a --- /dev/null +++ b/vendor/github.com/minio/parquet-go/parquet.thrift @@ -0,0 +1,881 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/** + * File format description for the parquet file format + */ +namespace cpp parquet +namespace java org.apache.parquet.format + +/** + * Types supported by Parquet. These types are intended to be used in combination + * with the encodings to control the on disk storage format. + * For example INT16 is not included as a type since a good encoding of INT32 + * would handle this. + */ +enum Type { + BOOLEAN = 0; + INT32 = 1; + INT64 = 2; + INT96 = 3; // deprecated, only used by legacy implementations. + FLOAT = 4; + DOUBLE = 5; + BYTE_ARRAY = 6; + FIXED_LEN_BYTE_ARRAY = 7; +} + +/** + * Common types used by frameworks(e.g. hive, pig) using parquet. This helps map + * between types in those frameworks to the base types in parquet. This is only + * metadata and not needed to read or write the data. + */ +enum ConvertedType { + /** a BYTE_ARRAY actually contains UTF8 encoded chars */ + UTF8 = 0; + + /** a map is converted as an optional field containing a repeated key/value pair */ + MAP = 1; + + /** a key/value pair is converted into a group of two fields */ + MAP_KEY_VALUE = 2; + + /** a list is converted into an optional field containing a repeated field for its + * values */ + LIST = 3; + + /** an enum is converted into a binary field */ + ENUM = 4; + + /** + * A decimal value. + * + * This may be used to annotate binary or fixed primitive types. The + * underlying byte array stores the unscaled value encoded as two's + * complement using big-endian byte order (the most significant byte is the + * zeroth element). 
The value of the decimal is the value * 10^{-scale}. + * + * This must be accompanied by a (maximum) precision and a scale in the + * SchemaElement. The precision specifies the number of digits in the decimal + * and the scale stores the location of the decimal point. For example 1.23 + * would have precision 3 (3 total digits) and scale 2 (the decimal point is + * 2 digits over). + */ + DECIMAL = 5; + + /** + * A Date + * + * Stored as days since Unix epoch, encoded as the INT32 physical type. + * + */ + DATE = 6; + + /** + * A time + * + * The total number of milliseconds since midnight. The value is stored + * as an INT32 physical type. + */ + TIME_MILLIS = 7; + + /** + * A time. + * + * The total number of microseconds since midnight. The value is stored as + * an INT64 physical type. + */ + TIME_MICROS = 8; + + /** + * A date/time combination + * + * Date and time recorded as milliseconds since the Unix epoch. Recorded as + * a physical type of INT64. + */ + TIMESTAMP_MILLIS = 9; + + /** + * A date/time combination + * + * Date and time recorded as microseconds since the Unix epoch. The value is + * stored as an INT64 physical type. + */ + TIMESTAMP_MICROS = 10; + + + /** + * An unsigned integer value. + * + * The number describes the maximum number of meainful data bits in + * the stored value. 8, 16 and 32 bit values are stored using the + * INT32 physical type. 64 bit values are stored using the INT64 + * physical type. + * + */ + UINT_8 = 11; + UINT_16 = 12; + UINT_32 = 13; + UINT_64 = 14; + + /** + * A signed integer value. + * + * The number describes the maximum number of meainful data bits in + * the stored value. 8, 16 and 32 bit values are stored using the + * INT32 physical type. 64 bit values are stored using the INT64 + * physical type. + * + */ + INT_8 = 15; + INT_16 = 16; + INT_32 = 17; + INT_64 = 18; + + /** + * An embedded JSON document + * + * A JSON document embedded within a single UTF8 column. 
+ */ + JSON = 19; + + /** + * An embedded BSON document + * + * A BSON document embedded within a single BINARY column. + */ + BSON = 20; + + /** + * An interval of time + * + * This type annotates data stored as a FIXED_LEN_BYTE_ARRAY of length 12 + * This data is composed of three separate little endian unsigned + * integers. Each stores a component of a duration of time. The first + * integer identifies the number of months associated with the duration, + * the second identifies the number of days associated with the duration + * and the third identifies the number of milliseconds associated with + * the provided duration. This duration of time is independent of any + * particular timezone or date. + */ + INTERVAL = 21; +} + +/** + * Representation of Schemas + */ +enum FieldRepetitionType { + /** This field is required (can not be null) and each record has exactly 1 value. */ + REQUIRED = 0; + + /** The field is optional (can be null) and each record has 0 or 1 values. */ + OPTIONAL = 1; + + /** The field is repeated and can contain 0 or more values */ + REPEATED = 2; +} + +/** + * Statistics per row group and per page + * All fields are optional. + */ +struct Statistics { + /** + * DEPRECATED: min and max value of the column. Use min_value and max_value. + * + * Values are encoded using PLAIN encoding, except that variable-length byte + * arrays do not include a length prefix. + * + * These fields encode min and max values determined by signed comparison + * only. New files should use the correct order for a column's logical type + * and store the values in the min_value and max_value fields. + * + * To support older readers, these may be set when the column order is + * signed. + */ + 1: optional binary max; + 2: optional binary min; + /** count of null value in the column */ + 3: optional i64 null_count; + /** count of distinct values occurring */ + 4: optional i64 distinct_count; + /** + * Min and max values for the column, determined by its ColumnOrder. 
+ * + * Values are encoded using PLAIN encoding, except that variable-length byte + * arrays do not include a length prefix. + */ + 5: optional binary max_value; + 6: optional binary min_value; +} + +/** Empty structs to use as logical type annotations */ +struct StringType {} // allowed for BINARY, must be encoded with UTF-8 +struct UUIDType {} // allowed for FIXED[16], must encoded raw UUID bytes +struct MapType {} // see LogicalTypes.md +struct ListType {} // see LogicalTypes.md +struct EnumType {} // allowed for BINARY, must be encoded with UTF-8 +struct DateType {} // allowed for INT32 + +/** + * Logical type to annotate a column that is always null. + * + * Sometimes when discovering the schema of existing data, values are always + * null and the physical type can't be determined. This annotation signals + * the case where the physical type was guessed from all null values. + */ +struct NullType {} // allowed for any physical type, only null values stored + +/** + * Decimal logical type annotation + * + * To maintain forward-compatibility in v1, implementations using this logical + * type must also set scale and precision on the annotated SchemaElement. 
+ * + * Allowed for physical types: INT32, INT64, FIXED, and BINARY + */ +struct DecimalType { + 1: required i32 scale + 2: required i32 precision +} + +/** Time units for logical types */ +struct MilliSeconds {} +struct MicroSeconds {} +struct NanoSeconds {} +union TimeUnit { + 1: MilliSeconds MILLIS + 2: MicroSeconds MICROS + 3: NanoSeconds NANOS +} + +/** + * Timestamp logical type annotation + * + * Allowed for physical types: INT64 + */ +struct TimestampType { + 1: required bool isAdjustedToUTC + 2: required TimeUnit unit +} + +/** + * Time logical type annotation + * + * Allowed for physical types: INT32 (millis), INT64 (micros, nanos) + */ +struct TimeType { + 1: required bool isAdjustedToUTC + 2: required TimeUnit unit +} + +/** + * Integer logical type annotation + * + * bitWidth must be 8, 16, 32, or 64. + * + * Allowed for physical types: INT32, INT64 + */ +struct IntType { + 1: required byte bitWidth + 2: required bool isSigned +} + +/** + * Embedded JSON logical type annotation + * + * Allowed for physical types: BINARY + */ +struct JsonType { +} + +/** + * Embedded BSON logical type annotation + * + * Allowed for physical types: BINARY + */ +struct BsonType { +} + +/** + * LogicalType annotations to replace ConvertedType. + * + * To maintain compatibility, implementations using LogicalType for a + * SchemaElement must also set the corresponding ConvertedType from the + * following table. 
+ */ +union LogicalType { + 1: StringType STRING // use ConvertedType UTF8 + 2: MapType MAP // use ConvertedType MAP + 3: ListType LIST // use ConvertedType LIST + 4: EnumType ENUM // use ConvertedType ENUM + 5: DecimalType DECIMAL // use ConvertedType DECIMAL + 6: DateType DATE // use ConvertedType DATE + 7: TimeType TIME // use ConvertedType TIME_MICROS or TIME_MILLIS + 8: TimestampType TIMESTAMP // use ConvertedType TIMESTAMP_MICROS or TIMESTAMP_MILLIS + // 9: reserved for INTERVAL + 10: IntType INTEGER // use ConvertedType INT_* or UINT_* + 11: NullType UNKNOWN // no compatible ConvertedType + 12: JsonType JSON // use ConvertedType JSON + 13: BsonType BSON // use ConvertedType BSON + 14: UUIDType UUID +} + +/** + * Represents a element inside a schema definition. + * - if it is a group (inner node) then type is undefined and num_children is defined + * - if it is a primitive type (leaf) then type is defined and num_children is undefined + * the nodes are listed in depth first traversal order. + */ +struct SchemaElement { + /** Data type for this field. Not set if the current element is a non-leaf node */ + 1: optional Type type; + + /** If type is FIXED_LEN_BYTE_ARRAY, this is the byte length of the vales. + * Otherwise, if specified, this is the maximum bit length to store any of the values. + * (e.g. a low cardinality INT col could have this set to 3). Note that this is + * in the schema, and therefore fixed for the entire file. + */ + 2: optional i32 type_length; + + /** repetition of the field. The root of the schema does not have a repetition_type. + * All other nodes must have one */ + 3: optional FieldRepetitionType repetition_type; + + /** Name of the field in the schema */ + 4: required string name; + + /** Nested fields. Since thrift does not support nested fields, + * the nesting is flattened to a single list by a depth-first traversal. + * The children count is used to construct the nested relationship. 
+ * This field is not set when the element is a primitive type + */ + 5: optional i32 num_children; + + /** When the schema is the result of a conversion from another model + * Used to record the original type to help with cross conversion. + */ + 6: optional ConvertedType converted_type; + + /** Used when this column contains decimal data. + * See the DECIMAL converted type for more details. + */ + 7: optional i32 scale + 8: optional i32 precision + + /** When the original schema supports field ids, this will save the + * original field id in the parquet schema + */ + 9: optional i32 field_id; + + /** + * The logical type of this SchemaElement + * + * LogicalType replaces ConvertedType, but ConvertedType is still required + * for some logical types to ensure forward-compatibility in format v1. + */ + 10: optional LogicalType logicalType +} + +/** + * Encodings supported by Parquet. Not all encodings are valid for all types. These + * enums are also used to specify the encoding of definition and repetition levels. + * See the accompanying doc for the details of the more complicated encodings. + */ +enum Encoding { + /** Default encoding. + * BOOLEAN - 1 bit per value. 0 is false; 1 is true. + * INT32 - 4 bytes per value. Stored as little-endian. + * INT64 - 8 bytes per value. Stored as little-endian. + * FLOAT - 4 bytes per value. IEEE. Stored as little-endian. + * DOUBLE - 8 bytes per value. IEEE. Stored as little-endian. + * BYTE_ARRAY - 4 byte length stored as little endian, followed by bytes. + * FIXED_LEN_BYTE_ARRAY - Just the bytes. + */ + PLAIN = 0; + + /** Group VarInt encoding for INT32/INT64. + * This encoding is deprecated. It was never used + */ + // GROUP_VAR_INT = 1; + + /** + * Deprecated: Dictionary encoding. The values in the dictionary are encoded in the + * plain type. + * in a data page use RLE_DICTIONARY instead. + * in a Dictionary page use PLAIN instead + */ + PLAIN_DICTIONARY = 2; + + /** Group packed run length encoding. 
Usable for definition/repetition levels + * encoding and Booleans (on one bit: 0 is false; 1 is true.) + */ + RLE = 3; + + /** Bit packed encoding. This can only be used if the data has a known max + * width. Usable for definition/repetition levels encoding. + */ + BIT_PACKED = 4; + + /** Delta encoding for integers. This can be used for int columns and works best + * on sorted data + */ + DELTA_BINARY_PACKED = 5; + + /** Encoding for byte arrays to separate the length values and the data. The lengths + * are encoded using DELTA_BINARY_PACKED + */ + DELTA_LENGTH_BYTE_ARRAY = 6; + + /** Incremental-encoded byte array. Prefix lengths are encoded using DELTA_BINARY_PACKED. + * Suffixes are stored as delta length byte arrays. + */ + DELTA_BYTE_ARRAY = 7; + + /** Dictionary encoding: the ids are encoded using the RLE encoding + */ + RLE_DICTIONARY = 8; +} + +/** + * Supported compression algorithms. + * + * Codecs added in 2.4 can be read by readers based on 2.4 and later. + * Codec support may vary between readers based on the format version and + * libraries available at runtime. Gzip, Snappy, and LZ4 codecs are + * widely available, while Zstd and Brotli require additional libraries. + */ +enum CompressionCodec { + UNCOMPRESSED = 0; + SNAPPY = 1; + GZIP = 2; + LZO = 3; + BROTLI = 4; // Added in 2.4 + LZ4 = 5; // Added in 2.4 + ZSTD = 6; // Added in 2.4 +} + +enum PageType { + DATA_PAGE = 0; + INDEX_PAGE = 1; + DICTIONARY_PAGE = 2; + DATA_PAGE_V2 = 3; +} + +/** + * Enum to annotate whether lists of min/max elements inside ColumnIndex + * are ordered and if so, in which direction. + */ +enum BoundaryOrder { + UNORDERED = 0; + ASCENDING = 1; + DESCENDING = 2; +} + +/** Data page header */ +struct DataPageHeader { + /** Number of values, including NULLs, in this data page. 
**/ + 1: required i32 num_values + + /** Encoding used for this data page **/ + 2: required Encoding encoding + + /** Encoding used for definition levels **/ + 3: required Encoding definition_level_encoding; + + /** Encoding used for repetition levels **/ + 4: required Encoding repetition_level_encoding; + + /** Optional statistics for the data in this page**/ + 5: optional Statistics statistics; +} + +struct IndexPageHeader { + /** TODO: **/ +} + +struct DictionaryPageHeader { + /** Number of values in the dictionary **/ + 1: required i32 num_values; + + /** Encoding using this dictionary page **/ + 2: required Encoding encoding + + /** If true, the entries in the dictionary are sorted in ascending order **/ + 3: optional bool is_sorted; +} + +/** + * New page format allowing reading levels without decompressing the data + * Repetition and definition levels are uncompressed + * The remaining section containing the data is compressed if is_compressed is true + **/ +struct DataPageHeaderV2 { + /** Number of values, including NULLs, in this data page. **/ + 1: required i32 num_values + /** Number of NULL values, in this data page. + Number of non-null = num_values - num_nulls which is also the number of values in the data section **/ + 2: required i32 num_nulls + /** Number of rows in this data page. which means pages change on record boundaries (r = 0) **/ + 3: required i32 num_rows + /** Encoding used for data in this page **/ + 4: required Encoding encoding + + // repetition levels and definition levels are always using RLE (without size in it) + + /** length of the definition levels */ + 5: required i32 definition_levels_byte_length; + /** length of the repetition levels */ + 6: required i32 repetition_levels_byte_length; + + /** whether the values are compressed. + Which means the section of the page between + definition_levels_byte_length + repetition_levels_byte_length + 1 and compressed_page_size (included) + is compressed with the compression_codec. 
+ If missing it is considered compressed */ + 7: optional bool is_compressed = 1; + + /** optional statistics for this column chunk */ + 8: optional Statistics statistics; +} + +struct PageHeader { + /** the type of the page: indicates which of the *_header fields is set **/ + 1: required PageType type + + /** Uncompressed page size in bytes (not including this header) **/ + 2: required i32 uncompressed_page_size + + /** Compressed page size in bytes (not including this header) **/ + 3: required i32 compressed_page_size + + /** 32bit crc for the data below. This allows for disabling checksumming in HDFS + * if only a few pages needs to be read + **/ + 4: optional i32 crc + + // Headers for page specific data. One only will be set. + 5: optional DataPageHeader data_page_header; + 6: optional IndexPageHeader index_page_header; + 7: optional DictionaryPageHeader dictionary_page_header; + 8: optional DataPageHeaderV2 data_page_header_v2; +} + +/** + * Wrapper struct to store key values + */ + struct KeyValue { + 1: required string key + 2: optional string value +} + +/** + * Wrapper struct to specify sort order + */ +struct SortingColumn { + /** The column index (in this row group) **/ + 1: required i32 column_idx + + /** If true, indicates this column is sorted in descending order. **/ + 2: required bool descending + + /** If true, nulls will come before non-null values, otherwise, + * nulls go at the end. */ + 3: required bool nulls_first +} + +/** + * statistics of a given page type and encoding + */ +struct PageEncodingStats { + + /** the page type (data/dic/...) **/ + 1: required PageType page_type; + + /** encoding of the page **/ + 2: required Encoding encoding; + + /** number of pages of this type with this encoding **/ + 3: required i32 count; + +} + +/** + * Description for column metadata + */ +struct ColumnMetaData { + /** Type of this column **/ + 1: required Type type + + /** Set of all encodings used for this column. 
The purpose is to validate + * whether we can decode those pages. **/ + 2: required list encodings + + /** Path in schema **/ + 3: required list path_in_schema + + /** Compression codec **/ + 4: required CompressionCodec codec + + /** Number of values in this column **/ + 5: required i64 num_values + + /** total byte size of all uncompressed pages in this column chunk (including the headers) **/ + 6: required i64 total_uncompressed_size + + /** total byte size of all compressed pages in this column chunk (including the headers) **/ + 7: required i64 total_compressed_size + + /** Optional key/value metadata **/ + 8: optional list key_value_metadata + + /** Byte offset from beginning of file to first data page **/ + 9: required i64 data_page_offset + + /** Byte offset from beginning of file to root index page **/ + 10: optional i64 index_page_offset + + /** Byte offset from the beginning of file to first (only) dictionary page **/ + 11: optional i64 dictionary_page_offset + + /** optional statistics for this column chunk */ + 12: optional Statistics statistics; + + /** Set of all encodings used for pages in this column chunk. + * This information can be used to determine if all data pages are + * dictionary encoded for example **/ + 13: optional list encoding_stats; +} + +struct ColumnChunk { + /** File where column data is stored. If not set, assumed to be same file as + * metadata. This path is relative to the current file. + **/ + 1: optional string file_path + + /** Byte offset in file_path to the ColumnMetaData **/ + 2: required i64 file_offset + + /** Column metadata for this chunk. This is the same content as what is at + * file_path/file_offset. Having it here has it replicated in the file + * metadata. 
+ **/ + 3: optional ColumnMetaData meta_data + + /** File offset of ColumnChunk's OffsetIndex **/ + 4: optional i64 offset_index_offset + + /** Size of ColumnChunk's OffsetIndex, in bytes **/ + 5: optional i32 offset_index_length + + /** File offset of ColumnChunk's ColumnIndex **/ + 6: optional i64 column_index_offset + + /** Size of ColumnChunk's ColumnIndex, in bytes **/ + 7: optional i32 column_index_length +} + +struct RowGroup { + /** Metadata for each column chunk in this row group. + * This list must have the same order as the SchemaElement list in FileMetaData. + **/ + 1: required list columns + + /** Total byte size of all the uncompressed column data in this row group **/ + 2: required i64 total_byte_size + + /** Number of rows in this row group **/ + 3: required i64 num_rows + + /** If set, specifies a sort ordering of the rows in this RowGroup. + * The sorting columns can be a subset of all the columns. + */ + 4: optional list sorting_columns +} + +/** Empty struct to signal the order defined by the physical or logical type */ +struct TypeDefinedOrder {} + +/** + * Union to specify the order used for the min_value and max_value fields for a + * column. This union takes the role of an enhanced enum that allows rich + * elements (which will be needed for a collation-based ordering in the future). + * + * Possible values are: + * * TypeDefinedOrder - the column uses the order defined by its logical or + * physical type (if there is no logical type). + * + * If the reader does not support the value of this union, min and max stats + * for this column should be ignored. 
+ */ +union ColumnOrder { + + /** + * The sort orders for logical types are: + * UTF8 - unsigned byte-wise comparison + * INT8 - signed comparison + * INT16 - signed comparison + * INT32 - signed comparison + * INT64 - signed comparison + * UINT8 - unsigned comparison + * UINT16 - unsigned comparison + * UINT32 - unsigned comparison + * UINT64 - unsigned comparison + * DECIMAL - signed comparison of the represented value + * DATE - signed comparison + * TIME_MILLIS - signed comparison + * TIME_MICROS - signed comparison + * TIMESTAMP_MILLIS - signed comparison + * TIMESTAMP_MICROS - signed comparison + * INTERVAL - unsigned comparison + * JSON - unsigned byte-wise comparison + * BSON - unsigned byte-wise comparison + * ENUM - unsigned byte-wise comparison + * LIST - undefined + * MAP - undefined + * + * In the absence of logical types, the sort order is determined by the physical type: + * BOOLEAN - false, true + * INT32 - signed comparison + * INT64 - signed comparison + * INT96 (only used for legacy timestamps) - undefined + * FLOAT - signed comparison of the represented value (*) + * DOUBLE - signed comparison of the represented value (*) + * BYTE_ARRAY - unsigned byte-wise comparison + * FIXED_LEN_BYTE_ARRAY - unsigned byte-wise comparison + * + * (*) Because the sorting order is not specified properly for floating + * point values (relations vs. total ordering) the following + * compatibility rules should be applied when reading statistics: + * - If the min is a NaN, it should be ignored. + * - If the max is a NaN, it should be ignored. + * - If the min is +0, the row group may contain -0 values as well. + * - If the max is -0, the row group may contain +0 values as well. + * - When looking for NaN values, min and max should be ignored. + */ + 1: TypeDefinedOrder TYPE_ORDER; +} + +struct PageLocation { + /** Offset of the page in the file **/ + 1: required i64 offset + + /** + * Size of the page, including header. 
Sum of compressed_page_size and header + * length + */ + 2: required i32 compressed_page_size + + /** + * Index within the RowGroup of the first row of the page; this means pages + * change on record boundaries (r = 0). + */ + 3: required i64 first_row_index +} + +struct OffsetIndex { + /** + * PageLocations, ordered by increasing PageLocation.offset. It is required + * that page_locations[i].first_row_index < page_locations[i+1].first_row_index. + */ + 1: required list page_locations +} + +/** + * Description for ColumnIndex. + * Each [i] refers to the page at OffsetIndex.page_locations[i] + */ +struct ColumnIndex { + /** + * A list of Boolean values to determine the validity of the corresponding + * min and max values. If true, a page contains only null values, and writers + * have to set the corresponding entries in min_values and max_values to + * byte[0], so that all lists have the same length. If false, the + * corresponding entries in min_values and max_values must be valid. + */ + 1: required list null_pages + + /** + * Two lists containing lower and upper bounds for the values of each page. + * These may be the actual minimum and maximum values found on a page, but + * can also be (more compact) values that do not exist on a page. For + * example, instead of storing ""Blart Versenwald III", a writer may set + * min_values[i]="B", max_values[i]="C". Such more compact values must still + * be valid values within the column's logical type. Readers must make sure + * that list entries are populated before using them by inspecting null_pages. + */ + 2: required list min_values + 3: required list max_values + + /** + * Stores whether both min_values and max_values are orderd and if so, in + * which direction. This allows readers to perform binary searches in both + * lists. Readers cannot assume that max_values[i] <= min_values[i+1], even + * if the lists are ordered. 
+ */ + 4: required BoundaryOrder boundary_order + + /** A list containing the number of null values for each page **/ + 5: optional list null_counts +} + +/** + * Description for file metadata + */ +struct FileMetaData { + /** Version of this file **/ + 1: required i32 version + + /** Parquet schema for this file. This schema contains metadata for all the columns. + * The schema is represented as a tree with a single root. The nodes of the tree + * are flattened to a list by doing a depth-first traversal. + * The column metadata contains the path in the schema for that column which can be + * used to map columns to nodes in the schema. + * The first element is the root **/ + 2: required list schema; + + /** Number of rows in this file **/ + 3: required i64 num_rows + + /** Row groups in this file **/ + 4: required list row_groups + + /** Optional key/value metadata **/ + 5: optional list key_value_metadata + + /** String for application that wrote this file. This should be in the format + * version (build ). + * e.g. impala version 1.0 (build 6cf94d29b2b7115df4de2c06e2ab4326d721eb55) + **/ + 6: optional string created_by + + /** + * Sort order used for the min_value and max_value fields of each column in + * this file. Each sort order corresponds to one column, determined by its + * position in the list, matching the position of the column in the schema. + * + * Without column_orders, the meaning of the min_value and max_value fields is + * undefined. To ensure well-defined behaviour, if min_value and max_value are + * written to a Parquet file, column_orders must be written as well. + * + * The obsolete min and max fields are always sorted by signed comparison + * regardless of column_orders. 
+ */ + 7: optional list column_orders; +} + diff --git a/vendor/github.com/minio/parquet-go/table.go b/vendor/github.com/minio/parquet-go/table.go new file mode 100644 index 000000000..17db2d47f --- /dev/null +++ b/vendor/github.com/minio/parquet-go/table.go @@ -0,0 +1,124 @@ +/* + * Minio Cloud Storage, (C) 2018 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package parquet + +import "github.com/minio/parquet-go/gen-go/parquet" + +func getTableValues(values interface{}, valueType parquet.Type) (tableValues []interface{}) { + switch valueType { + case parquet.Type_BOOLEAN: + for _, v := range values.([]bool) { + tableValues = append(tableValues, v) + } + case parquet.Type_INT32: + for _, v := range values.([]int32) { + tableValues = append(tableValues, v) + } + case parquet.Type_INT64: + for _, v := range values.([]int64) { + tableValues = append(tableValues, v) + } + case parquet.Type_FLOAT: + for _, v := range values.([]float32) { + tableValues = append(tableValues, v) + } + case parquet.Type_DOUBLE: + for _, v := range values.([]float64) { + tableValues = append(tableValues, v) + } + case parquet.Type_INT96, parquet.Type_BYTE_ARRAY, parquet.Type_FIXED_LEN_BYTE_ARRAY: + for _, v := range values.([][]byte) { + tableValues = append(tableValues, v) + } + } + + return tableValues +} + +type table struct { + RepetitionType parquet.FieldRepetitionType + Type parquet.Type + MaxDefinitionLevel int32 + MaxRepetitionLevel int32 + Path []string 
// Path of this column + Values []interface{} // Parquet values + DefinitionLevels []int32 // Definition Levels slice + RepetitionLevels []int32 // Repetition Levels slice +} + +func newTableFromTable(srcTable *table) *table { + if srcTable == nil { + return nil + } + + return &table{ + Type: srcTable.Type, + Path: append([]string{}, srcTable.Path...), + } +} + +func (table *table) Merge(tables ...*table) { + for i := 0; i < len(tables); i++ { + if tables[i] == nil { + continue + } + + table.Values = append(table.Values, tables[i].Values...) + table.RepetitionLevels = append(table.RepetitionLevels, tables[i].RepetitionLevels...) + table.DefinitionLevels = append(table.DefinitionLevels, tables[i].DefinitionLevels...) + + if table.MaxDefinitionLevel < tables[i].MaxDefinitionLevel { + table.MaxDefinitionLevel = tables[i].MaxDefinitionLevel + } + + if table.MaxRepetitionLevel < tables[i].MaxRepetitionLevel { + table.MaxRepetitionLevel = tables[i].MaxRepetitionLevel + } + } +} + +func (table *table) Pop(numRows int64) *table { + result := newTableFromTable(table) + var i, num int64 + for i = int64(0); i < int64(len(table.Values)); i++ { + if table.RepetitionLevels[i] == 0 { + if num >= numRows { + break + } + + num++ + } + + if result.MaxRepetitionLevel < table.RepetitionLevels[i] { + result.MaxRepetitionLevel = table.RepetitionLevels[i] + } + + if result.MaxDefinitionLevel < table.DefinitionLevels[i] { + result.MaxDefinitionLevel = table.DefinitionLevels[i] + } + } + + result.RepetitionLevels = table.RepetitionLevels[:i] + result.DefinitionLevels = table.DefinitionLevels[:i] + result.Values = table.Values[:i] + + table.RepetitionLevels = table.RepetitionLevels[i:] + table.DefinitionLevels = table.DefinitionLevels[i:] + table.Values = table.Values[i:] + + return result +} diff --git a/vendor/github.com/pierrec/lz4/LICENSE b/vendor/github.com/pierrec/lz4/LICENSE new file mode 100644 index 000000000..bd899d835 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/LICENSE @@ 
-0,0 +1,28 @@ +Copyright (c) 2015, Pierre Curto +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of xxHash nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/vendor/github.com/pierrec/lz4/README.md b/vendor/github.com/pierrec/lz4/README.md new file mode 100644 index 000000000..e71ebd59d --- /dev/null +++ b/vendor/github.com/pierrec/lz4/README.md @@ -0,0 +1,24 @@ +[![godoc](https://godoc.org/github.com/pierrec/lz4?status.png)](https://godoc.org/github.com/pierrec/lz4) + +# lz4 +LZ4 compression and decompression in pure Go. 
+
+## Usage
+
+```go
+import "github.com/pierrec/lz4"
+```
+
+## Description
+Package lz4 implements reading and writing lz4 compressed data (a frame),
+as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html.
+
+This package is **compatible with the LZ4 frame format** although the block level compression
+and decompression functions are exposed and are fully compatible with the lz4 block format
+definition, they are low level and should not be used directly.
+
+For a complete description of an lz4 compressed block, see:
+http://fastcompression.blogspot.fr/2011/05/lz4-explained.html
+
+See https://github.com/Cyan4973/lz4 for the reference C implementation.
+
diff --git a/vendor/github.com/pierrec/lz4/block.go b/vendor/github.com/pierrec/lz4/block.go
new file mode 100644
index 000000000..ef24f17e5
--- /dev/null
+++ b/vendor/github.com/pierrec/lz4/block.go
@@ -0,0 +1,397 @@
+package lz4
+
+import (
+	"encoding/binary"
+	"errors"
+)
+
+var (
+	// ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBlock when a compressed
+	// block is corrupted or the destination buffer is not large enough for the uncompressed data.
+	ErrInvalidSourceShortBuffer = errors.New("lz4: invalid source or destination buffer too short")
+	// ErrInvalid is returned when reading an invalid LZ4 archive.
+	ErrInvalid = errors.New("lz4: bad magic number")
+)
+
+// blockHash hashes 4 bytes into a value < winSize.
+func blockHash(x uint32) uint32 {
+	const hasher uint32 = 2654435761 // Knuth multiplicative hash.
+	return x * hasher >> hashShift
+}
+
+// CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible.
+func CompressBlockBound(n int) int {
+	return n + n/255 + 16
+}
+
+// UncompressBlock uncompresses the source buffer into the destination one,
+// and returns the uncompressed size.
+//
+// The destination buffer must be sized appropriately.
+// +// An error is returned if the source data is invalid or the destination buffer is too small. +func UncompressBlock(src, dst []byte) (si int, err error) { + defer func() { + // It is now faster to let the runtime panic and recover on out of bound slice access + // than checking indices as we go along. + if recover() != nil { + err = ErrInvalidSourceShortBuffer + } + }() + sn := len(src) + if sn == 0 { + return 0, nil + } + var di int + + for { + // Literals and match lengths (token). + b := int(src[si]) + si++ + + // Literals. + if lLen := b >> 4; lLen > 0 { + if lLen == 0xF { + for src[si] == 0xFF { + lLen += 0xFF + si++ + } + lLen += int(src[si]) + si++ + } + i := si + si += lLen + di += copy(dst[di:], src[i:si]) + + if si >= sn { + return di, nil + } + } + + si++ + _ = src[si] // Bound check elimination. + offset := int(src[si-1]) | int(src[si])<<8 + si++ + + // Match. + mLen := b & 0xF + if mLen == 0xF { + for src[si] == 0xFF { + mLen += 0xFF + si++ + } + mLen += int(src[si]) + si++ + } + mLen += minMatch + + // Copy the match. + i := di - offset + if offset > 0 && mLen >= offset { + // Efficiently copy the match dst[di-offset:di] into the dst slice. + bytesToCopy := offset * (mLen / offset) + expanded := dst[i:] + for n := offset; n <= bytesToCopy+offset; n *= 2 { + copy(expanded[n:], expanded[:n]) + } + di += bytesToCopy + mLen -= bytesToCopy + } + di += copy(dst[di:], dst[i:i+mLen]) + } +} + +// CompressBlock compresses the source buffer into the destination one. +// This is the fast version of LZ4 compression and also the default one. +// The size of hashTable must be at least 64Kb. +// +// The size of the compressed data is returned. If it is 0 and no error, then the data is incompressible. +// +// An error is returned if the destination buffer is too small. 
+func CompressBlock(src, dst []byte, hashTable []int) (di int, err error) { + defer func() { + if recover() != nil { + err = ErrInvalidSourceShortBuffer + } + }() + + sn, dn := len(src)-mfLimit, len(dst) + if sn <= 0 || dn == 0 { + return 0, nil + } + var si int + + // Fast scan strategy: the hash table only stores the last 4 bytes sequences. + // const accInit = 1 << skipStrength + + anchor := si // Position of the current literals. + // acc := accInit // Variable step: improves performance on non-compressible data. + + for si < sn { + // Hash the next 4 bytes (sequence)... + match := binary.LittleEndian.Uint32(src[si:]) + h := blockHash(match) + + ref := hashTable[h] + hashTable[h] = si + if ref >= sn { // Invalid reference (dirty hashtable). + si++ + continue + } + offset := si - ref + if offset <= 0 || offset >= winSize || // Out of window. + match != binary.LittleEndian.Uint32(src[ref:]) { // Hash collision on different matches. + // si += acc >> skipStrength + // acc++ + si++ + continue + } + + // Match found. + // acc = accInit + lLen := si - anchor // Literal length. + + // Encode match length part 1. + si += minMatch + mLen := si // Match length has minMatch already. + // Find the longest match, first looking by batches of 8 bytes. + for si < sn && binary.LittleEndian.Uint64(src[si:]) == binary.LittleEndian.Uint64(src[si-offset:]) { + si += 8 + } + // Then byte by byte. + for si < sn && src[si] == src[si-offset] { + si++ + } + + mLen = si - mLen + if mLen < 0xF { + dst[di] = byte(mLen) + } else { + dst[di] = 0xF + } + + // Encode literals length. + if lLen < 0xF { + dst[di] |= byte(lLen << 4) + } else { + dst[di] |= 0xF0 + di++ + l := lLen - 0xF + for ; l >= 0xFF; l -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(l) + } + di++ + + // Literals. + copy(dst[di:], src[anchor:anchor+lLen]) + di += lLen + 2 + anchor = si + + // Encode offset. + _ = dst[di] // Bound check elimination. 
+ dst[di-2], dst[di-1] = byte(offset), byte(offset>>8) + + // Encode match length part 2. + if mLen >= 0xF { + for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(mLen) + di++ + } + } + + if anchor == 0 { + // Incompressible. + return 0, nil + } + + // Last literals. + lLen := len(src) - anchor + if lLen < 0xF { + dst[di] = byte(lLen << 4) + } else { + dst[di] = 0xF0 + di++ + for lLen -= 0xF; lLen >= 0xFF; lLen -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(lLen) + } + di++ + + // Write the last literals. + if di >= anchor { + // Incompressible. + return 0, nil + } + di += copy(dst[di:], src[anchor:]) + return di, nil +} + +// CompressBlockHC compresses the source buffer src into the destination dst +// with max search depth (use 0 or negative value for no max). +// +// CompressBlockHC compression ratio is better than CompressBlock but it is also slower. +// +// The size of the compressed data is returned. If it is 0 and no error, then the data is not compressible. +// +// An error is returned if the destination buffer is too small. +func CompressBlockHC(src, dst []byte, depth int) (di int, err error) { + defer func() { + if recover() != nil { + err = ErrInvalidSourceShortBuffer + } + }() + + sn, dn := len(src)-mfLimit, len(dst) + if sn <= 0 || dn == 0 { + return 0, nil + } + var si int + + // hashTable: stores the last position found for a given hash + // chaingTable: stores previous positions for a given hash + var hashTable, chainTable [winSize]int + + if depth <= 0 { + depth = winSize + } + + anchor := si + for si < sn { + // Hash the next 4 bytes (sequence). + match := binary.LittleEndian.Uint32(src[si:]) + h := blockHash(match) + + // Follow the chain until out of window and give the longest match. 
+ mLen := 0 + offset := 0 + for next, try := hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next = chainTable[next&winMask] { + // The first (mLen==0) or next byte (mLen>=minMatch) at current match length + // must match to improve on the match length. + if src[next+mLen] != src[si+mLen] { + continue + } + ml := 0 + // Compare the current position with a previous with the same hash. + for ml < sn-si && binary.LittleEndian.Uint64(src[next+ml:]) == binary.LittleEndian.Uint64(src[si+ml:]) { + ml += 8 + } + for ml < sn-si && src[next+ml] == src[si+ml] { + ml++ + } + if ml+1 < minMatch || ml <= mLen { + // Match too small ( winStart { + winStart = ws + } + for si, ml := winStart, si+mLen; si < ml; { + match >>= 8 + match |= uint32(src[si+3]) << 24 + h := blockHash(match) + chainTable[si&winMask] = hashTable[h] + hashTable[h] = si + si++ + } + + lLen := si - anchor + si += mLen + mLen -= minMatch // Match length does not include minMatch. + + if mLen < 0xF { + dst[di] = byte(mLen) + } else { + dst[di] = 0xF + } + + // Encode literals length. + if lLen < 0xF { + dst[di] |= byte(lLen << 4) + } else { + dst[di] |= 0xF0 + di++ + l := lLen - 0xF + for ; l >= 0xFF; l -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(l) + } + di++ + + // Literals. + copy(dst[di:], src[anchor:anchor+lLen]) + di += lLen + anchor = si + + // Encode offset. + di += 2 + dst[di-2], dst[di-1] = byte(offset), byte(offset>>8) + + // Encode match length part 2. + if mLen >= 0xF { + for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(mLen) + di++ + } + } + + if anchor == 0 { + // Incompressible. + return 0, nil + } + + // Last literals. + lLen := len(src) - anchor + if lLen < 0xF { + dst[di] = byte(lLen << 4) + } else { + dst[di] = 0xF0 + di++ + lLen -= 0xF + for ; lLen >= 0xFF; lLen -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(lLen) + } + di++ + + // Write the last literals. + if di >= anchor { + // Incompressible. 
+ return 0, nil + } + di += copy(dst[di:], src[anchor:]) + return di, nil +} diff --git a/vendor/github.com/pierrec/lz4/debug.go b/vendor/github.com/pierrec/lz4/debug.go new file mode 100644 index 000000000..bc5e78d40 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/debug.go @@ -0,0 +1,23 @@ +// +build lz4debug + +package lz4 + +import ( + "fmt" + "os" + "path/filepath" + "runtime" +) + +const debugFlag = true + +func debug(args ...interface{}) { + _, file, line, _ := runtime.Caller(1) + file = filepath.Base(file) + + f := fmt.Sprintf("LZ4: %s:%d %s", file, line, args[0]) + if f[len(f)-1] != '\n' { + f += "\n" + } + fmt.Fprintf(os.Stderr, f, args[1:]...) +} diff --git a/vendor/github.com/pierrec/lz4/debug_stub.go b/vendor/github.com/pierrec/lz4/debug_stub.go new file mode 100644 index 000000000..44211ad96 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/debug_stub.go @@ -0,0 +1,7 @@ +// +build !lz4debug + +package lz4 + +const debugFlag = false + +func debug(args ...interface{}) {} diff --git a/vendor/github.com/pierrec/lz4/go.mod b/vendor/github.com/pierrec/lz4/go.mod new file mode 100644 index 000000000..f9f570aa9 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/go.mod @@ -0,0 +1,3 @@ +module github.com/pierrec/lz4 + +require github.com/pkg/profile v1.2.1 diff --git a/vendor/github.com/pierrec/lz4/go.sum b/vendor/github.com/pierrec/lz4/go.sum new file mode 100644 index 000000000..6ca759812 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/go.sum @@ -0,0 +1,2 @@ +github.com/pkg/profile v1.2.1 h1:F++O52m40owAmADcojzM+9gyjmMOY/T4oYJkgFDH8RE= +github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= diff --git a/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go b/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go new file mode 100644 index 000000000..850a6fdf6 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/internal/xxh32/xxh32zero.go @@ -0,0 +1,222 @@ +// Package xxh32 implements the very fast XXH hashing algorithm (32 
bits version). +// (https://github.com/Cyan4973/XXH/) +package xxh32 + +import ( + "encoding/binary" +) + +const ( + prime32_1 uint32 = 2654435761 + prime32_2 uint32 = 2246822519 + prime32_3 uint32 = 3266489917 + prime32_4 uint32 = 668265263 + prime32_5 uint32 = 374761393 + + prime32_1plus2 uint32 = 606290984 + prime32_minus1 uint32 = 1640531535 +) + +// XXHZero represents an xxhash32 object with seed 0. +type XXHZero struct { + v1 uint32 + v2 uint32 + v3 uint32 + v4 uint32 + totalLen uint64 + buf [16]byte + bufused int +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (xxh XXHZero) Sum(b []byte) []byte { + h32 := xxh.Sum32() + return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24)) +} + +// Reset resets the Hash to its initial state. +func (xxh *XXHZero) Reset() { + xxh.v1 = prime32_1plus2 + xxh.v2 = prime32_2 + xxh.v3 = 0 + xxh.v4 = prime32_minus1 + xxh.totalLen = 0 + xxh.bufused = 0 +} + +// Size returns the number of bytes returned by Sum(). +func (xxh *XXHZero) Size() int { + return 4 +} + +// BlockSize gives the minimum number of bytes accepted by Write(). +func (xxh *XXHZero) BlockSize() int { + return 1 +} + +// Write adds input bytes to the Hash. +// It never returns an error. +func (xxh *XXHZero) Write(input []byte) (int, error) { + if xxh.totalLen == 0 { + xxh.Reset() + } + n := len(input) + m := xxh.bufused + + xxh.totalLen += uint64(n) + + r := len(xxh.buf) - m + if n < r { + copy(xxh.buf[m:], input) + xxh.bufused += len(input) + return n, nil + } + + p := 0 + // Causes compiler to work directly from registers instead of stack: + v1, v2, v3, v4 := xxh.v1, xxh.v2, xxh.v3, xxh.v4 + if m > 0 { + // some data left from previous update + copy(xxh.buf[xxh.bufused:], input[:r]) + xxh.bufused += len(input) - r + + // fast rotl(13) + buf := xxh.buf[:16] // BCE hint. 
+ v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime32_2) * prime32_1 + v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime32_2) * prime32_1 + v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime32_2) * prime32_1 + v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime32_2) * prime32_1 + p = r + xxh.bufused = 0 + } + + for n := n - 16; p <= n; p += 16 { + sub := input[p:][:16] //BCE hint for compiler + v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime32_2) * prime32_1 + v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime32_2) * prime32_1 + v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime32_2) * prime32_1 + v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime32_2) * prime32_1 + } + xxh.v1, xxh.v2, xxh.v3, xxh.v4 = v1, v2, v3, v4 + + copy(xxh.buf[xxh.bufused:], input[p:]) + xxh.bufused += len(input) - p + + return n, nil +} + +// Sum32 returns the 32 bits Hash value. +func (xxh *XXHZero) Sum32() uint32 { + h32 := uint32(xxh.totalLen) + if h32 >= 16 { + h32 += rol1(xxh.v1) + rol7(xxh.v2) + rol12(xxh.v3) + rol18(xxh.v4) + } else { + h32 += prime32_5 + } + + p := 0 + n := xxh.bufused + buf := xxh.buf + for n := n - 4; p <= n; p += 4 { + h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime32_3 + h32 = rol17(h32) * prime32_4 + } + for ; p < n; p++ { + h32 += uint32(buf[p]) * prime32_5 + h32 = rol11(h32) * prime32_1 + } + + h32 ^= h32 >> 15 + h32 *= prime32_2 + h32 ^= h32 >> 13 + h32 *= prime32_3 + h32 ^= h32 >> 16 + + return h32 +} + +// ChecksumZero returns the 32bits Hash value. 
+func ChecksumZero(input []byte) uint32 { + n := len(input) + h32 := uint32(n) + + if n < 16 { + h32 += prime32_5 + } else { + v1 := prime32_1plus2 + v2 := prime32_2 + v3 := uint32(0) + v4 := prime32_minus1 + p := 0 + for n := n - 16; p <= n; p += 16 { + sub := input[p:][:16] //BCE hint for compiler + v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime32_2) * prime32_1 + v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime32_2) * prime32_1 + v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime32_2) * prime32_1 + v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime32_2) * prime32_1 + } + input = input[p:] + n -= p + h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + } + + p := 0 + for n := n - 4; p <= n; p += 4 { + h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime32_3 + h32 = rol17(h32) * prime32_4 + } + for p < n { + h32 += uint32(input[p]) * prime32_5 + h32 = rol11(h32) * prime32_1 + p++ + } + + h32 ^= h32 >> 15 + h32 *= prime32_2 + h32 ^= h32 >> 13 + h32 *= prime32_3 + h32 ^= h32 >> 16 + + return h32 +} + +// Uint32Zero hashes x with seed 0. 
+func Uint32Zero(x uint32) uint32 { + h := prime32_5 + 4 + x*prime32_3 + h = rol17(h) * prime32_4 + h ^= h >> 15 + h *= prime32_2 + h ^= h >> 13 + h *= prime32_3 + h ^= h >> 16 + return h +} + +func rol1(u uint32) uint32 { + return u<<1 | u>>31 +} + +func rol7(u uint32) uint32 { + return u<<7 | u>>25 +} + +func rol11(u uint32) uint32 { + return u<<11 | u>>21 +} + +func rol12(u uint32) uint32 { + return u<<12 | u>>20 +} + +func rol13(u uint32) uint32 { + return u<<13 | u>>19 +} + +func rol17(u uint32) uint32 { + return u<<17 | u>>15 +} + +func rol18(u uint32) uint32 { + return u<<18 | u>>14 +} diff --git a/vendor/github.com/pierrec/lz4/lz4.go b/vendor/github.com/pierrec/lz4/lz4.go new file mode 100644 index 000000000..35802756c --- /dev/null +++ b/vendor/github.com/pierrec/lz4/lz4.go @@ -0,0 +1,68 @@ +// Package lz4 implements reading and writing lz4 compressed data (a frame), +// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html. +// +// Although the block level compression and decompression functions are exposed and are fully compatible +// with the lz4 block format definition, they are low level and should not be used directly. +// For a complete description of an lz4 compressed block, see: +// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html +// +// See https://github.com/Cyan4973/lz4 for the reference C implementation. +// +package lz4 + +const ( + // Extension is the LZ4 frame file name extension + Extension = ".lz4" + // Version is the LZ4 frame format version + Version = 1 + + frameMagic uint32 = 0x184D2204 + frameSkipMagic uint32 = 0x184D2A50 + + // The following constants are used to setup the compression algorithm. 
+ minMatch = 4 // the minimum size of the match sequence size (4 bytes) + winSizeLog = 16 // LZ4 64Kb window size limit + winSize = 1 << winSizeLog + winMask = winSize - 1 // 64Kb window of previous data for dependent blocks + compressedBlockFlag = 1 << 31 + compressedBlockMask = compressedBlockFlag - 1 + + // hashLog determines the size of the hash table used to quickly find a previous match position. + // Its value influences the compression speed and memory usage, the lower the faster, + // but at the expense of the compression ratio. + // 16 seems to be the best compromise. + hashLog = 16 + hashTableSize = 1 << hashLog + hashShift = uint((minMatch * 8) - hashLog) + + mfLimit = 8 + minMatch // The last match cannot start within the last 12 bytes. + skipStrength = 6 // variable step for fast scan +) + +// map the block max size id with its value in bytes: 64Kb, 256Kb, 1Mb and 4Mb. +var ( + bsMapID = map[byte]int{4: 64 << 10, 5: 256 << 10, 6: 1 << 20, 7: 4 << 20} + bsMapValue = make(map[int]byte, len(bsMapID)) +) + +// Reversed. +func init() { + for i, v := range bsMapID { + bsMapValue[v] = i + } +} + +// Header describes the various flags that can be set on a Writer or obtained from a Reader. +// The default values match those of the LZ4 frame format definition +// (http://fastcompression.blogspot.com/2013/04/lz4-streaming-format-final.html). +// +// NB. in a Reader, in case of concatenated frames, the Header values may change between Read() calls. +// It is the caller responsibility to check them if necessary. +type Header struct { + BlockChecksum bool // Compressed blocks checksum flag. + NoChecksum bool // Frame checksum flag. + BlockMaxSize int // Size of the uncompressed data block (one of [64KB, 256KB, 1MB, 4MB]). Default=4MB. + Size uint64 // Frame total size. It is _not_ computed by the Writer. + CompressionLevel int // Compression level (higher is better, use 0 for fastest compression). + done bool // Header processed flag (Read or Write and checked). 
+} diff --git a/vendor/github.com/pierrec/lz4/lz4_go1.10.go b/vendor/github.com/pierrec/lz4/lz4_go1.10.go new file mode 100644 index 000000000..9a0fb0070 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/lz4_go1.10.go @@ -0,0 +1,29 @@ +//+build go1.10 + +package lz4 + +import ( + "fmt" + "strings" +) + +func (h Header) String() string { + var s strings.Builder + + s.WriteString(fmt.Sprintf("%T{", h)) + if h.BlockChecksum { + s.WriteString("BlockChecksum: true ") + } + if h.NoChecksum { + s.WriteString("NoChecksum: true ") + } + if bs := h.BlockMaxSize; bs != 0 && bs != 4<<20 { + s.WriteString(fmt.Sprintf("BlockMaxSize: %d ", bs)) + } + if l := h.CompressionLevel; l != 0 { + s.WriteString(fmt.Sprintf("CompressionLevel: %d ", l)) + } + s.WriteByte('}') + + return s.String() +} diff --git a/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go b/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go new file mode 100644 index 000000000..12c761a2e --- /dev/null +++ b/vendor/github.com/pierrec/lz4/lz4_notgo1.10.go @@ -0,0 +1,29 @@ +//+build !go1.10 + +package lz4 + +import ( + "bytes" + "fmt" +) + +func (h Header) String() string { + var s bytes.Buffer + + s.WriteString(fmt.Sprintf("%T{", h)) + if h.BlockChecksum { + s.WriteString("BlockChecksum: true ") + } + if h.NoChecksum { + s.WriteString("NoChecksum: true ") + } + if bs := h.BlockMaxSize; bs != 0 && bs != 4<<20 { + s.WriteString(fmt.Sprintf("BlockMaxSize: %d ", bs)) + } + if l := h.CompressionLevel; l != 0 { + s.WriteString(fmt.Sprintf("CompressionLevel: %d ", l)) + } + s.WriteByte('}') + + return s.String() +} diff --git a/vendor/github.com/pierrec/lz4/reader.go b/vendor/github.com/pierrec/lz4/reader.go new file mode 100644 index 000000000..f08db47df --- /dev/null +++ b/vendor/github.com/pierrec/lz4/reader.go @@ -0,0 +1,295 @@ +package lz4 + +import ( + "encoding/binary" + "fmt" + "io" + "io/ioutil" + + "github.com/pierrec/lz4/internal/xxh32" +) + +// Reader implements the LZ4 frame decoder. 
+// The Header is set after the first call to Read().
+// The Header may change between Read() calls in case of concatenated frames.
+type Reader struct {
+	Header
+
+	buf      [8]byte       // Scrap buffer.
+	pos      int64         // Current position in src.
+	src      io.Reader     // Source.
+	zdata    []byte        // Compressed data.
+	data     []byte        // Uncompressed data.
+	idx      int           // Index of unread bytes into data.
+	checksum xxh32.XXHZero // Frame hash.
+}
+
+// NewReader returns a new LZ4 frame decoder.
+// No access to the underlying io.Reader is performed.
+func NewReader(src io.Reader) *Reader {
+	r := &Reader{src: src}
+	return r
+}
+
+// readHeader checks the frame magic number and parses the frame descriptor.
+// Skippable frames are supported even as a first frame although the LZ4
+// specification recommends skippable frames not to be used as first frames.
+func (z *Reader) readHeader(first bool) error {
+	defer z.checksum.Reset()
+
+	buf := z.buf[:]
+	for {
+		magic, err := z.readUint32()
+		if err != nil {
+			z.pos += 4
+			if !first && err == io.ErrUnexpectedEOF {
+				return io.EOF
+			}
+			return err
+		}
+		if magic == frameMagic {
+			break
+		}
+		if magic>>8 != frameSkipMagic>>8 {
+			return ErrInvalid
+		}
+		skipSize, err := z.readUint32()
+		if err != nil {
+			return err
+		}
+		z.pos += 4
+		m, err := io.CopyN(ioutil.Discard, z.src, int64(skipSize))
+		if err != nil {
+			return err
+		}
+		z.pos += m
+	}
+
+	// Header.
+ if _, err := io.ReadFull(z.src, buf[:2]); err != nil { + return err + } + z.pos += 8 + + b := buf[0] + if v := b >> 6; v != Version { + return fmt.Errorf("lz4: invalid version: got %d; expected %d", v, Version) + } + if b>>5&1 == 0 { + return fmt.Errorf("lz4: block dependency not supported") + } + z.BlockChecksum = b>>4&1 > 0 + frameSize := b>>3&1 > 0 + z.NoChecksum = b>>2&1 == 0 + + bmsID := buf[1] >> 4 & 0x7 + bSize, ok := bsMapID[bmsID] + if !ok { + return fmt.Errorf("lz4: invalid block max size ID: %d", bmsID) + } + z.BlockMaxSize = bSize + + // Allocate the compressed/uncompressed buffers. + // The compressed buffer cannot exceed the uncompressed one. + if n := 2 * bSize; cap(z.zdata) < n { + z.zdata = make([]byte, n, n) + } + if debugFlag { + debug("header block max size id=%d size=%d", bmsID, bSize) + } + z.zdata = z.zdata[:bSize] + z.data = z.zdata[:cap(z.zdata)][bSize:] + z.idx = len(z.data) + + z.checksum.Write(buf[0:2]) + + if frameSize { + buf := buf[:8] + if _, err := io.ReadFull(z.src, buf); err != nil { + return err + } + z.Size = binary.LittleEndian.Uint64(buf) + z.pos += 8 + z.checksum.Write(buf) + } + + // Header checksum. + if _, err := io.ReadFull(z.src, buf[:1]); err != nil { + return err + } + z.pos++ + if h := byte(z.checksum.Sum32() >> 8 & 0xFF); h != buf[0] { + return fmt.Errorf("lz4: invalid header checksum: got %x; expected %x", buf[0], h) + } + + z.Header.done = true + if debugFlag { + debug("header read: %v", z.Header) + } + + return nil +} + +// Read decompresses data from the underlying source into the supplied buffer. +// +// Since there can be multiple streams concatenated, Header values may +// change between calls to Read(). If that is the case, no data is actually read from +// the underlying io.Reader, to allow for potential input buffer resizing. 
+func (z *Reader) Read(buf []byte) (int, error) { + if debugFlag { + debug("Read buf len=%d", len(buf)) + } + if !z.Header.done { + if err := z.readHeader(true); err != nil { + return 0, err + } + if debugFlag { + debug("header read OK compressed buffer %d / %d uncompressed buffer %d : %d index=%d", + len(z.zdata), cap(z.zdata), len(z.data), cap(z.data), z.idx) + } + } + + if len(buf) == 0 { + return 0, nil + } + + if z.idx == len(z.data) { + // No data ready for reading, process the next block. + if debugFlag { + debug("reading block from writer") + } + // Block length: 0 = end of frame, highest bit set: uncompressed. + bLen, err := z.readUint32() + if err != nil { + return 0, err + } + z.pos += 4 + + if bLen == 0 { + // End of frame reached. + if !z.NoChecksum { + // Validate the frame checksum. + checksum, err := z.readUint32() + if err != nil { + return 0, err + } + if debugFlag { + debug("frame checksum got=%x / want=%x", z.checksum.Sum32(), checksum) + } + z.pos += 4 + if h := z.checksum.Sum32(); checksum != h { + return 0, fmt.Errorf("lz4: invalid frame checksum: got %x; expected %x", h, checksum) + } + } + + // Get ready for the next concatenated frame and keep the position. + pos := z.pos + z.Reset(z.src) + z.pos = pos + + // Since multiple frames can be concatenated, check for more. + return 0, z.readHeader(false) + } + + if debugFlag { + debug("raw block size %d", bLen) + } + if bLen&compressedBlockFlag > 0 { + // Uncompressed block. 
+ bLen &= compressedBlockMask + if debugFlag { + debug("uncompressed block size %d", bLen) + } + if int(bLen) > cap(z.data) { + return 0, fmt.Errorf("lz4: invalid block size: %d", bLen) + } + z.data = z.data[:bLen] + if _, err := io.ReadFull(z.src, z.data); err != nil { + return 0, err + } + z.pos += int64(bLen) + + if z.BlockChecksum { + checksum, err := z.readUint32() + if err != nil { + return 0, err + } + z.pos += 4 + + if h := xxh32.ChecksumZero(z.data); h != checksum { + return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum) + } + } + + } else { + // Compressed block. + if debugFlag { + debug("compressed block size %d", bLen) + } + if int(bLen) > cap(z.data) { + return 0, fmt.Errorf("lz4: invalid block size: %d", bLen) + } + zdata := z.zdata[:bLen] + if _, err := io.ReadFull(z.src, zdata); err != nil { + return 0, err + } + z.pos += int64(bLen) + + if z.BlockChecksum { + checksum, err := z.readUint32() + if err != nil { + return 0, err + } + z.pos += 4 + + if h := xxh32.ChecksumZero(zdata); h != checksum { + return 0, fmt.Errorf("lz4: invalid block checksum: got %x; expected %x", h, checksum) + } + } + + n, err := UncompressBlock(zdata, z.data) + if err != nil { + return 0, err + } + z.data = z.data[:n] + } + + if !z.NoChecksum { + z.checksum.Write(z.data) + if debugFlag { + debug("current frame checksum %x", z.checksum.Sum32()) + } + } + z.idx = 0 + } + + n := copy(buf, z.data[z.idx:]) + z.idx += n + if debugFlag { + debug("copied %d bytes to input", n) + } + + return n, nil +} + +// Reset discards the Reader's state and makes it equivalent to the +// result of its original state from NewReader, but reading from r instead. +// This permits reusing a Reader rather than allocating a new one. +func (z *Reader) Reset(r io.Reader) { + z.Header = Header{} + z.pos = 0 + z.src = r + z.zdata = z.zdata[:0] + z.data = z.data[:0] + z.idx = 0 + z.checksum.Reset() +} + +// readUint32 reads an uint32 into the supplied buffer. 
+// The idea is to make use of the already allocated buffers avoiding additional allocations. +func (z *Reader) readUint32() (uint32, error) { + buf := z.buf[:4] + _, err := io.ReadFull(z.src, buf) + x := binary.LittleEndian.Uint32(buf) + return x, err +} diff --git a/vendor/github.com/pierrec/lz4/writer.go b/vendor/github.com/pierrec/lz4/writer.go new file mode 100644 index 000000000..012043802 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/writer.go @@ -0,0 +1,267 @@ +package lz4 + +import ( + "encoding/binary" + "fmt" + "io" + + "github.com/pierrec/lz4/internal/xxh32" +) + +// Writer implements the LZ4 frame encoder. +type Writer struct { + Header + + buf [19]byte // magic number(4) + header(flags(2)+[Size(8)+DictID(4)]+checksum(1)) does not exceed 19 bytes + dst io.Writer // Destination. + checksum xxh32.XXHZero // Frame checksum. + zdata []byte // Compressed data. + data []byte // Data to be compressed. + idx int // Index into data. + hashtable [winSize]int // Hash table used in CompressBlock(). +} + +// NewWriter returns a new LZ4 frame encoder. +// No access to the underlying io.Writer is performed. +// The supplied Header is checked at the first Write. +// It is ok to change it before the first Write but then not until a Reset() is performed. +func NewWriter(dst io.Writer) *Writer { + return &Writer{dst: dst} +} + +// writeHeader builds and writes the header (magic+header) to the underlying io.Writer. +func (z *Writer) writeHeader() error { + // Default to 4Mb if BlockMaxSize is not set. + if z.Header.BlockMaxSize == 0 { + z.Header.BlockMaxSize = bsMapID[7] + } + // The only option that needs to be validated. + bSize := z.Header.BlockMaxSize + bSizeID, ok := bsMapValue[bSize] + if !ok { + return fmt.Errorf("lz4: invalid block max size: %d", bSize) + } + // Allocate the compressed/uncompressed buffers. + // The compressed buffer cannot exceed the uncompressed one. 
+ if n := 2 * bSize; cap(z.zdata) < n { + z.zdata = make([]byte, n, n) + } + z.zdata = z.zdata[:bSize] + z.data = z.zdata[:cap(z.zdata)][bSize:] + z.idx = 0 + + // Size is optional. + buf := z.buf[:] + + // Set the fixed size data: magic number, block max size and flags. + binary.LittleEndian.PutUint32(buf[0:], frameMagic) + flg := byte(Version << 6) + flg |= 1 << 5 // No block dependency. + if z.Header.BlockChecksum { + flg |= 1 << 4 + } + if z.Header.Size > 0 { + flg |= 1 << 3 + } + if !z.Header.NoChecksum { + flg |= 1 << 2 + } + buf[4] = flg + buf[5] = bSizeID << 4 + + // Current buffer size: magic(4) + flags(1) + block max size (1). + n := 6 + // Optional items. + if z.Header.Size > 0 { + binary.LittleEndian.PutUint64(buf[n:], z.Header.Size) + n += 8 + } + + // The header checksum includes the flags, block max size and optional Size. + buf[n] = byte(xxh32.ChecksumZero(buf[4:n]) >> 8 & 0xFF) + z.checksum.Reset() + + // Header ready, write it out. + if _, err := z.dst.Write(buf[0 : n+1]); err != nil { + return err + } + z.Header.done = true + if debugFlag { + debug("wrote header %v", z.Header) + } + + return nil +} + +// Write compresses data from the supplied buffer into the underlying io.Writer. +// Write does not return until the data has been written. +func (z *Writer) Write(buf []byte) (int, error) { + if !z.Header.done { + if err := z.writeHeader(); err != nil { + return 0, err + } + } + if debugFlag { + debug("input buffer len=%d index=%d", len(buf), z.idx) + } + + zn := len(z.data) + var n int + for len(buf) > 0 { + if z.idx == 0 && len(buf) >= zn { + // Avoid a copy as there is enough data for a block. + if err := z.compressBlock(buf[:zn]); err != nil { + return n, err + } + n += zn + buf = buf[zn:] + continue + } + // Accumulate the data to be compressed. 
+ m := copy(z.data[z.idx:], buf) + n += m + z.idx += m + buf = buf[m:] + if debugFlag { + debug("%d bytes copied to buf, current index %d", n, z.idx) + } + + if z.idx < len(z.data) { + // Buffer not filled. + if debugFlag { + debug("need more data for compression") + } + return n, nil + } + + // Buffer full. + if err := z.compressBlock(z.data); err != nil { + return n, err + } + z.idx = 0 + } + + return n, nil +} + +// compressBlock compresses a block. +func (z *Writer) compressBlock(data []byte) error { + if !z.NoChecksum { + z.checksum.Write(data) + } + + // The compressed block size cannot exceed the input's. + var zn int + var err error + + if level := z.Header.CompressionLevel; level != 0 { + zn, err = CompressBlockHC(data, z.zdata, level) + } else { + zn, err = CompressBlock(data, z.zdata, z.hashtable[:]) + } + + var zdata []byte + var bLen uint32 + if debugFlag { + debug("block compression %d => %d", len(data), zn) + } + if err == nil && zn > 0 && zn < len(data) { + // Compressible and compressed size smaller than uncompressed: ok! + bLen = uint32(zn) + zdata = z.zdata[:zn] + } else { + // Uncompressed block. + bLen = uint32(len(data)) | compressedBlockFlag + zdata = data + } + if debugFlag { + debug("block compression to be written len=%d data len=%d", bLen, len(zdata)) + } + + // Write the block. + if err := z.writeUint32(bLen); err != nil { + return err + } + if _, err := z.dst.Write(zdata); err != nil { + return err + } + + if z.BlockChecksum { + checksum := xxh32.ChecksumZero(zdata) + if debugFlag { + debug("block checksum %x", checksum) + } + if err := z.writeUint32(checksum); err != nil { + return err + } + } + if debugFlag { + debug("current frame checksum %x", z.checksum.Sum32()) + } + + return nil +} + +// Flush flushes any pending compressed data to the underlying writer. +// Flush does not return until the data has been written. +// If the underlying writer returns an error, Flush returns that error. 
+func (z *Writer) Flush() error { + if debugFlag { + debug("flush with index %d", z.idx) + } + if z.idx == 0 { + return nil + } + + return z.compressBlock(z.data[:z.idx]) +} + +// Close closes the Writer, flushing any unwritten data to the underlying io.Writer, but does not close the underlying io.Writer. +func (z *Writer) Close() error { + if !z.Header.done { + if err := z.writeHeader(); err != nil { + return err + } + } + + if err := z.Flush(); err != nil { + return err + } + + if debugFlag { + debug("writing last empty block") + } + if err := z.writeUint32(0); err != nil { + return err + } + if !z.NoChecksum { + checksum := z.checksum.Sum32() + if debugFlag { + debug("stream checksum %x", checksum) + } + if err := z.writeUint32(checksum); err != nil { + return err + } + } + return nil +} + +// Reset clears the state of the Writer z such that it is equivalent to its +// initial state from NewWriter, but instead writing to w. +// No access to the underlying io.Writer is performed. +func (z *Writer) Reset(w io.Writer) { + z.Header = Header{} + z.dst = w + z.checksum.Reset() + z.zdata = z.zdata[:0] + z.data = z.data[:0] + z.idx = 0 +} + +// writeUint32 writes a uint32 to the underlying writer. +func (z *Writer) writeUint32(x uint32) error { + buf := z.buf[:4] + binary.LittleEndian.PutUint32(buf, x) + _, err := z.dst.Write(buf) + return err +} diff --git a/vendor/github.com/rasky/go-lzo/LICENSE.gpl b/vendor/github.com/rasky/go-lzo/LICENSE.gpl new file mode 100644 index 000000000..d159169d1 --- /dev/null +++ b/vendor/github.com/rasky/go-lzo/LICENSE.gpl @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. 
+ + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. 
If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. 
You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. 
If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. 
(This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. 
Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. 
If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. 
Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. diff --git a/vendor/github.com/rasky/go-lzo/README.md b/vendor/github.com/rasky/go-lzo/README.md new file mode 100644 index 000000000..00374385a --- /dev/null +++ b/vendor/github.com/rasky/go-lzo/README.md @@ -0,0 +1,36 @@ +# go-lzo + +[![Build status](https://travis-ci.org/rasky/go-lzo.svg)](https://travis-ci.org/rasky/go-lzo) +[![Coverage Status](https://coveralls.io/repos/rasky/go-lzo/badge.svg?branch=master&service=github)](https://coveralls.io/github/rasky/go-lzo?branch=master) + +Native LZO1X implementation in Golang + +This code has been written using the original LZO1X source code as a reference, +to study and understand the algorithms. Both the LZO1X-1 and LZO1X-999 +algorithms are implemented. These are the most popular of the whole LZO suite +of algorithms. + +Being a straightforward port of the original source code, it shares the same +license (GPLv2) as I can't possibly claim any copyright on it. + +I plan to eventually reimplement LZO1X-1 from scratch. At that point, I will be +also changing license. 
+ +# Benchmarks + +These are the benchmarks obtained running the testsuite over the Canterbury +corpus for the available compressor levels: + +Compressor | Level | Original | Compressed | Factor | Time | Speed +-----------|-------|----------|------------|--------|------|------ +LZO1X-1 | - | 18521760 | 8957481 | 51.6% | 0.16s | 109MiB/s +LZO1X-999 | 1 | 18521760 | 8217347 | 55.6% | 1.38s | 13MiB/s +LZO1X-999 | 2 | 18521760 | 7724879 | 58.3% | 1.50s | 12MiB/s +LZO1X-999 | 3 | 18521760 | 7384377 | 60.1% | 1.68s | 10MiB/s +LZO1X-999 | 4 | 18521760 | 7266674 | 60.8% | 1.69s | 10MiB/s +LZO1X-999 | 5 | 18521760 | 6979879 | 62.3% | 2.75s | 6.4MiB/s +LZO1X-999 | 6 | 18521760 | 6938593 | 62.5% | 4.53s | 3.9MiB/s +LZO1X-999 | 7 | 18521760 | 6905362 | 62.7% | 6.94s | 2.5MiB/s +LZO1X-999 | 8 | 18521760 | 6713477 | 63.8% | 20.96s | 863KiB/s +LZO1X-999 | 9 | 18521760 | 6712069 | 63.8% | 22.82s | 792KiB/s + diff --git a/vendor/github.com/rasky/go-lzo/compress.go b/vendor/github.com/rasky/go-lzo/compress.go new file mode 100644 index 000000000..1f1756b20 --- /dev/null +++ b/vendor/github.com/rasky/go-lzo/compress.go @@ -0,0 +1,178 @@ +package lzo + +func appendMulti(out []byte, t int) []byte { + for t > 255 { + out = append(out, 0) + t -= 255 + } + out = append(out, byte(t)) + return out +} + +func compress(in []byte) (out []byte, sz int) { + var m_off int + in_len := len(in) + ip_len := in_len - m2_MAX_LEN - 5 + dict := make([]int32, 1<> 5) & d_MASK + m_pos := int(dict[dindex]) - 1 + if m_pos < 0 { + goto literal + } + if ip == m_pos || (ip-m_pos) > m4_MAX_OFFSET { + goto literal + } + m_off = ip - m_pos + if m_off <= m2_MAX_OFFSET || in[m_pos+3] == in[ip+3] { + goto try_match + } + + dindex = (dindex & (d_MASK & 0x7ff)) ^ (d_HIGH | 0x1f) + m_pos = int(dict[dindex]) - 1 + if m_pos < 0 { + goto literal + } + if ip == m_pos || (ip-m_pos) > m4_MAX_OFFSET { + goto literal + } + m_off = ip - m_pos + if m_off <= m2_MAX_OFFSET || in[m_pos+3] == in[ip+3] { + goto try_match + } + + goto 
literal + + try_match: + if in[m_pos] == in[ip] && in[m_pos+1] == in[ip+1] && in[m_pos+2] == in[ip+2] { + goto match + } + + literal: + dict[dindex] = int32(ip + 1) + ip += 1 + (ip-ii)>>5 + if ip >= ip_len { + break + } + continue + + match: + dict[dindex] = int32(ip + 1) + if ip != ii { + t := ip - ii + if t <= 3 { + out[len(out)-2] |= byte(t) + } else if t <= 18 { + out = append(out, byte(t-3)) + } else { + out = append(out, 0) + out = appendMulti(out, t-18) + } + + out = append(out, in[ii:ii+t]...) + ii += t + } + + var i int + ip += 3 + for i = 3; i < 9; i++ { + ip++ + if in[m_pos+i] != in[ip-1] { + break + } + } + if i < 9 { + ip-- + m_len := ip - ii + if m_off <= m2_MAX_OFFSET { + m_off -= 1 + out = append(out, + byte((((m_len - 1) << 5) | ((m_off & 7) << 2))), + byte((m_off >> 3))) + } else if m_off <= m3_MAX_OFFSET { + m_off -= 1 + out = append(out, + byte(m3_MARKER|(m_len-2)), + byte((m_off&63)<<2), + byte(m_off>>6)) + } else { + m_off -= 0x4000 + out = append(out, + byte(m4_MARKER|((m_off&0x4000)>>11)|(m_len-2)), + byte((m_off&63)<<2), + byte(m_off>>6)) + } + } else { + m := m_pos + m2_MAX_LEN + 1 + for ip < in_len && in[m] == in[ip] { + m++ + ip++ + } + m_len := ip - ii + if m_off <= m3_MAX_OFFSET { + m_off -= 1 + if m_len <= 33 { + out = append(out, byte(m3_MARKER|(m_len-2))) + } else { + m_len -= 33 + out = append(out, byte(m3_MARKER|0)) + out = appendMulti(out, m_len) + } + } else { + m_off -= 0x4000 + if m_len <= m4_MAX_LEN { + out = append(out, byte(m4_MARKER|((m_off&0x4000)>>11)|(m_len-2))) + } else { + m_len -= m4_MAX_LEN + out = append(out, byte(m4_MARKER|((m_off&0x4000)>>11))) + out = appendMulti(out, m_len) + } + } + out = append(out, byte((m_off&63)<<2), byte(m_off>>6)) + } + + ii = ip + if ip >= ip_len { + break + } + } + + sz = in_len - ii + return +} + +// Compress an input buffer with LZO1X +func Compress1X(in []byte) (out []byte) { + var t int + + in_len := len(in) + if in_len <= m2_MAX_LEN+5 { + t = in_len + } else { + out, t = 
compress(in) + } + + if t > 0 { + ii := in_len - t + if len(out) == 0 && t <= 238 { + out = append(out, byte(17+t)) + } else if t <= 3 { + out[len(out)-2] |= byte(t) + } else if t <= 18 { + out = append(out, byte(t-3)) + } else { + out = append(out, 0) + out = appendMulti(out, t-18) + } + out = append(out, in[ii:ii+t]...) + } + + out = append(out, m4_MARKER|1, 0, 0) + return +} diff --git a/vendor/github.com/rasky/go-lzo/compress999.go b/vendor/github.com/rasky/go-lzo/compress999.go new file mode 100644 index 000000000..3560495be --- /dev/null +++ b/vendor/github.com/rasky/go-lzo/compress999.go @@ -0,0 +1,416 @@ +package lzo + +type compressor struct { + in []byte + ip int + bp int + + // stats + matchBytes int + litBytes int + lazy int + + r1lit int + r2lit int + m1am uint + m2m uint + m1bm uint + m3m uint + m4m uint + lit1r uint + lit2r uint + lit3r uint + + r1mlen int + + lastmlen int + lastmoff int + textsize uint + mlen int + moff int + look uint +} + +func (ctx *compressor) codeMatch(out []byte, mlen int, moff int) []byte { + xlen := mlen + xoff := moff + ctx.matchBytes += mlen + + switch { + case mlen == 2: + if moff > m1_MAX_OFFSET { + panic("codeMatch: mlen 2: moff error") + } + if ctx.r1lit < 1 || ctx.r1lit >= 4 { + panic("codeMatch: mlen 2: r1lit error") + } + moff -= 1 + out = append(out, + m1_MARKER|byte((moff&3)<<2), + byte(moff>>2)) + ctx.m1am++ + case mlen <= m2_MAX_LEN && moff <= m2_MAX_OFFSET: + if mlen < 3 { + panic("codeMatch: m2: mlen error") + } + moff -= 1 + out = append(out, + byte((mlen-1)<<5|(moff&7)<<2), + byte(moff>>3)) + if out[len(out)-2] < m2_MARKER { + panic("codeMatch: m2: invalid marker") + } + ctx.m2m++ + case mlen == m2_MIN_LEN && moff <= mX_MAX_OFFSET && ctx.r1lit >= 4: + if mlen != 3 { + panic("codeMatch: m2min: invalid mlen") + } + if moff <= m2_MAX_OFFSET { + panic("codeMatch: m2min: invalid moff") + } + moff -= 1 + m2_MAX_OFFSET + out = append(out, + byte(m1_MARKER|((moff&3)<<2)), + byte(moff>>2)) + ctx.m1bm++ + case moff <= 
m3_MAX_OFFSET: + if mlen < 3 { + panic("codeMatch: m3max: invalid mlen") + } + moff -= 1 + if mlen <= m3_MAX_LEN { + out = append(out, byte(m3_MARKER|(mlen-2))) + } else { + mlen -= m3_MAX_LEN + out = append(out, byte(m3_MARKER|0)) + out = appendMulti(out, mlen) + } + out = append(out, byte(moff<<2), byte(moff>>6)) + ctx.m3m++ + default: + if mlen < 3 { + panic("codeMatch: default: invalid mlen") + } + if moff <= 0x4000 || moff >= 0xc000 { + panic("codeMatch: default: invalid moff") + } + moff -= 0x4000 + k := (moff & 0x4000) >> 11 + if mlen <= m4_MAX_LEN { + out = append(out, byte(m4_MARKER|k|(mlen-2))) + } else { + mlen -= m4_MAX_LEN + out = append(out, byte(m4_MARKER|k|0)) + out = appendMulti(out, mlen) + } + out = append(out, byte(moff<<2), byte(moff>>6)) + ctx.m4m++ + } + + ctx.lastmlen = xlen + ctx.lastmoff = xoff + return out +} + +func (ctx *compressor) storeRun(out []byte, ii int, t int) []byte { + ctx.litBytes += t + + if len(out) == 0 && t <= 238 { + out = append(out, byte(17+t)) + } else if t <= 3 { + out[len(out)-2] |= byte(t) + ctx.lit1r++ + } else if t <= 18 { + out = append(out, byte(t-3)) + ctx.lit2r++ + } else { + out = append(out, 0) + out = appendMulti(out, t-18) + ctx.lit3r++ + } + + out = append(out, ctx.in[ii:ii+t]...) 
+ return out +} + +func (ctx *compressor) codeRun(out []byte, ii int, lit int, mlen int) []byte { + if lit > 0 { + if mlen < 2 { + panic("codeRun: invalid mlen") + } + out = ctx.storeRun(out, ii, lit) + ctx.r1mlen = mlen + ctx.r1lit = lit + } else { + if mlen < 3 { + panic("codeRun: invalid mlen") + } + ctx.r1mlen = 0 + ctx.r1lit = 0 + } + return out +} + +func (ctx *compressor) lenOfCodedMatch(mlen int, moff int, lit int) int { + switch { + case mlen < 2: + return 0 + case mlen == 2: + if moff <= m1_MAX_OFFSET && lit > 0 && lit < 4 { + return 2 + } + return 0 + case mlen <= m2_MAX_LEN && moff <= m2_MAX_OFFSET: + return 2 + case mlen == m2_MIN_LEN && moff <= mX_MAX_OFFSET && lit >= 4: + return 2 + case moff <= m3_MAX_OFFSET: + if mlen <= m3_MAX_LEN { + return 3 + } + n := 4 + mlen -= m3_MAX_LEN + for mlen > 255 { + mlen -= 255 + n++ + } + return n + case moff <= m4_MAX_OFFSET: + if mlen <= m4_MAX_LEN { + return 3 + } + n := 4 + mlen -= m4_MAX_LEN + for mlen > 255 { + mlen -= 255 + n++ + } + return n + default: + return 0 + } +} + +func (ctx *compressor) minGain(ahead int, + lit1, lit2 int, l1, l2, l3 int) int { + + if ahead <= 0 { + panic("minGain: invalid ahead") + } + mingain := int(ahead) + if lit1 <= 3 { + if lit2 > 3 { + mingain += 2 + } + } else if lit1 <= 18 { + if lit2 > 18 { + mingain += 1 + } + } + + mingain += int((l2 - l1) * 2) + if l3 != 0 { + mingain -= int((ahead - l3) * 2) + } + if mingain < 0 { + mingain = 0 + } + return mingain +} + +type parms struct { + TryLazy int + GoodLen uint + MaxLazy uint + NiceLen uint + MaxChain uint + Flags uint32 +} + +func compress999(in []byte, p parms) []byte { + ctx := compressor{} + swd := swd{} + + if p.TryLazy < 0 { + p.TryLazy = 1 + } + if p.GoodLen == 0 { + p.GoodLen = 32 + } + if p.MaxLazy == 0 { + p.MaxLazy = 32 + } + if p.MaxChain == 0 { + p.MaxChain = cSWD_MAX_CHAIN + } + + ctx.in = in + + out := make([]byte, 0, len(in)/2) + ii := 0 + lit := 0 + + ctx.initMatch(&swd, p.Flags) + if p.MaxChain > 0 { + 
swd.MaxChain = p.MaxChain + } + if p.NiceLen > 0 { + swd.NiceLength = p.NiceLen + } + + ctx.findMatch(&swd, 0, 0) + for ctx.look > 0 { + mlen := ctx.mlen + moff := ctx.moff + if ctx.bp != ctx.ip-int(ctx.look) { + panic("assert: compress: invalid bp") + } + if ctx.bp < 0 { + panic("assert: compress: negative bp") + } + if lit == 0 { + ii = ctx.bp + } + if ii+lit != ctx.bp { + panic("assert: compress: invalid ii") + } + if swd.BChar != int(ctx.in[ctx.bp]) { + panic("assert: compress: invalid bchar") + } + + if mlen < 2 || + (mlen == 2 && (moff > m1_MAX_OFFSET || lit == 0 || lit >= 4)) || + (mlen == 2 && len(out) == 0) || + (len(out) == 0 && lit == 0) { + // literal + mlen = 0 + } else if mlen == m2_MIN_LEN { + if moff > mX_MAX_OFFSET && lit >= 4 { + mlen = 0 + } + } + + if mlen == 0 { + // literal + lit++ + swd.MaxChain = p.MaxChain + ctx.findMatch(&swd, 1, 0) + continue + } + + // a match + if swd.UseBestOff { + mlen, moff = ctx.betterMatch(&swd, mlen, moff) + } + + ctx.assertMatch(&swd, mlen, moff) + + // check if we want to try a lazy match + ahead := 0 + l1 := 0 + maxahead := 0 + if p.TryLazy != 0 && mlen < int(p.MaxLazy) { + l1 = ctx.lenOfCodedMatch(mlen, moff, lit) + if l1 == 0 { + panic("assert: compress: invalid len of coded match") + } + maxahead = p.TryLazy + if maxahead > l1-1 { + maxahead = l1 - 1 + } + } + + matchdone := false + for ahead < maxahead && int(ctx.look) > mlen { + if mlen >= int(p.GoodLen) { + swd.MaxChain = p.MaxChain >> 2 + } else { + swd.MaxChain = p.MaxChain + } + ctx.findMatch(&swd, 1, 0) + ahead++ + if ctx.look <= 0 { + panic("assert: compress: invalid look") + } + if ii+lit+ahead != ctx.bp { + panic("assert: compress: invalid bp") + } + if ctx.mlen < mlen { + continue + } + if ctx.mlen == mlen && ctx.moff >= moff { + continue + } + if swd.UseBestOff { + ctx.mlen, ctx.moff = ctx.betterMatch(&swd, ctx.mlen, ctx.moff) + } + l2 := ctx.lenOfCodedMatch(ctx.mlen, ctx.moff, lit+ahead) + if l2 == 0 { + continue + } + l3 := 0 + if len(out) > 0 
{ + l3 = ctx.lenOfCodedMatch(ahead, moff, lit) + } + mingain := ctx.minGain(ahead, lit, lit+ahead, l1, l2, l3) + if ctx.mlen >= mlen+mingain { + ctx.lazy++ + ctx.assertMatch(&swd, ctx.mlen, ctx.moff) + + if l3 > 0 { + out = ctx.codeRun(out, ii, lit, ahead) + lit = 0 + out = ctx.codeMatch(out, ahead, moff) + } else { + lit += ahead + if ii+lit != ctx.bp { + panic("assert: compress: invalid bp after l3") + } + } + matchdone = true + break + } + } + + if !matchdone { + if ii+lit+ahead != ctx.bp { + panic("assert: compress: invalid bp out of for loop") + } + + out = ctx.codeRun(out, ii, lit, mlen) + lit = 0 + out = ctx.codeMatch(out, mlen, moff) + swd.MaxChain = p.MaxChain + ctx.findMatch(&swd, uint(mlen), uint(1+ahead)) + } + } + + if lit > 0 { + out = ctx.storeRun(out, ii, lit) + } + out = append(out, m4_MARKER|1, 0, 0) + if ctx.litBytes+ctx.matchBytes != len(ctx.in) { + panic("assert: compress999: not processed full input") + } + return out +} + +var fixedLevels = [...]parms{ + {0, 0, 0, 8, 4, 0}, + {0, 0, 0, 16, 8, 0}, + {0, 0, 0, 32, 16, 0}, + {1, 4, 4, 16, 16, 0}, + {1, 8, 16, 32, 32, 0}, + {1, 8, 16, 128, 128, 0}, + {2, 8, 32, 128, 256, 0}, + {2, 32, 128, cSWD_F, 2048, 1}, + {2, cSWD_F, cSWD_F, cSWD_F, 4096, 1}, +} + +func Compress1X999Level(in []byte, level int) []byte { + return compress999(in, fixedLevels[level-1]) +} + +func Compress1X999(in []byte) []byte { + return Compress1X999Level(in, 9) +} diff --git a/vendor/github.com/rasky/go-lzo/decompress.go b/vendor/github.com/rasky/go-lzo/decompress.go new file mode 100644 index 000000000..4c2842ef1 --- /dev/null +++ b/vendor/github.com/rasky/go-lzo/decompress.go @@ -0,0 +1,289 @@ +package lzo + +import ( + "errors" + "io" + "runtime" +) + +var ( + InputUnderrun = errors.New("input underrun") + LookBehindUnderrun = errors.New("lookbehind underrun") +) + +type reader struct { + r io.Reader + len int + buf [4096]byte + cur []byte + Err error +} + +func newReader(r io.Reader, inlen int) *reader { + if inlen == 0 { 
+ inlen = -1 + } + in := &reader{r: r, len: inlen} + in.Rebuffer() + return in +} + +// Read more data from the underlying reader and put it into the buffer. +// Also makes sure there is always at least 32 bytes in the buffer, so that +// in the main loop we can avoid checking for the end of buffer. +func (in *reader) Rebuffer() { + const RBUF_WND = 32 + var rbuf [RBUF_WND]byte + + if len(in.cur) > RBUF_WND || in.len == 0 { + return + } + + rb := rbuf[:len(in.cur)] + copy(rb, in.cur) + in.cur = in.buf[:] + copy(in.cur, rb) + + cur := in.cur[len(rb):] + if in.len >= 0 && len(cur) > in.len { + cur = cur[:in.len] + } + n, err := in.r.Read(cur) + if err != nil { + // If EOF is returned, treat it as error only if there are no further + // bytes in the window. Otherwise, let's postpone because those bytes + // could contain the terminator. + if err != io.EOF || len(rb) == 0 { + in.Err = err + in.cur = nil + } + } + in.cur = in.cur[:len(rb)+n] + if in.len >= 0 { + in.len -= n + } +} + +func (in *reader) ReadAppend(out *[]byte, n int) { + for n > 0 { + m := len(in.cur) + if m > n { + m = n + } + *out = append(*out, in.cur[:m]...) 
+ in.cur = in.cur[m:] + n -= m + if len(in.cur) == 0 { + in.Rebuffer() + if len(in.cur) == 0 { + in.Err = io.EOF + return + } + } + } + return +} + +func (in *reader) ReadU8() (ch byte) { + ch = in.cur[0] + in.cur = in.cur[1:] + return +} + +func (in *reader) ReadU16() int { + b0 := in.cur[0] + b1 := in.cur[1] + in.cur = in.cur[2:] + return int(b0) + int(b1)<<8 +} + +func (in *reader) ReadMulti(base int) (b int) { + for { + for i := 0; i < len(in.cur); i++ { + v := in.cur[i] + if v == 0 { + b += 255 + } else { + b += int(v) + base + in.cur = in.cur[i+1:] + return + } + } + in.cur = in.cur[0:0] + in.Rebuffer() + if len(in.cur) == 0 { + in.Err = io.EOF + return + } + } +} + +func copyMatch(out *[]byte, m_pos int, n int) { + if m_pos+n > len(*out) { + // fmt.Println("copy match WITH OVERLAP!") + for i := 0; i < n; i++ { + *out = append(*out, (*out)[m_pos]) + m_pos++ + } + } else { + // fmt.Println("copy match:", len(*out), m_pos, m_pos+n) + *out = append(*out, (*out)[m_pos:m_pos+n]...) + } +} + +// Decompress an input compressed with LZO1X. +// +// LZO1X has a stream terminator marker, so the decompression will always stop +// when this marker is found. +// +// If inLen is not zero, it is expected to match the length of the compressed +// input stream, and it is used to limit reads from the underlying reader; if +// inLen is smaller than the real stream, the decompression will abort with an +// error; if inLen is larger than the real stream, or if it is zero, the +// decompression will succeed but more bytes than necessary might be read +// from the underlying reader. If the reader returns EOF before the termination +// marker is found, the decompression aborts and EOF is returned. +// +// outLen is optional; if it's not zero, it is used as a hint to preallocate the +// output buffer to increase performance of the decompression. 
+func Decompress1X(r io.Reader, inLen int, outLen int) (out []byte, err error) { + var t, m_pos int + var last2 byte + + defer func() { + // To gain performance, we don't do any bounds checking while reading + // the input, so if the decompressor reads past the end of the input + // stream, a runtime error is raised. This saves about 7% of performance + // as the reading functions are very hot in the decompressor. + if r := recover(); r != nil { + if re, ok := r.(runtime.Error); ok { + if re.Error() == "runtime error: index out of range" { + err = io.EOF + return + } + } + panic(r) + } + }() + + out = make([]byte, 0, outLen) + + in := newReader(r, inLen) + ip := in.ReadU8() + if ip > 17 { + t = int(ip) - 17 + if t < 4 { + goto match_next + } + in.ReadAppend(&out, t) + // fmt.Println("begin:", string(out)) + goto first_literal_run + } + +begin_loop: + t = int(ip) + if t >= 16 { + goto match + } + if t == 0 { + t = in.ReadMulti(15) + } + in.ReadAppend(&out, t+3) + // fmt.Println("readappend", t+3, string(out[len(out)-t-3:])) +first_literal_run: + ip = in.ReadU8() + last2 = ip + t = int(ip) + if t >= 16 { + goto match + } + m_pos = len(out) - (1 + m2_MAX_OFFSET) + m_pos -= t >> 2 + ip = in.ReadU8() + m_pos -= int(ip) << 2 + // fmt.Println("m_pos flr", m_pos, len(out), "\n", string(out)) + if m_pos < 0 { + err = LookBehindUnderrun + return + } + copyMatch(&out, m_pos, 3) + goto match_done + +match: + in.Rebuffer() + if in.Err != nil { + err = in.Err + return + } + t = int(ip) + last2 = ip + if t >= 64 { + m_pos = len(out) - 1 + m_pos -= (t >> 2) & 7 + ip = in.ReadU8() + m_pos -= int(ip) << 3 + // fmt.Println("m_pos t64", m_pos, t, int(ip)) + t = (t >> 5) - 1 + goto copy_match + } else if t >= 32 { + t &= 31 + if t == 0 { + t = in.ReadMulti(31) + } + m_pos = len(out) - 1 + v16 := in.ReadU16() + m_pos -= v16 >> 2 + last2 = byte(v16 & 0xFF) + // fmt.Println("m_pos t32", m_pos) + } else if t >= 16 { + m_pos = len(out) + m_pos -= (t & 8) << 11 + t &= 7 + if t == 0 { + t = 
in.ReadMulti(7) + } + v16 := in.ReadU16() + m_pos -= v16 >> 2 + if m_pos == len(out) { + // fmt.Println("END", t, v16, m_pos) + return + } + m_pos -= 0x4000 + last2 = byte(v16 & 0xFF) + // fmt.Println("m_pos t16", m_pos) + } else { + m_pos = len(out) - 1 + m_pos -= t >> 2 + ip = in.ReadU8() + m_pos -= int(ip) << 2 + if m_pos < 0 { + err = LookBehindUnderrun + return + } + // fmt.Println("m_pos tX", m_pos) + copyMatch(&out, m_pos, 2) + goto match_done + } + +copy_match: + if m_pos < 0 { + err = LookBehindUnderrun + return + } + copyMatch(&out, m_pos, t+2) + +match_done: + t = int(last2 & 3) + if t == 0 { + goto match_end + } +match_next: + // fmt.Println("read append finale:", t) + in.ReadAppend(&out, t) + ip = in.ReadU8() + goto match + +match_end: + ip = in.ReadU8() + goto begin_loop +} diff --git a/vendor/github.com/rasky/go-lzo/defs.go b/vendor/github.com/rasky/go-lzo/defs.go new file mode 100644 index 000000000..af941a4c6 --- /dev/null +++ b/vendor/github.com/rasky/go-lzo/defs.go @@ -0,0 +1,29 @@ +package lzo + +const ( + m1_MAX_OFFSET = 0x0400 + m2_MAX_OFFSET = 0x0800 + m3_MAX_OFFSET = 0x4000 + m4_MAX_OFFSET = 0xbfff + mX_MAX_OFFSET = m1_MAX_OFFSET + m2_MAX_OFFSET + + m1_MIN_LEN = 2 + m1_MAX_LEN = 2 + m2_MIN_LEN = 3 + m2_MAX_LEN = 8 + m3_MIN_LEN = 3 + m3_MAX_LEN = 33 + m4_MIN_LEN = 3 + m4_MAX_LEN = 9 + + m1_MARKER = 0 + m2_MARKER = 64 + m3_MARKER = 32 + m4_MARKER = 16 +) + +const ( + d_BITS = 14 + d_MASK = (1 << d_BITS) - 1 + d_HIGH = (d_MASK >> 1) + 1 +) diff --git a/vendor/github.com/rasky/go-lzo/fuzz.go b/vendor/github.com/rasky/go-lzo/fuzz.go new file mode 100644 index 000000000..252fafb67 --- /dev/null +++ b/vendor/github.com/rasky/go-lzo/fuzz.go @@ -0,0 +1,10 @@ +// +build gofuzz + +package lzo + +import "bytes" + +func Fuzz(data []byte) int { + Decompress1X(bytes.NewBuffer(data), 0, 0) + return 0 +} diff --git a/vendor/github.com/rasky/go-lzo/match.go b/vendor/github.com/rasky/go-lzo/match.go new file mode 100644 index 000000000..cf9b05b29 --- /dev/null 
+++ b/vendor/github.com/rasky/go-lzo/match.go @@ -0,0 +1,101 @@ +package lzo + +func (ctx *compressor) initMatch(s *swd, flags uint32) { + s.ctx = ctx + s.init() + if flags&1 != 0 { + s.UseBestOff = true + } +} + +func (ctx *compressor) findMatch(s *swd, thislen uint, skip uint) { + if skip > 0 { + if thislen < skip { + panic("assert: findMatch: invalid thislen") + } + s.accept(thislen - skip) + ctx.textsize += thislen - skip + 1 + } else { + if thislen > 1 { + panic("assert: findMatch: invalid thislen") + } + ctx.textsize += thislen - skip + } + + s.MLen = cSWD_THRESHOLD + s.MOff = 0 + for i := 0; i < len(s.bestPos); i++ { + s.bestPos[i] = 0 + } + + s.findbest() + ctx.mlen = int(s.MLen) + ctx.moff = int(s.MOff) + + s.getbyte() + if s.BChar < 0 { + ctx.look = 0 + ctx.mlen = 0 + } else { + ctx.look = s.Look + 1 + } + + ctx.bp = ctx.ip - int(ctx.look) +} + +func (ctx *compressor) betterMatch(s *swd, imlen, imoff int) (mlen int, moff int) { + mlen, moff = imlen, imoff + if mlen <= m2_MIN_LEN { + return + } + if moff <= m2_MAX_OFFSET { + return + } + + if moff > m2_MAX_OFFSET && mlen >= m2_MIN_LEN+1 && mlen <= m2_MAX_LEN+1 && + s.BestOff[mlen-1] > 0 && s.BestOff[mlen-1] <= m2_MAX_OFFSET { + mlen -= 1 + moff = int(s.BestOff[mlen]) + return + } + + if moff > m3_MAX_OFFSET && mlen >= m4_MAX_LEN+1 && mlen <= m2_MAX_LEN+2 && + s.BestOff[mlen-2] > 0 && s.BestOff[mlen-2] <= m2_MAX_OFFSET { + mlen -= 2 + moff = int(s.BestOff[mlen]) + return + } + + if moff > m3_MAX_OFFSET && mlen >= m4_MAX_LEN+1 && mlen <= m3_MAX_LEN+1 && + s.BestOff[mlen-1] > 0 && s.BestOff[mlen-1] <= m3_MAX_OFFSET { + mlen -= 1 + moff = int(s.BestOff[mlen]) + return + } + + return +} + +func assertMemcmp(b1, b2 []byte, l int) { + b1 = b1[:l] + b2 = b2[:l] + for i := 0; i < len(b1); i++ { + if b1[i] != b2[i] { + panic("assertMemcmp: dosn't match") + } + } +} + +func (ctx *compressor) assertMatch(s *swd, mlen, moff int) { + if mlen < 2 { + panic("assertMatch: invalid mlen") + } + if moff <= ctx.bp { + if 
ctx.bp-moff+mlen >= ctx.ip { + panic("assertMatch: invalid bp") + } + assertMemcmp(ctx.in[ctx.bp:], ctx.in[ctx.bp-moff:], mlen) + } else { + panic("dict should not exit") + } +} diff --git a/vendor/github.com/rasky/go-lzo/swd.go b/vendor/github.com/rasky/go-lzo/swd.go new file mode 100644 index 000000000..cda10d7af --- /dev/null +++ b/vendor/github.com/rasky/go-lzo/swd.go @@ -0,0 +1,349 @@ +package lzo + +const ( + cSWD_N = m4_MAX_OFFSET // ring buffer size + cSWD_THRESHOLD = 1 // lower limit for match length + cSWD_F = 2048 // upper limit for match length + cSWD_BEST_OFF = m3_MAX_LEN + 1 // max(m2,m3,m4)+1 + cSWD_HSIZE = 16384 + cSWD_MAX_CHAIN = 2048 +) + +type swd struct { + // Public builtin + SwdN uint + SwdF uint + SwdThreshold uint + + // Public configuration + MaxChain uint + NiceLength uint + UseBestOff bool + LazyInsert uint + + // Output + MLen uint + MOff uint + Look uint + BChar int + BestOff [cSWD_BEST_OFF]uint + + // Semi-public + ctx *compressor + mpos uint + bestPos [cSWD_BEST_OFF]uint + + // Private + ip uint // input pointer (lookahead) + bp uint // buffer pointer + rp uint // remove pointer + bsize uint + bwrap []byte + nodecount uint + firstrp uint + + b [cSWD_N + cSWD_F + cSWD_F]byte + head3 [cSWD_HSIZE]uint16 + succ3 [cSWD_N + cSWD_F]uint16 + best3 [cSWD_N + cSWD_F]uint16 + llen3 [cSWD_HSIZE]uint16 + head2 [65536]uint16 +} + +func head2(data []byte) uint { + return uint(data[1])<<8 | uint(data[0]) +} + +func head3(data []byte) uint { + key := uint(data[0]) + key = (key << 5) ^ uint(data[1]) + key = (key << 5) ^ uint(data[2]) + key = (key * 0x9f5f) >> 5 + return key & (cSWD_HSIZE - 1) +} + +func (s *swd) gethead3(key uint) uint16 { + if s.llen3[key] == 0 { + return 0xFFFF + } + return s.head3[key] +} + +func (s *swd) removeNode(node uint) { + if s.nodecount == 0 { + key := head3(s.b[node:]) + if s.llen3[key] == 0 { + panic("assert: swd.removeNode: invalid llen3") + } + s.llen3[key]-- + + key = head2(s.b[node:]) + if s.head2[key] == 0xFFFF { + 
panic("assert: swd.removeNode: invalid head2") + } + if uint(s.head2[key]) == node { + s.head2[key] = 0xFFFF + } + return + } + s.nodecount-- +} + +func (s *swd) init() { + s.SwdN = cSWD_N + s.SwdF = cSWD_F + s.SwdThreshold = cSWD_THRESHOLD + + s.MaxChain = cSWD_MAX_CHAIN + s.NiceLength = s.SwdF + s.bsize = s.SwdN + s.SwdF + s.bwrap = s.b[s.bsize:] + s.nodecount = s.SwdN + + for i := 0; i < len(s.head2); i++ { + s.head2[i] = 0xFFFF + } + + s.ip = 0 + s.bp = s.ip + s.firstrp = s.ip + if s.ip+s.SwdF > s.bsize { + panic("assert: swd.init: invalid ip") + } + + s.Look = uint(len(s.ctx.in)) - s.ip + if s.Look > 0 { + if s.Look > s.SwdF { + s.Look = s.SwdF + } + copy(s.b[s.ip:], s.ctx.in[:s.Look]) + s.ctx.ip += int(s.Look) + s.ip += s.Look + } + + if s.ip == s.bsize { + s.ip = 0 + } + + s.rp = s.firstrp + if s.rp >= s.nodecount { + s.rp -= s.nodecount + } else { + s.rp += s.bsize - s.nodecount + } + + if s.Look < 3 { + s.b[s.bp+s.Look] = 0 + s.b[s.bp+s.Look+1] = 0 + s.b[s.bp+s.Look+2] = 0 + } +} + +func (s *swd) getbyte() { + c := -1 + if s.ctx.ip < len(s.ctx.in) { + c = int(s.ctx.in[s.ctx.ip]) + s.ctx.ip++ + s.b[s.ip] = byte(c) + if s.ip < s.SwdF { + s.bwrap[s.ip] = byte(c) + } + } else { + if s.Look > 0 { + s.Look-- + } + s.b[s.ip] = 0 + if s.ip < s.SwdF { + s.bwrap[s.ip] = 0 + } + } + + s.ip++ + if s.ip == s.bsize { + s.ip = 0 + } + s.bp++ + if s.bp == s.bsize { + s.bp = 0 + } + s.rp++ + if s.rp == s.bsize { + s.rp = 0 + } +} + +func (s *swd) accept(n uint) { + if n > s.Look { + panic("swd: accept: invalid n") + } + + for i := uint(0); i < n; i++ { + s.removeNode(s.rp) + + key := head3(s.b[s.bp:]) + s.succ3[s.bp] = s.gethead3(key) + s.head3[key] = uint16(s.bp) + s.best3[s.bp] = uint16(s.SwdF + 1) + s.llen3[key]++ + if uint(s.llen3[key]) > s.SwdN { + panic("swd: accept: invalid llen3") + } + + key = head2(s.b[s.bp:]) + s.head2[key] = uint16(s.bp) + + s.getbyte() + } +} + +func (s *swd) search(node uint, cnt uint) { + if s.MLen <= 0 { + panic("assert: search: invalid 
mlen") + } + + mlen := s.MLen + bp := s.bp + bx := s.bp + s.Look + + scanend1 := s.b[s.bp+mlen-1] + for ; cnt > 0; cnt-- { + p1 := bp + p2 := node + px := bx + + if mlen >= s.Look { + panic("assert: search: invalid mlen in loop") + } + if s.b[p2+mlen-1] == scanend1 && + s.b[p2+mlen] == s.b[p1+mlen] && + s.b[p2] == s.b[p1] && + s.b[p2+1] == s.b[p1+1] { + + if s.b[bp] != s.b[node] || s.b[bp+1] != s.b[node+1] || s.b[bp+2] != s.b[node+2] { + panic("assert: seach: invalid initial match") + } + p1 = p1 + 2 + p2 = p2 + 2 + for p1 < px { + p1++ + p2++ + if s.b[p1] != s.b[p2] { + break + } + } + i := p1 - bp + + for j := uint(0); j < i; j++ { + if s.b[s.bp+j] != s.b[node+j] { + panic("assert: search: invalid final match") + } + } + + if i < cSWD_BEST_OFF { + if s.bestPos[i] == 0 { + s.bestPos[i] = node + 1 + } + } + if i > mlen { + mlen = i + s.MLen = mlen + s.mpos = node + if mlen == s.Look { + return + } + if mlen >= s.NiceLength { + return + } + if mlen > uint(s.best3[node]) { + return + } + scanend1 = s.b[s.bp+mlen-1] + } + } + + node = uint(s.succ3[node]) + } +} + +func (s *swd) search2() bool { + if s.Look < 2 { + panic("assert: search2: invalid look") + } + if s.MLen <= 0 { + panic("assert: search2: invalid mlen") + } + + key := s.head2[head2(s.b[s.bp:])] + if key == 0xFFFF { + return false + } + if s.b[s.bp] != s.b[key] || s.b[s.bp+1] != s.b[key+1] { + panic("assert: search2: invalid key found") + } + if s.bestPos[2] == 0 { + s.bestPos[2] = uint(key + 1) + } + if s.MLen < 2 { + s.MLen = 2 + s.mpos = uint(key) + } + return true +} + +func (s *swd) findbest() { + if s.MLen == 0 { + panic("swd: findbest: invalid mlen") + } + + key := head3(s.b[s.bp:]) + node := s.gethead3(key) + s.succ3[s.bp] = node + cnt := uint(s.llen3[key]) + s.llen3[key]++ + if cnt > s.SwdN+s.SwdF { + panic("swd: findbest: invalid llen3") + } + if cnt > s.MaxChain && s.MaxChain > 0 { + cnt = s.MaxChain + } + s.head3[key] = uint16(s.bp) + + s.BChar = int(s.b[s.bp]) + len := s.MLen + if s.MLen >= 
s.Look { + if s.Look == 0 { + s.BChar = -1 + } + s.MOff = 0 + s.best3[s.bp] = uint16(s.SwdF + 1) + } else { + if s.search2() && s.Look >= 3 { + s.search(uint(node), cnt) + } + + if s.MLen > len { + s.MOff = s.pos2off(s.mpos) + } + + if s.UseBestOff { + for i := 2; i < cSWD_BEST_OFF; i++ { + if s.bestPos[i] > 0 { + s.BestOff[i] = s.pos2off(s.bestPos[i] - 1) + } else { + s.BestOff[i] = 0 + } + } + } + } + + s.removeNode(s.rp) + key = head2(s.b[s.bp:]) + s.head2[key] = uint16(s.bp) +} + +func (s *swd) pos2off(pos uint) uint { + if s.bp > pos { + return s.bp - pos + } + return s.bsize - (pos - s.bp) +} diff --git a/vendor/vendor.json b/vendor/vendor.json index 9372bd17c..5a870f29c 100644 --- a/vendor/vendor.json +++ b/vendor/vendor.json @@ -62,6 +62,11 @@ "revision": "d1e19f5c23e955d7b65ff59e935007a2edce045e", "revisionTime": "2018-09-19T22:28:51Z" }, + { + "checksumSHA1": "apHxx2dlnW9TF3IBXFcFZj/Es6E=", + "path": "git.apache.org/thrift.git/lib/go/thrift", + "revision": "" + }, { "checksumSHA1": "02D1G/awFY2Jb/w/OLN2tlMuORk=", "path": "github.com/Azure/azure-sdk-for-go/storage", @@ -692,6 +697,18 @@ "revision": "39381cf62425050629c7264228fc2f9e0c6616f6", "revisionTime": "2018-11-15T04:56:45Z" }, + { + "checksumSHA1": "LQSZ9jpJwWrfqKGiBnw+qjRtwRE=", + "path": "github.com/minio/parquet-go", + "revision": "84e7daee0cea0bf82d1d15795dcea0f7bcd508c9", + "revisionTime": "2018-12-30T03:24:02Z" + }, + { + "checksumSHA1": "N4WRPw4p3AN958RH/O53kUsJacQ=", + "path": "github.com/minio/parquet-go/gen-go/parquet", + "revision": "d50385ed243d7120cf0f78de3f4f4e171936f12f", + "revisionTime": "2018-11-07T21:57:30Z" + }, { "checksumSHA1": "cYuXpiVBMypgkEr0Wqd79jPPyBg=", "path": "github.com/minio/sha256-simd", @@ -770,6 +787,18 @@ "revision": "0527e80f3ba5ecff59ad8d07db607677b5dc056a", "revisionTime": "2018-10-28T19:52:56Z" }, + { + "checksumSHA1": "85AmRAEmy9EqHBUhwkBBVhLkCVU=", + "path": "github.com/pierrec/lz4", + "revision": "1f6e18d34f6790fc0afea6f13a5fe3d9ab1770af", + "revisionTime": 
"2018-09-11T17:58:58Z" + }, + { + "checksumSHA1": "YzBjaYp2pbrwPhT6XHY0CBSh71A=", + "path": "github.com/pierrec/lz4/internal/xxh32", + "revision": "1f6e18d34f6790fc0afea6f13a5fe3d9ab1770af", + "revisionTime": "2018-09-11T17:58:58Z" + }, { "checksumSHA1": "xCv4GBFyw07vZkVtKF/XrUnkHRk=", "path": "github.com/pkg/errors", @@ -842,6 +871,12 @@ "revision": "8b1c2da0d56deffdbb9e48d4414b4e674bd8083e", "revisionTime": "2018-04-08T09:29:02Z" }, + { + "checksumSHA1": "lOtA2sG01oAO4Z/VSGqvX45CjfA=", + "path": "github.com/rasky/go-lzo", + "revision": "affec0788321cffe2c68821be1e07e87127b17f0", + "revisionTime": "2015-10-23T00:10:55Z" + }, { "checksumSHA1": "D8AVDI39CJ+jvw0HOotYU2gz54c=", "path": "github.com/rjeczalik/notify",