diff --git a/cmd/object-handlers.go b/cmd/object-handlers.go
index fbca4c4a5..3cbc70287 100644
--- a/cmd/object-handlers.go
+++ b/cmd/object-handlers.go
@@ -206,6 +206,9 @@ func (api objectAPIHandlers) SelectObjectContentHandler(w http.ResponseWriter, r
Start: offset,
End: offset + length,
}
+ if length == -1 {
+ rs.End = -1
+ }
return getObjectNInfo(ctx, bucket, object, rs, r.Header, readLock, opts)
}
diff --git a/internal/s3select/errors.go b/internal/s3select/errors.go
index eb99edbfc..dd04f0653 100644
--- a/internal/s3select/errors.go
+++ b/internal/s3select/errors.go
@@ -101,6 +101,15 @@ func errInvalidRequestParameter(err error) *s3Error {
}
}
+func errInvalidScanRangeParameter(err error) *s3Error {
+ return &s3Error{
+ code: "InvalidRequestParameter",
+ message: "The value of a parameter in ScanRange element is invalid. Check the service API documentation and try again.",
+ statusCode: 400,
+ cause: err,
+ }
+}
+
func errObjectSerializationConflict(err error) *s3Error {
return &s3Error{
code: "ObjectSerializationConflict",
diff --git a/internal/s3select/select.go b/internal/s3select/select.go
index d66c7f678..34e78d00d 100644
--- a/internal/s3select/select.go
+++ b/internal/s3select/select.go
@@ -212,6 +212,69 @@ type RequestProgress struct {
Enabled bool `xml:"Enabled"`
}
+// ScanRange represents the ScanRange parameter.
+type ScanRange struct {
+ // Start if byte offset form the start off the file.
+ Start *uint64 `xml:"Start"`
+ // End is the last byte that should be returned, if Start is set,
+ // or the offset from EOF to start reading if start is not present.
+ End *uint64 `xml:"End"`
+}
+
+// Validate if the scan range is valid.
+func (s *ScanRange) Validate() error {
+ if s == nil {
+ return nil
+ }
+ if s.Start == nil && s.End == nil {
+ // This parameter is optional, but when specified, it must not be empty.
+ // Ref: https://docs.aws.amazon.com/AmazonS3/latest/API/API_SelectObjectContent.html#AmazonS3-SelectObjectContent-request-ScanRange
+ return errors.New("ScanRange: No Start or End specified")
+ }
+ if s.Start == nil || s.End == nil {
+ return nil
+ }
+ if *s.Start > *s.End {
+ return errors.New("ScanRange: Start cannot be after end")
+ }
+ return nil
+}
+
+// StartLen returns start offset plus length from range.
+func (s *ScanRange) StartLen() (start, length int64, err error) {
+ if s == nil {
+ return 0, -1, nil
+ }
+ err = s.Validate()
+ if err != nil {
+ return 0, 0, err
+ }
+
+ if s.End == nil && s.Start == nil {
+ // Not valid, but should be caught above.
+ return 0, -1, nil
+ }
+ if s.End == nil {
+ start := int64(*s.Start)
+ if start < 0 {
+ return 0, 0, errors.New("ScanRange: Start after EOF")
+ }
+ return start, -1, nil
+ }
+ if s.Start == nil {
+ // Suffix length
+ end := int64(*s.End)
+ if end < 0 {
+ return 0, 0, errors.New("ScanRange: End bigger than file")
+ }
+ // Suffix length
+ return -end, -1, nil
+ }
+ start = int64(*s.Start)
+ end := int64(*s.End)
+ return start, end - start + 1, nil
+}
+
// S3Select - filters the contents on a simple structured query language (SQL) statement. It
// represents elements inside in request XML specified in detail at
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html.
@@ -222,6 +285,7 @@ type S3Select struct {
Input InputSerialization `xml:"InputSerialization"`
Output OutputSerialization `xml:"OutputSerialization"`
Progress RequestProgress `xml:"RequestProgress"`
+ ScanRange *ScanRange `xml:"ScanRange"`
statement *sql.SelectStatement
progressReader *progressReader
@@ -251,7 +315,9 @@ func (s3Select *S3Select) UnmarshalXML(d *xml.Decoder, start xml.StartElement) e
return errMalformedXML(err)
}
-
+ if err := parsedS3Select.ScanRange.Validate(); err != nil {
+ return errInvalidScanRangeParameter(err)
+ }
parsedS3Select.ExpressionType = strings.ToLower(parsedS3Select.ExpressionType)
if parsedS3Select.ExpressionType != "sql" {
return errInvalidExpressionType(fmt.Errorf("invalid expression type '%v'", parsedS3Select.ExpressionType))
@@ -298,9 +364,13 @@ func (s3Select *S3Select) getProgress() (bytesScanned, bytesProcessed int64) {
// Open - opens S3 object by using callback for SQL selection query.
// Currently CSV, JSON and Apache Parquet formats are supported.
func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadCloser, error)) error {
+ offset, end, err := s3Select.ScanRange.StartLen()
+ if err != nil {
+ return err
+ }
switch s3Select.Input.format {
case csvFormat:
- rc, err := getReader(0, -1)
+ rc, err := getReader(offset, end)
if err != nil {
return err
}
@@ -335,7 +405,7 @@ func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadClos
s3Select.close = rc.Close
return nil
case jsonFormat:
- rc, err := getReader(0, -1)
+ rc, err := getReader(offset, end)
if err != nil {
return err
}
@@ -362,6 +432,10 @@ func (s3Select *S3Select) Open(getReader func(offset, length int64) (io.ReadClos
if !strings.EqualFold(os.Getenv("MINIO_API_SELECT_PARQUET"), "on") {
return errors.New("parquet format parsing not enabled on server")
}
+ if offset != 0 || end != -1 {
+ // Offsets do not make sense in parquet files.
+ return errors.New("parquet format does not support offsets")
+ }
var err error
s3Select.recordReader, err = parquet.NewReader(getReader, &s3Select.Input.ParquetArgs)
return err
diff --git a/internal/s3select/select_test.go b/internal/s3select/select_test.go
index 2d347f304..da95e6bd5 100644
--- a/internal/s3select/select_test.go
+++ b/internal/s3select/select_test.go
@@ -20,6 +20,7 @@ package s3select
import (
"bytes"
"encoding/xml"
+ "errors"
"fmt"
"io"
"io/ioutil"
@@ -1374,6 +1375,314 @@ func TestJSONInput(t *testing.T) {
}
}
+func TestCSVRanges(t *testing.T) {
+ testInput := []byte(`id,time,num,num2,text
+1,2010-01-01T,7867786,4565.908123,"a text, with comma"
+2,2017-01-02T03:04Z,-5, 0.765111,
+`)
+ testTable := []struct {
+ name string
+ query string
+ input []byte
+ requestXML []byte // override request XML
+ wantResult string
+ wantErr bool
+ }{
+ {
+ name: "select-all",
+ input: testInput,
+ query: ``,
+ // Since we are doing offset, no headers are used.
+ wantResult: `{"_1":"2","_2":"2017-01-02T03:04Z","_3":"-5","_4":" 0.765111","_5":""}`,
+ requestXML: []byte(`
+
+ SELECT * from s3object AS s
+ SQL
+
+ NONE
+
+ NONE
+ "
+
+
+
+
+
+
+
+ FALSE
+
+ 76109
+`),
+ },
+ {
+ name: "select-remain",
+ input: testInput,
+ // Since we are doing offset, no headers are used.
+ wantResult: `{"_1":"2","_2":"2017-01-02T03:04Z","_3":"-5","_4":" 0.765111","_5":""}`,
+ requestXML: []byte(`
+
+ SELECT * from s3object AS s
+ SQL
+
+ NONE
+
+ NONE
+ "
+
+
+
+
+
+
+
+ FALSE
+
+ 76
+`),
+ },
+ {
+ name: "select-end-bytes",
+ input: testInput,
+ query: ``,
+ // Since we are doing offset, no headers are used.
+ wantResult: `{"_1":"2","_2":"2017-01-02T03:04Z","_3":"-5","_4":" 0.765111","_5":""}`,
+ requestXML: []byte(`
+
+ SELECT * from s3object AS s
+ SQL
+
+ NONE
+
+ NONE
+ "
+
+
+
+
+
+
+
+ FALSE
+
+ 35
+`),
+ },
+ {
+ name: "select-middle",
+ input: testInput,
+ // Since we are doing offset, no headers are used.
+ wantResult: `{"_1":"a text, with comma"}`,
+ requestXML: []byte(`
+
+ SELECT * from s3object AS s
+ SQL
+
+ NONE
+
+ NONE
+ "
+
+
+
+
+
+
+
+ FALSE
+
+ 5676
+`),
+ },
+ {
+ name: "error-end-before-start",
+ input: testInput,
+ // Since we are doing offset, no headers are used.
+ wantResult: ``,
+ wantErr: true,
+ requestXML: []byte(`
+
+ SELECT * from s3object AS s
+ SQL
+
+ NONE
+
+ NONE
+ "
+
+
+
+
+
+
+
+ FALSE
+
+ 5626
+`),
+ },
+ {
+ name: "error-empty",
+ input: testInput,
+ // Since we are doing offset, no headers are used.
+ wantResult: ``,
+ wantErr: true,
+ requestXML: []byte(`
+
+ SELECT * from s3object AS s
+ SQL
+
+ NONE
+
+ NONE
+ "
+
+
+
+
+
+
+
+ FALSE
+
+
+`),
+ },
+ {
+ name: "error-after-eof",
+ input: testInput,
+ // Since we are doing offset, no headers are used.
+ wantResult: ``,
+ wantErr: true,
+ requestXML: []byte(`
+
+ SELECT * from s3object AS s
+ SQL
+
+ NONE
+
+ NONE
+ "
+
+
+
+
+
+
+
+ FALSE
+
+ 2600000
+`),
+ },
+ {
+ name: "error-after-eof",
+ input: testInput,
+ // Since we are doing offset, no headers are used.
+ wantResult: ``,
+ wantErr: true,
+ requestXML: []byte(`
+
+ SELECT * from s3object AS s
+ SQL
+
+ NONE
+
+ NONE
+ "
+
+
+
+
+
+
+
+ FALSE
+
+ 26000002600001
+`),
+ },
+ }
+
+ for _, testCase := range testTable {
+ t.Run(testCase.name, func(t *testing.T) {
+ testReq := testCase.requestXML
+ s3Select, err := NewS3Select(bytes.NewReader(testReq))
+ if err != nil {
+ if !testCase.wantErr {
+ t.Fatal(err)
+ }
+ t.Logf("got expected error: %v", err)
+ return
+ }
+
+ if err = s3Select.Open(func(offset, length int64) (io.ReadCloser, error) {
+ in := testCase.input
+ if offset != 0 || length != -1 {
+ // Copy from SelectObjectContentHandler
+ isSuffixLength := false
+ if offset < 0 {
+ isSuffixLength = true
+ }
+
+ if length > 0 {
+ length--
+ }
+
+ rs := &httpRangeSpec{
+ IsSuffixLength: isSuffixLength,
+ Start: offset,
+ End: offset + length,
+ }
+ if length == -1 {
+ rs.End = -1
+ }
+ t.Log("input, offset:", offset, "length:", length, "size:", len(in))
+ offset, length, err = rs.GetOffsetLength(int64(len(in)))
+ if err != nil {
+ return nil, err
+ }
+ t.Log("rs:", *rs, "offset:", offset, "length:", length)
+ in = in[offset : offset+length]
+ }
+ return ioutil.NopCloser(bytes.NewBuffer(in)), nil
+ }); err != nil {
+ if !testCase.wantErr {
+ t.Fatal(err)
+ }
+ t.Logf("got expected error: %v", err)
+ return
+ } else if testCase.wantErr {
+ t.Error("did not get expected error")
+ return
+ }
+
+ w := &testResponseWriter{}
+ s3Select.Evaluate(w)
+ s3Select.Close()
+ resp := http.Response{
+ StatusCode: http.StatusOK,
+ Body: ioutil.NopCloser(bytes.NewReader(w.response)),
+ ContentLength: int64(len(w.response)),
+ }
+ res, err := minio.NewSelectResults(&resp, "testbucket")
+ if err != nil {
+ t.Error(err)
+ return
+ }
+ got, err := ioutil.ReadAll(res)
+ if err != nil {
+ t.Error(err)
+ return
+ }
+ gotS := strings.TrimSpace(string(got))
+ if !reflect.DeepEqual(gotS, testCase.wantResult) {
+ t.Errorf("received response does not match with expected reply. Query: %s\ngot: %s\nwant:%s", testCase.query, gotS, testCase.wantResult)
+ }
+ })
+ }
+}
+
func TestParquetInput(t *testing.T) {
os.Setenv("MINIO_API_SELECT_PARQUET", "on")
defer os.Setenv("MINIO_API_SELECT_PARQUET", "off")
@@ -1728,3 +2037,76 @@ func TestParquetInputSchemaCSV(t *testing.T) {
})
}
}
+
+// httpRangeSpec represents a range specification as supported by S3 GET
+// object request.
+//
+// Case 1: Not present -> represented by a nil RangeSpec
+// Case 2: bytes=1-10 (absolute start and end offsets) -> RangeSpec{false, 1, 10}
+// Case 3: bytes=10- (absolute start offset with end offset unspecified) -> RangeSpec{false, 10, -1}
+// Case 4: bytes=-30 (suffix length specification) -> RangeSpec{true, -30, -1}
+type httpRangeSpec struct {
+ // Does the range spec refer to a suffix of the object?
+ IsSuffixLength bool
+
+ // Start and end offset specified in range spec
+ Start, End int64
+}
+
+func (h *httpRangeSpec) GetLength(resourceSize int64) (rangeLength int64, err error) {
+ switch {
+ case resourceSize < 0:
+ return 0, errors.New("Resource size cannot be negative")
+
+ case h == nil:
+ rangeLength = resourceSize
+
+ case h.IsSuffixLength:
+ specifiedLen := -h.Start
+ rangeLength = specifiedLen
+ if specifiedLen > resourceSize {
+ rangeLength = resourceSize
+ }
+
+ case h.Start >= resourceSize:
+ return 0, errors.New("errInvalidRange")
+
+ case h.End > -1:
+ end := h.End
+ if resourceSize <= end {
+ end = resourceSize - 1
+ }
+ rangeLength = end - h.Start + 1
+
+ case h.End == -1:
+ rangeLength = resourceSize - h.Start
+
+ default:
+ return 0, errors.New("Unexpected range specification case")
+ }
+
+ return rangeLength, nil
+}
+
+// GetOffsetLength computes the start offset and length of the range
+// given the size of the resource
+func (h *httpRangeSpec) GetOffsetLength(resourceSize int64) (start, length int64, err error) {
+ if h == nil {
+ // No range specified, implies whole object.
+ return 0, resourceSize, nil
+ }
+
+ length, err = h.GetLength(resourceSize)
+ if err != nil {
+ return 0, 0, err
+ }
+
+ start = h.Start
+ if h.IsSuffixLength {
+ start = resourceSize + h.Start
+ if start < 0 {
+ start = 0
+ }
+ }
+ return start, length, nil
+}