mirror of
https://github.com/minio/minio.git
synced 2024-12-26 15:15:55 -05:00
91c839ad28
Batching records into a single SQL Select message in the response leads to significant speed up as the message header overhead is made negligible. This change leads to a speed up of 3-5x for queries that select many small records.
415 lines
14 KiB
Go
415 lines
14 KiB
Go
/*
|
|
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package s3select
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"hash/crc32"
|
|
"net/http"
|
|
"strconv"
|
|
"sync/atomic"
|
|
"time"
|
|
)
|
|
|
|
// totalByteLength - returns the size in bytes of a complete message carrying
// headerLength bytes of headers and payloadLength bytes of payload. The frame
// layout is the one shown in
// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-frame-overview.png
func totalByteLength(headerLength, payloadLength int) int {
	const (
		preludeLength = 4 + 4 // total byte-length field + headers byte-length field.
		crcLength     = 4     // used once for the prelude crc and once for the message crc.
	)

	return preludeLength + crcLength + headerLength + payloadLength + crcLength
}
|
|
|
|
func genMessage(header, payload []byte) []byte {
|
|
headerLength := len(header)
|
|
payloadLength := len(payload)
|
|
totalLength := totalByteLength(headerLength, payloadLength)
|
|
|
|
buf := new(bytes.Buffer)
|
|
binary.Write(buf, binary.BigEndian, uint32(totalLength))
|
|
binary.Write(buf, binary.BigEndian, uint32(headerLength))
|
|
prelude := buf.Bytes()
|
|
binary.Write(buf, binary.BigEndian, crc32.ChecksumIEEE(prelude))
|
|
buf.Write(header)
|
|
if payload != nil {
|
|
buf.Write(payload)
|
|
}
|
|
message := buf.Bytes()
|
|
binary.Write(buf, binary.BigEndian, crc32.ChecksumIEEE(message))
|
|
|
|
return buf.Bytes()
|
|
}
|
|
|
|
// recordsHeader - pre-encoded headers of a Records message. Every header is
// encoded as: name length (1 byte), name, value type (7), value length
// (2 bytes, big endian), value.
// Refer genRecordsHeader().
var recordsHeader = []byte("" +
	"\x0d:message-type\x07\x00\x05event" +
	"\x0d:content-type\x07\x00\x18application/octet-stream" +
	"\x0b:event-type\x07\x00\x07Records")
|
|
|
|
// newRecordsMessage - creates new Records Message which can contain a single record, partial records,
|
|
// or multiple records. Depending on the size of the result, a response can contain one or more of these messages.
|
|
//
|
|
// Header specification
|
|
// Records messages contain three headers, as follows:
|
|
// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-record.png
|
|
//
|
|
// Payload specification
|
|
// Records message payloads can contain a single record, partial records, or multiple records.
|
|
func newRecordsMessage(payload []byte) []byte {
|
|
return genMessage(recordsHeader, payload)
|
|
}
|
|
|
|
// continuationMessage - complete, pre-encoded Continuation message. S3
// periodically sends this message to keep the TCP connection open. These
// messages can appear anywhere in a response; the client must detect the
// message type and process accordingly.
//
// Header specification:
// Continuation messages contain two headers, as follows:
// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-cont.png
//
// Payload specification:
// Continuation messages have no payload.
var continuationMessage = []byte("" +
	"\x00\x00\x00\x39" + // total byte-length (57).
	"\x00\x00\x00\x29" + // headers byte-length (41).
	"\x8b\xa1\x9d\xf2" + // prelude crc.
	"\x0d:message-type\x07\x00\x05event" + // header.
	"\x0b:event-type\x07\x00\x04Cont" + // header.
	"\x9c\x86\x4a\x0d") // message crc.
|
|
|
|
// progressHeader - pre-encoded headers of a Progress message. Every header is
// encoded as: name length (1 byte), name, value type (7), value length
// (2 bytes, big endian), value.
// Refer genProgressHeader().
var progressHeader = []byte("" +
	"\x0d:message-type\x07\x00\x05event" +
	"\x0d:content-type\x07\x00\x08text/xml" +
	"\x0b:event-type\x07\x00\x08Progress")
|
|
|
|
// newProgressMessage - creates new Progress Message. S3 periodically sends this message, if requested.
|
|
// It contains information about the progress of a query that has started but has not yet completed.
|
|
//
|
|
// Header specification:
|
|
// Progress messages contain three headers, as follows:
|
|
// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-progress.png
|
|
//
|
|
// Payload specification:
|
|
// Progress message payload is an XML document containing information about the progress of a request.
|
|
// * BytesScanned => Number of bytes that have been processed before being uncompressed (if the file is compressed).
|
|
// * BytesProcessed => Number of bytes that have been processed after being uncompressed (if the file is compressed).
|
|
// * BytesReturned => Current number of bytes of records payload data returned by S3.
|
|
//
|
|
// For uncompressed files, BytesScanned and BytesProcessed are equal.
|
|
//
|
|
// Example:
|
|
//
|
|
// <?xml version="1.0" encoding="UTF-8"?>
|
|
// <Progress>
|
|
// <BytesScanned>512</BytesScanned>
|
|
// <BytesProcessed>1024</BytesProcessed>
|
|
// <BytesReturned>1024</BytesReturned>
|
|
// </Progress>
|
|
//
|
|
func newProgressMessage(bytesScanned, bytesProcessed, bytesReturned int64) []byte {
|
|
payload := []byte(`<?xml version="1.0" encoding="UTF-8"?><Progress><BytesScanned>` +
|
|
strconv.FormatInt(bytesScanned, 10) + `</BytesScanned><BytesProcessed>` +
|
|
strconv.FormatInt(bytesProcessed, 10) + `</BytesProcessed><BytesReturned>` +
|
|
strconv.FormatInt(bytesReturned, 10) + `</BytesReturned></Stats>`)
|
|
return genMessage(progressHeader, payload)
|
|
}
|
|
|
|
// statsHeader - pre-encoded headers of a Stats message. Every header is
// encoded as: name length (1 byte), name, value type (7), value length
// (2 bytes, big endian), value.
// Refer genStatsHeader().
var statsHeader = []byte("" +
	"\x0d:message-type\x07\x00\x05event" +
	"\x0d:content-type\x07\x00\x08text/xml" +
	"\x0b:event-type\x07\x00\x05Stats")
|
|
|
|
// newStatsMessage - creates new Stats Message. S3 sends this message at the end of the request.
|
|
// It contains statistics about the query.
|
|
//
|
|
// Header specification:
|
|
// Stats messages contain three headers, as follows:
|
|
// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-stats.png
|
|
//
|
|
// Payload specification:
|
|
// Stats message payload is an XML document containing information about a request's stats when processing is complete.
|
|
// * BytesScanned => Number of bytes that have been processed before being uncompressed (if the file is compressed).
|
|
// * BytesProcessed => Number of bytes that have been processed after being uncompressed (if the file is compressed).
|
|
// * BytesReturned => Total number of bytes of records payload data returned by S3.
|
|
//
|
|
// For uncompressed files, BytesScanned and BytesProcessed are equal.
|
|
//
|
|
// Example:
|
|
//
|
|
// <?xml version="1.0" encoding="UTF-8"?>
|
|
// <Stats>
|
|
// <BytesScanned>512</BytesScanned>
|
|
// <BytesProcessed>1024</BytesProcessed>
|
|
// <BytesReturned>1024</BytesReturned>
|
|
// </Stats>
|
|
func newStatsMessage(bytesScanned, bytesProcessed, bytesReturned int64) []byte {
|
|
payload := []byte(`<?xml version="1.0" encoding="UTF-8"?><Stats><BytesScanned>` +
|
|
strconv.FormatInt(bytesScanned, 10) + `</BytesScanned><BytesProcessed>` +
|
|
strconv.FormatInt(bytesProcessed, 10) + `</BytesProcessed><BytesReturned>` +
|
|
strconv.FormatInt(bytesReturned, 10) + `</BytesReturned></Stats>`)
|
|
return genMessage(statsHeader, payload)
|
|
}
|
|
|
|
// endMessage - complete, pre-encoded End message. It indicates that the
// request is complete and that no more messages will be sent. A client should
// not assume that the request is complete until it receives an End message.
//
// Header specification:
// End messages contain two headers, as follows:
// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-end.png
//
// Payload specification:
// End messages have no payload.
var endMessage = []byte("" +
	"\x00\x00\x00\x38" + // total byte-length (56).
	"\x00\x00\x00\x28" + // headers byte-length (40).
	"\xc1\xc6\x84\xd4" + // prelude crc.
	"\x0d:message-type\x07\x00\x05event" + // header.
	"\x0b:event-type\x07\x00\x03End" + // header.
	"\xcf\x97\xd3\x92") // message crc.
|
|
|
|
// newErrorMessage - creates new Request Level Error Message. S3 sends this message if the request failed for any reason.
|
|
// It contains the error code and error message for the failure. If S3 sends a RequestLevelError message,
|
|
// it doesn't send an End message.
|
|
//
|
|
// Header specification:
|
|
// Request-level error messages contain three headers, as follows:
|
|
// https://docs.aws.amazon.com/AmazonS3/latest/API/images/s3select-frame-diagram-error.png
|
|
//
|
|
// Payload specification:
|
|
// Request-level error messages have no payload.
|
|
func newErrorMessage(errorCode, errorMessage []byte) []byte {
|
|
buf := new(bytes.Buffer)
|
|
|
|
buf.Write([]byte{13, ':', 'm', 'e', 's', 's', 'a', 'g', 'e', '-', 't', 'y', 'p', 'e', 7, 0, 5, 'e', 'r', 'r', 'o', 'r'})
|
|
|
|
buf.Write([]byte{14, ':', 'e', 'r', 'r', 'o', 'r', '-', 'm', 'e', 's', 's', 'a', 'g', 'e', 7})
|
|
binary.Write(buf, binary.BigEndian, uint16(len(errorMessage)))
|
|
buf.Write(errorMessage)
|
|
|
|
buf.Write([]byte{11, ':', 'e', 'r', 'r', 'o', 'r', '-', 'c', 'o', 'd', 'e', 7})
|
|
binary.Write(buf, binary.BigEndian, uint16(len(errorCode)))
|
|
buf.Write(errorCode)
|
|
|
|
return genMessage(buf.Bytes(), nil)
|
|
}
|
|
|
|
// NewErrorMessage - creates new Request Level Error Message specified in
|
|
// https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html.
|
|
func NewErrorMessage(errorCode, errorMessage string) []byte {
|
|
return newErrorMessage([]byte(errorCode), []byte(errorMessage))
|
|
}
|
|
|
|
// messageWriter - batches record payloads and streams encoded messages to an
// HTTP response through a background goroutine launched by start().
type messageWriter struct {
	// bytesReturned counts the record payload bytes handed over for sending.
	// It is accessed with 64-bit atomic operations and therefore MUST stay the
	// first field: only the first word of an allocated struct is guaranteed to
	// be 64-bit aligned on 32-bit platforms.
	bytesReturned int64

	// writer is the response the messages are written to.
	writer http.ResponseWriter
	// getProgressFunc, when non-nil, enables periodic Progress messages; its
	// two results are used as bytesScanned and bytesProcessed.
	getProgressFunc func() (int64, int64)

	// payloadBuffer accumulates record data until it is flushed;
	// payloadBufferIndex is the number of bytes currently buffered.
	payloadBuffer      []byte
	payloadBufferIndex int

	// dataCh carries encoded messages to the background goroutine.
	dataCh chan []byte
	// doneCh is closed by close() to ask the background goroutine to stop.
	doneCh chan struct{}
	// closeCh is closed by the background goroutine once it left its loop.
	closeCh chan struct{}
	// stopped is set to 1 (atomically) by the background goroutine on exit
	// from its loop.
	stopped uint32
	// closed is set to 1 (atomically) by the first call to close().
	closed uint32
}
|
|
|
|
func (writer *messageWriter) write(data []byte) bool {
|
|
if _, err := writer.writer.Write(data); err != nil {
|
|
return false
|
|
}
|
|
|
|
writer.writer.(http.Flusher).Flush()
|
|
return true
|
|
}
|
|
|
|
func (writer *messageWriter) start() {
|
|
keepAliveTicker := time.NewTicker(1 * time.Second)
|
|
var progressTicker *time.Ticker
|
|
if writer.getProgressFunc != nil {
|
|
progressTicker = time.NewTicker(1 * time.Minute)
|
|
}
|
|
|
|
go func() {
|
|
quitFlag := 0
|
|
for quitFlag == 0 {
|
|
if progressTicker == nil {
|
|
select {
|
|
case data, ok := <-writer.dataCh:
|
|
if !ok {
|
|
quitFlag = 1
|
|
break
|
|
}
|
|
if !writer.write(data) {
|
|
quitFlag = 1
|
|
}
|
|
case <-writer.doneCh:
|
|
quitFlag = 2
|
|
case <-keepAliveTicker.C:
|
|
if !writer.write(continuationMessage) {
|
|
quitFlag = 1
|
|
}
|
|
}
|
|
} else {
|
|
select {
|
|
case data, ok := <-writer.dataCh:
|
|
if !ok {
|
|
quitFlag = 1
|
|
break
|
|
}
|
|
if !writer.write(data) {
|
|
quitFlag = 1
|
|
}
|
|
case <-writer.doneCh:
|
|
quitFlag = 2
|
|
case <-keepAliveTicker.C:
|
|
if !writer.write(continuationMessage) {
|
|
quitFlag = 1
|
|
}
|
|
case <-progressTicker.C:
|
|
bytesScanned, bytesProcessed := writer.getProgressFunc()
|
|
bytesReturned := atomic.LoadInt64(&writer.bytesReturned)
|
|
if !writer.write(newProgressMessage(bytesScanned, bytesProcessed, bytesReturned)) {
|
|
quitFlag = 1
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
atomic.StoreUint32(&writer.stopped, 1)
|
|
close(writer.closeCh)
|
|
|
|
keepAliveTicker.Stop()
|
|
if progressTicker != nil {
|
|
progressTicker.Stop()
|
|
}
|
|
|
|
if quitFlag == 2 {
|
|
for data := range writer.dataCh {
|
|
if _, err := writer.writer.Write(data); err != nil {
|
|
break
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
func (writer *messageWriter) close() {
|
|
if atomic.SwapUint32(&writer.closed, 1) == 0 {
|
|
close(writer.doneCh)
|
|
for range writer.closeCh {
|
|
close(writer.dataCh)
|
|
}
|
|
}
|
|
}
|
|
|
|
const (
|
|
bufLength = maxRecordSize
|
|
)
|
|
|
|
// collectRecord - collects records into a buffer, and when it is
|
|
// full, sends a message with the collected payload.
|
|
func (writer *messageWriter) collectRecord(data []byte) (err error) {
|
|
freeSpace := bufLength - writer.payloadBufferIndex
|
|
if len(data) > freeSpace {
|
|
err = writer.FlushRecords()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
copy(writer.payloadBuffer[writer.payloadBufferIndex:], data)
|
|
writer.payloadBufferIndex += len(data)
|
|
return nil
|
|
}
|
|
|
|
func (writer *messageWriter) send(data []byte) error {
|
|
err := func() error {
|
|
if atomic.LoadUint32(&writer.stopped) == 1 {
|
|
return fmt.Errorf("writer already closed")
|
|
}
|
|
|
|
select {
|
|
case writer.dataCh <- data:
|
|
case <-writer.doneCh:
|
|
return fmt.Errorf("closed writer")
|
|
}
|
|
|
|
return nil
|
|
}()
|
|
|
|
if err != nil {
|
|
writer.close()
|
|
}
|
|
|
|
return err
|
|
}
|
|
|
|
func (writer *messageWriter) SendRecords(payload []byte) error {
|
|
return writer.collectRecord(payload)
|
|
}
|
|
|
|
func (writer *messageWriter) FlushRecords() (err error) {
|
|
err = writer.send(newRecordsMessage(writer.payloadBuffer[0:writer.payloadBufferIndex]))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
atomic.AddInt64(&writer.bytesReturned, int64(writer.payloadBufferIndex))
|
|
writer.payloadBufferIndex = 0
|
|
return nil
|
|
}
|
|
|
|
func (writer *messageWriter) SendStats(bytesScanned, bytesProcessed int64) error {
|
|
bytesReturned := atomic.LoadInt64(&writer.bytesReturned)
|
|
err := writer.send(newStatsMessage(bytesScanned, bytesProcessed, bytesReturned))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
err = writer.send(endMessage)
|
|
writer.close()
|
|
return err
|
|
}
|
|
|
|
func (writer *messageWriter) SendError(errorCode, errorMessage string) error {
|
|
err := writer.send(newErrorMessage([]byte(errorCode), []byte(errorMessage)))
|
|
if err == nil {
|
|
writer.close()
|
|
}
|
|
return err
|
|
}
|
|
|
|
func newMessageWriter(w http.ResponseWriter, getProgressFunc func() (bytesScanned, bytesProcessed int64)) *messageWriter {
|
|
writer := &messageWriter{
|
|
writer: w,
|
|
getProgressFunc: getProgressFunc,
|
|
|
|
payloadBuffer: make([]byte, bufLength),
|
|
|
|
dataCh: make(chan []byte),
|
|
doneCh: make(chan struct{}),
|
|
closeCh: make(chan struct{}),
|
|
}
|
|
writer.start()
|
|
return writer
|
|
}
|