mirror of
https://github.com/minio/minio.git
synced 2025-01-11 15:03:22 -05:00
select: Check if CSV is valid utf8 (#10991)
Check whether the first block of data is valid UTF-8. Fixes #10970
This commit is contained in:
parent
7cbca43eb1
commit
02aecb2fc1
@ -16,6 +16,8 @@
|
||||
|
||||
package csv
|
||||
|
||||
import "errors"
|
||||
|
||||
type s3Error struct {
|
||||
code string
|
||||
message string
|
||||
@ -51,3 +53,12 @@ func errCSVParsingError(err error) *s3Error {
|
||||
cause: err,
|
||||
}
|
||||
}
|
||||
|
||||
func errInvalidTextEncodingError() *s3Error {
|
||||
return &s3Error{
|
||||
code: "InvalidTextEncoding",
|
||||
message: "UTF-8 encoding is required.",
|
||||
statusCode: 400,
|
||||
cause: errors.New("invalid utf8 encoding"),
|
||||
}
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ import (
|
||||
"io"
|
||||
"runtime"
|
||||
"sync"
|
||||
"unicode/utf8"
|
||||
|
||||
csv "github.com/minio/minio/pkg/csvparser"
|
||||
"github.com/minio/minio/pkg/s3select/sql"
|
||||
@ -159,6 +160,9 @@ func (r *Reader) startReaders(newReader func(io.Reader) *csv.Reader) error {
|
||||
r.err = err
|
||||
return err
|
||||
}
|
||||
if !utf8.Valid(b) {
|
||||
return errInvalidTextEncodingError()
|
||||
}
|
||||
reader := newReader(bytes.NewReader(b))
|
||||
record, err := reader.Read()
|
||||
if err != nil {
|
||||
@ -181,6 +185,13 @@ func (r *Reader) startReaders(newReader func(io.Reader) *csv.Reader) error {
|
||||
return make([]byte, csvSplitSize+1024)
|
||||
}
|
||||
|
||||
// Read the first block up front so it can be validated.
|
||||
next, nextErr := r.nextSplit(csvSplitSize, r.bufferPool.Get().([]byte))
|
||||
// Check if first block is valid.
|
||||
if !utf8.Valid(next) {
|
||||
return errInvalidTextEncodingError()
|
||||
}
|
||||
|
||||
// Create queue
|
||||
r.queue = make(chan *queueItem, runtime.GOMAXPROCS(0))
|
||||
r.input = make(chan *queueItem, runtime.GOMAXPROCS(0))
|
||||
@ -192,11 +203,10 @@ func (r *Reader) startReaders(newReader func(io.Reader) *csv.Reader) error {
|
||||
defer close(r.queue)
|
||||
defer r.readerWg.Done()
|
||||
for {
|
||||
next, err := r.nextSplit(csvSplitSize, r.bufferPool.Get().([]byte))
|
||||
q := queueItem{
|
||||
input: next,
|
||||
dst: make(chan [][]string, 1),
|
||||
err: err,
|
||||
err: nextErr,
|
||||
}
|
||||
select {
|
||||
case <-r.close:
|
||||
@ -209,10 +219,11 @@ func (r *Reader) startReaders(newReader func(io.Reader) *csv.Reader) error {
|
||||
return
|
||||
case r.input <- &q:
|
||||
}
|
||||
if err != nil {
|
||||
if nextErr != nil {
|
||||
// Exit on any error.
|
||||
return
|
||||
}
|
||||
next, nextErr = r.nextSplit(csvSplitSize, r.bufferPool.Get().([]byte))
|
||||
}
|
||||
}()
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user