fix: leaking connections in JSON SQL with limited return (#17239)

This commit is contained in:
Klaus Post 2023-05-18 11:26:46 -07:00 committed by GitHub
parent b784e458cb
commit b06d7bf834
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -21,6 +21,7 @@ import (
"fmt" "fmt"
"io" "io"
"sync" "sync"
"sync/atomic"
"github.com/minio/minio/internal/s3select/json" "github.com/minio/minio/internal/s3select/json"
"github.com/minio/minio/internal/s3select/sql" "github.com/minio/minio/internal/s3select/sql"
@@ -34,8 +35,9 @@ type Reader struct {
decoded chan simdjson.Object decoded chan simdjson.Object
// err will only be returned after decoded has been closed. // err will only be returned after decoded has been closed.
err *error err *error
readCloser io.ReadCloser readCloser io.ReadCloser
onReaderExit func()
exitReader chan struct{} exitReader chan struct{}
readerWg sync.WaitGroup readerWg sync.WaitGroup
@@ -79,9 +81,8 @@ func (r *Reader) Close() error {
// If r.input is closed, it is assumed that no more input will come. // If r.input is closed, it is assumed that no more input will come.
// When this function returns r.readerWg will be decremented and r.decoded will be closed. // When this function returns r.readerWg will be decremented and r.decoded will be closed.
// On errors, r.err will be set. This should only be accessed after r.decoded has been closed. // On errors, r.err will be set. This should only be accessed after r.decoded has been closed.
func (r *Reader) startReader() { func (r *Reader) startReader(reuse chan<- *simdjson.ParsedJson) {
defer r.readerWg.Done() defer r.onReaderExit()
defer close(r.decoded)
var tmpObj simdjson.Object var tmpObj simdjson.Object
for { for {
var in simdjson.Stream var in simdjson.Stream
@@ -143,6 +144,11 @@ func (r *Reader) startReader() {
return return
} }
} }
// Don't block if we cannot reuse.
select {
case reuse <- in.Value:
default:
}
if in.Error == io.EOF { if in.Error == io.EOF {
return return
} }
@@ -153,14 +159,25 @@ func (r *Reader) startReader() {
func NewReader(readCloser io.ReadCloser, args *json.ReaderArgs) *Reader { func NewReader(readCloser io.ReadCloser, args *json.ReaderArgs) *Reader {
r := Reader{ r := Reader{
args: args, args: args,
readCloser: readCloser, readCloser: &safeCloser{r: io.Reader(readCloser)},
decoded: make(chan simdjson.Object, 1000), decoded: make(chan simdjson.Object, 1000),
input: make(chan simdjson.Stream, 2), input: make(chan simdjson.Stream, 2),
exitReader: make(chan struct{}), exitReader: make(chan struct{}),
} }
simdjson.ParseNDStream(readCloser, r.input, nil) r.onReaderExit = func() {
close(r.decoded)
readCloser.Close()
for range r.input {
// Read until EOF trickles through.
// Otherwise, we risk the decoder hanging.
}
r.readerWg.Done()
}
reuse := make(chan *simdjson.ParsedJson, 1000)
simdjson.ParseNDStream(readCloser, r.input, reuse)
r.readerWg.Add(1) r.readerWg.Add(1)
go r.startReader() go r.startReader(reuse)
return &r return &r
} }
@@ -174,15 +191,25 @@ func NewElementReader(ch chan simdjson.Object, err *error, args *json.ReaderArgs
} }
} }
// NewTapeReaderChan will start a reader that will read input from the provided channel. // safeCloser will wrap a Reader as a ReadCloser.
func NewTapeReaderChan(pj chan simdjson.Stream, args *json.ReaderArgs) *Reader { // It is safe to call Close while the reader is being used.
r := Reader{ type safeCloser struct {
args: args, closed uint32
decoded: make(chan simdjson.Object, 1000), r io.Reader
input: pj, }
exitReader: make(chan struct{}),
} func (s *safeCloser) Read(p []byte) (n int, err error) {
r.readerWg.Add(1) if atomic.LoadUint32(&s.closed) == 1 {
go r.startReader() return 0, io.EOF
return &r }
n, err = s.r.Read(p)
if atomic.LoadUint32(&s.closed) == 1 {
return 0, io.EOF
}
return n, err
}
func (s *safeCloser) Close() error {
atomic.CompareAndSwapUint32(&s.closed, 0, 1)
return nil
} }