mirror of
https://github.com/minio/minio.git
synced 2025-04-22 03:24:38 -04:00
use sendfile/splice implementation to perform DMA (#18411)
sendfile implementation to perform DMA on all platforms Go stdlib already supports sendfile/splice implementations for - Linux - Windows - *BSD - Solaris Along with this change however O_DIRECT for reads() must be removed as well since we need to use sendfile() implementation The main reason to add O_DIRECT for reads was to reduce the chances of page-cache causing OOMs for MinIO, however it would seem that avoiding buffer copies from user-space to kernel space this issue is not a problem anymore. There is no Go based memory allocation required, and neither the page-cache is referenced back to MinIO. This page- cache reference is fully owned by kernel at this point, this essentially should solve the problem of page-cache build up. With this now we also support SG - when NIC supports Scatter/Gather https://en.wikipedia.org/wiki/Gather/scatter_(vector_addressing)
This commit is contained in:
parent
80adc87a14
commit
91d8bddbd1
@ -611,6 +611,8 @@ func (s *storageRESTServer) ReadFileStreamHandler(w http.ResponseWriter, r *http
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
w.Header().Set(xhttp.ContentLength, strconv.Itoa(length))
|
||||||
|
|
||||||
rc, err := s.storage.ReadFileStream(r.Context(), volume, filePath, int64(offset), int64(length))
|
rc, err := s.storage.ReadFileStream(r.Context(), volume, filePath, int64(offset), int64(length))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
s.writeErrorResponse(w, err)
|
s.writeErrorResponse(w, err)
|
||||||
@ -618,13 +620,29 @@ func (s *storageRESTServer) ReadFileStreamHandler(w http.ResponseWriter, r *http
|
|||||||
}
|
}
|
||||||
defer rc.Close()
|
defer rc.Close()
|
||||||
|
|
||||||
w.Header().Set(xhttp.ContentLength, strconv.Itoa(length))
|
rf, ok := w.(io.ReaderFrom)
|
||||||
if _, err = xioutil.Copy(w, rc); err != nil {
|
if ok {
|
||||||
|
// Attempt to use splice/sendfile() optimization, A very specific behavior mentioned below is necessary.
|
||||||
|
// See https://github.com/golang/go/blob/f7c5cbb82087c55aa82081e931e0142783700ce8/src/net/sendfile_linux.go#L20
|
||||||
|
dr, ok := rc.(*xioutil.DeadlineReader)
|
||||||
|
if ok {
|
||||||
|
sr, ok := dr.ReadCloser.(*sendFileReader)
|
||||||
|
if ok {
|
||||||
|
_, err = rf.ReadFrom(sr.Reader)
|
||||||
if !xnet.IsNetworkOrHostDown(err, true) { // do not need to log disconnected clients
|
if !xnet.IsNetworkOrHostDown(err, true) { // do not need to log disconnected clients
|
||||||
logger.LogIf(r.Context(), err)
|
logger.LogIf(r.Context(), err)
|
||||||
}
|
}
|
||||||
|
if err == nil || !errors.Is(err, xhttp.ErrNotImplemented) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} // Fallback to regular copy
|
||||||
|
|
||||||
|
_, err = xioutil.Copy(w, rc)
|
||||||
|
if !xnet.IsNetworkOrHostDown(err, true) { // do not need to log disconnected clients
|
||||||
|
logger.LogIf(r.Context(), err)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ListDirHandler - list a directory.
|
// ListDirHandler - list a directory.
|
||||||
|
@ -421,7 +421,13 @@ func (p *xlStorageDiskIDCheck) ReadFile(ctx context.Context, volume string, path
|
|||||||
}
|
}
|
||||||
defer done(&err)
|
defer done(&err)
|
||||||
|
|
||||||
return p.storage.ReadFile(ctx, volume, path, offset, buf, verifier)
|
w := xioutil.NewDeadlineWorker(diskMaxTimeout)
|
||||||
|
err = w.Run(func() error {
|
||||||
|
n, err = p.storage.ReadFile(ctx, volume, path, offset, buf, verifier)
|
||||||
|
return err
|
||||||
|
})
|
||||||
|
|
||||||
|
return n, err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Legacy API - does not have any deadlines
|
// Legacy API - does not have any deadlines
|
||||||
@ -432,7 +438,10 @@ func (p *xlStorageDiskIDCheck) AppendFile(ctx context.Context, volume string, pa
|
|||||||
}
|
}
|
||||||
defer done(&err)
|
defer done(&err)
|
||||||
|
|
||||||
|
w := xioutil.NewDeadlineWorker(diskMaxTimeout)
|
||||||
|
return w.Run(func() error {
|
||||||
return p.storage.AppendFile(ctx, volume, path, buf)
|
return p.storage.AppendFile(ctx, volume, path, buf)
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, volume, path string, size int64, reader io.Reader) (err error) {
|
func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, volume, path string, size int64, reader io.Reader) (err error) {
|
||||||
@ -442,7 +451,7 @@ func (p *xlStorageDiskIDCheck) CreateFile(ctx context.Context, volume, path stri
|
|||||||
}
|
}
|
||||||
defer done(&err)
|
defer done(&err)
|
||||||
|
|
||||||
return p.storage.CreateFile(ctx, volume, path, size, reader)
|
return p.storage.CreateFile(ctx, volume, path, size, xioutil.NewDeadlineReader(io.NopCloser(reader), diskMaxTimeout))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
|
func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
|
||||||
@ -452,9 +461,16 @@ func (p *xlStorageDiskIDCheck) ReadFileStream(ctx context.Context, volume, path
|
|||||||
}
|
}
|
||||||
defer done(&err)
|
defer done(&err)
|
||||||
|
|
||||||
rc, err := p.storage.ReadFileStream(ctx, volume, path, offset, length)
|
w := xioutil.NewDeadlineWorker(diskMaxTimeout)
|
||||||
|
|
||||||
|
var rc io.ReadCloser
|
||||||
|
err = w.Run(func() error {
|
||||||
|
var ierr error
|
||||||
|
rc, ierr = p.storage.ReadFileStream(ctx, volume, path, offset, length)
|
||||||
|
return ierr
|
||||||
|
})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return rc, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
return xioutil.NewDeadlineReader(rc, diskMaxTimeout), nil
|
return xioutil.NewDeadlineReader(rc, diskMaxTimeout), nil
|
||||||
|
@ -1779,6 +1779,11 @@ func (s *xlStorage) openFile(filePath string, mode int) (f *os.File, err error)
|
|||||||
return w, nil
|
return w, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type sendFileReader struct {
|
||||||
|
io.Reader
|
||||||
|
io.Closer
|
||||||
|
}
|
||||||
|
|
||||||
// ReadFileStream - Returns the read stream of the file.
|
// ReadFileStream - Returns the read stream of the file.
|
||||||
func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
|
func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
|
||||||
if offset < 0 {
|
if offset < 0 {
|
||||||
@ -1796,14 +1801,7 @@ func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, off
|
|||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
odirectEnabled := globalAPIConfig.odirectEnabled() && s.oDirect && length >= 0
|
file, err := OpenFile(filePath, readMode, 0o666)
|
||||||
|
|
||||||
var file *os.File
|
|
||||||
if odirectEnabled {
|
|
||||||
file, err = OpenFileDirectIO(filePath, readMode, 0o666)
|
|
||||||
} else {
|
|
||||||
file, err = OpenFile(filePath, readMode, 0o666)
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
switch {
|
switch {
|
||||||
case osIsNotExist(err):
|
case osIsNotExist(err):
|
||||||
@ -1852,14 +1850,6 @@ func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, off
|
|||||||
return nil, errFileCorrupt
|
return nil, errFileCorrupt
|
||||||
}
|
}
|
||||||
|
|
||||||
alignment := offset%xioutil.DirectioAlignSize == 0
|
|
||||||
if !alignment && odirectEnabled {
|
|
||||||
if err = disk.DisableDirectIO(file); err != nil {
|
|
||||||
file.Close()
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if offset > 0 {
|
if offset > 0 {
|
||||||
if _, err = file.Seek(offset, io.SeekStart); err != nil {
|
if _, err = file.Seek(offset, io.SeekStart); err != nil {
|
||||||
file.Close()
|
file.Close()
|
||||||
@ -1867,26 +1857,7 @@ func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, off
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
or := &xioutil.ODirectReader{
|
return &sendFileReader{Reader: io.LimitReader(file, length), Closer: file}, nil
|
||||||
File: file,
|
|
||||||
// Select bigger blocks when reading at least 50% of a big block.
|
|
||||||
SmallFile: length <= xioutil.BlockSizeLarge/2,
|
|
||||||
}
|
|
||||||
|
|
||||||
r := struct {
|
|
||||||
io.Reader
|
|
||||||
io.Closer
|
|
||||||
}{Reader: io.LimitReader(diskHealthReader(ctx, or), length), Closer: closeWrapper(func() error {
|
|
||||||
if (!alignment || offset+length%xioutil.DirectioAlignSize != 0) && odirectEnabled {
|
|
||||||
// invalidate page-cache for unaligned reads.
|
|
||||||
// skip removing from page-cache only
|
|
||||||
// if O_DIRECT was disabled.
|
|
||||||
disk.FadviseDontNeed(file)
|
|
||||||
}
|
|
||||||
return or.Close()
|
|
||||||
})}
|
|
||||||
|
|
||||||
return r, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// closeWrapper converts a function to an io.Closer
|
// closeWrapper converts a function to an io.Closer
|
||||||
|
@ -94,7 +94,7 @@ var (
|
|||||||
},
|
},
|
||||||
config.HelpKV{
|
config.HelpKV{
|
||||||
Key: apiODirect,
|
Key: apiODirect,
|
||||||
Description: "set to enable or disable O_DIRECT for read and writes under special conditions. NOTE: do not disable O_DIRECT without prior testing" + defaultHelpPostfix(apiODirect),
|
Description: "set to enable or disable O_DIRECT for writes under special conditions. NOTE: do not disable O_DIRECT without prior testing" + defaultHelpPostfix(apiODirect),
|
||||||
Optional: true,
|
Optional: true,
|
||||||
Type: "boolean",
|
Type: "boolean",
|
||||||
},
|
},
|
||||||
|
@ -19,6 +19,7 @@ package http
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
@ -29,6 +30,7 @@ import (
|
|||||||
// status code and to record the response body
|
// status code and to record the response body
|
||||||
type ResponseRecorder struct {
|
type ResponseRecorder struct {
|
||||||
http.ResponseWriter
|
http.ResponseWriter
|
||||||
|
io.ReaderFrom
|
||||||
StatusCode int
|
StatusCode int
|
||||||
// Log body of 4xx or 5xx responses
|
// Log body of 4xx or 5xx responses
|
||||||
LogErrBody bool
|
LogErrBody bool
|
||||||
@ -51,13 +53,27 @@ type ResponseRecorder struct {
|
|||||||
// NewResponseRecorder - returns a wrapped response writer to trap
|
// NewResponseRecorder - returns a wrapped response writer to trap
|
||||||
// http status codes for auditing purposes.
|
// http status codes for auditing purposes.
|
||||||
func NewResponseRecorder(w http.ResponseWriter) *ResponseRecorder {
|
func NewResponseRecorder(w http.ResponseWriter) *ResponseRecorder {
|
||||||
|
rf, _ := w.(io.ReaderFrom)
|
||||||
return &ResponseRecorder{
|
return &ResponseRecorder{
|
||||||
ResponseWriter: w,
|
ResponseWriter: w,
|
||||||
|
ReaderFrom: rf,
|
||||||
StatusCode: http.StatusOK,
|
StatusCode: http.StatusOK,
|
||||||
StartTime: time.Now().UTC(),
|
StartTime: time.Now().UTC(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ErrNotImplemented when a functionality is not implemented
|
||||||
|
var ErrNotImplemented = errors.New("not implemented")
|
||||||
|
|
||||||
|
// ReadFrom implements support for calling internal io.ReaderFrom implementations
|
||||||
|
// returns an error if the underlying ResponseWriter does not implement io.ReaderFrom
|
||||||
|
func (lrw *ResponseRecorder) ReadFrom(r io.Reader) (int64, error) {
|
||||||
|
if lrw.ReaderFrom != nil {
|
||||||
|
return lrw.ReaderFrom.ReadFrom(r)
|
||||||
|
}
|
||||||
|
return 0, ErrNotImplemented
|
||||||
|
}
|
||||||
|
|
||||||
func (lrw *ResponseRecorder) Write(p []byte) (int, error) {
|
func (lrw *ResponseRecorder) Write(p []byte) (int, error) {
|
||||||
if !lrw.headersLogged {
|
if !lrw.headersLogged {
|
||||||
// We assume the response code to be '200 OK' when WriteHeader() is not called,
|
// We assume the response code to be '200 OK' when WriteHeader() is not called,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user