mirror of
https://github.com/minio/minio.git
synced 2025-01-12 07:23:23 -05:00
improve performance for inlined data (#15603)
Inlined data is often bigger than the allowed O_DIRECT alignment, so we can potentially write 'xl.meta' without O_DSYNC and rely on O_DIRECT + fdatasync() instead. This PR allows O_DIRECT on inlined data, gaining the benefits of O_DIRECT writes and performing an fdatasync() at the end. A performance boost can be observed for small objects < 128KiB. The boost is mainly seen on HDD and is marginal on NVMe setups.
This commit is contained in:
parent
92a0a59de2
commit
97376f6e8f
@ -1849,31 +1849,13 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz
|
|||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
if fileSize >= 0 && fileSize <= smallFileThreshold {
|
return s.writeAllDirect(ctx, filePath, fileSize, r, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
|
||||||
// For streams smaller than 128KiB we simply write them as O_DSYNC (fdatasync)
|
}
|
||||||
// and not O_DIRECT to avoid the complexities of aligned I/O.
|
|
||||||
w, err := s.openFileSync(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
defer w.Close()
|
|
||||||
|
|
||||||
written, err := io.Copy(w, r)
|
|
||||||
if err != nil {
|
|
||||||
return osErrToFileErr(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if written < fileSize {
|
|
||||||
return errLessData
|
|
||||||
} else if written > fileSize {
|
|
||||||
return errMoreData
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
|
func (s *xlStorage) writeAllDirect(ctx context.Context, filePath string, fileSize int64, r io.Reader, flags int) (err error) {
|
||||||
// Create top level directories if they don't exist.
|
// Create top level directories if they don't exist.
|
||||||
// with mode 0777 mkdir honors system umask.
|
// with mode 0777 mkdir honors system umask.
|
||||||
|
parentFilePath := pathutil.Dir(filePath)
|
||||||
if err = mkdirAll(parentFilePath, 0o777); err != nil {
|
if err = mkdirAll(parentFilePath, 0o777); err != nil {
|
||||||
return osErrToFileErr(err)
|
return osErrToFileErr(err)
|
||||||
}
|
}
|
||||||
@ -1881,24 +1863,23 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz
|
|||||||
odirectEnabled := s.oDirect
|
odirectEnabled := s.oDirect
|
||||||
var w *os.File
|
var w *os.File
|
||||||
if odirectEnabled {
|
if odirectEnabled {
|
||||||
w, err = OpenFileDirectIO(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0o666)
|
w, err = OpenFileDirectIO(filePath, flags, 0o666)
|
||||||
} else {
|
} else {
|
||||||
w, err = OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0o666)
|
w, err = OpenFile(filePath, flags, 0o666)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return osErrToFileErr(err)
|
return osErrToFileErr(err)
|
||||||
}
|
}
|
||||||
|
defer w.Close()
|
||||||
defer func() {
|
|
||||||
Fdatasync(w) // Only interested in flushing the size_t not mtime/atime
|
|
||||||
w.Close()
|
|
||||||
}()
|
|
||||||
|
|
||||||
var bufp *[]byte
|
var bufp *[]byte
|
||||||
if fileSize > 0 && fileSize >= largestFileThreshold {
|
if fileSize > 0 && fileSize >= largestFileThreshold {
|
||||||
// use a larger 4MiB buffer for a really large streams.
|
// use a larger 4MiB buffer for a really large streams.
|
||||||
bufp = xioutil.ODirectPoolXLarge.Get().(*[]byte)
|
bufp = xioutil.ODirectPoolXLarge.Get().(*[]byte)
|
||||||
defer xioutil.ODirectPoolXLarge.Put(bufp)
|
defer xioutil.ODirectPoolXLarge.Put(bufp)
|
||||||
|
} else if fileSize <= smallFileThreshold {
|
||||||
|
bufp = xioutil.ODirectPoolSmall.Get().(*[]byte)
|
||||||
|
defer xioutil.ODirectPoolSmall.Put(bufp)
|
||||||
} else {
|
} else {
|
||||||
bufp = xioutil.ODirectPoolLarge.Get().(*[]byte)
|
bufp = xioutil.ODirectPoolLarge.Get().(*[]byte)
|
||||||
defer xioutil.ODirectPoolLarge.Put(bufp)
|
defer xioutil.ODirectPoolLarge.Put(bufp)
|
||||||
@ -1920,7 +1901,8 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz
|
|||||||
return errMoreData
|
return errMoreData
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
// Only interested in flushing the size_t not mtime/atime
|
||||||
|
return Fdatasync(w)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b []byte, sync bool) (err error) {
|
func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b []byte, sync bool) (err error) {
|
||||||
@ -1934,11 +1916,22 @@ func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
flags := os.O_CREATE | os.O_WRONLY | os.O_TRUNC
|
||||||
|
|
||||||
var w *os.File
|
var w *os.File
|
||||||
if sync {
|
if sync {
|
||||||
w, err = s.openFileSync(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC)
|
// Perform directIO along with fdatasync for larger xl.meta, mostly when
|
||||||
|
// xl.meta has "inlined data" we prefer writing O_DIRECT and then doing
|
||||||
|
// fdatasync() at the end instead of opening the file with O_DSYNC.
|
||||||
|
//
|
||||||
|
// This is an optimization mainly to ensure faster I/O.
|
||||||
|
if len(b) > xioutil.DirectioAlignSize {
|
||||||
|
r := bytes.NewReader(b)
|
||||||
|
return s.writeAllDirect(ctx, filePath, r.Size(), r, flags)
|
||||||
|
}
|
||||||
|
w, err = s.openFileSync(filePath, flags)
|
||||||
} else {
|
} else {
|
||||||
w, err = s.openFileNoSync(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC)
|
w, err = s.openFileNoSync(filePath, flags)
|
||||||
}
|
}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -40,9 +40,9 @@ type ODirectReader struct {
|
|||||||
|
|
||||||
// Block sizes constant.
|
// Block sizes constant.
|
||||||
const (
|
const (
|
||||||
BlockSizeSmall = 128 * humanize.KiByte // Default r/w block size for smaller objects.
|
BlockSizeSmall = 32 * humanize.KiByte // Default r/w block size for smaller objects.
|
||||||
BlockSizeLarge = 2 * humanize.MiByte // Default r/w block size for larger objects.
|
BlockSizeLarge = 2 * humanize.MiByte // Default r/w block size for larger objects.
|
||||||
BlockSizeReallyLarge = 4 * humanize.MiByte // Default write block size for objects per shard >= 64MiB
|
BlockSizeReallyLarge = 4 * humanize.MiByte // Default write block size for objects per shard >= 64MiB
|
||||||
)
|
)
|
||||||
|
|
||||||
// O_DIRECT aligned sync.Pool's
|
// O_DIRECT aligned sync.Pool's
|
||||||
|
Loading…
Reference in New Issue
Block a user