improve performance for inlined data (#15603)

inlined data often is bigger than the allowed
O_DIRECT alignment, so potentially we can write
'xl.meta' without O_DSYNC instead we can rely on
O_DIRECT + fdatasync() instead.

This PR allows O_DIRECT on inlined data that
would gain the benefits of performing O_DIRECT,
eventually performing an fdatasync() at the end.

Performance boost can be observed here for small
objects < 128KiB. The performance boost is mainly
seen on HDD, and marginal on NVMe setups.
This commit is contained in:
Harshavardhana 2022-08-29 11:19:29 -07:00 committed by GitHub
parent 92a0a59de2
commit 97376f6e8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 35 deletions

View File

@ -1849,31 +1849,13 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz
}
}()
if fileSize >= 0 && fileSize <= smallFileThreshold {
// For streams smaller than 128KiB we simply write them as O_DSYNC (fdatasync)
// and not O_DIRECT to avoid the complexities of aligned I/O.
w, err := s.openFileSync(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
if err != nil {
return err
}
defer w.Close()
written, err := io.Copy(w, r)
if err != nil {
return osErrToFileErr(err)
}
if written < fileSize {
return errLessData
} else if written > fileSize {
return errMoreData
}
return nil
}
return s.writeAllDirect(ctx, filePath, fileSize, r, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
}
func (s *xlStorage) writeAllDirect(ctx context.Context, filePath string, fileSize int64, r io.Reader, flags int) (err error) {
// Create top level directories if they don't exist.
// with mode 0777 mkdir honors system umask.
parentFilePath := pathutil.Dir(filePath)
if err = mkdirAll(parentFilePath, 0o777); err != nil {
return osErrToFileErr(err)
}
@ -1881,24 +1863,23 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz
odirectEnabled := s.oDirect
var w *os.File
if odirectEnabled {
w, err = OpenFileDirectIO(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0o666)
w, err = OpenFileDirectIO(filePath, flags, 0o666)
} else {
w, err = OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0o666)
w, err = OpenFile(filePath, flags, 0o666)
}
if err != nil {
return osErrToFileErr(err)
}
defer func() {
Fdatasync(w) // Only interested in flushing the size_t not mtime/atime
w.Close()
}()
defer w.Close()
var bufp *[]byte
if fileSize > 0 && fileSize >= largestFileThreshold {
// use a larger 4MiB buffer for a really large streams.
bufp = xioutil.ODirectPoolXLarge.Get().(*[]byte)
defer xioutil.ODirectPoolXLarge.Put(bufp)
} else if fileSize <= smallFileThreshold {
bufp = xioutil.ODirectPoolSmall.Get().(*[]byte)
defer xioutil.ODirectPoolSmall.Put(bufp)
} else {
bufp = xioutil.ODirectPoolLarge.Get().(*[]byte)
defer xioutil.ODirectPoolLarge.Put(bufp)
@ -1920,7 +1901,8 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz
return errMoreData
}
return nil
// Only interested in flushing the size_t not mtime/atime
return Fdatasync(w)
}
func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b []byte, sync bool) (err error) {
@ -1934,11 +1916,22 @@ func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b
return err
}
flags := os.O_CREATE | os.O_WRONLY | os.O_TRUNC
var w *os.File
if sync {
w, err = s.openFileSync(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC)
// Perform directIO along with fdatasync for larger xl.meta, mostly when
// xl.meta has "inlined data" we prefer writing O_DIRECT and then doing
// fdatasync() at the end instead of opening the file with O_DSYNC.
//
// This is an optimization mainly to ensure faster I/O.
if len(b) > xioutil.DirectioAlignSize {
r := bytes.NewReader(b)
return s.writeAllDirect(ctx, filePath, r.Size(), r, flags)
}
w, err = s.openFileSync(filePath, flags)
} else {
w, err = s.openFileNoSync(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC)
w, err = s.openFileNoSync(filePath, flags)
}
if err != nil {
return err

View File

@ -40,9 +40,9 @@ type ODirectReader struct {
// Block sizes constant.
const (
BlockSizeSmall = 128 * humanize.KiByte // Default r/w block size for smaller objects.
BlockSizeLarge = 2 * humanize.MiByte // Default r/w block size for larger objects.
BlockSizeReallyLarge = 4 * humanize.MiByte // Default write block size for objects per shard >= 64MiB
BlockSizeSmall = 32 * humanize.KiByte // Default r/w block size for smaller objects.
BlockSizeLarge = 2 * humanize.MiByte // Default r/w block size for larger objects.
BlockSizeReallyLarge = 4 * humanize.MiByte // Default write block size for objects per shard >= 64MiB
)
// O_DIRECT aligned sync.Pool's