improve performance for inlined data (#15603)

Inlined data is often bigger than the allowed
O_DIRECT alignment, so we can potentially write
'xl.meta' without O_DSYNC, relying on
O_DIRECT + fdatasync() instead.

This PR enables O_DIRECT for inlined data,
gaining the benefits of O_DIRECT writes while
performing an fdatasync() at the end.

A performance boost can be observed here for small
objects < 128KiB. The boost is mainly seen on HDD
and is marginal on NVMe setups.
This commit is contained in:
Harshavardhana 2022-08-29 11:19:29 -07:00 committed by GitHub
parent 92a0a59de2
commit 97376f6e8f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 28 additions and 35 deletions

View File

@ -1849,31 +1849,13 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz
} }
}() }()
if fileSize >= 0 && fileSize <= smallFileThreshold { return s.writeAllDirect(ctx, filePath, fileSize, r, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
// For streams smaller than 128KiB we simply write them as O_DSYNC (fdatasync) }
// and not O_DIRECT to avoid the complexities of aligned I/O.
w, err := s.openFileSync(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
if err != nil {
return err
}
defer w.Close()
written, err := io.Copy(w, r)
if err != nil {
return osErrToFileErr(err)
}
if written < fileSize {
return errLessData
} else if written > fileSize {
return errMoreData
}
return nil
}
func (s *xlStorage) writeAllDirect(ctx context.Context, filePath string, fileSize int64, r io.Reader, flags int) (err error) {
// Create top level directories if they don't exist. // Create top level directories if they don't exist.
// with mode 0777 mkdir honors system umask. // with mode 0777 mkdir honors system umask.
parentFilePath := pathutil.Dir(filePath)
if err = mkdirAll(parentFilePath, 0o777); err != nil { if err = mkdirAll(parentFilePath, 0o777); err != nil {
return osErrToFileErr(err) return osErrToFileErr(err)
} }
@ -1881,24 +1863,23 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz
odirectEnabled := s.oDirect odirectEnabled := s.oDirect
var w *os.File var w *os.File
if odirectEnabled { if odirectEnabled {
w, err = OpenFileDirectIO(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0o666) w, err = OpenFileDirectIO(filePath, flags, 0o666)
} else { } else {
w, err = OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0o666) w, err = OpenFile(filePath, flags, 0o666)
} }
if err != nil { if err != nil {
return osErrToFileErr(err) return osErrToFileErr(err)
} }
defer w.Close()
defer func() {
Fdatasync(w) // Only interested in flushing the size_t not mtime/atime
w.Close()
}()
var bufp *[]byte var bufp *[]byte
if fileSize > 0 && fileSize >= largestFileThreshold { if fileSize > 0 && fileSize >= largestFileThreshold {
// use a larger 4MiB buffer for a really large streams. // use a larger 4MiB buffer for a really large streams.
bufp = xioutil.ODirectPoolXLarge.Get().(*[]byte) bufp = xioutil.ODirectPoolXLarge.Get().(*[]byte)
defer xioutil.ODirectPoolXLarge.Put(bufp) defer xioutil.ODirectPoolXLarge.Put(bufp)
} else if fileSize <= smallFileThreshold {
bufp = xioutil.ODirectPoolSmall.Get().(*[]byte)
defer xioutil.ODirectPoolSmall.Put(bufp)
} else { } else {
bufp = xioutil.ODirectPoolLarge.Get().(*[]byte) bufp = xioutil.ODirectPoolLarge.Get().(*[]byte)
defer xioutil.ODirectPoolLarge.Put(bufp) defer xioutil.ODirectPoolLarge.Put(bufp)
@ -1920,7 +1901,8 @@ func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSiz
return errMoreData return errMoreData
} }
return nil // Only interested in flushing the size_t not mtime/atime
return Fdatasync(w)
} }
func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b []byte, sync bool) (err error) { func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b []byte, sync bool) (err error) {
@ -1934,11 +1916,22 @@ func (s *xlStorage) writeAll(ctx context.Context, volume string, path string, b
return err return err
} }
flags := os.O_CREATE | os.O_WRONLY | os.O_TRUNC
var w *os.File var w *os.File
if sync { if sync {
w, err = s.openFileSync(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC) // Perform directIO along with fdatasync for larger xl.meta, mostly when
// xl.meta has "inlined data" we prefer writing O_DIRECT and then doing
// fdatasync() at the end instead of opening the file with O_DSYNC.
//
// This is an optimization mainly to ensure faster I/O.
if len(b) > xioutil.DirectioAlignSize {
r := bytes.NewReader(b)
return s.writeAllDirect(ctx, filePath, r.Size(), r, flags)
}
w, err = s.openFileSync(filePath, flags)
} else { } else {
w, err = s.openFileNoSync(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC) w, err = s.openFileNoSync(filePath, flags)
} }
if err != nil { if err != nil {
return err return err

View File

@ -40,7 +40,7 @@ type ODirectReader struct {
// Block sizes constant. // Block sizes constant.
const ( const (
BlockSizeSmall = 128 * humanize.KiByte // Default r/w block size for smaller objects. BlockSizeSmall = 32 * humanize.KiByte // Default r/w block size for smaller objects.
BlockSizeLarge = 2 * humanize.MiByte // Default r/w block size for larger objects. BlockSizeLarge = 2 * humanize.MiByte // Default r/w block size for larger objects.
BlockSizeReallyLarge = 4 * humanize.MiByte // Default write block size for objects per shard >= 64MiB BlockSizeReallyLarge = 4 * humanize.MiByte // Default write block size for objects per shard >= 64MiB
) )