mirror of
https://github.com/minio/minio.git
synced 2025-11-20 01:50:24 -05:00
Use O_DIRECT while writing to disk (#7479)
- Use O_DIRECT while writing to disk
- Remove MINIO_DRIVE_SYNC option
This commit is contained in:
committed by
Harshavardhana
parent
ab711fe1a2
commit
a3ec71bc28
139
cmd/posix.go
139
cmd/posix.go
@@ -37,12 +37,14 @@ import (
|
||||
"github.com/minio/minio/cmd/logger"
|
||||
"github.com/minio/minio/pkg/disk"
|
||||
"github.com/minio/minio/pkg/mountinfo"
|
||||
"github.com/ncw/directio"
|
||||
)
|
||||
|
||||
const (
|
||||
diskMinFreeSpace = 900 * humanize.MiByte // Min 900MiB free space.
|
||||
diskMinTotalSpace = diskMinFreeSpace // Min 900MiB total space.
|
||||
maxAllowedIOError = 5
|
||||
diskMinFreeSpace = 900 * humanize.MiByte // Min 900MiB free space.
|
||||
diskMinTotalSpace = diskMinFreeSpace // Min 900MiB total space.
|
||||
maxAllowedIOError = 5
|
||||
posixWriteBlockSize = 4 * humanize.MiByte
|
||||
)
|
||||
|
||||
// isValidVolname verifies a volname name in accordance with object
|
||||
@@ -71,7 +73,6 @@ type posix struct {
|
||||
connected bool
|
||||
|
||||
diskMount bool // indicates if the path is an actual mount.
|
||||
driveSync bool // indicates if the backend is synchronous.
|
||||
|
||||
diskFileInfo os.FileInfo
|
||||
// Disk usage metrics
|
||||
@@ -188,7 +189,7 @@ func newPosix(path string) (*posix, error) {
|
||||
// Pool of aligned buffers (posixWriteBlockSize, 4MiB each) for posix internal operations.
|
||||
pool: sync.Pool{
|
||||
New: func() interface{} {
|
||||
b := make([]byte, readSizeV1)
|
||||
b := directio.AlignedBlock(posixWriteBlockSize)
|
||||
return &b
|
||||
},
|
||||
},
|
||||
@@ -197,15 +198,6 @@ func newPosix(path string) (*posix, error) {
|
||||
diskMount: mountinfo.IsLikelyMountPoint(path),
|
||||
}
|
||||
|
||||
var pf BoolFlag
|
||||
if driveSync := os.Getenv("MINIO_DRIVE_SYNC"); driveSync != "" {
|
||||
pf, err = ParseBoolFlag(driveSync)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
p.driveSync = bool(pf)
|
||||
}
|
||||
|
||||
if !p.diskMount {
|
||||
go p.diskUsage(GlobalServiceDoneCh)
|
||||
}
|
||||
@@ -1057,13 +1049,49 @@ func (s *posix) CreateFile(volume, path string, fileSize int64, r io.Reader) (er
|
||||
return err
|
||||
}
|
||||
|
||||
// Create file if not found. Note that it is created with os.O_EXCL flag as the file
|
||||
// always is supposed to be created in the tmp directory with a unique file name.
|
||||
w, err := s.openFile(volume, path, os.O_CREATE|os.O_APPEND|os.O_WRONLY|os.O_EXCL)
|
||||
if err != nil {
|
||||
if err = s.checkDiskFound(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
volumeDir, err := s.getVolDir(volume)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Stat a volume entry.
|
||||
_, err = os.Stat((volumeDir))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return errVolumeNotFound
|
||||
} else if isSysErrIO(err) {
|
||||
return errFaultyDisk
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
filePath := pathJoin(volumeDir, path)
|
||||
if err = checkPathLength((filePath)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create top level directories if they don't exist.
|
||||
// with mode 0777 mkdir honors system umask.
|
||||
if err = mkdirAll(slashpath.Dir(filePath), 0777); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
w, err := disk.OpenFileDirectIO(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL|os.O_SYNC, 0666)
|
||||
if err != nil {
|
||||
switch {
|
||||
case os.IsPermission(err):
|
||||
return errFileAccessDenied
|
||||
case os.IsExist(err):
|
||||
return errFileAccessDenied
|
||||
case isSysErrIO(err):
|
||||
return errFaultyDisk
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
defer w.Close()
|
||||
|
||||
var e error
|
||||
@@ -1090,16 +1118,69 @@ func (s *posix) CreateFile(volume, path string, fileSize int64, r io.Reader) (er
|
||||
bufp := s.pool.Get().(*[]byte)
|
||||
defer s.pool.Put(bufp)
|
||||
|
||||
n, err := io.CopyBuffer(w, r, *bufp)
|
||||
if err != nil {
|
||||
return err
|
||||
buf := *bufp
|
||||
var written int64
|
||||
dioCount := int(fileSize) / len(buf)
|
||||
for i := 0; i < dioCount; i++ {
|
||||
var n int
|
||||
_, err = io.ReadFull(r, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n, err = w.Write(buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
written += int64(n)
|
||||
}
|
||||
if n < fileSize {
|
||||
return errLessData
|
||||
// The following logic writes the remaining data such that as much as possible (aligned buffer) is written
|
||||
// in O_DIRECT mode and remaining (unaligned buffer) in non-O_DIRECT mode.
|
||||
remaining := fileSize % int64(len(buf))
|
||||
if remaining != 0 {
|
||||
buf = buf[:remaining]
|
||||
_, err = io.ReadFull(r, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
remainingAligned := (remaining / directio.AlignSize) * directio.AlignSize
|
||||
remainingAlignedBuf := buf[:remainingAligned]
|
||||
remainingUnalignedBuf := buf[remainingAligned:]
|
||||
if len(remainingAlignedBuf) > 0 {
|
||||
var n int
|
||||
n, err = w.Write(remainingAlignedBuf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
written += int64(n)
|
||||
}
|
||||
if len(remainingUnalignedBuf) > 0 {
|
||||
var n int
|
||||
// Writes on O_DIRECT fds fail if the buffer is not 4K aligned, hence disable O_DIRECT.
|
||||
if err = disk.DisableDirectIO(w); err != nil {
|
||||
return err
|
||||
}
|
||||
n, err = w.Write(remainingUnalignedBuf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
written += int64(n)
|
||||
}
|
||||
}
|
||||
if n > fileSize {
|
||||
|
||||
// Do some sanity checks.
|
||||
_, err = io.ReadFull(r, buf)
|
||||
if err != io.EOF {
|
||||
return errMoreData
|
||||
}
|
||||
|
||||
if written < fileSize {
|
||||
return errLessData
|
||||
}
|
||||
|
||||
if written > fileSize {
|
||||
return errMoreData
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1142,13 +1223,9 @@ func (s *posix) AppendFile(volume, path string, buf []byte) (err error) {
|
||||
}
|
||||
|
||||
var w *os.File
|
||||
// Create file if not found, additionally also enables synchronous
|
||||
// operation if asked by the user.
|
||||
if s.driveSync {
|
||||
w, err = s.openFile(volume, path, os.O_CREATE|os.O_SYNC|os.O_APPEND|os.O_WRONLY)
|
||||
} else {
|
||||
w, err = s.openFile(volume, path, os.O_CREATE|os.O_APPEND|os.O_WRONLY)
|
||||
}
|
||||
// Create file if not found. Not doing O_DIRECT here to avoid the code that does buffer aligned writes.
|
||||
// AppendFile() is only used by healing code to heal objects written in old format.
|
||||
w, err = s.openFile(volume, path, os.O_CREATE|os.O_SYNC|os.O_APPEND|os.O_WRONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user