Use O_DIRECT while writing to disk (#7479)

- Use O_DIRECT while writing to disk
- Remove MINIO_DRIVE_SYNC option
This commit is contained in:
Krishna Srinivas 2019-04-23 21:25:06 -07:00 committed by Harshavardhana
parent ab711fe1a2
commit a3ec71bc28
6 changed files with 226 additions and 31 deletions

View File

@ -37,12 +37,14 @@ import (
"github.com/minio/minio/cmd/logger" "github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/disk" "github.com/minio/minio/pkg/disk"
"github.com/minio/minio/pkg/mountinfo" "github.com/minio/minio/pkg/mountinfo"
"github.com/ncw/directio"
) )
const ( const (
diskMinFreeSpace = 900 * humanize.MiByte // Min 900MiB free space. diskMinFreeSpace = 900 * humanize.MiByte // Min 900MiB free space.
diskMinTotalSpace = diskMinFreeSpace // Min 900MiB total space. diskMinTotalSpace = diskMinFreeSpace // Min 900MiB total space.
maxAllowedIOError = 5 maxAllowedIOError = 5
posixWriteBlockSize = 4 * humanize.MiByte
) )
// isValidVolname verifies a volname name in accordance with object // isValidVolname verifies a volname name in accordance with object
@ -71,7 +73,6 @@ type posix struct {
connected bool connected bool
diskMount bool // indicates if the path is an actual mount. diskMount bool // indicates if the path is an actual mount.
driveSync bool // indicates if the backend is synchronous.
diskFileInfo os.FileInfo diskFileInfo os.FileInfo
// Disk usage metrics // Disk usage metrics
@ -188,7 +189,7 @@ func newPosix(path string) (*posix, error) {
// 1MiB buffer pool for posix internal operations. // 1MiB buffer pool for posix internal operations.
pool: sync.Pool{ pool: sync.Pool{
New: func() interface{} { New: func() interface{} {
b := make([]byte, readSizeV1) b := directio.AlignedBlock(posixWriteBlockSize)
return &b return &b
}, },
}, },
@ -197,15 +198,6 @@ func newPosix(path string) (*posix, error) {
diskMount: mountinfo.IsLikelyMountPoint(path), diskMount: mountinfo.IsLikelyMountPoint(path),
} }
var pf BoolFlag
if driveSync := os.Getenv("MINIO_DRIVE_SYNC"); driveSync != "" {
pf, err = ParseBoolFlag(driveSync)
if err != nil {
return nil, err
}
p.driveSync = bool(pf)
}
if !p.diskMount { if !p.diskMount {
go p.diskUsage(GlobalServiceDoneCh) go p.diskUsage(GlobalServiceDoneCh)
} }
@ -1057,13 +1049,49 @@ func (s *posix) CreateFile(volume, path string, fileSize int64, r io.Reader) (er
return err return err
} }
// Create file if not found. Note that it is created with os.O_EXCL flag as the file if err = s.checkDiskFound(); err != nil {
// always is supposed to be created in the tmp directory with a unique file name.
w, err := s.openFile(volume, path, os.O_CREATE|os.O_APPEND|os.O_WRONLY|os.O_EXCL)
if err != nil {
return err return err
} }
volumeDir, err := s.getVolDir(volume)
if err != nil {
return err
}
// Stat a volume entry.
_, err = os.Stat((volumeDir))
if err != nil {
if os.IsNotExist(err) {
return errVolumeNotFound
} else if isSysErrIO(err) {
return errFaultyDisk
}
return err
}
filePath := pathJoin(volumeDir, path)
if err = checkPathLength((filePath)); err != nil {
return err
}
// Create top level directories if they don't exist.
// with mode 0777 mkdir honors system umask.
if err = mkdirAll(slashpath.Dir(filePath), 0777); err != nil {
return err
}
w, err := disk.OpenFileDirectIO(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL|os.O_SYNC, 0666)
if err != nil {
switch {
case os.IsPermission(err):
return errFileAccessDenied
case os.IsExist(err):
return errFileAccessDenied
case isSysErrIO(err):
return errFaultyDisk
default:
return err
}
}
defer w.Close() defer w.Close()
var e error var e error
@ -1090,16 +1118,69 @@ func (s *posix) CreateFile(volume, path string, fileSize int64, r io.Reader) (er
bufp := s.pool.Get().(*[]byte) bufp := s.pool.Get().(*[]byte)
defer s.pool.Put(bufp) defer s.pool.Put(bufp)
n, err := io.CopyBuffer(w, r, *bufp) buf := *bufp
var written int64
dioCount := int(fileSize) / len(buf)
for i := 0; i < dioCount; i++ {
var n int
_, err = io.ReadFull(r, buf)
if err != nil { if err != nil {
return err return err
} }
if n < fileSize { n, err = w.Write(buf)
return errLessData if err != nil {
return err
} }
if n > fileSize { written += int64(n)
}
// The following logic writes the remainging data such that it writes whatever best is possible (aligned buffer)
// in O_DIRECT mode and remaining (unaligned buffer) in non-O_DIRECT mode.
remaining := fileSize % int64(len(buf))
if remaining != 0 {
buf = buf[:remaining]
_, err = io.ReadFull(r, buf)
if err != nil {
return err
}
remainingAligned := (remaining / directio.AlignSize) * directio.AlignSize
remainingAlignedBuf := buf[:remainingAligned]
remainingUnalignedBuf := buf[remainingAligned:]
if len(remainingAlignedBuf) > 0 {
var n int
n, err = w.Write(remainingAlignedBuf)
if err != nil {
return err
}
written += int64(n)
}
if len(remainingUnalignedBuf) > 0 {
var n int
// Write on O_DIRECT fds fail if buffer is not 4K aligned, hence disable O_DIRECT.
if err = disk.DisableDirectIO(w); err != nil {
return err
}
n, err = w.Write(remainingUnalignedBuf)
if err != nil {
return err
}
written += int64(n)
}
}
// Do some sanity checks.
_, err = io.ReadFull(r, buf)
if err != io.EOF {
return errMoreData return errMoreData
} }
if written < fileSize {
return errLessData
}
if written > fileSize {
return errMoreData
}
return nil return nil
} }
@ -1142,13 +1223,9 @@ func (s *posix) AppendFile(volume, path string, buf []byte) (err error) {
} }
var w *os.File var w *os.File
// Create file if not found, additionally also enables synchronous // Create file if not found. Not doing O_DIRECT here to avoid the code that does buffer aligned writes.
// operation if asked by the user. // AppendFile() is only used by healing code to heal objects written in old format.
if s.driveSync {
w, err = s.openFile(volume, path, os.O_CREATE|os.O_SYNC|os.O_APPEND|os.O_WRONLY) w, err = s.openFile(volume, path, os.O_CREATE|os.O_SYNC|os.O_APPEND|os.O_WRONLY)
} else {
w, err = s.openFile(volume, path, os.O_CREATE|os.O_APPEND|os.O_WRONLY)
}
if err != nil { if err != nil {
return err return err
} }

1
go.mod
View File

@ -69,6 +69,7 @@ require (
github.com/nats-io/nats v1.7.2 github.com/nats-io/nats v1.7.2
github.com/nats-io/nkeys v0.0.2 // indirect github.com/nats-io/nkeys v0.0.2 // indirect
github.com/nats-io/nuid v1.0.1 // indirect github.com/nats-io/nuid v1.0.1 // indirect
github.com/ncw/directio v1.0.5
github.com/nsqio/go-nsq v1.0.7 github.com/nsqio/go-nsq v1.0.7
github.com/pascaldekloe/goe v0.1.0 // indirect github.com/pascaldekloe/goe v0.1.0 // indirect
github.com/pkg/errors v0.8.1 // indirect github.com/pkg/errors v0.8.1 // indirect

2
go.sum
View File

@ -432,6 +432,8 @@ github.com/nats-io/nuid v1.0.0 h1:44QGdhbiANq8ZCbUkdn6W5bqtg+mHuDE4wOUuxxndFs=
github.com/nats-io/nuid v1.0.0/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/nats-io/nuid v1.0.0/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw= github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/ncw/directio v1.0.5 h1:JSUBhdjEvVaJvOoyPAbcW0fnd0tvRXD76wEfZ1KcQz4=
github.com/ncw/directio v1.0.5/go.mod h1:rX/pKEYkOXBGOggmcyJeJGloCkleSvphPx2eV3t6ROk=
github.com/nsqio/go-nsq v0.0.0-20181028195256-0527e80f3ba5/go.mod h1:XP5zaUs3pqf+Q71EqUJs3HYfBIqfK6G83WQMdNN+Ito= github.com/nsqio/go-nsq v0.0.0-20181028195256-0527e80f3ba5/go.mod h1:XP5zaUs3pqf+Q71EqUJs3HYfBIqfK6G83WQMdNN+Ito=
github.com/nsqio/go-nsq v1.0.7 h1:O0pIZJYTf+x7cZBA0UMY8WxFG79lYTURmWzAAh48ljY= github.com/nsqio/go-nsq v1.0.7 h1:O0pIZJYTf+x7cZBA0UMY8WxFG79lYTURmWzAAh48ljY=
github.com/nsqio/go-nsq v1.0.7/go.mod h1:XP5zaUs3pqf+Q71EqUJs3HYfBIqfK6G83WQMdNN+Ito= github.com/nsqio/go-nsq v1.0.7/go.mod h1:XP5zaUs3pqf+Q71EqUJs3HYfBIqfK6G83WQMdNN+Ito=

View File

@ -0,0 +1,36 @@
/*
* Minio Cloud Storage, (C) 2019 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package disk
import (
"os"
"github.com/ncw/directio"
"golang.org/x/sys/unix"
)
// OpenFileDirectIO - bypass kernel cache.
func OpenFileDirectIO(filePath string, flag int, perm os.FileMode) (*os.File, error) {
return directio.OpenFile(filePath, flag, perm)
}
// DisableDirectIO - disables directio mode.
func DisableDirectIO(f *os.File) error {
fd := f.Fd()
_, err := unix.FcntlInt(fd, unix.F_NOCACHE, 0)
return err
}

44
pkg/disk/directio_unix.go Normal file
View File

@ -0,0 +1,44 @@
// +build linux netbsd freebsd
/*
* Minio Cloud Storage, (C) 2019 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package disk
import (
"os"
"syscall"
"github.com/ncw/directio"
"golang.org/x/sys/unix"
)
// OpenFileDirectIO - bypass kernel cache.
func OpenFileDirectIO(filePath string, flag int, perm os.FileMode) (*os.File, error) {
return directio.OpenFile(filePath, flag, perm)
}
// DisableDirectIO - disables directio mode.
func DisableDirectIO(f *os.File) error {
fd := f.Fd()
flag, err := unix.FcntlInt(fd, unix.F_GETFL, 0)
if err != nil {
return err
}
flag = flag & ^(syscall.O_DIRECT)
_, err = unix.FcntlInt(fd, unix.F_SETFL, flag)
return err
}

View File

@ -0,0 +1,35 @@
// +build !linux,!netbsd,!freebsd,!darwin
/*
* Minio Cloud Storage, (C) 2019 Minio, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package disk
import (
"os"
)
// OpenBSD and Windows not supported.
// On OpenBSD O_DIRECT is not supported
// On Windows there is no documentation on disabling O_DIRECT
func OpenFileDirectIO(filePath string, flag int, perm os.FileMode) (*os.File, error) {
return os.OpenFile(filePath, flag, perm)
}
func DisableDirectIO(f *os.File) error {
return nil
}