mirror of https://github.com/minio/minio.git
Use O_DIRECT while writing to disk (#7479)
- Use O_DIRECT while writing to disk - Remove MINIO_DRIVE_SYNC option
This commit is contained in:
parent
ab711fe1a2
commit
a3ec71bc28
139
cmd/posix.go
139
cmd/posix.go
|
@ -37,12 +37,14 @@ import (
|
|||
"github.com/minio/minio/cmd/logger"
|
||||
"github.com/minio/minio/pkg/disk"
|
||||
"github.com/minio/minio/pkg/mountinfo"
|
||||
"github.com/ncw/directio"
|
||||
)
|
||||
|
||||
const (
|
||||
diskMinFreeSpace = 900 * humanize.MiByte // Min 900MiB free space.
|
||||
diskMinTotalSpace = diskMinFreeSpace // Min 900MiB total space.
|
||||
maxAllowedIOError = 5
|
||||
diskMinFreeSpace = 900 * humanize.MiByte // Min 900MiB free space.
|
||||
diskMinTotalSpace = diskMinFreeSpace // Min 900MiB total space.
|
||||
maxAllowedIOError = 5
|
||||
posixWriteBlockSize = 4 * humanize.MiByte
|
||||
)
|
||||
|
||||
// isValidVolname verifies a volname name in accordance with object
|
||||
|
@ -71,7 +73,6 @@ type posix struct {
|
|||
connected bool
|
||||
|
||||
diskMount bool // indicates if the path is an actual mount.
|
||||
driveSync bool // indicates if the backend is synchronous.
|
||||
|
||||
diskFileInfo os.FileInfo
|
||||
// Disk usage metrics
|
||||
|
@ -188,7 +189,7 @@ func newPosix(path string) (*posix, error) {
|
|||
// 1MiB buffer pool for posix internal operations.
|
||||
pool: sync.Pool{
|
||||
New: func() interface{} {
|
||||
b := make([]byte, readSizeV1)
|
||||
b := directio.AlignedBlock(posixWriteBlockSize)
|
||||
return &b
|
||||
},
|
||||
},
|
||||
|
@ -197,15 +198,6 @@ func newPosix(path string) (*posix, error) {
|
|||
diskMount: mountinfo.IsLikelyMountPoint(path),
|
||||
}
|
||||
|
||||
var pf BoolFlag
|
||||
if driveSync := os.Getenv("MINIO_DRIVE_SYNC"); driveSync != "" {
|
||||
pf, err = ParseBoolFlag(driveSync)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
p.driveSync = bool(pf)
|
||||
}
|
||||
|
||||
if !p.diskMount {
|
||||
go p.diskUsage(GlobalServiceDoneCh)
|
||||
}
|
||||
|
@ -1057,13 +1049,49 @@ func (s *posix) CreateFile(volume, path string, fileSize int64, r io.Reader) (er
|
|||
return err
|
||||
}
|
||||
|
||||
// Create file if not found. Note that it is created with os.O_EXCL flag as the file
|
||||
// always is supposed to be created in the tmp directory with a unique file name.
|
||||
w, err := s.openFile(volume, path, os.O_CREATE|os.O_APPEND|os.O_WRONLY|os.O_EXCL)
|
||||
if err != nil {
|
||||
if err = s.checkDiskFound(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
volumeDir, err := s.getVolDir(volume)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Stat a volume entry.
|
||||
_, err = os.Stat((volumeDir))
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return errVolumeNotFound
|
||||
} else if isSysErrIO(err) {
|
||||
return errFaultyDisk
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
filePath := pathJoin(volumeDir, path)
|
||||
if err = checkPathLength((filePath)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create top level directories if they don't exist.
|
||||
// with mode 0777 mkdir honors system umask.
|
||||
if err = mkdirAll(slashpath.Dir(filePath), 0777); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
w, err := disk.OpenFileDirectIO(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL|os.O_SYNC, 0666)
|
||||
if err != nil {
|
||||
switch {
|
||||
case os.IsPermission(err):
|
||||
return errFileAccessDenied
|
||||
case os.IsExist(err):
|
||||
return errFileAccessDenied
|
||||
case isSysErrIO(err):
|
||||
return errFaultyDisk
|
||||
default:
|
||||
return err
|
||||
}
|
||||
}
|
||||
defer w.Close()
|
||||
|
||||
var e error
|
||||
|
@ -1090,16 +1118,69 @@ func (s *posix) CreateFile(volume, path string, fileSize int64, r io.Reader) (er
|
|||
bufp := s.pool.Get().(*[]byte)
|
||||
defer s.pool.Put(bufp)
|
||||
|
||||
n, err := io.CopyBuffer(w, r, *bufp)
|
||||
if err != nil {
|
||||
return err
|
||||
buf := *bufp
|
||||
var written int64
|
||||
dioCount := int(fileSize) / len(buf)
|
||||
for i := 0; i < dioCount; i++ {
|
||||
var n int
|
||||
_, err = io.ReadFull(r, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
n, err = w.Write(buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
written += int64(n)
|
||||
}
|
||||
if n < fileSize {
|
||||
return errLessData
|
||||
// The following logic writes the remainging data such that it writes whatever best is possible (aligned buffer)
|
||||
// in O_DIRECT mode and remaining (unaligned buffer) in non-O_DIRECT mode.
|
||||
remaining := fileSize % int64(len(buf))
|
||||
if remaining != 0 {
|
||||
buf = buf[:remaining]
|
||||
_, err = io.ReadFull(r, buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
remainingAligned := (remaining / directio.AlignSize) * directio.AlignSize
|
||||
remainingAlignedBuf := buf[:remainingAligned]
|
||||
remainingUnalignedBuf := buf[remainingAligned:]
|
||||
if len(remainingAlignedBuf) > 0 {
|
||||
var n int
|
||||
n, err = w.Write(remainingAlignedBuf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
written += int64(n)
|
||||
}
|
||||
if len(remainingUnalignedBuf) > 0 {
|
||||
var n int
|
||||
// Write on O_DIRECT fds fail if buffer is not 4K aligned, hence disable O_DIRECT.
|
||||
if err = disk.DisableDirectIO(w); err != nil {
|
||||
return err
|
||||
}
|
||||
n, err = w.Write(remainingUnalignedBuf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
written += int64(n)
|
||||
}
|
||||
}
|
||||
if n > fileSize {
|
||||
|
||||
// Do some sanity checks.
|
||||
_, err = io.ReadFull(r, buf)
|
||||
if err != io.EOF {
|
||||
return errMoreData
|
||||
}
|
||||
|
||||
if written < fileSize {
|
||||
return errLessData
|
||||
}
|
||||
|
||||
if written > fileSize {
|
||||
return errMoreData
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -1142,13 +1223,9 @@ func (s *posix) AppendFile(volume, path string, buf []byte) (err error) {
|
|||
}
|
||||
|
||||
var w *os.File
|
||||
// Create file if not found, additionally also enables synchronous
|
||||
// operation if asked by the user.
|
||||
if s.driveSync {
|
||||
w, err = s.openFile(volume, path, os.O_CREATE|os.O_SYNC|os.O_APPEND|os.O_WRONLY)
|
||||
} else {
|
||||
w, err = s.openFile(volume, path, os.O_CREATE|os.O_APPEND|os.O_WRONLY)
|
||||
}
|
||||
// Create file if not found. Not doing O_DIRECT here to avoid the code that does buffer aligned writes.
|
||||
// AppendFile() is only used by healing code to heal objects written in old format.
|
||||
w, err = s.openFile(volume, path, os.O_CREATE|os.O_SYNC|os.O_APPEND|os.O_WRONLY)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
|
1
go.mod
1
go.mod
|
@ -69,6 +69,7 @@ require (
|
|||
github.com/nats-io/nats v1.7.2
|
||||
github.com/nats-io/nkeys v0.0.2 // indirect
|
||||
github.com/nats-io/nuid v1.0.1 // indirect
|
||||
github.com/ncw/directio v1.0.5
|
||||
github.com/nsqio/go-nsq v1.0.7
|
||||
github.com/pascaldekloe/goe v0.1.0 // indirect
|
||||
github.com/pkg/errors v0.8.1 // indirect
|
||||
|
|
2
go.sum
2
go.sum
|
@ -432,6 +432,8 @@ github.com/nats-io/nuid v1.0.0 h1:44QGdhbiANq8ZCbUkdn6W5bqtg+mHuDE4wOUuxxndFs=
|
|||
github.com/nats-io/nuid v1.0.0/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
||||
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
||||
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
||||
github.com/ncw/directio v1.0.5 h1:JSUBhdjEvVaJvOoyPAbcW0fnd0tvRXD76wEfZ1KcQz4=
|
||||
github.com/ncw/directio v1.0.5/go.mod h1:rX/pKEYkOXBGOggmcyJeJGloCkleSvphPx2eV3t6ROk=
|
||||
github.com/nsqio/go-nsq v0.0.0-20181028195256-0527e80f3ba5/go.mod h1:XP5zaUs3pqf+Q71EqUJs3HYfBIqfK6G83WQMdNN+Ito=
|
||||
github.com/nsqio/go-nsq v1.0.7 h1:O0pIZJYTf+x7cZBA0UMY8WxFG79lYTURmWzAAh48ljY=
|
||||
github.com/nsqio/go-nsq v1.0.7/go.mod h1:XP5zaUs3pqf+Q71EqUJs3HYfBIqfK6G83WQMdNN+Ito=
|
||||
|
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package disk
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/ncw/directio"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// OpenFileDirectIO - bypass kernel cache.
|
||||
func OpenFileDirectIO(filePath string, flag int, perm os.FileMode) (*os.File, error) {
|
||||
return directio.OpenFile(filePath, flag, perm)
|
||||
}
|
||||
|
||||
// DisableDirectIO - disables directio mode.
|
||||
func DisableDirectIO(f *os.File) error {
|
||||
fd := f.Fd()
|
||||
_, err := unix.FcntlInt(fd, unix.F_NOCACHE, 0)
|
||||
return err
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
// +build linux netbsd freebsd
|
||||
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package disk
|
||||
|
||||
import (
|
||||
"os"
|
||||
"syscall"
|
||||
|
||||
"github.com/ncw/directio"
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// OpenFileDirectIO - bypass kernel cache.
|
||||
func OpenFileDirectIO(filePath string, flag int, perm os.FileMode) (*os.File, error) {
|
||||
return directio.OpenFile(filePath, flag, perm)
|
||||
}
|
||||
|
||||
// DisableDirectIO - disables directio mode.
|
||||
func DisableDirectIO(f *os.File) error {
|
||||
fd := f.Fd()
|
||||
flag, err := unix.FcntlInt(fd, unix.F_GETFL, 0)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
flag = flag & ^(syscall.O_DIRECT)
|
||||
_, err = unix.FcntlInt(fd, unix.F_SETFL, flag)
|
||||
return err
|
||||
}
|
|
@ -0,0 +1,35 @@
|
|||
// +build !linux,!netbsd,!freebsd,!darwin
|
||||
|
||||
/*
|
||||
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package disk
|
||||
|
||||
import (
|
||||
"os"
|
||||
)
|
||||
|
||||
// OpenBSD and Windows not supported.
|
||||
// On OpenBSD O_DIRECT is not supported
|
||||
// On Windows there is no documentation on disabling O_DIRECT
|
||||
|
||||
func OpenFileDirectIO(filePath string, flag int, perm os.FileMode) (*os.File, error) {
|
||||
return os.OpenFile(filePath, flag, perm)
|
||||
}
|
||||
|
||||
func DisableDirectIO(f *os.File) error {
|
||||
return nil
|
||||
}
|
Loading…
Reference in New Issue