mirror of https://github.com/minio/minio.git
Use O_DIRECT while writing to disk (#7479)
- Use O_DIRECT while writing to disk - Remove MINIO_DRIVE_SYNC option
This commit is contained in:
parent
ab711fe1a2
commit
a3ec71bc28
139
cmd/posix.go
139
cmd/posix.go
|
@ -37,12 +37,14 @@ import (
|
||||||
"github.com/minio/minio/cmd/logger"
|
"github.com/minio/minio/cmd/logger"
|
||||||
"github.com/minio/minio/pkg/disk"
|
"github.com/minio/minio/pkg/disk"
|
||||||
"github.com/minio/minio/pkg/mountinfo"
|
"github.com/minio/minio/pkg/mountinfo"
|
||||||
|
"github.com/ncw/directio"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
diskMinFreeSpace = 900 * humanize.MiByte // Min 900MiB free space.
|
diskMinFreeSpace = 900 * humanize.MiByte // Min 900MiB free space.
|
||||||
diskMinTotalSpace = diskMinFreeSpace // Min 900MiB total space.
|
diskMinTotalSpace = diskMinFreeSpace // Min 900MiB total space.
|
||||||
maxAllowedIOError = 5
|
maxAllowedIOError = 5
|
||||||
|
posixWriteBlockSize = 4 * humanize.MiByte
|
||||||
)
|
)
|
||||||
|
|
||||||
// isValidVolname verifies a volname name in accordance with object
|
// isValidVolname verifies a volname name in accordance with object
|
||||||
|
@ -71,7 +73,6 @@ type posix struct {
|
||||||
connected bool
|
connected bool
|
||||||
|
|
||||||
diskMount bool // indicates if the path is an actual mount.
|
diskMount bool // indicates if the path is an actual mount.
|
||||||
driveSync bool // indicates if the backend is synchronous.
|
|
||||||
|
|
||||||
diskFileInfo os.FileInfo
|
diskFileInfo os.FileInfo
|
||||||
// Disk usage metrics
|
// Disk usage metrics
|
||||||
|
@ -188,7 +189,7 @@ func newPosix(path string) (*posix, error) {
|
||||||
// 1MiB buffer pool for posix internal operations.
|
// 4MiB aligned buffer pool (posixWriteBlockSize) for posix internal operations.
|
||||||
pool: sync.Pool{
|
pool: sync.Pool{
|
||||||
New: func() interface{} {
|
New: func() interface{} {
|
||||||
b := make([]byte, readSizeV1)
|
b := directio.AlignedBlock(posixWriteBlockSize)
|
||||||
return &b
|
return &b
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
@ -197,15 +198,6 @@ func newPosix(path string) (*posix, error) {
|
||||||
diskMount: mountinfo.IsLikelyMountPoint(path),
|
diskMount: mountinfo.IsLikelyMountPoint(path),
|
||||||
}
|
}
|
||||||
|
|
||||||
var pf BoolFlag
|
|
||||||
if driveSync := os.Getenv("MINIO_DRIVE_SYNC"); driveSync != "" {
|
|
||||||
pf, err = ParseBoolFlag(driveSync)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
p.driveSync = bool(pf)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !p.diskMount {
|
if !p.diskMount {
|
||||||
go p.diskUsage(GlobalServiceDoneCh)
|
go p.diskUsage(GlobalServiceDoneCh)
|
||||||
}
|
}
|
||||||
|
@ -1057,13 +1049,49 @@ func (s *posix) CreateFile(volume, path string, fileSize int64, r io.Reader) (er
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create file if not found. Note that it is created with os.O_EXCL flag as the file
|
if err = s.checkDiskFound(); err != nil {
|
||||||
// always is supposed to be created in the tmp directory with a unique file name.
|
|
||||||
w, err := s.openFile(volume, path, os.O_CREATE|os.O_APPEND|os.O_WRONLY|os.O_EXCL)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
volumeDir, err := s.getVolDir(volume)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// Stat a volume entry.
|
||||||
|
_, err = os.Stat((volumeDir))
|
||||||
|
if err != nil {
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return errVolumeNotFound
|
||||||
|
} else if isSysErrIO(err) {
|
||||||
|
return errFaultyDisk
|
||||||
|
}
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
filePath := pathJoin(volumeDir, path)
|
||||||
|
if err = checkPathLength((filePath)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create top level directories if they don't exist.
|
||||||
|
// with mode 0777 mkdir honors system umask.
|
||||||
|
if err = mkdirAll(slashpath.Dir(filePath), 0777); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
w, err := disk.OpenFileDirectIO(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL|os.O_SYNC, 0666)
|
||||||
|
if err != nil {
|
||||||
|
switch {
|
||||||
|
case os.IsPermission(err):
|
||||||
|
return errFileAccessDenied
|
||||||
|
case os.IsExist(err):
|
||||||
|
return errFileAccessDenied
|
||||||
|
case isSysErrIO(err):
|
||||||
|
return errFaultyDisk
|
||||||
|
default:
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
defer w.Close()
|
defer w.Close()
|
||||||
|
|
||||||
var e error
|
var e error
|
||||||
|
@ -1090,16 +1118,69 @@ func (s *posix) CreateFile(volume, path string, fileSize int64, r io.Reader) (er
|
||||||
bufp := s.pool.Get().(*[]byte)
|
bufp := s.pool.Get().(*[]byte)
|
||||||
defer s.pool.Put(bufp)
|
defer s.pool.Put(bufp)
|
||||||
|
|
||||||
n, err := io.CopyBuffer(w, r, *bufp)
|
buf := *bufp
|
||||||
if err != nil {
|
var written int64
|
||||||
return err
|
dioCount := int(fileSize) / len(buf)
|
||||||
|
for i := 0; i < dioCount; i++ {
|
||||||
|
var n int
|
||||||
|
_, err = io.ReadFull(r, buf)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
n, err = w.Write(buf)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
written += int64(n)
|
||||||
}
|
}
|
||||||
if n < fileSize {
|
// The following logic writes the remaining data such that it writes whatever best is possible (aligned buffer)
|
||||||
return errLessData
|
// in O_DIRECT mode and remaining (unaligned buffer) in non-O_DIRECT mode.
|
||||||
|
remaining := fileSize % int64(len(buf))
|
||||||
|
if remaining != 0 {
|
||||||
|
buf = buf[:remaining]
|
||||||
|
_, err = io.ReadFull(r, buf)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
remainingAligned := (remaining / directio.AlignSize) * directio.AlignSize
|
||||||
|
remainingAlignedBuf := buf[:remainingAligned]
|
||||||
|
remainingUnalignedBuf := buf[remainingAligned:]
|
||||||
|
if len(remainingAlignedBuf) > 0 {
|
||||||
|
var n int
|
||||||
|
n, err = w.Write(remainingAlignedBuf)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
written += int64(n)
|
||||||
|
}
|
||||||
|
if len(remainingUnalignedBuf) > 0 {
|
||||||
|
var n int
|
||||||
|
// Writes on O_DIRECT fds fail if the buffer is not 4K aligned, hence disable O_DIRECT.
|
||||||
|
if err = disk.DisableDirectIO(w); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
n, err = w.Write(remainingUnalignedBuf)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
written += int64(n)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if n > fileSize {
|
|
||||||
|
// Do some sanity checks.
|
||||||
|
_, err = io.ReadFull(r, buf)
|
||||||
|
if err != io.EOF {
|
||||||
return errMoreData
|
return errMoreData
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if written < fileSize {
|
||||||
|
return errLessData
|
||||||
|
}
|
||||||
|
|
||||||
|
if written > fileSize {
|
||||||
|
return errMoreData
|
||||||
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1142,13 +1223,9 @@ func (s *posix) AppendFile(volume, path string, buf []byte) (err error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
var w *os.File
|
var w *os.File
|
||||||
// Create file if not found, additionally also enables synchronous
|
// Create file if not found. Not doing O_DIRECT here to avoid the code that does buffer aligned writes.
|
||||||
// operation if asked by the user.
|
// AppendFile() is only used by healing code to heal objects written in old format.
|
||||||
if s.driveSync {
|
w, err = s.openFile(volume, path, os.O_CREATE|os.O_SYNC|os.O_APPEND|os.O_WRONLY)
|
||||||
w, err = s.openFile(volume, path, os.O_CREATE|os.O_SYNC|os.O_APPEND|os.O_WRONLY)
|
|
||||||
} else {
|
|
||||||
w, err = s.openFile(volume, path, os.O_CREATE|os.O_APPEND|os.O_WRONLY)
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
1
go.mod
1
go.mod
|
@ -69,6 +69,7 @@ require (
|
||||||
github.com/nats-io/nats v1.7.2
|
github.com/nats-io/nats v1.7.2
|
||||||
github.com/nats-io/nkeys v0.0.2 // indirect
|
github.com/nats-io/nkeys v0.0.2 // indirect
|
||||||
github.com/nats-io/nuid v1.0.1 // indirect
|
github.com/nats-io/nuid v1.0.1 // indirect
|
||||||
|
github.com/ncw/directio v1.0.5
|
||||||
github.com/nsqio/go-nsq v1.0.7
|
github.com/nsqio/go-nsq v1.0.7
|
||||||
github.com/pascaldekloe/goe v0.1.0 // indirect
|
github.com/pascaldekloe/goe v0.1.0 // indirect
|
||||||
github.com/pkg/errors v0.8.1 // indirect
|
github.com/pkg/errors v0.8.1 // indirect
|
||||||
|
|
2
go.sum
2
go.sum
|
@ -432,6 +432,8 @@ github.com/nats-io/nuid v1.0.0 h1:44QGdhbiANq8ZCbUkdn6W5bqtg+mHuDE4wOUuxxndFs=
|
||||||
github.com/nats-io/nuid v1.0.0/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
github.com/nats-io/nuid v1.0.0/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
||||||
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
github.com/nats-io/nuid v1.0.1 h1:5iA8DT8V7q8WK2EScv2padNa/rTESc1KdnPw4TC2paw=
|
||||||
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
|
||||||
|
github.com/ncw/directio v1.0.5 h1:JSUBhdjEvVaJvOoyPAbcW0fnd0tvRXD76wEfZ1KcQz4=
|
||||||
|
github.com/ncw/directio v1.0.5/go.mod h1:rX/pKEYkOXBGOggmcyJeJGloCkleSvphPx2eV3t6ROk=
|
||||||
github.com/nsqio/go-nsq v0.0.0-20181028195256-0527e80f3ba5/go.mod h1:XP5zaUs3pqf+Q71EqUJs3HYfBIqfK6G83WQMdNN+Ito=
|
github.com/nsqio/go-nsq v0.0.0-20181028195256-0527e80f3ba5/go.mod h1:XP5zaUs3pqf+Q71EqUJs3HYfBIqfK6G83WQMdNN+Ito=
|
||||||
github.com/nsqio/go-nsq v1.0.7 h1:O0pIZJYTf+x7cZBA0UMY8WxFG79lYTURmWzAAh48ljY=
|
github.com/nsqio/go-nsq v1.0.7 h1:O0pIZJYTf+x7cZBA0UMY8WxFG79lYTURmWzAAh48ljY=
|
||||||
github.com/nsqio/go-nsq v1.0.7/go.mod h1:XP5zaUs3pqf+Q71EqUJs3HYfBIqfK6G83WQMdNN+Ito=
|
github.com/nsqio/go-nsq v1.0.7/go.mod h1:XP5zaUs3pqf+Q71EqUJs3HYfBIqfK6G83WQMdNN+Ito=
|
||||||
|
|
|
@ -0,0 +1,36 @@
|
||||||
|
/*
|
||||||
|
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package disk
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
|
||||||
|
"github.com/ncw/directio"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OpenFileDirectIO - bypass kernel cache.
|
||||||
|
func OpenFileDirectIO(filePath string, flag int, perm os.FileMode) (*os.File, error) {
|
||||||
|
return directio.OpenFile(filePath, flag, perm)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DisableDirectIO - disables directio mode.
|
||||||
|
func DisableDirectIO(f *os.File) error {
|
||||||
|
fd := f.Fd()
|
||||||
|
_, err := unix.FcntlInt(fd, unix.F_NOCACHE, 0)
|
||||||
|
return err
|
||||||
|
}
|
|
@ -0,0 +1,44 @@
|
||||||
|
// +build linux netbsd freebsd
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package disk
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"syscall"
|
||||||
|
|
||||||
|
"github.com/ncw/directio"
|
||||||
|
"golang.org/x/sys/unix"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OpenFileDirectIO - bypass kernel cache.
|
||||||
|
func OpenFileDirectIO(filePath string, flag int, perm os.FileMode) (*os.File, error) {
|
||||||
|
return directio.OpenFile(filePath, flag, perm)
|
||||||
|
}
|
||||||
|
|
||||||
|
// DisableDirectIO - disables directio mode.
|
||||||
|
func DisableDirectIO(f *os.File) error {
|
||||||
|
fd := f.Fd()
|
||||||
|
flag, err := unix.FcntlInt(fd, unix.F_GETFL, 0)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
flag = flag & ^(syscall.O_DIRECT)
|
||||||
|
_, err = unix.FcntlInt(fd, unix.F_SETFL, flag)
|
||||||
|
return err
|
||||||
|
}
|
|
@ -0,0 +1,35 @@
|
||||||
|
// +build !linux,!netbsd,!freebsd,!darwin
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Minio Cloud Storage, (C) 2019 Minio, Inc.
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package disk
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// OpenBSD and Windows not supported.
|
||||||
|
// On OpenBSD O_DIRECT is not supported
|
||||||
|
// On Windows there is no documentation on disabling O_DIRECT
|
||||||
|
|
||||||
|
func OpenFileDirectIO(filePath string, flag int, perm os.FileMode) (*os.File, error) {
|
||||||
|
return os.OpenFile(filePath, flag, perm)
|
||||||
|
}
|
||||||
|
|
||||||
|
func DisableDirectIO(f *os.File) error {
|
||||||
|
return nil
|
||||||
|
}
|
Loading…
Reference in New Issue