mirror of
https://github.com/minio/minio.git
synced 2025-01-11 15:03:22 -05:00
fix: add support for O_DIRECT reads for erasure backends (#10718)
This commit is contained in:
parent
6135f072d2
commit
3a2f89b3c0
@ -18,6 +18,7 @@ package storageclass
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
@ -32,17 +33,26 @@ const (
|
||||
RRS = "REDUCED_REDUNDANCY"
|
||||
// Standard storage class
|
||||
STANDARD = "STANDARD"
|
||||
// DMA storage class
|
||||
DMA = "DMA"
|
||||
|
||||
// Valid values are "write" and "read-write"
|
||||
DMAWrite = "write"
|
||||
DMAReadWrite = "read-write"
|
||||
)
|
||||
|
||||
// Standard constats for config info storage class
|
||||
const (
|
||||
ClassStandard = "standard"
|
||||
ClassRRS = "rrs"
|
||||
ClassDMA = "dma"
|
||||
|
||||
// Reduced redundancy storage class environment variable
|
||||
RRSEnv = "MINIO_STORAGE_CLASS_RRS"
|
||||
// Standard storage class environment variable
|
||||
StandardEnv = "MINIO_STORAGE_CLASS_STANDARD"
|
||||
// DMA storage class environment variable
|
||||
DMAEnv = "MINIO_STORAGE_CLASS_DMA"
|
||||
|
||||
// Supported storage class scheme is EC
|
||||
schemePrefix = "EC"
|
||||
@ -52,6 +62,9 @@ const (
|
||||
|
||||
// Default RRS parity is always minimum parity.
|
||||
defaultRRSParity = minParityDisks
|
||||
|
||||
// Default DMA value
|
||||
defaultDMA = DMAWrite
|
||||
)
|
||||
|
||||
// DefaultKVS - default storage class config
|
||||
@ -65,18 +78,24 @@ var (
|
||||
Key: ClassRRS,
|
||||
Value: "EC:2",
|
||||
},
|
||||
config.KV{
|
||||
Key: ClassDMA,
|
||||
Value: defaultDMA,
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
// StorageClass - holds storage class information
|
||||
type StorageClass struct {
|
||||
Parity int
|
||||
DMA string
|
||||
}
|
||||
|
||||
// Config storage class configuration
|
||||
type Config struct {
|
||||
Standard StorageClass `json:"standard"`
|
||||
RRS StorageClass `json:"rrs"`
|
||||
DMA StorageClass `json:"dma"`
|
||||
}
|
||||
|
||||
// UnmarshalJSON - Validate SS and RRS parity when unmarshalling JSON.
|
||||
@ -93,7 +112,7 @@ func (sCfg *Config) UnmarshalJSON(data []byte) error {
|
||||
// IsValid - returns true if input string is a valid
|
||||
// storage class kind supported.
|
||||
func IsValid(sc string) bool {
|
||||
return sc == RRS || sc == STANDARD
|
||||
return sc == RRS || sc == STANDARD || sc == DMA
|
||||
}
|
||||
|
||||
// UnmarshalText unmarshals storage class from its textual form into
|
||||
@ -103,6 +122,14 @@ func (sc *StorageClass) UnmarshalText(b []byte) error {
|
||||
if scStr == "" {
|
||||
return nil
|
||||
}
|
||||
if scStr == DMAWrite {
|
||||
sc.DMA = DMAWrite
|
||||
return nil
|
||||
}
|
||||
if scStr == DMAReadWrite {
|
||||
sc.DMA = DMAReadWrite
|
||||
return nil
|
||||
}
|
||||
s, err := parseStorageClass(scStr)
|
||||
if err != nil {
|
||||
return err
|
||||
@ -116,14 +143,14 @@ func (sc *StorageClass) MarshalText() ([]byte, error) {
|
||||
if sc.Parity != 0 {
|
||||
return []byte(fmt.Sprintf("%s:%d", schemePrefix, sc.Parity)), nil
|
||||
}
|
||||
return []byte(""), nil
|
||||
return []byte(sc.DMA), nil
|
||||
}
|
||||
|
||||
func (sc *StorageClass) String() string {
|
||||
if sc.Parity != 0 {
|
||||
return fmt.Sprintf("%s:%d", schemePrefix, sc.Parity)
|
||||
}
|
||||
return ""
|
||||
return sc.DMA
|
||||
}
|
||||
|
||||
// Parses given storageClassEnv and returns a storageClass structure.
|
||||
@ -212,6 +239,11 @@ func (sCfg Config) GetParityForSC(sc string) (parity int) {
|
||||
}
|
||||
}
|
||||
|
||||
// GetDMA - returns DMA configuration.
|
||||
func (sCfg Config) GetDMA() string {
|
||||
return sCfg.DMA.DMA
|
||||
}
|
||||
|
||||
// Enabled returns if etcd is enabled.
|
||||
func Enabled(kvs config.KVS) bool {
|
||||
ssc := kvs.Get(ClassStandard)
|
||||
@ -231,6 +263,7 @@ func LookupConfig(kvs config.KVS, setDriveCount int) (cfg Config, err error) {
|
||||
|
||||
ssc := env.Get(StandardEnv, kvs.Get(ClassStandard))
|
||||
rrsc := env.Get(RRSEnv, kvs.Get(ClassRRS))
|
||||
dma := env.Get(DMAEnv, kvs.Get(ClassDMA))
|
||||
// Check for environment variables and parse into storageClass struct
|
||||
if ssc != "" {
|
||||
cfg.Standard, err = parseStorageClass(ssc)
|
||||
@ -252,6 +285,14 @@ func LookupConfig(kvs config.KVS, setDriveCount int) (cfg Config, err error) {
|
||||
cfg.RRS.Parity = defaultRRSParity
|
||||
}
|
||||
|
||||
if dma == "" {
|
||||
dma = defaultDMA
|
||||
}
|
||||
if dma != DMAReadWrite && dma != DMAWrite {
|
||||
return Config{}, errors.New(`valid dma values are "read-write" and "write"`)
|
||||
}
|
||||
cfg.DMA.DMA = dma
|
||||
|
||||
// Validation is done after parsing both the storage classes. This is needed because we need one
|
||||
// storage class value to deduce the correct value of the other storage class.
|
||||
if err = validateParity(cfg.Standard.Parity, cfg.RRS.Parity, setDriveCount); err != nil {
|
||||
|
@ -42,6 +42,7 @@ import (
|
||||
jsoniter "github.com/json-iterator/go"
|
||||
"github.com/klauspost/readahead"
|
||||
"github.com/minio/minio/cmd/config"
|
||||
"github.com/minio/minio/cmd/config/storageclass"
|
||||
"github.com/minio/minio/cmd/logger"
|
||||
"github.com/minio/minio/pkg/disk"
|
||||
"github.com/minio/minio/pkg/env"
|
||||
@ -1584,6 +1585,56 @@ func (s *xlStorage) openFile(volume, path string, mode int) (f *os.File, err err
|
||||
return w, nil
|
||||
}
|
||||
|
||||
// To support O_DIRECT reads for erasure backends.
|
||||
type odirectreader struct {
|
||||
f *os.File
|
||||
buf []byte
|
||||
bufp *[]byte
|
||||
freshRead bool
|
||||
s *xlStorage
|
||||
err error
|
||||
}
|
||||
|
||||
// Read - Implements Reader interface.
|
||||
func (o *odirectreader) Read(buf []byte) (n int, err error) {
|
||||
if o.err != nil {
|
||||
return 0, o.err
|
||||
}
|
||||
if o.buf == nil {
|
||||
o.bufp = o.s.pool.Get().(*[]byte)
|
||||
}
|
||||
if o.freshRead {
|
||||
o.buf = *o.bufp
|
||||
n, err = o.f.Read(o.buf)
|
||||
if err != nil && err != io.EOF {
|
||||
o.err = err
|
||||
return n, err
|
||||
}
|
||||
if n == 0 {
|
||||
// err is io.EOF
|
||||
o.err = err
|
||||
return n, err
|
||||
}
|
||||
o.buf = o.buf[:n]
|
||||
o.freshRead = false
|
||||
}
|
||||
if len(buf) >= len(o.buf) {
|
||||
n = copy(buf, o.buf)
|
||||
o.freshRead = true
|
||||
return n, nil
|
||||
}
|
||||
n = copy(buf, o.buf)
|
||||
o.buf = o.buf[n:]
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// Close - Release the buffer and close the file.
|
||||
func (o *odirectreader) Close() error {
|
||||
o.s.pool.Put(o.bufp)
|
||||
atomic.AddInt32(&o.s.activeIOCount, -1)
|
||||
return o.f.Close()
|
||||
}
|
||||
|
||||
// ReadFileStream - Returns the read stream of the file.
|
||||
func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
|
||||
if offset < 0 {
|
||||
@ -1611,6 +1662,29 @@ func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, off
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if offset == 0 && globalStorageClass.GetDMA() == storageclass.DMAReadWrite {
|
||||
file, err := disk.OpenFileDirectIO(filePath, os.O_RDONLY, 0666)
|
||||
if err != nil {
|
||||
switch {
|
||||
case os.IsNotExist(err):
|
||||
return nil, errFileNotFound
|
||||
case os.IsPermission(err):
|
||||
return nil, errFileAccessDenied
|
||||
case isSysErrNotDir(err):
|
||||
return nil, errFileAccessDenied
|
||||
case isSysErrIO(err):
|
||||
return nil, errFaultyDisk
|
||||
case isSysErrTooManyFiles(err):
|
||||
return nil, errTooManyOpenFiles
|
||||
default:
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
atomic.AddInt32(&s.activeIOCount, 1)
|
||||
return &odirectreader{file, nil, nil, true, s, nil}, nil
|
||||
}
|
||||
|
||||
// Open the file for reading.
|
||||
file, err := os.Open(filePath)
|
||||
if err != nil {
|
||||
|
@ -30,6 +30,7 @@ import (
|
||||
"syscall"
|
||||
"testing"
|
||||
|
||||
"github.com/minio/minio/cmd/config/storageclass"
|
||||
"github.com/minio/minio/pkg/disk"
|
||||
)
|
||||
|
||||
@ -1063,62 +1064,71 @@ func TestXLStorageReadFile(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// Following block validates all ReadFile test cases.
|
||||
for i, testCase := range testCases {
|
||||
var n int64
|
||||
// Common read buffer.
|
||||
var buf = make([]byte, testCase.bufSize)
|
||||
n, err = xlStorage.ReadFile(context.Background(), testCase.volume, testCase.fileName, testCase.offset, buf, v)
|
||||
if err != nil && testCase.expectedErr != nil {
|
||||
// Validate if the type string of the errors are an exact match.
|
||||
if err.Error() != testCase.expectedErr.Error() {
|
||||
if runtime.GOOS != globalWindowsOSName {
|
||||
t.Errorf("Case: %d %#v, expected: %s, got: %s", i+1, testCase, testCase.expectedErr, err)
|
||||
} else {
|
||||
var resultErrno, expectErrno uintptr
|
||||
if pathErr, ok := err.(*os.PathError); ok {
|
||||
if errno, pok := pathErr.Err.(syscall.Errno); pok {
|
||||
resultErrno = uintptr(errno)
|
||||
}
|
||||
}
|
||||
if pathErr, ok := testCase.expectedErr.(*os.PathError); ok {
|
||||
if errno, pok := pathErr.Err.(syscall.Errno); pok {
|
||||
expectErrno = uintptr(errno)
|
||||
}
|
||||
}
|
||||
if !(expectErrno != 0 && resultErrno != 0 && expectErrno == resultErrno) {
|
||||
for l := 0; l < 2; l++ {
|
||||
// 1st loop tests with dma=write, 2nd loop tests with dma=read-write.
|
||||
if l == 1 {
|
||||
globalStorageClass.DMA.DMA = storageclass.DMAReadWrite
|
||||
}
|
||||
// Following block validates all ReadFile test cases.
|
||||
for i, testCase := range testCases {
|
||||
var n int64
|
||||
// Common read buffer.
|
||||
var buf = make([]byte, testCase.bufSize)
|
||||
n, err = xlStorage.ReadFile(context.Background(), testCase.volume, testCase.fileName, testCase.offset, buf, v)
|
||||
if err != nil && testCase.expectedErr != nil {
|
||||
// Validate if the type string of the errors are an exact match.
|
||||
if err.Error() != testCase.expectedErr.Error() {
|
||||
if runtime.GOOS != globalWindowsOSName {
|
||||
t.Errorf("Case: %d %#v, expected: %s, got: %s", i+1, testCase, testCase.expectedErr, err)
|
||||
} else {
|
||||
var resultErrno, expectErrno uintptr
|
||||
if pathErr, ok := err.(*os.PathError); ok {
|
||||
if errno, pok := pathErr.Err.(syscall.Errno); pok {
|
||||
resultErrno = uintptr(errno)
|
||||
}
|
||||
}
|
||||
if pathErr, ok := testCase.expectedErr.(*os.PathError); ok {
|
||||
if errno, pok := pathErr.Err.(syscall.Errno); pok {
|
||||
expectErrno = uintptr(errno)
|
||||
}
|
||||
}
|
||||
if !(expectErrno != 0 && resultErrno != 0 && expectErrno == resultErrno) {
|
||||
t.Errorf("Case: %d %#v, expected: %s, got: %s", i+1, testCase, testCase.expectedErr, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Err unexpected EOF special case, where we verify we have provided a larger
|
||||
// buffer than the data itself, but the results are in-fact valid. So we validate
|
||||
// this error condition specifically treating it as a good condition with valid
|
||||
// results. In this scenario return 'n' is always lesser than the input buffer.
|
||||
if err == io.ErrUnexpectedEOF {
|
||||
if !bytes.Equal(testCase.expectedBuf, buf[:n]) {
|
||||
t.Errorf("Case: %d %#v, expected: \"%s\", got: \"%s\"", i+1, testCase, string(testCase.expectedBuf), string(buf[:n]))
|
||||
}
|
||||
if n > int64(len(buf)) {
|
||||
t.Errorf("Case: %d %#v, expected: %d, got: %d", i+1, testCase, testCase.bufSize, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
// Err unexpected EOF special case, where we verify we have provided a larger
|
||||
// buffer than the data itself, but the results are in-fact valid. So we validate
|
||||
// this error condition specifically treating it as a good condition with valid
|
||||
// results. In this scenario return 'n' is always lesser than the input buffer.
|
||||
if err == io.ErrUnexpectedEOF {
|
||||
if !bytes.Equal(testCase.expectedBuf, buf[:n]) {
|
||||
t.Errorf("Case: %d %#v, expected: \"%s\", got: \"%s\"", i+1, testCase, string(testCase.expectedBuf), string(buf[:n]))
|
||||
// ReadFile has returned success, but our expected error is non 'nil'.
|
||||
if err == nil && err != testCase.expectedErr {
|
||||
t.Errorf("Case: %d %#v, expected: %s, got :%s", i+1, testCase, testCase.expectedErr, err)
|
||||
}
|
||||
// Expected error retured, proceed further to validate the returned results.
|
||||
if err == nil && err == testCase.expectedErr {
|
||||
if !bytes.Equal(testCase.expectedBuf, buf) {
|
||||
t.Errorf("Case: %d %#v, expected: \"%s\", got: \"%s\"", i+1, testCase, string(testCase.expectedBuf), string(buf[:testCase.bufSize]))
|
||||
}
|
||||
if n > int64(len(buf)) {
|
||||
if n != int64(testCase.bufSize) {
|
||||
t.Errorf("Case: %d %#v, expected: %d, got: %d", i+1, testCase, testCase.bufSize, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
// ReadFile has returned success, but our expected error is non 'nil'.
|
||||
if err == nil && err != testCase.expectedErr {
|
||||
t.Errorf("Case: %d %#v, expected: %s, got :%s", i+1, testCase, testCase.expectedErr, err)
|
||||
}
|
||||
// Expected error retured, proceed further to validate the returned results.
|
||||
if err == nil && err == testCase.expectedErr {
|
||||
if !bytes.Equal(testCase.expectedBuf, buf) {
|
||||
t.Errorf("Case: %d %#v, expected: \"%s\", got: \"%s\"", i+1, testCase, string(testCase.expectedBuf), string(buf[:testCase.bufSize]))
|
||||
}
|
||||
if n != int64(testCase.bufSize) {
|
||||
t.Errorf("Case: %d %#v, expected: %d, got: %d", i+1, testCase, testCase.bufSize, n)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the flag.
|
||||
globalStorageClass.DMA.DMA = storageclass.DMAWrite
|
||||
|
||||
// TestXLStorage for permission denied.
|
||||
if runtime.GOOS != globalWindowsOSName {
|
||||
permDeniedDir := createPermDeniedFile(t)
|
||||
|
Loading…
Reference in New Issue
Block a user