fix: add support for O_DIRECT reads for erasure backends (#10718)

This commit is contained in:
Krishna Srinivas 2020-10-30 11:04:29 -07:00 committed by GitHub
parent 6135f072d2
commit 3a2f89b3c0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 173 additions and 48 deletions

View File

@ -18,6 +18,7 @@ package storageclass
import (
"encoding/json"
"errors"
"fmt"
"strconv"
"strings"
@ -32,17 +33,26 @@ const (
RRS = "REDUCED_REDUNDANCY"
// Standard storage class
STANDARD = "STANDARD"
// DMA storage class
DMA = "DMA"
// Valid values are "write" and "read-write"
DMAWrite = "write"
DMAReadWrite = "read-write"
)
// Standard constats for config info storage class
const (
ClassStandard = "standard"
ClassRRS = "rrs"
ClassDMA = "dma"
// Reduced redundancy storage class environment variable
RRSEnv = "MINIO_STORAGE_CLASS_RRS"
// Standard storage class environment variable
StandardEnv = "MINIO_STORAGE_CLASS_STANDARD"
// DMA storage class environment variable
DMAEnv = "MINIO_STORAGE_CLASS_DMA"
// Supported storage class scheme is EC
schemePrefix = "EC"
@ -52,6 +62,9 @@ const (
// Default RRS parity is always minimum parity.
defaultRRSParity = minParityDisks
// Default DMA value
defaultDMA = DMAWrite
)
// DefaultKVS - default storage class config
@ -65,18 +78,24 @@ var (
Key: ClassRRS,
Value: "EC:2",
},
config.KV{
Key: ClassDMA,
Value: defaultDMA,
},
}
)
// StorageClass - holds storage class information
type StorageClass struct {
Parity int
DMA string
}
// Config storage class configuration
type Config struct {
Standard StorageClass `json:"standard"`
RRS StorageClass `json:"rrs"`
DMA StorageClass `json:"dma"`
}
// UnmarshalJSON - Validate SS and RRS parity when unmarshalling JSON.
@ -93,7 +112,7 @@ func (sCfg *Config) UnmarshalJSON(data []byte) error {
// IsValid - returns true if input string is a valid
// storage class kind supported.
func IsValid(sc string) bool {
return sc == RRS || sc == STANDARD
return sc == RRS || sc == STANDARD || sc == DMA
}
// UnmarshalText unmarshals storage class from its textual form into
@ -103,6 +122,14 @@ func (sc *StorageClass) UnmarshalText(b []byte) error {
if scStr == "" {
return nil
}
if scStr == DMAWrite {
sc.DMA = DMAWrite
return nil
}
if scStr == DMAReadWrite {
sc.DMA = DMAReadWrite
return nil
}
s, err := parseStorageClass(scStr)
if err != nil {
return err
@ -116,14 +143,14 @@ func (sc *StorageClass) MarshalText() ([]byte, error) {
if sc.Parity != 0 {
return []byte(fmt.Sprintf("%s:%d", schemePrefix, sc.Parity)), nil
}
return []byte(""), nil
return []byte(sc.DMA), nil
}
func (sc *StorageClass) String() string {
if sc.Parity != 0 {
return fmt.Sprintf("%s:%d", schemePrefix, sc.Parity)
}
return ""
return sc.DMA
}
// Parses given storageClassEnv and returns a storageClass structure.
@ -212,6 +239,11 @@ func (sCfg Config) GetParityForSC(sc string) (parity int) {
}
}
// GetDMA - returns DMA configuration.
func (sCfg Config) GetDMA() string {
return sCfg.DMA.DMA
}
// Enabled returns if etcd is enabled.
func Enabled(kvs config.KVS) bool {
ssc := kvs.Get(ClassStandard)
@ -231,6 +263,7 @@ func LookupConfig(kvs config.KVS, setDriveCount int) (cfg Config, err error) {
ssc := env.Get(StandardEnv, kvs.Get(ClassStandard))
rrsc := env.Get(RRSEnv, kvs.Get(ClassRRS))
dma := env.Get(DMAEnv, kvs.Get(ClassDMA))
// Check for environment variables and parse into storageClass struct
if ssc != "" {
cfg.Standard, err = parseStorageClass(ssc)
@ -252,6 +285,14 @@ func LookupConfig(kvs config.KVS, setDriveCount int) (cfg Config, err error) {
cfg.RRS.Parity = defaultRRSParity
}
if dma == "" {
dma = defaultDMA
}
if dma != DMAReadWrite && dma != DMAWrite {
return Config{}, errors.New(`valid dma values are "read-write" and "write"`)
}
cfg.DMA.DMA = dma
// Validation is done after parsing both the storage classes. This is needed because we need one
// storage class value to deduce the correct value of the other storage class.
if err = validateParity(cfg.Standard.Parity, cfg.RRS.Parity, setDriveCount); err != nil {

View File

@ -42,6 +42,7 @@ import (
jsoniter "github.com/json-iterator/go"
"github.com/klauspost/readahead"
"github.com/minio/minio/cmd/config"
"github.com/minio/minio/cmd/config/storageclass"
"github.com/minio/minio/cmd/logger"
"github.com/minio/minio/pkg/disk"
"github.com/minio/minio/pkg/env"
@ -1584,6 +1585,56 @@ func (s *xlStorage) openFile(volume, path string, mode int) (f *os.File, err err
return w, nil
}
// To support O_DIRECT reads for erasure backends.
type odirectreader struct {
f *os.File
buf []byte
bufp *[]byte
freshRead bool
s *xlStorage
err error
}
// Read - Implements Reader interface.
func (o *odirectreader) Read(buf []byte) (n int, err error) {
if o.err != nil {
return 0, o.err
}
if o.buf == nil {
o.bufp = o.s.pool.Get().(*[]byte)
}
if o.freshRead {
o.buf = *o.bufp
n, err = o.f.Read(o.buf)
if err != nil && err != io.EOF {
o.err = err
return n, err
}
if n == 0 {
// err is io.EOF
o.err = err
return n, err
}
o.buf = o.buf[:n]
o.freshRead = false
}
if len(buf) >= len(o.buf) {
n = copy(buf, o.buf)
o.freshRead = true
return n, nil
}
n = copy(buf, o.buf)
o.buf = o.buf[n:]
return n, nil
}
// Close - Release the buffer and close the file.
func (o *odirectreader) Close() error {
o.s.pool.Put(o.bufp)
atomic.AddInt32(&o.s.activeIOCount, -1)
return o.f.Close()
}
// ReadFileStream - Returns the read stream of the file.
func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
if offset < 0 {
@ -1611,6 +1662,29 @@ func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, off
return nil, err
}
if offset == 0 && globalStorageClass.GetDMA() == storageclass.DMAReadWrite {
file, err := disk.OpenFileDirectIO(filePath, os.O_RDONLY, 0666)
if err != nil {
switch {
case os.IsNotExist(err):
return nil, errFileNotFound
case os.IsPermission(err):
return nil, errFileAccessDenied
case isSysErrNotDir(err):
return nil, errFileAccessDenied
case isSysErrIO(err):
return nil, errFaultyDisk
case isSysErrTooManyFiles(err):
return nil, errTooManyOpenFiles
default:
return nil, err
}
}
atomic.AddInt32(&s.activeIOCount, 1)
return &odirectreader{file, nil, nil, true, s, nil}, nil
}
// Open the file for reading.
file, err := os.Open(filePath)
if err != nil {

View File

@ -30,6 +30,7 @@ import (
"syscall"
"testing"
"github.com/minio/minio/cmd/config/storageclass"
"github.com/minio/minio/pkg/disk"
)
@ -1063,62 +1064,71 @@ func TestXLStorageReadFile(t *testing.T) {
}
}
// Following block validates all ReadFile test cases.
for i, testCase := range testCases {
var n int64
// Common read buffer.
var buf = make([]byte, testCase.bufSize)
n, err = xlStorage.ReadFile(context.Background(), testCase.volume, testCase.fileName, testCase.offset, buf, v)
if err != nil && testCase.expectedErr != nil {
// Validate if the type string of the errors are an exact match.
if err.Error() != testCase.expectedErr.Error() {
if runtime.GOOS != globalWindowsOSName {
t.Errorf("Case: %d %#v, expected: %s, got: %s", i+1, testCase, testCase.expectedErr, err)
} else {
var resultErrno, expectErrno uintptr
if pathErr, ok := err.(*os.PathError); ok {
if errno, pok := pathErr.Err.(syscall.Errno); pok {
resultErrno = uintptr(errno)
}
}
if pathErr, ok := testCase.expectedErr.(*os.PathError); ok {
if errno, pok := pathErr.Err.(syscall.Errno); pok {
expectErrno = uintptr(errno)
}
}
if !(expectErrno != 0 && resultErrno != 0 && expectErrno == resultErrno) {
for l := 0; l < 2; l++ {
// 1st loop tests with dma=write, 2nd loop tests with dma=read-write.
if l == 1 {
globalStorageClass.DMA.DMA = storageclass.DMAReadWrite
}
// Following block validates all ReadFile test cases.
for i, testCase := range testCases {
var n int64
// Common read buffer.
var buf = make([]byte, testCase.bufSize)
n, err = xlStorage.ReadFile(context.Background(), testCase.volume, testCase.fileName, testCase.offset, buf, v)
if err != nil && testCase.expectedErr != nil {
// Validate if the type string of the errors are an exact match.
if err.Error() != testCase.expectedErr.Error() {
if runtime.GOOS != globalWindowsOSName {
t.Errorf("Case: %d %#v, expected: %s, got: %s", i+1, testCase, testCase.expectedErr, err)
} else {
var resultErrno, expectErrno uintptr
if pathErr, ok := err.(*os.PathError); ok {
if errno, pok := pathErr.Err.(syscall.Errno); pok {
resultErrno = uintptr(errno)
}
}
if pathErr, ok := testCase.expectedErr.(*os.PathError); ok {
if errno, pok := pathErr.Err.(syscall.Errno); pok {
expectErrno = uintptr(errno)
}
}
if !(expectErrno != 0 && resultErrno != 0 && expectErrno == resultErrno) {
t.Errorf("Case: %d %#v, expected: %s, got: %s", i+1, testCase, testCase.expectedErr, err)
}
}
}
// Err unexpected EOF special case, where we verify we have provided a larger
// buffer than the data itself, but the results are in-fact valid. So we validate
// this error condition specifically treating it as a good condition with valid
// results. In this scenario return 'n' is always lesser than the input buffer.
if err == io.ErrUnexpectedEOF {
if !bytes.Equal(testCase.expectedBuf, buf[:n]) {
t.Errorf("Case: %d %#v, expected: \"%s\", got: \"%s\"", i+1, testCase, string(testCase.expectedBuf), string(buf[:n]))
}
if n > int64(len(buf)) {
t.Errorf("Case: %d %#v, expected: %d, got: %d", i+1, testCase, testCase.bufSize, n)
}
}
}
// Err unexpected EOF special case, where we verify we have provided a larger
// buffer than the data itself, but the results are in-fact valid. So we validate
// this error condition specifically treating it as a good condition with valid
// results. In this scenario return 'n' is always lesser than the input buffer.
if err == io.ErrUnexpectedEOF {
if !bytes.Equal(testCase.expectedBuf, buf[:n]) {
t.Errorf("Case: %d %#v, expected: \"%s\", got: \"%s\"", i+1, testCase, string(testCase.expectedBuf), string(buf[:n]))
// ReadFile has returned success, but our expected error is non 'nil'.
if err == nil && err != testCase.expectedErr {
t.Errorf("Case: %d %#v, expected: %s, got :%s", i+1, testCase, testCase.expectedErr, err)
}
// Expected error retured, proceed further to validate the returned results.
if err == nil && err == testCase.expectedErr {
if !bytes.Equal(testCase.expectedBuf, buf) {
t.Errorf("Case: %d %#v, expected: \"%s\", got: \"%s\"", i+1, testCase, string(testCase.expectedBuf), string(buf[:testCase.bufSize]))
}
if n > int64(len(buf)) {
if n != int64(testCase.bufSize) {
t.Errorf("Case: %d %#v, expected: %d, got: %d", i+1, testCase, testCase.bufSize, n)
}
}
}
// ReadFile has returned success, but our expected error is non 'nil'.
if err == nil && err != testCase.expectedErr {
t.Errorf("Case: %d %#v, expected: %s, got :%s", i+1, testCase, testCase.expectedErr, err)
}
// Expected error retured, proceed further to validate the returned results.
if err == nil && err == testCase.expectedErr {
if !bytes.Equal(testCase.expectedBuf, buf) {
t.Errorf("Case: %d %#v, expected: \"%s\", got: \"%s\"", i+1, testCase, string(testCase.expectedBuf), string(buf[:testCase.bufSize]))
}
if n != int64(testCase.bufSize) {
t.Errorf("Case: %d %#v, expected: %d, got: %d", i+1, testCase, testCase.bufSize, n)
}
}
}
// Reset the flag.
globalStorageClass.DMA.DMA = storageclass.DMAWrite
// TestXLStorage for permission denied.
if runtime.GOOS != globalWindowsOSName {
permDeniedDir := createPermDeniedFile(t)