2015-10-16 14:26:01 -04:00
|
|
|
/*
|
2020-03-12 21:57:41 -04:00
|
|
|
* MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
|
2015-10-16 14:26:01 -04:00
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
2016-08-18 19:23:42 -04:00
|
|
|
package cmd
|
2015-10-16 14:26:01 -04:00
|
|
|
|
|
|
|
import (
|
Remove read-ahead for small files (#8522)
We should only read ahead if we are reading big files. We enable it for files >= 16MB.
Benchmark on 64KB objects.
Before:
```
Operation: GET
Errors: 0
Average: 59.976s, 87.13 MB/s, 1394.07 ops ended/s.
Fastest: 1s, 90.99 MB/s, 1455.00 ops ended/s.
50% Median: 1s, 87.53 MB/s, 1401.00 ops ended/s.
Slowest: 1s, 81.39 MB/s, 1301.00 ops ended/s.
```
After:
```
Operation: GET
Errors: 0
Average: 59.992s, 207.99 MB/s, 3327.85 ops ended/s.
Fastest: 1s, 219.20 MB/s, 3507.00 ops ended/s.
50% Median: 1s, 210.54 MB/s, 3368.00 ops ended/s.
Slowest: 1s, 179.14 MB/s, 2865.00 ops ended/s.
```
The 64KB buffer is actually a small disadvantage for this case, but I believe it will be better in general than no buffer.
2019-11-14 15:58:41 -05:00
|
|
|
"bufio"
|
2020-06-12 13:28:21 -04:00
|
|
|
"bytes"
|
2018-04-05 18:04:40 -04:00
|
|
|
"context"
|
2019-11-22 12:50:17 -05:00
|
|
|
"crypto/rand"
|
|
|
|
"encoding/hex"
|
2019-09-11 13:21:43 -04:00
|
|
|
"errors"
|
2020-06-12 23:04:01 -04:00
|
|
|
"fmt"
|
2016-04-08 13:37:38 -04:00
|
|
|
"io"
|
2016-06-25 17:51:06 -04:00
|
|
|
"io/ioutil"
|
2020-08-25 13:55:15 -04:00
|
|
|
"net/url"
|
2016-04-08 20:13:16 -04:00
|
|
|
"os"
|
2021-02-08 13:15:12 -05:00
|
|
|
pathutil "path"
|
2016-05-28 18:13:15 -04:00
|
|
|
"path/filepath"
|
2016-06-20 09:18:47 -04:00
|
|
|
"runtime"
|
2018-02-20 18:33:26 -05:00
|
|
|
"strings"
|
2016-10-26 20:14:05 -04:00
|
|
|
"sync"
|
2018-05-23 06:11:29 -04:00
|
|
|
"time"
|
2015-10-16 14:26:01 -04:00
|
|
|
|
2020-09-12 03:08:12 -04:00
|
|
|
"github.com/dustin/go-humanize"
|
2020-12-18 11:51:09 -05:00
|
|
|
"github.com/google/uuid"
|
2019-10-25 13:37:53 -04:00
|
|
|
jsoniter "github.com/json-iterator/go"
|
2019-05-22 16:47:15 -04:00
|
|
|
"github.com/klauspost/readahead"
|
2020-08-04 17:55:53 -04:00
|
|
|
"github.com/minio/minio/cmd/config"
|
2020-10-30 14:04:29 -04:00
|
|
|
"github.com/minio/minio/cmd/config/storageclass"
|
2018-04-05 18:04:40 -04:00
|
|
|
"github.com/minio/minio/cmd/logger"
|
2020-11-12 15:12:09 -05:00
|
|
|
"github.com/minio/minio/pkg/bucket/lifecycle"
|
2020-12-13 15:05:54 -05:00
|
|
|
"github.com/minio/minio/pkg/color"
|
2020-12-29 04:57:28 -05:00
|
|
|
"github.com/minio/minio/pkg/console"
|
2016-04-08 20:13:16 -04:00
|
|
|
"github.com/minio/minio/pkg/disk"
|
2020-08-04 17:55:53 -04:00
|
|
|
"github.com/minio/minio/pkg/env"
|
2019-05-22 16:47:15 -04:00
|
|
|
xioutil "github.com/minio/minio/pkg/ioutil"
|
2015-10-16 14:26:01 -04:00
|
|
|
)
|
|
|
|
|
2016-04-08 13:37:38 -04:00
|
|
|
const (
	// nullVersionID is the literal version ID string "null".
	nullVersionID  = "null"
	blockSizeLarge = 2 * humanize.MiByte   // Default r/w block size for larger objects.
	blockSizeSmall = 128 * humanize.KiByte // Default r/w block size for smaller objects.

	// Read-ahead applies to regular files bigger than this (16 MiB).
	readAheadSize = 16 << 20
	// Read this many buffers ahead.
	readAheadBuffers = 4
	// Size of each read-ahead buffer (1 MiB).
	readAheadBufSize = 1 << 20

	// Small file threshold below which data accompanies metadata from storage layer.
	smallFileThreshold = 128 * humanize.KiByte // Optimized for NVMe/SSDs
	// For hard drives it is possible to set this to a lower value to avoid any
	// spike in latency. But currently we are simply keeping it optimal for SSDs.

	// XL metadata file carries per object metadata.
	xlStorageFormatFile = "xl.meta"
)
|
|
|
|
|
2021-03-18 17:09:55 -04:00
|
|
|
var alignedBuf []byte
|
|
|
|
|
|
|
|
func init() {
|
|
|
|
alignedBuf = disk.AlignedBlock(4096)
|
|
|
|
_, _ = rand.Read(alignedBuf)
|
|
|
|
}
|
|
|
|
|
2018-06-07 03:01:40 -04:00
|
|
|
// isValidVolname reports whether volname satisfies the object layer
// requirements for a volume name: it must be at least 3 bytes long and,
// on Windows, must not contain any of the reserved characters.
func isValidVolname(volname string) bool {
	const minVolnameLen = 3

	switch {
	case len(volname) < minVolnameLen:
		return false
	case runtime.GOOS == "windows":
		// Volname shouldn't have reserved characters in Windows.
		hasReserved := strings.ContainsAny(volname, `\:*?\"<>|`)
		return !hasReserved
	default:
		return true
	}
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
// xlStorage - implements StorageAPI interface.
type xlStorage struct {
	// diskPath is the disk directory, as returned by getValidPath in newXLStorage.
	diskPath string
	// endpoint is the Endpoint this storage was created from.
	endpoint Endpoint

	// globalSync is set in newXLStorage when config.EnvFSOSync equals config.EnableOn.
	globalSync bool

	// Pools of aligned buffers of blockSizeLarge and blockSizeSmall bytes respectively.
	poolLarge sync.Pool
	poolSmall sync.Pool

	// rootDisk holds the root-disk detection result computed in newXLStorage.
	rootDisk bool

	// diskID is the disk ID returned by GetDiskID and used by GetDiskLoc.
	diskID string

	// Indexes, will be -1 until assigned a set.
	poolIndex, setIndex, diskIndex int

	// formatFileInfo and formatLastCheck are read by GetDiskID to decide
	// whether the cached diskID can be returned as is.
	formatFileInfo os.FileInfo
	formatLegacy   bool
	formatLastCheck time.Time

	// diskInfoCache backs DiskInfo; it is configured there with a one second TTL.
	diskInfoCache timedValue

	// ctx is initialized to GlobalContext in newXLStorage.
	ctx context.Context
	// The embedded lock is held by GetDiskID while reading the fields above.
	sync.RWMutex
}
|
|
|
|
|
2016-05-11 15:55:02 -04:00
|
|
|
// checkPathLength - returns error if given path name length more than 255
|
|
|
|
func checkPathLength(pathName string) error {
|
2016-07-03 14:17:08 -04:00
|
|
|
// Apple OS X path length is limited to 1016
|
|
|
|
if runtime.GOOS == "darwin" && len(pathName) > 1016 {
|
|
|
|
return errFileNameTooLong
|
|
|
|
}
|
|
|
|
|
2020-04-28 20:32:46 -04:00
|
|
|
// Disallow more than 1024 characters on windows, there
|
|
|
|
// are no known name_max limits on Windows.
|
2020-05-14 02:55:38 -04:00
|
|
|
if runtime.GOOS == "windows" && len(pathName) > 1024 {
|
2020-05-04 16:11:56 -04:00
|
|
|
return errFileNameTooLong
|
2018-11-26 00:05:14 -05:00
|
|
|
}
|
2016-10-31 12:34:44 -04:00
|
|
|
|
2020-04-28 20:32:46 -04:00
|
|
|
// On Unix we reject paths if they are just '.', '..' or '/'
|
|
|
|
if pathName == "." || pathName == ".." || pathName == slashSeparator {
|
|
|
|
return errFileAccessDenied
|
|
|
|
}
|
2016-05-11 15:55:02 -04:00
|
|
|
|
2020-04-28 20:32:46 -04:00
|
|
|
// Check each path segment length is > 255 on all Unix
|
|
|
|
// platforms, look for this value as NAME_MAX in
|
|
|
|
// /usr/include/linux/limits.h
|
|
|
|
var count int64
|
|
|
|
for _, p := range pathName {
|
|
|
|
switch p {
|
|
|
|
case '/':
|
|
|
|
count = 0 // Reset
|
2020-05-14 02:55:38 -04:00
|
|
|
case '\\':
|
|
|
|
if runtime.GOOS == globalWindowsOSName {
|
|
|
|
count = 0
|
|
|
|
}
|
2020-04-28 20:32:46 -04:00
|
|
|
default:
|
|
|
|
count++
|
|
|
|
if count > 255 {
|
|
|
|
return errFileNameTooLong
|
|
|
|
}
|
2016-05-11 15:55:02 -04:00
|
|
|
}
|
2016-06-13 05:53:09 -04:00
|
|
|
} // Success.
|
2016-05-11 15:55:02 -04:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-01-22 18:38:21 -05:00
|
|
|
func getValidPath(path string) (string, error) {
|
2018-04-09 23:56:09 -04:00
|
|
|
if path == "" {
|
|
|
|
return path, errInvalidArgument
|
|
|
|
}
|
|
|
|
|
|
|
|
var err error
|
|
|
|
// Disallow relative paths, figure out absolute paths.
|
|
|
|
path, err = filepath.Abs(path)
|
|
|
|
if err != nil {
|
|
|
|
return path, err
|
|
|
|
}
|
|
|
|
|
2021-03-23 17:51:27 -04:00
|
|
|
fi, err := Lstat(path)
|
2020-11-23 11:36:49 -05:00
|
|
|
if err != nil && !osIsNotExist(err) {
|
2018-04-09 23:56:09 -04:00
|
|
|
return path, err
|
|
|
|
}
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2018-04-09 23:56:09 -04:00
|
|
|
// Disk not found create it.
|
2021-01-22 18:38:21 -05:00
|
|
|
if err = reliableMkdirAll(path, 0777); err != nil {
|
2018-04-09 23:56:09 -04:00
|
|
|
return path, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if fi != nil && !fi.IsDir() {
|
2020-06-12 23:04:01 -04:00
|
|
|
return path, errDiskNotDir
|
2018-04-09 23:56:09 -04:00
|
|
|
}
|
|
|
|
|
2018-06-12 19:36:31 -04:00
|
|
|
return path, nil
|
2018-04-09 23:56:09 -04:00
|
|
|
}
|
|
|
|
|
2016-04-08 13:37:38 -04:00
|
|
|
// isDirEmpty - returns whether given directory is empty or not.
|
2016-05-08 04:58:05 -04:00
|
|
|
func isDirEmpty(dirname string) bool {
|
2020-11-04 16:05:21 -05:00
|
|
|
entries, err := readDirN(dirname, 1)
|
2016-05-08 04:58:05 -04:00
|
|
|
if err != nil {
|
2020-11-04 16:05:21 -05:00
|
|
|
if err != errFileNotFound {
|
2020-04-09 12:30:02 -04:00
|
|
|
logger.LogIf(GlobalContext, err)
|
2017-08-04 13:43:51 -04:00
|
|
|
}
|
2016-05-08 04:58:05 -04:00
|
|
|
return false
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
2020-11-04 16:05:21 -05:00
|
|
|
return len(entries) == 0
|
2016-02-18 03:38:58 -05:00
|
|
|
}
|
|
|
|
|
2016-04-08 13:37:38 -04:00
|
|
|
// Initialize a new storage disk.
|
2020-08-25 13:55:15 -04:00
|
|
|
func newLocalXLStorage(path string) (*xlStorage, error) {
|
|
|
|
u := url.URL{Path: path}
|
|
|
|
return newXLStorage(Endpoint{
|
|
|
|
URL: &u,
|
|
|
|
IsLocal: true,
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
// Initialize a new storage disk.
|
|
|
|
func newXLStorage(ep Endpoint) (*xlStorage, error) {
|
|
|
|
path := ep.Path
|
2018-04-09 23:56:09 -04:00
|
|
|
var err error
|
2021-01-22 18:38:21 -05:00
|
|
|
if path, err = getValidPath(path); err != nil {
|
2016-06-13 05:53:09 -04:00
|
|
|
return nil, err
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
|
2020-12-12 19:10:07 -05:00
|
|
|
var rootDisk bool
|
|
|
|
if env.Get("MINIO_CI_CD", "") != "" {
|
|
|
|
rootDisk = true
|
|
|
|
} else {
|
2021-02-22 13:32:21 -05:00
|
|
|
if IsDocker() || IsKubernetes() {
|
|
|
|
// Start with overlay "/" to check if
|
|
|
|
// possible the path has device id as
|
|
|
|
// "overlay" that would mean the path
|
|
|
|
// is emphemeral and we should treat it
|
|
|
|
// as root disk from the baremetal
|
|
|
|
// terminology.
|
2021-03-17 12:38:38 -04:00
|
|
|
rootDisk, err = disk.IsRootDisk(path, SlashSeparator)
|
2021-02-22 13:32:21 -05:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if !rootDisk {
|
|
|
|
// No root disk was found, its possible that
|
2021-03-17 12:38:38 -04:00
|
|
|
// path is referenced at "/etc/hosts" which has
|
2021-02-22 13:32:21 -05:00
|
|
|
// different device ID that points to the original
|
|
|
|
// "/" on the host system, fall back to that instead
|
|
|
|
// to verify of the device id is same.
|
2021-03-17 12:38:38 -04:00
|
|
|
rootDisk, err = disk.IsRootDisk(path, "/etc/hosts")
|
2021-02-22 13:32:21 -05:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
// On baremetal setups its always "/" is the root disk.
|
2021-03-17 12:38:38 -04:00
|
|
|
rootDisk, err = disk.IsRootDisk(path, SlashSeparator)
|
2021-02-22 13:32:21 -05:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-12-12 19:10:07 -05:00
|
|
|
}
|
2020-08-18 17:37:26 -04:00
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
p := &xlStorage{
|
2019-10-25 13:37:53 -04:00
|
|
|
diskPath: path,
|
2020-09-28 22:39:32 -04:00
|
|
|
endpoint: ep,
|
2021-01-12 13:20:39 -05:00
|
|
|
poolLarge: sync.Pool{
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
New: func() interface{} {
|
2021-01-12 13:20:39 -05:00
|
|
|
b := disk.AlignedBlock(blockSizeLarge)
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
return &b
|
|
|
|
},
|
|
|
|
},
|
2021-01-12 13:20:39 -05:00
|
|
|
poolSmall: sync.Pool{
|
2016-10-26 20:14:05 -04:00
|
|
|
New: func() interface{} {
|
2021-01-12 13:20:39 -05:00
|
|
|
b := disk.AlignedBlock(blockSizeSmall)
|
2016-10-26 20:14:05 -04:00
|
|
|
return &b
|
|
|
|
},
|
|
|
|
},
|
2021-03-18 23:16:50 -04:00
|
|
|
globalSync: env.Get(config.EnvFSOSync, config.EnableOff) == config.EnableOn,
|
|
|
|
ctx: GlobalContext,
|
|
|
|
rootDisk: rootDisk,
|
|
|
|
poolIndex: -1,
|
|
|
|
setIndex: -1,
|
|
|
|
diskIndex: -1,
|
2018-06-27 21:59:38 -04:00
|
|
|
}
|
2018-05-23 06:11:29 -04:00
|
|
|
|
2021-01-22 18:38:21 -05:00
|
|
|
// Create all necessary bucket folders if possible.
|
|
|
|
if err = p.MakeVolBulk(context.TODO(), minioMetaBucket, minioMetaTmpBucket, minioMetaMultipartBucket, dataUsageBucket); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check if backend is writable and supports O_DIRECT
|
|
|
|
var rnd [8]byte
|
|
|
|
_, _ = rand.Read(rnd[:])
|
|
|
|
tmpFile := ".writable-check-" + hex.EncodeToString(rnd[:]) + ".tmp"
|
2021-03-18 17:09:55 -04:00
|
|
|
filePath := pathJoin(p.diskPath, minioMetaTmpBucket, tmpFile)
|
|
|
|
w, err := disk.OpenFileDirectIO(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0666)
|
|
|
|
if err != nil {
|
|
|
|
return p, err
|
|
|
|
}
|
|
|
|
if _, err = w.Write(alignedBuf[:]); err != nil {
|
|
|
|
w.Close()
|
2021-01-22 18:38:21 -05:00
|
|
|
return p, err
|
|
|
|
}
|
2021-03-18 17:09:55 -04:00
|
|
|
w.Close()
|
2021-03-23 17:51:27 -04:00
|
|
|
defer Remove(filePath)
|
2021-01-22 18:38:21 -05:00
|
|
|
|
2017-07-10 21:14:48 -04:00
|
|
|
// Success.
|
2018-06-06 04:51:56 -04:00
|
|
|
return p, nil
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
2016-01-25 02:03:38 -05:00
|
|
|
|
2016-06-29 14:25:35 -04:00
|
|
|
// getDiskInfo returns given disk information.
|
|
|
|
func getDiskInfo(diskPath string) (di disk.Info, err error) {
|
|
|
|
if err = checkPathLength(diskPath); err == nil {
|
|
|
|
di, err = disk.GetInfo(diskPath)
|
|
|
|
}
|
|
|
|
|
2019-07-25 16:35:27 -04:00
|
|
|
switch {
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsNotExist(err):
|
2016-06-29 14:25:35 -04:00
|
|
|
err = errDiskNotFound
|
2019-07-25 16:35:27 -04:00
|
|
|
case isSysErrTooLong(err):
|
|
|
|
err = errFileNameTooLong
|
|
|
|
case isSysErrIO(err):
|
|
|
|
err = errFaultyDisk
|
2016-06-29 14:25:35 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return di, err
|
|
|
|
}
|
|
|
|
|
2016-10-05 15:48:07 -04:00
|
|
|
// Implements stringer compatible interface.
|
2020-06-12 23:04:01 -04:00
|
|
|
func (s *xlStorage) String() string {
|
2016-10-27 06:30:52 -04:00
|
|
|
return s.diskPath
|
2016-10-05 15:48:07 -04:00
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
func (s *xlStorage) Hostname() string {
|
2020-09-28 22:39:32 -04:00
|
|
|
return s.endpoint.Host
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *xlStorage) Endpoint() Endpoint {
|
|
|
|
return s.endpoint
|
2020-01-13 16:09:10 -05:00
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
func (*xlStorage) Close() error {
|
2016-11-23 18:48:10 -05:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
func (s *xlStorage) IsOnline() bool {
|
2019-10-25 13:37:53 -04:00
|
|
|
return true
|
2016-11-23 18:48:10 -05:00
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
func (s *xlStorage) IsLocal() bool {
|
2020-05-19 17:27:20 -04:00
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2021-03-04 17:36:23 -05:00
|
|
|
// Retrieve location indexes.
|
|
|
|
func (s *xlStorage) GetDiskLoc() (poolIdx, setIdx, diskIdx int) {
|
|
|
|
// If unset, see if we can locate it.
|
|
|
|
if s.poolIndex < 0 || s.setIndex < 0 || s.diskIndex < 0 {
|
|
|
|
return getXLDiskLoc(s.diskID)
|
|
|
|
}
|
|
|
|
return s.poolIndex, s.setIndex, s.diskIndex
|
|
|
|
}
|
|
|
|
|
|
|
|
// Set location indexes.
|
|
|
|
func (s *xlStorage) SetDiskLoc(poolIdx, setIdx, diskIdx int) {
|
|
|
|
s.poolIndex = poolIdx
|
|
|
|
s.setIndex = setIdx
|
|
|
|
s.diskIndex = diskIdx
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *xlStorage) Healing() *healingTracker {
|
2020-09-28 22:39:32 -04:00
|
|
|
healingFile := pathJoin(s.diskPath, minioMetaBucket,
|
|
|
|
bucketMetaPrefix, healingTrackerFilename)
|
2021-03-04 17:36:23 -05:00
|
|
|
b, err := ioutil.ReadFile(healingFile)
|
|
|
|
if err != nil {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
var h healingTracker
|
|
|
|
_, err = h.UnmarshalMsg(b)
|
|
|
|
logger.LogIf(GlobalContext, err)
|
|
|
|
return &h
|
2020-09-28 22:39:32 -04:00
|
|
|
}
|
|
|
|
|
2021-02-26 18:11:42 -05:00
|
|
|
func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache) (dataUsageCache, error) {
|
2020-12-27 01:58:06 -05:00
|
|
|
var lc *lifecycle.Lifecycle
|
|
|
|
var err error
|
|
|
|
|
2020-06-12 13:28:21 -04:00
|
|
|
// Check if the current bucket has a configured lifecycle policy
|
2020-12-27 01:58:06 -05:00
|
|
|
if globalLifecycleSys != nil {
|
|
|
|
lc, err = globalLifecycleSys.Get(cache.Info.Name)
|
|
|
|
if err == nil && lc.HasActiveRules("", true) {
|
|
|
|
cache.Info.lifeCycle = lc
|
|
|
|
if intDataUpdateTracker.debug {
|
2021-02-26 18:11:42 -05:00
|
|
|
console.Debugln(color.Green("scannerDisk:") + " lifecycle: Active rules found")
|
2020-12-27 01:58:06 -05:00
|
|
|
}
|
2020-12-13 15:05:54 -05:00
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
}
|
|
|
|
|
2020-12-27 01:58:06 -05:00
|
|
|
// return initialized object layer
|
2020-10-09 12:59:52 -04:00
|
|
|
objAPI := newObjectLayerFn()
|
2020-12-04 12:32:35 -05:00
|
|
|
|
|
|
|
globalHealConfigMu.Lock()
|
|
|
|
healOpts := globalHealConfig
|
|
|
|
globalHealConfigMu.Unlock()
|
2020-06-12 23:04:01 -04:00
|
|
|
|
2021-02-26 18:11:42 -05:00
|
|
|
dataUsageInfo, err := scanDataFolder(ctx, s.diskPath, cache, func(item scannerItem) (sizeSummary, error) {
|
2020-06-12 23:04:01 -04:00
|
|
|
// Look for `xl.meta/xl.json' at the leaf.
|
|
|
|
if !strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFile) &&
|
|
|
|
!strings.HasSuffix(item.Path, SlashSeparator+xlStorageFormatFileV1) {
|
|
|
|
// if no xl.meta/xl.json found, skip the file.
|
2020-12-07 16:47:48 -05:00
|
|
|
return sizeSummary{}, errSkipFile
|
2019-12-12 09:02:37 -05:00
|
|
|
}
|
|
|
|
|
2021-02-24 03:14:16 -05:00
|
|
|
buf, err := xioutil.ReadFile(item.Path)
|
2020-01-21 17:07:49 -05:00
|
|
|
if err != nil {
|
2020-12-13 15:05:54 -05:00
|
|
|
if intDataUpdateTracker.debug {
|
2021-02-26 18:11:42 -05:00
|
|
|
console.Debugf(color.Green("scannerBucket:")+" object path missing: %v: %w\n", item.Path, err)
|
2020-12-13 15:05:54 -05:00
|
|
|
}
|
2020-12-07 16:47:48 -05:00
|
|
|
return sizeSummary{}, errSkipFile
|
2019-12-12 09:02:37 -05:00
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
// Remove filename which is the meta file.
|
|
|
|
item.transformMetaDir()
|
|
|
|
|
2020-11-22 02:48:50 -05:00
|
|
|
fivs, err := getFileInfoVersions(buf, item.bucket, item.objectPath())
|
2020-05-24 14:19:17 -04:00
|
|
|
if err != nil {
|
2020-12-13 15:05:54 -05:00
|
|
|
if intDataUpdateTracker.debug {
|
2021-02-26 18:11:42 -05:00
|
|
|
console.Debugf(color.Green("scannerBucket:")+" reading xl.meta failed: %v: %w\n", item.Path, err)
|
2020-12-13 15:05:54 -05:00
|
|
|
}
|
2020-12-07 16:47:48 -05:00
|
|
|
return sizeSummary{}, errSkipFile
|
2020-05-24 14:19:17 -04:00
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
var totalSize int64
|
2020-09-09 21:11:24 -04:00
|
|
|
|
2020-12-07 16:47:48 -05:00
|
|
|
sizeS := sizeSummary{}
|
2021-02-01 12:52:11 -05:00
|
|
|
for _, version := range fivs.Versions {
|
2020-09-02 03:19:03 -04:00
|
|
|
oi := version.ToObjectInfo(item.bucket, item.objectPath())
|
2020-12-27 01:58:06 -05:00
|
|
|
if objAPI != nil {
|
2020-12-28 13:31:00 -05:00
|
|
|
totalSize += item.applyActions(ctx, objAPI, actionMeta{
|
2021-02-01 12:52:11 -05:00
|
|
|
oi: oi,
|
|
|
|
bitRotScan: healOpts.Bitrot,
|
2020-12-27 01:58:06 -05:00
|
|
|
})
|
2021-02-03 23:41:33 -05:00
|
|
|
item.healReplication(ctx, objAPI, oi.Clone(), &sizeS)
|
2020-07-03 12:15:44 -04:00
|
|
|
}
|
2020-07-21 20:49:56 -04:00
|
|
|
}
|
2020-12-07 16:47:48 -05:00
|
|
|
sizeS.totalSize = totalSize
|
|
|
|
return sizeS, nil
|
2020-01-21 17:07:49 -05:00
|
|
|
})
|
2020-06-12 23:04:01 -04:00
|
|
|
|
2020-03-18 19:19:29 -04:00
|
|
|
if err != nil {
|
|
|
|
return dataUsageInfo, err
|
|
|
|
}
|
2020-06-12 13:28:21 -04:00
|
|
|
|
2020-03-18 19:19:29 -04:00
|
|
|
dataUsageInfo.Info.LastUpdate = time.Now()
|
2019-12-12 09:02:37 -05:00
|
|
|
return dataUsageInfo, nil
|
|
|
|
}
|
|
|
|
|
2016-08-25 20:16:34 -04:00
|
|
|
// DiskInfo provides current information about disk space usage,
|
|
|
|
// total free inodes and underlying filesystem.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) DiskInfo(context.Context) (info DiskInfo, err error) {
|
2020-09-29 12:54:41 -04:00
|
|
|
s.diskInfoCache.Once.Do(func() {
|
|
|
|
s.diskInfoCache.TTL = time.Second
|
|
|
|
s.diskInfoCache.Update = func() (interface{}, error) {
|
|
|
|
dcinfo := DiskInfo{
|
|
|
|
RootDisk: s.rootDisk,
|
|
|
|
MountPath: s.diskPath,
|
|
|
|
Endpoint: s.endpoint.String(),
|
|
|
|
}
|
|
|
|
di, err := getDiskInfo(s.diskPath)
|
|
|
|
if err != nil {
|
|
|
|
return dcinfo, err
|
|
|
|
}
|
|
|
|
dcinfo.Total = di.Total
|
|
|
|
dcinfo.Free = di.Free
|
2020-09-30 01:54:02 -04:00
|
|
|
dcinfo.Used = di.Used
|
2020-12-27 01:58:06 -05:00
|
|
|
dcinfo.UsedInodes = di.Files - di.Ffree
|
2020-09-29 12:54:41 -04:00
|
|
|
dcinfo.FSType = di.FSType
|
|
|
|
|
|
|
|
diskID, err := s.GetDiskID()
|
|
|
|
if errors.Is(err, errUnformattedDisk) {
|
|
|
|
// if we found an unformatted disk then
|
|
|
|
// healing is automatically true.
|
|
|
|
dcinfo.Healing = true
|
|
|
|
} else {
|
|
|
|
// Check if the disk is being healed if GetDiskID
|
|
|
|
// returned any error other than fresh disk
|
2021-03-04 17:36:23 -05:00
|
|
|
dcinfo.Healing = s.Healing() != nil
|
2020-09-29 12:54:41 -04:00
|
|
|
}
|
2019-07-25 16:35:27 -04:00
|
|
|
|
2020-09-29 12:54:41 -04:00
|
|
|
dcinfo.ID = diskID
|
|
|
|
return dcinfo, err
|
|
|
|
}
|
|
|
|
})
|
2020-07-13 12:51:07 -04:00
|
|
|
|
2020-09-29 12:54:41 -04:00
|
|
|
v, err := s.diskInfoCache.Get()
|
|
|
|
info = v.(DiskInfo)
|
2020-07-13 12:51:07 -04:00
|
|
|
return info, err
|
2016-08-25 20:16:34 -04:00
|
|
|
}
|
|
|
|
|
2016-05-18 00:22:27 -04:00
|
|
|
// getVolDir - will convert incoming volume names to
|
2016-04-13 14:32:47 -04:00
|
|
|
// corresponding valid volume names on the backend in a platform
|
|
|
|
// compatible way for all operating systems. If volume is not found
|
|
|
|
// an error is generated.
|
2020-06-12 23:04:01 -04:00
|
|
|
func (s *xlStorage) getVolDir(volume string) (string, error) {
|
2018-04-23 23:27:33 -04:00
|
|
|
if volume == "" || volume == "." || volume == ".." {
|
|
|
|
return "", errVolumeNotFound
|
2016-04-13 14:32:47 -04:00
|
|
|
}
|
2016-05-05 04:39:26 -04:00
|
|
|
volumeDir := pathJoin(s.diskPath, volume)
|
2016-05-18 00:22:27 -04:00
|
|
|
return volumeDir, nil
|
2016-04-13 14:32:47 -04:00
|
|
|
}
|
2016-04-08 13:37:38 -04:00
|
|
|
|
2020-03-27 17:48:30 -04:00
|
|
|
// GetDiskID - returns the cached disk uuid
|
2020-06-12 23:04:01 -04:00
|
|
|
func (s *xlStorage) GetDiskID() (string, error) {
|
2019-10-25 13:37:53 -04:00
|
|
|
s.RLock()
|
|
|
|
diskID := s.diskID
|
posix: cache disk ID for a short while (#8564)
`*posix.getDiskID()` takes up to 30% of all CPU due to the `os.Stat` call on `GET` calls.
Before:
```
Operation: GET - Concurrency: 12
* Average: 1333.97 MB/s, 1365.99 obj/s, 1365.98 ops ended/s (4m59.975s)
* First Byte: Average: 7.801487ms, Median: 7.9974ms, Best: 1.9822ms, Worst: 110.0021ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1453.50 MB/s, 1488.38 obj/s, 1492.00 ops ended/s (1s)
* 50% Median: 1360.47 MB/s, 1393.12 obj/s, 1393.00 ops ended/s (1s)
* Slowest: 978.68 MB/s, 1002.17 obj/s, 1004.00 ops ended/s (1s)
```
After:
```
Operation: GET - Concurrency: 12
* Average: 1706.07 MB/s, 1747.02 obj/s, 1747.01 ops ended/s (4m59.985s)
* First Byte: Average: 5.797886ms, Median: 5.9959ms, Best: 996.3µs, Worst: 84.0007ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1830.03 MB/s, 1873.96 obj/s, 1872.00 ops ended/s (1s)
* 50% Median: 1735.04 MB/s, 1776.68 obj/s, 1776.00 ops ended/s (1s)
* Slowest: 994.94 MB/s, 1018.82 obj/s, 1018.00 ops ended/s (1s)
```
TLDR; `os.Stat` is not free.
2019-11-29 05:57:14 -05:00
|
|
|
fileInfo := s.formatFileInfo
|
|
|
|
lastCheck := s.formatLastCheck
|
2019-10-25 13:37:53 -04:00
|
|
|
s.RUnlock()
|
|
|
|
|
posix: cache disk ID for a short while (#8564)
`*posix.getDiskID()` takes up to 30% of all CPU due to the `os.Stat` call on `GET` calls.
Before:
```
Operation: GET - Concurrency: 12
* Average: 1333.97 MB/s, 1365.99 obj/s, 1365.98 ops ended/s (4m59.975s)
* First Byte: Average: 7.801487ms, Median: 7.9974ms, Best: 1.9822ms, Worst: 110.0021ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1453.50 MB/s, 1488.38 obj/s, 1492.00 ops ended/s (1s)
* 50% Median: 1360.47 MB/s, 1393.12 obj/s, 1393.00 ops ended/s (1s)
* Slowest: 978.68 MB/s, 1002.17 obj/s, 1004.00 ops ended/s (1s)
```
After:
```
Operation: GET - Concurrency: 12
* Average: 1706.07 MB/s, 1747.02 obj/s, 1747.01 ops ended/s (4m59.985s)
* First Byte: Average: 5.797886ms, Median: 5.9959ms, Best: 996.3µs, Worst: 84.0007ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1830.03 MB/s, 1873.96 obj/s, 1872.00 ops ended/s (1s)
* 50% Median: 1735.04 MB/s, 1776.68 obj/s, 1776.00 ops ended/s (1s)
* Slowest: 994.94 MB/s, 1018.82 obj/s, 1018.00 ops ended/s (1s)
```
TLDR; `os.Stat` is not free.
2019-11-29 05:57:14 -05:00
|
|
|
// check if we have a valid disk ID that is less than 1 second old.
|
2020-09-29 12:54:41 -04:00
|
|
|
if fileInfo != nil && diskID != "" && time.Since(lastCheck) <= time.Second {
|
posix: cache disk ID for a short while (#8564)
`*posix.getDiskID()` takes up to 30% of all CPU due to the `os.Stat` call on `GET` calls.
Before:
```
Operation: GET - Concurrency: 12
* Average: 1333.97 MB/s, 1365.99 obj/s, 1365.98 ops ended/s (4m59.975s)
* First Byte: Average: 7.801487ms, Median: 7.9974ms, Best: 1.9822ms, Worst: 110.0021ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1453.50 MB/s, 1488.38 obj/s, 1492.00 ops ended/s (1s)
* 50% Median: 1360.47 MB/s, 1393.12 obj/s, 1393.00 ops ended/s (1s)
* Slowest: 978.68 MB/s, 1002.17 obj/s, 1004.00 ops ended/s (1s)
```
After:
```
Operation: GET - Concurrency: 12
* Average: 1706.07 MB/s, 1747.02 obj/s, 1747.01 ops ended/s (4m59.985s)
* First Byte: Average: 5.797886ms, Median: 5.9959ms, Best: 996.3µs, Worst: 84.0007ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1830.03 MB/s, 1873.96 obj/s, 1872.00 ops ended/s (1s)
* 50% Median: 1735.04 MB/s, 1776.68 obj/s, 1776.00 ops ended/s (1s)
* Slowest: 994.94 MB/s, 1018.82 obj/s, 1018.00 ops ended/s (1s)
```
TLDR; `os.Stat` is not free.
2019-11-29 05:57:14 -05:00
|
|
|
return diskID, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
s.Lock()
|
|
|
|
// If somebody else updated the disk ID and changed the time, return what they got.
|
2020-03-27 17:48:30 -04:00
|
|
|
if !lastCheck.IsZero() && !s.formatLastCheck.Equal(lastCheck) && diskID != "" {
|
2021-01-11 05:27:04 -05:00
|
|
|
s.Unlock()
|
posix: cache disk ID for a short while (#8564)
`*posix.getDiskID()` takes up to 30% of all CPU due to the `os.Stat` call on `GET` calls.
Before:
```
Operation: GET - Concurrency: 12
* Average: 1333.97 MB/s, 1365.99 obj/s, 1365.98 ops ended/s (4m59.975s)
* First Byte: Average: 7.801487ms, Median: 7.9974ms, Best: 1.9822ms, Worst: 110.0021ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1453.50 MB/s, 1488.38 obj/s, 1492.00 ops ended/s (1s)
* 50% Median: 1360.47 MB/s, 1393.12 obj/s, 1393.00 ops ended/s (1s)
* Slowest: 978.68 MB/s, 1002.17 obj/s, 1004.00 ops ended/s (1s)
```
After:
```
Operation: GET - Concurrency: 12
* Average: 1706.07 MB/s, 1747.02 obj/s, 1747.01 ops ended/s (4m59.985s)
* First Byte: Average: 5.797886ms, Median: 5.9959ms, Best: 996.3µs, Worst: 84.0007ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1830.03 MB/s, 1873.96 obj/s, 1872.00 ops ended/s (1s)
* 50% Median: 1735.04 MB/s, 1776.68 obj/s, 1776.00 ops ended/s (1s)
* Slowest: 994.94 MB/s, 1018.82 obj/s, 1018.00 ops ended/s (1s)
```
TLDR; `os.Stat` is not free.
2019-11-29 05:57:14 -05:00
|
|
|
// Somebody else got the lock first.
|
|
|
|
return diskID, nil
|
|
|
|
}
|
2021-01-11 05:27:04 -05:00
|
|
|
s.Unlock()
|
2020-08-03 21:17:48 -04:00
|
|
|
|
2019-10-25 13:37:53 -04:00
|
|
|
formatFile := pathJoin(s.diskPath, minioMetaBucket, formatConfigFile)
|
2021-03-23 17:51:27 -04:00
|
|
|
fi, err := Lstat(formatFile)
|
2019-10-25 13:37:53 -04:00
|
|
|
if err != nil {
|
|
|
|
// If the disk is still not initialized.
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2021-03-23 17:51:27 -04:00
|
|
|
_, err = Lstat(s.diskPath)
|
2020-07-13 12:51:07 -04:00
|
|
|
if err == nil {
|
2020-07-21 16:54:06 -04:00
|
|
|
// Disk is present but missing `format.json`
|
2020-07-13 12:51:07 -04:00
|
|
|
return "", errUnformattedDisk
|
|
|
|
}
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2020-07-13 12:51:07 -04:00
|
|
|
return "", errDiskNotFound
|
2020-11-23 11:36:49 -05:00
|
|
|
} else if osIsPermission(err) {
|
2020-07-13 12:51:07 -04:00
|
|
|
return "", errDiskAccessDenied
|
|
|
|
}
|
2020-08-03 21:17:48 -04:00
|
|
|
logger.LogIf(GlobalContext, err) // log unexpected errors
|
|
|
|
return "", errCorruptedFormat
|
2020-11-23 11:36:49 -05:00
|
|
|
} else if osIsPermission(err) {
|
2020-08-03 21:17:48 -04:00
|
|
|
return "", errDiskAccessDenied
|
2020-03-27 17:48:30 -04:00
|
|
|
}
|
2020-08-03 21:17:48 -04:00
|
|
|
logger.LogIf(GlobalContext, err) // log unexpected errors
|
2020-03-27 17:48:30 -04:00
|
|
|
return "", errCorruptedFormat
|
2019-10-25 13:37:53 -04:00
|
|
|
}
|
|
|
|
|
2020-03-27 17:48:30 -04:00
|
|
|
if xioutil.SameFile(fi, fileInfo) && diskID != "" {
|
2021-01-11 05:27:04 -05:00
|
|
|
s.Lock()
|
2019-10-25 13:37:53 -04:00
|
|
|
// If the file has not changed, just return the cached diskID information.
|
posix: cache disk ID for a short while (#8564)
`*posix.getDiskID()` takes up to 30% of all CPU due to the `os.Stat` call on `GET` calls.
Before:
```
Operation: GET - Concurrency: 12
* Average: 1333.97 MB/s, 1365.99 obj/s, 1365.98 ops ended/s (4m59.975s)
* First Byte: Average: 7.801487ms, Median: 7.9974ms, Best: 1.9822ms, Worst: 110.0021ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1453.50 MB/s, 1488.38 obj/s, 1492.00 ops ended/s (1s)
* 50% Median: 1360.47 MB/s, 1393.12 obj/s, 1393.00 ops ended/s (1s)
* Slowest: 978.68 MB/s, 1002.17 obj/s, 1004.00 ops ended/s (1s)
```
After:
```
Operation: GET - Concurrency: 12
* Average: 1706.07 MB/s, 1747.02 obj/s, 1747.01 ops ended/s (4m59.985s)
* First Byte: Average: 5.797886ms, Median: 5.9959ms, Best: 996.3µs, Worst: 84.0007ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1830.03 MB/s, 1873.96 obj/s, 1872.00 ops ended/s (1s)
* 50% Median: 1735.04 MB/s, 1776.68 obj/s, 1776.00 ops ended/s (1s)
* Slowest: 994.94 MB/s, 1018.82 obj/s, 1018.00 ops ended/s (1s)
```
TLDR; `os.Stat` is not free.
2019-11-29 05:57:14 -05:00
|
|
|
s.formatLastCheck = time.Now()
|
2021-01-11 05:27:04 -05:00
|
|
|
s.Unlock()
|
2019-10-25 13:37:53 -04:00
|
|
|
return diskID, nil
|
2018-02-15 20:45:57 -05:00
|
|
|
}
|
2019-10-25 13:37:53 -04:00
|
|
|
|
2021-02-24 03:14:16 -05:00
|
|
|
b, err := xioutil.ReadFile(formatFile)
|
2016-10-31 12:34:44 -04:00
|
|
|
if err != nil {
|
2020-08-03 21:17:48 -04:00
|
|
|
// If the disk is still not initialized.
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2021-03-23 17:51:27 -04:00
|
|
|
_, err = Lstat(s.diskPath)
|
2020-08-03 21:17:48 -04:00
|
|
|
if err == nil {
|
|
|
|
// Disk is present but missing `format.json`
|
|
|
|
return "", errUnformattedDisk
|
|
|
|
}
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2020-08-03 21:17:48 -04:00
|
|
|
return "", errDiskNotFound
|
2020-11-23 11:36:49 -05:00
|
|
|
} else if osIsPermission(err) {
|
2020-08-03 21:17:48 -04:00
|
|
|
return "", errDiskAccessDenied
|
|
|
|
}
|
|
|
|
logger.LogIf(GlobalContext, err) // log unexpected errors
|
|
|
|
return "", errCorruptedFormat
|
2020-11-23 11:36:49 -05:00
|
|
|
} else if osIsPermission(err) {
|
2020-08-03 21:17:48 -04:00
|
|
|
return "", errDiskAccessDenied
|
|
|
|
}
|
|
|
|
logger.LogIf(GlobalContext, err) // log unexpected errors
|
2020-03-27 17:48:30 -04:00
|
|
|
return "", errCorruptedFormat
|
2016-10-31 12:34:44 -04:00
|
|
|
}
|
2020-08-03 21:17:48 -04:00
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
format := &formatErasureV3{}
|
2019-10-25 13:37:53 -04:00
|
|
|
var json = jsoniter.ConfigCompatibleWithStandardLibrary
|
|
|
|
if err = json.Unmarshal(b, &format); err != nil {
|
2020-08-03 21:17:48 -04:00
|
|
|
logger.LogIf(GlobalContext, err) // log unexpected errors
|
2020-03-27 17:48:30 -04:00
|
|
|
return "", errCorruptedFormat
|
2019-02-20 16:32:29 -05:00
|
|
|
}
|
2020-08-03 21:17:48 -04:00
|
|
|
|
2021-01-11 05:27:04 -05:00
|
|
|
s.Lock()
|
|
|
|
defer s.Unlock()
|
2020-06-12 23:04:01 -04:00
|
|
|
s.diskID = format.Erasure.This
|
2021-01-16 15:08:02 -05:00
|
|
|
s.formatLegacy = format.Erasure.DistributionAlgo == formatErasureVersionV2DistributionAlgoV1
|
2019-10-25 13:37:53 -04:00
|
|
|
s.formatFileInfo = fi
|
posix: cache disk ID for a short while (#8564)
`*posix.getDiskID()` takes up to 30% of all CPU due to the `os.Stat` call on `GET` calls.
Before:
```
Operation: GET - Concurrency: 12
* Average: 1333.97 MB/s, 1365.99 obj/s, 1365.98 ops ended/s (4m59.975s)
* First Byte: Average: 7.801487ms, Median: 7.9974ms, Best: 1.9822ms, Worst: 110.0021ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1453.50 MB/s, 1488.38 obj/s, 1492.00 ops ended/s (1s)
* 50% Median: 1360.47 MB/s, 1393.12 obj/s, 1393.00 ops ended/s (1s)
* Slowest: 978.68 MB/s, 1002.17 obj/s, 1004.00 ops ended/s (1s)
```
After:
```
Operation: GET - Concurrency: 12
* Average: 1706.07 MB/s, 1747.02 obj/s, 1747.01 ops ended/s (4m59.985s)
* First Byte: Average: 5.797886ms, Median: 5.9959ms, Best: 996.3µs, Worst: 84.0007ms
Aggregated, split into 299 x 1s time segments:
* Fastest: 1830.03 MB/s, 1873.96 obj/s, 1872.00 ops ended/s (1s)
* 50% Median: 1735.04 MB/s, 1776.68 obj/s, 1776.00 ops ended/s (1s)
* Slowest: 994.94 MB/s, 1018.82 obj/s, 1018.00 ops ended/s (1s)
```
TLDR; `os.Stat` is not free.
2019-11-29 05:57:14 -05:00
|
|
|
s.formatLastCheck = time.Now()
|
2019-10-25 13:37:53 -04:00
|
|
|
return s.diskID, nil
|
2016-10-31 12:34:44 -04:00
|
|
|
}
|
|
|
|
|
2019-10-25 13:37:53 -04:00
|
|
|
// Make a volume entry.
|
2020-06-12 23:04:01 -04:00
|
|
|
func (s *xlStorage) SetDiskID(id string) {
|
|
|
|
// NO-OP for xlStorage as it is handled either by xlStorageDiskIDCheck{} for local disks or
|
2019-10-25 13:37:53 -04:00
|
|
|
// storage rest server for remote disks.
|
|
|
|
}
|
|
|
|
|
2021-01-22 18:38:21 -05:00
|
|
|
func (s *xlStorage) MakeVolBulk(ctx context.Context, volumes ...string) error {
|
2019-12-23 19:31:03 -05:00
|
|
|
for _, volume := range volumes {
|
2021-01-22 18:38:21 -05:00
|
|
|
if err := s.MakeVol(ctx, volume); err != nil {
|
|
|
|
if errors.Is(err, errDiskAccessDenied) {
|
|
|
|
return errDiskAccessDenied
|
2020-01-10 05:35:06 -05:00
|
|
|
}
|
2019-12-23 19:31:03 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-04-13 14:32:47 -04:00
|
|
|
// Make a volume entry.
|
2021-01-22 18:38:21 -05:00
|
|
|
func (s *xlStorage) MakeVol(ctx context.Context, volume string) error {
|
2018-04-23 23:27:33 -04:00
|
|
|
if !isValidVolname(volume) {
|
|
|
|
return errInvalidArgument
|
|
|
|
}
|
|
|
|
|
2016-05-18 00:22:27 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
2018-01-29 21:43:13 -05:00
|
|
|
|
2021-03-23 17:51:27 -04:00
|
|
|
if _, err := Lstat(volumeDir); err != nil {
|
2018-01-29 21:43:13 -05:00
|
|
|
// Volume does not exist we proceed to create.
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2018-01-29 21:43:13 -05:00
|
|
|
// Make a volume entry, with mode 0777 mkdir honors system umask.
|
2021-01-22 18:38:21 -05:00
|
|
|
err = reliableMkdirAll(volumeDir, 0777)
|
2018-01-29 21:43:13 -05:00
|
|
|
}
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsPermission(err) {
|
2016-07-02 04:59:28 -04:00
|
|
|
return errDiskAccessDenied
|
2018-07-27 18:32:19 -04:00
|
|
|
} else if isSysErrIO(err) {
|
|
|
|
return errFaultyDisk
|
2016-07-02 04:59:28 -04:00
|
|
|
}
|
|
|
|
return err
|
2016-04-13 14:32:47 -04:00
|
|
|
}
|
2018-01-29 21:43:13 -05:00
|
|
|
|
|
|
|
// Stat succeeds we return errVolumeExists.
|
|
|
|
return errVolumeExists
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// ListVols - list volumes.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) ListVols(context.Context) (volsInfo []VolInfo, err error) {
|
2020-10-08 15:32:32 -04:00
|
|
|
return listVols(s.diskPath)
|
2015-10-16 14:26:01 -04:00
|
|
|
}
|
config/main: Re-write config files - add to new config v3
- New config format.
```
{
"version": "3",
"address": ":9000",
"backend": {
"type": "fs",
"disk": "/path"
},
"credential": {
"accessKey": "WLGDGYAQYIGI833EV05A",
"secretKey": "BYvgJM101sHngl2uzjXS/OBF/aMxAN06JrJ3qJlF"
},
"region": "us-east-1",
"logger": {
"file": {
"enable": false,
"fileName": "",
"level": "error"
},
"syslog": {
"enable": false,
"address": "",
"level": "debug"
},
"console": {
"enable": true,
"level": "fatal"
}
}
}
```
New command lines in lieu of supporting XL.
Minio initialize filesystem backend.
~~~
$ minio init fs <path>
~~~
Minio initialize XL backend.
~~~
$ minio init xl <url1>...<url16>
~~~
For 'fs' backend it starts the server.
~~~
$ minio server
~~~
For 'xl' backend it waits for servers to join.
~~~
$ minio server
... [PROGRESS BAR] of servers connecting
~~~
Now on other servers execute 'join' and they connect.
~~~
....
minio join <url1> -- from <url2> && minio server
minio join <url1> -- from <url3> && minio server
...
...
minio join <url1> -- from <url16> && minio server
~~~
2016-02-12 18:27:10 -05:00
|
|
|
|
2016-08-11 22:57:14 -04:00
|
|
|
// List all the volumes from diskPath.
|
|
|
|
func listVols(dirPath string) ([]VolInfo, error) {
|
|
|
|
if err := checkPathLength(dirPath); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
entries, err := readDir(dirPath)
|
|
|
|
if err != nil {
|
|
|
|
return nil, errDiskNotFound
|
|
|
|
}
|
2020-09-14 23:44:18 -04:00
|
|
|
volsInfo := make([]VolInfo, 0, len(entries))
|
2016-08-11 22:57:14 -04:00
|
|
|
for _, entry := range entries {
|
2021-02-08 13:15:12 -05:00
|
|
|
if !HasSuffix(entry, SlashSeparator) || !isValidVolname(pathutil.Clean(entry)) {
|
2016-08-11 22:57:14 -04:00
|
|
|
// Skip if entry is neither a directory not a valid volume name.
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
volsInfo = append(volsInfo, VolInfo{
|
2021-02-08 13:15:12 -05:00
|
|
|
Name: pathutil.Clean(entry),
|
2016-08-11 22:57:14 -04:00
|
|
|
})
|
|
|
|
}
|
|
|
|
return volsInfo, nil
|
|
|
|
}
|
|
|
|
|
2016-04-08 13:37:38 -04:00
|
|
|
// StatVol - get volume info.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) StatVol(ctx context.Context, volume string) (vol VolInfo, err error) {
|
2016-04-13 14:32:47 -04:00
|
|
|
// Verify if volume is valid and it exists.
|
2016-05-18 00:22:27 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
2016-04-13 14:32:47 -04:00
|
|
|
if err != nil {
|
|
|
|
return VolInfo{}, err
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
|
|
|
// Stat a volume entry.
|
|
|
|
var st os.FileInfo
|
2021-03-23 17:51:27 -04:00
|
|
|
st, err = Lstat(volumeDir)
|
2016-04-08 13:37:38 -04:00
|
|
|
if err != nil {
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
switch {
|
|
|
|
case osIsNotExist(err):
|
2016-04-08 13:37:38 -04:00
|
|
|
return VolInfo{}, errVolumeNotFound
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
case osIsPermission(err):
|
|
|
|
return VolInfo{}, errDiskAccessDenied
|
|
|
|
case isSysErrIO(err):
|
2018-07-27 18:32:19 -04:00
|
|
|
return VolInfo{}, errFaultyDisk
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
default:
|
|
|
|
return VolInfo{}, err
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
2016-04-08 20:13:16 -04:00
|
|
|
}
|
2021-02-05 13:12:25 -05:00
|
|
|
// As os.Lstat() doesn't carry other than ModTime(), use ModTime()
|
2016-04-16 15:48:41 -04:00
|
|
|
// as CreatedTime.
|
2016-04-13 14:32:47 -04:00
|
|
|
createdTime := st.ModTime()
|
2016-04-08 13:37:38 -04:00
|
|
|
return VolInfo{
|
2016-04-13 14:32:47 -04:00
|
|
|
Name: volume,
|
|
|
|
Created: createdTime,
|
2016-04-08 13:37:38 -04:00
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// DeleteVol - delete a volume.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) DeleteVol(ctx context.Context, volume string, forceDelete bool) (err error) {
|
2016-04-13 14:32:47 -04:00
|
|
|
// Verify if volume is valid and it exists.
|
2016-05-18 00:22:27 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
2016-04-13 14:32:47 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
2020-03-28 00:52:59 -04:00
|
|
|
|
|
|
|
if forceDelete {
|
2021-03-23 17:51:27 -04:00
|
|
|
err = RemoveAll(volumeDir)
|
2020-03-28 00:52:59 -04:00
|
|
|
} else {
|
2021-03-23 17:51:27 -04:00
|
|
|
err = Remove(volumeDir)
|
2020-03-28 00:52:59 -04:00
|
|
|
}
|
|
|
|
|
2016-04-16 15:48:41 -04:00
|
|
|
if err != nil {
|
2018-08-06 13:26:40 -04:00
|
|
|
switch {
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsNotExist(err):
|
2016-04-16 15:48:41 -04:00
|
|
|
return errVolumeNotFound
|
2018-08-06 13:26:40 -04:00
|
|
|
case isSysErrNotEmpty(err):
|
2016-04-16 15:48:41 -04:00
|
|
|
return errVolumeNotEmpty
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsPermission(err):
|
2018-04-09 23:56:09 -04:00
|
|
|
return errDiskAccessDenied
|
2018-08-06 13:26:40 -04:00
|
|
|
case isSysErrIO(err):
|
2018-07-27 18:32:19 -04:00
|
|
|
return errFaultyDisk
|
2018-08-06 13:26:40 -04:00
|
|
|
default:
|
|
|
|
return err
|
2016-04-16 15:48:41 -04:00
|
|
|
}
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
2016-03-28 00:52:38 -04:00
|
|
|
return nil
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
|
|
|
|
2020-08-25 15:26:48 -04:00
|
|
|
func (s *xlStorage) isLeaf(volume string, leafPath string) bool {
|
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2021-03-23 17:51:27 -04:00
|
|
|
_, err = Lstat(pathJoin(volumeDir, leafPath, xlStorageFormatFile))
|
2020-08-25 15:26:48 -04:00
|
|
|
if err == nil {
|
|
|
|
return true
|
|
|
|
}
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2020-08-25 15:26:48 -04:00
|
|
|
// We need a fallback code where directory might contain
|
|
|
|
// legacy `xl.json`, in such situation we just rename
|
|
|
|
// and proceed if rename is successful we know that it
|
|
|
|
// is the leaf since `xl.json` was present.
|
2021-01-11 05:27:04 -05:00
|
|
|
return s.renameLegacyMetadata(volumeDir, leafPath) == nil
|
2020-08-25 15:26:48 -04:00
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2016-05-05 15:51:56 -04:00
|
|
|
// ListDir - return all the entries at the given directory path.
|
2019-08-06 15:08:58 -04:00
|
|
|
// If an entry is a directory it will be returned with a trailing SlashSeparator.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) ListDir(ctx context.Context, volume, dirPath string, count int) (entries []string, err error) {
|
2016-04-13 14:32:47 -04:00
|
|
|
// Verify if volume is valid and it exists.
|
2016-05-18 00:22:27 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
2016-04-13 14:32:47 -04:00
|
|
|
if err != nil {
|
2016-05-05 15:51:56 -04:00
|
|
|
return nil, err
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
2020-04-08 22:42:57 -04:00
|
|
|
|
2020-06-17 16:58:38 -04:00
|
|
|
dirPathAbs := pathJoin(volumeDir, dirPath)
|
2018-05-08 22:08:21 -04:00
|
|
|
if count > 0 {
|
2020-06-17 16:58:38 -04:00
|
|
|
entries, err = readDirN(dirPathAbs, count)
|
2019-04-23 17:54:28 -04:00
|
|
|
} else {
|
2020-06-17 16:58:38 -04:00
|
|
|
entries, err = readDir(dirPathAbs)
|
2018-05-08 22:08:21 -04:00
|
|
|
}
|
2020-04-08 22:42:57 -04:00
|
|
|
if err != nil {
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
if err == errFileNotFound {
|
2021-03-23 17:51:27 -04:00
|
|
|
if _, verr := Lstat(volumeDir); verr != nil {
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
if osIsNotExist(verr) {
|
|
|
|
return nil, errVolumeNotFound
|
|
|
|
} else if isSysErrIO(verr) {
|
|
|
|
return nil, errFaultyDisk
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-04-08 22:42:57 -04:00
|
|
|
return nil, err
|
|
|
|
}
|
2019-04-23 17:54:28 -04:00
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
return entries, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// DeleteVersions deletes slice of versions, it can be same object
|
|
|
|
// or multiple objects.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) DeleteVersions(ctx context.Context, volume string, versions []FileInfo) []error {
|
2020-06-12 23:04:01 -04:00
|
|
|
errs := make([]error, len(versions))
|
2021-03-24 12:08:05 -04:00
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
for i, version := range versions {
|
2021-02-03 13:33:43 -05:00
|
|
|
if err := s.DeleteVersion(ctx, volume, version.Name, version, false); err != nil {
|
2020-06-12 23:04:01 -04:00
|
|
|
errs[i] = err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return errs
|
|
|
|
}
|
|
|
|
|
2021-02-03 13:33:43 -05:00
|
|
|
// DeleteVersion - deletes FileInfo metadata for path at `xl.meta`. forceDelMarker
|
|
|
|
// will force creating a new `xl.meta` to create a new delete marker
|
|
|
|
func (s *xlStorage) DeleteVersion(ctx context.Context, volume, path string, fi FileInfo, forceDelMarker bool) error {
|
2020-06-12 23:04:01 -04:00
|
|
|
if HasSuffix(path, SlashSeparator) {
|
2020-10-28 12:18:35 -04:00
|
|
|
return s.Delete(ctx, volume, path, false)
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
|
2020-09-04 12:45:06 -04:00
|
|
|
buf, err := s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile))
|
2020-06-12 23:04:01 -04:00
|
|
|
if err != nil {
|
2021-02-01 16:23:50 -05:00
|
|
|
if err != errFileNotFound {
|
|
|
|
return err
|
2020-11-29 00:15:45 -05:00
|
|
|
}
|
2021-02-03 13:33:43 -05:00
|
|
|
if fi.Deleted && forceDelMarker {
|
2021-02-01 16:23:50 -05:00
|
|
|
// Create a new xl.meta with a delete marker in it
|
|
|
|
return s.WriteMetadata(ctx, volume, path, fi)
|
|
|
|
}
|
|
|
|
if fi.VersionID != "" {
|
|
|
|
return errFileVersionNotFound
|
|
|
|
}
|
|
|
|
return errFileNotFound
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
if len(buf) == 0 {
|
2020-11-29 00:15:45 -05:00
|
|
|
if fi.VersionID != "" {
|
|
|
|
return errFileVersionNotFound
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
return errFileNotFound
|
|
|
|
}
|
|
|
|
|
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if !isXL2V1Format(buf) {
|
|
|
|
// Delete the meta file, if there are no more versions the
|
|
|
|
// top level parent is automatically removed.
|
2021-03-24 17:19:52 -04:00
|
|
|
return s.deleteFile(volumeDir, pathJoin(volumeDir, path), true)
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
var xlMeta xlMetaV2
|
|
|
|
if err = xlMeta.Load(buf); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
dataDir, lastVersion, err := xlMeta.DeleteVersion(fi)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
buf, err = xlMeta.MarshalMsg(append(xlHeader[:], xlVersionV1[:]...))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-11-12 15:12:09 -05:00
|
|
|
// when data-dir is specified. Transition leverages existing DeleteObject
|
2021-01-20 16:12:12 -05:00
|
|
|
// api call to mark object as deleted. When object is pending transition,
|
2020-11-12 15:12:09 -05:00
|
|
|
// just update the metadata and avoid deleting data dir.
|
|
|
|
if dataDir != "" && fi.TransitionStatus != lifecycle.TransitionPending {
|
2020-06-12 23:04:01 -04:00
|
|
|
filePath := pathJoin(volumeDir, path, dataDir)
|
|
|
|
if err = checkPathLength(filePath); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-02-26 12:52:27 -05:00
|
|
|
tmpuuid := mustGetUUID()
|
|
|
|
if err = renameAll(filePath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, tmpuuid)); err != nil {
|
2020-06-12 23:04:01 -04:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2021-01-20 16:12:12 -05:00
|
|
|
|
2020-11-12 15:12:09 -05:00
|
|
|
// transitioned objects maintains metadata on the source cluster. When transition
|
|
|
|
// status is set, update the metadata to disk.
|
|
|
|
if !lastVersion || fi.TransitionStatus != "" {
|
2020-11-02 19:14:31 -05:00
|
|
|
return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf)
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Delete the meta file, if there are no more versions the
|
|
|
|
// top level parent is automatically removed.
|
|
|
|
filePath := pathJoin(volumeDir, path, xlStorageFormatFile)
|
|
|
|
if err = checkPathLength(filePath); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-03-24 17:19:52 -04:00
|
|
|
return s.deleteFile(volumeDir, filePath, false)
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// WriteMetadata - writes FileInfo metadata for path at `xl.meta`
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) WriteMetadata(ctx context.Context, volume, path string, fi FileInfo) error {
|
|
|
|
buf, err := s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile))
|
2020-06-12 23:04:01 -04:00
|
|
|
if err != nil && err != errFileNotFound {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
var xlMeta xlMetaV2
|
|
|
|
if !isXL2V1Format(buf) {
|
|
|
|
xlMeta, err = newXLMetaV2(fi)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
buf, err = xlMeta.MarshalMsg(append(xlHeader[:], xlVersionV1[:]...))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if err = xlMeta.Load(buf); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err = xlMeta.AddVersion(fi); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
buf, err = xlMeta.MarshalMsg(append(xlHeader[:], xlVersionV1[:]...))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-11-02 19:14:31 -05:00
|
|
|
return s.WriteAll(ctx, volume, pathJoin(path, xlStorageFormatFile), buf)
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
|
2021-01-11 05:27:04 -05:00
|
|
|
func (s *xlStorage) renameLegacyMetadata(volumeDir, path string) (err error) {
|
|
|
|
s.RLock()
|
|
|
|
legacy := s.formatLegacy
|
|
|
|
s.RUnlock()
|
|
|
|
if !legacy {
|
|
|
|
// if its not a legacy backend then this function is
|
|
|
|
// a no-op always returns errFileNotFound
|
|
|
|
return errFileNotFound
|
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
// Validate file path length, before reading.
|
|
|
|
filePath := pathJoin(volumeDir, path)
|
|
|
|
if err = checkPathLength(filePath); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
srcFilePath := pathJoin(filePath, xlStorageFormatFileV1)
|
|
|
|
dstFilePath := pathJoin(filePath, xlStorageFormatFile)
|
2020-06-19 13:58:17 -04:00
|
|
|
|
|
|
|
// Renaming xl.json to xl.meta should be fully synced to disk.
|
2020-07-11 12:37:34 -04:00
|
|
|
defer func() {
|
|
|
|
if err == nil {
|
2020-08-04 17:55:53 -04:00
|
|
|
if s.globalSync {
|
|
|
|
// Sync to disk only upon success.
|
|
|
|
globalSync()
|
|
|
|
}
|
2020-07-11 12:37:34 -04:00
|
|
|
}
|
|
|
|
}()
|
2020-06-19 13:58:17 -04:00
|
|
|
|
2021-03-23 17:51:27 -04:00
|
|
|
if err = Rename(srcFilePath, dstFilePath); err != nil {
|
2020-06-12 23:04:01 -04:00
|
|
|
switch {
|
|
|
|
case isSysErrNotDir(err):
|
|
|
|
return errFileNotFound
|
|
|
|
case isSysErrPathNotFound(err):
|
|
|
|
return errFileNotFound
|
|
|
|
case isSysErrCrossDevice(err):
|
|
|
|
return fmt.Errorf("%w (%s)->(%s)", errCrossDeviceLink, srcFilePath, dstFilePath)
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsNotExist(err):
|
2020-06-12 23:04:01 -04:00
|
|
|
return errFileNotFound
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsExist(err):
|
2020-06-12 23:04:01 -04:00
|
|
|
// This is returned only when destination is a directory and we
|
|
|
|
// are attempting a rename from file to directory.
|
|
|
|
return errIsNotRegular
|
|
|
|
default:
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReadVersion - reads metadata and returns FileInfo at path `xl.meta`
|
2021-01-07 22:27:31 -05:00
|
|
|
// for all objects less than `32KiB` this call returns data as well
|
|
|
|
// along with metadata.
|
|
|
|
func (s *xlStorage) ReadVersion(ctx context.Context, volume, path, versionID string, readData bool) (fi FileInfo, err error) {
|
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
|
|
|
return fi, err
|
|
|
|
}
|
|
|
|
|
2020-09-04 12:45:06 -04:00
|
|
|
buf, err := s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile))
|
2020-06-12 23:04:01 -04:00
|
|
|
if err != nil {
|
|
|
|
if err == errFileNotFound {
|
2021-01-11 05:27:04 -05:00
|
|
|
if err = s.renameLegacyMetadata(volumeDir, path); err != nil {
|
2020-11-29 00:15:45 -05:00
|
|
|
if err == errFileNotFound {
|
|
|
|
if versionID != "" {
|
|
|
|
return fi, errFileVersionNotFound
|
|
|
|
}
|
|
|
|
return fi, errFileNotFound
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
return fi, err
|
2019-04-23 17:54:28 -04:00
|
|
|
}
|
2020-09-04 12:45:06 -04:00
|
|
|
buf, err = s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile))
|
2020-06-12 23:04:01 -04:00
|
|
|
if err != nil {
|
2020-11-29 00:15:45 -05:00
|
|
|
if err == errFileNotFound {
|
|
|
|
if versionID != "" {
|
|
|
|
return fi, errFileVersionNotFound
|
|
|
|
}
|
|
|
|
return fi, errFileNotFound
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
return fi, err
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return fi, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(buf) == 0 {
|
|
|
|
if versionID != "" {
|
|
|
|
return fi, errFileVersionNotFound
|
2019-04-23 17:54:28 -04:00
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
return fi, errFileNotFound
|
2019-04-23 17:54:28 -04:00
|
|
|
}
|
|
|
|
|
2021-01-07 22:27:31 -05:00
|
|
|
fi, err = getFileInfo(buf, volume, path, versionID)
|
|
|
|
if err != nil {
|
|
|
|
return fi, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if readData {
|
|
|
|
// Reading data for small objects when
|
|
|
|
// - object has not yet transitioned
|
|
|
|
// - object size lesser than 32KiB
|
|
|
|
// - object has maximum of 1 parts
|
|
|
|
if fi.TransitionStatus == "" && fi.DataDir != "" && fi.Size <= smallFileThreshold && len(fi.Parts) == 1 {
|
2021-01-22 18:38:21 -05:00
|
|
|
// Enable O_DIRECT optionally only if drive supports it.
|
2021-03-18 23:16:50 -04:00
|
|
|
requireDirectIO := globalStorageClass.GetDMA() == storageclass.DMAReadWrite
|
2021-01-22 18:38:21 -05:00
|
|
|
partPath := fmt.Sprintf("part.%d", fi.Parts[0].Number)
|
|
|
|
fi.Data, err = s.readAllData(volumeDir, pathJoin(volumeDir, path, fi.DataDir, partPath), requireDirectIO)
|
2021-01-07 22:27:31 -05:00
|
|
|
if err != nil {
|
|
|
|
return FileInfo{}, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return fi, nil
|
|
|
|
}
|
|
|
|
|
2021-02-22 04:36:17 -05:00
|
|
|
func (s *xlStorage) readAllData(volumeDir string, filePath string, requireDirectIO bool) (buf []byte, err error) {
|
2021-01-07 22:27:31 -05:00
|
|
|
var f *os.File
|
2021-01-22 18:38:21 -05:00
|
|
|
if requireDirectIO {
|
2021-02-24 03:14:16 -05:00
|
|
|
f, err = disk.OpenFileDirectIO(filePath, readMode, 0666)
|
2021-01-07 22:27:31 -05:00
|
|
|
} else {
|
2021-03-23 17:51:27 -04:00
|
|
|
f, err = OpenFile(filePath, readMode, 0)
|
2021-01-07 22:27:31 -05:00
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
if osIsNotExist(err) {
|
|
|
|
// Check if the object doesn't exist because its bucket
|
|
|
|
// is missing in order to return the correct error.
|
2021-03-23 17:51:27 -04:00
|
|
|
_, err = Lstat(volumeDir)
|
2021-01-07 22:27:31 -05:00
|
|
|
if err != nil && osIsNotExist(err) {
|
|
|
|
return nil, errVolumeNotFound
|
|
|
|
}
|
|
|
|
return nil, errFileNotFound
|
|
|
|
} else if osIsPermission(err) {
|
|
|
|
return nil, errFileAccessDenied
|
|
|
|
} else if isSysErrNotDir(err) || isSysErrIsDir(err) {
|
|
|
|
return nil, errFileNotFound
|
|
|
|
} else if isSysErrHandleInvalid(err) {
|
|
|
|
// This case is special and needs to be handled for windows.
|
|
|
|
return nil, errFileNotFound
|
|
|
|
} else if isSysErrIO(err) {
|
|
|
|
return nil, errFaultyDisk
|
|
|
|
} else if isSysErrTooManyFiles(err) {
|
|
|
|
return nil, errTooManyOpenFiles
|
|
|
|
} else if isSysErrInvalidArg(err) {
|
2021-03-23 17:51:27 -04:00
|
|
|
st, _ := Lstat(filePath)
|
2021-02-24 03:14:16 -05:00
|
|
|
if st != nil && st.IsDir() {
|
|
|
|
// Linux returns InvalidArg for directory O_DIRECT
|
|
|
|
// we need to keep this fallback code to return correct
|
|
|
|
// errors upwards.
|
|
|
|
return nil, errFileNotFound
|
|
|
|
}
|
2021-01-22 18:38:21 -05:00
|
|
|
return nil, errUnsupportedDisk
|
2021-01-07 22:27:31 -05:00
|
|
|
}
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2021-02-28 23:09:23 -05:00
|
|
|
or := &odirectReader{f, nil, nil, true, true, s, nil}
|
2021-03-16 23:06:57 -04:00
|
|
|
defer or.Close()
|
2021-01-07 22:27:31 -05:00
|
|
|
|
2021-03-16 23:06:57 -04:00
|
|
|
buf, err = ioutil.ReadAll(or)
|
2021-01-22 18:38:21 -05:00
|
|
|
if err != nil {
|
|
|
|
err = osErrToFileErr(err)
|
|
|
|
}
|
|
|
|
return buf, err
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
|
|
|
|
2016-06-25 17:51:06 -04:00
|
|
|
// ReadAll reads from r until an error or EOF and returns the data it read.
|
|
|
|
// A successful call returns err == nil, not err == EOF. Because ReadAll is
|
|
|
|
// defined to read from src until EOF, it does not treat an EOF from Read
|
|
|
|
// as an error to be reported.
|
|
|
|
// This API is meant to be used on files which have small memory footprint, do
|
|
|
|
// not use this on large files as it would cause server to crash.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) {
|
2016-06-25 17:51:06 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
|
2016-06-25 17:51:06 -04:00
|
|
|
// Validate file path length, before reading.
|
|
|
|
filePath := pathJoin(volumeDir, path)
|
2020-06-12 23:04:01 -04:00
|
|
|
if err = checkPathLength(filePath); err != nil {
|
2016-06-25 17:51:06 -04:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2021-03-18 23:16:50 -04:00
|
|
|
requireDirectIO := globalStorageClass.GetDMA() == storageclass.DMAReadWrite
|
2021-02-22 04:36:17 -05:00
|
|
|
return s.readAllData(volumeDir, filePath, requireDirectIO)
|
2016-06-25 17:51:06 -04:00
|
|
|
}
|
|
|
|
|
2016-05-28 18:13:15 -04:00
|
|
|
// ReadFile reads exactly len(buf) bytes into buf. It returns the
|
|
|
|
// number of bytes copied. The error is EOF only if no bytes were
|
|
|
|
// read. On return, n == len(buf) if and only if err == nil. n == 0
|
2016-11-21 02:42:53 -05:00
|
|
|
// for io.EOF.
|
2017-05-16 17:21:52 -04:00
|
|
|
//
|
2016-11-21 02:42:53 -05:00
|
|
|
// If an EOF happens after reading some but not all the bytes,
|
2017-09-25 14:32:56 -04:00
|
|
|
// ReadFile returns ErrUnexpectedEOF.
|
|
|
|
//
|
|
|
|
// If the BitrotVerifier is not nil or not verified ReadFile
|
|
|
|
// tries to verify whether the disk has bitrot.
|
2017-05-16 17:21:52 -04:00
|
|
|
//
|
|
|
|
// Additionally ReadFile also starts reading from an offset. ReadFile
|
|
|
|
// semantics are same as io.ReadFull.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) ReadFile(ctx context.Context, volume string, path string, offset int64, buffer []byte, verifier *BitrotVerifier) (int64, error) {
|
2018-08-06 18:14:08 -04:00
|
|
|
if offset < 0 {
|
|
|
|
return 0, errInvalidArgument
|
|
|
|
}
|
|
|
|
|
2016-05-18 00:22:27 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
2016-05-28 18:13:15 -04:00
|
|
|
return 0, err
|
2016-05-18 00:22:27 -04:00
|
|
|
}
|
2020-01-17 16:34:43 -05:00
|
|
|
|
|
|
|
var n int
|
|
|
|
|
2016-05-18 00:22:27 -04:00
|
|
|
// Stat a volume entry.
|
2021-03-23 17:51:27 -04:00
|
|
|
_, err = Lstat(volumeDir)
|
2016-04-13 14:32:47 -04:00
|
|
|
if err != nil {
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2016-05-28 18:13:15 -04:00
|
|
|
return 0, errVolumeNotFound
|
2018-07-27 18:32:19 -04:00
|
|
|
} else if isSysErrIO(err) {
|
|
|
|
return 0, errFaultyDisk
|
2016-05-18 00:22:27 -04:00
|
|
|
}
|
2016-05-28 18:13:15 -04:00
|
|
|
return 0, err
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
2016-04-13 14:32:47 -04:00
|
|
|
|
2016-06-25 17:51:06 -04:00
|
|
|
// Validate effective path length before reading.
|
2016-05-05 04:39:26 -04:00
|
|
|
filePath := pathJoin(volumeDir, path)
|
2020-06-12 23:04:01 -04:00
|
|
|
if err = checkPathLength(filePath); err != nil {
|
2016-05-28 18:13:15 -04:00
|
|
|
return 0, err
|
2016-05-11 15:55:02 -04:00
|
|
|
}
|
2016-06-25 17:51:06 -04:00
|
|
|
|
|
|
|
// Open the file for reading.
|
2021-03-23 17:51:27 -04:00
|
|
|
file, err := Open(filePath)
|
2016-04-08 13:37:38 -04:00
|
|
|
if err != nil {
|
2018-08-06 13:26:40 -04:00
|
|
|
switch {
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsNotExist(err):
|
2016-05-28 18:13:15 -04:00
|
|
|
return 0, errFileNotFound
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsPermission(err):
|
2016-05-28 18:13:15 -04:00
|
|
|
return 0, errFileAccessDenied
|
2018-08-06 13:26:40 -04:00
|
|
|
case isSysErrNotDir(err):
|
2016-07-29 00:57:11 -04:00
|
|
|
return 0, errFileAccessDenied
|
2018-08-06 13:26:40 -04:00
|
|
|
case isSysErrIO(err):
|
2018-07-27 18:32:19 -04:00
|
|
|
return 0, errFaultyDisk
|
2019-05-02 10:09:57 -04:00
|
|
|
case isSysErrTooManyFiles(err):
|
|
|
|
return 0, errTooManyOpenFiles
|
2018-08-06 13:26:40 -04:00
|
|
|
default:
|
|
|
|
return 0, err
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
|
|
|
}
|
2016-07-27 22:22:32 -04:00
|
|
|
|
|
|
|
// Close the file descriptor.
|
|
|
|
defer file.Close()
|
|
|
|
|
2016-04-08 13:37:38 -04:00
|
|
|
st, err := file.Stat()
|
|
|
|
if err != nil {
|
2016-05-28 18:13:15 -04:00
|
|
|
return 0, err
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
2016-07-29 00:57:11 -04:00
|
|
|
|
2017-05-16 17:21:52 -04:00
|
|
|
// Verify it is a regular file, otherwise subsequent Seek is
|
|
|
|
// undefined.
|
2016-04-08 13:37:38 -04:00
|
|
|
if !st.Mode().IsRegular() {
|
2016-07-29 00:57:11 -04:00
|
|
|
return 0, errIsNotRegular
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
2016-07-29 00:57:11 -04:00
|
|
|
|
2018-08-06 18:14:08 -04:00
|
|
|
if verifier == nil {
|
|
|
|
n, err = file.ReadAt(buffer, offset)
|
|
|
|
return int64(n), err
|
|
|
|
}
|
2017-05-16 17:21:52 -04:00
|
|
|
|
2018-08-06 18:14:08 -04:00
|
|
|
h := verifier.algorithm.New()
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
if _, err = io.Copy(h, io.LimitReader(file, offset)); err != nil {
|
2018-08-06 18:14:08 -04:00
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
if n, err = io.ReadFull(file, buffer); err != nil {
|
|
|
|
return int64(n), err
|
|
|
|
}
|
|
|
|
|
|
|
|
if _, err = h.Write(buffer); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
if _, err = io.Copy(h, file); err != nil {
|
2018-08-06 18:14:08 -04:00
|
|
|
return 0, err
|
2017-05-16 17:21:52 -04:00
|
|
|
}
|
2016-05-28 18:13:15 -04:00
|
|
|
|
2019-02-13 07:59:36 -05:00
|
|
|
if !bytes.Equal(h.Sum(nil), verifier.sum) {
|
2019-10-01 16:12:15 -04:00
|
|
|
return 0, errFileCorrupt
|
2017-08-14 21:08:42 -04:00
|
|
|
}
|
2018-08-06 18:14:08 -04:00
|
|
|
|
|
|
|
return int64(len(buffer)), nil
|
2016-04-08 20:13:16 -04:00
|
|
|
}
|
|
|
|
|
2021-03-24 12:08:05 -04:00
|
|
|
func (s *xlStorage) openFile(filePath string, mode int) (f *os.File, err error) {
|
2021-02-24 03:14:16 -05:00
|
|
|
// Create top level directories if they don't exist.
|
|
|
|
// with mode 0777 mkdir honors system umask.
|
|
|
|
if err = mkdirAll(pathutil.Dir(filePath), 0777); err != nil {
|
2021-03-17 12:38:38 -04:00
|
|
|
return nil, osErrToFileErr(err)
|
2016-05-28 18:13:15 -04:00
|
|
|
}
|
2016-06-23 23:19:27 -04:00
|
|
|
|
2021-03-23 17:51:27 -04:00
|
|
|
w, err := OpenFile(filePath, mode|writeMode, 0666)
|
2016-05-04 15:18:40 -04:00
|
|
|
if err != nil {
|
|
|
|
// File path cannot be verified since one of the parents is a file.
|
2018-08-06 13:26:40 -04:00
|
|
|
switch {
|
2021-02-24 03:14:16 -05:00
|
|
|
case isSysErrIsDir(err):
|
|
|
|
return nil, errIsNotRegular
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsPermission(err):
|
2018-04-09 23:56:09 -04:00
|
|
|
return nil, errFileAccessDenied
|
2018-08-06 13:26:40 -04:00
|
|
|
case isSysErrIO(err):
|
2018-07-27 18:32:19 -04:00
|
|
|
return nil, errFaultyDisk
|
2019-05-02 10:09:57 -04:00
|
|
|
case isSysErrTooManyFiles(err):
|
|
|
|
return nil, errTooManyOpenFiles
|
2018-08-06 13:26:40 -04:00
|
|
|
default:
|
|
|
|
return nil, err
|
2016-05-04 15:18:40 -04:00
|
|
|
}
|
2016-10-29 15:44:44 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
return w, nil
|
|
|
|
}
|
|
|
|
|
2020-10-30 14:04:29 -04:00
|
|
|
// To support O_DIRECT reads for erasure backends.
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
type odirectReader struct {
|
2020-10-30 14:04:29 -04:00
|
|
|
f *os.File
|
|
|
|
buf []byte
|
|
|
|
bufp *[]byte
|
|
|
|
freshRead bool
|
2021-01-12 13:20:39 -05:00
|
|
|
smallFile bool
|
2020-10-30 14:04:29 -04:00
|
|
|
s *xlStorage
|
|
|
|
err error
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read - Implements Reader interface.
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
func (o *odirectReader) Read(buf []byte) (n int, err error) {
|
2020-10-30 14:04:29 -04:00
|
|
|
if o.err != nil {
|
|
|
|
return 0, o.err
|
|
|
|
}
|
|
|
|
if o.buf == nil {
|
2021-01-12 13:20:39 -05:00
|
|
|
if o.smallFile {
|
|
|
|
o.bufp = o.s.poolSmall.Get().(*[]byte)
|
|
|
|
} else {
|
|
|
|
o.bufp = o.s.poolLarge.Get().(*[]byte)
|
|
|
|
}
|
2020-10-30 14:04:29 -04:00
|
|
|
}
|
|
|
|
if o.freshRead {
|
|
|
|
o.buf = *o.bufp
|
|
|
|
n, err = o.f.Read(o.buf)
|
|
|
|
if err != nil && err != io.EOF {
|
2021-03-18 23:16:50 -04:00
|
|
|
if isSysErrInvalidArg(err) {
|
|
|
|
if err = disk.DisableDirectIO(o.f); err != nil {
|
|
|
|
o.err = err
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
n, err = o.f.Read(o.buf)
|
|
|
|
}
|
|
|
|
if err != nil && err != io.EOF {
|
|
|
|
o.err = err
|
|
|
|
return n, err
|
|
|
|
}
|
2020-10-30 14:04:29 -04:00
|
|
|
}
|
|
|
|
if n == 0 {
|
|
|
|
// err is io.EOF
|
|
|
|
o.err = err
|
|
|
|
return n, err
|
|
|
|
}
|
|
|
|
o.buf = o.buf[:n]
|
|
|
|
o.freshRead = false
|
|
|
|
}
|
|
|
|
if len(buf) >= len(o.buf) {
|
|
|
|
n = copy(buf, o.buf)
|
|
|
|
o.freshRead = true
|
|
|
|
return n, nil
|
|
|
|
}
|
|
|
|
n = copy(buf, o.buf)
|
|
|
|
o.buf = o.buf[n:]
|
|
|
|
return n, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Close - Release the buffer and close the file.
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
func (o *odirectReader) Close() error {
|
2021-01-12 13:20:39 -05:00
|
|
|
if o.smallFile {
|
|
|
|
o.s.poolSmall.Put(o.bufp)
|
|
|
|
} else {
|
|
|
|
o.s.poolLarge.Put(o.bufp)
|
|
|
|
}
|
2020-10-30 14:04:29 -04:00
|
|
|
return o.f.Close()
|
|
|
|
}
|
|
|
|
|
2019-01-17 07:58:18 -05:00
|
|
|
// ReadFileStream - Returns the read stream of the file.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) ReadFileStream(ctx context.Context, volume, path string, offset, length int64) (io.ReadCloser, error) {
|
2019-01-17 07:58:18 -05:00
|
|
|
if offset < 0 {
|
|
|
|
return nil, errInvalidArgument
|
|
|
|
}
|
|
|
|
|
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Validate effective path length before reading.
|
|
|
|
filePath := pathJoin(volumeDir, path)
|
2020-06-12 23:04:01 -04:00
|
|
|
if err = checkPathLength(filePath); err != nil {
|
2019-01-17 07:58:18 -05:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2021-01-07 22:27:31 -05:00
|
|
|
var file *os.File
|
2021-01-12 13:20:39 -05:00
|
|
|
// O_DIRECT only supported if offset is zero
|
2021-03-18 23:16:50 -04:00
|
|
|
if offset == 0 && globalStorageClass.GetDMA() == storageclass.DMAReadWrite {
|
2021-02-28 18:33:03 -05:00
|
|
|
file, err = disk.OpenFileDirectIO(filePath, readMode, 0666)
|
2021-01-07 22:27:31 -05:00
|
|
|
} else {
|
2021-01-08 06:22:19 -05:00
|
|
|
// Open the file for reading.
|
2021-03-23 17:51:27 -04:00
|
|
|
file, err = OpenFile(filePath, readMode, 0666)
|
2020-10-30 14:04:29 -04:00
|
|
|
}
|
2019-01-17 07:58:18 -05:00
|
|
|
if err != nil {
|
|
|
|
switch {
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsNotExist(err):
|
2021-03-23 17:51:27 -04:00
|
|
|
_, err = Lstat(volumeDir)
|
2021-01-01 12:44:36 -05:00
|
|
|
if err != nil && osIsNotExist(err) {
|
|
|
|
return nil, errVolumeNotFound
|
|
|
|
}
|
2019-01-17 07:58:18 -05:00
|
|
|
return nil, errFileNotFound
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsPermission(err):
|
2019-01-17 07:58:18 -05:00
|
|
|
return nil, errFileAccessDenied
|
|
|
|
case isSysErrNotDir(err):
|
|
|
|
return nil, errFileAccessDenied
|
|
|
|
case isSysErrIO(err):
|
|
|
|
return nil, errFaultyDisk
|
2019-05-02 10:09:57 -04:00
|
|
|
case isSysErrTooManyFiles(err):
|
|
|
|
return nil, errTooManyOpenFiles
|
2021-01-22 18:38:21 -05:00
|
|
|
case isSysErrInvalidArg(err):
|
|
|
|
return nil, errUnsupportedDisk
|
2019-01-17 07:58:18 -05:00
|
|
|
default:
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
st, err := file.Stat()
|
|
|
|
if err != nil {
|
2021-02-28 18:33:03 -05:00
|
|
|
file.Close()
|
2019-01-17 07:58:18 -05:00
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Verify it is a regular file, otherwise subsequent Seek is
|
|
|
|
// undefined.
|
|
|
|
if !st.Mode().IsRegular() {
|
2021-02-28 18:33:03 -05:00
|
|
|
file.Close()
|
2019-01-17 07:58:18 -05:00
|
|
|
return nil, errIsNotRegular
|
|
|
|
}
|
|
|
|
|
2021-03-18 23:16:50 -04:00
|
|
|
if offset == 0 && globalStorageClass.GetDMA() == storageclass.DMAReadWrite {
|
2021-02-28 18:33:03 -05:00
|
|
|
or := &odirectReader{file, nil, nil, true, false, s, nil}
|
2021-01-12 13:20:39 -05:00
|
|
|
if length <= smallFileThreshold {
|
2021-02-28 18:33:03 -05:00
|
|
|
or = &odirectReader{file, nil, nil, true, true, s, nil}
|
2021-01-07 22:27:31 -05:00
|
|
|
}
|
2021-02-28 18:33:03 -05:00
|
|
|
r := struct {
|
|
|
|
io.Reader
|
|
|
|
io.Closer
|
|
|
|
}{Reader: io.LimitReader(or, length), Closer: closeWrapper(func() error {
|
|
|
|
return or.Close()
|
|
|
|
})}
|
|
|
|
return r, nil
|
2019-01-17 07:58:18 -05:00
|
|
|
}
|
2019-05-22 16:47:15 -04:00
|
|
|
|
|
|
|
r := struct {
|
2019-03-18 01:20:26 -04:00
|
|
|
io.Reader
|
|
|
|
io.Closer
|
2020-02-06 23:13:55 -05:00
|
|
|
}{Reader: io.LimitReader(file, length), Closer: closeWrapper(func() error {
|
|
|
|
return file.Close()
|
|
|
|
})}
|
|
|
|
|
2021-02-28 18:33:03 -05:00
|
|
|
if offset > 0 {
|
|
|
|
if _, err = file.Seek(offset, io.SeekStart); err != nil {
|
|
|
|
r.Close()
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-06 23:13:55 -05:00
|
|
|
// Add readahead to big reads
|
Remove read-ahead for small files (#8522)
We should only read ahead if we are reading big files. We enable it for files >= 16MB.
Benchmark on 64KB objects.
Before:
```
Operation: GET
Errors: 0
Average: 59.976s, 87.13 MB/s, 1394.07 ops ended/s.
Fastest: 1s, 90.99 MB/s, 1455.00 ops ended/s.
50% Median: 1s, 87.53 MB/s, 1401.00 ops ended/s.
Slowest: 1s, 81.39 MB/s, 1301.00 ops ended/s.
```
After:
```
Operation: GET
Errors: 0
Average: 59.992s, 207.99 MB/s, 3327.85 ops ended/s.
Fastest: 1s, 219.20 MB/s, 3507.00 ops ended/s.
50% Median: 1s, 210.54 MB/s, 3368.00 ops ended/s.
Slowest: 1s, 179.14 MB/s, 2865.00 ops ended/s.
```
The 64KB buffer is actually a small disadvantage for this case, but I believe it will be better in general than no buffer.
2019-11-14 15:58:41 -05:00
|
|
|
if length >= readAheadSize {
|
2020-02-06 23:13:55 -05:00
|
|
|
rc, err := readahead.NewReadCloserSize(r, readAheadBuffers, readAheadBufSize)
|
|
|
|
if err != nil {
|
|
|
|
r.Close()
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return rc, nil
|
Remove read-ahead for small files (#8522)
We should only read ahead if we are reading big files. We enable it for files >= 16MB.
Benchmark on 64KB objects.
Before:
```
Operation: GET
Errors: 0
Average: 59.976s, 87.13 MB/s, 1394.07 ops ended/s.
Fastest: 1s, 90.99 MB/s, 1455.00 ops ended/s.
50% Median: 1s, 87.53 MB/s, 1401.00 ops ended/s.
Slowest: 1s, 81.39 MB/s, 1301.00 ops ended/s.
```
After:
```
Operation: GET
Errors: 0
Average: 59.992s, 207.99 MB/s, 3327.85 ops ended/s.
Fastest: 1s, 219.20 MB/s, 3507.00 ops ended/s.
50% Median: 1s, 210.54 MB/s, 3368.00 ops ended/s.
Slowest: 1s, 179.14 MB/s, 2865.00 ops ended/s.
```
The 64KB buffer is actually a small disadvantage for this case, but I believe it will be better in general than no buffer.
2019-11-14 15:58:41 -05:00
|
|
|
}
|
2019-05-22 16:47:15 -04:00
|
|
|
|
Remove read-ahead for small files (#8522)
We should only read ahead if we are reading big files. We enable it for files >= 16MB.
Benchmark on 64KB objects.
Before:
```
Operation: GET
Errors: 0
Average: 59.976s, 87.13 MB/s, 1394.07 ops ended/s.
Fastest: 1s, 90.99 MB/s, 1455.00 ops ended/s.
50% Median: 1s, 87.53 MB/s, 1401.00 ops ended/s.
Slowest: 1s, 81.39 MB/s, 1301.00 ops ended/s.
```
After:
```
Operation: GET
Errors: 0
Average: 59.992s, 207.99 MB/s, 3327.85 ops ended/s.
Fastest: 1s, 219.20 MB/s, 3507.00 ops ended/s.
50% Median: 1s, 210.54 MB/s, 3368.00 ops ended/s.
Slowest: 1s, 179.14 MB/s, 2865.00 ops ended/s.
```
The 64KB buffer is actually a small disadvantage for this case, but I believe it will be better in general than no buffer.
2019-11-14 15:58:41 -05:00
|
|
|
// Just add a small 64k buffer.
|
|
|
|
r.Reader = bufio.NewReaderSize(r.Reader, 64<<10)
|
|
|
|
return r, nil
|
2019-01-17 07:58:18 -05:00
|
|
|
}
|
|
|
|
|
2020-02-06 23:13:55 -05:00
|
|
|
// closeWrapper adapts a plain `func() error` so that it can be used
// wherever an io.Closer is expected.
type closeWrapper func() error

// Close runs the wrapped function and returns its result unchanged.
func (fn closeWrapper) Close() error {
	return fn()
}
|
|
|
|
|
2019-01-17 07:58:18 -05:00
|
|
|
// CreateFile - creates the file.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) CreateFile(ctx context.Context, volume, path string, fileSize int64, r io.Reader) (err error) {
|
2019-04-30 19:27:31 -04:00
|
|
|
if fileSize < -1 {
|
2016-10-29 15:44:44 -04:00
|
|
|
return errInvalidArgument
|
|
|
|
}
|
|
|
|
|
2021-03-24 12:08:05 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
filePath := pathJoin(volumeDir, path)
|
|
|
|
if err = checkPathLength(filePath); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
parentFilePath := pathutil.Dir(filePath)
|
|
|
|
defer func() {
|
|
|
|
if err != nil {
|
2021-03-24 12:35:45 -04:00
|
|
|
if volume == minioMetaTmpBucket {
|
|
|
|
removeAll(parentFilePath)
|
|
|
|
}
|
2021-03-24 12:08:05 -04:00
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
2021-03-18 17:09:55 -04:00
|
|
|
if fileSize >= 0 && fileSize <= smallFileThreshold {
|
2021-03-17 12:38:38 -04:00
|
|
|
// For streams smaller than 128KiB we simply write them as O_DSYNC (fdatasync)
|
|
|
|
// and not O_DIRECT to avoid the complexities of aligned I/O.
|
2021-03-24 12:08:05 -04:00
|
|
|
w, err := s.openFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC)
|
2021-03-17 12:38:38 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer w.Close()
|
2021-01-07 22:27:31 -05:00
|
|
|
|
2021-03-17 12:38:38 -04:00
|
|
|
written, err := io.Copy(w, r)
|
|
|
|
if err != nil {
|
|
|
|
return osErrToFileErr(err)
|
2019-04-24 00:25:06 -04:00
|
|
|
}
|
2021-03-17 12:38:38 -04:00
|
|
|
|
|
|
|
if written > fileSize {
|
|
|
|
return errMoreData
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2019-04-24 00:25:06 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create top level directories if they don't exist.
|
|
|
|
// with mode 0777 mkdir honors system umask.
|
2021-03-24 12:08:05 -04:00
|
|
|
if err = mkdirAll(parentFilePath, 0777); err != nil {
|
2021-03-17 12:38:38 -04:00
|
|
|
return osErrToFileErr(err)
|
2019-04-24 00:25:06 -04:00
|
|
|
}
|
|
|
|
|
2021-03-23 17:51:27 -04:00
|
|
|
w, err := OpenFileDirectIO(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0666)
|
2019-04-24 00:25:06 -04:00
|
|
|
if err != nil {
|
2021-03-17 12:38:38 -04:00
|
|
|
return osErrToFileErr(err)
|
2016-10-29 15:44:44 -04:00
|
|
|
}
|
2019-01-17 07:58:18 -05:00
|
|
|
|
2020-07-01 13:57:23 -04:00
|
|
|
defer func() {
|
|
|
|
disk.Fdatasync(w) // Only interested in flushing the size_t not mtime/atime
|
|
|
|
w.Close()
|
|
|
|
}()
|
2019-05-22 16:47:15 -04:00
|
|
|
|
2021-03-17 12:38:38 -04:00
|
|
|
bufp := s.poolLarge.Get().(*[]byte)
|
|
|
|
defer s.poolLarge.Put(bufp)
|
2019-01-17 07:58:18 -05:00
|
|
|
|
2019-08-23 18:36:46 -04:00
|
|
|
written, err := xioutil.CopyAligned(w, r, *bufp, fileSize)
|
2019-05-22 16:47:15 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
2019-01-17 07:58:18 -05:00
|
|
|
}
|
2019-04-24 00:25:06 -04:00
|
|
|
|
2021-03-18 17:09:55 -04:00
|
|
|
if written < fileSize && fileSize >= 0 {
|
2019-05-22 16:47:15 -04:00
|
|
|
return errLessData
|
2021-03-18 17:09:55 -04:00
|
|
|
} else if written > fileSize && fileSize >= 0 {
|
2019-05-22 16:47:15 -04:00
|
|
|
return errMoreData
|
2019-01-17 07:58:18 -05:00
|
|
|
}
|
2019-05-22 16:47:15 -04:00
|
|
|
|
|
|
|
return nil
|
2016-10-29 15:44:44 -04:00
|
|
|
}
|
|
|
|
|
2020-11-02 19:14:31 -05:00
|
|
|
func (s *xlStorage) WriteAll(ctx context.Context, volume string, path string, b []byte) (err error) {
|
2021-03-24 12:08:05 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
filePath := pathJoin(volumeDir, path)
|
|
|
|
if err = checkPathLength(filePath); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
w, err := s.openFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC)
|
2018-11-14 09:18:35 -05:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2019-05-22 16:47:15 -04:00
|
|
|
defer w.Close()
|
2021-02-24 03:14:16 -05:00
|
|
|
|
2020-11-02 19:14:31 -05:00
|
|
|
n, err := w.Write(b)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-02-24 03:14:16 -05:00
|
|
|
|
2020-11-02 19:14:31 -05:00
|
|
|
if n != len(b) {
|
|
|
|
return io.ErrShortWrite
|
|
|
|
}
|
2021-02-24 03:14:16 -05:00
|
|
|
|
2020-11-02 19:14:31 -05:00
|
|
|
return nil
|
2018-11-14 09:18:35 -05:00
|
|
|
}
|
|
|
|
|
2016-10-29 15:44:44 -04:00
|
|
|
// AppendFile - append a byte array at path, if file doesn't exist at
|
|
|
|
// path this call explicitly creates it.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) AppendFile(ctx context.Context, volume string, path string, buf []byte) (err error) {
|
2021-02-24 03:14:16 -05:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stat a volume entry.
|
2021-03-23 17:51:27 -04:00
|
|
|
if _, err = Lstat(volumeDir); err != nil {
|
2021-02-24 03:14:16 -05:00
|
|
|
if osIsNotExist(err) {
|
|
|
|
return errVolumeNotFound
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-03-24 12:08:05 -04:00
|
|
|
filePath := pathJoin(volumeDir, path)
|
|
|
|
if err = checkPathLength(filePath); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2018-12-11 19:22:56 -05:00
|
|
|
var w *os.File
|
2019-04-24 00:25:06 -04:00
|
|
|
// Create file if not found. Not doing O_DIRECT here to avoid the code that does buffer aligned writes.
|
|
|
|
// AppendFile() is only used by healing code to heal objects written in old format.
|
2021-03-24 12:08:05 -04:00
|
|
|
w, err = s.openFile(filePath, os.O_CREATE|os.O_APPEND|os.O_WRONLY)
|
2016-10-29 15:44:44 -04:00
|
|
|
if err != nil {
|
2016-06-19 18:31:13 -04:00
|
|
|
return err
|
2016-05-04 15:18:40 -04:00
|
|
|
}
|
2021-02-24 03:14:16 -05:00
|
|
|
defer w.Close()
|
2018-11-14 09:18:35 -05:00
|
|
|
|
2021-02-24 03:14:16 -05:00
|
|
|
n, err := w.Write(buf)
|
|
|
|
if err != nil {
|
2018-11-14 09:18:35 -05:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-02-24 03:14:16 -05:00
|
|
|
if n != len(buf) {
|
|
|
|
return io.ErrShortWrite
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
// CheckParts check if path has necessary parts available.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) CheckParts(ctx context.Context, volume string, path string, fi FileInfo) error {
|
2016-05-18 00:22:27 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
2020-06-12 23:04:01 -04:00
|
|
|
return err
|
2016-05-18 00:22:27 -04:00
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
|
2016-05-18 00:22:27 -04:00
|
|
|
// Stat a volume entry.
|
2021-03-23 17:51:27 -04:00
|
|
|
if _, err = Lstat(volumeDir); err != nil {
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2020-06-12 23:04:01 -04:00
|
|
|
return errVolumeNotFound
|
2016-05-18 00:22:27 -04:00
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
return err
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
for _, part := range fi.Parts {
|
|
|
|
partPath := pathJoin(path, fi.DataDir, fmt.Sprintf("part.%d", part.Number))
|
2020-07-17 20:41:29 -04:00
|
|
|
if fi.XLV1 {
|
|
|
|
partPath = pathJoin(path, fmt.Sprintf("part.%d", part.Number))
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
filePath := pathJoin(volumeDir, partPath)
|
|
|
|
if err = checkPathLength(filePath); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2021-03-23 17:51:27 -04:00
|
|
|
st, err := Lstat(filePath)
|
2020-06-12 23:04:01 -04:00
|
|
|
if err != nil {
|
|
|
|
return osErrToFileErr(err)
|
|
|
|
}
|
|
|
|
if st.Mode().IsDir() {
|
|
|
|
return errFileNotFound
|
|
|
|
}
|
2020-09-01 15:06:45 -04:00
|
|
|
// Check if shard is truncated.
|
|
|
|
if st.Size() < fi.Erasure.ShardFileSize(part.Size) {
|
|
|
|
return errFileCorrupt
|
|
|
|
}
|
2016-05-11 15:55:02 -04:00
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// CheckFile check if path has necessary metadata.
|
2021-01-18 15:25:22 -05:00
|
|
|
// This function does the following check, suppose
|
|
|
|
// you are creating a metadata file at "a/b/c/d/xl.meta",
|
|
|
|
// makes sure that there is no `xl.meta` at
|
|
|
|
// - "a/b/c/"
|
|
|
|
// - "a/b/"
|
|
|
|
// - "a/"
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) CheckFile(ctx context.Context, volume string, path string) error {
|
2020-06-12 23:04:01 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
2016-04-08 13:37:38 -04:00
|
|
|
if err != nil {
|
2020-06-12 23:04:01 -04:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-01-18 15:25:22 -05:00
|
|
|
var checkFile func(p string) error
|
|
|
|
checkFile = func(p string) error {
|
|
|
|
if p == "." || p == SlashSeparator {
|
2021-01-02 15:01:29 -05:00
|
|
|
return errPathNotFound
|
2016-04-12 15:45:15 -04:00
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
|
2021-01-18 15:25:22 -05:00
|
|
|
filePath := pathJoin(volumeDir, p, xlStorageFormatFile)
|
|
|
|
if err := checkPathLength(filePath); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
|
2021-03-23 17:51:27 -04:00
|
|
|
st, _ := Lstat(filePath)
|
2021-01-18 15:25:22 -05:00
|
|
|
if st == nil {
|
2021-01-19 13:01:06 -05:00
|
|
|
if !s.formatLegacy {
|
|
|
|
return errPathNotFound
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
|
2021-01-19 13:01:06 -05:00
|
|
|
filePathOld := pathJoin(volumeDir, p, xlStorageFormatFileV1)
|
|
|
|
if err := checkPathLength(filePathOld); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-03-23 17:51:27 -04:00
|
|
|
st, _ = Lstat(filePathOld)
|
2021-01-19 13:01:06 -05:00
|
|
|
if st == nil {
|
|
|
|
return errPathNotFound
|
2021-01-18 15:25:22 -05:00
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
|
2021-01-18 15:25:22 -05:00
|
|
|
if st != nil {
|
|
|
|
if !st.Mode().IsRegular() {
|
|
|
|
// not a regular file return error.
|
|
|
|
return errFileNotFound
|
|
|
|
}
|
|
|
|
// Success fully found
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2021-02-08 13:15:12 -05:00
|
|
|
return checkFile(pathutil.Dir(p))
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
|
2021-01-18 15:25:22 -05:00
|
|
|
return checkFile(path)
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
|
|
|
|
2020-03-11 11:56:36 -04:00
|
|
|
// deleteFile deletes a file or a directory if its empty unless recursive
|
|
|
|
// is set to true. If the target is successfully deleted, it will recursively
|
|
|
|
// move up the tree, deleting empty parent directories until it finds one
|
|
|
|
// with files in it. Returns nil for a non-empty directory even when
|
|
|
|
// recursive is set to false.
|
2021-03-24 17:19:52 -04:00
|
|
|
func (s *xlStorage) deleteFile(basePath, deletePath string, recursive bool) error {
|
2020-03-11 11:56:36 -04:00
|
|
|
if basePath == "" || deletePath == "" {
|
|
|
|
return nil
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
isObjectDir := HasSuffix(deletePath, SlashSeparator)
|
2021-02-08 13:15:12 -05:00
|
|
|
basePath = pathutil.Clean(basePath)
|
|
|
|
deletePath = pathutil.Clean(deletePath)
|
2020-03-11 11:56:36 -04:00
|
|
|
if !strings.HasPrefix(deletePath, basePath) || deletePath == basePath {
|
2016-04-08 13:37:38 -04:00
|
|
|
return nil
|
|
|
|
}
|
2017-08-03 23:04:28 -04:00
|
|
|
|
2020-03-11 11:56:36 -04:00
|
|
|
var err error
|
|
|
|
if recursive {
|
2021-03-24 17:19:52 -04:00
|
|
|
err = renameAll(deletePath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, mustGetUUID()))
|
2020-03-11 11:56:36 -04:00
|
|
|
} else {
|
2021-03-23 17:51:27 -04:00
|
|
|
err = Remove(deletePath)
|
2020-03-11 11:56:36 -04:00
|
|
|
}
|
|
|
|
if err != nil {
|
2019-03-18 10:46:20 -04:00
|
|
|
switch {
|
|
|
|
case isSysErrNotEmpty(err):
|
2020-06-12 23:04:01 -04:00
|
|
|
// if object is a directory, but if its not empty
|
|
|
|
// return FileNotFound to indicate its an empty prefix.
|
|
|
|
if isObjectDir {
|
|
|
|
return errFileNotFound
|
|
|
|
}
|
2019-03-18 10:46:20 -04:00
|
|
|
// Ignore errors if the directory is not empty. The server relies on
|
|
|
|
// this functionality, and sometimes uses recursion that should not
|
|
|
|
// error on parent directories.
|
2017-08-03 23:04:28 -04:00
|
|
|
return nil
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsNotExist(err):
|
2016-10-17 19:38:46 -04:00
|
|
|
return errFileNotFound
|
2020-11-23 11:36:49 -05:00
|
|
|
case osIsPermission(err):
|
2016-10-17 19:38:46 -04:00
|
|
|
return errFileAccessDenied
|
2019-03-18 10:46:20 -04:00
|
|
|
case isSysErrIO(err):
|
2018-07-27 18:32:19 -04:00
|
|
|
return errFaultyDisk
|
2019-03-18 10:46:20 -04:00
|
|
|
default:
|
|
|
|
return err
|
2016-10-17 19:38:46 -04:00
|
|
|
}
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
2017-08-03 23:04:28 -04:00
|
|
|
|
2021-02-08 13:15:12 -05:00
|
|
|
deletePath = pathutil.Dir(deletePath)
|
2018-02-20 18:33:26 -05:00
|
|
|
|
2020-03-11 11:56:36 -04:00
|
|
|
// Delete parent directory obviously not recursively. Errors for
|
|
|
|
// parent directories shouldn't trickle down.
|
2021-03-24 17:19:52 -04:00
|
|
|
s.deleteFile(basePath, deletePath, false)
|
posix: do not upstream errors in deleteFile (#4771)
This commit changes posix's deleteFile() to not upstream errors from
removing parent directories. This fixes a race condition.
The race condition occurs when multiple deleteFile()s are called on the
same parent directory, but different child files. Because deleteFile()
recursively removes parent directories if they are empty, but
deleteFile() errors if the selected deletePath does not exist, there was
an opportunity for a race condition. The two processes would remove the
child directories successfully, then depend on the parent directory
still existing. In some cases this is an invalid assumption, because
other processes can remove the parent directory beforehand. This commit
changes deleteFile() to not upstream an error if one occurs, because the
only required error should be from the immediate deletePath, not from a
parent path.
In the specific bug report, multiple CompleteMultipartUpload requests
would launch multiple deleteFile() requests. Because they chain up on
parent directories, ultimately at the end, there would be multiple
remove files for the ultimate parent directory,
.minio.sys/multipart/{bucket}. Because only one will succeed and one
will fail, an error would be upstreamed saying that the file does not
exist, and the CompleteMultipartUpload code interpreted this as
NoSuchKey, or that the object/part id doesn't exist. This was faulty
behavior and is now fixed.
The added test fails before this change and passes after this change.
Fixes: https://github.com/minio/minio/issues/4727
2017-08-04 19:51:20 -04:00
|
|
|
|
|
|
|
return nil
|
2016-04-08 20:13:16 -04:00
|
|
|
}
|
|
|
|
|
2016-04-08 13:37:38 -04:00
|
|
|
// DeleteFile - delete a file at path.
|
2020-10-28 12:18:35 -04:00
|
|
|
func (s *xlStorage) Delete(ctx context.Context, volume string, path string, recursive bool) (err error) {
|
2016-05-18 00:22:27 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2019-10-01 16:12:15 -04:00
|
|
|
|
2016-05-18 00:22:27 -04:00
|
|
|
// Stat a volume entry.
|
2021-03-23 17:51:27 -04:00
|
|
|
_, err = Lstat(volumeDir)
|
2016-04-13 14:32:47 -04:00
|
|
|
if err != nil {
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2016-05-18 00:22:27 -04:00
|
|
|
return errVolumeNotFound
|
2020-11-23 11:36:49 -05:00
|
|
|
} else if osIsPermission(err) {
|
2019-10-01 16:12:15 -04:00
|
|
|
return errVolumeAccessDenied
|
2018-07-27 18:32:19 -04:00
|
|
|
} else if isSysErrIO(err) {
|
|
|
|
return errFaultyDisk
|
2016-05-18 00:22:27 -04:00
|
|
|
}
|
2016-04-13 14:32:47 -04:00
|
|
|
return err
|
2016-04-08 13:37:38 -04:00
|
|
|
}
|
|
|
|
|
2019-08-06 15:08:58 -04:00
|
|
|
// Following code is needed so that we retain SlashSeparator suffix if any in
|
2016-04-13 14:32:47 -04:00
|
|
|
// path argument.
|
2016-05-05 04:39:26 -04:00
|
|
|
filePath := pathJoin(volumeDir, path)
|
2020-03-11 11:56:36 -04:00
|
|
|
if err = checkPathLength(filePath); err != nil {
|
2016-05-11 15:55:02 -04:00
|
|
|
return err
|
|
|
|
}
|
2016-04-08 13:37:38 -04:00
|
|
|
|
2020-10-28 12:18:35 -04:00
|
|
|
// Delete file and delete parent directory as well if it's empty.
|
2021-03-24 17:19:52 -04:00
|
|
|
return s.deleteFile(volumeDir, filePath, recursive)
|
2019-05-13 15:25:49 -04:00
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
// RenameData - rename source path to destination path atomically, metadata and data directory.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) RenameData(ctx context.Context, srcVolume, srcPath, dataDir, dstVolume, dstPath string) (err error) {
|
2020-06-12 23:04:01 -04:00
|
|
|
srcVolumeDir, err := s.getVolDir(srcVolume)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
dstVolumeDir, err := s.getVolDir(dstVolume)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stat a volume entry.
|
2021-03-23 17:51:27 -04:00
|
|
|
_, err = Lstat(srcVolumeDir)
|
2020-06-12 23:04:01 -04:00
|
|
|
if err != nil {
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2020-06-12 23:04:01 -04:00
|
|
|
return errVolumeNotFound
|
|
|
|
} else if isSysErrIO(err) {
|
|
|
|
return errFaultyDisk
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
2020-12-18 11:51:09 -05:00
|
|
|
|
2021-03-23 17:51:27 -04:00
|
|
|
if _, err = Lstat(dstVolumeDir); err != nil {
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2020-06-12 23:04:01 -04:00
|
|
|
return errVolumeNotFound
|
|
|
|
} else if isSysErrIO(err) {
|
|
|
|
return errFaultyDisk
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-02-08 13:15:12 -05:00
|
|
|
srcFilePath := pathutil.Join(srcVolumeDir, pathJoin(srcPath, xlStorageFormatFile))
|
|
|
|
dstFilePath := pathutil.Join(dstVolumeDir, pathJoin(dstPath, xlStorageFormatFile))
|
2020-06-12 23:04:01 -04:00
|
|
|
|
|
|
|
var srcDataPath string
|
|
|
|
var dstDataPath string
|
|
|
|
if dataDir != "" {
|
|
|
|
srcDataPath = retainSlash(pathJoin(srcVolumeDir, srcPath, dataDir))
|
|
|
|
// make sure to always use path.Join here, do not use pathJoin as
|
|
|
|
// it would additionally add `/` at the end and it comes in the
|
|
|
|
// way of renameAll(), parentDir creation.
|
2021-02-08 13:15:12 -05:00
|
|
|
dstDataPath = pathutil.Join(dstVolumeDir, dstPath, dataDir)
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
if err = checkPathLength(srcFilePath); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if err = checkPathLength(dstFilePath); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-02-24 03:14:16 -05:00
|
|
|
srcBuf, err := xioutil.ReadFile(srcFilePath)
|
2020-06-12 23:04:01 -04:00
|
|
|
if err != nil {
|
|
|
|
return osErrToFileErr(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
fi, err := getFileInfo(srcBuf, dstVolume, dstPath, "")
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2021-02-24 03:14:16 -05:00
|
|
|
dstBuf, err := xioutil.ReadFile(dstFilePath)
|
2020-06-19 13:58:17 -04:00
|
|
|
if err != nil {
|
2020-11-23 11:36:49 -05:00
|
|
|
if !osIsNotExist(err) {
|
2020-06-19 13:58:17 -04:00
|
|
|
return osErrToFileErr(err)
|
|
|
|
}
|
2021-01-11 05:27:04 -05:00
|
|
|
// errFileNotFound comes here.
|
|
|
|
err = s.renameLegacyMetadata(dstVolumeDir, dstPath)
|
2020-06-19 13:58:17 -04:00
|
|
|
if err != nil && err != errFileNotFound {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err == nil {
|
2021-02-24 03:14:16 -05:00
|
|
|
dstBuf, err = xioutil.ReadFile(dstFilePath)
|
2020-11-23 11:36:49 -05:00
|
|
|
if err != nil && !osIsNotExist(err) {
|
2020-06-19 13:58:17 -04:00
|
|
|
return osErrToFileErr(err)
|
|
|
|
}
|
|
|
|
}
|
2020-12-18 11:51:09 -05:00
|
|
|
if err == errFileNotFound {
|
|
|
|
// Verification to ensure that we
|
|
|
|
// don't have objects already created
|
|
|
|
// at this location, verify that resultant
|
|
|
|
// directories don't have any unexpected
|
|
|
|
// directories that we do not understand
|
|
|
|
// or expect. If its already there we should
|
|
|
|
// make sure to reject further renames
|
|
|
|
// for such objects.
|
|
|
|
//
|
|
|
|
// This elaborate check is necessary to avoid
|
|
|
|
// scenarios such as these.
|
|
|
|
//
|
|
|
|
// bucket1/name1/obj1/xl.meta
|
|
|
|
// bucket1/name1/xl.meta --> this should never
|
|
|
|
// be allowed.
|
|
|
|
{
|
2021-02-08 13:15:12 -05:00
|
|
|
entries, err := readDirN(pathutil.Dir(dstFilePath), 1)
|
2020-12-18 11:51:09 -05:00
|
|
|
if err != nil && err != errFileNotFound {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if len(entries) > 0 {
|
2021-02-08 13:15:12 -05:00
|
|
|
entry := pathutil.Clean(entries[0])
|
2020-12-18 11:51:09 -05:00
|
|
|
if entry != legacyDataDir {
|
|
|
|
_, uerr := uuid.Parse(entry)
|
|
|
|
if uerr != nil {
|
|
|
|
return errFileParentIsFile
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
var xlMeta xlMetaV2
|
|
|
|
var legacyPreserved bool
|
|
|
|
if len(dstBuf) > 0 {
|
|
|
|
if isXL2V1Format(dstBuf) {
|
|
|
|
if err = xlMeta.Load(dstBuf); err != nil {
|
|
|
|
logger.LogIf(s.ctx, err)
|
|
|
|
return errFileCorrupt
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// This code-path is to preserve the legacy data.
|
|
|
|
xlMetaLegacy := &xlMetaV1Object{}
|
|
|
|
var json = jsoniter.ConfigCompatibleWithStandardLibrary
|
|
|
|
if err := json.Unmarshal(dstBuf, xlMetaLegacy); err != nil {
|
|
|
|
logger.LogIf(s.ctx, err)
|
|
|
|
return errFileCorrupt
|
|
|
|
}
|
|
|
|
if err = xlMeta.AddLegacy(xlMetaLegacy); err != nil {
|
|
|
|
logger.LogIf(s.ctx, err)
|
|
|
|
return errFileCorrupt
|
|
|
|
}
|
|
|
|
legacyPreserved = true
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// It is possible that some drives may not have `xl.meta` file
|
|
|
|
// in such scenarios verify if atleast `part.1` files exist
|
|
|
|
// to verify for legacy version.
|
2021-01-19 13:01:06 -05:00
|
|
|
if s.formatLegacy {
|
|
|
|
// We only need this code if we are moving
|
|
|
|
// from `xl.json` to `xl.meta`, we can avoid
|
|
|
|
// one extra readdir operation here for all
|
|
|
|
// new deployments.
|
|
|
|
currentDataPath := pathJoin(dstVolumeDir, dstPath)
|
|
|
|
entries, err := readDirN(currentDataPath, 1)
|
|
|
|
if err != nil && err != errFileNotFound {
|
|
|
|
return osErrToFileErr(err)
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
2021-01-19 13:01:06 -05:00
|
|
|
for _, entry := range entries {
|
|
|
|
if entry == xlStorageFormatFile || strings.HasSuffix(entry, slashSeparator) {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if strings.HasPrefix(entry, "part.") {
|
|
|
|
legacyPreserved = true
|
|
|
|
break
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if legacyPreserved {
|
|
|
|
// Preserve all the legacy data, could be slow, but at max there can be 10,000 parts.
|
|
|
|
currentDataPath := pathJoin(dstVolumeDir, dstPath)
|
|
|
|
entries, err := readDir(currentDataPath)
|
|
|
|
if err != nil {
|
|
|
|
return osErrToFileErr(err)
|
|
|
|
}
|
2020-08-05 16:31:12 -04:00
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
legacyDataPath := pathJoin(dstVolumeDir, dstPath, legacyDataDir)
|
|
|
|
// legacy data dir means its old content, honor system umask.
|
2021-01-22 18:38:21 -05:00
|
|
|
if err = reliableMkdirAll(legacyDataPath, 0777); err != nil {
|
2020-06-12 23:04:01 -04:00
|
|
|
return osErrToFileErr(err)
|
|
|
|
}
|
|
|
|
|
2020-08-04 17:55:53 -04:00
|
|
|
if s.globalSync {
|
|
|
|
// Sync all the previous directory operations.
|
|
|
|
globalSync()
|
|
|
|
}
|
2020-06-19 13:58:17 -04:00
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
for _, entry := range entries {
|
2020-08-05 16:31:12 -04:00
|
|
|
// Skip xl.meta renames further, also ignore any directories such as `legacyDataDir`
|
2020-08-07 16:22:53 -04:00
|
|
|
if entry == xlStorageFormatFile || strings.HasSuffix(entry, slashSeparator) {
|
2020-06-12 23:04:01 -04:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2021-03-23 17:51:27 -04:00
|
|
|
if err = Rename(pathJoin(currentDataPath, entry), pathJoin(legacyDataPath, entry)); err != nil {
|
2020-06-12 23:04:01 -04:00
|
|
|
return osErrToFileErr(err)
|
|
|
|
}
|
|
|
|
}
|
2020-07-21 16:54:06 -04:00
|
|
|
|
|
|
|
// Sync all the metadata operations once renames are done.
|
2020-08-04 17:55:53 -04:00
|
|
|
if s.globalSync {
|
|
|
|
globalSync()
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
var oldDstDataPath string
|
|
|
|
if fi.VersionID == "" {
|
|
|
|
// return the latest "null" versionId info
|
|
|
|
ofi, err := xlMeta.ToFileInfo(dstVolume, dstPath, nullVersionID)
|
2020-09-02 03:19:03 -04:00
|
|
|
if err == nil && !ofi.Deleted {
|
2020-06-12 23:04:01 -04:00
|
|
|
// Purge the destination path as we are not preserving anything
|
|
|
|
// versioned object was not requested.
|
|
|
|
oldDstDataPath = pathJoin(dstVolumeDir, dstPath, ofi.DataDir)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if err = xlMeta.AddVersion(fi); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
dstBuf, err = xlMeta.MarshalMsg(append(xlHeader[:], xlVersionV1[:]...))
|
|
|
|
if err != nil {
|
|
|
|
return errFileCorrupt
|
|
|
|
}
|
|
|
|
|
2020-11-02 19:14:31 -05:00
|
|
|
if err = s.WriteAll(ctx, srcVolume, pathJoin(srcPath, xlStorageFormatFile), dstBuf); err != nil {
|
2020-06-12 23:04:01 -04:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-10-24 00:54:58 -04:00
|
|
|
// Commit data
|
2020-06-12 23:04:01 -04:00
|
|
|
if srcDataPath != "" {
|
2021-02-26 12:52:27 -05:00
|
|
|
tmpuuid := mustGetUUID()
|
|
|
|
renameAll(oldDstDataPath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, tmpuuid))
|
|
|
|
tmpuuid = mustGetUUID()
|
|
|
|
renameAll(dstDataPath, pathutil.Join(s.diskPath, minioMetaTmpDeletedBucket, tmpuuid))
|
2020-06-12 23:04:01 -04:00
|
|
|
if err = renameAll(srcDataPath, dstDataPath); err != nil {
|
2020-08-07 16:22:53 -04:00
|
|
|
return osErrToFileErr(err)
|
2020-06-12 23:04:01 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-24 00:54:58 -04:00
|
|
|
// Commit meta-file
|
|
|
|
if err = renameAll(srcFilePath, dstFilePath); err != nil {
|
|
|
|
return osErrToFileErr(err)
|
|
|
|
}
|
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
// Remove parent dir of the source file if empty
|
2021-03-19 18:42:01 -04:00
|
|
|
parentDir := pathutil.Dir(srcFilePath)
|
2021-03-24 17:19:52 -04:00
|
|
|
s.deleteFile(srcVolumeDir, parentDir, false)
|
2020-06-12 23:04:01 -04:00
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-05-28 18:13:15 -04:00
|
|
|
// RenameFile - rename source path to destination path atomically.
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) RenameFile(ctx context.Context, srcVolume, srcPath, dstVolume, dstPath string) (err error) {
|
2016-05-18 00:22:27 -04:00
|
|
|
srcVolumeDir, err := s.getVolDir(srcVolume)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
dstVolumeDir, err := s.getVolDir(dstVolume)
|
2016-04-29 15:17:48 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2016-05-18 00:22:27 -04:00
|
|
|
// Stat a volume entry.
|
2021-03-23 17:51:27 -04:00
|
|
|
_, err = Lstat(srcVolumeDir)
|
2016-04-29 15:17:48 -04:00
|
|
|
if err != nil {
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2016-05-18 00:22:27 -04:00
|
|
|
return errVolumeNotFound
|
2018-07-27 18:32:19 -04:00
|
|
|
} else if isSysErrIO(err) {
|
|
|
|
return errFaultyDisk
|
2016-05-18 00:22:27 -04:00
|
|
|
}
|
2016-04-29 15:17:48 -04:00
|
|
|
return err
|
|
|
|
}
|
2021-03-23 17:51:27 -04:00
|
|
|
_, err = Lstat(dstVolumeDir)
|
2016-05-18 00:22:27 -04:00
|
|
|
if err != nil {
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2016-05-18 00:22:27 -04:00
|
|
|
return errVolumeNotFound
|
2018-07-27 18:32:19 -04:00
|
|
|
} else if isSysErrIO(err) {
|
|
|
|
return errFaultyDisk
|
2016-05-18 00:22:27 -04:00
|
|
|
}
|
2020-04-11 14:15:30 -04:00
|
|
|
return err
|
2016-05-18 00:22:27 -04:00
|
|
|
}
|
|
|
|
|
2019-12-06 02:16:06 -05:00
|
|
|
srcIsDir := HasSuffix(srcPath, SlashSeparator)
|
|
|
|
dstIsDir := HasSuffix(dstPath, SlashSeparator)
|
2016-05-16 17:31:28 -04:00
|
|
|
// Either src and dst have to be directories or files, else return error.
|
2016-05-13 14:52:36 -04:00
|
|
|
if !(srcIsDir && dstIsDir || !srcIsDir && !dstIsDir) {
|
|
|
|
return errFileAccessDenied
|
|
|
|
}
|
2021-02-08 13:15:12 -05:00
|
|
|
srcFilePath := pathutil.Join(srcVolumeDir, srcPath)
|
2018-02-20 15:20:18 -05:00
|
|
|
if err = checkPathLength(srcFilePath); err != nil {
|
2016-06-17 14:57:51 -04:00
|
|
|
return err
|
|
|
|
}
|
2021-02-08 13:15:12 -05:00
|
|
|
dstFilePath := pathutil.Join(dstVolumeDir, dstPath)
|
2018-02-20 15:20:18 -05:00
|
|
|
if err = checkPathLength(dstFilePath); err != nil {
|
2016-06-17 14:57:51 -04:00
|
|
|
return err
|
|
|
|
}
|
2016-05-13 14:52:36 -04:00
|
|
|
if srcIsDir {
|
2018-02-20 15:20:18 -05:00
|
|
|
// If source is a directory, we expect the destination to be non-existent but we
|
|
|
|
// we still need to allow overwriting an empty directory since it represents
|
|
|
|
// an object empty directory.
|
2021-03-23 17:51:27 -04:00
|
|
|
dirInfo, err := Lstat(dstFilePath)
|
2018-07-27 18:32:19 -04:00
|
|
|
if isSysErrIO(err) {
|
|
|
|
return errFaultyDisk
|
|
|
|
}
|
2021-03-19 18:42:01 -04:00
|
|
|
if err != nil {
|
|
|
|
if !osIsNotExist(err) {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if !dirInfo.IsDir() {
|
|
|
|
return errFileAccessDenied
|
|
|
|
}
|
2021-03-23 17:51:27 -04:00
|
|
|
if err = Remove(dstFilePath); err != nil {
|
2019-03-26 17:57:44 -04:00
|
|
|
if isSysErrNotEmpty(err) {
|
|
|
|
return errFileAccessDenied
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2016-05-13 14:52:36 -04:00
|
|
|
}
|
2018-01-13 12:13:02 -05:00
|
|
|
|
|
|
|
if err = renameAll(srcFilePath, dstFilePath); err != nil {
|
2020-08-07 16:22:53 -04:00
|
|
|
return osErrToFileErr(err)
|
2016-05-03 19:10:24 -04:00
|
|
|
}
|
2016-11-21 19:34:57 -05:00
|
|
|
|
|
|
|
// Remove parent dir of the source file if empty
|
2021-03-19 18:42:01 -04:00
|
|
|
parentDir := pathutil.Dir(srcFilePath)
|
2021-03-24 17:19:52 -04:00
|
|
|
s.deleteFile(srcVolumeDir, parentDir, false)
|
2016-11-21 19:34:57 -05:00
|
|
|
|
2016-05-03 19:10:24 -04:00
|
|
|
return nil
|
2016-04-29 15:17:48 -04:00
|
|
|
}
|
2019-07-08 16:51:18 -04:00
|
|
|
|
2020-06-12 23:04:01 -04:00
|
|
|
func (s *xlStorage) bitrotVerify(partPath string, partSize int64, algo BitrotAlgorithm, sum []byte, shardSize int64) error {
|
2019-07-08 16:51:18 -04:00
|
|
|
// Open the file for reading.
|
2021-03-23 17:51:27 -04:00
|
|
|
file, err := Open(partPath)
|
2019-07-08 16:51:18 -04:00
|
|
|
if err != nil {
|
2020-06-12 23:04:01 -04:00
|
|
|
return osErrToFileErr(err)
|
2019-07-08 16:51:18 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
// Close the file descriptor.
|
|
|
|
defer file.Close()
|
|
|
|
|
|
|
|
if algo != HighwayHash256S {
|
|
|
|
h := algo.New()
|
fix: use buffers only when necessary for io.Copy() (#11229)
Use separate sync.Pool for writes/reads
Avoid passing buffers for io.CopyBuffer()
if the writer or reader implement io.WriteTo or io.ReadFrom
respectively then its useless for sync.Pool to allocate
buffers on its own since that will be completely ignored
by the io.CopyBuffer Go implementation.
Improve this wherever we see this to be optimal.
This allows us to be more efficient on memory usage.
```
385 // copyBuffer is the actual implementation of Copy and CopyBuffer.
386 // if buf is nil, one is allocated.
387 func copyBuffer(dst Writer, src Reader, buf []byte) (written int64, err error) {
388 // If the reader has a WriteTo method, use it to do the copy.
389 // Avoids an allocation and a copy.
390 if wt, ok := src.(WriterTo); ok {
391 return wt.WriteTo(dst)
392 }
393 // Similarly, if the writer has a ReadFrom method, use it to do the copy.
394 if rt, ok := dst.(ReaderFrom); ok {
395 return rt.ReadFrom(src)
396 }
```
From readahead package
```
// WriteTo writes data to w until there's no more data to write or when an error occurs.
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (a *reader) WriteTo(w io.Writer) (n int64, err error) {
if a.err != nil {
return 0, a.err
}
n = 0
for {
err = a.fill()
if err != nil {
return n, err
}
n2, err := w.Write(a.cur.buffer())
a.cur.inc(n2)
n += int64(n2)
if err != nil {
return n, err
}
```
2021-01-06 12:36:55 -05:00
|
|
|
if _, err = io.Copy(h, file); err != nil {
|
2019-10-01 16:12:15 -04:00
|
|
|
// Premature failure in reading the object,file is corrupt.
|
|
|
|
return errFileCorrupt
|
2019-07-08 16:51:18 -04:00
|
|
|
}
|
|
|
|
if !bytes.Equal(h.Sum(nil), sum) {
|
2019-10-01 16:12:15 -04:00
|
|
|
return errFileCorrupt
|
2019-07-08 16:51:18 -04:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
buf := make([]byte, shardSize)
|
|
|
|
h := algo.New()
|
|
|
|
hashBuf := make([]byte, h.Size())
|
|
|
|
fi, err := file.Stat()
|
|
|
|
if err != nil {
|
2019-10-01 16:12:15 -04:00
|
|
|
// Unable to stat on the file, return an expected error
|
|
|
|
// for healing code to fix this file.
|
2019-07-08 16:51:18 -04:00
|
|
|
return err
|
|
|
|
}
|
2019-07-12 19:29:44 -04:00
|
|
|
|
2019-10-01 16:12:15 -04:00
|
|
|
size := fi.Size()
|
|
|
|
|
2019-09-11 16:49:53 -04:00
|
|
|
// Calculate the size of the bitrot file and compare
|
|
|
|
// it with the actual file size.
|
2020-06-12 23:04:01 -04:00
|
|
|
if size != bitrotShardFileSize(partSize, shardSize, algo) {
|
2019-10-01 16:12:15 -04:00
|
|
|
return errFileCorrupt
|
2019-07-12 19:29:44 -04:00
|
|
|
}
|
|
|
|
|
2019-10-01 16:12:15 -04:00
|
|
|
var n int
|
2019-07-08 16:51:18 -04:00
|
|
|
for {
|
|
|
|
if size == 0 {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
h.Reset()
|
2019-10-01 16:12:15 -04:00
|
|
|
n, err = file.Read(hashBuf)
|
2019-07-08 16:51:18 -04:00
|
|
|
if err != nil {
|
2019-10-01 16:12:15 -04:00
|
|
|
// Read failed for an object with the right size, file is corrupt.
|
2019-07-08 16:51:18 -04:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
size -= int64(n)
|
|
|
|
if size < int64(len(buf)) {
|
|
|
|
buf = buf[:size]
|
|
|
|
}
|
|
|
|
n, err = file.Read(buf)
|
|
|
|
if err != nil {
|
2019-10-01 16:12:15 -04:00
|
|
|
// Read failed for an object with the right size, at a different offset.
|
2019-07-08 16:51:18 -04:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
size -= int64(n)
|
|
|
|
h.Write(buf)
|
|
|
|
if !bytes.Equal(h.Sum(nil), hashBuf) {
|
2019-10-01 16:12:15 -04:00
|
|
|
return errFileCorrupt
|
2019-07-08 16:51:18 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
|
2020-09-04 12:45:06 -04:00
|
|
|
func (s *xlStorage) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) (err error) {
|
2020-06-12 23:04:01 -04:00
|
|
|
volumeDir, err := s.getVolDir(volume)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Stat a volume entry.
|
2021-03-23 17:51:27 -04:00
|
|
|
_, err = Lstat(volumeDir)
|
2020-06-12 23:04:01 -04:00
|
|
|
if err != nil {
|
2020-11-23 11:36:49 -05:00
|
|
|
if osIsNotExist(err) {
|
2020-06-12 23:04:01 -04:00
|
|
|
return errVolumeNotFound
|
|
|
|
} else if isSysErrIO(err) {
|
|
|
|
return errFaultyDisk
|
2020-11-23 11:36:49 -05:00
|
|
|
} else if osIsPermission(err) {
|
2020-06-12 23:04:01 -04:00
|
|
|
return errVolumeAccessDenied
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
erasure := fi.Erasure
|
|
|
|
for _, part := range fi.Parts {
|
|
|
|
checksumInfo := erasure.GetChecksumInfo(part.Number)
|
|
|
|
partPath := pathJoin(volumeDir, path, fi.DataDir, fmt.Sprintf("part.%d", part.Number))
|
2020-07-17 20:41:29 -04:00
|
|
|
if fi.XLV1 {
|
|
|
|
partPath = pathJoin(volumeDir, path, fmt.Sprintf("part.%d", part.Number))
|
|
|
|
}
|
2020-06-12 23:04:01 -04:00
|
|
|
if err := s.bitrotVerify(partPath,
|
|
|
|
erasure.ShardFileSize(part.Size),
|
|
|
|
checksumInfo.Algorithm,
|
|
|
|
checksumInfo.Hash, erasure.ShardSize()); err != nil {
|
|
|
|
if !IsErr(err, []error{
|
|
|
|
errFileNotFound,
|
|
|
|
errVolumeNotFound,
|
|
|
|
errFileCorrupt,
|
|
|
|
}...) {
|
|
|
|
logger.GetReqInfo(s.ctx).AppendTags("disk", s.String())
|
|
|
|
logger.LogIf(s.ctx, err)
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|