speed up startup sequence for all operations (#14148)

This speed-up is intended to deliver faster startup times
for almost all MinIO operations. The changes are:

- Drives are no longer re-read for 'format.json' on a regular
  basis; the copy read during init is remembered and refreshed
  at 5-second intervals (see the sketch after this list).

- Do not run O_DIRECT tests on drives with an existing 'format.json';
  only fresh setups need this check.

- Parallelize initializing erasureSets for multiple sets.

- Avoid re-reading 'format.json' when migrating it
  from the really old V1 -> V2 -> V3 formats.

- Keep a copy of the local drives for any given server in memory
  for quick lookups.
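
As a minimal sketch of the first and last bullets, assuming hypothetical
names (cachedFormat and refreshEvery are illustrative, not MinIO's actual
API), the "read once, refresh on an interval" pattern looks like this in Go:

package main

import (
	"fmt"
	"os"
	"sync"
	"time"
)

// cachedFormat remembers the last bytes read from a drive and re-reads
// only once the refresh interval has elapsed.
type cachedFormat struct {
	mu        sync.RWMutex
	data      []byte
	lastCheck time.Time
}

const refreshEvery = 5 * time.Second

func (c *cachedFormat) get(path string) ([]byte, error) {
	c.mu.RLock()
	if c.data != nil && time.Since(c.lastCheck) <= refreshEvery {
		defer c.mu.RUnlock()
		return c.data, nil // served from memory, no drive access
	}
	c.mu.RUnlock()

	b, err := os.ReadFile(path) // hit the drive only on miss or expiry
	if err != nil {
		return nil, err
	}
	c.mu.Lock()
	c.data, c.lastCheck = b, time.Now()
	c.mu.Unlock()
	return b, nil
}

func main() {
	var c cachedFormat
	for i := 0; i < 2; i++ { // the second call is served from memory
		b, err := c.get("format.json")
		fmt.Println(len(b), err)
	}
}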
Harshavardhana 2022-01-24 11:28:45 -08:00 committed by GitHub
parent f30afa4956
commit 5a9f133491
14 changed files with 308 additions and 281 deletions

@@ -289,20 +289,20 @@ func initAutoHeal(ctx context.Context, objAPI ObjectLayer) {
}
func getLocalDisksToHeal() (disksToHeal Endpoints) {
for _, ep := range globalEndpoints {
for _, endpoint := range ep.Endpoints {
if !endpoint.IsLocal {
continue
}
// Try to connect to the current endpoint
// and reformat if the current disk is not formatted
disk, _, err := connectEndpoint(endpoint)
if errors.Is(err, errUnformattedDisk) {
disksToHeal = append(disksToHeal, endpoint)
} else if err == nil && disk != nil && disk.Healing() != nil {
disksToHeal = append(disksToHeal, disk.Endpoint())
}
for _, disk := range globalLocalDrives {
_, err := disk.GetDiskID()
if errors.Is(err, errUnformattedDisk) {
disksToHeal = append(disksToHeal, disk.Endpoint())
continue
}
if disk.Healing() != nil {
disksToHeal = append(disksToHeal, disk.Endpoint())
}
}
if len(disksToHeal) == globalEndpoints.NEndpoints() {
// When all disks == all command line endpoints
// this is a fresh setup, no need to trigger healing.
return Endpoints{}
}
return disksToHeal
}

@@ -35,8 +35,8 @@ import (
const (
minioConfigPrefix = "config"
kvPrefix = ".kv"
minioConfigBucket = minioMetaBucket + SlashSeparator + minioConfigPrefix
kvPrefix = ".kv"
// Captures all the previous SetKV operations and allows rollback.
minioConfigHistoryPrefix = minioConfigPrefix + "/history"

@@ -204,14 +204,14 @@ func (d *dataUpdateTracker) latestWithDir(dir string) uint64 {
// All of these will exit when the context is canceled.
func (d *dataUpdateTracker) start(ctx context.Context, drives ...string) {
if len(drives) == 0 {
logger.LogIf(ctx, errors.New("dataUpdateTracker.start: No drives specified"))
logger.LogIf(ctx, errors.New("dataUpdateTracker.start: No local drives specified"))
return
}
d.load(ctx, drives...)
go d.startCollector(ctx)
// startSaver will unlock.
d.mu.Lock()
go d.startSaver(ctx, dataUpdateTrackerSaveInterval, drives)
go d.startSaver(ctx, dataUpdateTrackerSaveInterval, drives...)
}
// load will attempt to load data tracking information from the supplied drives.
@@ -221,7 +221,7 @@ func (d *dataUpdateTracker) start(ctx context.Context, drives ...string) {
// If object is shared the caller should lock it.
func (d *dataUpdateTracker) load(ctx context.Context, drives ...string) {
if len(drives) == 0 {
logger.LogIf(ctx, errors.New("dataUpdateTracker.load: No drives specified"))
logger.LogIf(ctx, errors.New("dataUpdateTracker.load: No local drives specified"))
return
}
for _, drive := range drives {
@@ -246,7 +246,11 @@ func (d *dataUpdateTracker) load(ctx context.Context, drives ...string) {
// startSaver will start a saver that will write d to all supplied drives at specific intervals.
// 'd' must be write locked when started and will be unlocked.
// The saver will save and exit when supplied context is closed.
func (d *dataUpdateTracker) startSaver(ctx context.Context, interval time.Duration, drives []string) {
func (d *dataUpdateTracker) startSaver(ctx context.Context, interval time.Duration, drives ...string) {
if len(drives) == 0 {
return
}
saveNow := d.save
exited := make(chan struct{})
d.saveExited = exited

@@ -72,16 +72,9 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ
}
)
var localDrives []string
var localDrives []StorageAPI
local := endpointServerPools.FirstLocal()
for i, ep := range endpointServerPools {
for _, endpoint := range ep.Endpoints {
if endpoint.IsLocal {
localDrives = append(localDrives, endpoint.Path)
}
}
// If storage class is not set during startup, default values are used
// -- Default for Reduced Redundancy Storage class is, parity = 2
// -- Default for Standard Storage class is, parity = 2 - disks 4, 5
@@ -101,6 +94,12 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ
return nil, err
}
for _, storageDisk := range storageDisks[i] {
if storageDisk != nil && storageDisk.IsLocal() {
localDrives = append(localDrives, storageDisk)
}
}
if deploymentID == "" {
// all zones should have same deployment ID
deploymentID = formats[i].ID
@@ -124,7 +123,7 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ
z.decommissionCancelers = make([]context.CancelFunc, len(z.serverPools))
r := rand.New(rand.NewSource(time.Now().UnixNano()))
for {
err := z.Init(ctx)
err := z.Init(ctx) // Initializes all pools.
if err != nil {
if !configRetriableErrors(err) {
logger.Fatal(err, "Unable to initialize backend")
@@ -135,8 +134,14 @@ func newErasureServerPools(ctx context.Context, endpointServerPools EndpointServ
break
}
drives := make([]string, 0, len(localDrives))
for _, localDrive := range localDrives {
drives = append(drives, localDrive.Endpoint().Path)
}
globalLocalDrives = localDrives
ctx, z.shutdown = context.WithCancel(ctx)
go intDataUpdateTracker.start(ctx, localDrives...)
go intDataUpdateTracker.start(ctx, drives...)
return z, nil
}
@@ -177,7 +182,7 @@ func (z *erasureServerPools) GetRawData(ctx context.Context, volume, file string
found := 0
for _, s := range z.serverPools {
for _, disks := range s.erasureDisks {
for i, disk := range disks {
for _, disk := range disks {
if disk == OfflineDisk {
continue
}
@@ -185,10 +190,6 @@ func (z *erasureServerPools) GetRawData(ctx context.Context, volume, file string
if err != nil {
continue
}
did, err := disk.GetDiskID()
if err != nil {
did = fmt.Sprintf("disk-%d", i)
}
for _, si := range stats {
found++
var r io.ReadCloser
@@ -200,7 +201,9 @@ func (z *erasureServerPools) GetRawData(ctx context.Context, volume, file string
} else {
r = io.NopCloser(bytes.NewBuffer([]byte{}))
}
err = fn(r, disk.Hostname(), did, pathJoin(volume, si.Name), si)
// Keep disk path instead of ID, to ensure that the downloaded zip file can be
// easily automated with `minio server hostname{1...n}/disk{1...m}`.
err = fn(r, disk.Hostname(), disk.Endpoint().Path, pathJoin(volume, si.Name), si)
r.Close()
if err != nil {
return err

@@ -414,44 +414,67 @@ func newErasureSets(ctx context.Context, endpoints PoolEndpoints, storageDisks [
lockerEpSet.Add(endpoint.Host)
s.erasureLockers[i] = append(s.erasureLockers[i], locker)
}
disk := storageDisks[i*setDriveCount+j]
if disk == nil {
continue
}
diskID, derr := disk.GetDiskID()
if derr != nil {
continue
}
m, n, err := findDiskIndexByDiskID(format, diskID)
if err != nil {
continue
}
if m != i || n != j {
logger.LogIf(GlobalContext, fmt.Errorf("Detected unexpected disk ordering refusing to use the disk - poolID: %s, found disk mounted at (set=%s, disk=%s) expected mount at (set=%s, disk=%s): %s(%s)", humanize.Ordinal(poolIdx+1), humanize.Ordinal(m+1), humanize.Ordinal(n+1), humanize.Ordinal(i+1), humanize.Ordinal(j+1), disk, diskID))
s.erasureDisks[i][j] = &unrecognizedDisk{storage: disk}
continue
}
disk.SetDiskLoc(s.poolIndex, m, n)
s.endpointStrings[m*setDriveCount+n] = disk.String()
s.erasureDisks[m][n] = disk
}
// Initialize erasure objects for a given set.
s.sets[i] = &erasureObjects{
setIndex: i,
poolIndex: poolIdx,
setDriveCount: setDriveCount,
defaultParityCount: defaultParityCount,
getDisks: s.GetDisks(i),
getLockers: s.GetLockers(i),
getEndpoints: s.GetEndpoints(i),
deletedCleanupSleeper: newDynamicSleeper(10, 2*time.Second),
nsMutex: mutex,
bp: bp,
bpOld: bpOld,
}
}
var wg sync.WaitGroup
for i := 0; i < setCount; i++ {
wg.Add(1)
go func(i int) {
defer wg.Done()
var innerWg sync.WaitGroup
for j := 0; j < setDriveCount; j++ {
disk := storageDisks[i*setDriveCount+j]
if disk == nil {
continue
}
innerWg.Add(1)
go func(disk StorageAPI, i, j int) {
defer innerWg.Done()
diskID, err := disk.GetDiskID()
if err != nil {
if !errors.Is(err, errUnformattedDisk) {
logger.LogIf(ctx, err)
}
return
}
m, n, err := findDiskIndexByDiskID(format, diskID)
if err != nil {
logger.LogIf(ctx, err)
return
}
if m != i || n != j {
logger.LogIf(ctx, fmt.Errorf("Detected unexpected disk ordering refusing to use the disk - poolID: %s, found disk mounted at (set=%s, disk=%s) expected mount at (set=%s, disk=%s): %s(%s)", humanize.Ordinal(poolIdx+1), humanize.Ordinal(m+1), humanize.Ordinal(n+1), humanize.Ordinal(i+1), humanize.Ordinal(j+1), disk, diskID))
s.erasureDisks[i][j] = &unrecognizedDisk{storage: disk}
return
}
disk.SetDiskLoc(s.poolIndex, m, n)
s.endpointStrings[m*setDriveCount+n] = disk.String()
s.erasureDisks[m][n] = disk
}(disk, i, j)
}
innerWg.Wait()
// Initialize erasure objects for a given set.
s.sets[i] = &erasureObjects{
setIndex: i,
poolIndex: poolIdx,
setDriveCount: setDriveCount,
defaultParityCount: defaultParityCount,
getDisks: s.GetDisks(i),
getLockers: s.GetLockers(i),
getEndpoints: s.GetEndpoints(i),
deletedCleanupSleeper: newDynamicSleeper(10, 2*time.Second),
nsMutex: mutex,
bp: bp,
bpOld: bpOld,
}
}(i)
}
wg.Wait()
// start cleanup stale uploads go-routine.
go s.cleanupStaleUploads(ctx)
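
The restructured loop above fans out one goroutine per erasure set, with an
inner goroutine per drive of that set. Reduced to a minimal, self-contained
sketch (initDrive is a hypothetical stand-in for the GetDiskID /
findDiskIndexByDiskID / SetDiskLoc work above, not MinIO code):

package main

import (
	"fmt"
	"sync"
)

// initDrive is a placeholder for the real per-drive initialization.
func initDrive(set, drive int) error {
	return nil
}

func main() {
	const setCount, setDriveCount = 4, 8
	var wg sync.WaitGroup
	for i := 0; i < setCount; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			var inner sync.WaitGroup
			for j := 0; j < setDriveCount; j++ {
				inner.Add(1)
				go func(i, j int) {
					defer inner.Done()
					if err := initDrive(i, j); err != nil {
						fmt.Printf("set %d drive %d: %v\n", i, j, err)
					}
				}(i, j)
			}
			inner.Wait() // all drives of set i settle before the set is wired up
		}(i)
	}
	wg.Wait()
}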

@@ -24,6 +24,7 @@ import (
"encoding/json"
"errors"
"fmt"
"io/fs"
"io/ioutil"
"reflect"
"sync"
@@ -156,13 +157,9 @@ func newFormatErasureV3(numSets int, setLen int) *formatErasureV3 {
// Returns format Erasure version after reading `format.json`, returns
// successfully the version only if the backend is Erasure.
func formatGetBackendErasureVersion(formatPath string) (string, error) {
func formatGetBackendErasureVersion(b []byte) (string, error) {
meta := &formatMetaV1{}
b, err := xioutil.ReadFile(formatPath)
if err != nil {
return "", err
}
if err = json.Unmarshal(b, meta); err != nil {
if err := json.Unmarshal(b, meta); err != nil {
return "", err
}
if meta.Version != formatMetaVersionV1 {
@@ -173,7 +170,7 @@ func formatGetBackendErasureVersion(formatPath string) (string, error) {
}
// Erasure backend found, proceed to detect version.
format := &formatErasureVersionDetect{}
if err = json.Unmarshal(b, format); err != nil {
if err := json.Unmarshal(b, format); err != nil {
return "", err
}
return format.Erasure.Version, nil
@@ -182,50 +179,63 @@ func formatGetBackendErasureVersion(formatPath string) (string, error) {
// Migrates all previous versions to latest version of `format.json`,
// this code calls migration in sequence, such as V1 is migrated to V2
// first before V2 migrates to V3.
func formatErasureMigrate(export string) error {
func formatErasureMigrate(export string) ([]byte, fs.FileInfo, error) {
formatPath := pathJoin(export, minioMetaBucket, formatConfigFile)
version, err := formatGetBackendErasureVersion(formatPath)
formatData, formatFi, err := xioutil.ReadFileWithFileInfo(formatPath)
if err != nil {
return fmt.Errorf("Disk %s: %w", export, err)
return nil, nil, err
}
version, err := formatGetBackendErasureVersion(formatData)
if err != nil {
return nil, nil, fmt.Errorf("Disk %s: %w", export, err)
}
migrate := func(formatPath string, formatData []byte) ([]byte, fs.FileInfo, error) {
if err = ioutil.WriteFile(formatPath, formatData, 0o666); err != nil {
return nil, nil, err
}
formatFi, err := Lstat(formatPath)
if err != nil {
return nil, nil, err
}
return formatData, formatFi, nil
}
switch version {
case formatErasureVersionV1:
if err = formatErasureMigrateV1ToV2(export, version); err != nil {
return fmt.Errorf("Disk %s: %w", export, err)
formatData, err = formatErasureMigrateV1ToV2(formatData, version)
if err != nil {
return nil, nil, fmt.Errorf("Disk %s: %w", export, err)
}
// Migrate successful v1 => v2, proceed to v2 => v3
version = formatErasureVersionV2
fallthrough
case formatErasureVersionV2:
if err = formatErasureMigrateV2ToV3(export, version); err != nil {
return fmt.Errorf("Disk %s: %w", export, err)
formatData, err = formatErasureMigrateV2ToV3(formatData, export, version)
if err != nil {
return nil, nil, fmt.Errorf("Disk %s: %w", export, err)
}
// Migrate successful v2 => v3, v3 is latest
// version = formatXLVersionV3
fallthrough
return migrate(formatPath, formatData)
case formatErasureVersionV3:
// v3 is the latest version, return.
return nil
return formatData, formatFi, nil
}
return fmt.Errorf(`Disk %s: unknown format version %s`, export, version)
return nil, nil, fmt.Errorf(`Disk %s: unknown format version %s`, export, version)
}
// Migrates version V1 of format.json to version V2 of format.json,
// migration fails upon any error.
func formatErasureMigrateV1ToV2(export, version string) error {
func formatErasureMigrateV1ToV2(data []byte, version string) ([]byte, error) {
if version != formatErasureVersionV1 {
return fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV1, version)
return nil, fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV1, version)
}
formatPath := pathJoin(export, minioMetaBucket, formatConfigFile)
formatV1 := &formatErasureV1{}
b, err := xioutil.ReadFile(formatPath)
if err != nil {
return err
}
if err = json.Unmarshal(b, formatV1); err != nil {
return err
if err := json.Unmarshal(data, formatV1); err != nil {
return nil, err
}
formatV2 := &formatErasureV2{}
@@ -238,53 +248,38 @@ func formatErasureMigrateV1ToV2(export, version string) error {
formatV2.Erasure.Sets[0] = make([]string, len(formatV1.Erasure.JBOD))
copy(formatV2.Erasure.Sets[0], formatV1.Erasure.JBOD)
b, err = json.Marshal(formatV2)
if err != nil {
return err
}
return ioutil.WriteFile(formatPath, b, 0o666)
return json.Marshal(formatV2)
}
// Migrates V2 for format.json to V3 (Flat hierarchy for multipart)
func formatErasureMigrateV2ToV3(export, version string) error {
func formatErasureMigrateV2ToV3(data []byte, export, version string) ([]byte, error) {
if version != formatErasureVersionV2 {
return fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV2, version)
return nil, fmt.Errorf(`format version expected %s, found %s`, formatErasureVersionV2, version)
}
formatPath := pathJoin(export, minioMetaBucket, formatConfigFile)
formatV2 := &formatErasureV2{}
b, err := xioutil.ReadFile(formatPath)
if err != nil {
return err
}
err = json.Unmarshal(b, formatV2)
if err != nil {
return err
if err := json.Unmarshal(data, formatV2); err != nil {
return nil, err
}
if err = removeAll(pathJoin(export, minioMetaMultipartBucket)); err != nil {
return err
}
if err = mkdirAll(pathJoin(export, minioMetaMultipartBucket), 0o755); err != nil {
return err
tmpOld := pathJoin(export, minioMetaTmpDeletedBucket, mustGetUUID())
if err := renameAll(pathJoin(export, minioMetaMultipartBucket),
tmpOld); err != nil && err != errFileNotFound {
logger.LogIf(GlobalContext, fmt.Errorf("unable to rename (%s -> %s) %w, drive may be faulty please investigate",
pathJoin(export, minioMetaMultipartBucket),
tmpOld,
osErrToFileErr(err)))
}
// format-V2 struct is exactly same as format-V1 except that version is "3"
// which indicates the simplified multipart backend.
formatV3 := formatErasureV3{}
formatV3.Version = formatV2.Version
formatV3.Format = formatV2.Format
formatV3.Erasure = formatV2.Erasure
formatV3.Erasure.Version = formatErasureVersionV3
b, err = json.Marshal(formatV3)
if err != nil {
return err
}
return ioutil.WriteFile(formatPath, b, 0o666)
return json.Marshal(formatV3)
}
// countErrs - count a specific error.
@@ -382,25 +377,6 @@ func saveFormatErasure(disk StorageAPI, format *formatErasureV3, heal bool) erro
return nil
}
var ignoredHiddenDirectories = map[string]struct{}{
minioMetaBucket: {}, // metabucket '.minio.sys'
".minio": {}, // users may choose to double down the backend as the config folder for certs
".snapshot": {}, // .snapshot for ignoring NetApp based persistent volumes WAFL snapshot
"lost+found": {}, // 'lost+found' directory default on ext4 filesystems
"$RECYCLE.BIN": {}, // windows specific directory for each drive (hidden)
"System Volume Information": {}, // windows specific directory for each drive (hidden)
}
func isHiddenDirectories(vols ...VolInfo) bool {
for _, vol := range vols {
if _, ok := ignoredHiddenDirectories[vol.Name]; ok {
continue
}
return false
}
return true
}
// loadFormatErasure - loads format.json from disk.
func loadFormatErasure(disk StorageAPI) (format *formatErasureV3, err error) {
buf, err := disk.ReadAll(context.TODO(), minioMetaBucket, formatConfigFile)
@@ -408,17 +384,6 @@ func loadFormatErasure(disk StorageAPI) (format *formatErasureV3, err error) {
// 'file not found' and 'volume not found' as
// same. 'volume not found' usually means it's a fresh disk.
if err == errFileNotFound || err == errVolumeNotFound {
var vols []VolInfo
vols, err = disk.ListVols(context.TODO())
if err != nil {
return nil, err
}
if !isHiddenDirectories(vols...) {
// 'format.json' not found, but we found user data, reject such disks.
return nil, fmt.Errorf("some unexpected files '%v' found on %s: %w",
vols, disk, errCorruptedFormat)
}
// No other data found, its a fresh disk.
return nil, errUnformattedDisk
}
return nil, err
@@ -718,12 +683,18 @@ func saveFormatErasureAll(ctx context.Context, storageDisks []StorageAPI, format
// relinquishes the underlying connection for all storage disks.
func closeStorageDisks(storageDisks []StorageAPI) {
var wg sync.WaitGroup
for _, disk := range storageDisks {
if disk == nil {
continue
}
disk.Close()
wg.Add(1)
go func(disk StorageAPI) {
defer wg.Done()
disk.Close()
}(disk)
}
wg.Wait()
}
func initStorageDisksWithErrorsWithoutHealthCheck(endpoints Endpoints) ([]StorageAPI, []error) {
@@ -897,13 +868,10 @@ func makeFormatErasureMetaVolumes(disk StorageAPI) error {
return errDiskNotFound
}
volumes := []string{
minioMetaBucket,
minioMetaTmpBucket,
minioMetaMultipartBucket,
minioMetaTmpDeletedBucket,
dataUsageBucket,
pathJoin(minioMetaBucket, minioConfigPrefix),
minioMetaTmpBucket + "-old",
minioMetaTmpDeletedBucket, // creates .minio.sys/tmp as well as .minio.sys/tmp/.trash
minioMetaMultipartBucket, // creates .minio.sys/multipart
dataUsageBucket, // creates .minio.sys/buckets
minioConfigBucket, // creates .minio.sys/config
}
// Attempt to create MinIO internal buckets.
return disk.MakeVolBulk(context.TODO(), volumes...)

@@ -132,11 +132,12 @@ func TestFormatErasureMigrate(t *testing.T) {
t.Fatal(err)
}
if err = formatErasureMigrate(rootPath); err != nil {
formatData, _, err := formatErasureMigrate(rootPath)
if err != nil {
t.Fatal(err)
}
migratedVersion, err := formatGetBackendErasureVersion(pathJoin(rootPath, minioMetaBucket, formatConfigFile))
migratedVersion, err := formatGetBackendErasureVersion(formatData)
if err != nil {
t.Fatal(err)
}
@@ -179,7 +180,7 @@ func TestFormatErasureMigrate(t *testing.T) {
t.Fatal(err)
}
if err = formatErasureMigrate(rootPath); err == nil {
if _, _, err = formatErasureMigrate(rootPath); err == nil {
t.Fatal("Expected to fail with unexpected backend format")
}
@@ -199,7 +200,7 @@ func TestFormatErasureMigrate(t *testing.T) {
t.Fatal(err)
}
if err = formatErasureMigrate(rootPath); err == nil {
if _, _, err = formatErasureMigrate(rootPath); err == nil {
t.Fatal("Expected to fail with unexpected backend format version number")
}
}

@@ -338,6 +338,9 @@ var (
globalServiceFreezeCnt int32
globalServiceFreezeMu sync.Mutex // Updates.
// List of local drives to this node, this is only set during server startup.
globalLocalDrives []StorageAPI
// Add new variable global values here.
)

@@ -20,6 +20,7 @@ package cmd
import (
"context"
"crypto/tls"
"errors"
"fmt"
"net/http"
"net/url"
@ -70,7 +71,7 @@ var printEndpointError = func() func(Endpoint, error, bool) {
}()
// Cleans up tmp directory of the local disk.
func formatErasureCleanupTmp(diskPath string) error {
func formatErasureCleanupTmp(diskPath string) {
// Need to move temporary objects left behind from previous run of minio
// server to a unique directory under `minioMetaTmpBucket-old` to clean
// up `minioMetaTmpBucket` for the current run.
@@ -81,9 +82,23 @@ func formatErasureCleanupTmp(diskPath string) error {
//
// In this example, `33a58b40-aecc-4c9f-a22f-ff17bfa33b62` directory contains
// temporary objects from one of the previous runs of minio server.
tmpOld := pathJoin(diskPath, minioMetaTmpBucket+"-old", mustGetUUID())
tmpID := mustGetUUID()
tmpOld := pathJoin(diskPath, minioMetaTmpBucket+"-old", tmpID)
if err := renameAll(pathJoin(diskPath, minioMetaTmpBucket),
tmpOld); err != nil && err != errFileNotFound {
tmpOld); err != nil && !errors.Is(err, errFileNotFound) {
logger.LogIf(GlobalContext, fmt.Errorf("unable to rename (%s -> %s) %w, drive may be faulty please investigate",
pathJoin(diskPath, minioMetaTmpBucket),
tmpOld,
osErrToFileErr(err)))
}
if err := mkdirAll(pathJoin(diskPath, minioMetaTmpDeletedBucket), 0o777); err != nil {
logger.LogIf(GlobalContext, fmt.Errorf("unable to create (%s) %w, drive may be faulty please investigate",
pathJoin(diskPath, minioMetaTmpBucket),
err))
}
if err := renameAll(tmpOld, pathJoin(diskPath, minioMetaTmpDeletedBucket, tmpID)); err != nil && !errors.Is(err, errFileNotFound) {
logger.LogIf(GlobalContext, fmt.Errorf("unable to rename (%s -> %s) %w, drive may be faulty please investigate",
pathJoin(diskPath, minioMetaTmpBucket),
tmpOld,
@@ -92,16 +107,6 @@ func formatErasureCleanupTmp(diskPath string) error {
// Renames and schedules for purging all bucket metacache.
renameAllBucketMetacache(diskPath)
// Removal of tmp-old folder is backgrounded completely.
go removeAll(pathJoin(diskPath, minioMetaTmpBucket+"-old"))
if err := mkdirAll(pathJoin(diskPath, minioMetaTmpDeletedBucket), 0o777); err != nil {
logger.LogIf(GlobalContext, fmt.Errorf("unable to create (%s) %w, drive may be faulty please investigate",
pathJoin(diskPath, minioMetaTmpBucket),
err))
}
return nil
}
// Following error message is added to fix a regression in release
@@ -178,19 +183,6 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
}
}(storageDisks)
// Sanitize all local disks during server startup.
var wg sync.WaitGroup
for _, disk := range storageDisks {
if disk != nil && disk.IsLocal() {
wg.Add(1)
go func(disk StorageAPI) {
defer wg.Done()
disk.(*xlStorageDiskIDCheck).storage.(*xlStorage).Sanitize()
}(disk)
}
}
wg.Wait()
for i, err := range errs {
if err != nil {
if err == errDiskNotFound && retryCount >= 5 {
@@ -298,10 +290,6 @@ func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints,
return nil, nil, err
}
// This will always recreate some directories inside .minio.sys of
// the local disk such as tmp, multipart and background-ops
initErasureMetaVolumesInLocalDisks(storageDisks, formatConfigs)
return storageDisks, format, nil
}

@@ -141,6 +141,8 @@ func TestTreeWalk(t *testing.T) {
if err != nil {
t.Fatalf("Unable to create tmp directory: %s", err)
}
defer os.RemoveAll(fsDir)
endpoints := mustGetNewEndpoints(fsDir)
disk, err := newStorageAPI(endpoints[0])
if err != nil {
@@ -175,11 +177,6 @@ func TestTreeWalk(t *testing.T) {
// Simple test when marker is set.
testTreeWalkMarker(t, listDir, isLeaf, isLeafDir)
err = os.RemoveAll(fsDir)
if err != nil {
t.Fatal(err)
}
}
// Test if tree walk go-routine exits cleanly if tree walk is aborted because of timeout.
@@ -188,6 +185,7 @@ func TestTreeWalkTimeout(t *testing.T) {
if err != nil {
t.Fatalf("Unable to create tmp directory: %s", err)
}
defer os.RemoveAll(fsDir)
endpoints := mustGetNewEndpoints(fsDir)
disk, err := newStorageAPI(endpoints[0])
if err != nil {
@@ -250,10 +248,6 @@ func TestTreeWalkTimeout(t *testing.T) {
if ok {
t.Error("Tree-walk go routine has not exited after timeout.")
}
err = os.RemoveAll(fsDir)
if err != nil {
t.Error(err)
}
}
// TestRecursiveWalk - tests if treeWalk returns entries correctly with and
@@ -264,6 +258,7 @@ func TestRecursiveTreeWalk(t *testing.T) {
if err != nil {
t.Fatalf("Unable to create tmp directory: %s", err)
}
defer os.RemoveAll(fsDir1)
endpoints := mustGetNewEndpoints(fsDir1)
disk1, err := newStorageAPI(endpoints[0])
@@ -366,10 +361,6 @@ func TestRecursiveTreeWalk(t *testing.T) {
}
})
}
err = os.RemoveAll(fsDir1)
if err != nil {
t.Error(err)
}
}
func TestSortedness(t *testing.T) {
@@ -378,6 +369,7 @@ func TestSortedness(t *testing.T) {
if err != nil {
t.Errorf("Unable to create tmp directory: %s", err)
}
defer os.RemoveAll(fsDir1)
endpoints := mustGetNewEndpoints(fsDir1)
disk1, err := newStorageAPI(endpoints[0])
@@ -444,12 +436,6 @@ func TestSortedness(t *testing.T) {
t.Error(i+1, "Expected entries to be sort, but it wasn't")
}
}
// Remove directory created for testing
err = os.RemoveAll(fsDir1)
if err != nil {
t.Error(err)
}
}
func TestTreeWalkIsEnd(t *testing.T) {
@@ -458,6 +444,7 @@ func TestTreeWalkIsEnd(t *testing.T) {
if err != nil {
t.Errorf("Unable to create tmp directory: %s", err)
}
defer os.RemoveAll(fsDir1)
endpoints := mustGetNewEndpoints(fsDir1)
disk1, err := newStorageAPI(endpoints[0])
@@ -526,10 +513,4 @@ func TestTreeWalkIsEnd(t *testing.T) {
t.Errorf("Test %d: Last entry %s, doesn't have EOF marker set", i, entry.entry)
}
}
// Remove directory created for testing
err = os.RemoveAll(fsDir1)
if err != nil {
t.Error(err)
}
}

@@ -110,6 +110,8 @@ type xlStorage struct {
diskInfoCache timedValue
sync.RWMutex
formatData []byte
// mutex to prevent concurrent read operations overloading walks.
walkMu sync.Mutex
walkReadMu sync.Mutex
@@ -205,23 +207,9 @@ func newLocalXLStorage(path string) (*xlStorage, error) {
})
}
// Sanitize - sanitizes the `format.json`, cleanup tmp.
// all other future cleanups should be added here.
func (s *xlStorage) Sanitize() error {
if err := formatErasureMigrate(s.diskPath); err != nil && !errors.Is(err, os.ErrNotExist) {
return err
}
// Create any missing paths.
makeFormatErasureMetaVolumes(s)
return formatErasureCleanupTmp(s.diskPath)
}
// Initialize a new storage disk.
func newXLStorage(ep Endpoint) (*xlStorage, error) {
func newXLStorage(ep Endpoint) (s *xlStorage, err error) {
path := ep.Path
var err error
if path, err = getValidPath(path); err != nil {
return nil, err
}
@@ -255,7 +243,7 @@ func newXLStorage(ep Endpoint) (*xlStorage, error) {
}
}
p := &xlStorage{
s = &xlStorage{
diskPath: path,
endpoint: ep,
globalSync: env.Get(config.EnvFSOSync, config.EnableOff) == config.EnableOn,
@@ -265,42 +253,56 @@ func newXLStorage(ep Endpoint) (*xlStorage, error) {
diskIndex: -1,
}
go formatErasureCleanupTmp(s.diskPath) // cleanup any old data.
formatData, formatFi, err := formatErasureMigrate(s.diskPath)
if err != nil && !errors.Is(err, os.ErrNotExist) {
if os.IsPermission(err) {
return nil, errDiskAccessDenied
} else if isSysErrIO(err) {
return nil, errFaultyDisk
}
return nil, err
}
s.formatData = formatData
s.formatFileInfo = formatFi
if len(s.formatData) == 0 { // Unformatted disk; check if O_DIRECT is supported.
// Check if backend is writable and supports O_DIRECT
var rnd [32]byte
_, _ = rand.Read(rnd[:])
filePath := pathJoin(s.diskPath, ".writable-check-"+hex.EncodeToString(rnd[:])+".tmp")
w, err := s.openFileDirect(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL)
if err != nil {
return s, err
}
_, err = w.Write(alignedBuf)
w.Close()
if err != nil {
if isSysErrInvalidArg(err) {
return s, errUnsupportedDisk
}
return s, err
}
Remove(filePath)
} else {
format := &formatErasureV3{}
json := jsoniter.ConfigCompatibleWithStandardLibrary
if err = json.Unmarshal(s.formatData, &format); err != nil {
return s, errCorruptedFormat
}
s.diskID = format.Erasure.This
s.formatLastCheck = time.Now()
s.formatLegacy = format.Erasure.DistributionAlgo == formatErasureVersionV2DistributionAlgoV1
}
// Create all necessary bucket folders if possible.
if err = p.MakeVolBulk(context.TODO(), minioMetaBucket, minioMetaTmpBucket, minioMetaMultipartBucket, dataUsageBucket, minioMetaSpeedTestBucket); err != nil {
if err = makeFormatErasureMetaVolumes(s); err != nil {
return nil, err
}
// Check if backend is writable and supports O_DIRECT
var rnd [8]byte
_, _ = rand.Read(rnd[:])
tmpFile := ".writable-check-" + hex.EncodeToString(rnd[:]) + ".tmp"
filePath := pathJoin(p.diskPath, minioMetaTmpBucket, tmpFile)
w, err := OpenFileDirectIO(filePath, os.O_CREATE|os.O_WRONLY|os.O_EXCL, 0o666)
if err != nil {
switch {
case isSysErrInvalidArg(err):
return p, errUnsupportedDisk
case osIsPermission(err):
return p, errDiskAccessDenied
case isSysErrIO(err):
return p, errFaultyDisk
case isSysErrNotDir(err):
return p, errDiskNotDir
}
return p, err
}
if _, err = w.Write(alignedBuf); err != nil {
w.Close()
if isSysErrInvalidArg(err) {
return p, errUnsupportedDisk
}
return p, err
}
w.Close()
Remove(filePath)
// Success.
return p, nil
return s, nil
}
// getDiskInfo returns given disk information.
@@ -308,7 +310,6 @@ func getDiskInfo(diskPath string) (di disk.Info, err error) {
if err = checkPathLength(diskPath); err == nil {
di, err = disk.GetInfo(diskPath)
}
switch {
case osIsNotExist(err):
err = errDiskNotFound
@@ -371,7 +372,7 @@ func (s *xlStorage) SetDiskLoc(poolIdx, setIdx, diskIdx int) {
func (s *xlStorage) Healing() *healingTracker {
healingFile := pathJoin(s.diskPath, minioMetaBucket,
bucketMetaPrefix, healingTrackerFilename)
b, err := xioutil.ReadFile(healingFile)
b, err := ioutil.ReadFile(healingFile)
if err != nil {
return nil
}
@@ -624,8 +625,8 @@ func (s *xlStorage) GetDiskID() (string, error) {
fileInfo := s.formatFileInfo
lastCheck := s.formatLastCheck
// check if we have a valid disk ID that is less than 1 second old.
if fileInfo != nil && diskID != "" && time.Since(lastCheck) <= time.Second {
// check if we have a valid disk ID that is less than 1 second old.
if fileInfo != nil && diskID != "" && time.Since(lastCheck) <= 1*time.Second {
s.RUnlock()
return diskID, nil
}
@@ -645,7 +646,7 @@ func (s *xlStorage) GetDiskID() (string, error) {
}
formatFile := pathJoin(s.diskPath, minioMetaBucket, formatConfigFile)
b, err := xioutil.ReadFile(formatFile)
b, err := ioutil.ReadFile(formatFile)
if err != nil {
// If the disk is still not initialized.
if osIsNotExist(err) {
@@ -676,6 +677,7 @@ func (s *xlStorage) GetDiskID() (string, error) {
s.Lock()
defer s.Unlock()
s.formatData = b
s.diskID = format.Erasure.This
s.formatLegacy = format.Erasure.DistributionAlgo == formatErasureVersionV2DistributionAlgoV1
s.formatFileInfo = fi
@@ -1417,6 +1419,16 @@ func (s *xlStorage) readAllData(ctx context.Context, volumeDir string, filePath
// This API is meant to be used on files which have small memory footprint, do
// not use this on large files as it would cause server to crash.
func (s *xlStorage) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) {
// Specific optimization to avoid re-read from the drives for `format.json`
// in-case the caller is a network operation.
if volume == minioMetaBucket && path == formatConfigFile {
s.RLock()
formatData := s.formatData
s.RUnlock()
if len(formatData) > 0 {
return formatData, nil
}
}
volumeDir, err := s.getVolDir(volume)
if err != nil {
return nil, err
@@ -1537,6 +1549,30 @@ func (s *xlStorage) ReadFile(ctx context.Context, volume string, path string, of
return int64(len(buffer)), nil
}
func (s *xlStorage) openFileDirect(path string, mode int) (f *os.File, err error) {
// Create top level directories if they don't exist.
// with mode 0o777 mkdir honors system umask.
mkdirAll(pathutil.Dir(path), 0o777) // don't need to fail here
w, err := OpenFileDirectIO(path, mode, 0o666)
if err != nil {
switch {
case isSysErrInvalidArg(err):
return nil, errUnsupportedDisk
case osIsPermission(err):
return nil, errDiskAccessDenied
case isSysErrIO(err):
return nil, errFaultyDisk
case isSysErrNotDir(err):
return nil, errDiskNotDir
case os.IsNotExist(err):
return nil, errDiskNotFound
}
}
return w, nil
}
func (s *xlStorage) openFileSync(filePath string, mode int) (f *os.File, err error) {
// Create top level directories if they don't exist.
// with mode 0777 mkdir honors system umask.

@@ -128,13 +128,14 @@ func newXLStorageTestSetup() (*xlStorageDiskIDCheck, string, error) {
if err != nil {
return nil, "", err
}
// Create a sample format.json file
err = storage.WriteAll(context.Background(), minioMetaBucket, formatConfigFile, []byte(`{"version":"1","format":"xl","id":"592a41c2-b7cc-4130-b883-c4b5cb15965b","xl":{"version":"3","this":"da017d62-70e3-45f1-8a1a-587707e69ad1","sets":[["e07285a6-8c73-4962-89c6-047fb939f803","33b8d431-482d-4376-b63c-626d229f0a29","cff6513a-4439-4dc1-bcaa-56c9e880c352","da017d62-70e3-45f1-8a1a-587707e69ad1","9c9f21d5-1f15-4737-bce6-835faa0d9626","0a59b346-1424-4fc2-9fa2-a2e80541d0c1","7924a3dc-b69a-4971-9a2e-014966d6aebb","4d2b8dd9-4e48-444b-bdca-c89194b26042"]],"distributionAlgo":"CRCMOD"}}`))
if err != nil {
if err = storage.WriteAll(context.Background(), minioMetaBucket, formatConfigFile, []byte(`{"version":"1","format":"xl","id":"592a41c2-b7cc-4130-b883-c4b5cb15965b","xl":{"version":"3","this":"da017d62-70e3-45f1-8a1a-587707e69ad1","sets":[["e07285a6-8c73-4962-89c6-047fb939f803","33b8d431-482d-4376-b63c-626d229f0a29","cff6513a-4439-4dc1-bcaa-56c9e880c352","da017d62-70e3-45f1-8a1a-587707e69ad1","9c9f21d5-1f15-4737-bce6-835faa0d9626","0a59b346-1424-4fc2-9fa2-a2e80541d0c1","7924a3dc-b69a-4971-9a2e-014966d6aebb","4d2b8dd9-4e48-444b-bdca-c89194b26042"]],"distributionAlgo":"CRCMOD"}}`)); err != nil {
return nil, "", err
}
disk := newXLStorageDiskIDCheck(storage)
disk.diskID = "da017d62-70e3-45f1-8a1a-587707e69ad1"
disk.SetDiskID("da017d62-70e3-45f1-8a1a-587707e69ad1")
return disk, diskPath, nil
}

@@ -238,10 +238,7 @@ func SameFile(fi1, fi2 os.FileInfo) bool {
if fi1.Mode() != fi2.Mode() {
return false
}
if fi1.Size() != fi2.Size() {
return false
}
return true
return fi1.Size() == fi2.Size()
}
// DirectioAlignSize - DirectIO alignment needs to be 4K. Defined here as

@@ -19,11 +19,33 @@ package ioutil
import (
"io"
"io/fs"
"os"
"github.com/minio/minio/internal/disk"
)
// ReadFileWithFileInfo reads the named file and returns the contents.
// A successful call returns err == nil, not err == EOF.
// Because ReadFile reads the whole file, it does not treat an EOF from Read
// as an error to be reported; additionally returns os.FileInfo.
func ReadFileWithFileInfo(name string) ([]byte, fs.FileInfo, error) {
f, err := os.Open(name)
if err != nil {
return nil, nil, err
}
defer f.Close()
st, err := f.Stat()
if err != nil {
return nil, nil, err
}
dst := make([]byte, st.Size())
_, err = io.ReadFull(f, dst)
return dst, st, err
}
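
For illustration, a minimal caller of the new helper; the path here is
hypothetical and error handling is reduced to a comment:

data, fi, err := ReadFileWithFileInfo("/path/to/format.json")
if err != nil {
	// handle the read/stat error
}
_ = data // full file contents
_ = fi   // fs.FileInfo obtained from the same call, no separate Stat needed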
// ReadFile reads the named file and returns the contents.
// A successful call returns err == nil, not err == EOF.
// Because ReadFile reads the whole file, it does not treat an EOF from Read