2021-04-18 15:41:13 -04:00
// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2016-05-04 15:18:20 -04:00
2016-08-18 19:23:42 -04:00
package cmd
2016-05-04 15:18:20 -04:00
2016-05-07 03:59:43 -04:00
import (
2018-04-05 18:04:40 -04:00
"context"
2016-05-07 03:59:43 -04:00
"encoding/json"
2020-09-28 22:39:32 -04:00
"errors"
2016-05-07 03:59:43 -04:00
"fmt"
2022-01-24 14:28:45 -05:00
"io/fs"
2022-09-19 14:05:16 -04:00
"os"
2018-07-18 23:17:35 -04:00
"reflect"
2020-01-13 16:09:10 -05:00
"sync"
2018-02-15 20:45:57 -05:00
2022-09-19 14:05:16 -04:00
"github.com/dustin/go-humanize"
2021-06-01 17:59:40 -04:00
"github.com/minio/minio/internal/color"
"github.com/minio/minio/internal/config"
"github.com/minio/minio/internal/config/storageclass"
xioutil "github.com/minio/minio/internal/ioutil"
"github.com/minio/minio/internal/logger"
2023-09-04 15:57:37 -04:00
"github.com/minio/pkg/v2/sync/errgroup"
2017-06-12 20:40:28 -04:00
)
// Identifiers stored in (or compared against) the contents of format.json.
const (
	// Backend names.

	// formatBackendErasure represents the Erasure backend.
	formatBackendErasure = "xl"
	// formatBackendErasureSingle represents the Erasure backend on a single drive.
	formatBackendErasureSingle = "xl-single"

	// Erasure format versions.

	// formatErasureVersionV1 is formatErasureV1.Erasure.Version - version '1'.
	formatErasureVersionV1 = "1"
	// formatErasureVersionV2 is formatErasureV2.Erasure.Version - version '2'.
	formatErasureVersionV2 = "2"
	// formatErasureVersionV3 is formatErasureV3.Erasure.Version - version '3'.
	formatErasureVersionV3 = "3"

	// Distribution algorithms.

	// formatErasureVersionV2DistributionAlgoV1 is the legacy distribution algorithm.
	formatErasureVersionV2DistributionAlgoV1 = "CRCMOD"
	// formatErasureVersionV3DistributionAlgoV2 is the distribution algorithm
	// used with N/2 default parity.
	formatErasureVersionV3DistributionAlgoV2 = "SIPMOD"
	// formatErasureVersionV3DistributionAlgoV3 is the distribution algorithm
	// used with EC:4 default parity.
	formatErasureVersionV3DistributionAlgoV3 = "SIPMOD+PARITY"

	// offlineDiskUUID is the UUID that represents an offline disk.
	offlineDiskUUID = "ffffffff-ffff-ffff-ffff-ffffffffffff"
)
// formatErasureVersionDetect is used to detect the version of the "xl"
// format: it decodes only the `version` field nested under the `xl` key and
// ignores everything else in the document.
type formatErasureVersionDetect struct {
	Erasure struct {
		Version string `json:"version"`
	} `json:"xl"`
}
// Represents the V1 backend disk structure version
// under `.minio.sys` and actual data namespace.
2020-06-12 23:04:01 -04:00
// formatErasureV1 - structure holds format config version '1'.
type formatErasureV1 struct {
2018-01-08 17:30:55 -05:00
formatMetaV1
2020-06-12 23:04:01 -04:00
Erasure struct {
2018-01-08 17:30:55 -05:00
Version string ` json:"version" ` // Version of 'xl' format.
2022-08-04 19:10:08 -04:00
Disk string ` json:"drive" ` // Disk field carries assigned disk uuid.
2018-01-08 17:30:55 -05:00
// JBOD field carries the input disk order generated the first
// time when fresh disks were supplied.
JBOD [ ] string ` json:"jbod" `
2020-06-12 23:04:01 -04:00
} ` json:"xl" ` // Erasure field holds xl format.
2017-06-12 20:40:28 -04:00
}
2018-02-15 20:45:57 -05:00
// Represents the V2 backend disk structure version
// under `.minio.sys` and actual data namespace.
2020-06-12 23:04:01 -04:00
// formatErasureV2 - structure holds format config version '2'.
2018-07-16 23:26:42 -04:00
// The V2 format to support "large bucket" support where a bucket
// can span multiple erasure sets.
2020-06-12 23:04:01 -04:00
type formatErasureV2 struct {
2018-07-16 23:26:42 -04:00
formatMetaV1
2020-06-12 23:04:01 -04:00
Erasure struct {
2018-02-15 20:45:57 -05:00
Version string ` json:"version" ` // Version of 'xl' format.
This string ` json:"this" ` // This field carries assigned disk uuid.
// Sets field carries the input disk order generated the first
// time when fresh disks were supplied, it is a two dimensional
// array second dimension represents list of disks used per set.
Sets [ ] [ ] string ` json:"sets" `
// Distribution algorithm represents the hashing algorithm
// to pick the right set index for an object.
DistributionAlgo string ` json:"distributionAlgo" `
} ` json:"xl" `
}
2020-06-12 23:04:01 -04:00
// formatErasureV3 struct is same as formatErasureV2 struct except that formatErasureV3.Erasure.Version is "3" indicating
2018-03-15 16:55:23 -04:00
// the simplified multipart backend which is a flat hierarchy now.
// In .minio.sys/multipart we have:
2020-06-12 23:04:01 -04:00
// sha256(bucket/object)/uploadID/[xl.meta, part.1, part.2 ....]
type formatErasureV3 struct {
2018-07-16 23:26:42 -04:00
formatMetaV1
2020-06-12 23:04:01 -04:00
Erasure struct {
2018-03-15 16:55:23 -04:00
Version string ` json:"version" ` // Version of 'xl' format.
This string ` json:"this" ` // This field carries assigned disk uuid.
// Sets field carries the input disk order generated the first
// time when fresh disks were supplied, it is a two dimensional
// array second dimension represents list of disks used per set.
Sets [ ] [ ] string ` json:"sets" `
// Distribution algorithm represents the hashing algorithm
// to pick the right set index for an object.
DistributionAlgo string ` json:"distributionAlgo" `
} ` json:"xl" `
2023-12-29 18:52:41 -05:00
Info DiskInfo ` json:"-" `
2018-03-15 16:55:23 -04:00
}
2022-02-04 15:21:21 -05:00
// Drives returns the total number of drive entries recorded in the format,
// i.e. the sum of the lengths of all erasure sets.
func (f *formatErasureV3) Drives() (drives int) {
	sets := f.Erasure.Sets
	for i := 0; i < len(sets); i++ {
		drives += len(sets[i])
	}
	return drives
}
2020-06-12 23:04:01 -04:00
func ( f * formatErasureV3 ) Clone ( ) * formatErasureV3 {
2019-11-21 07:24:51 -05:00
b , err := json . Marshal ( f )
if err != nil {
panic ( err )
}
2020-06-12 23:04:01 -04:00
var dst formatErasureV3
2019-11-21 07:24:51 -05:00
if err = json . Unmarshal ( b , & dst ) ; err != nil {
panic ( err )
}
return & dst
}
2020-06-12 23:04:01 -04:00
// Returns formatErasure.Erasure.Version
2021-01-16 15:08:02 -05:00
func newFormatErasureV3 ( numSets int , setLen int ) * formatErasureV3 {
2020-06-12 23:04:01 -04:00
format := & formatErasureV3 { }
2018-02-15 20:45:57 -05:00
format . Version = formatMetaVersionV1
2020-06-12 23:04:01 -04:00
format . Format = formatBackendErasure
2022-05-30 13:58:37 -04:00
if setLen == 1 {
format . Format = formatBackendErasureSingle
}
2018-07-18 23:17:35 -04:00
format . ID = mustGetUUID ( )
2020-06-12 23:04:01 -04:00
format . Erasure . Version = formatErasureVersionV3
2021-01-16 15:08:02 -05:00
format . Erasure . DistributionAlgo = formatErasureVersionV3DistributionAlgoV3
2020-06-12 23:04:01 -04:00
format . Erasure . Sets = make ( [ ] [ ] string , numSets )
2018-02-15 20:45:57 -05:00
for i := 0 ; i < numSets ; i ++ {
2020-06-12 23:04:01 -04:00
format . Erasure . Sets [ i ] = make ( [ ] string , setLen )
2018-02-15 20:45:57 -05:00
for j := 0 ; j < setLen ; j ++ {
2020-06-12 23:04:01 -04:00
format . Erasure . Sets [ i ] [ j ] = mustGetUUID ( )
2018-02-15 20:45:57 -05:00
}
}
return format
}
2020-06-12 23:04:01 -04:00
// Returns format Erasure version after reading `format.json`, returns
// successfully the version only if the backend is Erasure.
2022-01-24 14:28:45 -05:00
func formatGetBackendErasureVersion ( b [ ] byte ) ( string , error ) {
2018-02-15 20:45:57 -05:00
meta := & formatMetaV1 { }
2022-01-24 14:28:45 -05:00
if err := json . Unmarshal ( b , meta ) ; err != nil {
2018-02-15 20:45:57 -05:00
return "" , err
}
if meta . Version != formatMetaVersionV1 {
return "" , fmt . Errorf ( ` format.Version expected: %s, got: %s ` , formatMetaVersionV1 , meta . Version )
}
2022-05-30 13:58:37 -04:00
if meta . Format != formatBackendErasure && meta . Format != formatBackendErasureSingle {
2022-11-07 03:11:58 -05:00
return "" , fmt . Errorf ( ` found backend type %s, expected %s or %s - to migrate to a supported backend visit https://min.io/docs/minio/linux/operations/install-deploy-manage/migrate-fs-gateway.html ` , meta . Format , formatBackendErasure , formatBackendErasureSingle )
2019-12-23 19:31:03 -05:00
}
2020-06-12 23:04:01 -04:00
// Erasure backend found, proceed to detect version.
format := & formatErasureVersionDetect { }
2022-01-24 14:28:45 -05:00
if err := json . Unmarshal ( b , format ) ; err != nil {
2019-12-23 19:31:03 -05:00
return "" , err
}
2020-06-12 23:04:01 -04:00
return format . Erasure . Version , nil
2018-02-15 20:45:57 -05:00
}
// Migrates all previous versions to latest version of `format.json`,
// this code calls migration in sequence, such as V1 is migrated to V2
2020-11-23 11:36:49 -05:00
// first before it V2 migrates to V3.n
2022-01-24 14:28:45 -05:00
func formatErasureMigrate ( export string ) ( [ ] byte , fs . FileInfo , error ) {
2018-02-15 20:45:57 -05:00
formatPath := pathJoin ( export , minioMetaBucket , formatConfigFile )
2022-01-24 14:28:45 -05:00
formatData , formatFi , err := xioutil . ReadFileWithFileInfo ( formatPath )
2018-02-15 20:45:57 -05:00
if err != nil {
2022-01-24 14:28:45 -05:00
return nil , nil , err
2018-02-15 20:45:57 -05:00
}
2022-01-24 14:28:45 -05:00
version , err := formatGetBackendErasureVersion ( formatData )
if err != nil {
2022-08-04 19:10:08 -04:00
return nil , nil , fmt . Errorf ( "Drive %s: %w" , export , err )
2022-01-24 14:28:45 -05:00
}
migrate := func ( formatPath string , formatData [ ] byte ) ( [ ] byte , fs . FileInfo , error ) {
2022-09-19 14:05:16 -04:00
if err = os . WriteFile ( formatPath , formatData , 0 o666 ) ; err != nil {
2022-01-24 14:28:45 -05:00
return nil , nil , err
}
formatFi , err := Lstat ( formatPath )
if err != nil {
return nil , nil , err
}
return formatData , formatFi , nil
}
2018-02-15 20:45:57 -05:00
switch version {
2020-06-12 23:04:01 -04:00
case formatErasureVersionV1 :
2022-01-24 14:28:45 -05:00
formatData , err = formatErasureMigrateV1ToV2 ( formatData , version )
if err != nil {
2022-08-04 19:10:08 -04:00
return nil , nil , fmt . Errorf ( "Drive %s: %w" , export , err )
2018-02-15 20:45:57 -05:00
}
2019-12-23 19:31:03 -05:00
// Migrate successful v1 => v2, proceed to v2 => v3
2020-06-12 23:04:01 -04:00
version = formatErasureVersionV2
2018-02-15 20:45:57 -05:00
fallthrough
2020-06-12 23:04:01 -04:00
case formatErasureVersionV2 :
2022-01-24 14:28:45 -05:00
formatData , err = formatErasureMigrateV2ToV3 ( formatData , export , version )
if err != nil {
2022-08-04 19:10:08 -04:00
return nil , nil , fmt . Errorf ( "Drive %s: %w" , export , err )
2018-03-15 16:55:23 -04:00
}
2019-12-23 19:31:03 -05:00
// Migrate successful v2 => v3, v3 is latest
2020-05-18 12:59:45 -04:00
// version = formatXLVersionV3
2022-01-24 14:28:45 -05:00
return migrate ( formatPath , formatData )
2020-06-12 23:04:01 -04:00
case formatErasureVersionV3 :
2019-12-23 19:31:03 -05:00
// v3 is the latest version, return.
2022-01-24 14:28:45 -05:00
return formatData , formatFi , nil
2018-02-15 20:45:57 -05:00
}
2022-01-24 14:28:45 -05:00
return nil , nil , fmt . Errorf ( ` Disk %s: unknown format version %s ` , export , version )
2018-02-15 20:45:57 -05:00
}
// Migrates version V1 of format.json to version V2 of format.json,
// migration fails upon any error.
2022-01-24 14:28:45 -05:00
func formatErasureMigrateV1ToV2 ( data [ ] byte , version string ) ( [ ] byte , error ) {
2020-06-12 23:04:01 -04:00
if version != formatErasureVersionV1 {
2022-01-24 14:28:45 -05:00
return nil , fmt . Errorf ( ` format version expected %s, found %s ` , formatErasureVersionV1 , version )
2018-02-15 20:45:57 -05:00
}
2020-06-12 23:04:01 -04:00
formatV1 := & formatErasureV1 { }
2022-01-24 14:28:45 -05:00
if err := json . Unmarshal ( data , formatV1 ) ; err != nil {
return nil , err
2018-02-15 20:45:57 -05:00
}
2016-06-02 19:34:15 -04:00
2020-06-12 23:04:01 -04:00
formatV2 := & formatErasureV2 { }
2018-03-15 16:55:23 -04:00
formatV2 . Version = formatMetaVersionV1
2020-06-12 23:04:01 -04:00
formatV2 . Format = formatBackendErasure
formatV2 . Erasure . Version = formatErasureVersionV2
2021-01-16 15:08:02 -05:00
formatV2 . Erasure . DistributionAlgo = formatErasureVersionV2DistributionAlgoV1
2020-06-12 23:04:01 -04:00
formatV2 . Erasure . This = formatV1 . Erasure . Disk
formatV2 . Erasure . Sets = make ( [ ] [ ] string , 1 )
formatV2 . Erasure . Sets [ 0 ] = make ( [ ] string , len ( formatV1 . Erasure . JBOD ) )
copy ( formatV2 . Erasure . Sets [ 0 ] , formatV1 . Erasure . JBOD )
2018-02-15 20:45:57 -05:00
2022-01-24 14:28:45 -05:00
return json . Marshal ( formatV2 )
2018-02-15 20:45:57 -05:00
}
2017-08-03 07:37:02 -04:00
2018-03-15 16:55:23 -04:00
// Migrates V2 for format.json to V3 (Flat hierarchy for multipart)
2022-01-24 14:28:45 -05:00
func formatErasureMigrateV2ToV3 ( data [ ] byte , export , version string ) ( [ ] byte , error ) {
2020-06-12 23:04:01 -04:00
if version != formatErasureVersionV2 {
2022-01-24 14:28:45 -05:00
return nil , fmt . Errorf ( ` format version expected %s, found %s ` , formatErasureVersionV2 , version )
2018-03-15 16:55:23 -04:00
}
2019-12-23 19:31:03 -05:00
2020-06-12 23:04:01 -04:00
formatV2 := & formatErasureV2 { }
2022-01-24 14:28:45 -05:00
if err := json . Unmarshal ( data , formatV2 ) ; err != nil {
return nil , err
2018-03-15 16:55:23 -04:00
}
2018-08-06 00:15:28 -04:00
2022-01-24 14:28:45 -05:00
tmpOld := pathJoin ( export , minioMetaTmpDeletedBucket , mustGetUUID ( ) )
if err := renameAll ( pathJoin ( export , minioMetaMultipartBucket ) ,
2023-09-13 11:14:36 -04:00
tmpOld , export ) ; err != nil && err != errFileNotFound {
2022-01-24 14:28:45 -05:00
logger . LogIf ( GlobalContext , fmt . Errorf ( "unable to rename (%s -> %s) %w, drive may be faulty please investigate" ,
pathJoin ( export , minioMetaMultipartBucket ) ,
tmpOld ,
osErrToFileErr ( err ) ) )
2018-03-15 16:55:23 -04:00
}
// format-V2 struct is exactly same as format-V1 except that version is "3"
// which indicates the simplified multipart backend.
2020-06-12 23:04:01 -04:00
formatV3 := formatErasureV3 { }
2018-03-15 16:55:23 -04:00
formatV3 . Version = formatV2 . Version
formatV3 . Format = formatV2 . Format
2020-06-12 23:04:01 -04:00
formatV3 . Erasure = formatV2 . Erasure
formatV3 . Erasure . Version = formatErasureVersionV3
2018-03-15 16:55:23 -04:00
2022-01-24 14:28:45 -05:00
return json . Marshal ( formatV3 )
2018-03-15 16:55:23 -04:00
}
2018-02-15 20:45:57 -05:00
// countErrs - count a specific error.
//
// It returns the number of entries in errs that match err according to
// errors.Is, so both identical and wrapped occurrences are counted. When err
// is nil, the nil entries of errs are counted.
//
// Matching relies on errors.Is alone: it already covers plain equality and,
// unlike a direct `==` on interface values, does not panic when the errors
// hold a non-comparable dynamic type.
func countErrs(errs []error, err error) int {
	count := 0
	for _, candidate := range errs {
		if errors.Is(candidate, err) {
			count++
		}
	}
	return count
}
2018-04-12 18:43:38 -04:00
// quorumUnformattedDisks reports whether the number of errUnformattedDisk
// entries in errs reaches a strict majority, i.e. at least len(errs)/2 + 1.
func quorumUnformattedDisks(errs []error) bool {
	quorum := len(errs)/2 + 1
	unformatted := countErrs(errs, errUnformattedDisk)
	return unformatted >= quorum
}
2020-06-12 23:04:01 -04:00
// loadFormatErasureAll - load all format config from all input disks in parallel.
func loadFormatErasureAll ( storageDisks [ ] StorageAPI , heal bool ) ( [ ] * formatErasureV3 , [ ] error ) {
2016-06-02 19:34:15 -04:00
// Initialize list of errors.
2019-10-14 12:44:51 -04:00
g := errgroup . WithNErrs ( len ( storageDisks ) )
2016-06-02 19:34:15 -04:00
// Initialize format configs.
2022-01-02 12:15:06 -05:00
formats := make ( [ ] * formatErasureV3 , len ( storageDisks ) )
2016-06-02 19:34:15 -04:00
2018-01-22 17:54:55 -05:00
// Load format from each disk in parallel
2019-10-14 12:44:51 -04:00
for index := range storageDisks {
index := index
g . Go ( func ( ) error {
if storageDisks [ index ] == nil {
return errDiskNotFound
}
2024-01-23 17:11:46 -05:00
format , formatData , err := loadFormatErasureWithData ( storageDisks [ index ] )
2019-10-14 12:44:51 -04:00
if err != nil {
return err
2016-06-02 19:34:15 -04:00
}
2023-12-29 18:52:41 -05:00
info , err := storageDisks [ index ] . DiskInfo ( context . Background ( ) , false )
if err != nil {
return err
}
format . Info = info
2018-01-08 17:30:55 -05:00
formats [ index ] = format
2020-03-27 17:48:30 -04:00
if ! heal {
// If no healing required, make the disks valid and
// online.
2020-06-12 23:04:01 -04:00
storageDisks [ index ] . SetDiskID ( format . Erasure . This )
2024-01-23 17:11:46 -05:00
storageDisks [ index ] . SetFormatData ( formatData )
2020-03-27 17:48:30 -04:00
}
2019-10-14 12:44:51 -04:00
return nil
} , index )
2016-06-02 19:34:15 -04:00
}
2019-10-14 12:44:51 -04:00
// Return all formats and errors if any.
return formats , g . Wait ( )
2016-06-02 19:34:15 -04:00
}
2023-01-05 23:41:19 -05:00
func saveFormatErasure ( disk StorageAPI , format * formatErasureV3 , healID string ) error {
2020-08-18 17:37:26 -04:00
if disk == nil || format == nil {
2020-03-27 17:48:30 -04:00
return errDiskNotFound
}
2018-02-15 20:45:57 -05:00
// Marshal and write to disk.
2024-01-23 17:11:46 -05:00
formatData , err := json . Marshal ( format )
2018-02-15 20:45:57 -05:00
if err != nil {
return err
2017-08-03 07:37:02 -04:00
}
2019-12-23 19:31:03 -05:00
tmpFormat := mustGetUUID ( )
2019-09-24 21:47:26 -04:00
2018-02-15 20:45:57 -05:00
// Purge any existing temporary file, okay to ignore errors here.
2022-07-11 12:15:54 -04:00
defer disk . Delete ( context . TODO ( ) , minioMetaBucket , tmpFormat , DeleteOptions {
Recursive : false ,
2023-11-29 01:35:16 -05:00
Immediate : false ,
2022-07-11 12:15:54 -04:00
} )
2017-08-03 07:37:02 -04:00
2019-12-23 19:31:03 -05:00
// write to unique file.
2024-01-23 17:11:46 -05:00
if err = disk . WriteAll ( context . TODO ( ) , minioMetaBucket , tmpFormat , formatData ) ; err != nil {
2018-02-15 20:45:57 -05:00
return err
2017-08-03 07:37:02 -04:00
}
2019-09-24 21:47:26 -04:00
// Rename file `uuid.json` --> `format.json`.
2020-09-04 12:45:06 -04:00
if err = disk . RenameFile ( context . TODO ( ) , minioMetaBucket , tmpFormat , minioMetaBucket , formatConfigFile ) ; err != nil {
2020-03-27 17:48:30 -04:00
return err
}
2024-01-17 23:41:23 -05:00
disk . SetDiskID ( format . Erasure . This )
2024-01-23 17:11:46 -05:00
disk . SetFormatData ( formatData )
2023-01-05 23:41:19 -05:00
if healID != "" {
2021-03-04 17:36:23 -05:00
ctx := context . Background ( )
2023-04-18 17:49:56 -04:00
ht := initHealingTracker ( disk , healID )
2021-03-04 17:36:23 -05:00
return ht . save ( ctx )
2020-09-28 22:39:32 -04:00
}
2020-03-27 17:48:30 -04:00
return nil
2016-05-20 05:22:22 -04:00
}
2024-01-23 17:11:46 -05:00
// loadFormatErasureWithData - loads format.json from disk and returns both
// the decoded format and the raw bytes it was decoded from.
//
// A missing file or missing volume is reported as errUnformattedDisk. On any
// error both the format and the data are nil.
func loadFormatErasureWithData(disk StorageAPI) (format *formatErasureV3, data []byte, err error) {
	// Probe the drive first: only errDiskNotFound aborts here, any other
	// DiskInfo error is ignored and the read below is still attempted.
	if _, infoErr := disk.DiskInfo(context.Background(), false); infoErr != nil && errors.Is(infoErr, errDiskNotFound) {
		return nil, nil, infoErr
	}

	raw, readErr := disk.ReadAll(context.TODO(), minioMetaBucket, formatConfigFile)
	switch {
	case readErr == nil:
		// Read succeeded, decode below.
	case errors.Is(readErr, errFileNotFound), errors.Is(readErr, errVolumeNotFound):
		// 'file not found' and 'volume not found' are treated the
		// same. 'volume not found' usually means its a fresh disk.
		return nil, nil, errUnformattedDisk
	default:
		return nil, nil, readErr
	}

	// Try to decode the format json into a formatErasureV3 struct.
	decoded := &formatErasureV3{}
	if decodeErr := json.Unmarshal(raw, decoded); decodeErr != nil {
		return nil, nil, decodeErr
	}

	// Success.
	return decoded, raw, nil
}
2020-06-12 23:04:01 -04:00
// loadFormatErasure - loads format.json from disk.
func loadFormatErasure ( disk StorageAPI ) ( format * formatErasureV3 , err error ) {
perf: websocket grid connectivity for all internode communication (#18461)
This PR adds a WebSocket grid feature that allows servers to communicate via
a single two-way connection.
There are two request types:
* Single requests, which are `[]byte => ([]byte, error)`. This is for efficient small
roundtrips with small payloads.
* Streaming requests which are `[]byte, chan []byte => chan []byte (and error)`,
which allows for different combinations of full two-way streams with an initial payload.
Only a single stream is created between two machines - and there is, as such, no
server/client relation since both sides can initiate and handle requests. Which server
initiates the request is decided deterministically on the server names.
Requests are made through a mux client and server, which handles message
passing, congestion, cancelation, timeouts, etc.
If a connection is lost, all requests are canceled, and the calling server will try
to reconnect. Registered handlers can operate directly on byte
slices or use a higher-level generics abstraction.
There is no versioning of handlers/clients, and incompatible changes should
be handled by adding new handlers.
The request path can be changed to a new one for any protocol changes.
First, all servers create a "Manager." The manager must know its address
as well as all remote addresses. This will manage all connections.
To get a connection to any remote, ask the manager to provide it given
the remote address using.
```
func (m *Manager) Connection(host string) *Connection
```
All serverside handlers must also be registered on the manager. This will
make sure that all incoming requests are served. The number of in-flight
requests and responses must also be given for streaming requests.
The "Connection" returned manages the mux-clients. Requests issued
to the connection will be sent to the remote.
* `func (c *Connection) Request(ctx context.Context, h HandlerID, req []byte) ([]byte, error)`
performs a single request and returns the result. Any deadline provided on the request is
forwarded to the server, and canceling the context will make the function return at once.
* `func (c *Connection) NewStream(ctx context.Context, h HandlerID, payload []byte) (st *Stream, err error)`
will initiate a remote call and send the initial payload.
```Go
// A Stream is a two-way stream.
// All responses *must* be read by the caller.
// If the call is canceled through the context,
//The appropriate error will be returned.
type Stream struct {
// Responses from the remote server.
// Channel will be closed after an error or when the remote closes.
// All responses *must* be read by the caller until either an error is returned or the channel is closed.
// Canceling the context will cause the context cancellation error to be returned.
Responses <-chan Response
// Requests sent to the server.
// If the handler is defined with 0 incoming capacity this will be nil.
// Channel *must* be closed to signal the end of the stream.
// If the request context is canceled, the stream will no longer process requests.
Requests chan<- []byte
}
type Response struct {
Msg []byte
Err error
}
```
There are generic versions of the server/client handlers that allow the use of type
safe implementations for data types that support msgpack marshal/unmarshal.
2023-11-20 20:09:35 -05:00
// Ensure that the grid is online.
if _ , err := disk . DiskInfo ( context . Background ( ) , false ) ; err != nil {
2023-11-24 12:07:14 -05:00
if errors . Is ( err , errDiskNotFound ) {
perf: websocket grid connectivity for all internode communication (#18461)
This PR adds a WebSocket grid feature that allows servers to communicate via
a single two-way connection.
There are two request types:
* Single requests, which are `[]byte => ([]byte, error)`. This is for efficient small
roundtrips with small payloads.
* Streaming requests which are `[]byte, chan []byte => chan []byte (and error)`,
which allows for different combinations of full two-way streams with an initial payload.
Only a single stream is created between two machines - and there is, as such, no
server/client relation since both sides can initiate and handle requests. Which server
initiates the request is decided deterministically on the server names.
Requests are made through a mux client and server, which handles message
passing, congestion, cancelation, timeouts, etc.
If a connection is lost, all requests are canceled, and the calling server will try
to reconnect. Registered handlers can operate directly on byte
slices or use a higher-level generics abstraction.
There is no versioning of handlers/clients, and incompatible changes should
be handled by adding new handlers.
The request path can be changed to a new one for any protocol changes.
First, all servers create a "Manager." The manager must know its address
as well as all remote addresses. This will manage all connections.
To get a connection to any remote, ask the manager to provide it given
the remote address using.
```
func (m *Manager) Connection(host string) *Connection
```
All serverside handlers must also be registered on the manager. This will
make sure that all incoming requests are served. The number of in-flight
requests and responses must also be given for streaming requests.
The "Connection" returned manages the mux-clients. Requests issued
to the connection will be sent to the remote.
* `func (c *Connection) Request(ctx context.Context, h HandlerID, req []byte) ([]byte, error)`
performs a single request and returns the result. Any deadline provided on the request is
forwarded to the server, and canceling the context will make the function return at once.
* `func (c *Connection) NewStream(ctx context.Context, h HandlerID, payload []byte) (st *Stream, err error)`
will initiate a remote call and send the initial payload.
```Go
// A Stream is a two-way stream.
// All responses *must* be read by the caller.
// If the call is canceled through the context,
//The appropriate error will be returned.
type Stream struct {
// Responses from the remote server.
// Channel will be closed after an error or when the remote closes.
// All responses *must* be read by the caller until either an error is returned or the channel is closed.
// Canceling the context will cause the context cancellation error to be returned.
Responses <-chan Response
// Requests sent to the server.
// If the handler is defined with 0 incoming capacity this will be nil.
// Channel *must* be closed to signal the end of the stream.
// If the request context is canceled, the stream will no longer process requests.
Requests chan<- []byte
}
type Response struct {
Msg []byte
Err error
}
```
There are generic versions of the server/client handlers that allow the use of type
safe implementations for data types that support msgpack marshal/unmarshal.
2023-11-20 20:09:35 -05:00
return nil , err
}
}
2020-09-04 12:45:06 -04:00
buf , err := disk . ReadAll ( context . TODO ( ) , minioMetaBucket , formatConfigFile )
2016-06-25 17:51:06 -04:00
if err != nil {
2016-05-20 05:22:22 -04:00
// 'file not found' and 'volume not found' as
// same. 'volume not found' usually means its a fresh disk.
if err == errFileNotFound || err == errVolumeNotFound {
return nil , errUnformattedDisk
}
2016-05-07 03:59:43 -04:00
return nil , err
2016-05-04 15:18:20 -04:00
}
2016-06-24 05:06:23 -04:00
// Try to decode format json into formatConfigV1 struct.
2020-06-12 23:04:01 -04:00
format = & formatErasureV3 { }
2016-06-25 17:51:06 -04:00
if err = json . Unmarshal ( buf , format ) ; err != nil {
2016-05-07 03:59:43 -04:00
return nil , err
2016-05-04 15:18:20 -04:00
}
2016-06-24 05:06:23 -04:00
// Success.
2016-05-20 05:22:22 -04:00
return format , nil
}
2020-06-12 23:04:01 -04:00
// Valid formatErasure basic versions.
2021-01-29 14:40:55 -05:00
func checkFormatErasureValue ( formatErasure * formatErasureV3 , disk StorageAPI ) error {
2018-02-15 20:45:57 -05:00
// Validate format version and format type.
2020-06-12 23:04:01 -04:00
if formatErasure . Version != formatMetaVersionV1 {
2021-01-29 14:40:55 -05:00
return fmt . Errorf ( "Unsupported version of backend format [%s] found on %s" , formatErasure . Version , disk )
2016-07-26 06:18:47 -04:00
}
2022-05-30 13:58:37 -04:00
if formatErasure . Format != formatBackendErasure && formatErasure . Format != formatBackendErasureSingle {
2021-01-29 14:40:55 -05:00
return fmt . Errorf ( "Unsupported backend format [%s] found on %s" , formatErasure . Format , disk )
2017-08-03 07:37:02 -04:00
}
2020-06-12 23:04:01 -04:00
if formatErasure . Erasure . Version != formatErasureVersionV3 {
2021-01-29 14:40:55 -05:00
return fmt . Errorf ( "Unsupported Erasure backend format found [%s] on %s" , formatErasure . Erasure . Version , disk )
2018-01-22 17:54:55 -05:00
}
return nil
2017-08-03 07:37:02 -04:00
}
2018-02-15 20:45:57 -05:00
// Check all format values.
2021-01-29 14:40:55 -05:00
func checkFormatErasureValues ( formats [ ] * formatErasureV3 , disks [ ] StorageAPI , setDriveCount int ) error {
2020-06-12 23:04:01 -04:00
for i , formatErasure := range formats {
if formatErasure == nil {
2018-02-15 20:45:57 -05:00
continue
}
2021-01-29 14:40:55 -05:00
if err := checkFormatErasureValue ( formatErasure , disks [ i ] ) ; err != nil {
2018-02-15 20:45:57 -05:00
return err
}
2020-06-12 23:04:01 -04:00
if len ( formats ) != len ( formatErasure . Erasure . Sets ) * len ( formatErasure . Erasure . Sets [ 0 ] ) {
2022-08-04 19:10:08 -04:00
return fmt . Errorf ( "%s drive is already being used in another erasure deployment. (Number of drives specified: %d but the number of drives found in the %s drive's format.json: %d)" ,
2021-01-29 14:40:55 -05:00
disks [ i ] , len ( formats ) , humanize . Ordinal ( i + 1 ) , len ( formatErasure . Erasure . Sets ) * len ( formatErasure . Erasure . Sets [ 0 ] ) )
2018-02-15 20:45:57 -05:00
}
2021-01-16 15:08:02 -05:00
// Only if custom erasure drive count is set, verify if the
// set_drive_count was manually set - we need to honor what is
// present on the drives.
2020-08-26 22:29:35 -04:00
if globalCustomErasureDriveCount && len ( formatErasure . Erasure . Sets [ 0 ] ) != setDriveCount {
2022-08-04 19:10:08 -04:00
return fmt . Errorf ( "%s drive is already formatted with %d drives per erasure set. This cannot be changed to %d, please revert your MINIO_ERASURE_SET_DRIVE_COUNT setting" , disks [ i ] , len ( formatErasure . Erasure . Sets [ 0 ] ) , setDriveCount )
2020-03-08 16:30:25 -04:00
}
2017-08-03 07:37:02 -04:00
}
2018-02-15 20:45:57 -05:00
return nil
}
2017-08-03 07:37:02 -04:00
2020-06-12 23:04:01 -04:00
// Get Deployment ID for the Erasure sets from format.json.
2018-07-18 23:17:35 -04:00
// This need not be in quorum. Even if one of the format.json
// file has this value, we assume it is valid.
// If more than one format.json's have different id, it is considered a corrupt
// backend format.
2020-06-12 23:04:01 -04:00
func formatErasureGetDeploymentID ( refFormat * formatErasureV3 , formats [ ] * formatErasureV3 ) ( string , error ) {
2018-07-18 23:17:35 -04:00
var deploymentID string
for _ , format := range formats {
if format == nil || format . ID == "" {
continue
}
2020-06-12 23:04:01 -04:00
if reflect . DeepEqual ( format . Erasure . Sets , refFormat . Erasure . Sets ) {
2018-07-18 23:17:35 -04:00
// Found an ID in one of the format.json file
// Set deploymentID for the first time.
if deploymentID == "" {
deploymentID = format . ID
} else if deploymentID != format . ID {
// DeploymentID found earlier doesn't match with the
// current format.json's ID.
2020-08-03 21:17:48 -04:00
return "" , fmt . Errorf ( "Deployment IDs do not match expected %s, got %s: %w" ,
deploymentID , format . ID , errCorruptedFormat )
2018-07-18 23:17:35 -04:00
}
}
}
return deploymentID , nil
}
2020-06-12 23:04:01 -04:00
// formatErasureFixDeploymentID - Add deployment id if it is not present.
2024-01-23 17:11:46 -05:00
func formatErasureFixDeploymentID ( endpoints Endpoints , storageDisks [ ] StorageAPI , refFormat * formatErasureV3 , formats [ ] * formatErasureV3 ) ( err error ) {
2018-07-18 23:17:35 -04:00
for index := range formats {
2020-06-12 23:04:01 -04:00
// If the Erasure sets do not match, set those formats to nil,
2018-07-18 23:17:35 -04:00
// We do not have to update the ID on those format.json file.
2020-06-12 23:04:01 -04:00
if formats [ index ] != nil && ! reflect . DeepEqual ( formats [ index ] . Erasure . Sets , refFormat . Erasure . Sets ) {
2018-07-18 23:17:35 -04:00
formats [ index ] = nil
}
}
2020-08-03 21:17:48 -04:00
2020-06-12 23:04:01 -04:00
refFormat . ID , err = formatErasureGetDeploymentID ( refFormat , formats )
2018-07-18 23:17:35 -04:00
if err != nil {
return err
}
// If ID is set, then some other node got the lock
// before this node could and generated an ID
// for the deployment. No need to generate one.
if refFormat . ID != "" {
return nil
}
// ID is generated for the first time,
// We set the ID in all the formats and update.
refFormat . ID = mustGetUUID ( )
for _ , format := range formats {
if format != nil {
format . ID = refFormat . ID
}
}
// Deployment ID needs to be set on all the disks.
// Save `format.json` across all disks.
2020-06-12 23:04:01 -04:00
return saveFormatErasureAll ( GlobalContext , storageDisks , formats )
2018-07-18 23:17:35 -04:00
}
// Update only the valid local disks which have not been updated before.
2020-06-12 23:04:01 -04:00
func formatErasureFixLocalDeploymentID ( endpoints Endpoints , storageDisks [ ] StorageAPI , refFormat * formatErasureV3 ) error {
2018-07-18 23:17:35 -04:00
// If this server was down when the deploymentID was updated
// then we make sure that we update the local disks with the deploymentID.
2020-03-27 17:48:30 -04:00
// Initialize errs to collect errors inside go-routine.
g := errgroup . WithNErrs ( len ( storageDisks ) )
for index := range storageDisks {
index := index
g . Go ( func ( ) error {
if endpoints [ index ] . IsLocal && storageDisks [ index ] != nil && storageDisks [ index ] . IsOnline ( ) {
2020-06-12 23:04:01 -04:00
format , err := loadFormatErasure ( storageDisks [ index ] )
2020-03-27 17:48:30 -04:00
if err != nil {
// Disk can be offline etc.
// ignore the errors seen here.
return nil
}
if format . ID != "" {
return nil
}
2020-06-12 23:04:01 -04:00
if ! reflect . DeepEqual ( format . Erasure . Sets , refFormat . Erasure . Sets ) {
2020-03-27 17:48:30 -04:00
return nil
}
format . ID = refFormat . ID
2020-09-28 22:39:32 -04:00
// Heal the drive if we fixed its deployment ID.
2023-01-05 23:41:19 -05:00
if err := saveFormatErasure ( storageDisks [ index ] , format , mustGetUUID ( ) ) ; err != nil {
2020-04-09 12:30:02 -04:00
logger . LogIf ( GlobalContext , err )
2020-03-27 17:48:30 -04:00
return fmt . Errorf ( "Unable to save format.json, %w" , err )
}
2018-07-18 23:17:35 -04:00
}
2020-03-27 17:48:30 -04:00
return nil
} , index )
}
for _ , err := range g . Wait ( ) {
if err != nil {
return err
2018-07-18 23:17:35 -04:00
}
}
return nil
}
2020-06-12 23:04:01 -04:00
// Get backend Erasure format in quorum `format.json`.
func getFormatErasureInQuorum ( formats [ ] * formatErasureV3 ) ( * formatErasureV3 , error ) {
2022-02-04 15:21:21 -05:00
formatCountMap := make ( map [ int ] int , len ( formats ) )
for _ , format := range formats {
2018-02-15 20:45:57 -05:00
if format == nil {
2018-01-22 17:54:55 -05:00
continue
}
2022-02-04 15:21:21 -05:00
formatCountMap [ format . Drives ( ) ] ++
2016-06-01 19:15:56 -04:00
}
2016-07-26 06:18:47 -04:00
2022-02-04 15:21:21 -05:00
maxDrives := 0
2018-02-15 20:45:57 -05:00
maxCount := 0
2022-02-04 15:21:21 -05:00
for drives , count := range formatCountMap {
2018-02-15 20:45:57 -05:00
if count > maxCount {
maxCount = count
2022-02-04 15:21:21 -05:00
maxDrives = drives
2016-07-28 19:49:59 -04:00
}
2018-02-15 20:45:57 -05:00
}
2022-02-04 15:21:21 -05:00
if maxDrives == 0 {
return nil , errErasureReadQuorum
}
2018-02-15 20:45:57 -05:00
if maxCount < len ( formats ) / 2 {
2020-06-12 23:04:01 -04:00
return nil , errErasureReadQuorum
2018-02-15 20:45:57 -05:00
}
2022-02-04 15:21:21 -05:00
for i , format := range formats {
if format == nil {
continue
}
if format . Drives ( ) == maxDrives {
2019-11-21 07:24:51 -05:00
format := formats [ i ] . Clone ( )
2020-06-12 23:04:01 -04:00
format . Erasure . This = ""
2019-11-21 07:24:51 -05:00
return format , nil
2016-07-28 19:49:59 -04:00
}
}
2018-02-15 20:45:57 -05:00
2020-06-12 23:04:01 -04:00
return nil , errErasureReadQuorum
2016-07-28 19:49:59 -04:00
}
2020-06-12 23:04:01 -04:00
func formatErasureV3Check ( reference * formatErasureV3 , format * formatErasureV3 ) error {
2019-11-21 07:24:51 -05:00
tmpFormat := format . Clone ( )
2020-06-12 23:04:01 -04:00
this := tmpFormat . Erasure . This
tmpFormat . Erasure . This = ""
if len ( reference . Erasure . Sets ) != len ( format . Erasure . Sets ) {
return fmt . Errorf ( "Expected number of sets %d, got %d" , len ( reference . Erasure . Sets ) , len ( format . Erasure . Sets ) )
2018-02-15 20:45:57 -05:00
}
2017-08-03 07:37:02 -04:00
2018-02-15 20:45:57 -05:00
// Make sure that the sets match.
2020-06-12 23:04:01 -04:00
for i := range reference . Erasure . Sets {
if len ( reference . Erasure . Sets [ i ] ) != len ( format . Erasure . Sets [ i ] ) {
2018-02-15 20:45:57 -05:00
return fmt . Errorf ( "Each set should be of same size, expected %d got %d" ,
2020-06-12 23:04:01 -04:00
len ( reference . Erasure . Sets [ i ] ) , len ( format . Erasure . Sets [ i ] ) )
2016-07-28 19:49:59 -04:00
}
2020-06-12 23:04:01 -04:00
for j := range reference . Erasure . Sets [ i ] {
if reference . Erasure . Sets [ i ] [ j ] != format . Erasure . Sets [ i ] [ j ] {
2020-10-24 16:23:08 -04:00
return fmt . Errorf ( "UUID on positions %d:%d do not match with, expected %s got %s: (%w)" ,
i , j , reference . Erasure . Sets [ i ] [ j ] , format . Erasure . Sets [ i ] [ j ] , errInconsistentDisk )
2016-07-28 19:49:59 -04:00
}
}
2018-02-15 20:45:57 -05:00
}
// Make sure that the diskID is found in the set.
2020-06-12 23:04:01 -04:00
for i := 0 ; i < len ( tmpFormat . Erasure . Sets ) ; i ++ {
for j := 0 ; j < len ( tmpFormat . Erasure . Sets [ i ] ) ; j ++ {
if this == tmpFormat . Erasure . Sets [ i ] [ j ] {
2018-02-15 20:45:57 -05:00
return nil
2016-07-28 19:49:59 -04:00
}
}
}
2022-08-04 19:10:08 -04:00
return fmt . Errorf ( "DriveID %s not found in any drive sets %s" , this , format . Erasure . Sets )
2016-07-28 19:49:59 -04:00
}
2020-06-12 23:04:01 -04:00
// saveFormatErasureAll - populates `format.json` on disks in its order.
func saveFormatErasureAll ( ctx context . Context , storageDisks [ ] StorageAPI , formats [ ] * formatErasureV3 ) error {
2019-10-14 12:44:51 -04:00
g := errgroup . WithNErrs ( len ( storageDisks ) )
2016-07-28 19:49:59 -04:00
2018-02-15 20:45:57 -05:00
// Write `format.json` to all disks.
2019-10-14 12:44:51 -04:00
for index := range storageDisks {
index := index
g . Go ( func ( ) error {
2020-07-17 13:08:04 -04:00
if formats [ index ] == nil {
return errDiskNotFound
}
2023-01-05 23:41:19 -05:00
return saveFormatErasure ( storageDisks [ index ] , formats [ index ] , "" )
2019-10-14 12:44:51 -04:00
} , index )
2016-07-28 19:49:59 -04:00
}
2019-10-14 12:44:51 -04:00
// Wait for the routines to finish.
2022-07-21 21:04:17 -04:00
return reduceWriteQuorumErrs ( ctx , g . Wait ( ) , nil , len ( storageDisks ) )
2018-02-15 20:45:57 -05:00
}
2018-04-04 00:58:48 -04:00
// relinquishes the underlying connection for all storage disks.
2022-05-30 13:58:37 -04:00
func closeStorageDisks ( storageDisks ... StorageAPI ) {
2022-01-24 14:28:45 -05:00
var wg sync . WaitGroup
2018-04-04 00:58:48 -04:00
for _ , disk := range storageDisks {
if disk == nil {
continue
}
2022-01-24 14:28:45 -05:00
wg . Add ( 1 )
go func ( disk StorageAPI ) {
defer wg . Done ( )
disk . Close ( )
} ( disk )
2018-04-04 00:58:48 -04:00
}
2022-01-24 14:28:45 -05:00
wg . Wait ( )
2018-04-04 00:58:48 -04:00
}
2019-09-27 19:47:12 -04:00
// Initialize storage disks for each endpoint.
// Errors are returned for each endpoint with matching index.
2023-08-01 13:54:26 -04:00
func initStorageDisksWithErrors ( endpoints Endpoints , opts storageOpts ) ( [ ] StorageAPI , [ ] error ) {
2018-02-15 20:45:57 -05:00
// Bootstrap disks.
storageDisks := make ( [ ] StorageAPI , len ( endpoints ) )
2019-10-14 12:44:51 -04:00
g := errgroup . WithNErrs ( len ( endpoints ) )
for index := range endpoints {
index := index
2020-08-18 17:37:26 -04:00
g . Go ( func ( ) ( err error ) {
2023-08-01 13:54:26 -04:00
storageDisks [ index ] , err = newStorageAPI ( endpoints [ index ] , opts )
2020-08-18 17:37:26 -04:00
return err
2019-10-14 12:44:51 -04:00
} , index )
2019-08-02 15:17:26 -04:00
}
2019-10-14 12:44:51 -04:00
return storageDisks , g . Wait ( )
2019-08-02 15:17:26 -04:00
}
2020-06-12 23:04:01 -04:00
// formatErasureV3ThisEmpty - find out if '.This' field is empty
2018-03-19 12:13:00 -04:00
// in any of the input `formats`, if yes return true.
2020-06-12 23:04:01 -04:00
func formatErasureV3ThisEmpty ( formats [ ] * formatErasureV3 ) bool {
2018-03-19 12:13:00 -04:00
for _ , format := range formats {
if format == nil {
continue
}
// NOTE: This code is specifically needed when migrating version
// V1 to V2 to V3, in a scenario such as this we only need to handle
// single sets since we never used to support multiple sets in releases
// with V1 format version.
2020-06-12 23:04:01 -04:00
if len ( format . Erasure . Sets ) > 1 {
2018-03-19 12:13:00 -04:00
continue
}
2020-06-12 23:04:01 -04:00
if format . Erasure . This == "" {
2018-03-19 12:13:00 -04:00
return true
}
}
return false
}
2020-06-12 23:04:01 -04:00
// fixFormatErasureV3 - fix format Erasure configuration on all disks.
func fixFormatErasureV3 ( storageDisks [ ] StorageAPI , endpoints Endpoints , formats [ ] * formatErasureV3 ) error {
2020-03-27 17:48:30 -04:00
g := errgroup . WithNErrs ( len ( formats ) )
for i := range formats {
i := i
g . Go ( func ( ) error {
if formats [ i ] == nil || ! endpoints [ i ] . IsLocal {
return nil
}
// NOTE: This code is specifically needed when migrating version
// V1 to V2 to V3, in a scenario such as this we only need to handle
// single sets since we never used to support multiple sets in releases
// with V1 format version.
2020-06-12 23:04:01 -04:00
if len ( formats [ i ] . Erasure . Sets ) > 1 {
2020-03-27 17:48:30 -04:00
return nil
}
2020-06-12 23:04:01 -04:00
if formats [ i ] . Erasure . This == "" {
formats [ i ] . Erasure . This = formats [ i ] . Erasure . Sets [ 0 ] [ i ]
2020-09-28 22:39:32 -04:00
// Heal the drive if drive has .This empty.
2023-01-05 23:41:19 -05:00
if err := saveFormatErasure ( storageDisks [ i ] , formats [ i ] , mustGetUUID ( ) ) ; err != nil {
2020-03-27 17:48:30 -04:00
return err
}
2018-03-19 12:13:00 -04:00
}
2020-03-27 17:48:30 -04:00
return nil
} , i )
}
for _ , err := range g . Wait ( ) {
if err != nil {
return err
2018-03-19 12:13:00 -04:00
}
}
return nil
}
2020-06-12 23:04:01 -04:00
// initFormatErasure - save Erasure format configuration on all disks.
2021-01-19 13:01:31 -05:00
func initFormatErasure ( ctx context . Context , storageDisks [ ] StorageAPI , setCount , setDriveCount int , deploymentID , distributionAlgo string , sErrs [ ] error ) ( * formatErasureV3 , error ) {
2021-01-16 15:08:02 -05:00
format := newFormatErasureV3 ( setCount , setDriveCount )
2020-06-12 23:04:01 -04:00
formats := make ( [ ] * formatErasureV3 , len ( storageDisks ) )
2023-01-10 02:07:45 -05:00
wantAtMost , err := ecDrivesNoConfig ( setDriveCount )
if err != nil {
return nil , err
}
2016-05-04 15:18:20 -04:00
2018-02-15 20:45:57 -05:00
for i := 0 ; i < setCount ; i ++ {
2020-08-26 22:29:35 -04:00
hostCount := make ( map [ string ] int , setDriveCount )
for j := 0 ; j < setDriveCount ; j ++ {
disk := storageDisks [ i * setDriveCount + j ]
2023-02-06 12:26:09 -05:00
if disk == nil {
continue
}
2019-11-21 07:24:51 -05:00
newFormat := format . Clone ( )
2020-06-12 23:04:01 -04:00
newFormat . Erasure . This = format . Erasure . Sets [ i ] [ j ]
2021-01-19 13:01:31 -05:00
if distributionAlgo != "" {
newFormat . Erasure . DistributionAlgo = distributionAlgo
}
2019-11-20 05:09:30 -05:00
if deploymentID != "" {
newFormat . ID = deploymentID
}
2020-01-13 16:09:10 -05:00
hostCount [ disk . Hostname ( ) ] ++
2020-08-26 22:29:35 -04:00
formats [ i * setDriveCount + j ] = newFormat
2018-02-15 20:45:57 -05:00
}
2023-02-06 12:26:09 -05:00
var once sync . Once
for host , count := range hostCount {
if count > wantAtMost {
if host == "" {
host = "local"
2020-01-13 16:09:10 -05:00
}
2023-02-06 12:26:09 -05:00
once . Do ( func ( ) {
if len ( hostCount ) == 1 {
return
}
logger . Info ( " * Set %v:" , i + 1 )
for j := 0 ; j < setDriveCount ; j ++ {
disk := storageDisks [ i * setDriveCount + j ]
logger . Info ( " - Drive: %s" , disk . String ( ) )
}
} )
logger . Info ( color . Yellow ( "WARNING:" ) + " Host %v has more than %v drives of set. " +
"A host failure will result in data becoming unavailable." , host , wantAtMost )
2020-01-13 16:09:10 -05:00
}
}
2016-05-04 15:18:20 -04:00
}
2020-08-18 17:37:26 -04:00
// Mark all root disks down
markRootDisksAsDown ( storageDisks , sErrs )
2018-02-15 20:45:57 -05:00
// Save formats `format.json` across all disks.
2020-06-12 23:04:01 -04:00
if err := saveFormatErasureAll ( ctx , storageDisks , formats ) ; err != nil {
2018-02-15 20:45:57 -05:00
return nil , err
2017-04-18 13:35:17 -04:00
}
2018-02-15 20:45:57 -05:00
2020-06-12 23:04:01 -04:00
return getFormatErasureInQuorum ( formats )
2017-04-18 13:35:17 -04:00
}
2020-01-13 16:09:10 -05:00
// ecDrivesNoConfig returns the erasure coded drives in a set if no config has been set.
// It will attempt to read it from env variable and fall back to drives/2.
2023-01-10 02:07:45 -05:00
func ecDrivesNoConfig ( setDriveCount int ) ( int , error ) {
sc , err := storageclass . LookupConfig ( config . KVS { } , setDriveCount )
if err != nil {
return 0 , err
2020-01-13 16:09:10 -05:00
}
2023-01-10 02:07:45 -05:00
return sc . GetParityForSC ( storageclass . STANDARD ) , nil
2020-01-13 16:09:10 -05:00
}
2018-02-15 20:45:57 -05:00
// Initialize a new set of set formats which will be written to all disks.
2023-12-29 18:52:41 -05:00
func newHealFormatSets ( refFormat * formatErasureV3 , setCount , setDriveCount int , formats [ ] * formatErasureV3 , errs [ ] error ) ( [ ] [ ] * formatErasureV3 , [ ] [ ] DiskInfo ) {
2020-06-12 23:04:01 -04:00
newFormats := make ( [ ] [ ] * formatErasureV3 , setCount )
for i := range refFormat . Erasure . Sets {
2020-08-26 22:29:35 -04:00
newFormats [ i ] = make ( [ ] * formatErasureV3 , setDriveCount )
2018-02-15 20:45:57 -05:00
}
2023-12-29 18:52:41 -05:00
currentDisksInfo := make ( [ ] [ ] DiskInfo , setCount )
for i := range refFormat . Erasure . Sets {
currentDisksInfo [ i ] = make ( [ ] DiskInfo , setDriveCount )
}
2020-06-12 23:04:01 -04:00
for i := range refFormat . Erasure . Sets {
for j := range refFormat . Erasure . Sets [ i ] {
2020-10-26 13:29:29 -04:00
if errors . Is ( errs [ i * setDriveCount + j ] , errUnformattedDisk ) {
2020-06-12 23:04:01 -04:00
newFormats [ i ] [ j ] = & formatErasureV3 { }
2019-01-22 21:32:06 -05:00
newFormats [ i ] [ j ] . ID = refFormat . ID
2018-02-15 20:45:57 -05:00
newFormats [ i ] [ j ] . Format = refFormat . Format
2020-10-26 13:29:29 -04:00
newFormats [ i ] [ j ] . Version = refFormat . Version
newFormats [ i ] [ j ] . Erasure . This = refFormat . Erasure . Sets [ i ] [ j ]
newFormats [ i ] [ j ] . Erasure . Sets = refFormat . Erasure . Sets
2020-06-12 23:04:01 -04:00
newFormats [ i ] [ j ] . Erasure . Version = refFormat . Erasure . Version
newFormats [ i ] [ j ] . Erasure . DistributionAlgo = refFormat . Erasure . DistributionAlgo
2018-02-15 20:45:57 -05:00
}
2023-12-29 18:52:41 -05:00
if format := formats [ i * setDriveCount + j ] ; format != nil && ( errs [ i * setDriveCount + j ] == nil ) {
currentDisksInfo [ i ] [ j ] = format . Info
}
2018-02-15 20:45:57 -05:00
}
}
2023-12-29 18:52:41 -05:00
return newFormats , currentDisksInfo
2016-05-04 15:18:20 -04:00
}