2016-08-30 19:22:27 -07:00
/ *
2019-04-09 11:39:42 -07:00
* MinIO Cloud Storage , ( C ) 2016 MinIO , Inc .
2016-08-30 19:22:27 -07:00
*
* Licensed under the Apache License , Version 2.0 ( the "License" ) ;
* you may not use this file except in compliance with the License .
* You may obtain a copy of the License at
*
* http : //www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing , software
* distributed under the License is distributed on an "AS IS" BASIS ,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND , either express or implied .
* See the License for the specific language governing permissions and
* limitations under the License .
* /
package cmd
import (
2018-04-05 15:04:40 -07:00
"context"
2017-06-17 11:20:12 -07:00
"fmt"
2018-02-15 17:45:57 -08:00
"os"
2019-10-30 00:04:39 -07:00
"sync"
2016-08-30 19:22:27 -07:00
"time"
2018-04-05 15:04:40 -07:00
"github.com/minio/minio/cmd/logger"
2019-01-26 01:33:28 +04:00
"github.com/minio/minio/pkg/sync/errgroup"
2016-08-30 19:22:27 -07:00
)
2018-02-15 17:45:57 -08:00
var printEndpointError = func ( ) func ( Endpoint , error ) {
2019-10-30 00:04:39 -07:00
var mutex sync . Mutex
2018-02-15 17:45:57 -08:00
printOnce := make ( map [ Endpoint ] map [ string ] bool )
return func ( endpoint Endpoint , err error ) {
2018-09-13 21:42:50 -07:00
reqInfo := ( & logger . ReqInfo { } ) . AppendTags ( "endpoint" , endpoint . String ( ) )
2018-04-05 15:04:40 -07:00
ctx := logger . SetReqInfo ( context . Background ( ) , reqInfo )
2019-10-30 00:04:39 -07:00
mutex . Lock ( )
defer mutex . Unlock ( )
2018-02-15 17:45:57 -08:00
m , ok := printOnce [ endpoint ]
if ! ok {
m = make ( map [ string ] bool )
m [ err . Error ( ) ] = true
printOnce [ endpoint ] = m
2018-08-14 13:58:48 -07:00
logger . LogAlwaysIf ( ctx , err )
2018-02-15 17:45:57 -08:00
return
2017-12-28 18:32:48 +01:00
}
2018-02-15 17:45:57 -08:00
if m [ err . Error ( ) ] {
return
2017-12-28 18:32:48 +01:00
}
2018-02-15 17:45:57 -08:00
m [ err . Error ( ) ] = true
2018-08-14 13:58:48 -07:00
logger . LogAlwaysIf ( ctx , err )
2017-12-28 18:32:48 +01:00
}
2018-02-15 17:45:57 -08:00
} ( )
2016-08-30 19:22:27 -07:00
2018-03-15 13:55:23 -07:00
// Migrates backend format of local disks.
2018-02-15 17:45:57 -08:00
func formatXLMigrateLocalEndpoints ( endpoints EndpointList ) error {
2019-01-26 01:33:28 +04:00
g := errgroup . WithNErrs ( len ( endpoints ) )
for index , endpoint := range endpoints {
2018-02-15 17:45:57 -08:00
if ! endpoint . IsLocal {
continue
2016-08-30 19:22:27 -07:00
}
2019-01-26 01:33:28 +04:00
index := index
g . Go ( func ( ) error {
epPath := endpoints [ index ] . Path
formatPath := pathJoin ( epPath , minioMetaBucket , formatConfigFile )
if _ , err := os . Stat ( formatPath ) ; err != nil {
if os . IsNotExist ( err ) {
return nil
}
2019-08-05 11:41:29 -07:00
return fmt . Errorf ( "unable to access (%s) %s" , formatPath , err )
2018-02-15 17:45:57 -08:00
}
2019-01-26 01:33:28 +04:00
return formatXLMigrate ( epPath )
} , index )
}
for _ , err := range g . Wait ( ) {
if err != nil {
2018-02-15 17:45:57 -08:00
return err
2016-11-23 15:48:10 -08:00
}
}
2018-02-15 17:45:57 -08:00
return nil
2016-11-23 15:48:10 -08:00
}
2018-03-15 13:55:23 -07:00
// Cleans up tmp directory of local disks.
func formatXLCleanupTmpLocalEndpoints ( endpoints EndpointList ) error {
2019-01-26 01:33:28 +04:00
g := errgroup . WithNErrs ( len ( endpoints ) )
for index , endpoint := range endpoints {
2018-03-15 13:55:23 -07:00
if ! endpoint . IsLocal {
continue
}
2019-01-26 01:33:28 +04:00
index := index
g . Go ( func ( ) error {
epPath := endpoints [ index ] . Path
// If disk is not formatted there is nothing to be cleaned up.
formatPath := pathJoin ( epPath , minioMetaBucket , formatConfigFile )
if _ , err := os . Stat ( formatPath ) ; err != nil {
if os . IsNotExist ( err ) {
return nil
}
2019-08-05 11:41:29 -07:00
return fmt . Errorf ( "unable to access (%s) %s" , formatPath , err )
2018-03-15 13:55:23 -07:00
}
2019-01-26 01:33:28 +04:00
if _ , err := os . Stat ( pathJoin ( epPath , minioMetaTmpBucket + "-old" ) ) ; err != nil {
if ! os . IsNotExist ( err ) {
2019-08-05 11:41:29 -07:00
return fmt . Errorf ( "unable to access (%s) %s" ,
pathJoin ( epPath , minioMetaTmpBucket + "-old" ) ,
err )
2019-01-26 01:33:28 +04:00
}
}
// Need to move temporary objects left behind from previous run of minio
// server to a unique directory under `minioMetaTmpBucket-old` to clean
// up `minioMetaTmpBucket` for the current run.
//
// /disk1/.minio.sys/tmp-old/
// |__ 33a58b40-aecc-4c9f-a22f-ff17bfa33b62
// |__ e870a2c1-d09c-450c-a69c-6eaa54a89b3e
//
// In this example, `33a58b40-aecc-4c9f-a22f-ff17bfa33b62` directory contains
// temporary objects from one of the previous runs of minio server.
2019-08-05 11:41:29 -07:00
tmpOld := pathJoin ( epPath , minioMetaTmpBucket + "-old" , mustGetUUID ( ) )
2019-01-26 01:33:28 +04:00
if err := renameAll ( pathJoin ( epPath , minioMetaTmpBucket ) ,
2019-08-05 11:41:29 -07:00
tmpOld ) ; err != nil && err != errFileNotFound {
return fmt . Errorf ( "unable to rename (%s -> %s) %s" ,
pathJoin ( epPath , minioMetaTmpBucket ) ,
tmpOld ,
err )
2019-01-26 01:33:28 +04:00
}
// Removal of tmp-old folder is backgrounded completely.
go removeAll ( pathJoin ( epPath , minioMetaTmpBucket + "-old" ) )
2019-08-05 11:41:29 -07:00
if err := mkdirAll ( pathJoin ( epPath , minioMetaTmpBucket ) , 0777 ) ; err != nil {
return fmt . Errorf ( "unable to create (%s) %s" ,
pathJoin ( epPath , minioMetaTmpBucket ) ,
err )
}
return nil
2019-01-26 01:33:28 +04:00
} , index )
}
for _ , err := range g . Wait ( ) {
if err != nil {
2018-03-15 13:55:23 -07:00
return err
}
}
return nil
}
2018-04-03 23:58:48 -05:00
// validateXLFormats validates the reference format against a list of XL
// formats and against the requested layout.
//
// Nil entries in formats are skipped. Every other entry is compared with the
// reference via formatXLV3Check, and a failure is reported together with the
// endpoint at the same index. Afterwards the number of sets and the number
// of drives in the first set of the reference format must equal setCount
// and drivesPerSet. In the mismatch messages "Expected" carries the value
// found in the format and "got" the value passed in by the caller.
func validateXLFormats(format *formatXLV3, formats []*formatXLV3, endpoints EndpointList, setCount, drivesPerSet int) error {
	for i, candidate := range formats {
		if candidate == nil {
			continue
		}
		if err := formatXLV3Check(format, candidate); err != nil {
			return fmt.Errorf("%s format error: %s", endpoints[i], err)
		}
	}

	if formatSets := len(format.XL.Sets); formatSets != setCount {
		return fmt.Errorf("Current backend format is inconsistent with input args (%s), Expected set count %d, got %d", endpoints, formatSets, setCount)
	}

	if formatDrives := len(format.XL.Sets[0]); formatDrives != drivesPerSet {
		return fmt.Errorf("Current backend format is inconsistent with input args (%s), Expected drive count per set %d, got %d", endpoints, formatDrives, drivesPerSet)
	}

	return nil
}
// errXLV3ThisEmpty is returned when an XL version 3 format is found with
// its '.This' field empty. It was added to fix a regression in release
// RELEASE.2018-03-16T22-52-12Z, where migrating v1 to v2 to v3 failed to
// capture the '.This' field properly. That field indicates the disk UUID
// association. This error is returned when we see this situation in
// format.json, for more info refer
// https://github.com/minio/minio/issues/5667
var errXLV3ThisEmpty = fmt . Errorf ( "XL format version 3 has This field empty" )
// connect to list of endpoints and load all XL disk formats, validate the formats are correct
// and are in quorum, if no formats are found attempt to initialize all of them for the first
// time. additionally make sure to close all the disks used in this attempt.
2018-09-11 00:21:59 +01:00
func connectLoadInitFormats ( retryCount int , firstDisk bool , endpoints EndpointList , setCount , drivesPerSet int ) ( * formatXLV3 , error ) {
// Initialize all storage disks
2019-09-27 16:47:12 -07:00
storageDisks , errs := initStorageDisksWithErrors ( endpoints )
2018-04-03 23:58:48 -05:00
defer closeStorageDisks ( storageDisks )
2019-09-27 16:47:12 -07:00
for i , err := range errs {
if err != nil && err != errDiskNotFound {
return nil , fmt . Errorf ( "Disk %s: %w" , endpoints [ i ] , err )
}
}
2018-04-03 23:58:48 -05:00
2018-12-04 10:25:56 -08:00
// Attempt to load all `format.json` from all disks.
formatConfigs , sErrs := loadFormatXLAll ( storageDisks )
// Check if we have
for i , sErr := range sErrs {
if _ , ok := formatCriticalErrors [ sErr ] ; ok {
return nil , fmt . Errorf ( "Disk %s: %s" , endpoints [ i ] , sErr )
}
}
2018-09-11 00:21:59 +01:00
// Connect to all storage disks, a connection failure will be
// only logged after some retries.
for _ , disk := range storageDisks {
if disk != nil {
connectErr := disk . LastError ( )
if connectErr != nil && retryCount >= 5 {
logger . Info ( "Unable to connect to %s: %v\n" , disk . String ( ) , connectErr . Error ( ) )
}
}
}
2018-04-03 23:58:48 -05:00
// Pre-emptively check if one of the formatted disks
// is invalid. This function returns success for the
// most part unless one of the formats is not consistent
// with expected XL format. For example if a user is
// trying to pool FS backend into an XL set.
2019-09-27 16:47:12 -07:00
if err := checkFormatXLValues ( formatConfigs ) ; err != nil {
2018-04-03 23:58:48 -05:00
return nil , err
}
2018-04-12 15:43:38 -07:00
// All disks report unformatted we should initialized everyone.
if shouldInitXLDisks ( sErrs ) && firstDisk {
2019-04-02 23:20:13 +05:30
// Initialize erasure code format on disks
format , err := initFormatXL ( context . Background ( ) , storageDisks , setCount , drivesPerSet )
if err != nil {
return nil , err
}
// Assign globalDeploymentID on first run for the
// minio server managing the first disk
globalDeploymentID = format . ID
2018-04-03 23:58:48 -05:00
}
2018-04-12 15:43:38 -07:00
// Return error when quorum unformatted disks - indicating we are
// waiting for first server to be online.
if quorumUnformattedDisks ( sErrs ) && ! firstDisk {
return nil , errNotFirstDisk
}
// Return error when quorum unformatted disks but waiting for rest
// of the servers to be online.
if quorumUnformattedDisks ( sErrs ) && firstDisk {
return nil , errFirstDiskWait
}
2018-04-03 23:58:48 -05:00
// Following function is added to fix a regressions which was introduced
// in release RELEASE.2018-03-16T22-52-12Z after migrating v1 to v2 to v3.
// This migration failed to capture '.This' field properly which indicates
// the disk UUID association. Below function is called to handle and fix
// this regression, for more info refer https://github.com/minio/minio/issues/5667
2019-09-27 16:47:12 -07:00
if err := fixFormatXLV3 ( storageDisks , endpoints , formatConfigs ) ; err != nil {
2018-04-03 23:58:48 -05:00
return nil , err
}
// If any of the .This field is still empty, we return error.
if formatXLV3ThisEmpty ( formatConfigs ) {
return nil , errXLV3ThisEmpty
}
format , err := getFormatXLInQuorum ( formatConfigs )
if err != nil {
return nil , err
}
// Validate all format configs with reference format.
if err = validateXLFormats ( format , formatConfigs , endpoints , setCount , drivesPerSet ) ; err != nil {
return nil , err
}
2018-07-18 20:17:35 -07:00
// Get the deploymentID if set.
format . ID , err = formatXLGetDeploymentID ( format , formatConfigs )
if err != nil {
return nil , err
}
if format . ID == "" {
2019-11-13 12:17:45 -08:00
// Not a first disk, wait until first disk fixes deploymentID
if ! firstDisk {
return nil , errNotFirstDisk
}
if err = formatXLFixDeploymentID ( endpoints , storageDisks , format ) ; err != nil {
2018-07-18 20:17:35 -07:00
return nil , err
}
}
2018-11-19 14:47:03 -08:00
globalDeploymentID = format . ID
2018-07-18 20:17:35 -07:00
2019-11-13 12:17:45 -08:00
if err = formatXLFixLocalDeploymentID ( endpoints , storageDisks , format ) ; err != nil {
2018-07-18 20:17:35 -07:00
return nil , err
}
2018-04-03 23:58:48 -05:00
return format , nil
}
2018-02-15 17:45:57 -08:00
// Format disks before initialization of object layer.
2019-10-15 18:35:41 -07:00
func waitForFormatXL ( firstDisk bool , endpoints EndpointList , setCount , disksPerSet int ) ( format * formatXLV3 , err error ) {
2018-02-15 17:45:57 -08:00
if len ( endpoints ) == 0 || setCount == 0 || disksPerSet == 0 {
return nil , errInvalidArgument
2016-11-02 16:51:06 +01:00
}
2018-02-15 17:45:57 -08:00
if err = formatXLMigrateLocalEndpoints ( endpoints ) ; err != nil {
return nil , err
2016-08-30 19:22:27 -07:00
}
2018-03-15 13:55:23 -07:00
if err = formatXLCleanupTmpLocalEndpoints ( endpoints ) ; err != nil {
return nil , err
}
2017-02-07 02:16:29 -08:00
// Done channel is used to close any lingering retry routine, as soon
// as this function returns.
doneCh := make ( chan struct { } )
2016-08-30 19:22:27 -07:00
2017-02-07 02:16:29 -08:00
// Indicate to our retry routine to exit cleanly, upon this function return.
2016-10-05 12:48:07 -07:00
defer close ( doneCh )
2016-12-07 19:22:00 +01:00
// prepare getElapsedTime() to calculate elapsed time since we started trying formatting disks.
// All times are rounded to avoid showing milli, micro and nano seconds
formatStartTime := time . Now ( ) . Round ( time . Second )
getElapsedTime := func ( ) string {
return time . Now ( ) . Round ( time . Second ) . Sub ( formatStartTime ) . String ( )
}
2016-10-05 12:48:07 -07:00
// Wait on the jitter retry loop.
2017-02-07 02:16:29 -08:00
retryTimerCh := newRetryTimerSimple ( doneCh )
2016-11-02 23:27:36 +01:00
for {
select {
2018-09-11 00:21:59 +01:00
case retryCount := <- retryTimerCh :
format , err := connectLoadInitFormats ( retryCount , firstDisk , endpoints , setCount , disksPerSet )
2018-04-03 23:58:48 -05:00
if err != nil {
switch err {
case errNotFirstDisk :
// Fresh setup, wait for first server to be up.
2018-04-10 09:37:14 -07:00
logger . Info ( "Waiting for the first server to format the disks." )
2017-04-18 10:35:17 -07:00
continue
2018-04-12 15:43:38 -07:00
case errFirstDiskWait :
// Fresh setup, wait for other servers to come up.
logger . Info ( "Waiting for all other servers to be online to format the disks." )
continue
2018-04-03 23:58:48 -05:00
case errXLReadQuorum :
// no quorum available continue to wait for minimum number of servers.
2018-04-10 09:37:14 -07:00
logger . Info ( "Waiting for a minimum of %d disks to come online (elapsed %s)\n" , len ( endpoints ) / 2 , getElapsedTime ( ) )
2018-04-03 23:58:48 -05:00
continue
case errXLV3ThisEmpty :
// need to wait for this error to be healed, so continue.
continue
default :
// For all other unhandled errors we exit and fail.
return nil , err
2017-04-18 10:35:17 -07:00
}
2016-12-11 15:18:55 -08:00
}
2018-04-03 23:58:48 -05:00
return format , nil
2018-02-06 15:07:17 -08:00
case <- globalOSSignalCh :
2018-02-15 17:45:57 -08:00
return nil , fmt . Errorf ( "Initializing data volumes gracefully stopped" )
2016-11-02 23:27:36 +01:00
}
}
2016-08-30 19:22:27 -07:00
}