2016-08-30 22:22:27 -04:00
/*
 * Minio Cloud Storage, (C) 2016 Minio, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cmd
import (
2018-04-05 18:04:40 -04:00
"context"
2017-06-17 14:20:12 -04:00
"fmt"
2018-02-15 20:45:57 -05:00
"os"
2016-08-30 22:22:27 -04:00
"time"
2018-04-05 18:04:40 -04:00
"github.com/minio/minio/cmd/logger"
2016-08-30 22:22:27 -04:00
)
2018-02-15 20:45:57 -05:00
var printEndpointError = func ( ) func ( Endpoint , error ) {
printOnce := make ( map [ Endpoint ] map [ string ] bool )
return func ( endpoint Endpoint , err error ) {
2018-09-14 00:42:50 -04:00
reqInfo := ( & logger . ReqInfo { } ) . AppendTags ( "endpoint" , endpoint . String ( ) )
2018-04-05 18:04:40 -04:00
ctx := logger . SetReqInfo ( context . Background ( ) , reqInfo )
2018-02-15 20:45:57 -05:00
m , ok := printOnce [ endpoint ]
if ! ok {
m = make ( map [ string ] bool )
m [ err . Error ( ) ] = true
printOnce [ endpoint ] = m
2018-08-14 16:58:48 -04:00
logger . LogAlwaysIf ( ctx , err )
2018-02-15 20:45:57 -05:00
return
2017-12-28 12:32:48 -05:00
}
2018-02-15 20:45:57 -05:00
if m [ err . Error ( ) ] {
return
2017-12-28 12:32:48 -05:00
}
2018-02-15 20:45:57 -05:00
m [ err . Error ( ) ] = true
2018-08-14 16:58:48 -04:00
logger . LogAlwaysIf ( ctx , err )
2017-12-28 12:32:48 -05:00
}
2018-02-15 20:45:57 -05:00
} ( )
2016-08-30 22:22:27 -04:00
2018-03-15 16:55:23 -04:00
// Migrates backend format of local disks.
2018-02-15 20:45:57 -05:00
func formatXLMigrateLocalEndpoints ( endpoints EndpointList ) error {
for _ , endpoint := range endpoints {
if ! endpoint . IsLocal {
continue
2016-08-30 22:22:27 -04:00
}
2018-02-15 20:45:57 -05:00
formatPath := pathJoin ( endpoint . Path , minioMetaBucket , formatConfigFile )
if _ , err := os . Stat ( formatPath ) ; err != nil {
if os . IsNotExist ( err ) {
continue
}
return err
2016-09-08 12:35:13 -04:00
}
2018-02-15 20:45:57 -05:00
if err := formatXLMigrate ( endpoint . Path ) ; err != nil {
return err
2016-11-23 18:48:10 -05:00
}
}
2018-02-15 20:45:57 -05:00
return nil
2016-11-23 18:48:10 -05:00
}
2018-03-15 16:55:23 -04:00
// Cleans up tmp directory of local disks.
func formatXLCleanupTmpLocalEndpoints ( endpoints EndpointList ) error {
for _ , endpoint := range endpoints {
if ! endpoint . IsLocal {
continue
}
formatPath := pathJoin ( endpoint . Path , minioMetaBucket , formatConfigFile )
if _ , err := os . Stat ( formatPath ) ; err != nil {
if os . IsNotExist ( err ) {
continue
}
return err
}
2018-08-06 00:15:28 -04:00
if err := removeAll ( pathJoin ( endpoint . Path , minioMetaTmpBucket ) ) ; err != nil {
2018-03-15 16:55:23 -04:00
return err
}
2018-08-06 00:15:28 -04:00
if err := mkdirAll ( pathJoin ( endpoint . Path , minioMetaTmpBucket ) , 0777 ) ; err != nil {
2018-03-15 16:55:23 -04:00
return err
}
}
return nil
}
2018-04-04 00:58:48 -04:00
// validateXLFormats validates the reference format against a list of XL formats
// and against the set layout requested through the input arguments.
//
// Every non-nil entry of formats is compared with the reference format using
// formatXLV3Check; a mismatch is reported together with the endpoint at the
// same index. Afterwards the number of sets and the number of drives in the
// first set of the reference format must equal setCount and drivesPerSet
// respectively. In the resulting error messages the value taken from the
// reference format is printed as "Expected" and the input argument as "got".
func validateXLFormats(format *formatXLV3, formats []*formatXLV3, endpoints EndpointList, setCount, drivesPerSet int) error {
	for i, candidate := range formats {
		if candidate != nil {
			if err := formatXLV3Check(format, candidate); err != nil {
				return fmt.Errorf("%s format error: %s", endpoints[i], err)
			}
		}
	}

	// Cases are evaluated in order, so the first set is only inspected once
	// the set count is known to match.
	switch {
	case len(format.XL.Sets) != setCount:
		return fmt.Errorf("Current backend format is inconsistent with input args (%s), Expected set count %d, got %d", endpoints, len(format.XL.Sets), setCount)
	case len(format.XL.Sets[0]) != drivesPerSet:
		return fmt.Errorf("Current backend format is inconsistent with input args (%s), Expected drive count per set %d, got %d", endpoints, len(format.XL.Sets[0]), drivesPerSet)
	}

	return nil
}
// errXLV3ThisEmpty is the error returned when a version 3 XL format.json
// is found with an empty '.This' field, i.e. the field which indicates
// the disk UUID association. It was added to handle a regression in
// release RELEASE.2018-03-16T22-52-12Z, where migrating v1 to v2 to v3
// failed to capture the '.This' field properly. For more info refer to
// https://github.com/minio/minio/issues/5667
var errXLV3ThisEmpty = fmt . Errorf ( "XL format version 3 has This field empty" )
// connect to list of endpoints and load all XL disk formats, validate the formats are correct
// and are in quorum, if no formats are found attempt to initialize all of them for the first
// time. additionally make sure to close all the disks used in this attempt.
2018-09-10 19:21:59 -04:00
func connectLoadInitFormats ( retryCount int , firstDisk bool , endpoints EndpointList , setCount , drivesPerSet int ) ( * formatXLV3 , error ) {
// Initialize all storage disks
2018-04-04 00:58:48 -04:00
storageDisks , err := initStorageDisks ( endpoints )
if err != nil {
return nil , err
}
defer closeStorageDisks ( storageDisks )
2018-12-04 13:25:56 -05:00
// Attempt to load all `format.json` from all disks.
formatConfigs , sErrs := loadFormatXLAll ( storageDisks )
// Check if we have
for i , sErr := range sErrs {
if _ , ok := formatCriticalErrors [ sErr ] ; ok {
return nil , fmt . Errorf ( "Disk %s: %s" , endpoints [ i ] , sErr )
}
}
2018-09-10 19:21:59 -04:00
// Connect to all storage disks, a connection failure will be
// only logged after some retries.
for _ , disk := range storageDisks {
if disk != nil {
connectErr := disk . LastError ( )
if connectErr != nil && retryCount >= 5 {
logger . Info ( "Unable to connect to %s: %v\n" , disk . String ( ) , connectErr . Error ( ) )
}
}
}
2018-04-04 00:58:48 -04:00
// Pre-emptively check if one of the formatted disks
// is invalid. This function returns success for the
// most part unless one of the formats is not consistent
// with expected XL format. For example if a user is
// trying to pool FS backend into an XL set.
if err = checkFormatXLValues ( formatConfigs ) ; err != nil {
return nil , err
}
2018-04-12 18:43:38 -04:00
// All disks report unformatted we should initialized everyone.
if shouldInitXLDisks ( sErrs ) && firstDisk {
2018-04-05 18:04:40 -04:00
return initFormatXL ( context . Background ( ) , storageDisks , setCount , drivesPerSet )
2018-04-04 00:58:48 -04:00
}
2018-04-12 18:43:38 -04:00
// Return error when quorum unformatted disks - indicating we are
// waiting for first server to be online.
if quorumUnformattedDisks ( sErrs ) && ! firstDisk {
return nil , errNotFirstDisk
}
// Return error when quorum unformatted disks but waiting for rest
// of the servers to be online.
if quorumUnformattedDisks ( sErrs ) && firstDisk {
return nil , errFirstDiskWait
}
2018-04-04 00:58:48 -04:00
// Following function is added to fix a regressions which was introduced
// in release RELEASE.2018-03-16T22-52-12Z after migrating v1 to v2 to v3.
// This migration failed to capture '.This' field properly which indicates
// the disk UUID association. Below function is called to handle and fix
// this regression, for more info refer https://github.com/minio/minio/issues/5667
if err = fixFormatXLV3 ( storageDisks , endpoints , formatConfigs ) ; err != nil {
return nil , err
}
// If any of the .This field is still empty, we return error.
if formatXLV3ThisEmpty ( formatConfigs ) {
return nil , errXLV3ThisEmpty
}
format , err := getFormatXLInQuorum ( formatConfigs )
if err != nil {
return nil , err
}
// Validate all format configs with reference format.
if err = validateXLFormats ( format , formatConfigs , endpoints , setCount , drivesPerSet ) ; err != nil {
return nil , err
}
2018-07-18 23:17:35 -04:00
// Get the deploymentID if set.
format . ID , err = formatXLGetDeploymentID ( format , formatConfigs )
if err != nil {
return nil , err
}
if format . ID == "" {
if err = formatXLFixDeploymentID ( context . Background ( ) , storageDisks , format ) ; err != nil {
return nil , err
}
}
2018-11-19 17:47:03 -05:00
globalDeploymentID = format . ID
2018-07-18 23:17:35 -04:00
if err = formatXLFixLocalDeploymentID ( context . Background ( ) , storageDisks , format ) ; err != nil {
return nil , err
}
2018-04-04 00:58:48 -04:00
return format , nil
}
2018-02-15 20:45:57 -05:00
// Format disks before initialization of object layer.
2018-04-05 18:04:40 -04:00
func waitForFormatXL ( ctx context . Context , firstDisk bool , endpoints EndpointList , setCount , disksPerSet int ) ( format * formatXLV3 , err error ) {
2018-02-15 20:45:57 -05:00
if len ( endpoints ) == 0 || setCount == 0 || disksPerSet == 0 {
return nil , errInvalidArgument
2016-11-02 11:51:06 -04:00
}
2018-02-15 20:45:57 -05:00
if err = formatXLMigrateLocalEndpoints ( endpoints ) ; err != nil {
return nil , err
2016-08-30 22:22:27 -04:00
}
2018-03-15 16:55:23 -04:00
if err = formatXLCleanupTmpLocalEndpoints ( endpoints ) ; err != nil {
return nil , err
}
2017-02-07 05:16:29 -05:00
// Done channel is used to close any lingering retry routine, as soon
// as this function returns.
doneCh := make ( chan struct { } )
2016-08-30 22:22:27 -04:00
2017-02-07 05:16:29 -05:00
// Indicate to our retry routine to exit cleanly, upon this function return.
2016-10-05 15:48:07 -04:00
defer close ( doneCh )
2016-12-07 13:22:00 -05:00
// prepare getElapsedTime() to calculate elapsed time since we started trying formatting disks.
// All times are rounded to avoid showing milli, micro and nano seconds
formatStartTime := time . Now ( ) . Round ( time . Second )
getElapsedTime := func ( ) string {
return time . Now ( ) . Round ( time . Second ) . Sub ( formatStartTime ) . String ( )
}
2016-10-05 15:48:07 -04:00
// Wait on the jitter retry loop.
2017-02-07 05:16:29 -05:00
retryTimerCh := newRetryTimerSimple ( doneCh )
2016-11-02 18:27:36 -04:00
for {
select {
2018-09-10 19:21:59 -04:00
case retryCount := <- retryTimerCh :
format , err := connectLoadInitFormats ( retryCount , firstDisk , endpoints , setCount , disksPerSet )
2018-04-04 00:58:48 -04:00
if err != nil {
switch err {
case errNotFirstDisk :
// Fresh setup, wait for first server to be up.
2018-04-10 12:37:14 -04:00
logger . Info ( "Waiting for the first server to format the disks." )
2017-04-18 13:35:17 -04:00
continue
2018-04-12 18:43:38 -04:00
case errFirstDiskWait :
// Fresh setup, wait for other servers to come up.
logger . Info ( "Waiting for all other servers to be online to format the disks." )
continue
2018-04-04 00:58:48 -04:00
case errXLReadQuorum :
// no quorum available continue to wait for minimum number of servers.
2018-04-10 12:37:14 -04:00
logger . Info ( "Waiting for a minimum of %d disks to come online (elapsed %s)\n" , len ( endpoints ) / 2 , getElapsedTime ( ) )
2018-04-04 00:58:48 -04:00
continue
case errXLV3ThisEmpty :
// need to wait for this error to be healed, so continue.
continue
default :
// For all other unhandled errors we exit and fail.
return nil , err
2017-04-18 13:35:17 -04:00
}
2016-12-11 18:18:55 -05:00
}
2018-04-04 00:58:48 -04:00
return format , nil
2018-02-06 18:07:17 -05:00
case <- globalOSSignalCh :
2018-02-15 20:45:57 -05:00
return nil , fmt . Errorf ( "Initializing data volumes gracefully stopped" )
2016-11-02 18:27:36 -04:00
}
}
2016-08-30 22:22:27 -04:00
}