minio/cmd/prepare-storage.go
Anis Elleuch 069876e262 xl: All nodes create meta volumes in its local disks (#8786)
Meta volume directories (tmp/, background-ops/, etc.)
under .minio.sys are created when disks are formatted,
but also when the cluster is started.

However, using MakeVolBulk() is not appropriate in the
case of a user migrating from a version which does not
have .minio.sys/background-ops/. The reason is that
MakeVolBulk() exits early when an error occurs:
errVolumeExists in this case, which is expected since
some directories such as tmp/ already exist.

This commit avoids MakeVolBulk() and uses MakeVol()
instead.

The PR also makes each node create meta volumes in its
local disks, and stops relying on the first disk, since
the first node could be offline.
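
To illustrate the failure mode, here is a minimal, self-contained sketch; makeVol, makeVolBulk and the errVolumeExists mapping below are simplified stand-ins for the storage-layer calls, not MinIO's actual API:

package main

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
)

var errVolumeExists = errors.New("volume already exists")

// makeVol creates one volume directory, mapping "already exists"
// to errVolumeExists the way a storage layer would.
func makeVol(root, vol string) error {
	err := os.Mkdir(filepath.Join(root, vol), 0777)
	if os.IsExist(err) {
		return errVolumeExists
	}
	return err
}

// makeVolBulk stops at the first error, so a pre-existing tmp/
// prevents background-ops/ from ever being created.
func makeVolBulk(root string, vols ...string) error {
	for _, vol := range vols {
		if err := makeVol(root, vol); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	root, _ := os.MkdirTemp("", "disk")
	defer os.RemoveAll(root)

	// Simulate a disk formatted by an older release: tmp/ exists,
	// background-ops/ does not.
	makeVol(root, "tmp")

	// Bulk creation exits early with errVolumeExists, so
	// background-ops/ is never created.
	fmt.Println("bulk:", makeVolBulk(root, "tmp", "background-ops"))

	// Per-volume creation tolerates errVolumeExists and proceeds.
	for _, vol := range []string{"tmp", "background-ops"} {
		if err := makeVol(root, vol); err != nil && err != errVolumeExists {
			fmt.Println("unexpected:", err)
		}
	}
	fmt.Println("per-volume: both volumes present")
}
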
2020-01-15 12:36:52 -08:00

/*
 * MinIO Cloud Storage, (C) 2016 MinIO, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cmd

import (
	"context"
	"crypto/tls"
	"fmt"
	"net/http"
	"net/url"
	"os"
	"path"
	"sync"
	"time"

	xhttp "github.com/minio/minio/cmd/http"
	"github.com/minio/minio/cmd/logger"
	"github.com/minio/minio/cmd/rest"
	"github.com/minio/minio/pkg/sync/errgroup"
)
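
// printEndpointError - logs an error for a given endpoint, printing each
// distinct error message only once per endpoint to avoid flooding the logs
// while servers come online.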
var printEndpointError = func() func(Endpoint, error) {
	var mutex sync.Mutex
	printOnce := make(map[Endpoint]map[string]bool)

	return func(endpoint Endpoint, err error) {
		reqInfo := (&logger.ReqInfo{}).AppendTags("endpoint", endpoint.String())
		ctx := logger.SetReqInfo(context.Background(), reqInfo)
		mutex.Lock()
		defer mutex.Unlock()
		m, ok := printOnce[endpoint]
		if !ok {
			m = make(map[string]bool)
			m[err.Error()] = true
			printOnce[endpoint] = m
			logger.LogAlwaysIf(ctx, err)
			return
		}
		if m[err.Error()] {
			return
		}
		m[err.Error()] = true
		logger.LogAlwaysIf(ctx, err)
	}
}()

// Migrates backend format of local disks.
func formatXLMigrateLocalEndpoints(endpoints Endpoints) error {
	g := errgroup.WithNErrs(len(endpoints))
	for index, endpoint := range endpoints {
		if !endpoint.IsLocal {
			continue
		}
		index := index
		g.Go(func() error {
			epPath := endpoints[index].Path
			formatPath := pathJoin(epPath, minioMetaBucket, formatConfigFile)
			if _, err := os.Stat(formatPath); err != nil {
				if os.IsNotExist(err) {
					return nil
				}
				return fmt.Errorf("unable to access (%s) %w", formatPath, err)
			}
			return formatXLMigrate(epPath)
		}, index)
	}
	for _, err := range g.Wait() {
		if err != nil {
			return err
		}
	}
	return nil
}

// Cleans up tmp directory of local disks.
func formatXLCleanupTmpLocalEndpoints(endpoints Endpoints) error {
	g := errgroup.WithNErrs(len(endpoints))
	for index, endpoint := range endpoints {
		if !endpoint.IsLocal {
			continue
		}
		index := index
		g.Go(func() error {
			epPath := endpoints[index].Path
			// If disk is not formatted there is nothing to be cleaned up.
			formatPath := pathJoin(epPath, minioMetaBucket, formatConfigFile)
			if _, err := os.Stat(formatPath); err != nil {
				if os.IsNotExist(err) {
					return nil
				}
				return fmt.Errorf("unable to access (%s) %w", formatPath, err)
			}
			if _, err := os.Stat(pathJoin(epPath, minioMetaTmpBucket+"-old")); err != nil {
				if !os.IsNotExist(err) {
					return fmt.Errorf("unable to access (%s) %w",
						pathJoin(epPath, minioMetaTmpBucket+"-old"),
						err)
				}
			}

			// Need to move temporary objects left behind from previous run of minio
			// server to a unique directory under `minioMetaTmpBucket-old` to clean
			// up `minioMetaTmpBucket` for the current run.
			//
			// /disk1/.minio.sys/tmp-old/
			//  |__ 33a58b40-aecc-4c9f-a22f-ff17bfa33b62
			//  |__ e870a2c1-d09c-450c-a69c-6eaa54a89b3e
			//
			// In this example, `33a58b40-aecc-4c9f-a22f-ff17bfa33b62` directory contains
			// temporary objects from one of the previous runs of minio server.
			tmpOld := pathJoin(epPath, minioMetaTmpBucket+"-old", mustGetUUID())
			if err := renameAll(pathJoin(epPath, minioMetaTmpBucket),
				tmpOld); err != nil && err != errFileNotFound {
				return fmt.Errorf("unable to rename (%s -> %s) %w",
					pathJoin(epPath, minioMetaTmpBucket),
					tmpOld,
					err)
			}

			// Removal of tmp-old folder is backgrounded completely.
			go removeAll(pathJoin(epPath, minioMetaTmpBucket+"-old"))

			if err := mkdirAll(pathJoin(epPath, minioMetaTmpBucket), 0777); err != nil {
				return fmt.Errorf("unable to create (%s) %w",
					pathJoin(epPath, minioMetaTmpBucket),
					err)
			}
			return nil
		}, index)
	}
	for _, err := range g.Wait() {
		if err != nil {
			return err
		}
	}
	return nil
}

// validate reference format against list of XL formats.
func validateXLFormats(format *formatXLV3, formats []*formatXLV3, endpoints Endpoints, setCount, drivesPerSet int) error {
	for i := range formats {
		if formats[i] == nil {
			continue
		}
		if err := formatXLV3Check(format, formats[i]); err != nil {
			return fmt.Errorf("%s format error: %w", endpoints[i], err)
		}
	}
	if len(format.XL.Sets) != setCount {
		return fmt.Errorf("Current backend format is inconsistent with input args (%s), Expected set count %d, got %d", endpoints, len(format.XL.Sets), setCount)
	}
	if len(format.XL.Sets[0]) != drivesPerSet {
		return fmt.Errorf("Current backend format is inconsistent with input args (%s), Expected drive count per set %d, got %d", endpoints, len(format.XL.Sets[0]), drivesPerSet)
	}
	return nil
}

// Following error message is added to fix a regression in release
// RELEASE.2018-03-16T22-52-12Z after migrating v1 to v2 to v3. This
// migration failed to capture '.This' field properly which indicates
// the disk UUID association. Below error message is returned when
// we see this situation in format.json, for more info refer
// https://github.com/minio/minio/issues/5667
var errXLV3ThisEmpty = fmt.Errorf("XL format version 3 has This field empty")

// IsServerResolvable - checks if the endpoint is resolvable
// by sending a naked HTTP request with liveness checks.
func IsServerResolvable(endpoint Endpoint) error {
	serverURL := &url.URL{
		Scheme: endpoint.Scheme,
		Host:   endpoint.Host,
		Path:   path.Join(healthCheckPathPrefix, healthCheckLivenessPath),
	}

	var tlsConfig *tls.Config
	if globalIsSSL {
		tlsConfig = &tls.Config{
			ServerName: endpoint.Hostname(),
			RootCAs:    globalRootCAs,
			NextProtos: []string{"http/1.1"}, // Force http1.1
		}
	}

	req, err := http.NewRequest(http.MethodGet, serverURL.String(), nil)
	if err != nil {
		return err
	}

	httpClient := &http.Client{
		Transport: newCustomHTTPTransport(tlsConfig, rest.DefaultRESTTimeout, rest.DefaultRESTTimeout)(),
	}
	resp, err := httpClient.Do(req)
	if err != nil {
		return err
	}
	defer xhttp.DrainBody(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return StorageErr(resp.Status)
	}
	return nil
}

// connect to list of endpoints and load all XL disk formats, validate the formats are correct
// and are in quorum, if no formats are found attempt to initialize all of them for the first
// time. additionally make sure to close all the disks used in this attempt.
func connectLoadInitFormats(retryCount int, firstDisk bool, endpoints Endpoints, setCount, drivesPerSet int, deploymentID string) (*formatXLV3, error) {
	// Initialize all storage disks
	storageDisks, errs := initStorageDisksWithErrors(endpoints)
	defer closeStorageDisks(storageDisks)

	for i, err := range errs {
		if err != nil {
			if err != errDiskNotFound {
				return nil, fmt.Errorf("Disk %s: %w", endpoints[i], err)
			}
			if retryCount >= 5 {
				logger.Info("Unable to connect to %s: %v\n", endpoints[i], IsServerResolvable(endpoints[i]))
			}
		}
	}

	// Attempt to load all `format.json` from all disks.
	formatConfigs, sErrs := loadFormatXLAll(storageDisks)

	// Check if we have any critical errors loading `format.json`.
	for i, sErr := range sErrs {
		if _, ok := formatCriticalErrors[sErr]; ok {
			return nil, fmt.Errorf("Disk %s: %w", endpoints[i], sErr)
		}
	}

	// Pre-emptively check if one of the formatted disks
	// is invalid. This function returns success for the
	// most part unless one of the formats is not consistent
	// with expected XL format. For example if a user is
	// trying to pool FS backend into an XL set.
	if err := checkFormatXLValues(formatConfigs); err != nil {
		return nil, err
	}

	// All disks report unformatted; we should initialize everyone.
	if shouldInitXLDisks(sErrs) && firstDisk {
		// Initialize erasure code format on disks
		format, err := initFormatXL(context.Background(), storageDisks, setCount, drivesPerSet, deploymentID)
		if err != nil {
			return nil, err
		}
		// Assign globalDeploymentID on first run for the
		// minio server managing the first disk
		globalDeploymentID = format.ID
		return format, nil
	}

	// Return error when quorum unformatted disks - indicating we are
	// waiting for first server to be online.
	if quorumUnformattedDisks(sErrs) && !firstDisk {
		return nil, errNotFirstDisk
	}

	// Return error when quorum unformatted disks but waiting for rest
	// of the servers to be online.
	if quorumUnformattedDisks(sErrs) && firstDisk {
		return nil, errFirstDiskWait
	}

	// Following function is added to fix a regression which was introduced
	// in release RELEASE.2018-03-16T22-52-12Z after migrating v1 to v2 to v3.
	// This migration failed to capture '.This' field properly which indicates
	// the disk UUID association. Below function is called to handle and fix
	// this regression, for more info refer https://github.com/minio/minio/issues/5667
	if err := fixFormatXLV3(storageDisks, endpoints, formatConfigs); err != nil {
		return nil, err
	}

	// If any of the .This fields is still empty, we return error.
	if formatXLV3ThisEmpty(formatConfigs) {
		return nil, errXLV3ThisEmpty
	}

	format, err := getFormatXLInQuorum(formatConfigs)
	if err != nil {
		return nil, err
	}

	// Validate all format configs with reference format.
	if err = validateXLFormats(format, formatConfigs, endpoints, setCount, drivesPerSet); err != nil {
		return nil, err
	}

	// Get the deploymentID if set.
	format.ID, err = formatXLGetDeploymentID(format, formatConfigs)
	if err != nil {
		return nil, err
	}

	if format.ID == "" {
		// Not a first disk, wait until first disk fixes deploymentID
		if !firstDisk {
			return nil, errNotFirstDisk
		}
		if err = formatXLFixDeploymentID(endpoints, storageDisks, format); err != nil {
			return nil, err
		}
	}

	globalDeploymentID = format.ID

	if err = formatXLFixLocalDeploymentID(endpoints, storageDisks, format); err != nil {
		return nil, err
	}

	// This will always recreate some directories inside .minio.sys of
	// the local disk, such as tmp, multipart and background-ops.
	initXLMetaVolumesInLocalDisks(storageDisks, formatConfigs)

	return format, nil
}

// Format disks before initialization of object layer.
func waitForFormatXL(firstDisk bool, endpoints Endpoints, setCount, drivesPerSet int, deploymentID string) (format *formatXLV3, err error) {
	if len(endpoints) == 0 || setCount == 0 || drivesPerSet == 0 {
		return nil, errInvalidArgument
	}

	if err = formatXLMigrateLocalEndpoints(endpoints); err != nil {
		return nil, err
	}

	if err = formatXLCleanupTmpLocalEndpoints(endpoints); err != nil {
		return nil, err
	}

	// Prepare getElapsedTime() to calculate elapsed time since we started trying to format disks.
	// All times are rounded to avoid showing milli, micro and nano seconds.
	formatStartTime := time.Now().Round(time.Second)
	getElapsedTime := func() string {
		return time.Now().Round(time.Second).Sub(formatStartTime).String()
	}

	// Wait on each try for an update.
	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	var tries int
	for {
		select {
		case <-ticker.C:
			format, err := connectLoadInitFormats(tries, firstDisk, endpoints, setCount, drivesPerSet, deploymentID)
			if err != nil {
				tries++
				switch err {
				case errNotFirstDisk:
					// Fresh setup, wait for first server to be up.
					logger.Info("Waiting for the first server to format the disks.")
					continue
				case errFirstDiskWait:
					// Fresh setup, wait for other servers to come up.
					logger.Info("Waiting for all other servers to be online to format the disks.")
					continue
				case errXLReadQuorum:
					// no quorum available continue to wait for minimum number of servers.
					logger.Info("Waiting for a minimum of %d disks to come online (elapsed %s)\n", len(endpoints)/2, getElapsedTime())
					continue
				case errXLV3ThisEmpty:
					// need to wait for this error to be healed, so continue.
					continue
				default:
					// For all other unhandled errors we exit and fail.
					return nil, err
				}
			}
			return format, nil
		case <-globalOSSignalCh:
			return nil, fmt.Errorf("Initializing data volumes gracefully stopped")
		}
	}
}