From 47de1d2e0eed5acab133a6af65ff4a156e732fa2 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Mon, 23 Aug 2021 10:13:47 +0200 Subject: [PATCH] Fix diskinfo race (#12857) Fixes share info struct. ``` WARNING: DATA RACE Read at 0x00c011780618 by goroutine 419: github.com/minio/minio/cmd.(*DiskMetrics).DecodeMsg() c:/gopath/src/github.com/minio/minio/cmd/storage-datatypes_gen.go:331 +0x247 github.com/minio/minio/cmd.(*DiskInfo).DecodeMsg() c:/gopath/src/github.com/minio/minio/cmd/storage-datatypes_gen.go:76 +0x5ec github.com/tinylib/msgp/msgp.Decode() c:/gopath/pkg/mod/github.com/tinylib/msgp@v1.1.6-0.20210521143832-0becd170c402/msgp/read.go:105 +0x70 github.com/minio/minio/cmd.(*storageRESTClient).DiskInfo.func1.1() c:/gopath/src/github.com/minio/minio/cmd/storage-rest-client.go:288 +0x235 github.com/minio/minio/cmd.(*timedValue).Get() c:/gopath/src/github.com/minio/minio/cmd/utils.go:886 +0x77 github.com/minio/minio/cmd.(*storageRESTClient).DiskInfo() c:/gopath/src/github.com/minio/minio/cmd/storage-rest-client.go:297 +0xf9 github.com/minio/minio/cmd.getDiskInfos() c:/gopath/src/github.com/minio/minio/cmd/object-api-utils.go:962 +0x1a8 github.com/minio/minio/cmd.(*erasureServerPools).getServerPoolsAvailableSpace.func1() c:/gopath/src/github.com/minio/minio/cmd/erasure-server-pool.go:241 +0x27c github.com/minio/minio/internal/sync/errgroup.(*Group).Go.func1() c:/gopath/src/github.com/minio/minio/internal/sync/errgroup/errgroup.go:123 +0xd7 Previous write at 0x00c011780618 by goroutine 423: github.com/minio/minio/cmd.(*DiskMetrics).DecodeMsg() c:/gopath/src/github.com/minio/minio/cmd/storage-datatypes_gen.go:332 +0x6e4 github.com/minio/minio/cmd.(*DiskInfo).DecodeMsg() c:/gopath/src/github.com/minio/minio/cmd/storage-datatypes_gen.go:76 +0x5ec github.com/tinylib/msgp/msgp.Decode() c:/gopath/pkg/mod/github.com/tinylib/msgp@v1.1.6-0.20210521143832-0becd170c402/msgp/read.go:105 +0x70 github.com/minio/minio/cmd.(*storageRESTClient).DiskInfo.func1.1() c:/gopath/src/github.com/minio/minio/cmd/storage-rest-client.go:288 +0x235 github.com/minio/minio/cmd.(*timedValue).Get() c:/gopath/src/github.com/minio/minio/cmd/utils.go:886 +0x77 github.com/minio/minio/cmd.(*storageRESTClient).DiskInfo() c:/gopath/src/github.com/minio/minio/cmd/storage-rest-client.go:297 +0xf9 github.com/minio/minio/cmd.getDiskInfos() c:/gopath/src/github.com/minio/minio/cmd/object-api-utils.go:962 +0x1a8 github.com/minio/minio/cmd.(*erasureServerPools).getServerPoolsAvailableSpace.func1() c:/gopath/src/github.com/minio/minio/cmd/erasure-server-pool.go:241 +0x27c github.com/minio/minio/internal/sync/errgroup.(*Group).Go.func1() c:/gopath/src/github.com/minio/minio/internal/sync/errgroup/errgroup.go:123 +0xd7 ``` --- buildscripts/verify-healing.sh | 12 ++++++------ cmd/prepare-storage.go | 8 +++++++- cmd/storage-rest-client.go | 35 ++++++++++++++++++---------------- 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/buildscripts/verify-healing.sh b/buildscripts/verify-healing.sh index 5643ace41..4ff842349 100755 --- a/buildscripts/verify-healing.sh +++ b/buildscripts/verify-healing.sh @@ -14,8 +14,6 @@ WORK_DIR="$PWD/.verify-$RANDOM" MINIO_CONFIG_DIR="$WORK_DIR/.minio" MINIO=( "$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server ) -export GOGC=25 - function start_minio_3_node() { export MINIO_ROOT_USER=minio export MINIO_ROOT_PASSWORD=minio123 @@ -67,7 +65,7 @@ function start_minio_3_node() { function check_online() { - if grep -q 'Unable to initialize sub-systems' ${WORK_DIR}/dist-minio-*.log; then + if grep -q 'Unable to initialize' ${WORK_DIR}/dist-minio-*.log; then echo "1" fi } @@ -88,22 +86,24 @@ function __init__() } function perform_test() { - start_minio_3_node 60 + start_minio_3_node 120 echo "Testing Distributed Erasure setup healing of drives" echo "Remove the contents of the disks belonging to '${1}' erasure set" + set -x + rm -rf ${WORK_DIR}/${1}/*/ - start_minio_3_node 60 + start_minio_3_node 200 rv=$(check_online) if [ "$rv" == "1" ]; then - pkill -9 minio for i in $(seq 1 3); do echo "server$i log:" cat "${WORK_DIR}/dist-minio-server$i.log" done + pkill -9 minio echo "FAILED" purge "$WORK_DIR" exit 1 diff --git a/cmd/prepare-storage.go b/cmd/prepare-storage.go index 4e21fbcf5..de1c6d4a3 100644 --- a/cmd/prepare-storage.go +++ b/cmd/prepare-storage.go @@ -328,7 +328,13 @@ func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCou continue case errErasureReadQuorum: // no quorum available continue to wait for minimum number of servers. - logger.Info("Waiting for a minimum of %d disks to come online (elapsed %s)\n", len(endpoints)/2, getElapsedTime()) + logger.Info("Waiting for a minimum of %d disks to come online (elapsed %s)\n", + len(endpoints)/2, getElapsedTime()) + continue + case errErasureWriteQuorum: + // no quorum available continue to wait for minimum number of servers. + logger.Info("Waiting for a minimum of %d disks to come online (elapsed %s)\n", + (len(endpoints)/2)+1, getElapsedTime()) continue case errErasureV3ThisEmpty: // need to wait for this error to be healed, so continue. diff --git a/cmd/storage-rest-client.go b/cmd/storage-rest-client.go index 5f894a6dc..ffad56e66 100644 --- a/cmd/storage-rest-client.go +++ b/cmd/storage-rest-client.go @@ -265,6 +265,24 @@ func (client *storageRESTClient) SetDiskID(id string) { client.diskID = id } +func (client *storageRESTClient) diskInfo() (interface{}, error) { + var info DiskInfo + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + respBody, err := client.call(ctx, storageRESTMethodDiskInfo, nil, nil, -1) + if err != nil { + return info, err + } + defer xhttp.DrainBody(respBody) + if err = msgp.Decode(respBody, &info); err != nil { + return info, err + } + if info.Error != "" { + return info, toStorageErr(errors.New(info.Error)) + } + return info, nil +} + // DiskInfo - fetch disk information for a remote disk. func (client *storageRESTClient) DiskInfo(ctx context.Context) (info DiskInfo, err error) { if !client.IsOnline() { @@ -277,22 +295,7 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context) (info DiskInfo, e } client.diskInfoCache.Once.Do(func() { client.diskInfoCache.TTL = time.Second - client.diskInfoCache.Update = func() (interface{}, error) { - ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) - defer cancel() - respBody, err := client.call(ctx, storageRESTMethodDiskInfo, nil, nil, -1) - if err != nil { - return info, err - } - defer xhttp.DrainBody(respBody) - if err = msgp.Decode(respBody, &info); err != nil { - return info, err - } - if info.Error != "" { - return info, toStorageErr(errors.New(info.Error)) - } - return info, nil - } + client.diskInfoCache.Update = client.diskInfo }) val, err := client.diskInfoCache.Get() if err == nil {