Mirror of https://github.com/minio/minio.git (synced 2024-12-24 06:05:55 -05:00)
Fix diskinfo race (#12857)
Fixes a data race on the shared disk info struct.

```
WARNING: DATA RACE
Read at 0x00c011780618 by goroutine 419:
  github.com/minio/minio/cmd.(*DiskMetrics).DecodeMsg()
      c:/gopath/src/github.com/minio/minio/cmd/storage-datatypes_gen.go:331 +0x247
  github.com/minio/minio/cmd.(*DiskInfo).DecodeMsg()
      c:/gopath/src/github.com/minio/minio/cmd/storage-datatypes_gen.go:76 +0x5ec
  github.com/tinylib/msgp/msgp.Decode()
      c:/gopath/pkg/mod/github.com/tinylib/msgp@v1.1.6-0.20210521143832-0becd170c402/msgp/read.go:105 +0x70
  github.com/minio/minio/cmd.(*storageRESTClient).DiskInfo.func1.1()
      c:/gopath/src/github.com/minio/minio/cmd/storage-rest-client.go:288 +0x235
  github.com/minio/minio/cmd.(*timedValue).Get()
      c:/gopath/src/github.com/minio/minio/cmd/utils.go:886 +0x77
  github.com/minio/minio/cmd.(*storageRESTClient).DiskInfo()
      c:/gopath/src/github.com/minio/minio/cmd/storage-rest-client.go:297 +0xf9
  github.com/minio/minio/cmd.getDiskInfos()
      c:/gopath/src/github.com/minio/minio/cmd/object-api-utils.go:962 +0x1a8
  github.com/minio/minio/cmd.(*erasureServerPools).getServerPoolsAvailableSpace.func1()
      c:/gopath/src/github.com/minio/minio/cmd/erasure-server-pool.go:241 +0x27c
  github.com/minio/minio/internal/sync/errgroup.(*Group).Go.func1()
      c:/gopath/src/github.com/minio/minio/internal/sync/errgroup/errgroup.go:123 +0xd7

Previous write at 0x00c011780618 by goroutine 423:
  github.com/minio/minio/cmd.(*DiskMetrics).DecodeMsg()
      c:/gopath/src/github.com/minio/minio/cmd/storage-datatypes_gen.go:332 +0x6e4
  github.com/minio/minio/cmd.(*DiskInfo).DecodeMsg()
      c:/gopath/src/github.com/minio/minio/cmd/storage-datatypes_gen.go:76 +0x5ec
  github.com/tinylib/msgp/msgp.Decode()
      c:/gopath/pkg/mod/github.com/tinylib/msgp@v1.1.6-0.20210521143832-0becd170c402/msgp/read.go:105 +0x70
  github.com/minio/minio/cmd.(*storageRESTClient).DiskInfo.func1.1()
      c:/gopath/src/github.com/minio/minio/cmd/storage-rest-client.go:288 +0x235
  github.com/minio/minio/cmd.(*timedValue).Get()
      c:/gopath/src/github.com/minio/minio/cmd/utils.go:886 +0x77
  github.com/minio/minio/cmd.(*storageRESTClient).DiskInfo()
      c:/gopath/src/github.com/minio/minio/cmd/storage-rest-client.go:297 +0xf9
  github.com/minio/minio/cmd.getDiskInfos()
      c:/gopath/src/github.com/minio/minio/cmd/object-api-utils.go:962 +0x1a8
  github.com/minio/minio/cmd.(*erasureServerPools).getServerPoolsAvailableSpace.func1()
      c:/gopath/src/github.com/minio/minio/cmd/erasure-server-pool.go:241 +0x27c
  github.com/minio/minio/internal/sync/errgroup.(*Group).Go.func1()
      c:/gopath/src/github.com/minio/minio/internal/sync/errgroup/errgroup.go:123 +0xd7
```
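For context, here is a minimal Go sketch of the failure mode the race detector is reporting: a refresh function that writes into a struct shared across goroutines, so two concurrent refreshes (or a refresh and a reader) touch the same memory. This is an illustration only, not MinIO code; `diskInfo`, `shared`, and `refresh` are made-up names, and the increment merely stands in for the msgp decode seen in the trace.

```go
// Illustration only: a cache refresh that writes into a shared struct.
// Running this with `go run -race` reports a WARNING: DATA RACE similar
// to the one above. All identifiers here are hypothetical.
package main

import "sync"

// diskInfo stands in for the decoded DiskInfo/DiskMetrics payload.
type diskInfo struct {
	Free uint64
}

func main() {
	var shared diskInfo // one struct captured by every refresh: the racy part

	// refresh stands in for decoding the RPC response into &shared
	// inside the cache's Update function.
	refresh := func() {
		shared.Free++ // unsynchronized write
	}

	var wg sync.WaitGroup
	for i := 0; i < 2; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			refresh() // two goroutines refreshing concurrently -> write/write race
		}()
	}
	wg.Wait()
}
```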
parent 14fe8ecb58
commit 47de1d2e0e
@@ -14,8 +14,6 @@ WORK_DIR="$PWD/.verify-$RANDOM"
MINIO_CONFIG_DIR="$WORK_DIR/.minio"
MINIO=( "$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server )

export GOGC=25

function start_minio_3_node() {
	export MINIO_ROOT_USER=minio
	export MINIO_ROOT_PASSWORD=minio123
@@ -67,7 +65,7 @@ function start_minio_3_node() {


function check_online() {
	if grep -q 'Unable to initialize sub-systems' ${WORK_DIR}/dist-minio-*.log; then
	if grep -q 'Unable to initialize' ${WORK_DIR}/dist-minio-*.log; then
		echo "1"
	fi
}
@@ -88,22 +86,24 @@ function __init__()
}

function perform_test() {
	start_minio_3_node 60
	start_minio_3_node 120

	echo "Testing Distributed Erasure setup healing of drives"
	echo "Remove the contents of the disks belonging to '${1}' erasure set"

	set -x

	rm -rf ${WORK_DIR}/${1}/*/

	start_minio_3_node 60
	start_minio_3_node 200

	rv=$(check_online)
	if [ "$rv" == "1" ]; then
		pkill -9 minio
		for i in $(seq 1 3); do
			echo "server$i log:"
			cat "${WORK_DIR}/dist-minio-server$i.log"
		done
		pkill -9 minio
		echo "FAILED"
		purge "$WORK_DIR"
		exit 1
@@ -328,7 +328,13 @@ func waitForFormatErasure(firstDisk bool, endpoints Endpoints, poolCount, setCou
			continue
		case errErasureReadQuorum:
			// no quorum available continue to wait for minimum number of servers.
			logger.Info("Waiting for a minimum of %d disks to come online (elapsed %s)\n", len(endpoints)/2, getElapsedTime())
			logger.Info("Waiting for a minimum of %d disks to come online (elapsed %s)\n",
				len(endpoints)/2, getElapsedTime())
			continue
		case errErasureWriteQuorum:
			// no quorum available continue to wait for minimum number of servers.
			logger.Info("Waiting for a minimum of %d disks to come online (elapsed %s)\n",
				(len(endpoints)/2)+1, getElapsedTime())
			continue
		case errErasureV3ThisEmpty:
			// need to wait for this error to be healed, so continue.
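The hunk above waits on both read and write quorum before format initialization proceeds. As a rough illustration of the thresholds in the log messages (not MinIO code; `quorums` is a made-up helper), read quorum is half of the endpoints and write quorum is a strict majority:

```go
// Hedged sketch of the quorum thresholds logged in the diff above.
package main

import "fmt"

// quorums is a hypothetical helper mirroring the expressions in the diff:
// read quorum is len(endpoints)/2, write quorum is (len(endpoints)/2)+1.
func quorums(numEndpoints int) (readQuorum, writeQuorum int) {
	readQuorum = numEndpoints / 2
	writeQuorum = numEndpoints/2 + 1
	return readQuorum, writeQuorum
}

func main() {
	r, w := quorums(16) // e.g. 16 endpoints in the deployment
	fmt.Printf("read quorum: %d, write quorum: %d\n", r, w) // 8 and 9
}
```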
@@ -265,19 +265,8 @@ func (client *storageRESTClient) SetDiskID(id string) {
	client.diskID = id
}

// DiskInfo - fetch disk information for a remote disk.
func (client *storageRESTClient) DiskInfo(ctx context.Context) (info DiskInfo, err error) {
	if !client.IsOnline() {
		// make sure to check if the disk is offline, since the underlying
		// value is cached we should attempt to invalidate it if such calls
		// were attempted. This can lead to false success under certain conditions
		// - this change attempts to avoid stale information if the underlying
		// transport is already down.
		return info, errDiskNotFound
	}
	client.diskInfoCache.Once.Do(func() {
		client.diskInfoCache.TTL = time.Second
		client.diskInfoCache.Update = func() (interface{}, error) {
func (client *storageRESTClient) diskInfo() (interface{}, error) {
	var info DiskInfo
	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	respBody, err := client.call(ctx, storageRESTMethodDiskInfo, nil, nil, -1)
@@ -293,6 +282,20 @@ func (client *storageRESTClient) DiskInfo(ctx context.Context) (info DiskInfo, e
	}
	return info, nil
}

// DiskInfo - fetch disk information for a remote disk.
func (client *storageRESTClient) DiskInfo(ctx context.Context) (info DiskInfo, err error) {
	if !client.IsOnline() {
		// make sure to check if the disk is offline, since the underlying
		// value is cached we should attempt to invalidate it if such calls
		// were attempted. This can lead to false success under certain conditions
		// - this change attempts to avoid stale information if the underlying
		// transport is already down.
		return info, errDiskNotFound
	}
	client.diskInfoCache.Once.Do(func() {
		client.diskInfoCache.TTL = time.Second
		client.diskInfoCache.Update = client.diskInfo
	})
	val, err := client.diskInfoCache.Get()
	if err == nil {
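The refactor above pulls the cache's refresh logic out of an inline closure into a named diskInfo method and wires it up exactly once. A minimal, self-contained sketch of that shape (not the actual timedValue implementation in cmd/utils.go; `cachedValue`, `fetchDiskInfo`, and the field names are assumptions) looks roughly like this:

```go
// Minimal sketch of a once-initialized TTL cache whose refresh function
// returns a freshly allocated value, so callers never share the struct
// being decoded. Not the MinIO implementation; names are illustrative.
package main

import (
	"fmt"
	"sync"
	"time"
)

type diskInfo struct {
	Total, Free uint64
}

type cachedValue struct {
	Once   sync.Once
	TTL    time.Duration
	Update func() (interface{}, error)

	mu    sync.Mutex
	last  time.Time
	value interface{}
}

// Get refreshes the value under a lock once the TTL expires; every refresh
// produces a new value, so previously returned values are never written to.
func (c *cachedValue) Get() (interface{}, error) {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.value != nil && time.Since(c.last) < c.TTL {
		return c.value, nil
	}
	v, err := c.Update()
	if err != nil {
		return nil, err
	}
	c.value, c.last = v, time.Now()
	return v, nil
}

// fetchDiskInfo stands in for the RPC call plus decode into a fresh struct.
func fetchDiskInfo() (interface{}, error) {
	return diskInfo{Total: 100, Free: 42}, nil
}

func main() {
	cache := &cachedValue{}
	cache.Once.Do(func() {
		cache.TTL = time.Second
		cache.Update = fetchDiskInfo // registered once, analogous to Update = client.diskInfo
	})

	if val, err := cache.Get(); err == nil {
		info := val.(diskInfo) // callers copy the value out of the cache
		fmt.Println(info.Total, info.Free)
	}
}
```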