skip inconsistent shards if possible (#13945)

data shards were wrong due to a healing bug
reported in #13803 mainly with unaligned object
sizes.

This PR is an attempt to automatically avoid
these shards, with available information about
the `xl.meta` and actually disk mtime.
This commit is contained in:
Harshavardhana 2021-12-21 10:08:26 -08:00 committed by GitHub
parent 6fbf4f96b6
commit 0e3037631f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 548 additions and 129 deletions

48
.github/workflows/go-healing.yml vendored Normal file
View File

@ -0,0 +1,48 @@
name: Healing Functional Tests
on:
pull_request:
branches:
- master
# This ensures that previous jobs for the PR are canceled when the PR is
# updated.
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref }}
cancel-in-progress: true
jobs:
build:
name: Go ${{ matrix.go-version }} on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
go-version: [1.17.x]
os: [ubuntu-latest]
steps:
- uses: actions/checkout@v2
- uses: actions/setup-go@v2
with:
go-version: ${{ matrix.go-version }}
- uses: actions/cache@v2
with:
path: |
~/.cache/go-build
~/go/pkg/mod
key: ${{ runner.os }}-${{ matrix.go-version }}-go-${{ hashFiles('**/go.sum') }}
restore-keys: |
${{ runner.os }}-${{ matrix.go-version }}-go-
- name: Build on ${{ matrix.os }}
if: matrix.os == 'ubuntu-latest'
env:
CGO_ENABLED: 0
GO111MODULE: on
MINIO_KMS_KES_CERT_FILE: /home/runner/work/minio/minio/.github/workflows/root.cert
MINIO_KMS_KES_KEY_FILE: /home/runner/work/minio/minio/.github/workflows/root.key
MINIO_KMS_KES_ENDPOINT: "https://play.min.io:7373"
MINIO_KMS_KES_KEY_NAME: "my-minio-key"
MINIO_KMS_AUTO_ENCRYPTION: on
run: |
sudo sysctl net.ipv6.conf.all.disable_ipv6=0
sudo sysctl net.ipv6.conf.default.disable_ipv6=0
make verify-healing

View File

@ -13,7 +13,7 @@ concurrency:
jobs:
build:
name: Go ${{ matrix.go-version }} on ${{ matrix.os }}
name: Go ${{ matrix.go-version }} on ${{ matrix.os }} - healing
runs-on: ${{ matrix.os }}
strategy:
matrix:
@ -46,4 +46,3 @@ jobs:
sudo sysctl net.ipv6.conf.all.disable_ipv6=0
sudo sysctl net.ipv6.conf.default.disable_ipv6=0
make verify
make verify-healing

1
.gitignore vendored
View File

@ -32,3 +32,4 @@ hash-set
minio.RELEASE*
mc
nancy
inspects/*

View File

@ -73,6 +73,7 @@ verify-healing: ## verify healing and replacing disks with minio binary
@echo "Verify healing build with race"
@GO111MODULE=on CGO_ENABLED=1 go build -race -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null
@(env bash $(PWD)/buildscripts/verify-healing.sh)
@(env bash $(PWD)/buildscripts/unaligned-healing.sh)
build: checks ## builds minio to $(PWD)
@echo "Building minio binary to './minio'"

152
buildscripts/unaligned-healing.sh Executable file
View File

@ -0,0 +1,152 @@
#!/bin/bash -e
#
set -E
set -o pipefail
set -x
if [ ! -x "$PWD/minio" ]; then
echo "minio executable binary not found in current directory"
exit 1
fi
WORK_DIR="$PWD/.verify-$RANDOM"
MINIO_CONFIG_DIR="$WORK_DIR/.minio"
MINIO_OLD=( "$PWD/minio.RELEASE.2021-11-24T23-19-33Z" --config-dir "$MINIO_CONFIG_DIR" server )
MINIO=( "$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server )
function download_old_release() {
if [ ! -f minio.RELEASE.2021-11-24T23-19-33Z ]; then
curl --silent -O https://dl.minio.io/server/minio/release/linux-amd64/archive/minio.RELEASE.2021-11-24T23-19-33Z
chmod a+x minio.RELEASE.2021-11-24T23-19-33Z
fi
}
function start_minio_16drive() {
start_port=$1
export MINIO_ROOT_USER=minio
export MINIO_ROOT_PASSWORD=minio123
export MC_HOST_minio="http://minio:minio123@127.0.0.1:${start_port}/"
unset MINIO_KMS_AUTO_ENCRYPTION # do not auto-encrypt objects
MC_BUILD_DIR="mc-$RANDOM"
if ! git clone --quiet https://github.com/minio/mc "$MC_BUILD_DIR"; then
echo "failed to download https://github.com/minio/mc"
purge "${MC_BUILD_DIR}"
exit 1
fi
(cd "${MC_BUILD_DIR}" && go build -o "$WORK_DIR/mc")
# remove mc source.
purge "${MC_BUILD_DIR}"
"${MINIO_OLD[@]}" --address ":$start_port" "${WORK_DIR}/xl{1...16}" > "${WORK_DIR}/server1.log" 2>&1 &
pid=$!
disown $pid
sleep 30
if ! ps -p ${pid} 1>&2 >/dev/null; then
echo "server1 log:"
cat "${WORK_DIR}/server1.log"
echo "FAILED"
purge "$WORK_DIR"
exit 1
fi
shred --iterations=1 --size=5241856 - 1>"${WORK_DIR}/unaligned" 2>/dev/null
"${WORK_DIR}/mc" mb minio/healing-shard-bucket --quiet
"${WORK_DIR}/mc" cp \
"${WORK_DIR}/unaligned" \
minio/healing-shard-bucket/unaligned \
--disable-multipart --quiet
## "unaligned" object name gets consistently distributed
## to disks in following distribution order
##
## NOTE: if you change the name make sure to change the
## distribution order present here
##
## [15, 16, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
## make sure to remove the "last" data shard
rm -rf "${WORK_DIR}/xl14/healing-shard-bucket/unaligned"
sleep 10
## Heal the shard
"${WORK_DIR}/mc" admin heal --quiet --recursive minio/healing-shard-bucket
## then remove any other data shard let's pick first disk
## - 1st data shard.
rm -rf "${WORK_DIR}/xl3/healing-shard-bucket/unaligned"
sleep 10
## Heal the shard
"${WORK_DIR}/mc" admin heal --quiet --recursive minio/healing-shard-bucket
go build ./docs/debugging/s3-check-md5/
if ! ./s3-check-md5 \
-debug \
-access-key minio \
-secret-key minio123 \
-endpoint http://127.0.0.1:${start_port}/ 2>&1 | grep CORRUPTED; then
echo "server1 log:"
cat "${WORK_DIR}/server1.log"
echo "FAILED"
purge "$WORK_DIR"
exit 1
fi
pkill minio
sleep 3
"${MINIO[@]}" --address ":$start_port" "${WORK_DIR}/xl{1...16}" > "${WORK_DIR}/server1.log" 2>&1 &
pid=$!
disown $pid
sleep 30
if ! ps -p ${pid} 1>&2 >/dev/null; then
echo "server1 log:"
cat "${WORK_DIR}/server1.log"
echo "FAILED"
purge "$WORK_DIR"
exit 1
fi
if ! ./s3-check-md5 \
-debug \
-access-key minio \
-secret-key minio123 \
-endpoint http://127.0.0.1:${start_port}/ 2>&1 | grep INTACT; then
echo "server1 log:"
cat "${WORK_DIR}/server1.log"
echo "FAILED"
mkdir inspects
(cd inspects; "${WORK_DIR}/mc" admin inspect minio/healing-shard-bucket/unaligned/**)
"${WORK_DIR}/mc" mb play/inspects
"${WORK_DIR}/mc" mirror inspects play/inspects
purge "$WORK_DIR"
exit 1
fi
pkill minio
sleep 3
}
function main() {
download_old_release
start_port=$(shuf -i 10000-65000 -n 1)
start_minio_16drive ${start_port}
}
function purge()
{
rm -rf "$1"
}
( main "$@" )
rv=$?
purge "$WORK_DIR"
exit "$rv"

View File

@ -21,51 +21,70 @@ function start_minio_3_node() {
start_port=$2
args=""
for i in $(seq 1 3); do
args="$args http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/6/"
args="$args http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/1/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/2/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/3/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/4/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/5/ http://127.0.0.1:$[$start_port+$i]${WORK_DIR}/$i/6/"
done
"${MINIO[@]}" --address ":$[$start_port+1]" $args > "${WORK_DIR}/dist-minio-server1.log" 2>&1 &
disown $!
pid1=$!
disown ${pid1}
"${MINIO[@]}" --address ":$[$start_port+2]" $args > "${WORK_DIR}/dist-minio-server2.log" 2>&1 &
disown $!
pid2=$!
disown $pid2
"${MINIO[@]}" --address ":$[$start_port+3]" $args > "${WORK_DIR}/dist-minio-server3.log" 2>&1 &
disown $!
pid3=$!
disown $pid3
sleep "$1"
if [ "$(pgrep -c minio)" -ne 3 ]; then
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-server$i.log"
done
echo "FAILED"
purge "$WORK_DIR"
exit 1
if ! ps -p $pid1 1>&2 > /dev/null; then
echo "server1 log:"
cat "${WORK_DIR}/dist-minio-server1.log"
echo "FAILED"
purge "$WORK_DIR"
exit 1
fi
if ! ps -p $pid2 1>&2 > /dev/null; then
echo "server2 log:"
cat "${WORK_DIR}/dist-minio-server2.log"
echo "FAILED"
purge "$WORK_DIR"
exit 1
fi
if ! ps -p $pid3 1>&2 > /dev/null; then
echo "server3 log:"
cat "${WORK_DIR}/dist-minio-server3.log"
echo "FAILED"
purge "$WORK_DIR"
exit 1
fi
if ! pkill minio; then
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-server$i.log"
done
echo "FAILED"
purge "$WORK_DIR"
exit 1
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-server$i.log"
done
echo "FAILED"
purge "$WORK_DIR"
exit 1
fi
sleep 1;
if pgrep minio; then
# forcibly killing, to proceed further properly.
if ! pkill -9 minio; then
echo "no minio process running anymore, proceed."
fi
# forcibly killing, to proceed further properly.
if ! pkill -9 minio; then
echo "no minio process running anymore, proceed."
fi
fi
}
function check_online() {
if grep -q 'Unable to initialize sub-systems' ${WORK_DIR}/dist-minio-*.log; then
echo "1"
echo "1"
fi
}
@ -96,14 +115,14 @@ function perform_test() {
rv=$(check_online)
if [ "$rv" == "1" ]; then
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-server$i.log"
done
pkill -9 minio
echo "FAILED"
purge "$WORK_DIR"
exit 1
for i in $(seq 1 3); do
echo "server$i log:"
cat "${WORK_DIR}/dist-minio-server$i.log"
done
pkill -9 minio
echo "FAILED"
purge "$WORK_DIR"
exit 1
fi
}

View File

@ -26,29 +26,59 @@ import (
)
// commonTime returns a maximally occurring time from a list of time.
func commonTime(modTimes []time.Time) (modTime time.Time) {
timeOccurenceMap := make(map[int64]int, len(modTimes))
func commonTimeAndOccurence(times []time.Time, group time.Duration) (maxTime time.Time, maxima int) {
timeOccurenceMap := make(map[int64]int, len(times))
groupNano := group.Nanoseconds()
// Ignore the uuid sentinel and count the rest.
for _, t := range modTimes {
for _, t := range times {
if t.Equal(timeSentinel) {
continue
}
timeOccurenceMap[t.UnixNano()]++
nano := t.UnixNano()
if group > 0 {
for k := range timeOccurenceMap {
if k == nano {
// We add to ourself later
continue
}
diff := k - nano
if diff < 0 {
diff = -diff
}
// We are within the limit
if diff < groupNano {
timeOccurenceMap[k]++
}
}
}
// Add ourself...
timeOccurenceMap[nano]++
}
var maxima int // Counter for remembering max occurrence of elements.
maxima = 0 // Counter for remembering max occurrence of elements.
latest := int64(0)
// Find the common cardinality from previously collected
// occurrences of elements.
for nano, count := range timeOccurenceMap {
t := time.Unix(0, nano).UTC()
if count > maxima || (count == maxima && t.After(modTime)) {
if count < maxima {
continue
}
// We are at or above maxima
if count > maxima || nano > latest {
maxima = count
modTime = t
latest = nano
}
}
// Return the collected common modTime.
// Return the collected common max time, with maxima
return time.Unix(0, latest).UTC(), maxima
}
// commonTime returns a maximally occurring time from a list of time.
func commonTime(modTimes []time.Time) (modTime time.Time) {
modTime, _ = commonTimeAndOccurence(modTimes, 0)
return modTime
}
@ -88,6 +118,19 @@ func filterOnlineDisksInplace(fi FileInfo, partsMetadata []FileInfo, onlineDisks
}
}
// Extracts list of disk mtimes from FileInfo slice and returns, skips
// slice elements that have errors.
func listObjectDiskMtimes(partsMetadata []FileInfo) (diskMTimes []time.Time) {
diskMTimes = bootModtimes(len(partsMetadata))
for index, metadata := range partsMetadata {
if metadata.IsValid() {
// Once the file is found, save the disk mtime saved on disk.
diskMTimes[index] = metadata.DiskMTime
}
}
return diskMTimes
}
// Notes:
// There are 5 possible states a disk could be in,
// 1. __online__ - has the latest copy of xl.meta - returned by listOnlineDisks
@ -185,6 +228,13 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
errs []error, latestMeta FileInfo,
bucket, object string, scanMode madmin.HealScanMode) ([]StorageAPI, []error) {
var diskMTime time.Time
delta := 5 * time.Second
if !latestMeta.DataShardFixed() {
diskMTime = pickValidDiskTimeWithQuorum(partsMetadata,
latestMeta.Erasure.DataBlocks)
}
availableDisks := make([]StorageAPI, len(onlineDisks))
dataErrs := make([]error, len(onlineDisks))
inconsistent := 0
@ -289,6 +339,14 @@ func disksWithAllParts(ctx context.Context, onlineDisks []StorageAPI, partsMetad
}
}
if !diskMTime.Equal(timeSentinel) && !diskMTime.IsZero() {
if !partsMetadata[i].AcceptableDelta(diskMTime, delta) {
// not with in acceptable delta, skip.
partsMetadata[i] = FileInfo{}
continue
}
}
if dataErrs[i] == nil {
// All parts verified, mark it as all data available.
availableDisks[i] = onlineDisk

View File

@ -24,12 +24,38 @@ import (
"fmt"
"io"
"sync"
"time"
"github.com/minio/madmin-go"
"github.com/minio/minio/internal/logger"
"github.com/minio/minio/internal/sync/errgroup"
)
const reservedMetadataPrefixLowerDataShardFix = ReservedMetadataPrefixLower + "data-shard-fix"
// AcceptableDelta returns 'true' if the fi.DiskMTime is under
// acceptable delta of "delta" duration with maxTime.
//
// This code is primarily used for heuristic detection of
// incorrect shards, as per https://github.com/minio/minio/pull/13803
//
// This check only is active if we could find maximally
// occurring disk mtimes that are somewhat same across
// the quorum. Allowing to skip those shards which we
// might think are wrong.
func (fi FileInfo) AcceptableDelta(maxTime time.Time, delta time.Duration) bool {
diff := maxTime.Sub(fi.DiskMTime)
if diff < 0 {
diff = -diff
}
return diff < delta
}
// DataShardFixed - data shard fixed?
func (fi FileInfo) DataShardFixed() bool {
return fi.Metadata[reservedMetadataPrefixLowerDataShardFix] == "true"
}
// Heals a bucket if it doesn't exist on one of the disks, additionally
// also heals the missing entries for bucket metadata files
// `policy.json, notification.xml, listeners.json`.
@ -224,6 +250,11 @@ func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, latestMeta File
return true
}
if erErr == nil {
if meta.XLV1 {
// Legacy means heal always
// always check first.
return true
}
if !meta.Deleted && !meta.IsRemote() {
// If xl.meta was read fine but there may be problem with the part.N files.
if IsErr(dataErr, []error{
@ -234,19 +265,7 @@ func shouldHealObjectOnDisk(erErr, dataErr error, meta FileInfo, latestMeta File
return true
}
}
if !latestMeta.MetadataEquals(meta) {
return true
}
if !latestMeta.TransitionInfoEquals(meta) {
return true
}
if !latestMeta.ReplicationInfoEquals(meta) {
return true
}
if !latestMeta.ModTime.Equal(meta.ModTime) {
return true
}
if meta.XLV1 {
if !latestMeta.Equals(meta) {
return true
}
}

View File

@ -93,7 +93,7 @@ func TestHealing(t *testing.T) {
}
// After heal the meta file should be as expected.
if !reflect.DeepEqual(fileInfoPreHeal, fileInfoPostHeal) {
if !fileInfoPreHeal.Equals(fileInfoPostHeal) {
t.Fatal("HealObject failed")
}
@ -122,7 +122,7 @@ func TestHealing(t *testing.T) {
}
// After heal the meta file should be as expected.
if !reflect.DeepEqual(fileInfoPreHeal, fileInfoPostHeal) {
if !fileInfoPreHeal.Equals(fileInfoPostHeal) {
t.Fatal("HealObject failed")
}
@ -699,7 +699,8 @@ func TestHealLastDataShard(t *testing.T) {
t.Fatalf("Failed to make a bucket - %v", err)
}
_, err = obj.PutObject(ctx, bucket, object, mustGetPutObjReader(t, bytes.NewReader(data), int64(len(data)), "", ""), opts)
_, err = obj.PutObject(ctx, bucket, object,
mustGetPutObjReader(t, bytes.NewReader(data), int64(len(data)), "", ""), opts)
if err != nil {
t.Fatal(err)
@ -724,7 +725,9 @@ func TestHealLastDataShard(t *testing.T) {
if err != nil {
t.Fatalf("Failed to delete a file - %v", err)
}
_, err = obj.HealObject(ctx, bucket, object, "", madmin.HealOpts{ScanMode: madmin.HealNormalScan})
_, err = obj.HealObject(ctx, bucket, object, "", madmin.HealOpts{
ScanMode: madmin.HealNormalScan,
})
if err != nil {
t.Fatal(err)
}
@ -738,21 +741,23 @@ func TestHealLastDataShard(t *testing.T) {
firstHealedH := sha256.New()
_, err = io.Copy(firstHealedH, firstGr)
if err != nil {
t.Fatal()
t.Fatal(err)
}
firstHealedDataSha256 := firstHealedH.Sum(nil)
if !bytes.Equal(actualSha256, firstHealedDataSha256) {
t.Fatal("object healed wrong")
t.Fatalf("object healed wrong, expected %x, got %x",
actualSha256, firstHealedDataSha256)
}
// remove another data shard
err = removeAll(pathJoin(shuffledDisks[1].String(), bucket, object))
if err != nil {
if err = removeAll(pathJoin(shuffledDisks[1].String(), bucket, object)); err != nil {
t.Fatalf("Failed to delete a file - %v", err)
}
_, err = obj.HealObject(ctx, bucket, object, "", madmin.HealOpts{ScanMode: madmin.HealNormalScan})
_, err = obj.HealObject(ctx, bucket, object, "", madmin.HealOpts{
ScanMode: madmin.HealNormalScan,
})
if err != nil {
t.Fatal(err)
}
@ -766,12 +771,13 @@ func TestHealLastDataShard(t *testing.T) {
secondHealedH := sha256.New()
_, err = io.Copy(secondHealedH, secondGr)
if err != nil {
t.Fatal()
t.Fatal(err)
}
secondHealedDataSha256 := secondHealedH.Sum(nil)
if !bytes.Equal(actualSha256, secondHealedDataSha256) {
t.Fatal("object healed wrong")
t.Fatalf("object healed wrong, expected %x, got %x",
actualSha256, secondHealedDataSha256)
}
})
}

View File

@ -351,6 +351,17 @@ func findFileInfoInQuorum(ctx context.Context, metaArr []FileInfo, modTime time.
return FileInfo{}, errErasureReadQuorum
}
func pickValidDiskTimeWithQuorum(metaArr []FileInfo, quorum int) time.Time {
diskMTimes := listObjectDiskMtimes(metaArr)
diskMTime, diskMaxima := commonTimeAndOccurence(diskMTimes, 5*time.Second)
if diskMaxima >= quorum {
return diskMTime
}
return timeSentinel
}
// pickValidFileInfo - picks one valid FileInfo content and returns from a
// slice of FileInfo.
func pickValidFileInfo(ctx context.Context, metaArr []FileInfo, modTime time.Time, quorum int) (FileInfo, error) {

View File

@ -28,6 +28,7 @@ import (
"strconv"
"strings"
"sync"
"time"
"github.com/minio/madmin-go"
"github.com/minio/minio-go/v7/pkg/tags"
@ -179,6 +180,31 @@ func (er erasureObjects) GetObjectNInfo(ctx context.Context, bucket, object stri
return nil, toObjectErr(err, bucket, object)
}
if !fi.DataShardFixed() {
diskMTime := pickValidDiskTimeWithQuorum(metaArr, fi.Erasure.DataBlocks)
delta := 5 * time.Second
if !diskMTime.Equal(timeSentinel) && !diskMTime.IsZero() {
for index := range onlineDisks {
if onlineDisks[index] == OfflineDisk {
continue
}
if !metaArr[index].IsValid() {
continue
}
if !metaArr[index].AcceptableDelta(diskMTime, delta) {
// If disk mTime mismatches it is considered outdated
// https://github.com/minio/minio/pull/13803
//
// This check only is active if we could find maximally
// occurring disk mtimes that are somewhat same across
// the quorum. Allowing to skip those shards which we
// might think are wrong.
onlineDisks[index] = OfflineDisk
}
}
}
}
objInfo := fi.ToObjectInfo(bucket, object)
if objInfo.DeleteMarker {
if opts.VersionID == "" {
@ -1444,14 +1470,16 @@ func (er erasureObjects) addPartial(bucket, object, versionID string, size int64
}
func (er erasureObjects) PutObjectMetadata(ctx context.Context, bucket, object string, opts ObjectOptions) (ObjectInfo, error) {
// Lock the object before updating tags.
lk := er.NewNSLock(bucket, object)
lkctx, err := lk.GetLock(ctx, globalOperationTimeout)
if err != nil {
return ObjectInfo{}, err
if !opts.NoLock {
// Lock the object before updating metadata.
lk := er.NewNSLock(bucket, object)
lkctx, err := lk.GetLock(ctx, globalOperationTimeout)
if err != nil {
return ObjectInfo{}, err
}
ctx = lkctx.Context()
defer lk.Unlock(lkctx.Cancel)
}
ctx = lkctx.Context()
defer lk.Unlock(lkctx.Cancel)
disks := er.getDisks()

View File

@ -184,6 +184,25 @@ type FileInfo struct {
// no other caller must set this value other than multi-object delete call.
// usage in other calls in undefined please avoid.
Idx int `msg:"i"`
// DiskMTime indicates the mtime of the xl.meta on disk
// This is mainly used for detecting a particular issue
// reported in https://github.com/minio/minio/pull/13803
DiskMTime time.Time `msg:"dmt"`
}
// Equals checks if fi(FileInfo) matches ofi(FileInfo)
func (fi FileInfo) Equals(ofi FileInfo) (ok bool) {
if !fi.MetadataEquals(ofi) {
return false
}
if !fi.ReplicationInfoEquals(ofi) {
return false
}
if !fi.TransitionInfoEquals(ofi) {
return false
}
return fi.ModTime.Equal(ofi.ModTime)
}
// GetDataDir returns an expected dataDir given FileInfo

View File

@ -550,8 +550,8 @@ func (z *FileInfo) DecodeMsg(dc *msgp.Reader) (err error) {
err = msgp.WrapError(err)
return
}
if zb0001 != 25 {
err = msgp.ArrayError{Wanted: 25, Got: zb0001}
if zb0001 != 26 {
err = msgp.ArrayError{Wanted: 26, Got: zb0001}
return
}
z.Volume, err = dc.ReadString()
@ -716,13 +716,18 @@ func (z *FileInfo) DecodeMsg(dc *msgp.Reader) (err error) {
err = msgp.WrapError(err, "Idx")
return
}
z.DiskMTime, err = dc.ReadTime()
if err != nil {
err = msgp.WrapError(err, "DiskMTime")
return
}
return
}
// EncodeMsg implements msgp.Encodable
func (z *FileInfo) EncodeMsg(en *msgp.Writer) (err error) {
// array header, size 25
err = en.Append(0xdc, 0x0, 0x19)
// array header, size 26
err = en.Append(0xdc, 0x0, 0x1a)
if err != nil {
return
}
@ -870,14 +875,19 @@ func (z *FileInfo) EncodeMsg(en *msgp.Writer) (err error) {
err = msgp.WrapError(err, "Idx")
return
}
err = en.WriteTime(z.DiskMTime)
if err != nil {
err = msgp.WrapError(err, "DiskMTime")
return
}
return
}
// MarshalMsg implements msgp.Marshaler
func (z *FileInfo) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.Require(b, z.Msgsize())
// array header, size 25
o = append(o, 0xdc, 0x0, 0x19)
// array header, size 26
o = append(o, 0xdc, 0x0, 0x1a)
o = msgp.AppendString(o, z.Volume)
o = msgp.AppendString(o, z.Name)
o = msgp.AppendString(o, z.VersionID)
@ -922,6 +932,7 @@ func (z *FileInfo) MarshalMsg(b []byte) (o []byte, err error) {
o = msgp.AppendTime(o, z.SuccessorModTime)
o = msgp.AppendBool(o, z.Fresh)
o = msgp.AppendInt(o, z.Idx)
o = msgp.AppendTime(o, z.DiskMTime)
return
}
@ -933,8 +944,8 @@ func (z *FileInfo) UnmarshalMsg(bts []byte) (o []byte, err error) {
err = msgp.WrapError(err)
return
}
if zb0001 != 25 {
err = msgp.ArrayError{Wanted: 25, Got: zb0001}
if zb0001 != 26 {
err = msgp.ArrayError{Wanted: 26, Got: zb0001}
return
}
z.Volume, bts, err = msgp.ReadStringBytes(bts)
@ -1099,6 +1110,11 @@ func (z *FileInfo) UnmarshalMsg(bts []byte) (o []byte, err error) {
err = msgp.WrapError(err, "Idx")
return
}
z.DiskMTime, bts, err = msgp.ReadTimeBytes(bts)
if err != nil {
err = msgp.WrapError(err, "DiskMTime")
return
}
o = bts
return
}
@ -1116,7 +1132,7 @@ func (z *FileInfo) Msgsize() (s int) {
for za0003 := range z.Parts {
s += z.Parts[za0003].Msgsize()
}
s += z.Erasure.Msgsize() + msgp.BoolSize + z.ReplicationState.Msgsize() + msgp.BytesPrefixSize + len(z.Data) + msgp.IntSize + msgp.TimeSize + msgp.BoolSize + msgp.IntSize
s += z.Erasure.Msgsize() + msgp.BoolSize + z.ReplicationState.Msgsize() + msgp.BytesPrefixSize + len(z.Data) + msgp.IntSize + msgp.TimeSize + msgp.BoolSize + msgp.IntSize + msgp.TimeSize
return
}

View File

@ -18,7 +18,7 @@
package cmd
const (
storageRESTVersion = "v42" // Added FreeVersions to FileInfoVersions
storageRESTVersion = "v43" // Added DiskMTime field for FileInfo
storageRESTVersionPrefix = SlashSeparator + storageRESTVersion
storageRESTPrefix = minioReservedBucketPath + "/storage"
)

View File

@ -375,32 +375,39 @@ func (s *xlStorage) Healing() *healingTracker {
return &h
}
func (s *xlStorage) readMetadata(ctx context.Context, itemPath string) ([]byte, error) {
// readsMetadata and returns disk mTime information for xl.meta
func (s *xlStorage) readMetadataWithDMTime(ctx context.Context, itemPath string) ([]byte, time.Time, error) {
if contextCanceled(ctx) {
return nil, ctx.Err()
return nil, time.Time{}, ctx.Err()
}
if err := checkPathLength(itemPath); err != nil {
return nil, err
return nil, time.Time{}, err
}
f, err := OpenFile(itemPath, readMode, 0)
if err != nil {
return nil, err
return nil, time.Time{}, err
}
defer f.Close()
stat, err := f.Stat()
if err != nil {
return nil, err
return nil, time.Time{}, err
}
if stat.IsDir() {
return nil, &os.PathError{
return nil, time.Time{}, &os.PathError{
Op: "open",
Path: itemPath,
Err: syscall.EISDIR,
}
}
return readXLMetaNoData(f, stat.Size())
buf, err := readXLMetaNoData(f, stat.Size())
return buf, stat.ModTime().UTC(), err
}
func (s *xlStorage) readMetadata(ctx context.Context, itemPath string) ([]byte, error) {
buf, _, err := s.readMetadataWithDMTime(ctx, itemPath)
return buf, err
}
func (s *xlStorage) NSScanner(ctx context.Context, cache dataUsageCache, updates chan<- dataUsageEntry) (dataUsageCache, error) {
@ -1214,11 +1221,18 @@ func (s *xlStorage) ReadVersion(ctx context.Context, volume, path, versionID str
if err != nil {
return fi, err
}
// Validate file path length, before reading.
filePath := pathJoin(volumeDir, path)
if err = checkPathLength(filePath); err != nil {
return fi, err
}
var buf []byte
var dmTime time.Time
if readData {
buf, err = s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFile))
buf, dmTime, err = s.readAllData(ctx, volumeDir, pathJoin(filePath, xlStorageFormatFile))
} else {
buf, err = s.readMetadata(ctx, pathJoin(volumeDir, path, xlStorageFormatFile))
buf, dmTime, err = s.readMetadataWithDMTime(ctx, pathJoin(filePath, xlStorageFormatFile))
if err != nil {
if osIsNotExist(err) {
if aerr := Access(volumeDir); aerr != nil && osIsNotExist(aerr) {
@ -1231,7 +1245,7 @@ func (s *xlStorage) ReadVersion(ctx context.Context, volume, path, versionID str
if err != nil {
if err == errFileNotFound {
buf, err = s.ReadAll(ctx, volume, pathJoin(path, xlStorageFormatFileV1))
buf, dmTime, err = s.readAllData(ctx, volumeDir, pathJoin(filePath, xlStorageFormatFileV1))
if err != nil {
if err == errFileNotFound {
if versionID != "" {
@ -1257,6 +1271,7 @@ func (s *xlStorage) ReadVersion(ctx context.Context, volume, path, versionID str
if err != nil {
return fi, err
}
fi.DiskMTime = dmTime
if len(fi.Data) == 0 {
// We did not read inline data, so we have no references.
@ -1299,7 +1314,7 @@ func (s *xlStorage) ReadVersion(ctx context.Context, volume, path, versionID str
len(fi.Parts) == 1 {
partPath := fmt.Sprintf("part.%d", fi.Parts[0].Number)
dataPath := pathJoin(volumeDir, path, fi.DataDir, partPath)
fi.Data, err = s.readAllData(volumeDir, dataPath)
fi.Data, _, err = s.readAllData(ctx, volumeDir, dataPath)
if err != nil {
return FileInfo{}, err
}
@ -1309,38 +1324,42 @@ func (s *xlStorage) ReadVersion(ctx context.Context, volume, path, versionID str
return fi, nil
}
func (s *xlStorage) readAllData(volumeDir string, filePath string) (buf []byte, err error) {
func (s *xlStorage) readAllData(ctx context.Context, volumeDir string, filePath string) (buf []byte, dmTime time.Time, err error) {
if contextCanceled(ctx) {
return nil, time.Time{}, ctx.Err()
}
f, err := OpenFileDirectIO(filePath, readMode, 0666)
if err != nil {
if osIsNotExist(err) {
// Check if the object doesn't exist because its bucket
// is missing in order to return the correct error.
if err = Access(volumeDir); err != nil && osIsNotExist(err) {
return nil, errVolumeNotFound
return nil, dmTime, errVolumeNotFound
}
return nil, errFileNotFound
return nil, dmTime, errFileNotFound
} else if osIsPermission(err) {
return nil, errFileAccessDenied
return nil, dmTime, errFileAccessDenied
} else if isSysErrNotDir(err) || isSysErrIsDir(err) {
return nil, errFileNotFound
return nil, dmTime, errFileNotFound
} else if isSysErrHandleInvalid(err) {
// This case is special and needs to be handled for windows.
return nil, errFileNotFound
return nil, dmTime, errFileNotFound
} else if isSysErrIO(err) {
return nil, errFaultyDisk
return nil, dmTime, errFaultyDisk
} else if isSysErrTooManyFiles(err) {
return nil, errTooManyOpenFiles
return nil, dmTime, errTooManyOpenFiles
} else if isSysErrInvalidArg(err) {
st, _ := Lstat(filePath)
if st != nil && st.IsDir() {
// Linux returns InvalidArg for directory O_DIRECT
// we need to keep this fallback code to return correct
// errors upwards.
return nil, errFileNotFound
return nil, dmTime, errFileNotFound
}
return nil, errUnsupportedDisk
return nil, dmTime, errUnsupportedDisk
}
return nil, err
return nil, dmTime, err
}
r := &xioutil.ODirectReader{
File: f,
@ -1352,10 +1371,10 @@ func (s *xlStorage) readAllData(volumeDir string, filePath string) (buf []byte,
stat, err := f.Stat()
if err != nil {
buf, err = ioutil.ReadAll(r)
return buf, osErrToFileErr(err)
return buf, dmTime, osErrToFileErr(err)
}
if stat.IsDir() {
return nil, errFileNotFound
return nil, dmTime, errFileNotFound
}
// Read into appropriate buffer.
@ -1369,7 +1388,7 @@ func (s *xlStorage) readAllData(volumeDir string, filePath string) (buf []byte,
// Read file...
_, err = io.ReadFull(r, buf)
return buf, osErrToFileErr(err)
return buf, stat.ModTime().UTC(), osErrToFileErr(err)
}
// ReadAll reads from r until an error or EOF and returns the data it read.
@ -1390,7 +1409,8 @@ func (s *xlStorage) ReadAll(ctx context.Context, volume string, path string) (bu
return nil, err
}
return s.readAllData(volumeDir, filePath)
buf, _, err = s.readAllData(ctx, volumeDir, filePath)
return buf, err
}
// ReadFile reads exactly len(buf) bytes into buf. It returns the

View File

@ -240,18 +240,18 @@ head -c 221227088 </dev/urandom >200M
sleep 10
echo "Verifying ETag for all objects"
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9001/ -bucket bucket
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9002/ -bucket bucket
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9003/ -bucket bucket
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9004/ -bucket bucket
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9005/ -bucket bucket
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9006/ -bucket bucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9001/ -bucket bucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9002/ -bucket bucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9003/ -bucket bucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9004/ -bucket bucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9005/ -bucket bucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9006/ -bucket bucket
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9001/ -bucket olockbucket
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9002/ -bucket olockbucket
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9003/ -bucket olockbucket
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9004/ -bucket olockbucket
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9005/ -bucket olockbucket
./s3-check-md5 -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9006/ -bucket olockbucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9001/ -bucket olockbucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9002/ -bucket olockbucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9003/ -bucket olockbucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9004/ -bucket olockbucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9005/ -bucket olockbucket
./s3-check-md5 -versions -access-key minio -secret-key minio123 -endpoint http://127.0.0.1:9006/ -bucket olockbucket
catch

View File

@ -25,6 +25,7 @@ import (
"io"
"log"
"net/url"
"os"
"strconv"
"strings"
@ -35,6 +36,8 @@ import (
var (
endpoint, accessKey, secretKey string
bucket, prefix string
debug bool
versions bool
)
// getMD5Sum returns MD5 sum of given data.
@ -50,6 +53,8 @@ func main() {
flag.StringVar(&secretKey, "secret-key", "zuf+tfteSlswRu7BJ86wekitnifILbZam1KYY3TG", "S3 Secret Key")
flag.StringVar(&bucket, "bucket", "", "Select a specific bucket")
flag.StringVar(&prefix, "prefix", "", "Select a prefix")
flag.BoolVar(&debug, "debug", false, "Prints HTTP network calls to S3 endpoint")
flag.BoolVar(&versions, "versions", false, "Verify all versions")
flag.Parse()
if endpoint == "" {
@ -81,7 +86,9 @@ func main() {
log.Fatalln()
}
// s3Client.TraceOn(os.Stderr)
if debug {
s3Client.TraceOn(os.Stderr)
}
var buckets []string
if bucket != "" {
@ -100,7 +107,8 @@ func main() {
opts := minio.ListObjectsOptions{
Recursive: true,
Prefix: prefix,
WithVersions: true,
WithVersions: versions,
WithMetadata: true,
}
// List all objects from a bucket-name with a matching prefix.
@ -110,6 +118,19 @@ func main() {
continue
}
if object.IsDeleteMarker {
log.Println("DELETE marker skipping object:", object.Key)
continue
}
if _, ok := object.UserMetadata["X-Amz-Server-Side-Encryption-Customer-Algorithm"]; ok {
log.Println("Objects encrypted with SSE-C do not have md5sum as ETag:", object.Key)
continue
}
if _, ok := object.UserMetadata["X-Amz-Server-Side-Encryption-Customer-Algorithm"]; ok {
log.Println("Objects encrypted with SSE-C do not have md5sum as ETag:", object.Key)
continue
}
if v, ok := object.UserMetadata["X-Amz-Server-Side-Encryption"]; ok && v == "aws:kms" {
log.Println("Objects encrypted with SSE-KMS do not have md5sum as ETag:", object.Key)
continue
}
parts := 1
@ -132,10 +153,12 @@ func main() {
}
var partsMD5Sum [][]byte
for p := 1; p <= parts; p++ {
obj, err := s3Client.GetObject(context.Background(), bucket, object.Key,
minio.GetObjectOptions{VersionID: object.VersionID, PartNumber: p})
opts := minio.GetObjectOptions{
VersionID: object.VersionID,
PartNumber: p,
}
obj, err := s3Client.GetObject(context.Background(), bucket, object.Key, opts)
if err != nil {
log.Println("GET", bucket, object.Key, object.VersionID, "=>", err)
continue
@ -149,7 +172,6 @@ func main() {
}
corrupted := false
if !multipart {
md5sum := fmt.Sprintf("%x", partsMD5Sum[0])
if md5sum != object.ETag {
@ -169,7 +191,7 @@ func main() {
if corrupted {
log.Println("CORRUPTED object:", bucket, object.Key, object.VersionID)
} else {
log.Println("INTACT", bucket, object.Key, object.VersionID)
log.Println("INTACT object:", bucket, object.Key, object.VersionID)
}
}
}