diff --git a/.github/workflows/go-healing.yml b/.github/workflows/go-healing.yml
index 7bfccacdc..7453e98b2 100644
--- a/.github/workflows/go-healing.yml
+++ b/.github/workflows/go-healing.yml
@@ -47,3 +47,4 @@ jobs:
           sudo sysctl net.ipv6.conf.all.disable_ipv6=0
           sudo sysctl net.ipv6.conf.default.disable_ipv6=0
           make verify-healing
+          make verify-healing-inconsistent-versions
diff --git a/Makefile b/Makefile
index ad3c8b493..84a3ee7d4 100644
--- a/Makefile
+++ b/Makefile
@@ -82,6 +82,11 @@ verify-healing: ## verify healing and replacing disks with minio binary
 	@(env bash $(PWD)/buildscripts/verify-healing.sh)
 	@(env bash $(PWD)/buildscripts/unaligned-healing.sh)
 
+verify-healing-inconsistent-versions: ## verify resolving inconsistent versions
+	@echo "Verify resolving inconsistent versions build with race"
+	@CGO_ENABLED=1 go build -race -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null
+	@(env bash $(PWD)/buildscripts/resolve-right-versions.sh)
+
 build: checks ## builds minio to $(PWD)
 	@echo "Building minio binary to './minio'"
 	@CGO_ENABLED=0 go build -tags kqueue -trimpath --ldflags "$(LDFLAGS)" -o $(PWD)/minio 1>/dev/null
diff --git a/buildscripts/cicd-corpus/disk1/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1 b/buildscripts/cicd-corpus/disk1/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1
new file mode 100644
index 000000000..afbb2da12
Binary files /dev/null and b/buildscripts/cicd-corpus/disk1/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1 differ
diff --git a/buildscripts/cicd-corpus/disk2/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1 b/buildscripts/cicd-corpus/disk2/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1
new file mode 100644
index 000000000..1031cd2a3
Binary files /dev/null and b/buildscripts/cicd-corpus/disk2/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1 differ
diff --git a/buildscripts/cicd-corpus/disk2/bucket/testobj/xl.meta b/buildscripts/cicd-corpus/disk2/bucket/testobj/xl.meta
new file mode 100644
index 000000000..4e786994c
Binary files /dev/null and b/buildscripts/cicd-corpus/disk2/bucket/testobj/xl.meta differ
diff --git a/buildscripts/cicd-corpus/disk3/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1 b/buildscripts/cicd-corpus/disk3/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1
new file mode 100644
index 000000000..789a168c9
Binary files /dev/null and b/buildscripts/cicd-corpus/disk3/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1 differ
diff --git a/buildscripts/cicd-corpus/disk3/bucket/testobj/xl.meta b/buildscripts/cicd-corpus/disk3/bucket/testobj/xl.meta
new file mode 100644
index 000000000..cba1a3478
Binary files /dev/null and b/buildscripts/cicd-corpus/disk3/bucket/testobj/xl.meta differ
diff --git a/buildscripts/cicd-corpus/disk4/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1 b/buildscripts/cicd-corpus/disk4/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1
new file mode 100644
index 000000000..867002851
Binary files /dev/null and b/buildscripts/cicd-corpus/disk4/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1 differ
diff --git a/buildscripts/cicd-corpus/disk4/bucket/testobj/a599bd9e-69fe-49b7-b6bf-fe53021039d5/part.1 b/buildscripts/cicd-corpus/disk4/bucket/testobj/a599bd9e-69fe-49b7-b6bf-fe53021039d5/part.1
new file mode 100644
index 000000000..867002851
Binary files /dev/null and b/buildscripts/cicd-corpus/disk4/bucket/testobj/a599bd9e-69fe-49b7-b6bf-fe53021039d5/part.1 differ
diff --git a/buildscripts/cicd-corpus/disk4/bucket/testobj/xl.meta b/buildscripts/cicd-corpus/disk4/bucket/testobj/xl.meta
new file mode 100644
index 000000000..54bbedd66
Binary files /dev/null and b/buildscripts/cicd-corpus/disk4/bucket/testobj/xl.meta differ
diff --git a/buildscripts/cicd-corpus/disk5/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1 b/buildscripts/cicd-corpus/disk5/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1
new file mode 100644
index 000000000..d39542b5f
Binary files /dev/null and b/buildscripts/cicd-corpus/disk5/bucket/testobj/2b4f7e41-df82-4a5e-a3c1-8df87f83332f/part.1 differ
diff --git a/buildscripts/cicd-corpus/disk5/bucket/testobj/a599bd9e-69fe-49b7-b6bf-fe53021039d5/part.1 b/buildscripts/cicd-corpus/disk5/bucket/testobj/a599bd9e-69fe-49b7-b6bf-fe53021039d5/part.1
new file mode 100644
index 000000000..d39542b5f
Binary files /dev/null and b/buildscripts/cicd-corpus/disk5/bucket/testobj/a599bd9e-69fe-49b7-b6bf-fe53021039d5/part.1 differ
diff --git a/buildscripts/cicd-corpus/disk5/bucket/testobj/xl.meta b/buildscripts/cicd-corpus/disk5/bucket/testobj/xl.meta
new file mode 100644
index 000000000..68f69a631
Binary files /dev/null and b/buildscripts/cicd-corpus/disk5/bucket/testobj/xl.meta differ
diff --git a/buildscripts/resolve-right-versions.sh b/buildscripts/resolve-right-versions.sh
new file mode 100755
index 000000000..066fc207c
--- /dev/null
+++ b/buildscripts/resolve-right-versions.sh
@@ -0,0 +1,72 @@
+#!/bin/bash -e
+
+set -E
+set -o pipefail
+set -x
+
+WORK_DIR="$PWD/.verify-$RANDOM"
+MINIO_CONFIG_DIR="$WORK_DIR/.minio"
+MINIO=( "$PWD/minio" --config-dir "$MINIO_CONFIG_DIR" server )
+
+if [ ! -x "$PWD/minio" ]; then
+	echo "minio executable binary not found in current directory"
+	exit 1
+fi
+
+function start_minio_5drive() {
+	start_port=$1
+
+	export MINIO_ROOT_USER=minio
+	export MINIO_ROOT_PASSWORD=minio123
+	export MC_HOST_minio="http://minio:minio123@127.0.0.1:${start_port}/"
+	unset MINIO_KMS_AUTO_ENCRYPTION # do not auto-encrypt objects
+	export MINIO_CI_CD=1
+
+	MC_BUILD_DIR="mc-$RANDOM"
+	if ! git clone --quiet https://github.com/minio/mc "$MC_BUILD_DIR"; then
+		echo "failed to download https://github.com/minio/mc"
+		purge "${MC_BUILD_DIR}"
+		exit 1
+	fi
+
+	(cd "${MC_BUILD_DIR}" && go build -o "$WORK_DIR/mc")
+
+	# remove mc source.
+	purge "${MC_BUILD_DIR}"
+
+	"${WORK_DIR}/mc" cp --quiet -r "buildscripts/cicd-corpus/" "${WORK_DIR}/cicd-corpus/"
+
+	"${MINIO[@]}" --address ":$start_port" "${WORK_DIR}/cicd-corpus/disk{1...5}" > "${WORK_DIR}/server1.log" 2>&1 &
+	pid=$!
+	disown $pid
+	sleep 30
+
+	if ! ps -p ${pid} 1>&2 >/dev/null; then
+		echo "server1 log:"
+		cat "${WORK_DIR}/server1.log"
+		echo "FAILED"
+		purge "$WORK_DIR"
+		exit 1
+	fi
+
+	"${WORK_DIR}/mc" stat minio/bucket/testobj
+
+	pkill minio
+	sleep 3
+}
+
+function main() {
+	start_port=$(shuf -i 10000-65000 -n 1)
+
+	start_minio_5drive ${start_port}
+}
+
+function purge()
+{
+	rm -rf "$1"
+}
+
+( main "$@" )
+rv=$?
+purge "$WORK_DIR"
+exit "$rv"
diff --git a/cmd/erasure-healing-common.go b/cmd/erasure-healing-common.go
index 5051fb57b..8423ef74f 100644
--- a/cmd/erasure-healing-common.go
+++ b/cmd/erasure-healing-common.go
@@ -31,7 +31,7 @@ func commonTimeAndOccurence(times []time.Time, group time.Duration) (maxTime tim
 	groupNano := group.Nanoseconds()
 	// Ignore the uuid sentinel and count the rest.
 	for _, t := range times {
-		if t.Equal(timeSentinel) {
+		if t.Equal(timeSentinel) || t.IsZero() {
 			continue
 		}
 		nano := t.UnixNano()
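The one-line change to commonTimeAndOccurence above is what makes zero mod-times harmless: drives whose xl.meta could not be read contribute a zero time.Time, and letting those entries vote would skew the common time. Below is a minimal, self-contained sketch of that counting idea; it is illustrative only (it ignores the grouping window the real function applies) and the names are not MinIO's.

```go
package main

import (
	"fmt"
	"time"
)

// commonNonZeroTime returns the mod-time that occurs most often, skipping
// zero values the same way the patched loop skips the sentinel and zero times.
func commonNonZeroTime(times []time.Time) (common time.Time, count int) {
	seen := make(map[int64]int)
	for _, t := range times {
		if t.IsZero() { // drive had no usable xl.meta; do not let it vote
			continue
		}
		seen[t.UnixNano()]++
		if seen[t.UnixNano()] > count {
			common, count = t, seen[t.UnixNano()]
		}
	}
	return common, count
}

func main() {
	now := time.Now().UTC()
	times := []time.Time{now, now, {}, now.Add(time.Second), {}}
	t, n := commonNonZeroTime(times)
	fmt.Println(t, n) // zero entries are ignored; prints the repeated time with count 2
}
```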
diff --git a/cmd/erasure-healing_test.go b/cmd/erasure-healing_test.go
index a8a5848e4..1ddd5fcb2 100644
--- a/cmd/erasure-healing_test.go
+++ b/cmd/erasure-healing_test.go
@@ -758,8 +758,10 @@ func TestHealObjectCorruptedPools(t *testing.T) {
 		t.Fatalf("Failed to getLatestFileInfo - %v", err)
 	}
 
+	fi.DiskMTime = time.Time{}
+	nfi.DiskMTime = time.Time{}
 	if !reflect.DeepEqual(fi, nfi) {
-		t.Fatalf("FileInfo not equal after healing")
+		t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi)
 	}
 
 	err = firstDisk.Delete(context.Background(), bucket, pathJoin(object, fi.DataDir, "part.1"), false)
@@ -784,8 +786,10 @@ func TestHealObjectCorruptedPools(t *testing.T) {
 		t.Fatalf("Failed to getLatestFileInfo - %v", err)
 	}
 
+	fi.DiskMTime = time.Time{}
+	nfi.DiskMTime = time.Time{}
 	if !reflect.DeepEqual(fi, nfi) {
-		t.Fatalf("FileInfo not equal after healing")
+		t.Fatalf("FileInfo not equal after healing: %v != %v", fi, nfi)
 	}
 
 	// Test 4: checks if HealObject returns an error when xl.meta is not found
@@ -904,6 +908,8 @@ func TestHealObjectCorruptedXLMeta(t *testing.T) {
 		t.Fatalf("Failed to getLatestFileInfo - %v", err)
 	}
 
+	fi.DiskMTime = time.Time{}
+	nfi1.DiskMTime = time.Time{}
 	if !reflect.DeepEqual(fi, nfi1) {
 		t.Fatalf("FileInfo not equal after healing")
 	}
@@ -925,6 +931,8 @@ func TestHealObjectCorruptedXLMeta(t *testing.T) {
 		t.Fatalf("Failed to getLatestFileInfo - %v", err)
 	}
 
+	fi.DiskMTime = time.Time{}
+	nfi2.DiskMTime = time.Time{}
 	if !reflect.DeepEqual(fi, nfi2) {
 		t.Fatalf("FileInfo not equal after healing")
 	}
diff --git a/cmd/erasure-object.go b/cmd/erasure-object.go
index 062b84593..738518c87 100644
--- a/cmd/erasure-object.go
+++ b/cmd/erasure-object.go
@@ -81,12 +81,26 @@ func (er erasureObjects) CopyObject(ctx context.Context, srcBucket, srcObject, d
 	}
 
 	// Read metadata associated with the object from all disks.
 	storageDisks := er.getDisks()
-	metaArr, errs := readAllFileInfo(ctx, storageDisks, srcBucket, srcObject, srcOpts.VersionID, true)
-	// get Quorum for this object
+	var metaArr []FileInfo
+	var errs []error
+
+	// Read metadata associated with the object from all disks.
+	if srcOpts.VersionID != "" {
+		metaArr, errs = readAllFileInfo(ctx, storageDisks, srcBucket, srcObject, srcOpts.VersionID, true)
+	} else {
+		metaArr, errs = readAllXL(ctx, storageDisks, srcBucket, srcObject, true)
+	}
+
 	readQuorum, writeQuorum, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount)
 	if err != nil {
-		return oi, toObjectErr(err, srcBucket, srcObject)
+		if errors.Is(err, errErasureReadQuorum) && !strings.HasPrefix(srcBucket, minioMetaBucket) {
+			_, derr := er.deleteIfDangling(ctx, srcBucket, srcObject, metaArr, errs, nil, srcOpts)
+			if derr != nil {
+				err = derr
+			}
+		}
+		return ObjectInfo{}, toObjectErr(err, srcBucket, srcObject)
 	}
 
 	// List all online disks.
@@ -436,11 +450,90 @@ func (er erasureObjects) deleteIfDangling(ctx context.Context, bucket, object st
 	return m, err
 }
 
+func readAllXL(ctx context.Context, disks []StorageAPI, bucket, object string, readData bool) ([]FileInfo, []error) {
+	metadataArray := make([]*xlMetaV2, len(disks))
+	metaFileInfos := make([]FileInfo, len(metadataArray))
+	metadataShallowVersions := make([][]xlMetaV2ShallowVersion, len(disks))
+
+	g := errgroup.WithNErrs(len(disks))
+	// Read `xl.meta` in parallel across disks.
+	for index := range disks {
+		index := index
+		g.Go(func() (err error) {
+			if disks[index] == nil {
+				return errDiskNotFound
+			}
+			rf, err := disks[index].ReadXL(ctx, bucket, object, readData)
+			if err != nil {
+				if !IsErr(err, []error{
+					errFileNotFound,
+					errVolumeNotFound,
+					errFileVersionNotFound,
+					errDiskNotFound,
+				}...) {
+					logger.LogOnceIf(ctx, fmt.Errorf("Drive %s, path (%s/%s) returned an error (%w)",
+						disks[index], bucket, object, err),
+						disks[index].String())
+				}
+				return err
+			}
+
+			var xl xlMetaV2
+			if err = xl.LoadOrConvert(rf.Buf); err != nil {
+				return err
+			}
+			metadataArray[index] = &xl
+			metaFileInfos[index] = FileInfo{
+				DiskMTime: rf.DiskMTime,
+			}
+			return nil
+		}, index)
+	}
+
+	errs := g.Wait()
+	for index := range metadataArray {
+		if metadataArray[index] != nil {
+			metadataShallowVersions[index] = metadataArray[index].versions
+		}
+	}
+
+	readQuorum := (len(disks) + 1) / 2
+	merged := mergeXLV2Versions(readQuorum, false, 1, metadataShallowVersions...)
+	for index := range metadataArray {
+		if metadataArray[index] == nil {
+			continue
+		}
+
+		metadataArray[index].versions = merged
+
+		// make sure to preserve this for diskmtime based healing bugfix.
+		diskMTime := metaFileInfos[index].DiskMTime
+		metaFileInfos[index], errs[index] = metadataArray[index].ToFileInfo(bucket, object, "")
+		if errs[index] == nil {
+			versionID := metaFileInfos[index].VersionID
+			if versionID == "" {
+				versionID = nullVersionID
+			}
+			metaFileInfos[index].Data = metadataArray[index].data.find(versionID)
+			metaFileInfos[index].DiskMTime = diskMTime
+		}
+	}
+
+	// Return all the metadata.
+	return metaFileInfos, errs
+}
+
 func (er erasureObjects) getObjectFileInfo(ctx context.Context, bucket, object string, opts ObjectOptions, readData bool) (fi FileInfo, metaArr []FileInfo, onlineDisks []StorageAPI, err error) {
 	disks := er.getDisks()
 
+	var errs []error
+
 	// Read metadata associated with the object from all disks.
-	metaArr, errs := readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, readData)
+	if opts.VersionID != "" {
+		metaArr, errs = readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, readData)
+	} else {
+		metaArr, errs = readAllXL(ctx, disks, bucket, object, readData)
+	}
 
 	readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount)
 	if err != nil {
@@ -1453,11 +1546,24 @@ func (er erasureObjects) PutObjectMetadata(ctx context.Context, bucket, object s
 	disks := er.getDisks()
 
+	var metaArr []FileInfo
+	var errs []error
+
 	// Read metadata associated with the object from all disks.
-	metaArr, errs := readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, false)
+	if opts.VersionID != "" {
+		metaArr, errs = readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, false)
+	} else {
+		metaArr, errs = readAllXL(ctx, disks, bucket, object, false)
+	}
 
 	readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount)
 	if err != nil {
+		if errors.Is(err, errErasureReadQuorum) && !strings.HasPrefix(bucket, minioMetaBucket) {
+			_, derr := er.deleteIfDangling(ctx, bucket, object, metaArr, errs, nil, opts)
+			if derr != nil {
+				err = derr
+			}
+		}
 		return ObjectInfo{}, toObjectErr(err, bucket, object)
 	}
 
@@ -1513,11 +1619,24 @@ func (er erasureObjects) PutObjectTags(ctx context.Context, bucket, object strin
 	disks := er.getDisks()
 
+	var metaArr []FileInfo
+	var errs []error
+
 	// Read metadata associated with the object from all disks.
-	metaArr, errs := readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, false)
+	if opts.VersionID != "" {
+		metaArr, errs = readAllFileInfo(ctx, disks, bucket, object, opts.VersionID, false)
+	} else {
+		metaArr, errs = readAllXL(ctx, disks, bucket, object, false)
+	}
 
 	readQuorum, _, err := objectQuorumFromMeta(ctx, metaArr, errs, er.defaultParityCount)
 	if err != nil {
+		if errors.Is(err, errErasureReadQuorum) && !strings.HasPrefix(bucket, minioMetaBucket) {
+			_, derr := er.deleteIfDangling(ctx, bucket, object, metaArr, errs, nil, opts)
+			if derr != nil {
+				err = derr
+			}
+		}
 		return ObjectInfo{}, toObjectErr(err, bucket, object)
 	}
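The same pattern now appears in CopyObject, getObjectFileInfo, PutObjectMetadata and PutObjectTags: when no explicit versionID is requested, raw xl.meta is read from every drive (readAllXL), the per-drive version lists are merged at read quorum before object quorum is computed, and a remaining errErasureReadQuorum triggers the dangling-object check (skipped for the minioMetaBucket prefix). The sketch below shows only the merge-at-quorum idea with illustrative types; it is not MinIO's mergeXLV2Versions, which also handles strictness and version ordering.

```go
package main

import "fmt"

// version is a simplified stand-in for one shallow xl.meta version entry.
type version struct {
	ID      string
	ModTime int64
}

// mergeAtQuorum keeps only the versions that at least `quorum` drives agree on,
// which is the core idea behind merging shallow versions before computing quorum.
func mergeAtQuorum(quorum int, perDrive ...[]version) []version {
	counts := make(map[version]int)
	for _, vs := range perDrive {
		for _, v := range vs {
			counts[v]++
		}
	}
	var merged []version
	for v, n := range counts {
		if n >= quorum {
			merged = append(merged, v)
		}
	}
	return merged
}

func main() {
	d1 := []version{{"v1", 100}, {"v2", 200}}
	d2 := []version{{"v1", 100}, {"v2", 200}}
	d3 := []version{{"v1", 100}} // drive holding an older, inconsistent xl.meta
	fmt.Println(mergeAtQuorum(2, d1, d2, d3)) // only versions seen by >= 2 drives survive
}
```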
diff --git a/cmd/naughty-disk_test.go b/cmd/naughty-disk_test.go
index 7081a3bba..d7b413c58 100644
--- a/cmd/naughty-disk_test.go
+++ b/cmd/naughty-disk_test.go
@@ -281,6 +281,13 @@ func (d *naughtyDisk) ReadAll(ctx context.Context, volume string, path string) (
 	return d.disk.ReadAll(ctx, volume, path)
 }
 
+func (d *naughtyDisk) ReadXL(ctx context.Context, volume string, path string, readData bool) (rf RawFileInfo, err error) {
+	if err := d.calcError(); err != nil {
+		return rf, err
+	}
+	return d.disk.ReadXL(ctx, volume, path, readData)
+}
+
 func (d *naughtyDisk) VerifyFile(ctx context.Context, volume, path string, fi FileInfo) error {
 	if err := d.calcError(); err != nil {
 		return err
diff --git a/cmd/storage-datatypes.go b/cmd/storage-datatypes.go
index 99059dc65..38cf181f2 100644
--- a/cmd/storage-datatypes.go
+++ b/cmd/storage-datatypes.go
@@ -112,9 +112,23 @@ func (f *FileInfoVersions) findVersionIndex(v string) int {
 	return -1
 }
 
+// RawFileInfo - represents raw file stat information as byte array.
+// The above means that any added/deleted fields are incompatible.
+// Make sure to bump the internode version at storage-rest-common.go
+type RawFileInfo struct {
+	// Content of entire xl.meta (may contain data depending on what was requested by the caller).
+	Buf []byte `msg:"b"`
+
+	// DiskMTime indicates the mtime of the xl.meta on disk
+	// This is mainly used for detecting a particular issue
+	// reported in https://github.com/minio/minio/pull/13803
+	DiskMTime time.Time `msg:"dmt"`
+}
+
 // FileInfo - represents file stat information.
 //msgp:tuple FileInfo
 // The above means that any added/deleted fields are incompatible.
+// Make sure to bump the internode version at storage-rest-common.go
 type FileInfo struct {
 	// Name of the volume.
 	Volume string `msg:"v,omitempty"`
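RawFileInfo deliberately carries only the unparsed xl.meta bytes and the on-disk mtime, so parsing and version resolution happen in the erasure layer rather than on each remote drive; readAllXL then has to re-attach DiskMTime after converting the parsed metadata to a FileInfo. The sketch below mirrors that preserve-then-restore step with illustrative types only (JSON stands in for the real xl.meta encoding).

```go
package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// rawFileInfo mirrors the shape of RawFileInfo in the diff: raw metadata
// bytes plus the mtime of the file they were read from.
type rawFileInfo struct {
	Buf       []byte
	DiskMTime time.Time
}

type fileInfo struct {
	Name      string
	DiskMTime time.Time
}

// toFileInfo parses the raw buffer and re-attaches the disk mtime afterwards,
// the same dance readAllXL does around ToFileInfo so the mtime survives.
func toFileInfo(rf rawFileInfo) (fileInfo, error) {
	var fi fileInfo
	if err := json.Unmarshal(rf.Buf, &fi); err != nil {
		return fi, err
	}
	fi.DiskMTime = rf.DiskMTime // the parsed metadata knows nothing about the on-disk mtime
	return fi, nil
}

func main() {
	rf := rawFileInfo{Buf: []byte(`{"Name":"testobj"}`), DiskMTime: time.Now().UTC()}
	fi, err := toFileInfo(rf)
	fmt.Println(fi, err)
}
```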
diff --git a/cmd/storage-datatypes_gen.go b/cmd/storage-datatypes_gen.go
index 26837980b..949df878e 100644
--- a/cmd/storage-datatypes_gen.go
+++ b/cmd/storage-datatypes_gen.go
@@ -1530,6 +1530,134 @@ func (z *FilesInfo) Msgsize() (s int) {
 	return
 }
 
+// DecodeMsg implements msgp.Decodable
+func (z *RawFileInfo) DecodeMsg(dc *msgp.Reader) (err error) {
+	var field []byte
+	_ = field
+	var zb0001 uint32
+	zb0001, err = dc.ReadMapHeader()
+	if err != nil {
+		err = msgp.WrapError(err)
+		return
+	}
+	for zb0001 > 0 {
+		zb0001--
+		field, err = dc.ReadMapKeyPtr()
+		if err != nil {
+			err = msgp.WrapError(err)
+			return
+		}
+		switch msgp.UnsafeString(field) {
+		case "b":
+			z.Buf, err = dc.ReadBytes(z.Buf)
+			if err != nil {
+				err = msgp.WrapError(err, "Buf")
+				return
+			}
+		case "dmt":
+			z.DiskMTime, err = dc.ReadTime()
+			if err != nil {
+				err = msgp.WrapError(err, "DiskMTime")
+				return
+			}
+		default:
+			err = dc.Skip()
+			if err != nil {
+				err = msgp.WrapError(err)
+				return
+			}
+		}
+	}
+	return
+}
+
+// EncodeMsg implements msgp.Encodable
+func (z *RawFileInfo) EncodeMsg(en *msgp.Writer) (err error) {
+	// map header, size 2
+	// write "b"
+	err = en.Append(0x82, 0xa1, 0x62)
+	if err != nil {
+		return
+	}
+	err = en.WriteBytes(z.Buf)
+	if err != nil {
+		err = msgp.WrapError(err, "Buf")
+		return
+	}
+	// write "dmt"
+	err = en.Append(0xa3, 0x64, 0x6d, 0x74)
+	if err != nil {
+		return
+	}
+	err = en.WriteTime(z.DiskMTime)
+	if err != nil {
+		err = msgp.WrapError(err, "DiskMTime")
+		return
+	}
+	return
+}
+
+// MarshalMsg implements msgp.Marshaler
+func (z *RawFileInfo) MarshalMsg(b []byte) (o []byte, err error) {
+	o = msgp.Require(b, z.Msgsize())
+	// map header, size 2
+	// string "b"
+	o = append(o, 0x82, 0xa1, 0x62)
+	o = msgp.AppendBytes(o, z.Buf)
+	// string "dmt"
+	o = append(o, 0xa3, 0x64, 0x6d, 0x74)
+	o = msgp.AppendTime(o, z.DiskMTime)
+	return
+}
+
+// UnmarshalMsg implements msgp.Unmarshaler
+func (z *RawFileInfo) UnmarshalMsg(bts []byte) (o []byte, err error) {
+	var field []byte
+	_ = field
+	var zb0001 uint32
+	zb0001, bts, err = msgp.ReadMapHeaderBytes(bts)
+	if err != nil {
+		err = msgp.WrapError(err)
+		return
+	}
+	for zb0001 > 0 {
+		zb0001--
+		field, bts, err = msgp.ReadMapKeyZC(bts)
+		if err != nil {
+			err = msgp.WrapError(err)
+			return
+		}
+		switch msgp.UnsafeString(field) {
+		case "b":
+			z.Buf, bts, err = msgp.ReadBytesBytes(bts, z.Buf)
+			if err != nil {
+				err = msgp.WrapError(err, "Buf")
+				return
+			}
+		case "dmt":
+			z.DiskMTime, bts, err = msgp.ReadTimeBytes(bts)
+			if err != nil {
+				err = msgp.WrapError(err, "DiskMTime")
+				return
+			}
+		default:
+			bts, err = msgp.Skip(bts)
+			if err != nil {
+				err = msgp.WrapError(err)
+				return
+			}
+		}
+	}
+	o = bts
+	return
+}
+
+// Msgsize returns an upper bound estimate of the number of bytes occupied by the serialized message
+func (z *RawFileInfo) Msgsize() (s int) {
+	s = 1 + 2 + msgp.BytesPrefixSize + len(z.Buf) + 4 + msgp.TimeSize
+	return
+}
+
 // DecodeMsg implements msgp.Decodable
 func (z *VolInfo) DecodeMsg(dc *msgp.Reader) (err error) {
 	var zb0001 uint32
diff --git a/cmd/storage-datatypes_gen_test.go b/cmd/storage-datatypes_gen_test.go
index 1495d113b..a6b5f9343 100644
--- a/cmd/storage-datatypes_gen_test.go
+++ b/cmd/storage-datatypes_gen_test.go
@@ -574,6 +574,119 @@ func BenchmarkDecodeFilesInfo(b *testing.B) {
 	}
 }
 
+func TestMarshalUnmarshalRawFileInfo(t *testing.T) {
+	v := RawFileInfo{}
+	bts, err := v.MarshalMsg(nil)
+	if err != nil {
+		t.Fatal(err)
+	}
+	left, err := v.UnmarshalMsg(bts)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(left) > 0 {
+		t.Errorf("%d bytes left over after UnmarshalMsg(): %q", len(left), left)
+	}
+
+	left, err = msgp.Skip(bts)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(left) > 0 {
+		t.Errorf("%d bytes left over after Skip(): %q", len(left), left)
+	}
+}
+
+func BenchmarkMarshalMsgRawFileInfo(b *testing.B) {
+	v := RawFileInfo{}
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		v.MarshalMsg(nil)
+	}
+}
+
+func BenchmarkAppendMsgRawFileInfo(b *testing.B) {
+	v := RawFileInfo{}
+	bts := make([]byte, 0, v.Msgsize())
+	bts, _ = v.MarshalMsg(bts[0:0])
+	b.SetBytes(int64(len(bts)))
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		bts, _ = v.MarshalMsg(bts[0:0])
+	}
+}
+
+func BenchmarkUnmarshalRawFileInfo(b *testing.B) {
+	v := RawFileInfo{}
+	bts, _ := v.MarshalMsg(nil)
+	b.ReportAllocs()
+	b.SetBytes(int64(len(bts)))
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		_, err := v.UnmarshalMsg(bts)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
+func TestEncodeDecodeRawFileInfo(t *testing.T) {
+	v := RawFileInfo{}
+	var buf bytes.Buffer
+	msgp.Encode(&buf, &v)
+
+	m := v.Msgsize()
+	if buf.Len() > m {
+		t.Log("WARNING: TestEncodeDecodeRawFileInfo Msgsize() is inaccurate")
+	}
+
+	vn := RawFileInfo{}
+	err := msgp.Decode(&buf, &vn)
+	if err != nil {
+		t.Error(err)
+	}
+
+	buf.Reset()
+	msgp.Encode(&buf, &v)
+	err = msgp.NewReader(&buf).Skip()
+	if err != nil {
+		t.Error(err)
+	}
+}
+
+func BenchmarkEncodeRawFileInfo(b *testing.B) {
+	v := RawFileInfo{}
+	var buf bytes.Buffer
+	msgp.Encode(&buf, &v)
+	b.SetBytes(int64(buf.Len()))
+	en := msgp.NewWriter(msgp.Nowhere)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		v.EncodeMsg(en)
+	}
+	en.Flush()
+}
+
+func BenchmarkDecodeRawFileInfo(b *testing.B) {
+	v := RawFileInfo{}
+	var buf bytes.Buffer
+	msgp.Encode(&buf, &v)
+	b.SetBytes(int64(buf.Len()))
+	rd := msgp.NewEndlessReader(buf.Bytes(), b)
+	dc := msgp.NewReader(rd)
+	b.ReportAllocs()
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		err := v.DecodeMsg(dc)
+		if err != nil {
+			b.Fatal(err)
+		}
+	}
+}
+
 func TestMarshalUnmarshalVolInfo(t *testing.T) {
 	v := VolInfo{}
 	bts, err := v.MarshalMsg(nil)
diff --git a/cmd/storage-interface.go b/cmd/storage-interface.go
index 3a95505a3..63d310d59 100644
--- a/cmd/storage-interface.go
+++ b/cmd/storage-interface.go
@@ -84,6 +84,7 @@ type StorageAPI interface {
 	WriteMetadata(ctx context.Context, volume, path string, fi FileInfo) error
 	UpdateMetadata(ctx context.Context, volume, path string, fi FileInfo) error
 	ReadVersion(ctx context.Context, volume, path, versionID string, readData bool) (FileInfo, error)
+	ReadXL(ctx context.Context, volume, path string, readData bool) (RawFileInfo, error)
 	RenameData(ctx context.Context, srcVolume, srcPath string, fi FileInfo, dstVolume, dstPath string) error
 
 	// File operations.
@@ -261,6 +262,10 @@ func (p *unrecognizedDisk) ReadVersion(ctx context.Context, volume, path, versio
 	return fi, errDiskNotFound
 }
 
+func (p *unrecognizedDisk) ReadXL(ctx context.Context, volume, path string, readData bool) (rf RawFileInfo, err error) {
+	return rf, errDiskNotFound
+}
+
 func (p *unrecognizedDisk) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) {
 	return nil, errDiskNotFound
 }
diff --git a/cmd/storage-rest-client.go b/cmd/storage-rest-client.go
index 0f965dee5..388c1f188 100644
--- a/cmd/storage-rest-client.go
+++ b/cmd/storage-rest-client.go
@@ -515,6 +515,25 @@ func (client *storageRESTClient) ReadVersion(ctx context.Context, volume, path,
 	return fi, err
 }
 
+// ReadXL - reads all contents of xl.meta of a file.
+func (client *storageRESTClient) ReadXL(ctx context.Context, volume string, path string, readData bool) (rf RawFileInfo, err error) {
+	values := make(url.Values)
+	values.Set(storageRESTVolume, volume)
+	values.Set(storageRESTFilePath, path)
+	values.Set(storageRESTReadData, strconv.FormatBool(readData))
+	respBody, err := client.call(ctx, storageRESTMethodReadXL, values, nil, -1)
+	if err != nil {
+		return rf, err
+	}
+	defer xhttp.DrainBody(respBody)
+
+	dec := msgpNewReader(respBody)
+	defer readMsgpReaderPool.Put(dec)
+
+	err = rf.DecodeMsg(dec)
+	return rf, err
+}
+
 // ReadAll - reads all contents of a file.
 func (client *storageRESTClient) ReadAll(ctx context.Context, volume string, path string) ([]byte, error) {
 	values := make(url.Values)
diff --git a/cmd/storage-rest-common.go b/cmd/storage-rest-common.go
index 9b8c74bb6..364991cf4 100644
--- a/cmd/storage-rest-common.go
+++ b/cmd/storage-rest-common.go
@@ -18,7 +18,7 @@ package cmd
 
 const (
-	storageRESTVersion       = "v44" // Added heal scan mode in NSScanner
+	storageRESTVersion       = "v45" // Added ReadXL API
 	storageRESTVersionPrefix = SlashSeparator + storageRESTVersion
 	storageRESTPrefix        = minioReservedBucketPath + "/storage"
 )
@@ -40,6 +40,7 @@ const (
 	storageRESTMethodUpdateMetadata = "/updatemetadata"
 	storageRESTMethodDeleteVersion  = "/deleteversion"
 	storageRESTMethodReadVersion    = "/readversion"
+	storageRESTMethodReadXL         = "/readxl"
 	storageRESTMethodRenameData     = "/renamedata"
 	storageRESTMethodCheckParts     = "/checkparts"
 	storageRESTMethodReadAll        = "/readall"
diff --git a/cmd/storage-rest-server.go b/cmd/storage-rest-server.go
index 53bbe63f3..27a07dc89 100644
--- a/cmd/storage-rest-server.go
+++ b/cmd/storage-rest-server.go
@@ -520,6 +520,28 @@ func (s *storageRESTServer) ReadAllHandler(w http.ResponseWriter, r *http.Reques
 	w.Write(buf)
 }
 
+// ReadXLHandler - read xl.meta for an object at path.
+func (s *storageRESTServer) ReadXLHandler(w http.ResponseWriter, r *http.Request) {
+	if !s.IsValid(w, r) {
+		return
+	}
+	volume := r.Form.Get(storageRESTVolume)
+	filePath := r.Form.Get(storageRESTFilePath)
+	readData, err := strconv.ParseBool(r.Form.Get(storageRESTReadData))
+	if err != nil {
+		s.writeErrorResponse(w, err)
+		return
+	}
+
+	rf, err := s.storage.ReadXL(r.Context(), volume, filePath, readData)
+	if err != nil {
+		s.writeErrorResponse(w, err)
+		return
+	}
+
+	logger.LogIf(r.Context(), msgp.Encode(w, &rf))
+}
+
 // ReadFileHandler - read section of a file.
 func (s *storageRESTServer) ReadFileHandler(w http.ResponseWriter, r *http.Request) {
 	if !s.IsValid(w, r) {
@@ -1261,6 +1283,7 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
 			subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodUpdateMetadata).HandlerFunc(httpTraceHdrs(server.UpdateMetadataHandler))
 			subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodDeleteVersion).HandlerFunc(httpTraceHdrs(server.DeleteVersionHandler))
 			subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodReadVersion).HandlerFunc(httpTraceHdrs(server.ReadVersionHandler))
+			subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodReadXL).HandlerFunc(httpTraceHdrs(server.ReadXLHandler))
 			subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodRenameData).HandlerFunc(httpTraceHdrs(server.RenameDataHandler))
 			subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodCreateFile).HandlerFunc(httpTraceHdrs(server.CreateFileHandler))
 			subrouter.Methods(http.MethodPost).Path(storageRESTVersionPrefix + storageRESTMethodCheckParts).HandlerFunc(httpTraceHdrs(server.CheckPartsHandler))
diff --git a/cmd/storagemetric_string.go b/cmd/storagemetric_string.go
index 590cc5956..71f196d77 100644
--- a/cmd/storagemetric_string.go
+++ b/cmd/storagemetric_string.go
@@ -30,14 +30,15 @@ func _() {
 	_ = x[storageMetricWriteMetadata-19]
 	_ = x[storageMetricUpdateMetadata-20]
 	_ = x[storageMetricReadVersion-21]
-	_ = x[storageMetricReadAll-22]
-	_ = x[storageMetricStatInfoFile-23]
-	_ = x[storageMetricLast-24]
+	_ = x[storageMetricReadXL-22]
+	_ = x[storageMetricReadAll-23]
+	_ = x[storageMetricStatInfoFile-24]
+	_ = x[storageMetricLast-25]
 }
 
-const _storageMetric_name = "MakeVolBulkMakeVolListVolsStatVolDeleteVolWalkDirListDirReadFileAppendFileCreateFileReadFileStreamRenameFileRenameDataCheckPartsDeleteDeleteVersionsVerifyFileWriteAllDeleteVersionWriteMetadataUpdateMetadataReadVersionReadAllStatInfoFileLast"
+const _storageMetric_name = "MakeVolBulkMakeVolListVolsStatVolDeleteVolWalkDirListDirReadFileAppendFileCreateFileReadFileStreamRenameFileRenameDataCheckPartsDeleteDeleteVersionsVerifyFileWriteAllDeleteVersionWriteMetadataUpdateMetadataReadVersionReadXLReadAllStatInfoFileLast"
 
-var _storageMetric_index = [...]uint8{0, 11, 18, 26, 33, 42, 49, 56, 64, 74, 84, 98, 108, 118, 128, 134, 148, 158, 166, 179, 192, 206, 217, 224, 236, 240}
+var _storageMetric_index = [...]uint8{0, 11, 18, 26, 33, 42, 49, 56, 64, 74, 84, 98, 108, 118, 128, 134, 148, 158, 166, 179, 192, 206, 217, 223, 230, 242, 246}
 
 func (i storageMetric) String() string {
 	if i >= storageMetric(len(_storageMetric_index)-1) {
diff --git a/cmd/xl-storage-disk-id-check.go b/cmd/xl-storage-disk-id-check.go
index 27480f425..c8e17757c 100644
--- a/cmd/xl-storage-disk-id-check.go
+++ b/cmd/xl-storage-disk-id-check.go
@@ -61,6 +61,7 @@ const (
 	storageMetricWriteMetadata
 	storageMetricUpdateMetadata
 	storageMetricReadVersion
+	storageMetricReadXL
 	storageMetricReadAll
 	storageMetricStatInfoFile
 
@@ -473,6 +474,16 @@ func (p *xlStorageDiskIDCheck) ReadAll(ctx context.Context, volume string, path
 	return p.storage.ReadAll(ctx, volume, path)
 }
 
+func (p *xlStorageDiskIDCheck) ReadXL(ctx context.Context, volume string, path string, readData bool) (rf RawFileInfo, err error) {
+	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricReadXL, volume, path)
+	if err != nil {
+		return RawFileInfo{}, err
+	}
+	defer done(&err)
+
+	return p.storage.ReadXL(ctx, volume, path, readData)
+}
+
 func (p *xlStorageDiskIDCheck) StatInfoFile(ctx context.Context, volume, path string, glob bool) (stat []StatInfo, err error) {
 	ctx, done, err := p.TrackDiskHealth(ctx, storageMetricStatInfoFile, volume, path)
 	if err != nil {
diff --git a/cmd/xl-storage.go b/cmd/xl-storage.go
index 6a6e1ba1e..9b2b7f5f9 100644
--- a/cmd/xl-storage.go
+++ b/cmd/xl-storage.go
@@ -1226,6 +1226,60 @@ func (s *xlStorage) renameLegacyMetadata(volumeDir, path string) (err error) {
 	return nil
 }
 
+func (s *xlStorage) readRaw(ctx context.Context, volumeDir, filePath string, readData bool) (buf []byte, dmTime time.Time, err error) {
+	if readData {
+		buf, dmTime, err = s.readAllData(ctx, volumeDir, pathJoin(filePath, xlStorageFormatFile))
+	} else {
+		buf, dmTime, err = s.readMetadataWithDMTime(ctx, pathJoin(filePath, xlStorageFormatFile))
+		if err != nil {
+			if osIsNotExist(err) {
+				if aerr := Access(volumeDir); aerr != nil && osIsNotExist(aerr) {
+					return nil, time.Time{}, errVolumeNotFound
+				}
+			}
+			err = osErrToFileErr(err)
+		}
+	}
+
+	if err != nil {
+		if err == errFileNotFound {
+			buf, dmTime, err = s.readAllData(ctx, volumeDir, pathJoin(filePath, xlStorageFormatFileV1))
+			if err != nil {
+				return nil, time.Time{}, err
+			}
+		} else {
+			return nil, time.Time{}, err
+		}
+	}
+
+	if len(buf) == 0 {
+		return nil, time.Time{}, errFileNotFound
+	}
+
+	return buf, dmTime, nil
+}
+
+// ReadXL reads from path/xl.meta, does not interpret the data it read. This
+// is a raw call equivalent of ReadVersion().
+func (s *xlStorage) ReadXL(ctx context.Context, volume, path string, readData bool) (RawFileInfo, error) {
+	volumeDir, err := s.getVolDir(volume)
+	if err != nil {
+		return RawFileInfo{}, err
+	}
+
+	// Validate file path length, before reading.
+	filePath := pathJoin(volumeDir, path)
+	if err = checkPathLength(filePath); err != nil {
+		return RawFileInfo{}, err
+	}
+
+	buf, dmTime, err := s.readRaw(ctx, volumeDir, filePath, readData)
+	return RawFileInfo{
+		Buf:       buf,
+		DiskMTime: dmTime,
+	}, err
+}
+
 // ReadVersion - reads metadata and returns FileInfo at path `xl.meta`
 // for all objects less than `32KiB` this call returns data as well
 // along with metadata.
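readRaw centralizes the fallback order that ReadVersion used to open-code: try xl.meta (with or without inline data depending on readData), fall back to the legacy xl.json on errFileNotFound, and treat an empty buffer as not found. A rough standalone sketch of that order, using plain os calls and illustrative file names rather than MinIO's helpers:

```go
package main

import (
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"time"
)

var errFileNotFound = errors.New("file not found")

// readRawSketch tries the current metadata file first, falls back to the
// legacy one, and treats an empty buffer as not found, mirroring readRaw's order.
func readRawSketch(dir string) ([]byte, time.Time, error) {
	for _, name := range []string{"xl.meta", "xl.json"} {
		p := filepath.Join(dir, name)
		st, err := os.Stat(p)
		if err != nil {
			if os.IsNotExist(err) {
				continue // try the older format next
			}
			return nil, time.Time{}, err
		}
		buf, err := os.ReadFile(p)
		if err != nil {
			return nil, time.Time{}, err
		}
		if len(buf) == 0 {
			return nil, time.Time{}, errFileNotFound
		}
		return buf, st.ModTime().UTC(), nil
	}
	return nil, time.Time{}, errFileNotFound
}

func main() {
	buf, mtime, err := readRawSketch(".")
	fmt.Println(len(buf), mtime, err)
}
```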
@@ -1240,44 +1294,14 @@ func (s *xlStorage) ReadVersion(ctx context.Context, volume, path, versionID str
 		return fi, err
 	}
 
-	var buf []byte
-	var dmTime time.Time
-	if readData {
-		buf, dmTime, err = s.readAllData(ctx, volumeDir, pathJoin(filePath, xlStorageFormatFile))
-	} else {
-		buf, dmTime, err = s.readMetadataWithDMTime(ctx, pathJoin(filePath, xlStorageFormatFile))
-		if err != nil {
-			if osIsNotExist(err) {
-				if aerr := Access(volumeDir); aerr != nil && osIsNotExist(aerr) {
-					return fi, errVolumeNotFound
-				}
-			}
-			err = osErrToFileErr(err)
-		}
-	}
-
+	buf, dmTime, err := s.readRaw(ctx, volumeDir, filePath, readData)
 	if err != nil {
 		if err == errFileNotFound {
-			buf, dmTime, err = s.readAllData(ctx, volumeDir, pathJoin(filePath, xlStorageFormatFileV1))
-			if err != nil {
-				if err == errFileNotFound {
-					if versionID != "" {
-						return fi, errFileVersionNotFound
-					}
-					return fi, errFileNotFound
-				}
-				return fi, err
+			if versionID != "" {
+				return fi, errFileVersionNotFound
 			}
-		} else {
-			return fi, err
 		}
-	}
-
-	if len(buf) == 0 {
-		if versionID != "" {
-			return fi, errFileVersionNotFound
-		}
-		return fi, errFileNotFound
+		return fi, err
 	}
 
 	fi, err = getFileInfo(buf, volume, path, versionID, readData)
@@ -1404,12 +1428,7 @@ func (s *xlStorage) readAllData(ctx context.Context, volumeDir string, filePath
 	return buf, stat.ModTime().UTC(), osErrToFileErr(err)
 }
 
-// ReadAll reads from r until an error or EOF and returns the data it read.
-// A successful call returns err == nil, not err == EOF. Because ReadAll is
-// defined to read from src until EOF, it does not treat an EOF from Read
-// as an error to be reported.
-// This API is meant to be used on files which have small memory footprint, do
-// not use this on large files as it would cause server to crash.
+// ReadAll is a raw call, reads content at any path and returns the buffer.
 func (s *xlStorage) ReadAll(ctx context.Context, volume string, path string) (buf []byte, err error) {
 	// Specific optimization to avoid re-read from the drives for `format.json`
 	// in-case the caller is a network operation.
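With readRaw doing the heavy lifting, ReadVersion keeps only the error mapping: a missing xl.meta is reported as errFileVersionNotFound when the caller asked for a specific versionID, and as errFileNotFound otherwise. A tiny sketch of that mapping, with local error values standing in for MinIO's:

```go
package main

import (
	"errors"
	"fmt"
)

var (
	errFileNotFound        = errors.New("file not found")
	errFileVersionNotFound = errors.New("file version not found")
)

// mapNotFound reproduces the simplified mapping ReadVersion applies after
// delegating the read to readRaw.
func mapNotFound(err error, versionID string) error {
	if errors.Is(err, errFileNotFound) && versionID != "" {
		return errFileVersionNotFound
	}
	return err
}

func main() {
	fmt.Println(mapNotFound(errFileNotFound, ""))                // file not found
	fmt.Println(mapNotFound(errFileNotFound, "some-version-id")) // file version not found
}
```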