2021-04-18 12:41:13 -07:00
|
|
|
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
|
|
//
|
|
|
|
// This file is part of MinIO Object Storage stack
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2020-06-12 20:04:01 -07:00
|
|
|
|
|
|
|
package cmd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"encoding/hex"
|
|
|
|
"fmt"
|
|
|
|
"sort"
|
2021-09-18 16:31:35 -04:00
|
|
|
"strings"
|
2020-06-12 20:04:01 -07:00
|
|
|
"time"
|
|
|
|
|
2022-09-07 07:24:54 -07:00
|
|
|
"github.com/minio/minio/internal/amztime"
|
2021-06-01 14:59:40 -07:00
|
|
|
"github.com/minio/minio/internal/bucket/replication"
|
2022-05-27 06:00:19 -07:00
|
|
|
"github.com/minio/minio/internal/hash/sha256"
|
2021-06-01 14:59:40 -07:00
|
|
|
xhttp "github.com/minio/minio/internal/http"
|
|
|
|
"github.com/minio/minio/internal/logger"
|
2023-09-04 12:57:37 -07:00
|
|
|
"github.com/minio/pkg/v2/sync/errgroup"
|
2020-06-12 20:04:01 -07:00
|
|
|
)
|
|
|
|
|
2021-05-27 13:38:04 -07:00
|
|
|
const (
	// minIOErasureUpgraded marks an object that was stored with additional
	// erasure codes due to a degraded system at upload time.
	minIOErasureUpgraded = "x-minio-internal-erasure-upgraded"

	// erasureAlgorithm is the erasure algorithm identifier string.
	erasureAlgorithm = "rs-vandermonde"
)
|
|
|
|
|
|
|
|
// GetChecksumInfo - get checksum of a part.
|
|
|
|
func (e ErasureInfo) GetChecksumInfo(partNumber int) (ckSum ChecksumInfo) {
|
|
|
|
for _, sum := range e.Checksums {
|
|
|
|
if sum.PartNumber == partNumber {
|
|
|
|
// Return the checksum
|
|
|
|
return sum
|
|
|
|
}
|
|
|
|
}
|
2023-09-01 13:45:58 -07:00
|
|
|
return ChecksumInfo{Algorithm: DefaultBitrotAlgorithm}
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// ShardFileSize - returns final erasure size from original size.
|
|
|
|
func (e ErasureInfo) ShardFileSize(totalLength int64) int64 {
|
|
|
|
if totalLength == 0 {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
if totalLength == -1 {
|
|
|
|
return -1
|
|
|
|
}
|
|
|
|
numShards := totalLength / e.BlockSize
|
|
|
|
lastBlockSize := totalLength % e.BlockSize
|
|
|
|
lastShardSize := ceilFrac(lastBlockSize, int64(e.DataBlocks))
|
|
|
|
return numShards*e.ShardSize() + lastShardSize
|
|
|
|
}
|
|
|
|
|
|
|
|
// ShardSize - returns actual shared size from erasure blockSize.
|
|
|
|
func (e ErasureInfo) ShardSize() int64 {
|
|
|
|
return ceilFrac(e.BlockSize, int64(e.DataBlocks))
|
|
|
|
}
|
|
|
|
|
|
|
|
// IsValid - tells if erasure info fields are valid.
|
|
|
|
func (fi FileInfo) IsValid() bool {
|
|
|
|
if fi.Deleted {
|
|
|
|
// Delete marker has no data, no need to check
|
|
|
|
// for erasure coding information
|
|
|
|
return true
|
|
|
|
}
|
2020-08-03 12:15:08 -07:00
|
|
|
dataBlocks := fi.Erasure.DataBlocks
|
|
|
|
parityBlocks := fi.Erasure.ParityBlocks
|
2020-10-28 19:24:01 -07:00
|
|
|
correctIndexes := (fi.Erasure.Index > 0 &&
|
|
|
|
fi.Erasure.Index <= dataBlocks+parityBlocks &&
|
|
|
|
len(fi.Erasure.Distribution) == (dataBlocks+parityBlocks))
|
2020-08-03 12:15:08 -07:00
|
|
|
return ((dataBlocks >= parityBlocks) &&
|
2022-05-30 10:58:37 -07:00
|
|
|
(dataBlocks > 0) && (parityBlocks >= 0) &&
|
2020-10-28 19:24:01 -07:00
|
|
|
correctIndexes)
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// ToObjectInfo - Converts metadata to object info.
|
2022-05-31 02:57:57 -07:00
|
|
|
func (fi FileInfo) ToObjectInfo(bucket, object string, versioned bool) ObjectInfo {
|
2020-09-19 08:39:41 -07:00
|
|
|
object = decodeDirObject(object)
|
2020-09-16 18:21:50 +01:00
|
|
|
versionID := fi.VersionID
|
2022-05-31 02:57:57 -07:00
|
|
|
if versioned && versionID == "" {
|
2020-09-16 18:21:50 +01:00
|
|
|
versionID = nullVersionID
|
|
|
|
}
|
|
|
|
|
2020-06-12 20:04:01 -07:00
|
|
|
objInfo := ObjectInfo{
|
2021-02-01 18:52:11 +01:00
|
|
|
IsDir: HasSuffix(object, SlashSeparator),
|
|
|
|
Bucket: bucket,
|
|
|
|
Name: object,
|
2023-05-19 17:42:45 +01:00
|
|
|
ParityBlocks: fi.Erasure.ParityBlocks,
|
|
|
|
DataBlocks: fi.Erasure.DataBlocks,
|
2021-02-01 18:52:11 +01:00
|
|
|
VersionID: versionID,
|
|
|
|
IsLatest: fi.IsLatest,
|
|
|
|
DeleteMarker: fi.Deleted,
|
|
|
|
Size: fi.Size,
|
|
|
|
ModTime: fi.ModTime,
|
|
|
|
Legacy: fi.XLV1,
|
|
|
|
ContentType: fi.Metadata["content-type"],
|
|
|
|
ContentEncoding: fi.Metadata["content-encoding"],
|
|
|
|
NumVersions: fi.NumVersions,
|
|
|
|
SuccessorModTime: fi.SuccessorModTime,
|
2023-11-22 13:46:17 -08:00
|
|
|
CacheControl: fi.Metadata["cache-control"],
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
2021-02-01 18:52:11 +01:00
|
|
|
|
2020-06-12 20:04:01 -07:00
|
|
|
if exp, ok := fi.Metadata["expires"]; ok {
|
2022-09-07 07:24:54 -07:00
|
|
|
if t, err := amztime.ParseHeader(exp); err == nil {
|
2022-09-05 19:18:18 -07:00
|
|
|
objInfo.Expires = t.UTC()
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Extract etag from metadata.
|
|
|
|
objInfo.ETag = extractETag(fi.Metadata)
|
|
|
|
|
|
|
|
// Add user tags to the object info
|
2021-04-10 09:13:12 -07:00
|
|
|
tags := fi.Metadata[xhttp.AmzObjectTagging]
|
|
|
|
if len(tags) != 0 {
|
|
|
|
objInfo.UserTags = tags
|
|
|
|
}
|
2020-06-12 20:04:01 -07:00
|
|
|
|
2020-07-21 17:49:56 -07:00
|
|
|
// Add replication status to the object info
|
2021-09-18 16:31:35 -04:00
|
|
|
objInfo.ReplicationStatusInternal = fi.ReplicationState.ReplicationStatusInternal
|
|
|
|
objInfo.VersionPurgeStatusInternal = fi.ReplicationState.VersionPurgeStatusInternal
|
2022-12-28 22:48:33 -08:00
|
|
|
objInfo.ReplicationStatus = fi.ReplicationStatus()
|
2023-10-25 21:24:10 -07:00
|
|
|
if objInfo.ReplicationStatus.Empty() { // overlay x-amx-replication-status if present for replicas
|
|
|
|
if st, ok := fi.Metadata[xhttp.AmzBucketReplicationStatus]; ok && st == string(replication.Replica) {
|
|
|
|
objInfo.ReplicationStatus = replication.StatusType(st)
|
|
|
|
}
|
|
|
|
}
|
2022-12-28 22:48:33 -08:00
|
|
|
objInfo.VersionPurgeStatus = fi.VersionPurgeStatus()
|
2020-11-12 12:12:09 -08:00
|
|
|
|
2021-08-17 07:50:00 -07:00
|
|
|
objInfo.TransitionedObject = TransitionedObject{
|
|
|
|
Name: fi.TransitionedObjName,
|
|
|
|
VersionID: fi.TransitionVersionID,
|
|
|
|
Status: fi.TransitionStatus,
|
|
|
|
FreeVersion: fi.TierFreeVersion(),
|
|
|
|
Tier: fi.TransitionTier,
|
|
|
|
}
|
2020-11-12 12:12:09 -08:00
|
|
|
|
2020-06-12 20:04:01 -07:00
|
|
|
// etag/md5Sum has already been extracted. We need to
|
|
|
|
// remove to avoid it from appearing as part of
|
|
|
|
// response headers. e.g, X-Minio-* or X-Amz-*.
|
|
|
|
// Tags have also been extracted, we remove that as well.
|
|
|
|
objInfo.UserDefined = cleanMetadata(fi.Metadata)
|
|
|
|
|
|
|
|
// All the parts per object.
|
|
|
|
objInfo.Parts = fi.Parts
|
|
|
|
|
|
|
|
// Update storage class
|
2022-11-09 15:57:34 -08:00
|
|
|
if fi.TransitionTier != "" {
|
|
|
|
objInfo.StorageClass = fi.TransitionTier
|
|
|
|
} else if sc, ok := fi.Metadata[xhttp.AmzStorageClass]; ok {
|
2020-06-12 20:04:01 -07:00
|
|
|
objInfo.StorageClass = sc
|
|
|
|
} else {
|
|
|
|
objInfo.StorageClass = globalMinioDefaultStorageClass
|
|
|
|
}
|
2021-04-19 10:30:42 -07:00
|
|
|
|
2020-11-12 12:12:09 -08:00
|
|
|
// set restore status for transitioned object
|
2021-04-19 10:30:42 -07:00
|
|
|
restoreHdr, ok := fi.Metadata[xhttp.AmzRestore]
|
|
|
|
if ok {
|
|
|
|
if restoreStatus, err := parseRestoreObjStatus(restoreHdr); err == nil {
|
|
|
|
objInfo.RestoreOngoing = restoreStatus.Ongoing()
|
|
|
|
objInfo.RestoreExpires, _ = restoreStatus.Expiry()
|
|
|
|
}
|
2020-11-12 12:12:09 -08:00
|
|
|
}
|
2022-08-30 01:57:16 +02:00
|
|
|
objInfo.Checksum = fi.Checksum
|
2023-04-17 12:16:37 -07:00
|
|
|
objInfo.Inlined = fi.InlineData()
|
2020-06-12 20:04:01 -07:00
|
|
|
// Success.
|
|
|
|
return objInfo
|
|
|
|
}
|
|
|
|
|
2021-08-23 13:14:55 -07:00
|
|
|
// TransitionInfoEquals returns true if transition related information are equal, false otherwise.
|
|
|
|
func (fi FileInfo) TransitionInfoEquals(ofi FileInfo) bool {
|
|
|
|
switch {
|
|
|
|
case fi.TransitionStatus != ofi.TransitionStatus,
|
|
|
|
fi.TransitionTier != ofi.TransitionTier,
|
|
|
|
fi.TransitionedObjName != ofi.TransitionedObjName,
|
|
|
|
fi.TransitionVersionID != ofi.TransitionVersionID:
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
// MetadataEquals returns true if FileInfos Metadata maps are equal, false otherwise.
|
|
|
|
func (fi FileInfo) MetadataEquals(ofi FileInfo) bool {
|
|
|
|
if len(fi.Metadata) != len(ofi.Metadata) {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
for k, v := range fi.Metadata {
|
|
|
|
if ov, ok := ofi.Metadata[k]; !ok || ov != v {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReplicationInfoEquals returns true if server-side replication related fields are equal, false otherwise.
|
|
|
|
func (fi FileInfo) ReplicationInfoEquals(ofi FileInfo) bool {
|
|
|
|
switch {
|
|
|
|
case fi.MarkDeleted != ofi.MarkDeleted,
|
2021-09-18 16:31:35 -04:00
|
|
|
!fi.ReplicationState.Equal(ofi.ReplicationState):
|
2021-08-23 13:14:55 -07:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
|
2020-06-12 20:04:01 -07:00
|
|
|
// objectPartIndex - returns the index of matching object part number.
|
|
|
|
func objectPartIndex(parts []ObjectPartInfo, partNumber int) int {
|
|
|
|
for i, part := range parts {
|
|
|
|
if partNumber == part.Number {
|
|
|
|
return i
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return -1
|
|
|
|
}
|
|
|
|
|
|
|
|
// AddObjectPart - add a new object part in order.
|
2022-08-30 01:57:16 +02:00
|
|
|
func (fi *FileInfo) AddObjectPart(partNumber int, partETag string, partSize, actualSize int64, modTime time.Time, idx []byte, checksums map[string]string) {
|
2020-06-12 20:04:01 -07:00
|
|
|
partInfo := ObjectPartInfo{
|
|
|
|
Number: partNumber,
|
|
|
|
ETag: partETag,
|
|
|
|
Size: partSize,
|
|
|
|
ActualSize: actualSize,
|
2022-07-19 18:56:24 -07:00
|
|
|
ModTime: modTime,
|
2022-07-11 17:30:56 -07:00
|
|
|
Index: idx,
|
2022-08-30 01:57:16 +02:00
|
|
|
Checksums: checksums,
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// Update part info if it already exists.
|
|
|
|
for i, part := range fi.Parts {
|
|
|
|
if partNumber == part.Number {
|
|
|
|
fi.Parts[i] = partInfo
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Proceed to include new part info.
|
|
|
|
fi.Parts = append(fi.Parts, partInfo)
|
|
|
|
|
|
|
|
// Parts in FileInfo should be in sorted order by part number.
|
2023-04-24 13:28:18 -07:00
|
|
|
sort.Slice(fi.Parts, func(i, j int) bool { return fi.Parts[i].Number < fi.Parts[j].Number })
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// ObjectToPartOffset - translate offset of an object to offset of its individual part.
|
|
|
|
func (fi FileInfo) ObjectToPartOffset(ctx context.Context, offset int64) (partIndex int, partOffset int64, err error) {
|
|
|
|
if offset == 0 {
|
|
|
|
// Special case - if offset is 0, then partIndex and partOffset are always 0.
|
|
|
|
return 0, 0, nil
|
|
|
|
}
|
|
|
|
partOffset = offset
|
|
|
|
// Seek until object offset maps to a particular part offset.
|
|
|
|
for i, part := range fi.Parts {
|
|
|
|
partIndex = i
|
|
|
|
// Offset is smaller than size we have reached the proper part offset.
|
|
|
|
if partOffset < part.Size {
|
|
|
|
return partIndex, partOffset, nil
|
|
|
|
}
|
|
|
|
// Continue to towards the next part.
|
|
|
|
partOffset -= part.Size
|
|
|
|
}
|
|
|
|
logger.LogIf(ctx, InvalidRange{})
|
|
|
|
// Offset beyond the size of the object return InvalidRange.
|
|
|
|
return 0, 0, InvalidRange{}
|
|
|
|
}
|
|
|
|
|
2023-06-17 19:18:20 -07:00
|
|
|
func findFileInfoInQuorum(ctx context.Context, metaArr []FileInfo, modTime time.Time, etag string, quorum int) (FileInfo, error) {
|
2021-05-24 18:31:56 -07:00
|
|
|
// with less quorum return error.
|
2022-05-30 10:58:37 -07:00
|
|
|
if quorum < 1 {
|
2021-05-24 18:31:56 -07:00
|
|
|
return FileInfo{}, errErasureReadQuorum
|
|
|
|
}
|
2020-06-12 20:04:01 -07:00
|
|
|
metaHashes := make([]string, len(metaArr))
|
2020-10-28 00:09:15 -07:00
|
|
|
h := sha256.New()
|
2020-06-12 20:04:01 -07:00
|
|
|
for i, meta := range metaArr {
|
2023-06-17 19:18:20 -07:00
|
|
|
if !meta.IsValid() {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
etagOnly := modTime.Equal(timeSentinel) && (etag != "" && etag == meta.Metadata["etag"])
|
|
|
|
mtimeValid := meta.ModTime.Equal(modTime)
|
|
|
|
if mtimeValid || etagOnly {
|
2021-11-21 10:41:30 -08:00
|
|
|
fmt.Fprintf(h, "%v", meta.XLV1)
|
2023-06-17 19:18:20 -07:00
|
|
|
if !etagOnly {
|
|
|
|
// Verify dataDir is same only when mtime is valid and etag is not considered.
|
|
|
|
fmt.Fprintf(h, "%v", meta.GetDataDir())
|
|
|
|
}
|
2020-06-12 20:04:01 -07:00
|
|
|
for _, part := range meta.Parts {
|
2021-11-16 09:28:29 -08:00
|
|
|
fmt.Fprintf(h, "part.%d", part.Number)
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
2023-07-11 21:53:49 -07:00
|
|
|
|
|
|
|
if !meta.Deleted && meta.Size != 0 {
|
|
|
|
fmt.Fprintf(h, "%v+%v", meta.Erasure.DataBlocks, meta.Erasure.ParityBlocks)
|
|
|
|
fmt.Fprintf(h, "%v", meta.Erasure.Distribution)
|
|
|
|
}
|
2021-08-19 14:55:42 -07:00
|
|
|
|
|
|
|
// ILM transition fields
|
2021-11-16 09:28:29 -08:00
|
|
|
fmt.Fprint(h, meta.TransitionStatus)
|
|
|
|
fmt.Fprint(h, meta.TransitionTier)
|
|
|
|
fmt.Fprint(h, meta.TransitionedObjName)
|
|
|
|
fmt.Fprint(h, meta.TransitionVersionID)
|
2021-08-19 14:55:42 -07:00
|
|
|
|
|
|
|
// Server-side replication fields
|
2021-11-16 09:28:29 -08:00
|
|
|
fmt.Fprintf(h, "%v", meta.MarkDeleted)
|
|
|
|
fmt.Fprint(h, meta.Metadata[string(meta.ReplicationState.ReplicaStatus)])
|
|
|
|
fmt.Fprint(h, meta.Metadata[meta.ReplicationState.ReplicationStatusInternal])
|
|
|
|
fmt.Fprint(h, meta.Metadata[meta.ReplicationState.VersionPurgeStatusInternal])
|
2021-09-18 16:31:35 -04:00
|
|
|
|
2020-06-12 20:04:01 -07:00
|
|
|
metaHashes[i] = hex.EncodeToString(h.Sum(nil))
|
2020-10-28 00:09:15 -07:00
|
|
|
h.Reset()
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
metaHashCountMap := make(map[string]int)
|
|
|
|
for _, hash := range metaHashes {
|
|
|
|
if hash == "" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
metaHashCountMap[hash]++
|
|
|
|
}
|
|
|
|
|
|
|
|
maxHash := ""
|
|
|
|
maxCount := 0
|
|
|
|
for hash, count := range metaHashCountMap {
|
|
|
|
if count > maxCount {
|
|
|
|
maxCount = count
|
|
|
|
maxHash = hash
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if maxCount < quorum {
|
|
|
|
return FileInfo{}, errErasureReadQuorum
|
|
|
|
}
|
|
|
|
|
2023-09-04 08:24:17 -07:00
|
|
|
// Find the successor mod time in quorum, otherwise leave the
|
|
|
|
// candidate's successor modTime as found
|
|
|
|
succModTimeMap := make(map[time.Time]int)
|
|
|
|
var candidate FileInfo
|
|
|
|
var found bool
|
2020-06-12 20:04:01 -07:00
|
|
|
for i, hash := range metaHashes {
|
|
|
|
if hash == maxHash {
|
2021-05-24 18:31:56 -07:00
|
|
|
if metaArr[i].IsValid() {
|
2023-09-04 08:24:17 -07:00
|
|
|
if !found {
|
|
|
|
candidate = metaArr[i]
|
|
|
|
found = true
|
|
|
|
}
|
|
|
|
succModTimeMap[metaArr[i].SuccessorModTime]++
|
2021-05-24 18:31:56 -07:00
|
|
|
}
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
|
|
|
}
|
2023-09-04 08:24:17 -07:00
|
|
|
var succModTime time.Time
|
|
|
|
var smodTimeQuorum bool
|
|
|
|
for smodTime, count := range succModTimeMap {
|
|
|
|
if count >= quorum {
|
|
|
|
smodTimeQuorum = true
|
|
|
|
succModTime = smodTime
|
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2020-06-12 20:04:01 -07:00
|
|
|
|
2023-09-04 08:24:17 -07:00
|
|
|
if found {
|
|
|
|
if smodTimeQuorum {
|
|
|
|
candidate.SuccessorModTime = succModTime
|
|
|
|
candidate.IsLatest = succModTime.IsZero()
|
|
|
|
}
|
|
|
|
return candidate, nil
|
|
|
|
}
|
2020-06-12 20:04:01 -07:00
|
|
|
return FileInfo{}, errErasureReadQuorum
|
|
|
|
}
|
|
|
|
|
|
|
|
// pickValidFileInfo - picks one valid FileInfo content and returns from a
|
|
|
|
// slice of FileInfo.
|
2023-06-17 19:18:20 -07:00
|
|
|
func pickValidFileInfo(ctx context.Context, metaArr []FileInfo, modTime time.Time, etag string, quorum int) (FileInfo, error) {
|
|
|
|
return findFileInfoInQuorum(ctx, metaArr, modTime, etag, quorum)
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// writeUniqueFileInfo - writes unique `xl.meta` content for each disk concurrently.
|
2024-01-30 12:43:25 -08:00
|
|
|
func writeUniqueFileInfo(ctx context.Context, disks []StorageAPI, origbucket, bucket, prefix string, files []FileInfo, quorum int) ([]StorageAPI, error) {
|
2020-06-12 20:04:01 -07:00
|
|
|
g := errgroup.WithNErrs(len(disks))
|
|
|
|
|
|
|
|
// Start writing `xl.meta` to all disks in parallel.
|
|
|
|
for index := range disks {
|
|
|
|
index := index
|
|
|
|
g.Go(func() error {
|
|
|
|
if disks[index] == nil {
|
|
|
|
return errDiskNotFound
|
|
|
|
}
|
|
|
|
// Pick one FileInfo for a disk at index.
|
2021-04-03 09:03:42 -07:00
|
|
|
fi := files[index]
|
|
|
|
fi.Erasure.Index = index + 1
|
|
|
|
if fi.IsValid() {
|
2024-01-30 12:43:25 -08:00
|
|
|
return disks[index].WriteMetadata(ctx, origbucket, bucket, prefix, fi)
|
2021-04-03 09:03:42 -07:00
|
|
|
}
|
|
|
|
return errCorruptedFormat
|
2020-06-12 20:04:01 -07:00
|
|
|
}, index)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Wait for all the routines.
|
|
|
|
mErrs := g.Wait()
|
|
|
|
|
|
|
|
err := reduceWriteQuorumErrs(ctx, mErrs, objectOpIgnoredErrs, quorum)
|
|
|
|
return evalDisks(disks, mErrs), err
|
|
|
|
}
|
|
|
|
|
2023-04-14 16:23:28 -07:00
|
|
|
// commonParity returns the parity value that occurs most often in
// parities among those occurring often enough to satisfy read quorum, or
// -1 when no value qualifies.
//
// Entries equal to -1 denote an undefined parity and are ignored. The
// read quorum for a parity p is len(parities)-p, except that p == 0 with
// defaultParityCount > 0 is treated as a delete marker and requires
// len(parities)/2+1 occurrences.
func commonParity(parities []int, defaultParityCount int) int {
	total := len(parities)

	seen := make(map[int]int)
	for _, p := range parities {
		seen[p]++
	}

	best, bestCount := -1, 0
	for parity, occ := range seen {
		if parity == -1 {
			// Ignore non defined parity
			continue
		}

		need := total - parity
		if parity == 0 && defaultParityCount > 0 {
			// In this case, parity == 0 implies that this object version
			// is a delete marker
			need = total/2 + 1
		}

		// Skip parities without enough shards for read quorum, and keep
		// only the most frequent of the remaining ones.
		if occ >= need && occ > bestCount {
			best, bestCount = parity, occ
		}
	}

	// best is still -1 if nothing useful was found.
	return best
}
|
|
|
|
|
|
|
|
func listObjectParities(partsMetadata []FileInfo, errs []error) (parities []int) {
|
|
|
|
parities = make([]int, len(partsMetadata))
|
|
|
|
for index, metadata := range partsMetadata {
|
|
|
|
if errs[index] != nil {
|
|
|
|
parities[index] = -1
|
|
|
|
continue
|
|
|
|
}
|
2023-01-13 03:28:16 +05:30
|
|
|
if !metadata.IsValid() {
|
|
|
|
parities[index] = -1
|
|
|
|
continue
|
|
|
|
}
|
2023-07-11 21:53:49 -07:00
|
|
|
// Delete marker or zero byte objects take highest parity.
|
|
|
|
if metadata.Deleted || metadata.Size == 0 {
|
2023-05-26 09:57:44 -07:00
|
|
|
parities[index] = len(partsMetadata) / 2
|
|
|
|
} else {
|
2023-01-13 03:28:16 +05:30
|
|
|
parities[index] = metadata.Erasure.ParityBlocks
|
|
|
|
}
|
2022-10-13 00:42:45 +01:00
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2020-06-12 20:04:01 -07:00
|
|
|
// Returns per object readQuorum and writeQuorum
|
|
|
|
// readQuorum is the min required disks to read data.
|
|
|
|
// writeQuorum is the min required disks to write data.
|
2021-01-16 12:08:02 -08:00
|
|
|
func objectQuorumFromMeta(ctx context.Context, partsMetaData []FileInfo, errs []error, defaultParityCount int) (objectReadQuorum, objectWriteQuorum int, err error) {
|
2024-01-17 23:03:17 -08:00
|
|
|
// There should be at least half correct entries, if not return failure
|
2022-10-13 00:42:45 +01:00
|
|
|
expectedRQuorum := len(partsMetaData) / 2
|
|
|
|
if defaultParityCount == 0 {
|
|
|
|
// if parity count is '0', we expected all entries to be present.
|
|
|
|
expectedRQuorum = len(partsMetaData)
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
|
|
|
|
2022-10-13 00:42:45 +01:00
|
|
|
reducedErr := reduceReadQuorumErrs(ctx, errs, objectOpIgnoredErrs, expectedRQuorum)
|
|
|
|
if reducedErr != nil {
|
|
|
|
return -1, -1, reducedErr
|
2022-02-04 02:47:36 -08:00
|
|
|
}
|
|
|
|
|
2022-10-13 00:42:45 +01:00
|
|
|
// special case when parity is '0'
|
|
|
|
if defaultParityCount == 0 {
|
|
|
|
return len(partsMetaData), len(partsMetaData), nil
|
2020-08-03 12:15:08 -07:00
|
|
|
}
|
|
|
|
|
2022-10-13 00:42:45 +01:00
|
|
|
parities := listObjectParities(partsMetaData, errs)
|
2023-04-14 16:23:28 -07:00
|
|
|
parityBlocks := commonParity(parities, defaultParityCount)
|
2022-10-13 00:42:45 +01:00
|
|
|
if parityBlocks < 0 {
|
|
|
|
return -1, -1, errErasureReadQuorum
|
2022-02-04 02:47:36 -08:00
|
|
|
}
|
|
|
|
|
2022-10-13 00:42:45 +01:00
|
|
|
if parityBlocks == 0 {
|
|
|
|
// For delete markers do not use 'defaultParityCount' as it is not expected to be the case.
|
|
|
|
// Use maximum allowed read quorum instead, writeQuorum+1 is returned for compatibility sake
|
|
|
|
// but there are no callers that shall be using this.
|
|
|
|
readQuorum := len(partsMetaData) / 2
|
|
|
|
return readQuorum, readQuorum + 1, nil
|
2021-05-27 10:11:50 -07:00
|
|
|
}
|
|
|
|
|
2022-10-13 00:42:45 +01:00
|
|
|
dataBlocks := len(partsMetaData) - parityBlocks
|
|
|
|
|
2020-08-03 12:15:08 -07:00
|
|
|
writeQuorum := dataBlocks
|
|
|
|
if dataBlocks == parityBlocks {
|
2021-01-16 12:08:02 -08:00
|
|
|
writeQuorum++
|
2020-08-03 12:15:08 -07:00
|
|
|
}
|
|
|
|
|
2020-06-12 20:04:01 -07:00
|
|
|
// Since all the valid erasure code meta updated at the same time are equivalent, pass dataBlocks
|
|
|
|
// from latestFileInfo to get the quorum
|
2020-08-03 12:15:08 -07:00
|
|
|
return dataBlocks, writeQuorum, nil
|
2020-06-12 20:04:01 -07:00
|
|
|
}
|
2021-06-30 19:32:07 -07:00
|
|
|
|
|
|
|
// Metadata key suffixes for tier free-versions; both are appended to
// ReservedMetadataPrefixLower when used as keys in FileInfo.Metadata.
const (
	// tierFVID keys the versionID to assign to a free-version.
	tierFVID = "tier-free-versionID"

	// tierFVMarker keys the marker whose presence flags a free-version.
	tierFVMarker = "tier-free-marker"
)
|
|
|
|
|
|
|
|
// SetTierFreeVersionID sets free-version's versionID. This method is used by
|
|
|
|
// object layer to pass down a versionID to set for a free-version that may be
|
|
|
|
// created.
|
|
|
|
func (fi *FileInfo) SetTierFreeVersionID(versionID string) {
|
|
|
|
if fi.Metadata == nil {
|
|
|
|
fi.Metadata = make(map[string]string)
|
|
|
|
}
|
|
|
|
fi.Metadata[ReservedMetadataPrefixLower+tierFVID] = versionID
|
|
|
|
}
|
|
|
|
|
|
|
|
// TierFreeVersionID returns the free-version's version id.
|
|
|
|
func (fi *FileInfo) TierFreeVersionID() string {
|
|
|
|
return fi.Metadata[ReservedMetadataPrefixLower+tierFVID]
|
|
|
|
}
|
|
|
|
|
|
|
|
// SetTierFreeVersion sets fi as a free-version. This method is used by
|
|
|
|
// lower layers to indicate a free-version.
|
|
|
|
func (fi *FileInfo) SetTierFreeVersion() {
|
|
|
|
if fi.Metadata == nil {
|
|
|
|
fi.Metadata = make(map[string]string)
|
|
|
|
}
|
|
|
|
fi.Metadata[ReservedMetadataPrefixLower+tierFVMarker] = ""
|
|
|
|
}
|
|
|
|
|
|
|
|
// TierFreeVersion returns true if version is a free-version.
|
|
|
|
func (fi *FileInfo) TierFreeVersion() bool {
|
|
|
|
_, ok := fi.Metadata[ReservedMetadataPrefixLower+tierFVMarker]
|
|
|
|
return ok
|
|
|
|
}
|
2021-09-18 16:31:35 -04:00
|
|
|
|
2022-02-17 15:05:19 -08:00
|
|
|
// IsRestoreObjReq returns true if fi corresponds to a RestoreObject request.
|
|
|
|
func (fi *FileInfo) IsRestoreObjReq() bool {
|
|
|
|
if restoreHdr, ok := fi.Metadata[xhttp.AmzRestore]; ok {
|
|
|
|
if restoreStatus, err := parseRestoreObjStatus(restoreHdr); err == nil {
|
|
|
|
if !restoreStatus.Ongoing() {
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
2021-09-18 16:31:35 -04:00
|
|
|
// VersionPurgeStatus returns overall version purge status for this object version across targets
|
|
|
|
func (fi *FileInfo) VersionPurgeStatus() VersionPurgeStatusType {
|
|
|
|
return fi.ReplicationState.CompositeVersionPurgeStatus()
|
|
|
|
}
|
|
|
|
|
2022-12-28 22:48:33 -08:00
|
|
|
// ReplicationStatus returns overall version replication status for this object version across targets
|
|
|
|
func (fi *FileInfo) ReplicationStatus() replication.StatusType {
|
|
|
|
return fi.ReplicationState.CompositeReplicationStatus()
|
|
|
|
}
|
|
|
|
|
2021-09-18 16:31:35 -04:00
|
|
|
// DeleteMarkerReplicationStatus returns overall replication status for this delete marker version across targets
|
|
|
|
func (fi *FileInfo) DeleteMarkerReplicationStatus() replication.StatusType {
|
|
|
|
if fi.Deleted {
|
|
|
|
return fi.ReplicationState.CompositeReplicationStatus()
|
|
|
|
}
|
|
|
|
return replication.StatusType("")
|
|
|
|
}
|
|
|
|
|
|
|
|
// GetInternalReplicationState is a wrapper method to fetch internal replication state from the map m
|
|
|
|
func GetInternalReplicationState(m map[string][]byte) ReplicationState {
|
|
|
|
m1 := make(map[string]string, len(m))
|
|
|
|
for k, v := range m {
|
|
|
|
m1[k] = string(v)
|
|
|
|
}
|
|
|
|
return getInternalReplicationState(m1)
|
|
|
|
}
|
|
|
|
|
|
|
|
// getInternalReplicationState fetches internal replication state from the map m
|
|
|
|
func getInternalReplicationState(m map[string]string) ReplicationState {
|
2021-11-18 12:15:22 -08:00
|
|
|
d := ReplicationState{}
|
2021-09-18 16:31:35 -04:00
|
|
|
for k, v := range m {
|
|
|
|
switch {
|
|
|
|
case equals(k, ReservedMetadataPrefixLower+ReplicationTimestamp):
|
2023-06-14 22:49:13 +08:00
|
|
|
d.ReplicaTimeStamp, _ = amztime.ParseReplicationTS(v)
|
2021-09-18 16:31:35 -04:00
|
|
|
case equals(k, ReservedMetadataPrefixLower+ReplicaTimestamp):
|
2023-06-14 22:49:13 +08:00
|
|
|
d.ReplicaTimeStamp, _ = amztime.ParseReplicationTS(v)
|
2021-09-18 16:31:35 -04:00
|
|
|
case equals(k, ReservedMetadataPrefixLower+ReplicaStatus):
|
|
|
|
d.ReplicaStatus = replication.StatusType(v)
|
|
|
|
case equals(k, ReservedMetadataPrefixLower+ReplicationStatus):
|
|
|
|
d.ReplicationStatusInternal = v
|
|
|
|
d.Targets = replicationStatusesMap(v)
|
|
|
|
case equals(k, VersionPurgeStatusKey):
|
|
|
|
d.VersionPurgeStatusInternal = v
|
|
|
|
d.PurgeTargets = versionPurgeStatusesMap(v)
|
|
|
|
case strings.HasPrefix(k, ReservedMetadataPrefixLower+ReplicationReset):
|
|
|
|
arn := strings.TrimPrefix(k, fmt.Sprintf("%s-", ReservedMetadataPrefixLower+ReplicationReset))
|
2021-11-18 12:15:22 -08:00
|
|
|
if d.ResetStatusesMap == nil {
|
|
|
|
d.ResetStatusesMap = make(map[string]string, 1)
|
|
|
|
}
|
2021-09-18 16:31:35 -04:00
|
|
|
d.ResetStatusesMap[arn] = v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return d
|
|
|
|
}
|