2021-04-18 15:41:13 -04:00
// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2018-02-15 20:45:57 -05:00
package cmd
import (
2018-03-14 15:01:47 -04:00
"context"
2021-02-26 19:53:06 -05:00
"encoding/binary"
2020-08-18 17:37:26 -04:00
"errors"
2018-02-15 20:45:57 -05:00
"fmt"
"hash/crc32"
2020-10-13 21:28:42 -04:00
"math/rand"
2018-09-20 22:22:09 -04:00
"net/http"
2021-11-15 12:46:55 -05:00
"reflect"
2022-07-25 20:51:32 -04:00
"strings"
2018-02-15 20:45:57 -05:00
"sync"
"time"
2020-06-12 23:04:01 -04:00
"github.com/dchest/siphash"
"github.com/google/uuid"
2023-06-19 20:53:08 -04:00
"github.com/minio/madmin-go/v3"
2020-12-10 10:28:37 -05:00
"github.com/minio/minio-go/v7/pkg/set"
2020-07-14 12:38:05 -04:00
"github.com/minio/minio-go/v7/pkg/tags"
2021-06-01 17:59:40 -04:00
"github.com/minio/minio/internal/dsync"
"github.com/minio/minio/internal/logger"
2024-05-24 19:05:23 -04:00
"github.com/minio/pkg/v3/console"
"github.com/minio/pkg/v3/sync/errgroup"
2018-02-15 20:45:57 -05:00
)
2019-11-13 15:17:45 -05:00
// setsDsyncLockers is a two-level slice of dsync.NetLocker clients; within
// this file it is indexed by set index (see erasureSets.GetLockers).
// Per the original note, it is declared as a named type so that a Close()
// method can be defined on it (that method is not visible in this chunk).
type setsDsyncLockers [ ] [ ] dsync . NetLocker
2020-06-12 23:04:01 -04:00
// erasureSets implements ObjectLayer combining a static list of erasure coded
2018-02-15 20:45:57 -05:00
// object sets. NOTE: There is no dynamic scaling allowed or intended in
// current design.
2020-06-12 23:04:01 -04:00
type erasureSets struct {
sets [ ] * erasureObjects
2018-02-15 20:45:57 -05:00
// Reference format.
2020-06-12 23:04:01 -04:00
format * formatErasureV3
2018-02-15 20:45:57 -05:00
2020-06-12 23:04:01 -04:00
// erasureDisks mutex to lock erasureDisks.
erasureDisksMu sync . RWMutex
2018-02-15 20:45:57 -05:00
// Re-ordered list of disks per set.
2020-06-12 23:04:01 -04:00
erasureDisks [ ] [ ] StorageAPI
2018-02-15 20:45:57 -05:00
2019-11-13 15:17:45 -05:00
// Distributed locker clients.
2020-06-12 23:04:01 -04:00
erasureLockers setsDsyncLockers
2019-11-13 15:17:45 -05:00
2020-09-25 22:21:52 -04:00
// Distributed lock owner (constant per running instance).
erasureLockOwner string
2018-02-15 20:45:57 -05:00
// List of endpoints provided on the command line.
2022-01-10 12:07:49 -05:00
endpoints PoolEndpoints
2018-02-15 20:45:57 -05:00
2020-03-24 21:53:24 -04:00
// String version of all the endpoints, an optimization
// to avoid url.String() conversion taking CPU on
// large disk setups.
endpointStrings [ ] string
2018-02-15 20:45:57 -05:00
// Total number of sets and the number of disks per set.
2020-08-26 22:29:35 -04:00
setCount , setDriveCount int
2021-01-16 15:08:02 -05:00
defaultParityCount int
2018-02-15 20:45:57 -05:00
2021-03-04 17:36:23 -05:00
poolIndex int
2021-01-26 16:21:51 -05:00
2018-02-15 20:45:57 -05:00
// Distribution algorithm of choice.
distributionAlgo string
2020-06-12 23:04:01 -04:00
deploymentID [ 16 ] byte
2018-02-15 20:45:57 -05:00
2021-05-11 12:19:15 -04:00
lastConnectDisksOpTime time . Time
2018-02-15 20:45:57 -05:00
}
2021-06-16 17:26:26 -04:00
func ( s * erasureSets ) getDiskMap ( ) map [ Endpoint ] StorageAPI {
diskMap := make ( map [ Endpoint ] StorageAPI )
2020-03-25 02:26:13 -04:00
2020-06-12 23:04:01 -04:00
s . erasureDisksMu . RLock ( )
defer s . erasureDisksMu . RUnlock ( )
2018-02-15 20:45:57 -05:00
for i := 0 ; i < s . setCount ; i ++ {
2020-08-26 22:29:35 -04:00
for j := 0 ; j < s . setDriveCount ; j ++ {
2020-06-12 23:04:01 -04:00
disk := s . erasureDisks [ i ] [ j ]
2020-09-17 00:14:35 -04:00
if disk == OfflineDisk {
2018-02-15 20:45:57 -05:00
continue
}
2020-03-25 02:26:13 -04:00
if ! disk . IsOnline ( ) {
2018-02-15 20:45:57 -05:00
continue
}
2021-06-16 17:26:26 -04:00
diskMap [ disk . Endpoint ( ) ] = disk
2018-02-15 20:45:57 -05:00
}
}
2020-03-25 02:26:13 -04:00
return diskMap
2018-02-15 20:45:57 -05:00
}
// Initializes a new StorageAPI from the endpoint argument, returns
// StorageAPI and also `format` which exists on the disk.
2024-04-15 04:25:46 -04:00
func connectEndpoint ( endpoint Endpoint ) ( StorageAPI , * formatErasureV3 , error ) {
2023-08-01 13:54:26 -04:00
disk , err := newStorageAPI ( endpoint , storageOpts {
cleanUp : false ,
healthCheck : false ,
} )
2018-02-15 20:45:57 -05:00
if err != nil {
2024-04-15 04:25:46 -04:00
return nil , nil , err
2018-02-15 20:45:57 -05:00
}
2024-04-15 04:25:46 -04:00
format , err := loadFormatErasure ( disk , false )
2018-02-15 20:45:57 -05:00
if err != nil {
2024-01-12 04:48:36 -05:00
disk . Close ( )
2024-04-15 04:25:46 -04:00
return nil , nil , fmt . Errorf ( "Drive: %s returned %w" , disk , err ) // make sure to '%w' to wrap the error
2018-02-15 20:45:57 -05:00
}
2023-08-01 13:54:26 -04:00
disk . Close ( )
disk , err = newStorageAPI ( endpoint , storageOpts {
cleanUp : true ,
healthCheck : true ,
} )
if err != nil {
2024-04-15 04:25:46 -04:00
return nil , nil , err
2023-08-01 13:54:26 -04:00
}
2024-04-15 04:25:46 -04:00
return disk , format , nil
2018-02-15 20:45:57 -05:00
}
2020-03-27 17:48:30 -04:00
// findDiskIndex - returns the i,j'th position of the input `diskID` against the reference
// format, after successful validation.
// - i'th position is the set index
// - j'th position is the disk index in the current set
2020-06-12 23:04:01 -04:00
func findDiskIndexByDiskID ( refFormat * formatErasureV3 , diskID string ) ( int , int , error ) {
2022-01-24 22:40:02 -05:00
if diskID == "" {
return - 1 , - 1 , errDiskNotFound
}
2020-03-27 17:48:30 -04:00
if diskID == offlineDiskUUID {
2022-08-04 19:10:08 -04:00
return - 1 , - 1 , fmt . Errorf ( "DriveID: %s is offline" , diskID )
2020-03-27 17:48:30 -04:00
}
2020-06-12 23:04:01 -04:00
for i := 0 ; i < len ( refFormat . Erasure . Sets ) ; i ++ {
for j := 0 ; j < len ( refFormat . Erasure . Sets [ 0 ] ) ; j ++ {
if refFormat . Erasure . Sets [ i ] [ j ] == diskID {
2020-03-27 17:48:30 -04:00
return i , j , nil
}
}
}
2022-08-04 19:10:08 -04:00
return - 1 , - 1 , fmt . Errorf ( "DriveID: %s not found" , diskID )
2020-03-27 17:48:30 -04:00
}
2018-02-15 20:45:57 -05:00
// findDiskIndex - returns the i,j'th position of the input `format` against the reference
// format, after successful validation.
2020-01-15 21:30:32 -05:00
// - i'th position is the set index
// - j'th position is the disk index in the current set
2020-06-12 23:04:01 -04:00
func findDiskIndex ( refFormat , format * formatErasureV3 ) ( int , int , error ) {
if err := formatErasureV3Check ( refFormat , format ) ; err != nil {
2018-02-15 20:45:57 -05:00
return 0 , 0 , err
}
2020-06-12 23:04:01 -04:00
if format . Erasure . This == offlineDiskUUID {
2022-08-04 19:10:08 -04:00
return - 1 , - 1 , fmt . Errorf ( "DriveID: %s is offline" , format . Erasure . This )
2018-02-15 20:45:57 -05:00
}
2020-06-12 23:04:01 -04:00
for i := 0 ; i < len ( refFormat . Erasure . Sets ) ; i ++ {
for j := 0 ; j < len ( refFormat . Erasure . Sets [ 0 ] ) ; j ++ {
if refFormat . Erasure . Sets [ i ] [ j ] == format . Erasure . This {
2018-02-15 20:45:57 -05:00
return i , j , nil
}
}
}
2022-08-04 19:10:08 -04:00
return - 1 , - 1 , fmt . Errorf ( "DriveID: %s not found" , format . Erasure . This )
2018-02-15 20:45:57 -05:00
}
2024-05-03 16:08:20 -04:00
// Legacy reports whether the distribution algorithm in use is CRCMOD, i.e.
// whether distributionAlgo equals formatErasureVersionV2DistributionAlgoV1.
func (s *erasureSets) Legacy() (ok bool) {
	ok = s.distributionAlgo == formatErasureVersionV2DistributionAlgoV1
	return ok
}
2020-01-10 05:35:06 -05:00
// connectDisks - attempt to connect all the endpoints, loads format
2018-03-27 21:11:39 -04:00
// and re-arranges the disks in proper position.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) connectDisks ( ) {
2021-05-11 12:19:15 -04:00
defer func ( ) {
s . lastConnectDisksOpTime = time . Now ( )
} ( )
2020-03-25 02:26:13 -04:00
var wg sync . WaitGroup
diskMap := s . getDiskMap ( )
2022-01-10 12:07:49 -05:00
for _ , endpoint := range s . endpoints . Endpoints {
2022-02-21 18:51:54 -05:00
cdisk := diskMap [ endpoint ]
if cdisk != nil && cdisk . IsOnline ( ) {
if s . lastConnectDisksOpTime . IsZero ( ) {
continue
}
// An online-disk means its a valid disk but it may be a re-connected disk
// we verify that here based on LastConn(), however we make sure to avoid
// putting it back into the s.erasureDisks by re-placing the disk again.
_ , setIndex , _ := cdisk . GetDiskLoc ( )
if setIndex != - 1 {
continue
}
2018-03-27 21:11:39 -04:00
}
2023-11-09 12:33:32 -05:00
if cdisk != nil {
// Close previous offline disk.
cdisk . Close ( )
}
2022-02-21 18:51:54 -05:00
2020-03-25 02:26:13 -04:00
wg . Add ( 1 )
go func ( endpoint Endpoint ) {
defer wg . Done ( )
2024-04-15 04:25:46 -04:00
disk , format , err := connectEndpoint ( endpoint )
2020-03-25 02:26:13 -04:00
if err != nil {
2020-09-04 20:09:02 -04:00
if endpoint . IsLocal && errors . Is ( err , errUnformattedDisk ) {
globalBackgroundHealState . pushHealLocalDisks ( endpoint )
2024-07-02 16:41:29 -04:00
} else if ! errors . Is ( err , errDriveIsRoot ) {
2020-09-04 20:09:02 -04:00
printEndpointError ( endpoint , err , true )
}
2020-03-25 02:26:13 -04:00
return
}
2021-03-04 17:36:23 -05:00
if disk . IsLocal ( ) && disk . Healing ( ) != nil {
2020-10-24 16:23:08 -04:00
globalBackgroundHealState . pushHealLocalDisks ( disk . Endpoint ( ) )
}
2023-05-17 15:09:41 -04:00
s . erasureDisksMu . Lock ( )
2020-03-25 02:26:13 -04:00
setIndex , diskIndex , err := findDiskIndex ( s . format , format )
if err != nil {
2020-10-24 16:23:08 -04:00
printEndpointError ( endpoint , err , false )
2021-11-15 12:46:55 -05:00
disk . Close ( )
2023-05-17 15:09:41 -04:00
s . erasureDisksMu . Unlock ( )
2020-03-25 02:26:13 -04:00
return
}
2020-09-28 22:39:32 -04:00
2021-11-15 12:46:55 -05:00
if currentDisk := s . erasureDisks [ setIndex ] [ diskIndex ] ; currentDisk != nil {
if ! reflect . DeepEqual ( currentDisk . Endpoint ( ) , disk . Endpoint ( ) ) {
2022-08-04 19:10:08 -04:00
err = fmt . Errorf ( "Detected unexpected drive ordering refusing to use the drive: expecting %s, found %s, refusing to use the drive" ,
2021-11-15 12:46:55 -05:00
currentDisk . Endpoint ( ) , disk . Endpoint ( ) )
printEndpointError ( endpoint , err , false )
disk . Close ( )
s . erasureDisksMu . Unlock ( )
return
}
2020-06-12 23:04:01 -04:00
s . erasureDisks [ setIndex ] [ diskIndex ] . Close ( )
2020-04-03 21:06:31 -04:00
}
2023-08-01 13:54:26 -04:00
disk . SetDiskID ( format . Erasure . This )
s . erasureDisks [ setIndex ] [ diskIndex ] = disk
2023-12-13 22:27:55 -05:00
2024-02-14 13:37:34 -05:00
if disk . IsLocal ( ) {
2023-12-29 12:30:10 -05:00
globalLocalDrivesMu . Lock ( )
2024-02-14 13:37:34 -05:00
if globalIsDistErasure {
globalLocalSetDrives [ s . poolIndex ] [ setIndex ] [ diskIndex ] = disk
}
for i , ldisk := range globalLocalDrives {
_ , k , l := ldisk . GetDiskLoc ( )
if k == setIndex && l == diskIndex {
globalLocalDrives [ i ] = disk
break
}
}
2023-12-29 12:30:10 -05:00
globalLocalDrivesMu . Unlock ( )
}
2024-02-14 13:37:34 -05:00
s . erasureDisksMu . Unlock ( )
2020-03-25 02:26:13 -04:00
} ( endpoint )
2018-03-27 21:11:39 -04:00
}
2021-03-18 14:19:02 -04:00
2020-03-25 02:26:13 -04:00
wg . Wait ( )
2018-03-27 21:11:39 -04:00
}
2018-02-15 20:45:57 -05:00
// monitorAndConnectEndpoints this is a monitoring loop to keep track of disconnected
// endpoints by reconnecting them and making sure to place them into right position in
// the set topology, this monitoring happens at a given monitoring interval.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) monitorAndConnectEndpoints ( ctx context . Context , monitorInterval time . Duration ) {
2020-10-13 21:28:42 -04:00
r := rand . New ( rand . NewSource ( time . Now ( ) . UnixNano ( ) ) )
time . Sleep ( time . Duration ( r . Float64 ( ) * float64 ( time . Second ) ) )
// Pre-emptively connect the disks if possible.
s . connectDisks ( )
2020-12-16 17:33:05 -05:00
monitor := time . NewTimer ( monitorInterval )
defer monitor . Stop ( )
2018-02-15 20:45:57 -05:00
for {
select {
2020-03-18 19:19:29 -04:00
case <- ctx . Done ( ) :
2018-04-09 13:25:41 -04:00
return
2020-12-16 17:33:05 -05:00
case <- monitor . C :
2020-12-17 19:52:47 -05:00
if serverDebugLog {
2022-08-04 19:10:08 -04:00
console . Debugln ( "running drive monitoring" )
2020-12-17 19:52:47 -05:00
}
2020-12-17 15:35:02 -05:00
s . connectDisks ( )
2022-05-18 01:42:59 -04:00
// Reset the timer for next interval
monitor . Reset ( monitorInterval )
2020-12-16 17:33:05 -05:00
}
2019-11-13 15:17:45 -05:00
}
}
2020-09-25 22:21:52 -04:00
func ( s * erasureSets ) GetLockers ( setIndex int ) func ( ) ( [ ] dsync . NetLocker , string ) {
return func ( ) ( [ ] dsync . NetLocker , string ) {
2020-12-10 10:28:37 -05:00
lockers := make ( [ ] dsync . NetLocker , len ( s . erasureLockers [ setIndex ] ) )
2020-06-12 23:04:01 -04:00
copy ( lockers , s . erasureLockers [ setIndex ] )
2020-09-25 22:21:52 -04:00
return lockers , s . erasureLockOwner
2018-02-15 20:45:57 -05:00
}
}
2024-02-23 19:19:13 -05:00
// GetEndpointStrings returns a closure for the given set which, when called,
// yields a fresh copy of the endpoint strings of that set's drives.
func (s *erasureSets) GetEndpointStrings(setIndex int) func() []string {
	return func() []string {
		// The set's endpoints occupy one contiguous window of setDriveCount entries.
		first := setIndex * s.setDriveCount
		eps := make([]string, s.setDriveCount)
		copy(eps, s.endpointStrings[first:first+s.setDriveCount])
		return eps
	}
}
2021-09-29 14:36:19 -04:00
func ( s * erasureSets ) GetEndpoints ( setIndex int ) func ( ) [ ] Endpoint {
return func ( ) [ ] Endpoint {
eps := make ( [ ] Endpoint , s . setDriveCount )
2024-02-23 19:19:13 -05:00
copy ( eps , s . endpoints . Endpoints [ setIndex * s . setDriveCount : setIndex * s . setDriveCount + s . setDriveCount ] )
2020-06-10 20:10:31 -04:00
return eps
}
}
2018-02-15 20:45:57 -05:00
// GetDisks returns a closure for a given set, which provides list of disks per set.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) GetDisks ( setIndex int ) func ( ) [ ] StorageAPI {
2018-02-15 20:45:57 -05:00
return func ( ) [ ] StorageAPI {
2020-06-12 23:04:01 -04:00
s . erasureDisksMu . RLock ( )
defer s . erasureDisksMu . RUnlock ( )
2020-08-26 22:29:35 -04:00
disks := make ( [ ] StorageAPI , s . setDriveCount )
2020-06-12 23:04:01 -04:00
copy ( disks , s . erasureDisks [ setIndex ] )
2018-02-15 20:45:57 -05:00
return disks
}
}
2020-09-04 20:09:02 -04:00
// defaultMonitorConnectEndpointInterval is the interval at which endpoint
// connections are monitored (it is the interval passed to
// monitorAndConnectEndpoints by newErasureSets).
// It must be bigger than defaultMonitorNewDiskInterval, which is why it is
// defined as that interval plus five seconds.
const defaultMonitorConnectEndpointInterval = defaultMonitorNewDiskInterval + time . Second * 5
2018-02-15 20:45:57 -05:00
// Initialize new set of erasure coded sets.
2022-01-10 12:07:49 -05:00
func newErasureSets ( ctx context . Context , endpoints PoolEndpoints , storageDisks [ ] StorageAPI , format * formatErasureV3 , defaultParityCount , poolIdx int ) ( * erasureSets , error ) {
2020-06-12 23:04:01 -04:00
setCount := len ( format . Erasure . Sets )
2020-08-26 22:29:35 -04:00
setDriveCount := len ( format . Erasure . Sets [ 0 ] )
2020-04-27 17:39:57 -04:00
2022-01-10 12:07:49 -05:00
endpointStrings := make ( [ ] string , len ( endpoints . Endpoints ) )
for i , endpoint := range endpoints . Endpoints {
2021-09-25 13:51:03 -04:00
endpointStrings [ i ] = endpoint . String ( )
}
2020-06-12 23:04:01 -04:00
// Initialize the erasure sets instance.
s := & erasureSets {
2021-01-16 15:08:02 -05:00
sets : make ( [ ] * erasureObjects , setCount ) ,
erasureDisks : make ( [ ] [ ] StorageAPI , setCount ) ,
erasureLockers : make ( [ ] [ ] dsync . NetLocker , setCount ) ,
2021-03-26 14:37:58 -04:00
erasureLockOwner : globalLocalNodeName ,
2021-01-16 15:08:02 -05:00
endpoints : endpoints ,
endpointStrings : endpointStrings ,
setCount : setCount ,
setDriveCount : setDriveCount ,
defaultParityCount : defaultParityCount ,
format : format ,
distributionAlgo : format . Erasure . DistributionAlgo ,
deploymentID : uuid . MustParse ( format . ID ) ,
2021-03-04 17:36:23 -05:00
poolIndex : poolIdx ,
2018-02-15 20:45:57 -05:00
}
2020-06-12 23:04:01 -04:00
mutex := newNSLock ( globalIsDistErasure )
2018-06-01 19:41:23 -04:00
2020-03-27 17:48:30 -04:00
for i := 0 ; i < setCount ; i ++ {
2020-08-26 22:29:35 -04:00
s . erasureDisks [ i ] = make ( [ ] StorageAPI , setDriveCount )
2020-12-10 10:28:37 -05:00
}
2022-01-02 12:15:06 -05:00
erasureLockers := map [ string ] dsync . NetLocker { }
2022-01-10 12:07:49 -05:00
for _ , endpoint := range endpoints . Endpoints {
2020-12-10 10:28:37 -05:00
if _ , ok := erasureLockers [ endpoint . Host ] ; ! ok {
erasureLockers [ endpoint . Host ] = newLockAPI ( endpoint )
}
2020-04-29 16:42:37 -04:00
}
2018-02-15 20:45:57 -05:00
2024-01-24 16:36:44 -05:00
var wg sync . WaitGroup
var lk sync . Mutex
2020-04-29 16:42:37 -04:00
for i := 0 ; i < setCount ; i ++ {
2022-01-02 12:15:06 -05:00
lockerEpSet := set . NewStringSet ( )
2020-08-26 22:29:35 -04:00
for j := 0 ; j < setDriveCount ; j ++ {
2024-01-24 16:36:44 -05:00
wg . Add ( 1 )
go func ( i int , endpoint Endpoint ) {
defer wg . Done ( )
lk . Lock ( )
// Only add lockers only one per endpoint and per erasure set.
if locker , ok := erasureLockers [ endpoint . Host ] ; ok && ! lockerEpSet . Contains ( endpoint . Host ) {
lockerEpSet . Add ( endpoint . Host )
s . erasureLockers [ i ] = append ( s . erasureLockers [ i ] , locker )
}
lk . Unlock ( )
} ( i , endpoints . Endpoints [ i * setDriveCount + j ] )
2020-03-04 19:18:32 -05:00
}
2022-01-24 14:28:45 -05:00
}
2024-01-24 16:36:44 -05:00
wg . Wait ( )
2020-03-04 19:18:32 -05:00
2022-01-24 14:28:45 -05:00
for i := 0 ; i < setCount ; i ++ {
wg . Add ( 1 )
go func ( i int ) {
defer wg . Done ( )
var innerWg sync . WaitGroup
for j := 0 ; j < setDriveCount ; j ++ {
disk := storageDisks [ i * setDriveCount + j ]
if disk == nil {
continue
}
2023-12-29 12:30:10 -05:00
if disk . IsLocal ( ) && globalIsDistErasure {
globalLocalDrivesMu . RLock ( )
ldisk := globalLocalSetDrives [ poolIdx ] [ i ] [ j ]
if ldisk == nil {
globalLocalDrivesMu . RUnlock ( )
continue
}
2024-01-12 04:48:36 -05:00
disk . Close ( )
2023-12-29 12:30:10 -05:00
disk = ldisk
globalLocalDrivesMu . RUnlock ( )
}
2022-01-24 14:28:45 -05:00
innerWg . Add ( 1 )
go func ( disk StorageAPI , i , j int ) {
defer innerWg . Done ( )
diskID , err := disk . GetDiskID ( )
if err != nil {
if ! errors . Is ( err , errUnformattedDisk ) {
2024-04-04 08:04:40 -04:00
bootLogIf ( ctx , err )
2022-01-24 14:28:45 -05:00
}
return
}
2022-01-24 22:40:02 -05:00
if diskID == "" {
return
}
2024-04-11 13:45:28 -04:00
s . erasureDisks [ i ] [ j ] = disk
2022-01-24 14:28:45 -05:00
} ( disk , i , j )
}
2024-04-11 13:45:28 -04:00
2022-01-24 14:28:45 -05:00
innerWg . Wait ( )
// Initialize erasure objects for a given set.
s . sets [ i ] = & erasureObjects {
2022-11-22 10:23:36 -05:00
setIndex : i ,
poolIndex : poolIdx ,
setDriveCount : setDriveCount ,
defaultParityCount : defaultParityCount ,
getDisks : s . GetDisks ( i ) ,
getLockers : s . GetLockers ( i ) ,
getEndpoints : s . GetEndpoints ( i ) ,
2024-02-23 19:19:13 -05:00
getEndpointStrings : s . GetEndpointStrings ( i ) ,
2022-11-22 10:23:36 -05:00
nsMutex : mutex ,
2022-01-24 14:28:45 -05:00
}
} ( i )
2018-02-15 20:45:57 -05:00
}
2022-01-24 14:28:45 -05:00
wg . Wait ( )
2020-12-10 10:28:37 -05:00
// start cleanup stale uploads go-routine.
2021-10-04 13:52:28 -04:00
go s . cleanupStaleUploads ( ctx )
2020-12-10 10:28:37 -05:00
2021-02-26 12:52:27 -05:00
// start cleanup of deleted objects.
2021-10-04 13:52:28 -04:00
go s . cleanupDeletedObjects ( ctx )
2021-02-26 12:52:27 -05:00
2018-02-15 20:45:57 -05:00
// Start the disk monitoring and connect routine.
2022-05-16 08:36:00 -04:00
if ! globalIsTesting {
go s . monitorAndConnectEndpoints ( ctx , defaultMonitorConnectEndpointInterval )
}
2020-01-15 21:30:32 -05:00
2018-02-15 20:45:57 -05:00
return s , nil
}
2021-10-04 13:52:28 -04:00
// cleanup ".trash/" folder every 5m minutes with sufficient sleep cycles, between each
// deletes a dynamic sleeper is used with a factor of 10 ratio with max delay between
// deletes to be 2 seconds.
func ( s * erasureSets ) cleanupDeletedObjects ( ctx context . Context ) {
timer := time . NewTimer ( globalAPIConfig . getDeleteCleanupInterval ( ) )
2021-02-26 12:52:27 -05:00
defer timer . Stop ( )
for {
select {
case <- ctx . Done ( ) :
return
case <- timer . C :
2022-02-11 17:22:48 -05:00
var wg sync . WaitGroup
2021-02-26 12:52:27 -05:00
for _ , set := range s . sets {
2022-02-11 17:22:48 -05:00
wg . Add ( 1 )
go func ( set * erasureObjects ) {
defer wg . Done ( )
if set == nil {
return
}
set . cleanupDeletedObjects ( ctx )
} ( set )
2021-02-26 12:52:27 -05:00
}
2022-02-11 17:22:48 -05:00
wg . Wait ( )
2022-05-18 01:42:59 -04:00
// Reset for the next interval
timer . Reset ( globalAPIConfig . getDeleteCleanupInterval ( ) )
2021-02-26 12:52:27 -05:00
}
}
}
2021-10-04 13:52:28 -04:00
func ( s * erasureSets ) cleanupStaleUploads ( ctx context . Context ) {
timer := time . NewTimer ( globalAPIConfig . getStaleUploadsCleanupInterval ( ) )
2021-02-05 22:23:48 -05:00
defer timer . Stop ( )
2020-12-10 10:28:37 -05:00
for {
select {
case <- ctx . Done ( ) :
return
2021-02-05 22:23:48 -05:00
case <- timer . C :
2022-02-11 17:22:48 -05:00
var wg sync . WaitGroup
2020-12-10 10:28:37 -05:00
for _ , set := range s . sets {
2022-02-11 17:22:48 -05:00
wg . Add ( 1 )
go func ( set * erasureObjects ) {
defer wg . Done ( )
if set == nil {
return
}
set . cleanupStaleUploads ( ctx , globalAPIConfig . getStaleUploadsExpiry ( ) )
} ( set )
2020-12-10 10:28:37 -05:00
}
2022-02-11 17:22:48 -05:00
wg . Wait ( )
2022-05-18 01:42:59 -04:00
// Reset for the next interval
timer . Reset ( globalAPIConfig . getStaleUploadsCleanupInterval ( ) )
2020-12-10 10:28:37 -05:00
}
}
}
2021-01-26 16:21:51 -05:00
// auditObjectOp describes the location of an object; it is the value that
// auditObjectErasureSet appends to the request info under the
// "objectLocation" tag.
type auditObjectOp struct {
2024-05-09 13:15:03 -04:00
// Object name, as produced by decodeDirObject.
Name string ` json:"name" `
// Pool number, 1-based (pool index + 1).
Pool int ` json:"poolId" `
// Set number, 1-based (set index + 1).
Set int ` json:"setId" `
// Endpoint strings of the drives of the set.
Drives [ ] string ` json:"drives" `
2021-01-26 16:21:51 -05:00
}
2022-05-04 03:45:27 -04:00
// Add erasure set information to the current context
2021-03-04 17:36:23 -05:00
func auditObjectErasureSet ( ctx context . Context , object string , set * erasureObjects ) {
2021-10-28 10:35:28 -04:00
if len ( logger . AuditTargets ( ) ) == 0 {
2021-01-26 16:21:51 -05:00
return
}
op := auditObjectOp {
2024-05-09 13:15:03 -04:00
Name : decodeDirObject ( object ) ,
Pool : set . poolIndex + 1 ,
Set : set . setIndex + 1 ,
Drives : set . getEndpointStrings ( ) ,
2021-01-26 16:21:51 -05:00
}
2022-07-07 20:04:25 -04:00
logger . GetReqInfo ( ctx ) . AppendTags ( "objectLocation" , op )
2021-01-26 16:21:51 -05:00
}
2019-11-13 15:17:45 -05:00
// NewNSLock - initialize a new namespace RWLocker instance.
2020-11-04 11:25:42 -05:00
func ( s * erasureSets ) NewNSLock ( bucket string , objects ... string ) RWLocker {
2020-02-21 00:59:57 -05:00
if len ( objects ) == 1 {
2020-11-04 11:25:42 -05:00
return s . getHashedSet ( objects [ 0 ] ) . NewNSLock ( bucket , objects ... )
2020-02-21 00:59:57 -05:00
}
2020-11-04 11:25:42 -05:00
return s . getHashedSet ( "" ) . NewNSLock ( bucket , objects ... )
2019-11-13 15:17:45 -05:00
}
2020-08-05 16:31:12 -04:00
// SetDriveCount returns the current drives per set.
func ( s * erasureSets ) SetDriveCount ( ) int {
2020-08-26 22:29:35 -04:00
return s . setDriveCount
2020-08-05 16:31:12 -04:00
}
2021-01-16 15:08:02 -05:00
// ParityCount returns the default parity count used while erasure coding
// objects.
func (s *erasureSets) ParityCount() int {
	parity := s.defaultParityCount
	return parity
}
2020-05-28 16:03:04 -04:00
// StorageInfo - combines output of StorageInfo across all erasure coded object sets.
2022-12-01 17:31:35 -05:00
func ( s * erasureSets ) StorageInfo ( ctx context . Context ) StorageInfo {
2021-03-04 17:36:23 -05:00
var storageInfo madmin . StorageInfo
2019-08-22 23:02:40 -04:00
2021-03-04 17:36:23 -05:00
storageInfos := make ( [ ] madmin . StorageInfo , len ( s . sets ) )
2019-10-14 12:44:51 -04:00
g := errgroup . WithNErrs ( len ( s . sets ) )
for index := range s . sets {
index := index
g . Go ( func ( ) error {
2022-12-01 17:31:35 -05:00
storageInfos [ index ] = s . sets [ index ] . StorageInfo ( ctx )
2019-10-14 12:44:51 -04:00
return nil
} , index )
2019-08-22 23:02:40 -04:00
}
2019-10-14 12:44:51 -04:00
2019-08-22 23:02:40 -04:00
// Wait for the go routines.
2019-10-14 12:44:51 -04:00
g . Wait ( )
2019-08-22 23:02:40 -04:00
for _ , lstorageInfo := range storageInfos {
2020-07-13 12:51:07 -04:00
storageInfo . Disks = append ( storageInfo . Disks , lstorageInfo . Disks ... )
2018-02-15 20:45:57 -05:00
}
2022-12-01 17:31:35 -05:00
return storageInfo
2018-02-15 20:45:57 -05:00
}
2021-03-02 20:28:04 -05:00
// StorageInfo - combines output of StorageInfo across all erasure coded object sets.
2023-12-21 19:56:43 -05:00
func ( s * erasureSets ) LocalStorageInfo ( ctx context . Context , metrics bool ) StorageInfo {
2021-03-02 20:28:04 -05:00
var storageInfo StorageInfo
storageInfos := make ( [ ] StorageInfo , len ( s . sets ) )
g := errgroup . WithNErrs ( len ( s . sets ) )
for index := range s . sets {
index := index
g . Go ( func ( ) error {
2023-12-21 19:56:43 -05:00
storageInfos [ index ] = s . sets [ index ] . LocalStorageInfo ( ctx , metrics )
2021-03-02 20:28:04 -05:00
return nil
} , index )
}
// Wait for the go routines.
g . Wait ( )
for _ , lstorageInfo := range storageInfos {
storageInfo . Disks = append ( storageInfo . Disks , lstorageInfo . Disks ... )
}
2022-12-01 17:31:35 -05:00
return storageInfo
2021-03-02 20:28:04 -05:00
}
2018-02-15 20:45:57 -05:00
// Shutdown shutsdown all erasure coded sets in parallel
// returns error upon first error.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) Shutdown ( ctx context . Context ) error {
2018-02-15 20:45:57 -05:00
g := errgroup . WithNErrs ( len ( s . sets ) )
for index := range s . sets {
index := index
g . Go ( func ( ) error {
2018-03-14 15:01:47 -04:00
return s . sets [ index ] . Shutdown ( ctx )
2018-02-15 20:45:57 -05:00
} , index )
}
for _ , err := range g . Wait ( ) {
if err != nil {
return err
}
}
return nil
}
// hashes the key returning an integer based on the input algorithm.
// This function currently supports
// - CRCMOD
2020-06-12 23:04:01 -04:00
// - SIPMOD
2018-02-15 20:45:57 -05:00
// - all new algos.
2020-06-12 23:04:01 -04:00
func sipHashMod ( key string , cardinality int , id [ 16 ] byte ) int {
if cardinality <= 0 {
return - 1
}
2021-02-26 19:53:06 -05:00
// use the faster version as per siphash docs
// https://github.com/dchest/siphash#usage
k0 , k1 := binary . LittleEndian . Uint64 ( id [ 0 : 8 ] ) , binary . LittleEndian . Uint64 ( id [ 8 : 16 ] )
sum64 := siphash . Hash ( k0 , k1 , [ ] byte ( key ) )
return int ( sum64 % uint64 ( cardinality ) )
2020-06-12 23:04:01 -04:00
}
2018-02-15 20:45:57 -05:00
// crcHashMod hashes key with CRC-32 (IEEE polynomial) and reduces the
// checksum modulo cardinality. It returns -1 when cardinality is not
// positive.
func crcHashMod(key string, cardinality int) int {
	if cardinality > 0 {
		sum := crc32.ChecksumIEEE([]byte(key))
		return int(sum % uint32(cardinality))
	}
	return -1
}
2020-06-12 23:04:01 -04:00
func hashKey ( algo string , key string , cardinality int , id [ 16 ] byte ) int {
2018-02-15 20:45:57 -05:00
switch algo {
2021-01-16 15:08:02 -05:00
case formatErasureVersionV2DistributionAlgoV1 :
2018-02-15 20:45:57 -05:00
return crcHashMod ( key , cardinality )
2021-01-16 15:08:02 -05:00
case formatErasureVersionV3DistributionAlgoV2 , formatErasureVersionV3DistributionAlgoV3 :
2020-06-12 23:04:01 -04:00
return sipHashMod ( key , cardinality , id )
2018-08-06 13:26:40 -04:00
default :
// Unknown algorithm returns -1, also if cardinality is lesser than 0.
return - 1
2018-02-15 20:45:57 -05:00
}
}
2019-05-13 15:25:49 -04:00
// Returns always a same erasure coded set for a given input.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) getHashedSetIndex ( input string ) int {
return hashKey ( s . distributionAlgo , input , len ( s . sets ) , s . deploymentID )
2019-05-13 15:25:49 -04:00
}
2018-02-15 20:45:57 -05:00
// Returns always a same erasure coded set for a given input.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) getHashedSet ( input string ) ( set * erasureObjects ) {
2019-05-13 15:25:49 -04:00
return s . sets [ s . getHashedSetIndex ( input ) ]
2018-02-15 20:45:57 -05:00
}
2022-07-25 20:51:32 -04:00
// listDeletedBuckets lists deleted buckets from all disks.
func listDeletedBuckets ( ctx context . Context , storageDisks [ ] StorageAPI , delBuckets map [ string ] VolInfo , readQuorum int ) error {
g := errgroup . WithNErrs ( len ( storageDisks ) )
var mu sync . Mutex
for index := range storageDisks {
index := index
g . Go ( func ( ) error {
if storageDisks [ index ] == nil {
// we ignore disk not found errors
return nil
}
2024-01-30 15:43:25 -05:00
volsInfo , err := storageDisks [ index ] . ListDir ( ctx , "" , minioMetaBucket , pathJoin ( bucketMetaPrefix , deletedBucketsPrefix ) , - 1 )
2022-07-25 20:51:32 -04:00
if err != nil {
2022-12-29 03:08:31 -05:00
if errors . Is ( err , errFileNotFound ) {
2022-07-25 20:51:32 -04:00
return nil
}
return err
}
for _ , volName := range volsInfo {
2022-12-29 03:08:31 -05:00
vi , err := storageDisks [ index ] . StatVol ( ctx , pathJoin ( minioMetaBucket , bucketMetaPrefix , deletedBucketsPrefix , volName ) )
if err == nil {
vi . Name = strings . TrimSuffix ( volName , SlashSeparator )
mu . Lock ( )
if _ , ok := delBuckets [ volName ] ; ! ok {
delBuckets [ volName ] = vi
2022-07-25 20:51:32 -04:00
}
2022-12-29 03:08:31 -05:00
mu . Unlock ( )
2022-07-25 20:51:32 -04:00
}
}
return nil
} , index )
}
return reduceReadQuorumErrs ( ctx , g . Wait ( ) , bucketMetadataOpIgnoredErrs , readQuorum )
}
2018-02-15 20:45:57 -05:00
// --- Object Operations ---
2018-09-20 22:22:09 -04:00
// GetObjectNInfo - returns object info and locked object ReadCloser
2023-04-17 15:16:37 -04:00
func ( s * erasureSets ) GetObjectNInfo ( ctx context . Context , bucket , object string , rs * HTTPRangeSpec , h http . Header , opts ObjectOptions ) ( gr * GetObjectReader , err error ) {
2021-01-26 16:21:51 -05:00
set := s . getHashedSet ( object )
2023-04-17 15:16:37 -04:00
return set . GetObjectNInfo ( ctx , bucket , object , rs , h , opts )
2018-09-20 22:22:09 -04:00
}
2018-02-15 20:45:57 -05:00
// PutObject - writes an object to hashedSet based on the object name.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) PutObject ( ctx context . Context , bucket string , object string , data * PutObjReader , opts ObjectOptions ) ( objInfo ObjectInfo , err error ) {
2021-01-26 16:21:51 -05:00
set := s . getHashedSet ( object )
return set . PutObject ( ctx , bucket , object , data , opts )
2018-02-15 20:45:57 -05:00
}
// GetObjectInfo - reads object metadata from the hashedSet based on the object name.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) GetObjectInfo ( ctx context . Context , bucket , object string , opts ObjectOptions ) ( objInfo ObjectInfo , err error ) {
2021-01-26 16:21:51 -05:00
set := s . getHashedSet ( object )
return set . GetObjectInfo ( ctx , bucket , object , opts )
2018-02-15 20:45:57 -05:00
}
2021-06-15 21:43:14 -04:00
func ( s * erasureSets ) deletePrefix ( ctx context . Context , bucket string , prefix string ) error {
2021-09-17 22:34:48 -04:00
var wg sync . WaitGroup
wg . Add ( len ( s . sets ) )
2021-06-15 21:43:14 -04:00
for _ , s := range s . sets {
2021-09-17 22:34:48 -04:00
go func ( s * erasureObjects ) {
defer wg . Done ( )
// This is a force delete, no reason to throw errors.
s . DeleteObject ( ctx , bucket , prefix , ObjectOptions { DeletePrefix : true } )
} ( s )
2021-06-15 21:43:14 -04:00
}
2021-09-17 22:34:48 -04:00
wg . Wait ( )
2021-06-15 21:43:14 -04:00
return nil
}
2018-02-15 20:45:57 -05:00
// DeleteObject - deletes an object from the hashedSet based on the object name.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) DeleteObject ( ctx context . Context , bucket string , object string , opts ObjectOptions ) ( objInfo ObjectInfo , err error ) {
2023-08-09 19:30:22 -04:00
if opts . DeletePrefix && ! opts . DeletePrefixObject {
2021-06-15 21:43:14 -04:00
err := s . deletePrefix ( ctx , bucket , object )
return ObjectInfo { } , err
}
2022-05-04 03:45:27 -04:00
set := s . getHashedSet ( object )
2021-01-26 16:21:51 -05:00
return set . DeleteObject ( ctx , bucket , object , opts )
2018-02-15 20:45:57 -05:00
}
2019-05-13 15:25:49 -04:00
// DeleteObjects - bulk delete of objects
// Bulk delete is only possible within one set. For that purpose
// objects are group by set first, and then bulk delete is invoked
// for each set, the error response of each delete will be returned
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) DeleteObjects ( ctx context . Context , bucket string , objects [ ] ObjectToDelete , opts ObjectOptions ) ( [ ] DeletedObject , [ ] error ) {
2019-05-13 15:25:49 -04:00
type delObj struct {
// Set index associated to this object
setIndex int
// Original index from the list of arguments
// where this object is passed
origIndex int
2020-06-12 23:04:01 -04:00
// object to delete
object ObjectToDelete
2019-05-13 15:25:49 -04:00
}
// Transform []delObj to the list of object names
2020-06-12 23:04:01 -04:00
toNames := func ( delObjs [ ] delObj ) [ ] ObjectToDelete {
objs := make ( [ ] ObjectToDelete , len ( delObjs ) )
2019-05-13 15:25:49 -04:00
for i , obj := range delObjs {
2020-06-12 23:04:01 -04:00
objs [ i ] = obj . object
2019-05-13 15:25:49 -04:00
}
2020-06-12 23:04:01 -04:00
return objs
2019-05-13 15:25:49 -04:00
}
// The result of delete operation on all passed objects
2022-01-02 12:15:06 -05:00
delErrs := make ( [ ] error , len ( objects ) )
2019-05-13 15:25:49 -04:00
2020-06-12 23:04:01 -04:00
// The result of delete objects
2022-01-02 12:15:06 -05:00
delObjects := make ( [ ] DeletedObject , len ( objects ) )
2020-06-12 23:04:01 -04:00
2019-05-13 15:25:49 -04:00
// A map between a set and its associated objects
2022-01-02 12:15:06 -05:00
objSetMap := make ( map [ int ] [ ] delObj )
2019-05-13 15:25:49 -04:00
// Group objects by set index
for i , object := range objects {
2020-06-12 23:04:01 -04:00
index := s . getHashedSetIndex ( object . ObjectName )
objSetMap [ index ] = append ( objSetMap [ index ] , delObj { setIndex : index , origIndex : i , object : object } )
2019-05-13 15:25:49 -04:00
}
// Invoke bulk delete on objects per set and save
// the result of the delete operation
2022-01-06 13:47:49 -05:00
var wg sync . WaitGroup
var mu sync . Mutex
wg . Add ( len ( objSetMap ) )
for setIdx , objsGroup := range objSetMap {
go func ( set * erasureObjects , group [ ] delObj ) {
defer wg . Done ( )
dobjects , errs := set . DeleteObjects ( ctx , bucket , toNames ( group ) , opts )
mu . Lock ( )
defer mu . Unlock ( )
for i , obj := range group {
delErrs [ obj . origIndex ] = errs [ i ]
delObjects [ obj . origIndex ] = dobjects [ i ]
2021-01-26 16:21:51 -05:00
}
2022-01-06 13:47:49 -05:00
} ( s . sets [ setIdx ] , objsGroup )
2019-05-13 15:25:49 -04:00
}
2022-01-06 13:47:49 -05:00
wg . Wait ( )
2019-05-13 15:25:49 -04:00
2020-06-12 23:04:01 -04:00
return delObjects , delErrs
2019-05-13 15:25:49 -04:00
}
2018-02-15 20:45:57 -05:00
// CopyObject - copies objects from one hashedSet to another hashedSet, on server side.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) CopyObject ( ctx context . Context , srcBucket , srcObject , dstBucket , dstObject string , srcInfo ObjectInfo , srcOpts , dstOpts ObjectOptions ) ( objInfo ObjectInfo , err error ) {
2018-02-15 20:45:57 -05:00
srcSet := s . getHashedSet ( srcObject )
2020-05-28 17:36:38 -04:00
dstSet := s . getHashedSet ( dstObject )
2018-02-15 20:45:57 -05:00
2020-08-03 19:21:10 -04:00
cpSrcDstSame := srcSet == dstSet
2018-02-15 20:45:57 -05:00
// Check if this request is only metadata update.
2020-08-03 19:21:10 -04:00
if cpSrcDstSame && srcInfo . metadataOnly {
2020-09-14 18:57:13 -04:00
// Version ID is set for the destination and source == destination version ID.
// perform an in-place update.
2020-06-19 11:44:51 -04:00
if dstOpts . VersionID != "" && srcOpts . VersionID == dstOpts . VersionID {
2022-06-21 22:20:11 -04:00
srcInfo . Reader . Close ( ) // We are not interested in the reader stream at this point close it.
2020-06-19 11:44:51 -04:00
return srcSet . CopyObject ( ctx , srcBucket , srcObject , dstBucket , dstObject , srcInfo , srcOpts , dstOpts )
}
2020-09-14 18:57:13 -04:00
// Destination is not versioned and source version ID is empty
// perform an in-place update.
2020-06-19 11:44:51 -04:00
if ! dstOpts . Versioned && srcOpts . VersionID == "" {
2022-06-21 22:20:11 -04:00
srcInfo . Reader . Close ( ) // We are not interested in the reader stream at this point close it.
2020-06-19 11:44:51 -04:00
return srcSet . CopyObject ( ctx , srcBucket , srcObject , dstBucket , dstObject , srcInfo , srcOpts , dstOpts )
}
2020-08-03 19:21:10 -04:00
// CopyObject optimization where we don't create an entire copy
// of the content, instead we add a reference, we disallow legacy
// objects to be self referenced in this manner so make sure
// that we actually create a new dataDir for legacy objects.
if dstOpts . Versioned && srcOpts . VersionID != dstOpts . VersionID && ! srcInfo . Legacy {
srcInfo . versionOnly = true
2022-06-21 22:20:11 -04:00
srcInfo . Reader . Close ( ) // We are not interested in the reader stream at this point close it.
2020-08-03 19:21:10 -04:00
return srcSet . CopyObject ( ctx , srcBucket , srcObject , dstBucket , dstObject , srcInfo , srcOpts , dstOpts )
}
2018-02-15 20:45:57 -05:00
}
2020-06-17 14:13:41 -04:00
putOpts := ObjectOptions {
ServerSideEncryption : dstOpts . ServerSideEncryption ,
UserDefined : srcInfo . UserDefined ,
Versioned : dstOpts . Versioned ,
VersionID : dstOpts . VersionID ,
2020-11-19 14:50:22 -05:00
MTime : dstOpts . MTime ,
2020-06-17 14:13:41 -04:00
}
2020-06-19 11:44:51 -04:00
2020-05-28 17:36:38 -04:00
return dstSet . putObject ( ctx , dstBucket , dstObject , srcInfo . PutObjReader , putOpts )
2018-02-15 20:45:57 -05:00
}
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) ListMultipartUploads ( ctx context . Context , bucket , prefix , keyMarker , uploadIDMarker , delimiter string , maxUploads int ) ( result ListMultipartsInfo , err error ) {
2018-02-15 20:45:57 -05:00
// In list multipart uploads we are going to treat input prefix as the object,
// this means that we are not supporting directory navigation.
2021-01-26 16:21:51 -05:00
set := s . getHashedSet ( prefix )
return set . ListMultipartUploads ( ctx , bucket , prefix , keyMarker , uploadIDMarker , delimiter , maxUploads )
2018-02-15 20:45:57 -05:00
}
// Initiate a new multipart upload on a hashedSet based on object name.
2022-08-29 19:57:16 -04:00
func ( s * erasureSets ) NewMultipartUpload ( ctx context . Context , bucket , object string , opts ObjectOptions ) ( res * NewMultipartUploadResult , err error ) {
2021-01-26 16:21:51 -05:00
set := s . getHashedSet ( object )
return set . NewMultipartUpload ( ctx , bucket , object , opts )
2018-02-15 20:45:57 -05:00
}
// PutObjectPart - writes part of an object to hashedSet based on the object name.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) PutObjectPart ( ctx context . Context , bucket , object , uploadID string , partID int , data * PutObjReader , opts ObjectOptions ) ( info PartInfo , err error ) {
2021-01-26 16:21:51 -05:00
set := s . getHashedSet ( object )
return set . PutObjectPart ( ctx , bucket , object , uploadID , partID , data , opts )
2018-02-15 20:45:57 -05:00
}
2020-05-28 15:36:20 -04:00
// GetMultipartInfo - return multipart metadata info uploaded at hashedSet.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) GetMultipartInfo ( ctx context . Context , bucket , object , uploadID string , opts ObjectOptions ) ( result MultipartInfo , err error ) {
2021-01-26 16:21:51 -05:00
set := s . getHashedSet ( object )
return set . GetMultipartInfo ( ctx , bucket , object , uploadID , opts )
2020-05-28 15:36:20 -04:00
}
2018-02-15 20:45:57 -05:00
// ListObjectParts - lists all uploaded parts to an object in hashedSet.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) ListObjectParts ( ctx context . Context , bucket , object , uploadID string , partNumberMarker int , maxParts int , opts ObjectOptions ) ( result ListPartsInfo , err error ) {
2021-01-26 16:21:51 -05:00
set := s . getHashedSet ( object )
return set . ListObjectParts ( ctx , bucket , object , uploadID , partNumberMarker , maxParts , opts )
2018-02-15 20:45:57 -05:00
}
// Aborts an in-progress multipart operation on hashedSet based on the object name.
2020-09-14 18:57:13 -04:00
func ( s * erasureSets ) AbortMultipartUpload ( ctx context . Context , bucket , object , uploadID string , opts ObjectOptions ) error {
2021-01-26 16:21:51 -05:00
set := s . getHashedSet ( object )
return set . AbortMultipartUpload ( ctx , bucket , object , uploadID , opts )
2018-02-15 20:45:57 -05:00
}
// CompleteMultipartUpload - completes a pending multipart transaction, on hashedSet based on object name.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) CompleteMultipartUpload ( ctx context . Context , bucket , object , uploadID string , uploadedParts [ ] CompletePart , opts ObjectOptions ) ( objInfo ObjectInfo , err error ) {
2021-01-26 16:21:51 -05:00
set := s . getHashedSet ( object )
return set . CompleteMultipartUpload ( ctx , bucket , object , uploadID , uploadedParts , opts )
2018-02-15 20:45:57 -05:00
}
/*

All disks online
-----------------
- All Unformatted - format all and return success.
- Some Unformatted - format all and return success.
- Any JBOD inconsistent - return failure
- Some are corrupt (missing format.json) - return failure
- Any unrecognized disks - return failure

Some disks are offline and we have quorum.
-----------------
- Some unformatted - format all and return success,
  treat disks offline as corrupted.
- Any JBOD inconsistent - return failure
- Some are corrupt (missing format.json) - return failure
- Any unrecognized disks - return failure

No read quorum
-----------------
failure for all cases.

// Pseudo code for managing `format.json`.

// Generic checks.
if (no quorum) return error
if (any disk is corrupt) return error // Always error.
if (jbod inconsistent) return error // Always error.
if (disks not recognized) return error // Always error.

// Specific checks.
if (all disks online)
  if (all disks return format.json)
     if (jbod consistent)
        if (all disks recognized)
          return
  else
     if (all disks return format.json not found)
        return error
     else (some disks return format.json not found)
        (heal format)
        return
     fi
   fi
else
   if (some disks return format.json not found)
        // Offline disks are marked as dead.
        (heal format) // Offline disks should be marked as dead.
        return success
   fi
fi
*/
2020-07-13 12:51:07 -04:00
func formatsToDrivesInfo ( endpoints Endpoints , formats [ ] * formatErasureV3 , sErrs [ ] error ) ( beforeDrives [ ] madmin . HealDriveInfo ) {
beforeDrives = make ( [ ] madmin . HealDriveInfo , len ( endpoints ) )
2018-02-15 20:45:57 -05:00
// Existing formats are available (i.e. ok), so save it in
// result, also populate disks to be healed.
for i , format := range formats {
drive := endpoints . GetString ( i )
2022-01-02 12:15:06 -05:00
state := madmin . DriveStateCorrupt
2018-02-15 20:45:57 -05:00
switch {
case format != nil :
2019-08-30 17:11:18 -04:00
state = madmin . DriveStateOk
2018-02-15 20:45:57 -05:00
case sErrs [ i ] == errUnformattedDisk :
2019-08-30 17:11:18 -04:00
state = madmin . DriveStateMissing
2019-08-02 15:17:26 -04:00
case sErrs [ i ] == errDiskNotFound :
2019-08-30 17:11:18 -04:00
state = madmin . DriveStateOffline
}
2020-07-13 12:51:07 -04:00
beforeDrives [ i ] = madmin . HealDriveInfo {
2019-08-30 17:11:18 -04:00
UUID : func ( ) string {
if format != nil {
2020-06-12 23:04:01 -04:00
return format . Erasure . This
2019-08-30 17:11:18 -04:00
}
return ""
} ( ) ,
Endpoint : drive ,
State : state ,
2018-02-15 20:45:57 -05:00
}
}
return beforeDrives
}
2018-04-30 23:37:39 -04:00
// HealFormat - heals missing `format.json` on fresh unformatted disks.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) HealFormat ( ctx context . Context , dryRun bool ) ( res madmin . HealResultItem , err error ) {
2023-08-01 13:54:26 -04:00
storageDisks , _ := initStorageDisksWithErrors ( s . endpoints . Endpoints , storageOpts {
2024-01-23 17:11:46 -05:00
cleanUp : false ,
2024-01-12 04:48:36 -05:00
healthCheck : false ,
2023-08-01 13:54:26 -04:00
} )
2018-04-09 13:25:41 -04:00
defer func ( storageDisks [ ] StorageAPI ) {
if err != nil {
2022-05-30 13:58:37 -04:00
closeStorageDisks ( storageDisks ... )
2018-04-09 13:25:41 -04:00
}
} ( storageDisks )
2018-04-04 00:58:48 -04:00
2020-06-12 23:04:01 -04:00
formats , sErrs := loadFormatErasureAll ( storageDisks , true )
2021-01-29 14:40:55 -05:00
if err = checkFormatErasureValues ( formats , storageDisks , s . setDriveCount ) ; err != nil {
2018-02-15 20:45:57 -05:00
return madmin . HealResultItem { } , err
}
2020-09-04 20:09:02 -04:00
refFormat , err := getFormatErasureInQuorum ( formats )
if err != nil {
return res , err
}
2018-02-15 20:45:57 -05:00
// Prepare heal-result
2018-04-09 13:25:41 -04:00
res = madmin . HealResultItem {
2018-02-15 20:45:57 -05:00
Type : madmin . HealItemMetadata ,
Detail : "disk-format" ,
2020-08-26 22:29:35 -04:00
DiskCount : s . setCount * s . setDriveCount ,
2018-02-15 20:45:57 -05:00
SetCount : s . setCount ,
}
// Fetch all the drive info status.
2022-01-10 12:07:49 -05:00
beforeDrives := formatsToDrivesInfo ( s . endpoints . Endpoints , formats , sErrs )
2018-02-15 20:45:57 -05:00
res . After . Drives = make ( [ ] madmin . HealDriveInfo , len ( beforeDrives ) )
res . Before . Drives = make ( [ ] madmin . HealDriveInfo , len ( beforeDrives ) )
// Copy "after" drive state too from before.
for k , v := range beforeDrives {
2021-03-04 17:36:23 -05:00
res . Before . Drives [ k ] = v
res . After . Drives [ k ] = v
2018-02-15 20:45:57 -05:00
}
2019-09-24 21:47:26 -04:00
if countErrs ( sErrs , errUnformattedDisk ) == 0 {
2018-04-30 23:37:39 -04:00
return res , errNoHealRequired
}
2024-01-16 18:13:14 -05:00
if ! reflect . DeepEqual ( s . format , refFormat ) {
// Format is corrupted and unrecognized by the running instance.
2024-04-04 08:04:40 -04:00
healingLogIf ( ctx , fmt . Errorf ( "Unable to heal the newly replaced drives due to format.json inconsistencies, please engage MinIO support for further assistance: %w" ,
2024-01-16 18:13:14 -05:00
errCorruptedFormat ) )
return res , errCorruptedFormat
}
2023-01-05 23:41:19 -05:00
formatOpID := mustGetUUID ( )
2018-02-15 20:45:57 -05:00
// Initialize a new set of set formats which will be written to disk.
2023-12-29 18:52:41 -05:00
newFormatSets , currentDisksInfo := newHealFormatSets ( refFormat , s . setCount , s . setDriveCount , formats , sErrs )
2018-02-15 20:45:57 -05:00
if ! dryRun {
2022-01-02 12:15:06 -05:00
tmpNewFormats := make ( [ ] * formatErasureV3 , s . setCount * s . setDriveCount )
2018-02-15 20:45:57 -05:00
for i := range newFormatSets {
for j := range newFormatSets [ i ] {
if newFormatSets [ i ] [ j ] == nil {
continue
}
2020-10-26 13:29:29 -04:00
res . After . Drives [ i * s . setDriveCount + j ] . UUID = newFormatSets [ i ] [ j ] . Erasure . This
res . After . Drives [ i * s . setDriveCount + j ] . State = madmin . DriveStateOk
2020-08-26 22:29:35 -04:00
tmpNewFormats [ i * s . setDriveCount + j ] = newFormatSets [ i ] [ j ]
2018-02-15 20:45:57 -05:00
}
}
2020-10-31 04:34:48 -04:00
// Save new formats `format.json` on unformatted disks.
2021-11-04 19:42:49 -04:00
for index , format := range tmpNewFormats {
if storageDisks [ index ] == nil || format == nil {
continue
}
2023-01-05 23:41:19 -05:00
if err := saveFormatErasure ( storageDisks [ index ] , format , formatOpID ) ; err != nil {
2024-04-04 08:04:40 -04:00
healingLogIf ( ctx , fmt . Errorf ( "Drive %s failed to write updated 'format.json': %v" , storageDisks [ index ] , err ) )
2024-01-12 04:48:36 -05:00
storageDisks [ index ] . Close ( )
2021-11-04 19:42:49 -04:00
tmpNewFormats [ index ] = nil // this disk failed to write new format
}
2020-08-07 16:22:53 -04:00
}
2020-09-17 00:14:35 -04:00
s . erasureDisksMu . Lock ( )
2018-04-09 13:25:41 -04:00
2020-10-31 04:34:48 -04:00
for index , format := range tmpNewFormats {
if format == nil {
2020-03-27 17:48:30 -04:00
continue
}
2020-10-31 04:34:48 -04:00
m , n , err := findDiskIndexByDiskID ( refFormat , format . Erasure . This )
2020-03-27 17:48:30 -04:00
if err != nil {
2024-04-04 08:04:40 -04:00
healingLogIf ( ctx , err )
2020-03-27 17:48:30 -04:00
continue
}
2020-06-12 23:04:01 -04:00
if s . erasureDisks [ m ] [ n ] != nil {
s . erasureDisks [ m ] [ n ] . Close ( )
2020-03-27 17:48:30 -04:00
}
2021-09-14 18:10:00 -04:00
2023-12-29 18:52:41 -05:00
if disk := storageDisks [ index ] ; disk != nil {
2024-01-12 04:48:36 -05:00
if disk . IsLocal ( ) {
2023-12-29 18:52:41 -05:00
xldisk , ok := disk . ( * xlStorageDiskIDCheck )
if ok {
2024-01-30 02:03:58 -05:00
_ , commonDeletes := calcCommonWritesDeletes ( currentDisksInfo [ m ] , ( s . setDriveCount + 1 ) / 2 )
xldisk . totalDeletes . Store ( commonDeletes )
xldisk . storage . setDeleteAttribute ( commonDeletes )
if globalDriveMonitoring {
go xldisk . monitorDiskWritable ( xldisk . diskCtx )
2024-01-12 04:48:36 -05:00
}
2023-12-29 18:52:41 -05:00
}
2024-01-12 04:48:36 -05:00
} else {
disk . Close ( ) // Close the remote storage client, re-initialize with healthchecks.
disk , err = newStorageRESTClient ( disk . Endpoint ( ) , true , globalGrid . Load ( ) )
if err != nil {
continue
}
2023-12-29 18:52:41 -05:00
}
s . erasureDisks [ m ] [ n ] = disk
2024-02-12 16:00:20 -05:00
if disk . IsLocal ( ) {
2023-12-29 18:52:41 -05:00
globalLocalDrivesMu . Lock ( )
2024-02-12 16:00:20 -05:00
if globalIsDistErasure {
globalLocalSetDrives [ s . poolIndex ] [ m ] [ n ] = disk
}
for i , ldisk := range globalLocalDrives {
_ , k , l := ldisk . GetDiskLoc ( )
if k == m && l == n {
globalLocalDrives [ i ] = disk
break
}
}
2023-12-29 18:52:41 -05:00
globalLocalDrivesMu . Unlock ( )
}
2021-09-14 18:10:00 -04:00
}
2020-03-27 17:48:30 -04:00
}
2018-04-09 13:25:41 -04:00
2020-09-17 00:14:35 -04:00
s . erasureDisksMu . Unlock ( )
2018-02-15 20:45:57 -05:00
}
return res , nil
}
// HealObject - heals inconsistent object on a hashedSet based on object name.
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) HealObject ( ctx context . Context , bucket , object , versionID string , opts madmin . HealOpts ) ( madmin . HealResultItem , error ) {
return s . getHashedSet ( object ) . HealObject ( ctx , bucket , object , versionID , opts )
2018-02-15 20:45:57 -05:00
}
2021-04-04 16:32:31 -04:00
// PutObjectMetadata - replaces or adds metadata on an existing
// object/version, on the hashedSet selected by the object name.
func (s *erasureSets) PutObjectMetadata(ctx context.Context, bucket, object string, opts ObjectOptions) (ObjectInfo, error) {
	return s.getHashedSet(object).PutObjectMetadata(ctx, bucket, object, opts)
}
2023-03-16 10:48:05 -04:00
// DecomTieredObject - moves a tiered object to another pool during
// decommissioning; the call is forwarded to the hashedSet selected by the
// object name.
func (s *erasureSets) DecomTieredObject(ctx context.Context, bucket, object string, fi FileInfo, opts ObjectOptions) error {
	return s.getHashedSet(object).DecomTieredObject(ctx, bucket, object, fi, opts)
}
2020-05-23 14:09:35 -04:00
// PutObjectTags - replace or add tags to an existing object
2021-02-01 16:52:51 -05:00
func ( s * erasureSets ) PutObjectTags ( ctx context . Context , bucket , object string , tags string , opts ObjectOptions ) ( ObjectInfo , error ) {
2021-01-26 16:21:51 -05:00
er := s . getHashedSet ( object )
return er . PutObjectTags ( ctx , bucket , object , tags , opts )
2020-01-20 11:45:59 -05:00
}
2020-05-23 14:09:35 -04:00
// DeleteObjectTags - delete object tags from an existing object
2021-02-01 16:52:51 -05:00
func ( s * erasureSets ) DeleteObjectTags ( ctx context . Context , bucket , object string , opts ObjectOptions ) ( ObjectInfo , error ) {
2021-01-26 16:21:51 -05:00
er := s . getHashedSet ( object )
return er . DeleteObjectTags ( ctx , bucket , object , opts )
2020-01-20 11:45:59 -05:00
}
2020-05-23 14:09:35 -04:00
// GetObjectTags - get object tags from an existing object
2020-06-12 23:04:01 -04:00
func ( s * erasureSets ) GetObjectTags ( ctx context . Context , bucket , object string , opts ObjectOptions ) ( * tags . Tags , error ) {
2021-01-26 16:21:51 -05:00
er := s . getHashedSet ( object )
return er . GetObjectTags ( ctx , bucket , object , opts )
2020-01-20 11:45:59 -05:00
}
2021-04-19 13:30:42 -04:00
// TransitionObject - transitions object content to the target tier; the
// call is forwarded to the hashedSet selected by the object name.
func (s *erasureSets) TransitionObject(ctx context.Context, bucket, object string, opts ObjectOptions) error {
	er := s.getHashedSet(object)
	return er.TransitionObject(ctx, bucket, object, opts)
}
// RestoreTransitionedObject - restores transitioned object content locally
// on this cluster; the call is forwarded to the hashedSet selected by the
// object name.
func (s *erasureSets) RestoreTransitionedObject(ctx context.Context, bucket, object string, opts ObjectOptions) error {
	er := s.getHashedSet(object)
	return er.RestoreTransitionedObject(ctx, bucket, object, opts)
}
2022-11-28 13:20:55 -05:00
// CheckAbandonedParts - checks an object for abandoned parts on the
// hashedSet selected by the object name.
func (s *erasureSets) CheckAbandonedParts(ctx context.Context, bucket, object string, opts madmin.HealOpts) error {
	er := s.getHashedSet(object)
	return er.checkAbandonedParts(ctx, bucket, object, opts)
}