Improve healing and reporting (#11312)

* Provide information on *actively* healing, buckets healed/queued, objects healed/failed.
* Add concurrent healing of multiple sets (typically on startup).
* Add bucket level resume, so restarts will only heal non-healed buckets.
* Print summary after healing a disk is done.
This commit is contained in:
Klaus Post
2021-03-04 14:36:23 -08:00
committed by GitHub
parent 97e7a902d0
commit fa9cf1251b
36 changed files with 1357 additions and 307 deletions

View File

@@ -21,6 +21,7 @@ package main
import (
"context"
"encoding/json"
"log"
"github.com/minio/minio/pkg/madmin"
@@ -41,6 +42,7 @@ func main() {
if err != nil {
log.Fatalln(err)
}
js, _ := json.MarshalIndent(healStatusResult, "", " ")
log.Printf("Heal status result: %+v\n", healStatusResult)
log.Printf("Heal status result: %s\n", string(js))
}

View File

@@ -41,5 +41,5 @@ func main() {
if err != nil {
log.Fatalln(err)
}
log.Println(st)
log.Printf("%+v\n", st)
}

View File

@@ -41,5 +41,5 @@ func main() {
if err != nil {
log.Fatalln(err)
}
log.Println(st)
log.Printf("%+v\n", st)
}

View File

@@ -24,6 +24,7 @@ import (
"io/ioutil"
"net/http"
"net/url"
"sort"
"time"
)
@@ -47,6 +48,7 @@ type HealOpts struct {
DryRun bool `json:"dryRun"`
Remove bool `json:"remove"`
Recreate bool `json:"recreate"` // only used when bucket needs to be healed
NoLock bool `json:"-"` // only used internally.
ScanMode HealScanMode `json:"scanMode"`
}
@@ -298,9 +300,96 @@ func (adm *AdminClient) Heal(ctx context.Context, bucket, prefix string,
// BgHealState represents the status of the background heal.
// Several states can be combined into one with Merge.
type BgHealState struct {
// ScannedItemsCount is additive: Merge sums it across all merged states.
ScannedItemsCount int64
LastHealActivity time.Time
NextHealRound time.Time
HealDisks []string
HealDisks []string
// Sets contains information for each set.
// Merge matches entries by ID and sorts them by pool index, then set index.
Sets []SetStatus `json:"sets"`
}
// SetStatus contains information about the heal status of a set.
type SetStatus struct {
// ID identifies the set; BgHealState.Merge treats entries with equal IDs as the same set.
ID string `json:"id"`
// PoolIndex and SetIndex locate the set; BgHealState.Merge orders sets by pool index, then set index.
PoolIndex int `json:"pool_index"`
SetIndex int `json:"set_index"`
// HealStatus and HealPriority are plain strings.
// NOTE(review): their allowed values are not defined in this file — confirm against the server.
HealStatus string `json:"heal_status"`
HealPriority string `json:"heal_priority"`
// Disks lists the disks of the set; each Disk may carry heal details in its HealInfo field.
Disks []Disk `json:"disks"`
}
// HealingDisk contains information about the heal progress of a single disk:
// where the disk lives, when healing started and was last updated, how many
// objects/bytes were healed or failed, and which buckets are queued or done.
type HealingDisk struct {
// Copied from cmd/background-newdisks-heal-ops.go
// When adding a new field, update (*healingTracker).toHealingDisk
ID string `json:"id"`
PoolIndex int `json:"pool_index"`
SetIndex int `json:"set_index"`
DiskIndex int `json:"disk_index"`
Endpoint string `json:"endpoint"`
Path string `json:"path"`
Started time.Time `json:"started"`
LastUpdate time.Time `json:"last_update"`
// Counters for healed and failed objects and bytes.
ObjectsHealed uint64 `json:"objects_healed"`
ObjectsFailed uint64 `json:"objects_failed"`
BytesDone uint64 `json:"bytes_done"`
BytesFailed uint64 `json:"bytes_failed"`
// Last object scanned.
Bucket string `json:"current_bucket"`
Object string `json:"current_object"`
// Filled on startup/restarts.
QueuedBuckets []string `json:"queued_buckets"`
// Filled during heal.
HealedBuckets []string `json:"healed_buckets"`
// More tracking capabilities may be added in the future.
}
// Merge folds the heal state reported by others into b.
//
// ScannedItemsCount values are summed. Sets are matched by ID:
//   - a set that b does not know yet is added;
//   - if the incoming set lists more disks than the one b holds, the incoming
//     disk list replaces b's list;
//   - otherwise disks are assumed to be in the same order, and every non-nil
//     HealInfo of the incoming set overwrites the one at the same index in b
//     (later arguments win; LastUpdate is not compared).
//
// After merging, b.Sets is sorted by PoolIndex and then SetIndex.
//
// The Disks slices taken from others are copied before being stored in b, so
// merging never modifies the arguments. HealInfo pointers are still shared.
func (b *BgHealState) Merge(others ...BgHealState) {
	// cloneSet returns set with a private copy of its Disks slice. A nil slice
	// stays nil and an empty one stays empty so JSON output is unchanged.
	cloneSet := func(set SetStatus) SetStatus {
		if set.Disks != nil {
			disks := make([]Disk, len(set.Disks))
			copy(disks, set.Disks)
			set.Disks = disks
		}
		return set
	}

	// mergeSet merges one incoming set into b.Sets.
	mergeSet := func(set SetStatus) {
		for idx := range b.Sets {
			existing := &b.Sets[idx]
			if existing.ID != set.ID {
				continue
			}
			if len(existing.Disks) < len(set.Disks) {
				// The incoming set knows about more disks; adopt its list.
				existing.Disks = cloneSet(set).Disks
				return
			}
			for i, disk := range set.Disks {
				// Disks should be the same, so indexes line up.
				if disk.HealInfo != nil {
					existing.Disks[i].HealInfo = disk.HealInfo
				}
			}
			return
		}
		// Set not present yet.
		b.Sets = append(b.Sets, cloneSet(set))
	}

	for _, other := range others {
		b.ScannedItemsCount += other.ScannedItemsCount
		if len(b.Sets) == 0 {
			// Nothing to match against: take over the sets as they are.
			b.Sets = make([]SetStatus, len(other.Sets))
			for i, set := range other.Sets {
				b.Sets[i] = cloneSet(set)
			}
			continue
		}
		for _, set := range other.Sets {
			mergeSet(set)
		}
	}

	sort.Slice(b.Sets, func(i, j int) bool {
		if b.Sets[i].PoolIndex != b.Sets[j].PoolIndex {
			return b.Sets[i].PoolIndex < b.Sets[j].PoolIndex
		}
		return b.Sets[i].SetIndex < b.Sets[j].SetIndex
	})
}
// BackgroundHealStatus returns the background heal status of the

View File

@@ -35,6 +35,8 @@ const (
FS
// Multi disk Erasure (single, distributed) backend.
Erasure
// Gateway to other storage
Gateway
// Add your own backend.
)
@@ -57,16 +59,26 @@ type StorageInfo struct {
Disks []Disk
// Backend type.
Backend struct {
// Represents various backend types, currently on FS and Erasure.
Type BackendType
Backend BackendInfo
}
// Following fields are only meaningful if BackendType is Erasure.
OnlineDisks BackendDisks // Online disks during server startup.
OfflineDisks BackendDisks // Offline disks during server startup.
StandardSCParity int // Parity disks for currently configured Standard storage class.
RRSCParity int // Parity disks for currently configured Reduced Redundancy storage class.
}
// BackendInfo - contains info of the underlying backend.
// Which fields carry meaning depends on Type, as noted on each group below.
type BackendInfo struct {
// Represents the backend type, currently one of FS, Erasure or Gateway.
Type BackendType
// Following fields are only meaningful if BackendType is Gateway.
GatewayOnline bool
// Following fields are only meaningful if BackendType is Erasure.
OnlineDisks BackendDisks // Online disks during server startup.
OfflineDisks BackendDisks // Offline disks during server startup.
// Following storage class fields are likewise only meaningful if BackendType is Erasure.
StandardSCData []int // Data disks for currently configured Standard storage class.
StandardSCParity int // Parity disks for currently configured Standard storage class.
RRSCData []int // Data disks for currently configured Reduced Redundancy storage class.
RRSCParity int // Parity disks for currently configured Reduced Redundancy storage class.
}
// BackendDisks - represents the map of endpoint-disks.
@@ -280,21 +292,27 @@ type ServerProperties struct {
// Disk holds information about a single disk: its location, state, capacity
// and performance figures, and its position within the pool/set layout.
type Disk struct {
Endpoint string `json:"endpoint,omitempty"`
RootDisk bool `json:"rootDisk,omitempty"`
DrivePath string `json:"path,omitempty"`
Healing bool `json:"healing,omitempty"`
State string `json:"state,omitempty"`
UUID string `json:"uuid,omitempty"`
Model string `json:"model,omitempty"`
TotalSpace uint64 `json:"totalspace,omitempty"`
UsedSpace uint64 `json:"usedspace,omitempty"`
AvailableSpace uint64 `json:"availspace,omitempty"`
ReadThroughput float64 `json:"readthroughput,omitempty"`
WriteThroughPut float64 `json:"writethroughput,omitempty"`
ReadLatency float64 `json:"readlatency,omitempty"`
WriteLatency float64 `json:"writelatency,omitempty"`
Utilization float64 `json:"utilization,omitempty"`
Endpoint string `json:"endpoint,omitempty"`
RootDisk bool `json:"rootDisk,omitempty"`
DrivePath string `json:"path,omitempty"`
Healing bool `json:"healing,omitempty"`
State string `json:"state,omitempty"`
UUID string `json:"uuid,omitempty"`
Model string `json:"model,omitempty"`
TotalSpace uint64 `json:"totalspace,omitempty"`
UsedSpace uint64 `json:"usedspace,omitempty"`
AvailableSpace uint64 `json:"availspace,omitempty"`
ReadThroughput float64 `json:"readthroughput,omitempty"`
WriteThroughPut float64 `json:"writethroughput,omitempty"`
ReadLatency float64 `json:"readlatency,omitempty"`
WriteLatency float64 `json:"writelatency,omitempty"`
Utilization float64 `json:"utilization,omitempty"`
// HealInfo holds heal details for this disk; it is a pointer with omitempty,
// so it is left out of the JSON when nil.
// NOTE(review): presumably only set while the disk is being healed — confirm.
HealInfo *HealingDisk `json:"heal_info,omitempty"`
// Indexes, will be -1 until assigned a set.
PoolIndex int `json:"pool_index"`
SetIndex int `json:"set_index"`
DiskIndex int `json:"disk_index"`
}
// ServerInfo - Connect to a minio server and call Server Admin Info Management API