mirror of
https://github.com/minio/minio.git
synced 2025-11-07 21:02:58 -05:00
Imporve healing and reporting (#11312)
* Provide information on *actively* healing, buckets healed/queued, objects healed/failed. * Add concurrent healing of multiple sets (typically on startup). * Add bucket level resume, so restarts will only heal non-healed buckets. * Print summary after healing a disk is done.
This commit is contained in:
@@ -297,7 +297,7 @@ func (a adminAPIHandlers) StorageInfoHandler(w http.ResponseWriter, r *http.Requ
|
||||
storageInfo, _ := objectAPI.StorageInfo(ctx)
|
||||
|
||||
// Collect any disk healing.
|
||||
healing, _ := getAggregatedBackgroundHealState(ctx)
|
||||
healing, _ := getAggregatedBackgroundHealState(ctx, nil)
|
||||
healDisks := make(map[string]struct{}, len(healing.HealDisks))
|
||||
for _, disk := range healing.HealDisks {
|
||||
healDisks[disk] = struct{}{}
|
||||
@@ -861,16 +861,14 @@ func (a adminAPIHandlers) HealHandler(w http.ResponseWriter, r *http.Request) {
|
||||
keepConnLive(w, r, respCh)
|
||||
}
|
||||
|
||||
func getAggregatedBackgroundHealState(ctx context.Context) (madmin.BgHealState, error) {
|
||||
var bgHealStates []madmin.BgHealState
|
||||
|
||||
localHealState, ok := getLocalBackgroundHealStatus()
|
||||
if !ok {
|
||||
return madmin.BgHealState{}, errServerNotInitialized
|
||||
}
|
||||
|
||||
// getAggregatedBackgroundHealState returns the heal state of disks.
|
||||
// If no ObjectLayer is provided no set status is returned.
|
||||
func getAggregatedBackgroundHealState(ctx context.Context, o ObjectLayer) (madmin.BgHealState, error) {
|
||||
// Get local heal status first
|
||||
bgHealStates = append(bgHealStates, localHealState)
|
||||
bgHealStates, ok := getBackgroundHealStatus(ctx, o)
|
||||
if !ok {
|
||||
return bgHealStates, errServerNotInitialized
|
||||
}
|
||||
|
||||
if globalIsDistErasure {
|
||||
// Get heal status from other peers
|
||||
@@ -885,33 +883,10 @@ func getAggregatedBackgroundHealState(ctx context.Context) (madmin.BgHealState,
|
||||
if errCount == len(nerrs) {
|
||||
return madmin.BgHealState{}, fmt.Errorf("all remote servers failed to report heal status, cluster is unhealthy")
|
||||
}
|
||||
bgHealStates = append(bgHealStates, peersHealStates...)
|
||||
bgHealStates.Merge(peersHealStates...)
|
||||
}
|
||||
|
||||
// Aggregate healing result
|
||||
var aggregatedHealStateResult = madmin.BgHealState{
|
||||
ScannedItemsCount: bgHealStates[0].ScannedItemsCount,
|
||||
LastHealActivity: bgHealStates[0].LastHealActivity,
|
||||
NextHealRound: bgHealStates[0].NextHealRound,
|
||||
HealDisks: bgHealStates[0].HealDisks,
|
||||
}
|
||||
|
||||
bgHealStates = bgHealStates[1:]
|
||||
|
||||
for _, state := range bgHealStates {
|
||||
aggregatedHealStateResult.ScannedItemsCount += state.ScannedItemsCount
|
||||
aggregatedHealStateResult.HealDisks = append(aggregatedHealStateResult.HealDisks, state.HealDisks...)
|
||||
if !state.LastHealActivity.IsZero() && aggregatedHealStateResult.LastHealActivity.Before(state.LastHealActivity) {
|
||||
aggregatedHealStateResult.LastHealActivity = state.LastHealActivity
|
||||
// The node which has the last heal activity means its
|
||||
// is the node that is orchestrating self healing operations,
|
||||
// which also means it is the same node which decides when
|
||||
// the next self healing operation will be done.
|
||||
aggregatedHealStateResult.NextHealRound = state.NextHealRound
|
||||
}
|
||||
}
|
||||
|
||||
return aggregatedHealStateResult, nil
|
||||
return bgHealStates, nil
|
||||
}
|
||||
|
||||
func (a adminAPIHandlers) BackgroundHealStatusHandler(w http.ResponseWriter, r *http.Request) {
|
||||
@@ -930,7 +905,7 @@ func (a adminAPIHandlers) BackgroundHealStatusHandler(w http.ResponseWriter, r *
|
||||
return
|
||||
}
|
||||
|
||||
aggregateHealStateResult, err := getAggregatedBackgroundHealState(r.Context())
|
||||
aggregateHealStateResult, err := getAggregatedBackgroundHealState(r.Context(), objectAPI)
|
||||
if err != nil {
|
||||
writeErrorResponseJSON(ctx, w, toAdminAPIErr(ctx, err), r.URL)
|
||||
return
|
||||
@@ -1568,9 +1543,9 @@ func (a adminAPIHandlers) ServerInfoHandler(w http.ResponseWriter, r *http.Reque
|
||||
// Get the notification target info
|
||||
notifyTarget := fetchLambdaInfo()
|
||||
|
||||
server := getLocalServerProperty(globalEndpoints, r)
|
||||
local := getLocalServerProperty(globalEndpoints, r)
|
||||
servers := globalNotificationSys.ServerInfo()
|
||||
servers = append(servers, server)
|
||||
servers = append(servers, local)
|
||||
|
||||
assignPoolNumbers(servers)
|
||||
|
||||
@@ -1598,7 +1573,7 @@ func (a adminAPIHandlers) ServerInfoHandler(w http.ResponseWriter, r *http.Reque
|
||||
|
||||
// Fetching the backend information
|
||||
backendInfo := objectAPI.BackendInfo()
|
||||
if backendInfo.Type == BackendType(madmin.Erasure) {
|
||||
if backendInfo.Type == madmin.Erasure {
|
||||
// Calculate the number of online/offline disks of all nodes
|
||||
var allDisks []madmin.Disk
|
||||
for _, s := range servers {
|
||||
|
||||
Reference in New Issue
Block a user