Handle read/quorum errors when initializing all subsystems (#6585)

- Only require len(disks)/2 to initialize the cluster
- Fix checking of read/write quorum in subsystems init
- Add a retry mechanism in policy and notification to avoid aborting in case of read/write quorum errors
This commit is contained in:
Anis Elleuch
2018-10-08 23:47:13 +01:00
committed by kannappanr
parent d8a2975a68
commit cbc5d78a09
4 changed files with 83 additions and 44 deletions

View File

@@ -21,6 +21,7 @@ import (
"encoding/json"
"net/http"
"path"
"strings"
"sync"
"time"
@@ -131,25 +132,46 @@ func (sys *PolicySys) Init(objAPI ObjectLayer) error {
return errInvalidArgument
}
// Load PolicySys once during boot.
if err := sys.refresh(objAPI); err != nil {
return err
}
// Refresh PolicySys in background.
go func() {
ticker := time.NewTicker(globalRefreshBucketPolicyInterval)
defer ticker.Stop()
for {
select {
case <-globalServiceDoneCh:
return
case <-ticker.C:
sys.refresh(objAPI)
defer func() {
// Refresh PolicySys in background.
go func() {
ticker := time.NewTicker(globalRefreshBucketPolicyInterval)
defer ticker.Stop()
for {
select {
case <-globalServiceDoneCh:
return
case <-ticker.C:
sys.refresh(objAPI)
}
}
}
}()
}()
return nil
doneCh := make(chan struct{})
defer close(doneCh)
// Initializing policy needs a retry mechanism for
// the following reasons:
// - Read quorum is lost just after the initialization
// of the object layer.
retryTimerCh := newRetryTimerSimple(doneCh)
for {
select {
case _ = <-retryTimerCh:
// Load PolicySys once during boot.
if err := sys.refresh(objAPI); err != nil {
if err == errDiskNotFound ||
strings.Contains(err.Error(), InsufficientReadQuorum{}.Error()) ||
strings.Contains(err.Error(), InsufficientWriteQuorum{}.Error()) {
logger.Info("Waiting for policy subsystem to be initialized..")
continue
}
return err
}
return nil
}
}
}
// NewPolicySys - creates new policy system.