Fix Mux Connect Error (#18567)

`OpMuxConnectError` was not handled correctly.

Remove local checks for single request handlers so they can 
run before being registered locally.

Bonus: Only log IAM bootstrap on startup.
This commit is contained in:
Klaus Post
2023-12-01 00:18:04 -08:00
committed by GitHub
parent 0d7abe3b9f
commit 5f971fea6e
11 changed files with 113 additions and 24 deletions

View File

@@ -72,7 +72,7 @@ func (er erasureObjects) listAndHeal(bucket, prefix string, healEntry func(strin
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
disks, _ := er.getOnlineDisksWithHealing()
disks, _ := er.getOnlineDisksWithHealing(false)
if len(disks) == 0 {
return errors.New("listAndHeal: No non-healing drives found")
}

View File

@@ -1967,7 +1967,7 @@ func (z *erasureServerPools) Walk(ctx context.Context, bucket, prefix string, re
go func() {
defer wg.Done()
disks, _ := set.getOnlineDisksWithHealing()
disks, _ := set.getOnlineDisksWithHealing(true)
if len(disks) == 0 {
cancel()
return

View File

@@ -274,7 +274,12 @@ func (er erasureObjects) LocalStorageInfo(ctx context.Context) StorageInfo {
return getStorageInfo(localDisks, localEndpoints)
}
func (er erasureObjects) getOnlineDisksWithHealing() (newDisks []StorageAPI, healing bool) {
// getOnlineDisksWithHealing - returns online disks and overall healing status.
// Disks are randomly ordered, but in the following groups:
// - Non-scanning disks
// - Non-healing disks
// - Healing disks (if inclHealing is true)
func (er erasureObjects) getOnlineDisksWithHealing(inclHealing bool) (newDisks []StorageAPI, healing bool) {
var wg sync.WaitGroup
disks := er.getDisks()
infos := make([]DiskInfo, len(disks))
@@ -292,7 +297,7 @@ func (er erasureObjects) getOnlineDisksWithHealing() (newDisks []StorageAPI, hea
}
di, err := disk.DiskInfo(context.Background(), false)
if err != nil || di.Healing {
if err != nil {
// - Do not consume disks which are not reachable
// unformatted or simply not accessible for some reason.
//
@@ -303,21 +308,31 @@ func (er erasureObjects) getOnlineDisksWithHealing() (newDisks []StorageAPI, hea
}
return
}
if !inclHealing && di.Healing {
return
}
infos[i] = di
}()
}
wg.Wait()
var scanningDisks []StorageAPI
var scanningDisks, healingDisks []StorageAPI
for i, info := range infos {
// Check if one of the drives in the set is being healed.
// this information is used by scanner to skip healing
// this erasure set while it calculates the usage.
if info.Healing || info.Error != "" {
healing = true
if info.Error != "" || disks[i] == nil {
continue
}
if info.Healing {
healing = true
if inclHealing {
healingDisks = append(healingDisks, disks[i])
}
continue
}
if !info.Scanning {
newDisks = append(newDisks, disks[i])
} else {
@@ -325,8 +340,10 @@ func (er erasureObjects) getOnlineDisksWithHealing() (newDisks []StorageAPI, hea
}
}
// Prefer new disks over disks which are currently being scanned.
// Prefer non-scanning disks over disks which are currently being scanned.
newDisks = append(newDisks, scanningDisks...)
// Then add healing disks.
newDisks = append(newDisks, healingDisks...)
return newDisks, healing
}
@@ -364,7 +381,7 @@ func (er erasureObjects) nsScanner(ctx context.Context, buckets []BucketInfo, wa
}
// Collect disks we can use.
disks, healing := er.getOnlineDisksWithHealing()
disks, healing := er.getOnlineDisksWithHealing(false)
if len(disks) == 0 {
logger.LogIf(ctx, errors.New("data-scanner: all drives are offline or being healed, skipping scanner cycle"))
return nil

View File

@@ -209,7 +209,7 @@ func (er *erasureObjects) healErasureSet(ctx context.Context, buckets []string,
bucket, humanize.Ordinal(er.setIndex+1))
}
disks, _ := er.getOnlineDisksWithHealing()
disks, _ := er.getOnlineDisksWithHealing(false)
if len(disks) == 0 {
logger.LogIf(ctx, fmt.Errorf("no online disks found to heal the bucket `%s`", bucket))
continue

View File

@@ -488,7 +488,12 @@ func (store *IAMStoreSys) PurgeExpiredSTS(ctx context.Context) error {
// LoadIAMCache reads all IAM items and populates a new iamCache object and
// replaces the in-memory cache object.
func (store *IAMStoreSys) LoadIAMCache(ctx context.Context) error {
func (store *IAMStoreSys) LoadIAMCache(ctx context.Context, firstTime bool) error {
bootstrapTraceMsg := func(s string) {
if firstTime {
bootstrapTraceMsg(s)
}
}
bootstrapTraceMsg("loading IAM data")
newCache := newIamCache()

View File

@@ -189,7 +189,7 @@ func (sys *IAMSys) Initialized() bool {
// Load - loads all credentials, policies and policy mappings.
func (sys *IAMSys) Load(ctx context.Context, firstTime bool) error {
loadStartTime := time.Now()
err := sys.store.LoadIAMCache(ctx)
err := sys.store.LoadIAMCache(ctx, firstTime)
if err != nil {
atomic.AddUint64(&sys.TotalRefreshFailures, 1)
return err

View File

@@ -606,8 +606,8 @@ func (er *erasureObjects) listPath(ctx context.Context, o listPathOptions, resul
defer close(results)
o.debugf(color.Green("listPath:")+" with options: %#v", o)
// get non-healing disks for listing
disks, _ := er.getOnlineDisksWithHealing()
// get prioritized online disks for listing (healing disks are included, ordered last)
disks, _ := er.getOnlineDisksWithHealing(true)
askDisks := getListQuorum(o.AskDisks, er.setDriveCount)
var fallbackDisks []StorageAPI

View File

@@ -1350,12 +1350,12 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
return collectInternodeStats(httpTraceHdrs(f))
}
registered := 0
for _, setDisks := range storageDisks {
for _, storage := range setDisks {
if storage == nil {
continue
}
endpoint := storage.Endpoint()
server := &storageRESTServer{storage: newXLStorageDiskIDCheck(storage, true)}
@@ -1402,6 +1402,17 @@ func registerStorageRESTHandlers(router *mux.Router, endpointServerPools Endpoin
Handle: server.WalkDirHandler,
OutCapacity: 1,
}), "unable to register handler")
registered++
}
}
if registered == 0 {
// Register a dummy handler so remote calls can go out.
logger.FatalIf(gm.RegisterStreamingHandler(grid.HandlerWalkDir, grid.StreamHandler{
Subroute: fmt.Sprintf("__dummy__%d", time.Now().UnixNano()),
Handle: func(ctx context.Context, payload []byte, in <-chan []byte, out chan<- []byte) *grid.RemoteErr {
return grid.NewRemoteErr(errDiskNotFound)
},
OutCapacity: 1,
}), "unable to register handler")
}
}