Avoid healing getting stuck with many concurrent event listeners (#10111)

If there are many listeners to bucket notifications or to the trace
subsystem, healing fails to work properly since it suspends itself when
the number of concurrent connections is above a certain threshold.

These connections are also continuous and not costly (*no disk access*),
so it is okay to just ignore them in waitForLowHTTPReq().
This commit is contained in:
Anis Elleuch 2020-07-22 21:16:55 +01:00 committed by GitHub
parent ad8b53e6d4
commit 456b2ef6eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 10 additions and 1 deletions

View File

@ -56,6 +56,10 @@ func (h *healRoutine) queueHealTask(task healTask) {
} }
func waitForLowHTTPReq(tolerance int32) { func waitForLowHTTPReq(tolerance int32) {
// Bucket notification and http trace are not costly, it is okay to ignore them
// while counting the number of concurrent connections
tolerance += int32(globalHTTPListen.NumSubscribers() + globalHTTPTrace.NumSubscribers())
if httpServer := newHTTPServerFn(); httpServer != nil { if httpServer := newHTTPServerFn(); httpServer != nil {
// Wait at max 10 minute for an inprogress request before proceeding to heal // Wait at max 10 minute for an inprogress request before proceeding to heal
waitCount := 600 waitCount := 600

View File

@ -73,9 +73,14 @@ func (ps *PubSub) Subscribe(subCh chan interface{}, doneCh <-chan struct{}, filt
// HasSubscribers returns true if pubsub system has subscribers // HasSubscribers returns true if pubsub system has subscribers
func (ps *PubSub) HasSubscribers() bool { func (ps *PubSub) HasSubscribers() bool {
return ps.NumSubscribers() > 0
}
// NumSubscribers returns the number of current subscribers
func (ps *PubSub) NumSubscribers() int {
ps.RLock() ps.RLock()
defer ps.RUnlock() defer ps.RUnlock()
return len(ps.subs) > 0 return len(ps.subs)
} }
// New inits a PubSub system // New inits a PubSub system