separate lock from common grid to avoid epoll contention (#20180)

epoll contention on TCP causes latency build-up when
we have high volume ingress. This PR is an attempt to
relieve this pressure.

upstream issue https://github.com/golang/go/issues/65064
It seems to be a deeper problem; we haven't yet tried the fix
provided in that issue. However, this change helps even without
changing the compiler.

Of course, this is a workaround for now, hoping for a
more comprehensive fix from Go runtime.
This commit is contained in:
Harshavardhana
2024-07-29 11:10:04 -07:00
committed by GitHub
parent 6651c655cb
commit a17f14f73a
13 changed files with 121 additions and 31 deletions

View File

@@ -247,8 +247,11 @@ func guessIsRPCReq(req *http.Request) bool {
if req == nil {
return false
}
if req.Method == http.MethodGet && req.URL != nil && req.URL.Path == grid.RoutePath {
return true
if req.Method == http.MethodGet && req.URL != nil {
switch req.URL.Path {
case grid.RoutePath, grid.RouteLockPath:
return true
}
}
return (req.Method == http.MethodPost || req.Method == http.MethodGet) &&

View File

@@ -64,6 +64,14 @@ func TestGuessIsRPC(t *testing.T) {
if !guessIsRPCReq(r) {
t.Fatal("Grid RPC path not detected")
}
r = &http.Request{
Proto: "HTTP/1.1",
Method: http.MethodGet,
URL: &url.URL{Path: grid.RouteLockPath},
}
if !guessIsRPCReq(r) {
t.Fatal("Grid RPC path not detected")
}
}
var isHTTPHeaderSizeTooLargeTests = []struct {

View File

@@ -31,9 +31,15 @@ import (
// Grid managers and the channels that gate when their connections may start.
var (
	// globalGrid holds the global grid manager.
	globalGrid atomic.Pointer[grid.Manager]

	// globalLockGrid holds the global lock grid manager.
	globalLockGrid atomic.Pointer[grid.Manager]

	// globalGridStart blocks startup of grid connections until it is closed.
	globalGridStart = make(chan struct{})

	// globalLockGridStart blocks startup of lock grid connections until it is closed.
	globalLockGridStart = make(chan struct{})
)
func initGlobalGrid(ctx context.Context, eps EndpointServerPools) error {
hosts, local := eps.GridHosts()
lookupHost := globalDNSCache.LookupHost
@@ -55,9 +61,10 @@ func initGlobalGrid(ctx context.Context, eps EndpointServerPools) error {
AuthFn: newCachedAuthToken(),
BlockConnect: globalGridStart,
// Record incoming and outgoing bytes.
Incoming: globalConnStats.incInternodeInputBytes,
Outgoing: globalConnStats.incInternodeOutputBytes,
TraceTo: globalTrace,
Incoming: globalConnStats.incInternodeInputBytes,
Outgoing: globalConnStats.incInternodeOutputBytes,
TraceTo: globalTrace,
RoutePath: grid.RoutePath,
})
if err != nil {
return err
@@ -65,3 +72,36 @@ func initGlobalGrid(ctx context.Context, eps EndpointServerPools) error {
globalGrid.Store(g)
return nil
}
// initGlobalLockGrid builds the grid manager that serves grid.RouteLockPath
// and stores it in globalLockGrid.
//
// The manager is created from the grid hosts of eps. Its connections are
// gated on globalLockGridStart, so they do not start until that channel is
// closed. Any error from grid.NewManager is returned unchanged and nothing is
// stored in that case.
func initGlobalLockGrid(ctx context.Context, eps EndpointServerPools) error {
	hosts, local := eps.GridHosts()
	lookupHost := globalDNSCache.LookupHost
	g, err := grid.NewManager(ctx, grid.ManagerOptions{
		// Pass Dialer for websocket grid, make sure we do not
		// provide any DriveOPTimeout() function, as that is not
		// useful over persistent connections.
		Dialer: grid.ConnectWSWithRoutePath(
			grid.ContextDialer(xhttp.DialContextWithLookupHost(lookupHost, xhttp.NewInternodeDialContext(rest.DefaultTimeout, globalTCPOptions.ForWebsocket()))),
			newCachedAuthToken(),
			&tls.Config{
				RootCAs:          globalRootCAs,
				CipherSuites:     fips.TLSCiphers(),
				CurvePreferences: fips.TLSCurveIDs(),
			}, grid.RouteLockPath),
		Local:     local,
		Hosts:     hosts,
		AuthToken: validateStorageRequestToken,
		AuthFn:    newCachedAuthToken(),
		// Gate on the lock grid's own start channel rather than the
		// common grid's, so each manager is released by its own signal.
		BlockConnect: globalLockGridStart,
		// Record incoming and outgoing bytes.
		Incoming:  globalConnStats.incInternodeInputBytes,
		Outgoing:  globalConnStats.incInternodeOutputBytes,
		TraceTo:   globalTrace,
		RoutePath: grid.RouteLockPath,
	})
	if err != nil {
		return err
	}
	globalLockGrid.Store(g)
	return nil
}

View File

@@ -107,5 +107,5 @@ func newLockAPI(endpoint Endpoint) dsync.NetLocker {
// newlockRESTClient returns a lock REST client for ep.
//
// The client wraps the connection to ep.GridHost() taken from the lock grid
// manager (globalLockGrid), not from the common grid, so lock calls travel
// over the dedicated lock grid.
func newlockRESTClient(ep Endpoint) *lockRESTClient {
	return &lockRESTClient{globalLockGrid.Load().Connection(ep.GridHost())}
}

View File

@@ -39,7 +39,7 @@ func TestLockRESTlient(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
err = initGlobalGrid(ctx, []PoolEndpoints{{Endpoints: Endpoints{endpoint, endpointLocal}}})
err = initGlobalLockGrid(ctx, []PoolEndpoints{{Endpoints: Endpoints{endpoint, endpointLocal}}})
if err != nil {
t.Fatal(err)
}

View File

@@ -111,17 +111,17 @@ func newLockHandler(h grid.HandlerID) *grid.SingleHandler[*dsync.LockArgs, *dsyn
}
// registerLockRESTHandlers - register lock rest router.
func registerLockRESTHandlers() {
func registerLockRESTHandlers(gm *grid.Manager) {
lockServer := &lockRESTServer{
ll: newLocker(),
}
logger.FatalIf(lockRPCForceUnlock.Register(globalGrid.Load(), lockServer.ForceUnlockHandler), "unable to register handler")
logger.FatalIf(lockRPCRefresh.Register(globalGrid.Load(), lockServer.RefreshHandler), "unable to register handler")
logger.FatalIf(lockRPCLock.Register(globalGrid.Load(), lockServer.LockHandler), "unable to register handler")
logger.FatalIf(lockRPCUnlock.Register(globalGrid.Load(), lockServer.UnlockHandler), "unable to register handler")
logger.FatalIf(lockRPCRLock.Register(globalGrid.Load(), lockServer.RLockHandler), "unable to register handler")
logger.FatalIf(lockRPCRUnlock.Register(globalGrid.Load(), lockServer.RUnlockHandler), "unable to register handler")
logger.FatalIf(lockRPCForceUnlock.Register(gm, lockServer.ForceUnlockHandler), "unable to register handler")
logger.FatalIf(lockRPCRefresh.Register(gm, lockServer.RefreshHandler), "unable to register handler")
logger.FatalIf(lockRPCLock.Register(gm, lockServer.LockHandler), "unable to register handler")
logger.FatalIf(lockRPCUnlock.Register(gm, lockServer.UnlockHandler), "unable to register handler")
logger.FatalIf(lockRPCRLock.Register(gm, lockServer.RLockHandler), "unable to register handler")
logger.FatalIf(lockRPCRUnlock.Register(gm, lockServer.RUnlockHandler), "unable to register handler")
globalLockServer = lockServer.ll

View File

@@ -26,20 +26,28 @@ import (
// registerDistErasureRouters registers the routers used only by a
// distributed Erasure setup.
//
// Storage, peer and bootstrap handlers are registered on the common grid
// manager, while lock handlers are registered on the separate lock grid
// manager. Each manager is loaded once and then mounted on router under its
// own route path (grid.RoutePath and grid.RouteLockPath respectively).
func registerDistErasureRouters(router *mux.Router, endpointServerPools EndpointServerPools) {
	var (
		lockGrid   = globalLockGrid.Load()
		commonGrid = globalGrid.Load()
	)

	// Register storage REST router only if its a distributed setup.
	registerStorageRESTHandlers(router, endpointServerPools, commonGrid)

	// Register peer REST router only if its a distributed setup.
	registerPeerRESTHandlers(router, commonGrid)

	// Register bootstrap REST router for distributed setups.
	registerBootstrapRESTHandlers(commonGrid)

	// Register distributed namespace lock routers.
	registerLockRESTHandlers(lockGrid)

	// Add lock grid to router
	router.Handle(grid.RouteLockPath, adminMiddleware(lockGrid.Handler(storageServerRequestValidate), noGZFlag, noObjLayerFlag))

	// Add grid to router
	router.Handle(grid.RoutePath, adminMiddleware(commonGrid.Handler(storageServerRequestValidate), noGZFlag, noObjLayerFlag))
}
// List of some generic middlewares which are applied for all incoming requests.

View File

@@ -856,6 +856,11 @@ func serverMain(ctx *cli.Context) {
logger.FatalIf(initGlobalGrid(GlobalContext, globalEndpoints), "Unable to configure server grid RPC services")
})
// Initialize lock grid
bootstrapTrace("initLockGrid", func() {
logger.FatalIf(initGlobalLockGrid(GlobalContext, globalEndpoints), "Unable to configure server lock grid RPC services")
})
// Configure server.
bootstrapTrace("configureServer", func() {
handler, err := configureServerHandler(globalEndpoints)
@@ -863,7 +868,8 @@ func serverMain(ctx *cli.Context) {
logger.Fatal(config.ErrUnexpectedError(err), "Unable to configure one of server's RPC services")
}
// Allow grid to start after registering all services.
xioutil.SafeClose(globalGridStart)
close(globalGridStart)
close(globalLockGridStart)
httpServer := xhttp.NewServer(getServerListenAddrs()).
UseHandler(setCriticalErrorHandler(corsHandler(handler))).