simplify listener implementation setup customizations in right place (#19589)

This commit is contained in:
Harshavardhana
2024-04-23 21:08:47 -07:00
committed by GitHub
parent 7640cd24c9
commit f3a52cc195
9 changed files with 84 additions and 107 deletions

View File

@@ -39,9 +39,16 @@ func setTCPParametersFn(opts TCPOptions) func(network, address string, c syscall
_ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_REUSEPORT, 1)
{
// Enable big buffers
_ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_SNDBUF, opts.SendBufSize)
_ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_RCVBUF, opts.RecvBufSize)
}
// Enable TCP fast open
// https://lwn.net/Articles/508865/ - 16k queue size.
_ = syscall.SetsockoptInt(fd, syscall.SOL_TCP, unix.TCP_FASTOPEN, 16*1024)
// https://lwn.net/Articles/508865/ - 32k queue size.
_ = syscall.SetsockoptInt(fd, syscall.SOL_TCP, unix.TCP_FASTOPEN, 32*1024)
// Enable TCP fast connect
// TCPFastOpenConnect sets the underlying socket to use
@@ -53,17 +60,22 @@ func setTCPParametersFn(opts TCPOptions) func(network, address string, c syscall
// "Set TCP_QUICKACK. If you find a case where that makes things worse, let me know."
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, unix.TCP_QUICKACK, 1)
// The time (in seconds) the connection needs to remain idle before
// TCP starts sending keepalive probes
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPIDLE, 15)
// Enable keep-alive
{
_ = unix.SetsockoptInt(fd, unix.SOL_SOCKET, unix.SO_KEEPALIVE, 1)
// Number of probes.
// ~ cat /proc/sys/net/ipv4/tcp_keepalive_probes (defaults to 9, we reduce it to 5)
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPCNT, 5)
// The time (in seconds) the connection needs to remain idle before
// TCP starts sending keepalive probes
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPIDLE, 15)
// Interval between successive keepalive probes, in seconds.
// ~ cat /proc/sys/net/ipv4/tcp_keepalive_intvl (defaults to 75 secs, we reduce it to 15 secs)
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPINTVL, 15)
// Number of probes.
// ~ cat /proc/sys/net/ipv4/tcp_keepalive_probes (defaults to 9, we reduce it to 5)
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPCNT, 5)
// Interval between successive keepalive probes, in seconds.
// ~ cat /proc/sys/net/ipv4/tcp_keepalive_intvl (defaults to 75 secs, we reduce it to 15 secs)
_ = syscall.SetsockoptInt(fd, syscall.IPPROTO_TCP, syscall.TCP_KEEPINTVL, 15)
}
// Set tcp user timeout in addition to the keep-alive - tcp-keepalive is not enough to close a socket
// with dead end because tcp-keepalive is not fired when there is data in the socket buffer.
@@ -100,14 +112,3 @@ func NewInternodeDialContext(dialTimeout time.Duration, opts TCPOptions) DialCon
return dialer.DialContext(ctx, network, addr)
}
}
// NewCustomDialContext sets up a custom dialer for any external communication and proxies.
func NewCustomDialContext(dialTimeout time.Duration, opts TCPOptions) DialContext {
return func(ctx context.Context, network, addr string) (net.Conn, error) {
dialer := &net.Dialer{
Timeout: dialTimeout,
Control: setTCPParametersFn(opts),
}
return dialer.DialContext(ctx, network, addr)
}
}

View File

@@ -39,11 +39,8 @@ func setTCPParametersFn(opts TCPOptions) func(network, address string, c syscall
// DialContext is a function to make custom Dial for internode communications
type DialContext func(ctx context.Context, network, address string) (net.Conn, error)
// NewInternodeDialContext sets up a custom dialer for internode communication
var NewInternodeDialContext = NewCustomDialContext
// NewCustomDialContext configures a custom dialer for internode communications
func NewCustomDialContext(dialTimeout time.Duration, _ TCPOptions) DialContext {
// NewInternodeDialContext configures a custom dialer for internode communications
func NewInternodeDialContext(dialTimeout time.Duration, _ TCPOptions) DialContext {
return func(ctx context.Context, network, addr string) (net.Conn, error) {
dialer := &net.Dialer{
Timeout: dialTimeout,

View File

@@ -35,11 +35,11 @@ type acceptResult struct {
// httpListener - HTTP listener capable of handling multiple server addresses.
type httpListener struct {
opts TCPOptions
tcpListeners []*net.TCPListener // underlying TCP listeners.
acceptCh chan acceptResult // channel where all TCP listeners write accepted connection.
ctx context.Context
ctxCanceler context.CancelFunc
opts TCPOptions
listeners []net.Listener // underlying TCP listeners.
acceptCh chan acceptResult // channel where all TCP listeners write accepted connection.
ctx context.Context
ctxCanceler context.CancelFunc
}
// start - starts separate goroutine for each TCP listener. A valid new connection is passed to httpListener.acceptCh.
@@ -57,18 +57,15 @@ func (listener *httpListener) start() {
}
// Closure to handle TCPListener until done channel is closed.
handleListener := func(idx int, tcpListener *net.TCPListener) {
handleListener := func(idx int, listener net.Listener) {
for {
tcpConn, err := tcpListener.AcceptTCP()
if tcpConn != nil {
tcpConn.SetKeepAlive(true)
}
send(acceptResult{tcpConn, err, idx})
conn, err := listener.Accept()
send(acceptResult{conn, err, idx})
}
}
// Start separate goroutine for each TCP listener to handle connection.
for idx, tcpListener := range listener.tcpListeners {
for idx, tcpListener := range listener.listeners {
go handleListener(idx, tcpListener)
}
}
@@ -91,8 +88,8 @@ func (listener *httpListener) Accept() (conn net.Conn, err error) {
func (listener *httpListener) Close() (err error) {
listener.ctxCanceler()
for i := range listener.tcpListeners {
listener.tcpListeners[i].Close()
for i := range listener.listeners {
listener.listeners[i].Close()
}
return nil
@@ -100,8 +97,8 @@ func (listener *httpListener) Close() (err error) {
// Addr - net.Listener interface compatible method returns net.Addr. In case of multiple TCP listeners, it returns '0.0.0.0' as IP address.
func (listener *httpListener) Addr() (addr net.Addr) {
addr = listener.tcpListeners[0].Addr()
if len(listener.tcpListeners) == 1 {
addr = listener.listeners[0].Addr()
if len(listener.listeners) == 1 {
return addr
}
@@ -116,8 +113,8 @@ func (listener *httpListener) Addr() (addr net.Addr) {
// Addrs - returns all address information of TCP listeners.
func (listener *httpListener) Addrs() (addrs []net.Addr) {
for i := range listener.tcpListeners {
addrs = append(addrs, listener.tcpListeners[i].Addr())
for i := range listener.listeners {
addrs = append(addrs, listener.listeners[i].Addr())
}
return addrs
@@ -125,11 +122,16 @@ func (listener *httpListener) Addrs() (addrs []net.Addr) {
// TCPOptions specify customizable TCP optimizations on raw socket
type TCPOptions struct {
UserTimeout int // this value is expected to be in milliseconds
ClientReadTimeout time.Duration // When the net.Conn is idle for more than ReadTimeout duration, we close the connection on the client proactively.
ClientWriteTimeout time.Duration // When the net.Conn is idle for more than WriteTimeout duration, we close the connection on the client proactively.
Interface string // this is a VRF device passed via `--interface` flag
Trace func(msg string) // Trace when starting.
UserTimeout int // this value is expected to be in milliseconds
// When the net.Conn is idle for more than ReadTimeout duration, we close the connection on the client proactively.
ClientReadTimeout time.Duration
// When the net.Conn is idle for more than WriteTimeout duration, we close the connection on the client proactively.
ClientWriteTimeout time.Duration
SendBufSize int // SO_SNDBUF size for the socket connection, NOTE: this sets server and client connection
RecvBufSize int // SO_RCVBUF size for the socket connection, NOTE: this sets server and client connection
Interface string // This is a VRF device passed via `--interface` flag
Trace func(msg string) // Trace when starting.
}
// newHTTPListener - creates new httpListener object which is interface compatible to net.Listener.
@@ -137,7 +139,7 @@ type TCPOptions struct {
// * listen to multiple addresses
// * controls incoming connections only doing HTTP protocol
func newHTTPListener(ctx context.Context, serverAddrs []string, opts TCPOptions) (listener *httpListener, listenErrs []error) {
tcpListeners := make([]*net.TCPListener, 0, len(serverAddrs))
listeners := make([]net.Listener, 0, len(serverAddrs))
listenErrs = make([]error, len(serverAddrs))
// Unix listener with special TCP options.
@@ -146,46 +148,36 @@ func newHTTPListener(ctx context.Context, serverAddrs []string, opts TCPOptions)
}
for i, serverAddr := range serverAddrs {
var (
l net.Listener
e error
)
if l, e = listenCfg.Listen(ctx, "tcp", serverAddr); e != nil {
l, e := listenCfg.Listen(ctx, "tcp", serverAddr)
if e != nil {
if opts.Trace != nil {
opts.Trace(fmt.Sprint("listenCfg.Listen: ", e.Error()))
opts.Trace(fmt.Sprint("listenCfg.Listen: ", e))
}
listenErrs[i] = e
continue
}
tcpListener, ok := l.(*net.TCPListener)
if !ok {
listenErrs[i] = fmt.Errorf("unexpected listener type found %v, expected net.TCPListener", l)
if opts.Trace != nil {
opts.Trace(fmt.Sprint("net.TCPListener: ", listenErrs[i].Error()))
}
continue
}
if opts.Trace != nil {
opts.Trace(fmt.Sprint("adding listener to ", tcpListener.Addr()))
opts.Trace(fmt.Sprint("adding listener to ", l.Addr()))
}
tcpListeners = append(tcpListeners, tcpListener)
listeners = append(listeners, l)
}
if len(tcpListeners) == 0 {
if len(listeners) == 0 {
// No listeners initialized, no need to continue
return
}
listener = &httpListener{
tcpListeners: tcpListeners,
acceptCh: make(chan acceptResult, len(tcpListeners)),
opts: opts,
listeners: listeners,
acceptCh: make(chan acceptResult, len(listeners)),
opts: opts,
}
listener.ctx, listener.ctxCanceler = context.WithCancel(ctx)
if opts.Trace != nil {
opts.Trace(fmt.Sprint("opening ", len(listener.tcpListeners), " listeners"))
opts.Trace(fmt.Sprint("opening ", len(listener.listeners), " listeners"))
}
listener.start()

View File

@@ -72,8 +72,8 @@ func (s ConnSettings) getDefaultTransport(maxIdleConnsPerHost int) *http.Transpo
Proxy: http.ProxyFromEnvironment,
DialContext: dialContext,
MaxIdleConnsPerHost: maxIdleConnsPerHost,
WriteBufferSize: 32 << 10, // 32KiB moving up from 4KiB default
ReadBufferSize: 32 << 10, // 32KiB moving up from 4KiB default
WriteBufferSize: 64 << 10, // 64KiB moving up from 4KiB default
ReadBufferSize: 64 << 10, // 64KiB moving up from 4KiB default
IdleConnTimeout: 15 * time.Second,
ResponseHeaderTimeout: 15 * time.Minute, // Conservative timeout is the default (for MinIO internode)
TLSHandshakeTimeout: 10 * time.Second,