perf: websocket grid connectivity for all internode communication (#18461)

This PR adds a WebSocket grid feature that allows servers to communicate via 
a single two-way connection.

There are two request types:

* Single requests, which are `[]byte => ([]byte, error)`. This is for efficient small
  roundtrips with small payloads.

* Streaming requests which are `[]byte, chan []byte => chan []byte (and error)`,
  which allows for different combinations of full two-way streams with an initial payload.

Only a single stream is created between two machines - and there is, as such, no
server/client relation since both sides can initiate and handle requests. Which server
initiates the request is decided deterministically on the server names.

Requests are made through a mux client and server, which handles message
passing, congestion, cancelation, timeouts, etc.

If a connection is lost, all requests are canceled, and the calling server will try
to reconnect. Registered handlers can operate directly on byte 
slices or use a higher-level generics abstraction.

There is no versioning of handlers/clients, and incompatible changes should
be handled by adding new handlers.

The request path can be changed to a new one for any protocol changes.

First, all servers create a "Manager." The manager must know its address 
as well as all remote addresses. This will manage all connections.
To get a connection to any remote, ask the manager to provide it given
the remote address using.

```
func (m *Manager) Connection(host string) *Connection
```

All serverside handlers must also be registered on the manager. This will
make sure that all incoming requests are served. The number of in-flight 
requests and responses must also be given for streaming requests.

The "Connection" returned manages the mux-clients. Requests issued
to the connection will be sent to the remote.

* `func (c *Connection) Request(ctx context.Context, h HandlerID, req []byte) ([]byte, error)`
   performs a single request and returns the result. Any deadline provided on the request is
   forwarded to the server, and canceling the context will make the function return at once.

* `func (c *Connection) NewStream(ctx context.Context, h HandlerID, payload []byte) (st *Stream, err error)`
   will initiate a remote call and send the initial payload.

```Go
// A Stream is a two-way stream.
// All responses *must* be read by the caller.
// If the call is canceled through the context,
//The appropriate error will be returned.
type Stream struct {
	// Responses from the remote server.
	// Channel will be closed after an error or when the remote closes.
	// All responses *must* be read by the caller until either an error is returned or the channel is closed.
	// Canceling the context will cause the context cancellation error to be returned.
	Responses <-chan Response

	// Requests sent to the server.
	// If the handler is defined with 0 incoming capacity this will be nil.
	// Channel *must* be closed to signal the end of the stream.
	// If the request context is canceled, the stream will no longer process requests.
	Requests chan<- []byte
}

type Response struct {
	Msg []byte
	Err error
}
```

There are generic versions of the server/client handlers that allow the use of type
safe implementations for data types that support msgpack marshal/unmarshal.
This commit is contained in:
Klaus Post
2023-11-20 17:09:35 -08:00
committed by GitHub
parent 8bedb419a9
commit 51aa59a737
65 changed files with 13519 additions and 1022 deletions

View File

@@ -19,21 +19,18 @@ package cmd
import (
"context"
"fmt"
"io"
"net/http"
"net/url"
"runtime/debug"
"sort"
"strconv"
"strings"
xhttp "github.com/minio/minio/internal/http"
"github.com/minio/minio/internal/grid"
xioutil "github.com/minio/minio/internal/ioutil"
"github.com/minio/minio/internal/logger"
"github.com/valyala/bytebufferpool"
)
//go:generate msgp -file $GOFILE
// WalkDirOptions provides options for WalkDir operations.
type WalkDirOptions struct {
// Bucket to scanner
@@ -57,6 +54,10 @@ type WalkDirOptions struct {
// Limit the number of returned objects if > 0.
Limit int
// DiskID contains the disk ID of the disk.
// Leave empty to not check disk ID.
DiskID string
}
// WalkDir will traverse a directory and return all entries found.
@@ -387,6 +388,9 @@ func (s *xlStorage) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writ
}
func (p *xlStorageDiskIDCheck) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) (err error) {
if err := p.checkID(opts.DiskID); err != nil {
return err
}
ctx, done, err := p.TrackDiskHealth(ctx, storageMetricWalkDir, opts.Bucket, opts.BaseDir)
if err != nil {
return err
@@ -399,59 +403,32 @@ func (p *xlStorageDiskIDCheck) WalkDir(ctx context.Context, opts WalkDirOptions,
// WalkDir will traverse a directory and return all entries found.
// On success a meta cache stream will be returned, that should be closed when done.
func (client *storageRESTClient) WalkDir(ctx context.Context, opts WalkDirOptions, wr io.Writer) error {
values := make(url.Values)
values.Set(storageRESTVolume, opts.Bucket)
values.Set(storageRESTDirPath, opts.BaseDir)
values.Set(storageRESTRecursive, strconv.FormatBool(opts.Recursive))
values.Set(storageRESTReportNotFound, strconv.FormatBool(opts.ReportNotFound))
values.Set(storageRESTPrefixFilter, opts.FilterPrefix)
values.Set(storageRESTForwardFilter, opts.ForwardTo)
respBody, err := client.call(ctx, storageRESTMethodWalkDir, values, nil, -1)
// Ensure remote has the same disk ID.
opts.DiskID = client.diskID
b, err := opts.MarshalMsg(grid.GetByteBuffer()[:0])
if err != nil {
logger.LogIf(ctx, err)
return err
}
defer xhttp.DrainBody(respBody)
return waitForHTTPStream(respBody, wr)
st, err := client.gridConn.NewStream(ctx, grid.HandlerWalkDir, b)
if err != nil {
return err
}
return toStorageErr(st.Results(func(in []byte) error {
_, err := wr.Write(in)
return err
}))
}
// WalkDirHandler - remote caller to list files and folders in a requested directory path.
func (s *storageRESTServer) WalkDirHandler(w http.ResponseWriter, r *http.Request) {
if !s.IsValid(w, r) {
return
}
volume := r.Form.Get(storageRESTVolume)
dirPath := r.Form.Get(storageRESTDirPath)
recursive, err := strconv.ParseBool(r.Form.Get(storageRESTRecursive))
func (s *storageRESTServer) WalkDirHandler(ctx context.Context, payload []byte, _ <-chan []byte, out chan<- []byte) (gerr *grid.RemoteErr) {
var opts WalkDirOptions
_, err := opts.UnmarshalMsg(payload)
if err != nil {
s.writeErrorResponse(w, err)
return
return grid.NewRemoteErr(err)
}
var reportNotFound bool
if v := r.Form.Get(storageRESTReportNotFound); v != "" {
reportNotFound, err = strconv.ParseBool(v)
if err != nil {
s.writeErrorResponse(w, err)
return
}
}
prefix := r.Form.Get(storageRESTPrefixFilter)
forward := r.Form.Get(storageRESTForwardFilter)
writer := streamHTTPResponse(w)
defer func() {
if r := recover(); r != nil {
debug.PrintStack()
writer.CloseWithError(fmt.Errorf("panic: %v", r))
}
}()
writer.CloseWithError(s.storage.WalkDir(r.Context(), WalkDirOptions{
Bucket: volume,
BaseDir: dirPath,
Recursive: recursive,
ReportNotFound: reportNotFound,
FilterPrefix: prefix,
ForwardTo: forward,
}, writer))
ctx, cancel := context.WithCancel(ctx)
defer cancel()
return grid.NewRemoteErr(s.storage.WalkDir(ctx, opts, grid.WriterToChannel(ctx, out)))
}