// Copyright (c) 2015-2021 MinIO, Inc.
//
// This file is part of MinIO Object Storage stack
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2016-04-29 20:52:17 -04:00
|
|
|
|
2016-08-18 19:23:42 -04:00
|
|
|
package cmd
|
2016-04-29 20:52:17 -04:00
|
|
|
|
2016-05-25 04:33:39 -04:00
|
|
|
import (
|
2021-03-08 14:30:43 -05:00
|
|
|
"sync"
|
2019-07-05 17:06:12 -04:00
|
|
|
|
2021-06-17 11:16:31 -04:00
|
|
|
"github.com/dustin/go-humanize"
|
2016-05-25 04:33:39 -04:00
|
|
|
)
|
2016-04-29 20:52:17 -04:00
|
|
|
|
2016-05-29 18:38:14 -04:00
|
|
|
const (
|
|
|
|
// Block size used for all internal operations version 1.
|
[feat]: change erasure coding default block size from 10MiB to 1MiB (#11721)
major performance improvements in range GETs to avoid large
read amplification when ranges are tiny and random
```
-------------------
Operation: GET
Operations: 142014 -> 339421
Duration: 4m50s -> 4m56s
* Average: +139.41% (+1177.3 MiB/s) throughput, +139.11% (+658.4) obj/s
* Fastest: +125.24% (+1207.4 MiB/s) throughput, +132.32% (+612.9) obj/s
* 50% Median: +139.06% (+1175.7 MiB/s) throughput, +133.46% (+660.9) obj/s
* Slowest: +203.40% (+1267.9 MiB/s) throughput, +198.59% (+753.5) obj/s
```
TTFB from 10MiB BlockSize
```
* First Access TTFB: Avg: 81ms, Median: 61ms, Best: 20ms, Worst: 2.056s
```
TTFB from 1MiB BlockSize
```
* First Access TTFB: Avg: 22ms, Median: 21ms, Best: 8ms, Worst: 91ms
```
Full object reads however do see a slight change which won't be
noticeable in real world, so not doing any comparisons
TTFB still had improvements with full object reads with 1MiB
```
* First Access TTFB: Avg: 68ms, Median: 35ms, Best: 11ms, Worst: 1.16s
```
v/s
TTFB with 10MiB
```
* First Access TTFB: Avg: 388ms, Median: 98ms, Best: 20ms, Worst: 4.156s
```
This change should affect all new uploads, previous uploads should
continue to work with business as usual. But dramatic improvements can
be seen with these changes.
2021-03-06 17:09:34 -05:00
|
|
|
|
|
|
|
// TLDR..
|
|
|
|
// Not used anymore xl.meta captures the right blockSize
|
|
|
|
// so blockSizeV2 should be used for all future purposes.
|
|
|
|
// this value is kept here to calculate the max API
|
|
|
|
// requests based on RAM size for existing content.
|
2016-11-22 21:18:22 -05:00
|
|
|
blockSizeV1 = 10 * humanize.MiByte
|
2016-06-25 06:03:27 -04:00
|
|
|
|
[feat]: change erasure coding default block size from 10MiB to 1MiB (#11721)
major performance improvements in range GETs to avoid large
read amplification when ranges are tiny and random
```
-------------------
Operation: GET
Operations: 142014 -> 339421
Duration: 4m50s -> 4m56s
* Average: +139.41% (+1177.3 MiB/s) throughput, +139.11% (+658.4) obj/s
* Fastest: +125.24% (+1207.4 MiB/s) throughput, +132.32% (+612.9) obj/s
* 50% Median: +139.06% (+1175.7 MiB/s) throughput, +133.46% (+660.9) obj/s
* Slowest: +203.40% (+1267.9 MiB/s) throughput, +198.59% (+753.5) obj/s
```
TTFB from 10MiB BlockSize
```
* First Access TTFB: Avg: 81ms, Median: 61ms, Best: 20ms, Worst: 2.056s
```
TTFB from 1MiB BlockSize
```
* First Access TTFB: Avg: 22ms, Median: 21ms, Best: 8ms, Worst: 91ms
```
Full object reads however do see a slight change which won't be
noticeable in real world, so not doing any comparisons
TTFB still had improvements with full object reads with 1MiB
```
* First Access TTFB: Avg: 68ms, Median: 35ms, Best: 11ms, Worst: 1.16s
```
v/s
TTFB with 10MiB
```
* First Access TTFB: Avg: 388ms, Median: 98ms, Best: 20ms, Worst: 4.156s
```
This change should affect all new uploads, previous uploads should
continue to work with business as usual. But dramatic improvements can
be seen with these changes.
2021-03-06 17:09:34 -05:00
|
|
|
// Block size used in erasure coding version 2.
|
|
|
|
blockSizeV2 = 1 * humanize.MiByte
|
|
|
|
|
2016-07-21 20:31:14 -04:00
|
|
|
// Buckets meta prefix.
|
|
|
|
bucketMetaPrefix = "buckets"
|
2017-04-10 22:51:23 -04:00
|
|
|
|
2022-07-25 20:51:32 -04:00
|
|
|
// Deleted Buckets prefix.
|
|
|
|
deletedBucketsPrefix = ".deleted"
|
|
|
|
|
2017-05-14 15:05:51 -04:00
|
|
|
// ETag (hex encoded md5sum) of empty string.
|
|
|
|
emptyETag = "d41d8cd98f00b204e9800998ecf8427e"
|
2016-05-29 18:38:14 -04:00
|
|
|
)
|
|
|
|
|
2016-10-10 02:03:10 -04:00
|
|
|
// globalObjLayerMutex is the global object layer mutex, used for safely
// updating the object layer.
var globalObjLayerMutex sync.RWMutex
|
2016-10-10 02:03:10 -04:00
|
|
|
|
2019-10-31 02:39:09 -04:00
|
|
|
// Global object layer, only accessed by globalObjectAPI.
|
2016-10-10 02:03:10 -04:00
|
|
|
var globalObjectAPI ObjectLayer
|
|
|
|
|
2023-08-01 13:54:26 -04:00
|
|
|
// storageOpts carries the options that newStorageAPI forwards to the
// storage constructors.
type storageOpts struct {
	// cleanUp is passed to newXLStorage when the endpoint is local.
	cleanUp bool
	// healthCheck is passed to newXLStorageDiskIDCheck for local
	// endpoints and to newStorageRESTClient for remote ones.
	healthCheck bool
}
|
|
|
|
|
2016-06-02 04:49:46 -04:00
|
|
|
// Depending on the disk type network or local, initialize storage API.
|
2023-08-01 13:54:26 -04:00
|
|
|
func newStorageAPI(endpoint Endpoint, opts storageOpts) (storage StorageAPI, err error) {
|
2017-04-11 18:44:27 -04:00
|
|
|
if endpoint.IsLocal {
|
2023-08-01 13:54:26 -04:00
|
|
|
storage, err := newXLStorage(endpoint, opts.cleanUp)
|
2019-10-25 13:37:53 -04:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2023-08-01 13:54:26 -04:00
|
|
|
return newXLStorageDiskIDCheck(storage, opts.healthCheck), nil
|
2016-06-02 04:49:46 -04:00
|
|
|
}
|
2017-04-11 18:44:27 -04:00
|
|
|
|
perf: websocket grid connectivity for all internode communication (#18461)
This PR adds a WebSocket grid feature that allows servers to communicate via
a single two-way connection.
There are two request types:
* Single requests, which are `[]byte => ([]byte, error)`. This is for efficient small
roundtrips with small payloads.
* Streaming requests which are `[]byte, chan []byte => chan []byte (and error)`,
which allows for different combinations of full two-way streams with an initial payload.
Only a single stream is created between two machines - and there is, as such, no
server/client relation since both sides can initiate and handle requests. Which server
initiates the request is decided deterministically on the server names.
Requests are made through a mux client and server, which handles message
passing, congestion, cancelation, timeouts, etc.
If a connection is lost, all requests are canceled, and the calling server will try
to reconnect. Registered handlers can operate directly on byte
slices or use a higher-level generics abstraction.
There is no versioning of handlers/clients, and incompatible changes should
be handled by adding new handlers.
The request path can be changed to a new one for any protocol changes.
First, all servers create a "Manager." The manager must know its address
as well as all remote addresses. This will manage all connections.
To get a connection to any remote, ask the manager to provide it given
the remote address using.
```
func (m *Manager) Connection(host string) *Connection
```
All serverside handlers must also be registered on the manager. This will
make sure that all incoming requests are served. The number of in-flight
requests and responses must also be given for streaming requests.
The "Connection" returned manages the mux-clients. Requests issued
to the connection will be sent to the remote.
* `func (c *Connection) Request(ctx context.Context, h HandlerID, req []byte) ([]byte, error)`
performs a single request and returns the result. Any deadline provided on the request is
forwarded to the server, and canceling the context will make the function return at once.
* `func (c *Connection) NewStream(ctx context.Context, h HandlerID, payload []byte) (st *Stream, err error)`
will initiate a remote call and send the initial payload.
```Go
// A Stream is a two-way stream.
// All responses *must* be read by the caller.
// If the call is canceled through the context,
//The appropriate error will be returned.
type Stream struct {
// Responses from the remote server.
// Channel will be closed after an error or when the remote closes.
// All responses *must* be read by the caller until either an error is returned or the channel is closed.
// Canceling the context will cause the context cancellation error to be returned.
Responses <-chan Response
// Requests sent to the server.
// If the handler is defined with 0 incoming capacity this will be nil.
// Channel *must* be closed to signal the end of the stream.
// If the request context is canceled, the stream will no longer process requests.
Requests chan<- []byte
}
type Response struct {
Msg []byte
Err error
}
```
There are generic versions of the server/client handlers that allow the use of type
safe implementations for data types that support msgpack marshal/unmarshal.
2023-11-20 20:09:35 -05:00
|
|
|
return newStorageRESTClient(endpoint, opts.healthCheck, globalGrid.Load())
|
2016-06-02 04:49:46 -04:00
|
|
|
}
|