mirror of
https://github.com/minio/minio.git
synced 2025-01-24 13:13:16 -05:00
9ccc483df6
major performance improvements in range GETs to avoid large read amplification when ranges are tiny and random ``` ------------------- Operation: GET Operations: 142014 -> 339421 Duration: 4m50s -> 4m56s * Average: +139.41% (+1177.3 MiB/s) throughput, +139.11% (+658.4) obj/s * Fastest: +125.24% (+1207.4 MiB/s) throughput, +132.32% (+612.9) obj/s * 50% Median: +139.06% (+1175.7 MiB/s) throughput, +133.46% (+660.9) obj/s * Slowest: +203.40% (+1267.9 MiB/s) throughput, +198.59% (+753.5) obj/s ``` TTFB from 10MiB BlockSize ``` * First Access TTFB: Avg: 81ms, Median: 61ms, Best: 20ms, Worst: 2.056s ``` TTFB from 1MiB BlockSize ``` * First Access TTFB: Avg: 22ms, Median: 21ms, Best: 8ms, Worst: 91ms ``` Full object reads however do see a slight change which won't be noticeable in real world, so not doing any comparisons TTFB still had improvements with full object reads with 1MiB ``` * First Access TTFB: Avg: 68ms, Median: 35ms, Best: 11ms, Worst: 1.16s ``` v/s TTFB with 10MiB ``` * First Access TTFB: Avg: 388ms, Median: 98ms, Best: 20ms, Worst: 4.156s ``` This change should affect all new uploads, previous uploads should continue to work with business as usual. But dramatic improvements can be seen with these changes.
161 lines
7.8 KiB
Go
161 lines
7.8 KiB
Go
/*
|
|
* MinIO Cloud Storage, (C) 2016-2020 MinIO, Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package cmd
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"crypto/rand"
|
|
"io"
|
|
"os"
|
|
"testing"
|
|
)
|
|
|
|
var erasureHealTests = []struct {
|
|
dataBlocks, disks int
|
|
|
|
// number of offline disks is also number of staleDisks for
|
|
// erasure reconstruction in this test
|
|
offDisks int
|
|
|
|
// bad disks are online disks which return errors
|
|
badDisks, badStaleDisks int
|
|
|
|
blocksize, size int64
|
|
algorithm BitrotAlgorithm
|
|
shouldFail bool
|
|
}{
|
|
{dataBlocks: 2, disks: 4, offDisks: 1, badDisks: 0, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: SHA256, shouldFail: false}, // 0
|
|
{dataBlocks: 3, disks: 6, offDisks: 2, badDisks: 0, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: BLAKE2b512, shouldFail: false}, // 1
|
|
{dataBlocks: 4, disks: 8, offDisks: 2, badDisks: 1, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: BLAKE2b512, shouldFail: false}, // 2
|
|
{dataBlocks: 5, disks: 10, offDisks: 3, badDisks: 1, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: DefaultBitrotAlgorithm, shouldFail: false}, // 3
|
|
{dataBlocks: 6, disks: 12, offDisks: 2, badDisks: 3, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: SHA256, shouldFail: false}, // 4
|
|
{dataBlocks: 7, disks: 14, offDisks: 4, badDisks: 1, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: DefaultBitrotAlgorithm, shouldFail: false}, // 5
|
|
{dataBlocks: 8, disks: 16, offDisks: 6, badDisks: 1, badStaleDisks: 1, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: DefaultBitrotAlgorithm, shouldFail: false}, // 6
|
|
{dataBlocks: 7, disks: 14, offDisks: 2, badDisks: 3, badStaleDisks: 0, blocksize: int64(oneMiByte / 2), size: oneMiByte, algorithm: BLAKE2b512, shouldFail: false}, // 7
|
|
{dataBlocks: 6, disks: 12, offDisks: 1, badDisks: 0, badStaleDisks: 1, blocksize: int64(oneMiByte - 1), size: oneMiByte, algorithm: DefaultBitrotAlgorithm, shouldFail: true}, // 8
|
|
{dataBlocks: 5, disks: 10, offDisks: 3, badDisks: 0, badStaleDisks: 3, blocksize: int64(oneMiByte / 2), size: oneMiByte, algorithm: SHA256, shouldFail: true}, // 9
|
|
{dataBlocks: 4, disks: 8, offDisks: 1, badDisks: 1, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: DefaultBitrotAlgorithm, shouldFail: false}, // 10
|
|
{dataBlocks: 2, disks: 4, offDisks: 1, badDisks: 0, badStaleDisks: 1, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: DefaultBitrotAlgorithm, shouldFail: true}, // 11
|
|
{dataBlocks: 6, disks: 12, offDisks: 8, badDisks: 3, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: DefaultBitrotAlgorithm, shouldFail: true}, // 12
|
|
{dataBlocks: 7, disks: 14, offDisks: 3, badDisks: 4, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: BLAKE2b512, shouldFail: false}, // 13
|
|
{dataBlocks: 7, disks: 14, offDisks: 6, badDisks: 1, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: DefaultBitrotAlgorithm, shouldFail: false}, // 14
|
|
{dataBlocks: 8, disks: 16, offDisks: 4, badDisks: 5, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: DefaultBitrotAlgorithm, shouldFail: true}, // 15
|
|
{dataBlocks: 2, disks: 4, offDisks: 1, badDisks: 0, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: DefaultBitrotAlgorithm, shouldFail: false}, // 16
|
|
{dataBlocks: 12, disks: 16, offDisks: 2, badDisks: 1, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: DefaultBitrotAlgorithm, shouldFail: false}, // 17
|
|
{dataBlocks: 6, disks: 8, offDisks: 1, badDisks: 0, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte, algorithm: BLAKE2b512, shouldFail: false}, // 18
|
|
{dataBlocks: 2, disks: 4, offDisks: 1, badDisks: 0, badStaleDisks: 0, blocksize: int64(blockSizeV2), size: oneMiByte * 64, algorithm: SHA256, shouldFail: false}, // 19
|
|
}
|
|
|
|
func TestErasureHeal(t *testing.T) {
|
|
for i, test := range erasureHealTests {
|
|
if test.offDisks < test.badStaleDisks {
|
|
// test case sanity check
|
|
t.Fatalf("Test %d: Bad test case - number of stale disks cannot be less than number of badstale disks", i)
|
|
}
|
|
|
|
// create some test data
|
|
setup, err := newErasureTestSetup(test.dataBlocks, test.disks-test.dataBlocks, test.blocksize)
|
|
if err != nil {
|
|
t.Fatalf("Test %d: failed to setup Erasure environment: %v", i, err)
|
|
}
|
|
disks := setup.disks
|
|
erasure, err := NewErasure(context.Background(), test.dataBlocks, test.disks-test.dataBlocks, test.blocksize)
|
|
if err != nil {
|
|
setup.Remove()
|
|
t.Fatalf("Test %d: failed to create ErasureStorage: %v", i, err)
|
|
}
|
|
data := make([]byte, test.size)
|
|
if _, err = io.ReadFull(rand.Reader, data); err != nil {
|
|
setup.Remove()
|
|
t.Fatalf("Test %d: failed to create random test data: %v", i, err)
|
|
}
|
|
buffer := make([]byte, test.blocksize, 2*test.blocksize)
|
|
writers := make([]io.Writer, len(disks))
|
|
for i, disk := range disks {
|
|
writers[i] = newBitrotWriter(disk, "testbucket", "testobject", erasure.ShardFileSize(test.size), test.algorithm, erasure.ShardSize())
|
|
}
|
|
_, err = erasure.Encode(context.Background(), bytes.NewReader(data), writers, buffer, erasure.dataBlocks+1)
|
|
closeBitrotWriters(writers)
|
|
if err != nil {
|
|
setup.Remove()
|
|
t.Fatalf("Test %d: failed to create random test data: %v", i, err)
|
|
}
|
|
|
|
readers := make([]io.ReaderAt, len(disks))
|
|
for i, disk := range disks {
|
|
shardFilesize := erasure.ShardFileSize(test.size)
|
|
readers[i] = newBitrotReader(disk, nil, "testbucket", "testobject", shardFilesize, test.algorithm, bitrotWriterSum(writers[i]), erasure.ShardSize())
|
|
}
|
|
|
|
// setup stale disks for the test case
|
|
staleDisks := make([]StorageAPI, len(disks))
|
|
copy(staleDisks, disks)
|
|
for j := 0; j < len(staleDisks); j++ {
|
|
if j < test.offDisks {
|
|
readers[j] = nil
|
|
} else {
|
|
staleDisks[j] = nil
|
|
}
|
|
}
|
|
for j := 0; j < test.badDisks; j++ {
|
|
switch r := readers[test.offDisks+j].(type) {
|
|
case *streamingBitrotReader:
|
|
r.disk = badDisk{nil}
|
|
case *wholeBitrotReader:
|
|
r.disk = badDisk{nil}
|
|
}
|
|
}
|
|
for j := 0; j < test.badStaleDisks; j++ {
|
|
staleDisks[j] = badDisk{nil}
|
|
}
|
|
|
|
staleWriters := make([]io.Writer, len(staleDisks))
|
|
for i, disk := range staleDisks {
|
|
if disk == nil {
|
|
continue
|
|
}
|
|
os.Remove(pathJoin(disk.String(), "testbucket", "testobject"))
|
|
staleWriters[i] = newBitrotWriter(disk, "testbucket", "testobject", erasure.ShardFileSize(test.size), test.algorithm, erasure.ShardSize())
|
|
}
|
|
|
|
// test case setup is complete - now call Heal()
|
|
err = erasure.Heal(context.Background(), readers, staleWriters, test.size)
|
|
closeBitrotReaders(readers)
|
|
closeBitrotWriters(staleWriters)
|
|
if err != nil && !test.shouldFail {
|
|
t.Errorf("Test %d: should pass but it failed with: %v", i, err)
|
|
}
|
|
if err == nil && test.shouldFail {
|
|
t.Errorf("Test %d: should fail but it passed", i)
|
|
}
|
|
if err == nil {
|
|
// Verify that checksums of staleDisks
|
|
// match expected values
|
|
for i := range staleWriters {
|
|
if staleWriters[i] == nil {
|
|
continue
|
|
}
|
|
if !bytes.Equal(bitrotWriterSum(staleWriters[i]), bitrotWriterSum(writers[i])) {
|
|
t.Errorf("Test %d: heal returned different bitrot checksums", i)
|
|
}
|
|
}
|
|
}
|
|
setup.Remove()
|
|
}
|
|
}
|