mirror of
https://github.com/minio/minio.git
synced 2025-11-10 14:09:48 -05:00
Automatically set goroutines based on shardSize (#5346)
Update the reedsolomon library to enable the feature that automatically sets the number of goroutines based on the input shard size, since the shard size is effectively a constant in Minio for objects > 10MiB (the default block size).

klauspost reported around a 15-20% improvement in performance numbers on older systems such as those with AVX and SSE3:

```
name                  old speed      new speed      delta
Encode10x2x10000-8    5.45GB/s ± 1%  6.22GB/s ± 1%  +14.20%  (p=0.000 n=9+9)
Encode100x20x10000-8  1.44GB/s ± 1%  1.64GB/s ± 1%  +13.77%  (p=0.000 n=10+10)
Encode17x3x1M-8       10.0GB/s ± 5%  12.0GB/s ± 1%  +19.88%  (p=0.000 n=10+10)
Encode10x4x16M-8      7.81GB/s ± 5%  8.56GB/s ± 5%   +9.58%  (p=0.000 n=10+9)
Encode5x2x1M-8        15.3GB/s ± 2%  19.6GB/s ± 2%  +28.57%  (p=0.000 n=9+10)
Encode10x2x1M-8       12.2GB/s ± 5%  15.0GB/s ± 5%  +22.45%  (p=0.000 n=10+10)
Encode10x4x1M-8       7.84GB/s ± 1%  9.03GB/s ± 1%  +15.19%  (p=0.000 n=9+9)
Encode50x20x1M-8      1.73GB/s ± 4%  2.09GB/s ± 4%  +20.59%  (p=0.000 n=10+9)
Encode17x3x16M-8      10.6GB/s ± 1%  11.7GB/s ± 4%  +10.12%  (p=0.000 n=8+10)
```
This commit is contained in:
committed by
kannappanr
parent
b1fb550d5c
commit
c0721164be
56
vendor/github.com/klauspost/reedsolomon/galois_amd64.s
generated
vendored
56
vendor/github.com/klauspost/reedsolomon/galois_amd64.s
generated
vendored
@@ -19,8 +19,35 @@ TEXT ·galMulSSSE3Xor(SB), 7, $0
|
||||
MOVQ out+72(FP), DX // DX: &out
|
||||
PSHUFB X5, X8 // X8: lomask (unpacked)
|
||||
SHRQ $4, R9 // len(in) / 16
|
||||
MOVQ SI, AX
|
||||
MOVQ DX, BX
|
||||
ANDQ $15, AX
|
||||
ANDQ $15, BX
|
||||
CMPQ R9, $0
|
||||
JEQ done_xor
|
||||
ORQ AX, BX
|
||||
CMPQ BX, $0
|
||||
JNZ loopback_xor
|
||||
|
||||
loopback_xor_aligned:
|
||||
MOVOA (SI), X0 // in[x]
|
||||
MOVOA (DX), X4 // out[x]
|
||||
MOVOA X0, X1 // in[x]
|
||||
MOVOA X6, X2 // low copy
|
||||
MOVOA X7, X3 // high copy
|
||||
PSRLQ $4, X1 // X1: high input
|
||||
PAND X8, X0 // X0: low input
|
||||
PAND X8, X1 // X0: high input
|
||||
PSHUFB X0, X2 // X2: mul low part
|
||||
PSHUFB X1, X3 // X3: mul high part
|
||||
PXOR X2, X3 // X3: Result
|
||||
PXOR X4, X3 // X3: Result xor existing out
|
||||
MOVOA X3, (DX) // Store
|
||||
ADDQ $16, SI // in+=16
|
||||
ADDQ $16, DX // out+=16
|
||||
SUBQ $1, R9
|
||||
JNZ loopback_xor_aligned
|
||||
JMP done_xor
|
||||
|
||||
loopback_xor:
|
||||
MOVOU (SI), X0 // in[x]
|
||||
@@ -57,15 +84,40 @@ TEXT ·galMulSSSE3(SB), 7, $0
|
||||
MOVQ in_len+56(FP), R9 // R9: len(in)
|
||||
MOVQ out+72(FP), DX // DX: &out
|
||||
PSHUFB X5, X8 // X8: lomask (unpacked)
|
||||
MOVQ SI, AX
|
||||
MOVQ DX, BX
|
||||
SHRQ $4, R9 // len(in) / 16
|
||||
ANDQ $15, AX
|
||||
ANDQ $15, BX
|
||||
CMPQ R9, $0
|
||||
JEQ done
|
||||
ORQ AX, BX
|
||||
CMPQ BX, $0
|
||||
JNZ loopback
|
||||
|
||||
loopback_aligned:
|
||||
MOVOA (SI), X0 // in[x]
|
||||
MOVOA X0, X1 // in[x]
|
||||
MOVOA X6, X2 // low copy
|
||||
MOVOA X7, X3 // high copy
|
||||
PSRLQ $4, X1 // X1: high input
|
||||
PAND X8, X0 // X0: low input
|
||||
PAND X8, X1 // X0: high input
|
||||
PSHUFB X0, X2 // X2: mul low part
|
||||
PSHUFB X1, X3 // X3: mul high part
|
||||
PXOR X2, X3 // X3: Result
|
||||
MOVOA X3, (DX) // Store
|
||||
ADDQ $16, SI // in+=16
|
||||
ADDQ $16, DX // out+=16
|
||||
SUBQ $1, R9
|
||||
JNZ loopback_aligned
|
||||
JMP done
|
||||
|
||||
loopback:
|
||||
MOVOU (SI), X0 // in[x]
|
||||
MOVOU X0, X1 // in[x]
|
||||
MOVOU X6, X2 // low copy
|
||||
MOVOU X7, X3 // high copy
|
||||
MOVOA X6, X2 // low copy
|
||||
MOVOA X7, X3 // high copy
|
||||
PSRLQ $4, X1 // X1: high input
|
||||
PAND X8, X0 // X0: low input
|
||||
PAND X8, X1 // X0: high input
|
||||
|
||||
Reference in New Issue
Block a user