fix: under FanOut API avoid repeated md5sum calculation (#17572)

md5sum calculation has a high CPU overhead; avoid calculating
it repeatedly for similar FanOut calls.

This addresses the following CPU profiler result:
```
(pprof) top10
Showing nodes accounting for 678.68s, 84.67% of 801.54s total
Dropped 1072 nodes (cum <= 4.01s)
Showing top 10 nodes out of 156
      flat  flat%   sum%        cum   cum%
   332.54s 41.49% 41.49%    332.54s 41.49%  runtime/internal/syscall.Syscall6
   228.39s 28.49% 69.98%    228.39s 28.49%  crypto/md5.block
    48.07s  6.00% 75.98%     48.07s  6.00%  runtime.memmove
    28.91s  3.61% 79.59%     28.91s  3.61%  github.com/minio/highwayhash.updateAVX2
     8.25s  1.03% 80.61%      8.25s  1.03%  runtime.futex
     8.25s  1.03% 81.64%     10.81s  1.35%  runtime.step
     6.99s  0.87% 82.52%     22.35s  2.79%  runtime.pcvalue
     6.67s  0.83% 83.35%     38.90s  4.85%  runtime.mallocgc
     5.77s  0.72% 84.07%     32.61s  4.07%  runtime.gentraceback
     4.84s   0.6% 84.67%     10.49s  1.31%  runtime.lock2
```
This commit is contained in:
Harshavardhana
2023-07-05 03:16:05 -07:00
committed by GitHub
parent f6b48ed02a
commit 0bc34952eb
4 changed files with 60 additions and 35 deletions

View File

@@ -36,6 +36,7 @@ type fanOutOptions struct {
Key []byte
KmsCtx kms.Context
Checksum *hash.Checksum
MD5Hex string
}
// fanOutPutObject takes an input source reader and fans out multiple PUT operations
@@ -53,7 +54,14 @@ func fanOutPutObject(ctx context.Context, bucket string, objectAPI ObjectLayer,
objInfos[idx] = ObjectInfo{Name: req.Key}
hr, err := hash.NewReader(bytes.NewReader(fanOutBuf), int64(len(fanOutBuf)), "", "", -1)
hopts := hash.Options{
Size: int64(len(fanOutBuf)),
MD5Hex: opts.MD5Hex,
SHA256Hex: "",
ActualSize: -1,
DisableMD5: true,
}
hr, err := hash.NewReaderWithOpts(bytes.NewReader(fanOutBuf), hopts)
if err != nil {
errs[idx] = err
return