mirror of
https://github.com/minio/minio.git
synced 2025-11-07 12:52:58 -05:00
Use concurrent bz2 decompression (#13360)
Tested with `mc sql --compression BZIP2 --csv-input "rd=\n,fh=USE,fd=;" --query="select COUNT(*) from S3Object" local2/testbucket/nyc-taxi-data-10M.csv.bz2`: 96.98s before, 10.79s after. Uses about 70% CPU while running.
This commit is contained in:
@@ -21,12 +21,14 @@ import (
|
||||
"archive/tar"
|
||||
"bufio"
|
||||
"bytes"
|
||||
"compress/bzip2"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
"runtime"
|
||||
|
||||
"github.com/cosnicolaou/pbzip2"
|
||||
"github.com/klauspost/compress/s2"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
gzip "github.com/klauspost/pgzip"
|
||||
@@ -112,7 +114,9 @@ func untar(r io.Reader, putObject func(reader io.Reader, info os.FileInfo, name
|
||||
defer dec.Close()
|
||||
r = dec
|
||||
case formatBZ2:
|
||||
r = bzip2.NewReader(bf)
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
r = pbzip2.NewReader(ctx, bf, pbzip2.DecompressionOptions(pbzip2.BZConcurrency((runtime.GOMAXPROCS(0)+1)/2)))
|
||||
case formatLZ4:
|
||||
r = lz4.NewReader(bf)
|
||||
case formatUnknown:
|
||||
|
||||
Reference in New Issue
Block a user