2021-04-18 15:41:13 -04:00
|
|
|
// Copyright (c) 2015-2021 MinIO, Inc.
|
|
|
|
//
|
|
|
|
// This file is part of MinIO Object Storage stack
|
|
|
|
//
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU Affero General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
//
|
|
|
|
// This program is distributed in the hope that it will be useful
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU Affero General Public License for more details.
|
|
|
|
//
|
|
|
|
// You should have received a copy of the GNU Affero General Public License
|
|
|
|
// along with this program. If not, see <http://www.gnu.org/licenses/>.
|
2021-03-26 20:15:09 -04:00
|
|
|
|
|
|
|
package cmd
|
|
|
|
|
|
|
|
import (
|
|
|
|
"archive/tar"
|
|
|
|
"bufio"
|
|
|
|
"bytes"
|
2021-10-14 14:11:07 -04:00
|
|
|
"context"
|
2022-10-18 16:50:21 -04:00
|
|
|
"errors"
|
2021-03-26 20:15:09 -04:00
|
|
|
"fmt"
|
|
|
|
"io"
|
2022-10-18 16:50:21 -04:00
|
|
|
"io/fs"
|
2021-03-26 20:15:09 -04:00
|
|
|
"os"
|
|
|
|
"path"
|
2021-10-14 14:11:07 -04:00
|
|
|
"runtime"
|
2022-10-18 16:50:21 -04:00
|
|
|
"sync"
|
2023-01-26 11:50:35 -05:00
|
|
|
"time"
|
2021-03-26 20:15:09 -04:00
|
|
|
|
2021-10-14 14:11:07 -04:00
|
|
|
"github.com/cosnicolaou/pbzip2"
|
2021-03-26 20:15:09 -04:00
|
|
|
"github.com/klauspost/compress/s2"
|
|
|
|
"github.com/klauspost/compress/zstd"
|
|
|
|
gzip "github.com/klauspost/pgzip"
|
2024-09-21 20:33:43 -04:00
|
|
|
"github.com/pierrec/lz4/v4"
|
2021-03-26 20:15:09 -04:00
|
|
|
)
|
|
|
|
|
2021-10-18 11:44:36 -04:00
|
|
|
// Max bzip2 concurrency across calls. 50% of GOMAXPROCS.
|
|
|
|
// bz2Limiter is the package-wide pbzip2 concurrency pool. untar passes it to
// every bzip2 reader it creates, so the limit is shared between concurrent
// untar calls. The pool is sized to half of GOMAXPROCS, rounded up.
var bz2Limiter = pbzip2.CreateConcurrencyPool((runtime.GOMAXPROCS(0) + 1) / 2)
|
|
|
|
|
2021-03-26 20:15:09 -04:00
|
|
|
// detect peeks at the first four bytes of r, without consuming them, and
// reports the compression format whose magic number they match.
//
// It returns formatUnknown when fewer than four bytes can be peeked or when
// no entry in magicHeaders matches.
func detect(r *bufio.Reader) format {
	z, err := r.Peek(4)
	if err != nil {
		return formatUnknown
	}
	for _, m := range magicHeaders {
		if hasMagicPrefix(z, m.header, m.mask) {
			return m.f
		}
	}
	return formatUnknown
}

// hasMagicPrefix reports whether data starts with header. When mask is
// non-nil it must have the same length as header, and each data byte is
// ANDed with the corresponding mask byte before being compared.
func hasMagicPrefix(data, header, mask []byte) bool {
	if len(data) < len(header) {
		return false
	}
	if mask == nil {
		return bytes.Equal(header, data[:len(header)])
	}
	for i, want := range header {
		if data[i]&mask[i] != want {
			return false
		}
	}
	return true
}

//go:generate stringer -type=format -trimprefix=format $GOFILE
type format int

// Stream formats that detect can report.
const (
	formatUnknown format = iota
	formatGzip
	formatZstd
	formatLZ4
	formatS2
	formatBZ2
)

// magicHeaders lists the byte signatures recognised by detect. Entries are
// tried in order and the first match wins. No header may be longer than the
// four bytes that detect peeks.
var magicHeaders = []struct {
	header []byte
	// mask is optional. When set, it selects the bits of each input byte
	// that take part in the comparison with header.
	mask []byte
	f    format
}{
	{
		// Gzip: ID1, ID2, CM=8 (deflate).
		header: []byte{0x1f, 0x8b, 8},
		f:      formatGzip,
	},
	{
		// Zstd default header.
		header: []byte{0x28, 0xb5, 0x2f, 0xfd},
		f:      formatZstd,
	},
	{
		// Zstd skippable frame header. The magic number is
		// 0x184D2A50..0x184D2A5F stored little-endian, so the first byte
		// on the wire is 0x50..0x5f and its low nibble is ignored.
		header: []byte{0x50, 0x2a, 0x4d, 0x18},
		mask:   []byte{0xf0, 0xff, 0xff, 0xff},
		f:      formatZstd,
	},
	{
		// LZ4
		header: []byte{0x4, 0x22, 0x4d, 0x18},
		f:      formatLZ4,
	},
	{
		// Snappy/S2 stream
		header: []byte{0xff, 0x06, 0x00, 0x00},
		f:      formatS2,
	},
	{
		// Bzip2: "BZh".
		header: []byte{0x42, 0x5a, 'h'},
		f:      formatBZ2,
	},
}
|
|
|
|
|
2022-10-18 16:50:21 -04:00
|
|
|
// untarOptions tunes how untar treats the entries of an archive.
type untarOptions struct {
	// ignoreDirs makes untar skip directory entries instead of passing
	// them to putObject. ignoreErrs makes untar log a putObject failure
	// and carry on with the next entry instead of aborting.
	ignoreDirs, ignoreErrs bool

	// prefixAll, when non-empty, is joined in front of every entry name.
	prefixAll string
}
|
|
|
|
|
|
|
|
// disconnectReader wraps an io.Reader so that it can be detached from it:
// once Close has returned, no further Read reaches the wrapped reader.
type disconnectReader struct {
	mu sync.Mutex // guards r
	r  io.Reader  // set to nil by Close
}

// Read forwards to the wrapped reader while holding the lock. After Close
// it returns a "reader closed" error without touching the wrapped reader.
func (d *disconnectReader) Read(p []byte) (n int, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()

	if d.r == nil {
		return 0, errors.New("reader closed")
	}
	return d.r.Read(p)
}

// Close drops the reference to the wrapped reader. It takes the same lock
// as Read, so it waits for a Read that is in progress. It always returns nil.
func (d *disconnectReader) Close() error {
	d.mu.Lock()
	defer d.mu.Unlock()

	d.r = nil
	return nil
}
|
|
|
|
|
|
|
|
func untar(ctx context.Context, r io.Reader, putObject func(reader io.Reader, info os.FileInfo, name string) error, o untarOptions) error {
|
2021-03-26 20:15:09 -04:00
|
|
|
bf := bufio.NewReader(r)
|
|
|
|
switch f := detect(bf); f {
|
|
|
|
case formatGzip:
|
|
|
|
gz, err := gzip.NewReader(bf)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer gz.Close()
|
|
|
|
r = gz
|
|
|
|
case formatS2:
|
|
|
|
r = s2.NewReader(bf)
|
|
|
|
case formatZstd:
|
2023-04-06 20:47:38 -04:00
|
|
|
// Limit to 16 MiB per stream.
|
|
|
|
dec, err := zstd.NewReader(bf, zstd.WithDecoderMaxWindow(16<<20))
|
2021-03-26 20:15:09 -04:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer dec.Close()
|
|
|
|
r = dec
|
|
|
|
case formatBZ2:
|
2022-10-18 16:50:21 -04:00
|
|
|
ctx, cancel := context.WithCancel(ctx)
|
2021-10-14 14:11:07 -04:00
|
|
|
defer cancel()
|
2021-10-18 11:44:36 -04:00
|
|
|
r = pbzip2.NewReader(ctx, bf, pbzip2.DecompressionOptions(
|
|
|
|
pbzip2.BZConcurrency((runtime.GOMAXPROCS(0)+1)/2),
|
|
|
|
pbzip2.BZConcurrencyPool(bz2Limiter)))
|
2021-03-26 20:15:09 -04:00
|
|
|
case formatLZ4:
|
|
|
|
r = lz4.NewReader(bf)
|
|
|
|
case formatUnknown:
|
|
|
|
r = bf
|
|
|
|
default:
|
|
|
|
return fmt.Errorf("Unsupported format %s", f)
|
|
|
|
}
|
|
|
|
tarReader := tar.NewReader(r)
|
2021-12-06 12:45:23 -05:00
|
|
|
n := 0
|
2022-10-18 16:50:21 -04:00
|
|
|
asyncWriters := make(chan struct{}, 16)
|
|
|
|
var wg sync.WaitGroup
|
|
|
|
|
|
|
|
var asyncErr error
|
|
|
|
var asyncErrMu sync.Mutex
|
2021-03-26 20:15:09 -04:00
|
|
|
for {
|
2022-10-18 16:50:21 -04:00
|
|
|
if !o.ignoreErrs {
|
|
|
|
asyncErrMu.Lock()
|
|
|
|
err := asyncErr
|
|
|
|
asyncErrMu.Unlock()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2021-03-26 20:15:09 -04:00
|
|
|
|
2022-10-18 16:50:21 -04:00
|
|
|
header, err := tarReader.Next()
|
2021-03-26 20:15:09 -04:00
|
|
|
switch {
|
|
|
|
|
|
|
|
// if no more files are found return
|
|
|
|
case err == io.EOF:
|
2022-10-18 16:50:21 -04:00
|
|
|
wg.Wait()
|
|
|
|
return asyncErr
|
2021-03-26 20:15:09 -04:00
|
|
|
|
|
|
|
// return any other error
|
|
|
|
case err != nil:
|
2022-10-18 16:50:21 -04:00
|
|
|
wg.Wait()
|
2021-12-06 12:45:23 -05:00
|
|
|
extra := ""
|
|
|
|
if n > 0 {
|
|
|
|
extra = fmt.Sprintf(" after %d successful object(s)", n)
|
|
|
|
}
|
|
|
|
return fmt.Errorf("tar file error: %w%s", err, extra)
|
2021-03-26 20:15:09 -04:00
|
|
|
|
|
|
|
// if the header is nil, just skip it (not sure how this happens)
|
|
|
|
case header == nil:
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
name := header.Name
|
2023-02-17 10:15:03 -05:00
|
|
|
switch path.Clean(name) {
|
|
|
|
case ".", slashSeparator:
|
2021-03-26 20:15:09 -04:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
switch header.Typeflag {
|
|
|
|
case tar.TypeDir: // = directory
|
2022-10-18 16:50:21 -04:00
|
|
|
if o.ignoreDirs {
|
|
|
|
continue
|
2021-12-06 12:45:23 -05:00
|
|
|
}
|
2022-10-18 16:50:21 -04:00
|
|
|
name = trimLeadingSlash(pathJoin(name, slashSeparator))
|
2021-03-26 20:15:09 -04:00
|
|
|
case tar.TypeReg, tar.TypeChar, tar.TypeBlock, tar.TypeFifo, tar.TypeGNUSparse: // = regular
|
2022-10-18 16:50:21 -04:00
|
|
|
name = trimLeadingSlash(path.Clean(name))
|
2021-03-26 20:15:09 -04:00
|
|
|
default:
|
|
|
|
// ignore symlink'ed
|
|
|
|
continue
|
|
|
|
}
|
2022-10-18 16:50:21 -04:00
|
|
|
if o.prefixAll != "" {
|
|
|
|
name = pathJoin(o.prefixAll, name)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Do small files async
|
|
|
|
n++
|
|
|
|
if header.Size <= smallFileThreshold {
|
|
|
|
asyncWriters <- struct{}{}
|
|
|
|
b := poolBuf128k.Get().([]byte)
|
|
|
|
if cap(b) < int(header.Size) {
|
|
|
|
b = make([]byte, smallFileThreshold)
|
|
|
|
}
|
|
|
|
b = b[:header.Size]
|
|
|
|
if _, err := io.ReadFull(tarReader, b); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
wg.Add(1)
|
|
|
|
go func(name string, fi fs.FileInfo, b []byte) {
|
|
|
|
rc := disconnectReader{r: bytes.NewReader(b)}
|
|
|
|
defer func() {
|
|
|
|
rc.Close()
|
|
|
|
<-asyncWriters
|
|
|
|
wg.Done()
|
2023-03-04 23:57:35 -05:00
|
|
|
//nolint:staticcheck // SA6002 we are fine with the tiny alloc
|
2022-10-18 16:50:21 -04:00
|
|
|
poolBuf128k.Put(b)
|
|
|
|
}()
|
|
|
|
if err := putObject(&rc, fi, name); err != nil {
|
|
|
|
if o.ignoreErrs {
|
2024-04-04 08:04:40 -04:00
|
|
|
s3LogIf(ctx, err)
|
2022-10-18 16:50:21 -04:00
|
|
|
return
|
|
|
|
}
|
|
|
|
asyncErrMu.Lock()
|
|
|
|
if asyncErr == nil {
|
|
|
|
asyncErr = err
|
|
|
|
}
|
|
|
|
asyncErrMu.Unlock()
|
|
|
|
}
|
|
|
|
}(name, header.FileInfo(), b)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2023-01-26 11:50:35 -05:00
|
|
|
// If zero or earlier modtime, set to current.
|
|
|
|
// Otherwise the resulting objects will be invalid.
|
|
|
|
if header.ModTime.UnixNano() <= 0 {
|
|
|
|
header.ModTime = time.Now()
|
|
|
|
}
|
|
|
|
|
2022-10-18 16:50:21 -04:00
|
|
|
// Sync upload.
|
|
|
|
rc := disconnectReader{r: tarReader}
|
|
|
|
if err := putObject(&rc, header.FileInfo(), name); err != nil {
|
|
|
|
rc.Close()
|
|
|
|
if o.ignoreErrs {
|
2024-04-04 08:04:40 -04:00
|
|
|
s3LogIf(ctx, err)
|
2022-10-18 16:50:21 -04:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
rc.Close()
|
2021-03-26 20:15:09 -04:00
|
|
|
}
|
|
|
|
}
|