Release 2.0.0

This commit is contained in:
Santiago Lezica
2021-01-29 18:51:08 -03:00
parent 8107c4478b
commit cef49eff22
209 changed files with 70157 additions and 926 deletions

6
vendor/github.com/hhrutter/lzw/.gitignore generated vendored Normal file
View File

@@ -0,0 +1,6 @@
# Mac
**/.DS_Store
**/._.DS_Store
# VSCode
.vscode/*

27
vendor/github.com/hhrutter/lzw/LICENSE generated vendored Normal file
View File

@@ -0,0 +1,27 @@
Copyright (c) 2009 The Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

37
vendor/github.com/hhrutter/lzw/README.md generated vendored Normal file
View File

@@ -0,0 +1,37 @@
# Note
* This is a consolidated version of [compress/lzw](https://github.com/golang/go/tree/master/src/compress/lzw) that supports GIF, TIFF and PDF.
* Please refer to this [golang proposal](https://github.com/golang/go/issues/25409) for details.
* [github.com/hhrutter/tiff](https://github.com/hhrutter/tiff) uses this package to extend [x/image/tiff](https://github.com/golang/image/tree/master/tiff).
* [pdfcpu](https://github.com/pdfcpu/pdfcpu) uses this package for processing PDFs with embedded TIFF images.
## Background
* PDF's LZWDecode filter comes with the optional parameter `EarlyChange`.
* The type of this parameter is `int` and the defined values are 0 and 1.
* The default value is 1.
This parameter implies two variants of LZW (see the [PDF spec](https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf)).
[compress/lzw](https://github.com/golang/go/tree/master/src/compress/lzw):
* the algorithm implied by EarlyChange value 1
* provides both Reader and Writer.
[x/image/tiff/lzw](https://github.com/golang/image/tree/master/tiff/lzw):
* the algorithm implied by EarlyChange value 0
* provides a Reader, lacks a Writer
In addition, PDF expects a leading `clear_table` marker right at the beginning of the data,
which [compress/lzw](https://github.com/golang/go/tree/master/src/compress/lzw) does not take into account.
There are numerous PDF writers out there, and in arbitrary PDF files that use the LZWDecode filter the following can be observed:
* Some PDF writers do not write the EOD (end of data) marker.
* Some PDF writers do not write the final bits after the EOD marker.
## Goal
An extended version of [compress/lzw](https://github.com/golang/go/tree/master/src/compress/lzw) with reliable support for GIF, TIFF and PDF.

3
vendor/github.com/hhrutter/lzw/go.mod generated vendored Normal file
View File

@@ -0,0 +1,3 @@
module github.com/hhrutter/lzw
go 1.12

238
vendor/github.com/hhrutter/lzw/reader.go generated vendored Normal file
View File

@@ -0,0 +1,238 @@
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package lzw is an enhanced version of compress/lzw.
//
// It implements Adobe's PDF lzw compression as defined for the LZWDecode filter
// and is also compatible with the TIFF file format.
//
// See the golang proposal: https://github.com/golang/go/issues/25409.
//
// More information: https://github.com/pdfcpu/pdfcpu/tree/master/lzw
package lzw
import (
"bufio"
"errors"
"io"
)
// Limits used by the decoder.
const (
	// maxWidth is the largest code width in bits.
	maxWidth = 12

	// decoderInvalidCode is stored in decoder.last when there is no
	// previous code that a new table entry could extend.
	decoderInvalidCode = 1<<16 - 1

	// flushBuffer is the number of pending output bytes at which decode
	// stops and hands what it has produced over to Read.
	flushBuffer = 1 << maxWidth
)
// decoder carries everything needed to turn a compressed byte stream into
// codes and those codes into decompressed bytes.
type decoder struct {
	r     io.ByteReader // source of compressed bytes
	bits  uint32        // bit accumulator filled by the read function
	nBits uint          // number of valid bits currently held in bits
	width uint          // current code width in bits

	// read extracts the next code from r. NewReader always installs
	// readMSB, which is the bit order used for PDF and TIFF.
	read func(*decoder) (uint16, error)

	litWidth uint  // width in bits of literal codes
	err      error // returned by Read once no decoded bytes are pending

	// The first 1<<litWidth codes are literal codes.
	// The next two codes mean clear and EOF.
	// Other valid codes are in the range [lo, hi] where lo := clear + 2,
	// with the upper bound incrementing on each code seen.
	//
	// overflow is the code at which hi overflows the code width.
	// NOTE: TIFF's LZW is "off by one".
	//
	// last is the most recently seen code, or decoderInvalidCode.
	//
	// The intended invariant is
	//	(hi < overflow) || (hi == overflow && last == decoderInvalidCode)
	clear    uint16
	eof      uint16
	hi       uint16
	overflow uint16
	last     uint16

	// Every code c in [lo, hi] expands to two or more bytes. For c != hi:
	//	suffix[c] is the last of these bytes.
	//	prefix[c] is the code for all but the last byte.
	//	This code can either be a literal code or another code in [lo, c).
	// The c == hi case is a special case.
	suffix [1 << maxWidth]uint8
	prefix [1 << maxWidth]uint16

	// output is the temporary output buffer.
	// Literal codes are accumulated from the start of the buffer.
	// Non-literal codes decode to a sequence of suffixes that are first
	// written right-to-left from the end of the buffer before being copied
	// to the start of the buffer.
	// It is flushed when it contains >= 1<<maxWidth bytes,
	// so that there is always room to decode an entire code.
	output [2 << maxWidth]byte
	o      int    // write index into output
	toRead []byte // decoded bytes that Read has not handed out yet

	// oneOff makes code length increases occur one code early.
	oneOff bool
}
// readMSB returns the next d.width-bit code for "Most Significant Bits
// first" data. Bytes are pulled from d.r until enough bits are buffered;
// any error from d.r is returned unchanged together with a zero code.
func (d *decoder) readMSB() (uint16, error) {
	for d.nBits < d.width {
		b, err := d.r.ReadByte()
		if err != nil {
			return 0, err
		}
		// Append the new byte directly below the bits already buffered.
		d.bits |= uint32(b) << (24 - d.nBits)
		d.nBits += 8
	}

	// The code is the top d.width bits of the accumulator.
	shift := 32 - d.width
	code := uint16(d.bits >> shift)
	d.bits <<= d.width
	d.nBits -= d.width
	return code, nil
}
// Read copies decoded bytes into b, calling decode to produce more whenever
// nothing is pending. Once the pending bytes are used up and d.err is set,
// it returns 0 together with that error.
func (d *decoder) Read(b []byte) (int, error) {
	// Refill until there is something to hand out or decoding has ended.
	for len(d.toRead) == 0 {
		if d.err != nil {
			return 0, d.err
		}
		d.decode()
	}

	n := copy(b, d.toRead)
	d.toRead = d.toRead[n:]
	return n, nil
}
// handleOverflow is called by decode right after d.hi has been incremented
// for a newly seen code. It widens the code width when the codes of the
// current width are used up, or, once the width has reached maxWidth,
// freezes the table until the next clear code arrives.
//
// With oneOff set the check is made against d.hi+1, so the width changes
// one code early.
func (d *decoder) handleOverflow() {
	ui := d.hi
	if d.oneOff {
		ui++
	}
	if ui < d.overflow {
		return
	}

	if d.width != maxWidth {
		d.width++
		d.overflow <<= 1
		return
	}

	// The table is full: stop adding entries until the next clear code.
	d.last = decoderInvalidCode

	// Pin d.hi to the largest code of the maximum width.
	//
	// Without oneOff, d.hi == d.overflow here, so this undoes the increment
	// made by decode, as before. With oneOff, d.hi is d.overflow-1 the first
	// time the table fills up and used to keep growing by one per code
	// afterwards. Once it reached 0xffff, the ui++ above wrapped to 0, the
	// freeze was skipped, and the next d.suffix[d.hi] store in decode
	// indexed past the 1<<maxWidth-entry tables. Pinning keeps d.hi within
	// bounds and leaves the set of accepted codes unchanged.
	d.hi = d.overflow - 1
}
// decode decompresses bytes from d.r and leaves them in d.toRead.
//
// It reads codes through d.read and expands them into d.output until at
// least flushBuffer bytes are pending, the eof code is seen, an invalid
// code is found, or reading fails. The terminal condition, if any, is
// recorded in d.err for Read to report after the pending bytes.
func (d *decoder) decode() {
	// Loop over the code stream, converting codes into decompressed bytes.
loop:
	for {
		code, err := d.read(d)
		if err != nil {
			// Some PDF writers write an EOD marker and some don't, so the
			// marker is not insisted on: the reader's error is stored
			// unchanged instead of being turned into an unexpected-EOF
			// error.
			d.err = err
			break loop
		}
		switch {
		case code < d.clear:
			// We have a literal code.
			d.output[d.o] = uint8(code)
			d.o++
			if d.last != decoderInvalidCode {
				// Save what the hi code expands to.
				d.suffix[d.hi] = uint8(code)
				d.prefix[d.hi] = d.last
			}
		case code == d.clear:
			// Reset the table; no new entry is created for a clear code,
			// so skip the bookkeeping at the bottom of the loop.
			d.width = 1 + d.litWidth
			d.hi = d.eof
			d.overflow = 1 << d.width
			d.last = decoderInvalidCode
			continue
		case code == d.eof:
			d.err = io.EOF
			break loop
		case code <= d.hi:
			c, i := code, len(d.output)-1
			if code == d.hi && d.last != decoderInvalidCode {
				// code == hi is a special case which expands to the last expansion
				// followed by the head of the last expansion. To find the head, we walk
				// the prefix chain until we find a literal code.
				c = d.last
				for c >= d.clear {
					c = d.prefix[c]
				}
				d.output[i] = uint8(c)
				i--
				c = d.last
			}
			// Write the suffix chain right-to-left at the end of output,
			// then move it behind the bytes that are already pending.
			for c >= d.clear {
				d.output[i] = d.suffix[c]
				i--
				c = d.prefix[c]
			}
			d.output[i] = uint8(c)
			d.o += copy(d.output[d.o:], d.output[i:])
			if d.last != decoderInvalidCode {
				// Save what the hi code expands to.
				d.suffix[d.hi] = uint8(c)
				d.prefix[d.hi] = d.last
			}
		default:
			d.err = errors.New("lzw: invalid code")
			break loop
		}
		d.last, d.hi = code, d.hi+1
		d.handleOverflow()
		if d.o >= flushBuffer {
			break loop
		}
	}
	// Flush pending output.
	d.toRead = d.output[:d.o]
	d.o = 0
}
// errClosed is the error stored in a decoder or encoder once it has been
// closed.
var errClosed = errors.New("lzw: reader/writer is closed")
// Close marks the decoder as closed. It always returns nil and does not
// touch the underlying reader.
func (d *decoder) Close() error {
d.err = errClosed // reported by any later Read once its pending bytes are drained
return nil
}
// NewReader returns an io.ReadCloser whose Read method decompresses the
// data read from r.
//
// If r does not also implement io.ByteReader it is wrapped in a
// bufio.Reader, in which case the decompressor may read more data than
// necessary from r.
//
// It is the caller's responsibility to call Close on the returned
// ReadCloser when finished reading.
//
// oneOff makes code length increases occur one code early. It should be true
// for LZWDecode filters with earlyChange=1 which is also the default.
func NewReader(r io.Reader, oneOff bool) io.ReadCloser {
	var src io.ByteReader
	if br, ok := r.(io.ByteReader); ok {
		src = br
	} else {
		src = bufio.NewReader(r)
	}

	// Literal codes are 8 bits wide; codes start out one bit wider.
	const (
		litWidth   = uint(8)
		clearCode  = uint16(1) << litWidth
		startWidth = litWidth + 1
	)

	d := &decoder{
		r:        src,
		read:     (*decoder).readMSB,
		litWidth: litWidth,
		width:    startWidth,
		clear:    clearCode,
		eof:      clearCode + 1,
		hi:       clearCode + 1,
		overflow: uint16(1) << startWidth,
		last:     decoderInvalidCode,
		oneOff:   oneOff,
	}
	return d
}

283
vendor/github.com/hhrutter/lzw/writer.go generated vendored Normal file
View File

@@ -0,0 +1,283 @@
// Derived from compress/lzw in order to implement
// Adobe's PDF lzw compression as defined for the LZWDecode filter.
// See https://www.adobe.com/content/dam/acom/en/devnet/pdf/pdfs/PDF32000_2008.pdf
// and https://github.com/golang/go/issues/25409.
//
// It is also compatible with the TIFF file format.
//
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package lzw
import (
"bufio"
"errors"
"io"
)
// A writer is a buffered, flushable writer: the byte sink that the encoder
// emits compressed bytes to. NewWriter uses its argument directly when it
// already satisfies this interface and wraps it in a bufio.Writer otherwise.
type writer interface {
io.ByteWriter
// Flush is called at the end of the encoder's Close, after the final bits
// have been written.
Flush() error
}
// errWriteCloser is an io.WriteCloser whose every operation fails with the
// same, fixed error.
type errWriteCloser struct {
	err error // returned by both Write and Close
}

// Write accepts no data and reports the stored error.
func (w *errWriteCloser) Write(_ []byte) (int, error) {
	return 0, w.err
}

// Close reports the stored error.
func (w *errWriteCloser) Close() error {
	return w.err
}
// Limits and sentinels used by the encoder.
const (
	// A code is a 12 bit value, stored as a uint32 when encoding to avoid
	// type conversions when shifting bits.
	maxCode     = 0xfff
	invalidCode = 0xffffffff

	// There are 1<<12 possible codes, which is an upper bound on the number
	// of valid hash table entries at any given point in time. tableSize is
	// four times that, and tableMask reduces a hash to a table index.
	tableSize = 4 << 12
	tableMask = tableSize - 1

	// A hash table entry is a uint32. Zero is an invalid entry since the
	// lower 12 bits of a valid entry must be a non-literal code.
	invalidEntry = 0
)
// encoder is an LZW compressor.
type encoder struct {
	// w receives the compressed bytes.
	w writer

	// write, bits, nBits and width together hold the state for turning
	// the code stream into a byte stream.
	write func(*encoder, uint32) error
	bits  uint32
	nBits uint
	width uint

	// litWidth is the width in bits of literal codes.
	litWidth uint

	// hi is the code implied by the next code emission.
	hi uint32
	// overflow is the code at which hi overflows the code width.
	overflow uint32

	// savedCode is the accumulated code at the end of the most recent Write
	// call. It is equal to invalidCode if there was no such call.
	savedCode uint32

	// err is the first error encountered during writing. Closing the
	// encoder sets it to errClosed, which any later Write then returns.
	err error

	// table is the hash table from 20-bit keys to 12-bit values. Each table
	// entry contains key<<12|val and collisions resolve by linear probing.
	// The keys consist of a 12-bit code prefix and an 8-bit byte suffix.
	// The values are a 12-bit code.
	table [tableSize]uint32

	// oneOff makes code length increases occur one code early.
	oneOff bool
}
// writeLSB appends the code c, e.width bits wide, to the output for "Least
// Significant Bits first" data and emits every complete byte to e.w.
func (e *encoder) writeLSB(c uint32) error {
	// New bits go above the ones that are still waiting to be written.
	e.bits |= c << e.nBits
	e.nBits += e.width

	for e.nBits >= 8 {
		low := uint8(e.bits)
		if err := e.w.WriteByte(low); err != nil {
			return err
		}
		e.bits >>= 8
		e.nBits -= 8
	}
	return nil
}
// writeMSB appends the code c, e.width bits wide, to the output for "Most
// Significant Bits first" data and emits every complete byte to e.w.
func (e *encoder) writeMSB(c uint32) error {
	// New bits go directly below the ones that are still waiting at the
	// top of the accumulator.
	shift := 32 - e.width - e.nBits
	e.bits |= c << shift
	e.nBits += e.width

	for e.nBits >= 8 {
		top := uint8(e.bits >> 24)
		if err := e.w.WriteByte(top); err != nil {
			return err
		}
		e.bits <<= 8
		e.nBits -= 8
	}
	return nil
}
// errOutOfCodes is an internal error that means that the encoder has run out
// of unused codes and a clear code needs to be sent next.
var errOutOfCodes = errors.New("lzw: out of codes")

// incHi advances e.hi and handles the two thresholds that the new value can
// cross: widening the code width and running out of unused codes. When the
// codes run out, incHi writes a clear code, resets the encoder state
// (including the hash table) and returns errOutOfCodes.
func (e *encoder) incHi() error {
	e.hi++

	// The PDF spec defines for the LZWDecode filter a parameter
	// "EarlyChange" which selects the variant of LZW to use. With oneOff
	// set, both thresholds below are reached one code early. The standard
	// compress/lzw does not know about oneOff.
	next := e.hi
	if e.oneOff {
		next++
	}

	if next == e.overflow {
		e.width++
		e.overflow <<= 1
	}
	if next != maxCode {
		return nil
	}

	// Out of codes: tell the decoder to start over and do the same here.
	clear := uint32(1) << e.litWidth
	if err := e.write(e, clear); err != nil {
		return err
	}
	e.width = e.litWidth + 1
	e.hi = clear + 1
	e.overflow = clear << 1
	for i := range e.table {
		e.table[i] = invalidEntry
	}
	return errOutOfCodes
}
// Write compresses p and writes the result to e's underlying writer.
//
// It returns len(p) and a nil error on success. After a failure it returns
// 0 and the error, which is also remembered in e.err so that every later
// call fails the same way.
func (e *encoder) Write(p []byte) (n int, err error) {
	if e.err != nil {
		return 0, e.err
	}
	if len(p) == 0 {
		return 0, nil
	}

	// Reject bytes that cannot be represented as a literal code.
	if maxLit := uint8(1<<e.litWidth - 1); maxLit != 0xff {
		for _, b := range p {
			if b > maxLit {
				e.err = errors.New("lzw: input byte too large for the litWidth")
				return 0, e.err
			}
		}
	}
	n = len(p)

	// Continue from the code left over by the previous call, if any.
	code := e.savedCode
	if code == invalidCode {
		// The first code sent is always a literal code.
		code = uint32(p[0])
		p = p[1:]
	}

loop:
	for _, b := range p {
		lit := uint32(b)
		key := code<<8 | lit
		slot := (key>>12 ^ key) & tableMask

		// Probe for key. On a hit the matched sequence just grows by one
		// byte and no code is emitted yet.
		for probe := slot; e.table[probe] != invalidEntry; probe = (probe + 1) & tableMask {
			entry := e.table[probe]
			if key == entry>>12 {
				code = entry & maxCode
				continue loop
			}
		}

		// Miss: emit the current code; lit becomes the start of the next
		// emitted code.
		e.err = e.write(e, code)
		if e.err != nil {
			return 0, e.err
		}
		code = lit

		// Advance e.hi, the next implied code. If the encoder ran out of
		// codes its state, hash table included, has been reset and there
		// is nothing to insert.
		switch hiErr := e.incHi(); {
		case hiErr == nil:
		case hiErr == errOutOfCodes:
			continue
		default:
			e.err = hiErr
			return 0, e.err
		}

		// Insert key -> e.hi into the first free slot of the probe chain.
		for e.table[slot] != invalidEntry {
			slot = (slot + 1) & tableMask
		}
		e.table[slot] = (key << 12) | e.hi
	}

	e.savedCode = code
	return n, nil
}
// Close finishes the compressed stream: it writes the pending code, the eof
// code and any remaining bits, then flushes e's writer. It does not close
// the underlying writer.
//
// Closing an already closed encoder returns nil. Closing an encoder that
// has failed returns the stored error.
func (e *encoder) Close() error {
	switch e.err {
	case nil:
		// Healthy encoder, carry on below.
	case errClosed:
		return nil
	default:
		return e.err
	}

	// Make any future calls to Write return errClosed.
	e.err = errClosed

	// Write the savedCode if valid.
	if e.savedCode != invalidCode {
		if err := e.write(e, e.savedCode); err != nil {
			return err
		}
		// Running out of codes here is harmless, incHi has already sent
		// the clear code.
		err := e.incHi()
		if err != nil && err != errOutOfCodes {
			return err
		}
	}

	// Write the eof code.
	eof := uint32(1)<<e.litWidth + 1
	if err := e.write(e, eof); err != nil {
		return err
	}

	// Write the final bits.
	if e.nBits > 0 {
		e.bits >>= 24
		last := uint8(e.bits)
		if err := e.w.WriteByte(last); err != nil {
			return err
		}
	}
	return e.w.Flush()
}
// NewWriter creates a new io.WriteCloser.
// Writes to the returned io.WriteCloser are compressed and written to w.
// If w does not provide WriteByte and Flush itself it is wrapped in a
// bufio.Writer.
//
// It is the caller's responsibility to call Close on the WriteCloser when
// finished writing.
//
// oneOff makes code length increases occur one code early. It should be true
// for LZWDecode filters with earlyChange=1 which is also the default.
//
// NewWriter immediately emits the leading clear code. If that fails, the
// error is kept and returned by every later Write and by Close.
func NewWriter(w io.Writer, oneOff bool) io.WriteCloser {
	bw, ok := w.(writer)
	if !ok {
		bw = bufio.NewWriter(w)
	}
	lw := uint(8)
	e := &encoder{
		w:         bw,
		write:     (*encoder).writeMSB,
		litWidth:  lw,
		width:     1 + lw,
		hi:        1<<lw + 1,
		overflow:  1 << (lw + 1),
		savedCode: invalidCode,
		oneOff:    oneOff,
	}
	// Write initial clear_table.
	// The standard compress/lzw does not do this.
	clear := uint32(1) << e.litWidth
	if err := e.write(e, clear); err != nil {
		// Do not drop the failure: a stream without its leading clear
		// code would be corrupt, so poison the encoder instead.
		e.err = err
	}
	return e
}