667 lines
15 KiB
C
Raw Normal View History

2020-04-05 00:07:50 +08:00
/*
* Copyright (C) 2014 Michael Brown <mbrown@fensystems.co.uk>.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301, USA.
*/
/**
* @file
*
* LZX decompression
*
* This algorithm is derived jointly from the document "[MS-PATCH]:
* LZX DELTA Compression and Decompression", available from
*
* http://msdn.microsoft.com/en-us/library/cc483133.aspx
*
* and from the file lzx-decompress.c in the wimlib source code.
*
*/
#include "wimboot.h"
#include "huffman.h"
#include "lzx.h"
/** Base positions, indexed by position slot */
static unsigned int lzx_position_base[LZX_POSITION_SLOTS];
/**
* Attempt to accumulate bits from LZX bitstream
*
* @v lzx Decompressor
* @v bits Number of bits to accumulate
* @v norm_value Accumulated value (normalised to 16 bits)
*
* Note that there may not be sufficient accumulated bits in the
* bitstream; callers must check that sufficient bits are available
* before using the value.
*/
static int lzx_accumulate ( struct lzx *lzx, unsigned int bits ) {
const uint16_t *src16;
/* Accumulate more bits if required */
if ( ( lzx->bits < bits ) &&
( lzx->input.offset < lzx->input.len ) ) {
src16 = (const uint16_t *)( ( char * ) lzx->input.data + lzx->input.offset );
lzx->input.offset += sizeof ( *src16 );
lzx->accumulator |= ( *src16 << ( 16 - lzx->bits ) );
lzx->bits += 16;
}
return ( lzx->accumulator >> 16 );
}
/**
* Consume accumulated bits from LZX bitstream
*
* @v lzx Decompressor
* @v bits Number of bits to consume
* @ret rc Return status code
*/
static int lzx_consume ( struct lzx *lzx, unsigned int bits ) {
/* Fail if insufficient bits are available */
if ( lzx->bits < bits ) {
DBG ( "LZX input overrun in %#zx/%#zx out %#zx)\n",
lzx->input.offset, lzx->input.len, lzx->output.offset );
return -1;
}
/* Consume bits */
lzx->accumulator <<= bits;
lzx->bits -= bits;
return 0;
}
/**
* Get bits from LZX bitstream
*
* @v lzx Decompressor
* @v bits Number of bits to fetch
* @ret value Value, or negative error
*/
static int lzx_getbits ( struct lzx *lzx, unsigned int bits ) {
int norm_value;
int rc;
/* Accumulate more bits if required */
norm_value = lzx_accumulate ( lzx, bits );
/* Consume bits */
if ( ( rc = lzx_consume ( lzx, bits ) ) != 0 )
return rc;
return ( norm_value >> ( 16 - bits ) );
}
/**
* Align LZX bitstream for byte access
*
* @v lzx Decompressor
* @v bits Minimum number of padding bits
* @ret rc Return status code
*/
static int lzx_align ( struct lzx *lzx, unsigned int bits ) {
int pad;
/* Get padding bits */
pad = lzx_getbits ( lzx, bits );
if ( pad < 0 )
return pad;
/* Consume all accumulated bits */
lzx_consume ( lzx, lzx->bits );
return 0;
}
/**
* Get bytes from LZX bitstream
*
* @v lzx Decompressor
* @v data Data buffer, or NULL
* @v len Length of data buffer
* @ret rc Return status code
*/
static int lzx_getbytes ( struct lzx *lzx, void *data, size_t len ) {
/* Sanity check */
if ( ( lzx->input.offset + len ) > lzx->input.len ) {
DBG ( "LZX input overrun in %#zx/%#zx out %#zx)\n",
lzx->input.offset, lzx->input.len, lzx->output.offset );
return -1;
}
/* Copy data */
if ( data )
memcpy ( data, ( lzx->input.data + lzx->input.offset ), len );
lzx->input.offset += len;
return 0;
}
/**
* Decode LZX Huffman-coded symbol
*
* @v lzx Decompressor
* @v alphabet Huffman alphabet
* @ret raw Raw symbol, or negative error
*/
static int lzx_decode ( struct lzx *lzx, struct huffman_alphabet *alphabet ) {
struct huffman_symbols *sym;
int huf;
int rc;
/* Accumulate sufficient bits */
huf = lzx_accumulate ( lzx, HUFFMAN_BITS );
if ( huf < 0 )
return huf;
/* Decode symbol */
sym = huffman_sym ( alphabet, huf );
/* Consume bits */
if ( ( rc = lzx_consume ( lzx, huffman_len ( sym ) ) ) != 0 )
return rc;
return huffman_raw ( sym, huf );
}
/**
* Generate Huffman alphabet from raw length table
*
* @v lzx Decompressor
* @v count Number of symbols
* @v bits Length of each length (in bits)
* @v lengths Lengths table to fill in
* @v alphabet Huffman alphabet to fill in
* @ret rc Return status code
*/
static int lzx_raw_alphabet ( struct lzx *lzx, unsigned int count,
unsigned int bits, uint8_t *lengths,
struct huffman_alphabet *alphabet ) {
unsigned int i;
int len;
int rc;
/* Read lengths */
for ( i = 0 ; i < count ; i++ ) {
len = lzx_getbits ( lzx, bits );
if ( len < 0 )
return len;
lengths[i] = len;
}
/* Generate Huffman alphabet */
if ( ( rc = huffman_alphabet ( alphabet, lengths, count ) ) != 0 )
return rc;
return 0;
}
/**
* Generate pretree
*
* @v lzx Decompressor
* @v count Number of symbols
* @v lengths Lengths table to fill in
* @ret rc Return status code
*/
static int lzx_pretree ( struct lzx *lzx, unsigned int count,
uint8_t *lengths ) {
unsigned int i;
unsigned int length;
int dup = 0;
int code;
int rc;
/* Generate pretree alphabet */
if ( ( rc = lzx_raw_alphabet ( lzx, LZX_PRETREE_CODES,
LZX_PRETREE_BITS, lzx->pretree_lengths,
&lzx->pretree ) ) != 0 )
return rc;
/* Read lengths */
for ( i = 0 ; i < count ; i++ ) {
if ( dup ) {
/* Duplicate previous length */
lengths[i] = lengths[ i - 1 ];
dup--;
} else {
/* Get next code */
code = lzx_decode ( lzx, &lzx->pretree );
if ( code < 0 )
return code;
/* Interpret code */
if ( code <= 16 ) {
length = ( ( lengths[i] - code + 17 ) % 17 );
} else if ( code == 17 ) {
length = 0;
dup = lzx_getbits ( lzx, 4 );
if ( dup < 0 )
return dup;
dup += 3;
} else if ( code == 18 ) {
length = 0;
dup = lzx_getbits ( lzx, 5 );
if ( dup < 0 )
return dup;
dup += 19;
} else if ( code == 19 ) {
length = 0;
dup = lzx_getbits ( lzx, 1 );
if ( dup < 0 )
return dup;
dup += 3;
code = lzx_decode ( lzx, &lzx->pretree );
if ( code < 0 )
return code;
length = ( ( lengths[i] - code + 17 ) % 17 );
} else {
DBG ( "Unrecognised pretree code %d\n", code );
return -1;
}
lengths[i] = length;
}
}
/* Sanity check */
if ( dup ) {
DBG ( "Pretree duplicate overrun\n" );
return -1;
}
return 0;
}
/**
* Generate aligned offset Huffman alphabet
*
* @v lzx Decompressor
* @ret rc Return status code
*/
static int lzx_alignoffset_alphabet ( struct lzx *lzx ) {
int rc;
/* Generate aligned offset alphabet */
if ( ( rc = lzx_raw_alphabet ( lzx, LZX_ALIGNOFFSET_CODES,
LZX_ALIGNOFFSET_BITS,
lzx->alignoffset_lengths,
&lzx->alignoffset ) ) != 0 )
return rc;
return 0;
}
/**
* Generate main Huffman alphabet
*
* @v lzx Decompressor
* @ret rc Return status code
*/
static int lzx_main_alphabet ( struct lzx *lzx ) {
int rc;
/* Generate literal symbols pretree */
if ( ( rc = lzx_pretree ( lzx, LZX_MAIN_LIT_CODES,
lzx->main_lengths.literals ) ) != 0 ) {
DBG ( "Could not construct main literal pretree\n" );
return rc;
}
/* Generate remaining symbols pretree */
if ( ( rc = lzx_pretree ( lzx, ( LZX_MAIN_CODES - LZX_MAIN_LIT_CODES ),
lzx->main_lengths.remainder ) ) != 0 ) {
DBG ( "Could not construct main remainder pretree\n" );
return rc;
}
/* Generate Huffman alphabet */
if ( ( rc = huffman_alphabet ( &lzx->main, lzx->main_lengths.literals,
LZX_MAIN_CODES ) ) != 0 ) {
DBG ( "Could not generate main alphabet\n" );
return rc;
}
return 0;
}
/**
* Generate length Huffman alphabet
*
* @v lzx Decompressor
* @ret rc Return status code
*/
static int lzx_length_alphabet ( struct lzx *lzx ) {
int rc;
/* Generate pretree */
if ( ( rc = lzx_pretree ( lzx, LZX_LENGTH_CODES,
lzx->length_lengths ) ) != 0 ) {
DBG ( "Could not generate length pretree\n" );
return rc;
}
/* Generate Huffman alphabet */
if ( ( rc = huffman_alphabet ( &lzx->length, lzx->length_lengths,
LZX_LENGTH_CODES ) ) != 0 ) {
DBG ( "Could not generate length alphabet\n" );
return rc;
}
return 0;
}
/**
* Process LZX block header
*
* @v lzx Decompressor
* @ret rc Return status code
*/
static int lzx_block_header ( struct lzx *lzx ) {
size_t block_len;
int block_type;
int default_len;
int len_high;
int len_low;
int rc;
/* Get block type */
block_type = lzx_getbits ( lzx, LZX_BLOCK_TYPE_BITS );
if ( block_type < 0 )
return block_type;
lzx->block_type = block_type;
/* Check block length */
default_len = lzx_getbits ( lzx, 1 );
if ( default_len < 0 )
return default_len;
if ( default_len ) {
block_len = LZX_DEFAULT_BLOCK_LEN;
} else {
len_high = lzx_getbits ( lzx, 8 );
if ( len_high < 0 )
return len_high;
len_low = lzx_getbits ( lzx, 8 );
if ( len_low < 0 )
return len_low;
block_len = ( ( len_high << 8 ) | len_low );
}
lzx->output.threshold = ( lzx->output.offset + block_len );
/* Handle block type */
switch ( block_type ) {
case LZX_BLOCK_ALIGNOFFSET :
/* Generated aligned offset alphabet */
if ( ( rc = lzx_alignoffset_alphabet ( lzx ) ) != 0 )
return rc;
/* Fall through */
case LZX_BLOCK_VERBATIM :
/* Generate main alphabet */
if ( ( rc = lzx_main_alphabet ( lzx ) ) != 0 )
return rc;
/* Generate lengths alphabet */
if ( ( rc = lzx_length_alphabet ( lzx ) ) != 0 )
return rc;
break;
case LZX_BLOCK_UNCOMPRESSED :
/* Align input stream */
if ( ( rc = lzx_align ( lzx, 1 ) ) != 0 )
return rc;
/* Read new repeated offsets */
if ( ( rc = lzx_getbytes ( lzx, &lzx->repeated_offset,
sizeof ( lzx->repeated_offset )))!=0)
return rc;
break;
default:
DBG ( "Unrecognised block type %d\n", block_type );
return -1;
}
return 0;
}
/**
* Process uncompressed data
*
* @v lzx Decompressor
* @ret rc Return status code
*/
static int lzx_uncompressed ( struct lzx *lzx ) {
void *data;
size_t len;
int rc;
/* Copy bytes */
data = ( lzx->output.data ?
( lzx->output.data + lzx->output.offset ) : NULL );
len = ( lzx->output.threshold - lzx->output.offset );
if ( ( rc = lzx_getbytes ( lzx, data, len ) ) != 0 )
return rc;
/* Align input stream */
if ( len % 2 )
lzx->input.offset++;
return 0;
}
/**
* Process an LZX token
*
* @v lzx Decompressor
* @ret rc Return status code
*
* Variable names are chosen to match the LZX specification
* pseudo-code.
*/
static int lzx_token ( struct lzx *lzx ) {
unsigned int length_header;
unsigned int position_slot;
unsigned int offset_bits;
unsigned int i;
size_t match_offset;
size_t match_length;
int verbatim_bits;
int aligned_bits;
int maindata;
int length;
uint8_t *copy;
/* Get maindata symelse*/
maindata = lzx_decode ( lzx, &lzx->main );
if ( maindata < 0 )
return maindata;
/* Check for literals */
if ( maindata < LZX_MAIN_LIT_CODES ) {
if ( lzx->output.data )
lzx->output.data[lzx->output.offset] = maindata;
lzx->output.offset++;
return 0;
}
maindata -= LZX_MAIN_LIT_CODES;
/* Calculate the match length */
length_header = ( maindata & 7 );
if ( length_header == 7 ) {
length = lzx_decode ( lzx, &lzx->length );
if ( length < 0 )
return length;
} else {
length = 0;
}
match_length = ( length_header + 2 + length );
/* Calculate the position slot */
position_slot = ( maindata >> 3 );
if ( position_slot < LZX_REPEATED_OFFSETS ) {
/* Repeated offset */
match_offset = lzx->repeated_offset[position_slot];
lzx->repeated_offset[position_slot] = lzx->repeated_offset[0];
lzx->repeated_offset[0] = match_offset;
} else {
/* Non-repeated offset */
offset_bits = lzx_footer_bits ( position_slot );
if ( ( lzx->block_type == LZX_BLOCK_ALIGNOFFSET ) &&
( offset_bits >= 3 ) ) {
verbatim_bits = lzx_getbits ( lzx, ( offset_bits - 3 ));
if ( verbatim_bits < 0 )
return verbatim_bits;
verbatim_bits <<= 3;
aligned_bits = lzx_decode ( lzx, &lzx->alignoffset );
if ( aligned_bits < 0 )
return aligned_bits;
} else {
verbatim_bits = lzx_getbits ( lzx, offset_bits );
if ( verbatim_bits < 0 )
return verbatim_bits;
aligned_bits = 0;
}
match_offset = ( lzx_position_base[position_slot] +
verbatim_bits + aligned_bits - 2 );
/* Update repeated offset list */
for ( i = ( LZX_REPEATED_OFFSETS - 1 ) ; i > 0 ; i-- )
lzx->repeated_offset[i] = lzx->repeated_offset[ i - 1 ];
lzx->repeated_offset[0] = match_offset;
}
/* Copy data */
if ( match_offset > lzx->output.offset ) {
DBG ( "LZX match underrun out 0x%x offset 0x%x len 0x%x\n",
lzx->output.offset, match_offset, match_length );
return -1;
}
if ( lzx->output.data ) {
copy = &lzx->output.data[lzx->output.offset];
for ( i = 0 ; i < match_length ; i++ )
copy[i] = copy[ i - match_offset ];
}
lzx->output.offset += match_length;
return 0;
}
/**
* Translate E8 jump addresses
*
* @v lzx Decompressor
*/
static void lzx_translate_jumps ( struct lzx *lzx ) {
size_t offset;
int32_t *target;
/* Sanity check */
if ( lzx->output.offset < 10 )
return;
/* Scan for jump instructions */
for ( offset = 0 ; offset < ( lzx->output.offset - 10 ) ; offset++ ) {
/* Check for jump instruction */
if ( lzx->output.data[offset] != 0xe8 )
continue;
/* Translate jump target */
target = ( ( int32_t * ) &lzx->output.data[ offset + 1 ] );
if ( *target >= 0 ) {
if ( *target < LZX_WIM_MAGIC_FILESIZE )
*target -= offset;
} else {
if ( *target >= -( ( int32_t ) offset ) )
*target += LZX_WIM_MAGIC_FILESIZE;
}
offset += sizeof ( *target );
}
}
/**
* Decompress LZX-compressed data
*
* @v data Compressed data
* @v len Length of compressed data
* @v buf Decompression buffer, or NULL
* @ret out_len Length of decompressed data, or negative error
*/
ssize_t lzx_decompress ( const void *data, size_t len, void *buf ) {
struct lzx lzx;
unsigned int i;
int rc;
/* Sanity check */
if ( len % 2 ) {
DBG ( "LZX cannot handle odd-length input data\n" );
return -1;
}
/* Initialise global state, if required */
if ( ! lzx_position_base[ LZX_POSITION_SLOTS - 1 ] ) {
for ( i = 1 ; i < LZX_POSITION_SLOTS ; i++ ) {
lzx_position_base[i] =
( lzx_position_base[i-1] +
( 1 << lzx_footer_bits ( i - 1 ) ) );
}
}
/* Initialise decompressor */
memset ( &lzx, 0, sizeof ( lzx ) );
lzx.input.data = data;
lzx.input.len = len;
lzx.output.data = buf;
for ( i = 0 ; i < LZX_REPEATED_OFFSETS ; i++ )
lzx.repeated_offset[i] = 1;
/* Process blocks */
while ( lzx.input.offset < lzx.input.len ) {
/* Process block header */
if ( ( rc = lzx_block_header ( &lzx ) ) != 0 )
return rc;
/* Process block contents */
if ( lzx.block_type == LZX_BLOCK_UNCOMPRESSED ) {
/* Copy uncompressed data */
if ( ( rc = lzx_uncompressed ( &lzx ) ) != 0 )
return rc;
} else {
/* Process token stream */
while ( lzx.output.offset < lzx.output.threshold ) {
if ( ( rc = lzx_token ( &lzx ) ) != 0 )
return rc;
}
}
}
/* Postprocess to undo E8 jump compression */
if ( lzx.output.data )
lzx_translate_jumps ( &lzx );
return lzx.output.offset;
}