Home | History | Annotate | Line # | Download | only in gzip
unlz.c revision 1.6.2.2
      1  1.6.2.2  pgoyette /*	$NetBSD: unlz.c,v 1.6.2.2 2018/11/26 01:52:54 pgoyette Exp $	*/
      2  1.6.2.2  pgoyette 
      3  1.6.2.2  pgoyette /*-
      4  1.6.2.2  pgoyette  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  1.6.2.2  pgoyette  * All rights reserved.
      6  1.6.2.2  pgoyette  *
      7  1.6.2.2  pgoyette  * This code is derived from software contributed to The NetBSD Foundation
      8  1.6.2.2  pgoyette  * by Christos Zoulas.
      9  1.6.2.2  pgoyette  *
     10  1.6.2.2  pgoyette  * Redistribution and use in source and binary forms, with or without
     11  1.6.2.2  pgoyette  * modification, are permitted provided that the following conditions
     12  1.6.2.2  pgoyette  * are met:
     13  1.6.2.2  pgoyette  * 1. Redistributions of source code must retain the above copyright
     14  1.6.2.2  pgoyette  *    notice, this list of conditions and the following disclaimer.
     15  1.6.2.2  pgoyette  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.6.2.2  pgoyette  *    notice, this list of conditions and the following disclaimer in the
     17  1.6.2.2  pgoyette  *    documentation and/or other materials provided with the distribution.
     18  1.6.2.2  pgoyette  *
     19  1.6.2.2  pgoyette  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  1.6.2.2  pgoyette  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  1.6.2.2  pgoyette  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  1.6.2.2  pgoyette  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  1.6.2.2  pgoyette  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  1.6.2.2  pgoyette  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  1.6.2.2  pgoyette  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  1.6.2.2  pgoyette  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  1.6.2.2  pgoyette  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  1.6.2.2  pgoyette  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  1.6.2.2  pgoyette  * POSSIBILITY OF SUCH DAMAGE.
     30  1.6.2.2  pgoyette  */
     31  1.6.2.2  pgoyette 
     32  1.6.2.2  pgoyette /*  Lzd - Educational decompressor for the lzip format
     33  1.6.2.2  pgoyette     Copyright (C) 2013-2018 Antonio Diaz Diaz.
     34  1.6.2.2  pgoyette 
     35  1.6.2.2  pgoyette     This program is free software. Redistribution and use in source and
     36  1.6.2.2  pgoyette     binary forms, with or without modification, are permitted provided
     37  1.6.2.2  pgoyette     that the following conditions are met:
     38  1.6.2.2  pgoyette 
     39  1.6.2.2  pgoyette     1. Redistributions of source code must retain the above copyright
     40  1.6.2.2  pgoyette     notice, this list of conditions and the following disclaimer.
     41  1.6.2.2  pgoyette 
     42  1.6.2.2  pgoyette     2. Redistributions in binary form must reproduce the above copyright
     43  1.6.2.2  pgoyette     notice, this list of conditions and the following disclaimer in the
     44  1.6.2.2  pgoyette     documentation and/or other materials provided with the distribution.
     45  1.6.2.2  pgoyette 
     46  1.6.2.2  pgoyette     This program is distributed in the hope that it will be useful,
     47  1.6.2.2  pgoyette     but WITHOUT ANY WARRANTY; without even the implied warranty of
     48  1.6.2.2  pgoyette     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     49  1.6.2.2  pgoyette */
     50  1.6.2.2  pgoyette 
     51  1.6.2.2  pgoyette #include <sys/param.h>
     52  1.6.2.2  pgoyette #include <stdio.h>
     53  1.6.2.2  pgoyette #include <string.h>
     54  1.6.2.2  pgoyette #include <stdlib.h>
     55  1.6.2.2  pgoyette #include <stdint.h>
     56  1.6.2.2  pgoyette #include <stdbool.h>
     57  1.6.2.2  pgoyette #include <errno.h>
     58  1.6.2.2  pgoyette #include <unistd.h>
     59  1.6.2.2  pgoyette 
     60  1.6.2.2  pgoyette #define LZ_STATES		12
     61  1.6.2.2  pgoyette 
     62  1.6.2.2  pgoyette #define LITERAL_CONTEXT_BITS	3
     63  1.6.2.2  pgoyette #define POS_STATE_BITS		2
     64  1.6.2.2  pgoyette #define POS_STATES		(1 << POS_STATE_BITS)
     65  1.6.2.2  pgoyette #define POS_STATE_MASK 		(POS_STATES - 1)
     66  1.6.2.2  pgoyette 
     67  1.6.2.2  pgoyette #define STATES			4
     68  1.6.2.2  pgoyette #define DIS_SLOT_BITS		6
     69  1.6.2.2  pgoyette 
     70  1.6.2.2  pgoyette #define DIS_MODEL_START		4
     71  1.6.2.2  pgoyette #define DIS_MODEL_END		14
     72  1.6.2.2  pgoyette 
     73  1.6.2.2  pgoyette #define MODELED_DISTANCES	(1 << (DIS_MODEL_END / 2))
     74  1.6.2.2  pgoyette #define DIS_ALIGN_BITS		4
     75  1.6.2.2  pgoyette #define DIS_ALIGN_SIZE		(1 << DIS_ALIGN_BITS)
     76  1.6.2.2  pgoyette 
     77  1.6.2.2  pgoyette #define LOW_BITS		3
     78  1.6.2.2  pgoyette #define MID_BITS		3
     79  1.6.2.2  pgoyette #define HIGH_BITS		8
     80  1.6.2.2  pgoyette 
     81  1.6.2.2  pgoyette #define LOW_SYMBOLS		(1 << LOW_BITS)
     82  1.6.2.2  pgoyette #define MID_SYMBOLS		(1 << MID_BITS)
     83  1.6.2.2  pgoyette #define HIGH_SYMBOLS		(1 << HIGH_BITS)
     84  1.6.2.2  pgoyette 
     85  1.6.2.2  pgoyette #define MAX_SYMBOLS 		(LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
     86  1.6.2.2  pgoyette 
     87  1.6.2.2  pgoyette #define MIN_MATCH_LEN		2
     88  1.6.2.2  pgoyette 
     89  1.6.2.2  pgoyette #define BIT_MODEL_MOVE_BITS	5
     90  1.6.2.2  pgoyette #define BIT_MODEL_TOTAL_BITS 	11
     91  1.6.2.2  pgoyette #define BIT_MODEL_TOTAL 	(1 << BIT_MODEL_TOTAL_BITS)
     92  1.6.2.2  pgoyette #define BIT_MODEL_INIT		(BIT_MODEL_TOTAL / 2)
     93  1.6.2.2  pgoyette 
     94  1.6.2.2  pgoyette static const int lz_st_next[] = {
     95  1.6.2.2  pgoyette 	0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
     96  1.6.2.2  pgoyette };
     97  1.6.2.2  pgoyette 
     98  1.6.2.2  pgoyette static bool
     99  1.6.2.2  pgoyette lz_st_is_char(int st) {
    100  1.6.2.2  pgoyette 	return st < 7;
    101  1.6.2.2  pgoyette }
    102  1.6.2.2  pgoyette 
    103  1.6.2.2  pgoyette static int
    104  1.6.2.2  pgoyette lz_st_get_char(int st) {
    105  1.6.2.2  pgoyette 	return lz_st_next[st];
    106  1.6.2.2  pgoyette }
    107  1.6.2.2  pgoyette 
    108  1.6.2.2  pgoyette static int
    109  1.6.2.2  pgoyette lz_st_get_match(int st) {
    110  1.6.2.2  pgoyette 	return st < 7 ? 7 : 10;
    111  1.6.2.2  pgoyette }
    112  1.6.2.2  pgoyette 
    113  1.6.2.2  pgoyette static int
    114  1.6.2.2  pgoyette lz_st_get_rep(int st) {
    115  1.6.2.2  pgoyette 	return st < 7 ? 8 : 11;
    116  1.6.2.2  pgoyette }
    117  1.6.2.2  pgoyette 
    118  1.6.2.2  pgoyette static int
    119  1.6.2.2  pgoyette lz_st_get_short_rep(int st) {
    120  1.6.2.2  pgoyette 	return st < 7 ? 9 : 11;
    121  1.6.2.2  pgoyette }
    122  1.6.2.2  pgoyette 
    123  1.6.2.2  pgoyette struct lz_len_model {
    124  1.6.2.2  pgoyette 	int choice1;
    125  1.6.2.2  pgoyette 	int choice2;
    126  1.6.2.2  pgoyette 	int bm_low[POS_STATES][LOW_SYMBOLS];
    127  1.6.2.2  pgoyette 	int bm_mid[POS_STATES][MID_SYMBOLS];
    128  1.6.2.2  pgoyette 	int bm_high[HIGH_SYMBOLS];
    129  1.6.2.2  pgoyette };
    130  1.6.2.2  pgoyette 
    131  1.6.2.2  pgoyette static uint32_t lz_crc[256];
    132  1.6.2.2  pgoyette 
    133  1.6.2.2  pgoyette static void
    134  1.6.2.2  pgoyette lz_crc_init(void)
    135  1.6.2.2  pgoyette {
    136  1.6.2.2  pgoyette 	for (unsigned i = 0; i < __arraycount(lz_crc); i++) {
    137  1.6.2.2  pgoyette 		unsigned c = i;
    138  1.6.2.2  pgoyette 		for (unsigned j = 0; j < 8; j++) {
    139  1.6.2.2  pgoyette 			if (c & 1)
    140  1.6.2.2  pgoyette 				c = 0xEDB88320U ^ (c >> 1);
    141  1.6.2.2  pgoyette 			else
    142  1.6.2.2  pgoyette 				c >>= 1;
    143  1.6.2.2  pgoyette 		}
    144  1.6.2.2  pgoyette 		lz_crc[i] = c;
    145  1.6.2.2  pgoyette       }
    146  1.6.2.2  pgoyette }
    147  1.6.2.2  pgoyette 
    148  1.6.2.2  pgoyette static void
    149  1.6.2.2  pgoyette lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
    150  1.6.2.2  pgoyette {
    151  1.6.2.2  pgoyette 	for (size_t i = 0; i < len; i++)
    152  1.6.2.2  pgoyette 		*crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8);
    153  1.6.2.2  pgoyette }
    154  1.6.2.2  pgoyette 
    155  1.6.2.2  pgoyette struct lz_range_decoder {
    156  1.6.2.2  pgoyette 	FILE *fp;
    157  1.6.2.2  pgoyette 	uint32_t code;
    158  1.6.2.2  pgoyette 	uint32_t range;
    159  1.6.2.2  pgoyette };
    160  1.6.2.2  pgoyette 
    161  1.6.2.2  pgoyette static int
    162  1.6.2.2  pgoyette lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
    163  1.6.2.2  pgoyette {
    164  1.6.2.2  pgoyette 	rd->fp = fp;
    165  1.6.2.2  pgoyette 	rd->code = 0;
    166  1.6.2.2  pgoyette 	rd->range = ~0;
    167  1.6.2.2  pgoyette 	for (int i = 0; i < 5; i++)
    168  1.6.2.2  pgoyette 		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
    169  1.6.2.2  pgoyette 	return ferror(rd->fp) ? -1 : 0;
    170  1.6.2.2  pgoyette }
    171  1.6.2.2  pgoyette 
    172  1.6.2.2  pgoyette static unsigned
    173  1.6.2.2  pgoyette lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
    174  1.6.2.2  pgoyette {
    175  1.6.2.2  pgoyette 	unsigned symbol = 0;
    176  1.6.2.2  pgoyette 
    177  1.6.2.2  pgoyette 	for (int i = num_bits; i > 0; i--) {
    178  1.6.2.2  pgoyette 		rd->range >>= 1;
    179  1.6.2.2  pgoyette 		symbol <<= 1;
    180  1.6.2.2  pgoyette 		if (rd->code >= rd->range) {
    181  1.6.2.2  pgoyette 			rd->code -= rd->range;
    182  1.6.2.2  pgoyette 			symbol |= 1;
    183  1.6.2.2  pgoyette 		}
    184  1.6.2.2  pgoyette 		if (rd->range <= 0x00FFFFFFU) {
    185  1.6.2.2  pgoyette 			rd->range <<= 8;
    186  1.6.2.2  pgoyette 			rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
    187  1.6.2.2  pgoyette 		}
    188  1.6.2.2  pgoyette 	}
    189  1.6.2.2  pgoyette 
    190  1.6.2.2  pgoyette 	return symbol;
    191  1.6.2.2  pgoyette }
    192  1.6.2.2  pgoyette 
    193  1.6.2.2  pgoyette static unsigned
    194  1.6.2.2  pgoyette lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
    195  1.6.2.2  pgoyette {
    196  1.6.2.2  pgoyette 	unsigned symbol;
    197  1.6.2.2  pgoyette 	const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
    198  1.6.2.2  pgoyette 
    199  1.6.2.2  pgoyette 	if(rd->code < bound) {
    200  1.6.2.2  pgoyette 		rd->range = bound;
    201  1.6.2.2  pgoyette 		*bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
    202  1.6.2.2  pgoyette 		symbol = 0;
    203  1.6.2.2  pgoyette 	}
    204  1.6.2.2  pgoyette 	else {
    205  1.6.2.2  pgoyette 		rd->range -= bound;
    206  1.6.2.2  pgoyette 		rd->code -= bound;
    207  1.6.2.2  pgoyette 		*bm -= *bm >> BIT_MODEL_MOVE_BITS;
    208  1.6.2.2  pgoyette 		symbol = 1;
    209  1.6.2.2  pgoyette 	}
    210  1.6.2.2  pgoyette 
    211  1.6.2.2  pgoyette 	if (rd->range <= 0x00FFFFFFU) {
    212  1.6.2.2  pgoyette 		rd->range <<= 8;
    213  1.6.2.2  pgoyette 		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
    214  1.6.2.2  pgoyette 	}
    215  1.6.2.2  pgoyette 	return symbol;
    216  1.6.2.2  pgoyette }
    217  1.6.2.2  pgoyette 
    218  1.6.2.2  pgoyette static unsigned
    219  1.6.2.2  pgoyette lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
    220  1.6.2.2  pgoyette {
    221  1.6.2.2  pgoyette 	unsigned symbol = 1;
    222  1.6.2.2  pgoyette 
    223  1.6.2.2  pgoyette 	for (int i = 0; i < num_bits; i++)
    224  1.6.2.2  pgoyette 		symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]);
    225  1.6.2.2  pgoyette 
    226  1.6.2.2  pgoyette 	return symbol - (1 << num_bits);
    227  1.6.2.2  pgoyette }
    228  1.6.2.2  pgoyette 
    229  1.6.2.2  pgoyette static unsigned
    230  1.6.2.2  pgoyette lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
    231  1.6.2.2  pgoyette {
    232  1.6.2.2  pgoyette 	unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits);
    233  1.6.2.2  pgoyette 	unsigned reversed_symbol = 0;
    234  1.6.2.2  pgoyette 
    235  1.6.2.2  pgoyette 	for (int i = 0; i < num_bits; i++) {
    236  1.6.2.2  pgoyette 		reversed_symbol = (reversed_symbol << 1) | (symbol & 1);
    237  1.6.2.2  pgoyette 		symbol >>= 1;
    238  1.6.2.2  pgoyette 	}
    239  1.6.2.2  pgoyette 
    240  1.6.2.2  pgoyette 	return reversed_symbol;
    241  1.6.2.2  pgoyette }
    242  1.6.2.2  pgoyette 
    243  1.6.2.2  pgoyette static unsigned
    244  1.6.2.2  pgoyette lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
    245  1.6.2.2  pgoyette {
    246  1.6.2.2  pgoyette 	unsigned symbol = 1;
    247  1.6.2.2  pgoyette 
    248  1.6.2.2  pgoyette 	for (int i = 7; i >= 0; i--) {
    249  1.6.2.2  pgoyette 		const unsigned match_bit = (match_byte >> i) & 1;
    250  1.6.2.2  pgoyette 		const unsigned bit = lz_rd_decode_bit(rd,
    251  1.6.2.2  pgoyette 		    &bm[symbol + (match_bit << 8) + 0x100]);
    252  1.6.2.2  pgoyette 		symbol = (symbol << 1) | bit;
    253  1.6.2.2  pgoyette 		if (match_bit != bit) {
    254  1.6.2.2  pgoyette 			while (symbol < 0x100) {
    255  1.6.2.2  pgoyette 				symbol = (symbol << 1) |
    256  1.6.2.2  pgoyette 				    lz_rd_decode_bit(rd, &bm[symbol]);
    257  1.6.2.2  pgoyette 			}
    258  1.6.2.2  pgoyette 			break;
    259  1.6.2.2  pgoyette 		}
    260  1.6.2.2  pgoyette 	}
    261  1.6.2.2  pgoyette 	return symbol & 0xFF;
    262  1.6.2.2  pgoyette }
    263  1.6.2.2  pgoyette 
    264  1.6.2.2  pgoyette static unsigned
    265  1.6.2.2  pgoyette lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
    266  1.6.2.2  pgoyette     int pos_state)
    267  1.6.2.2  pgoyette {
    268  1.6.2.2  pgoyette 	if (lz_rd_decode_bit(rd, &lm->choice1) == 0)
    269  1.6.2.2  pgoyette 		return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS);
    270  1.6.2.2  pgoyette 
    271  1.6.2.2  pgoyette 	if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
    272  1.6.2.2  pgoyette 		return LOW_SYMBOLS +
    273  1.6.2.2  pgoyette 		    lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS);
    274  1.6.2.2  pgoyette 	}
    275  1.6.2.2  pgoyette 
    276  1.6.2.2  pgoyette 	return LOW_SYMBOLS + MID_SYMBOLS +
    277  1.6.2.2  pgoyette            lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS);
    278  1.6.2.2  pgoyette }
    279  1.6.2.2  pgoyette 
    280  1.6.2.2  pgoyette struct lz_decoder {
    281  1.6.2.2  pgoyette 	FILE *fin, *fout;
    282  1.6.2.2  pgoyette 	off_t pos, ppos, spos, dict_size;
    283  1.6.2.2  pgoyette 	bool wrapped;
    284  1.6.2.2  pgoyette 	uint32_t crc;
    285  1.6.2.2  pgoyette 	uint8_t *obuf;
    286  1.6.2.2  pgoyette 	struct lz_range_decoder rdec;
    287  1.6.2.2  pgoyette };
    288  1.6.2.2  pgoyette 
    289  1.6.2.2  pgoyette static int
    290  1.6.2.2  pgoyette lz_flush(struct lz_decoder *lz)
    291  1.6.2.2  pgoyette {
    292  1.6.2.2  pgoyette 	off_t offs = lz->pos - lz->spos;
    293  1.6.2.2  pgoyette 	if (offs <= 0)
    294  1.6.2.2  pgoyette 		return -1;
    295  1.6.2.2  pgoyette 
    296  1.6.2.2  pgoyette 	size_t size = (size_t)offs;
    297  1.6.2.2  pgoyette 	lz_crc_update(&lz->crc, lz->obuf + lz->spos, size);
    298  1.6.2.2  pgoyette 	if (fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size)
    299  1.6.2.2  pgoyette 		return -1;
    300  1.6.2.2  pgoyette 
    301  1.6.2.2  pgoyette 	lz->wrapped = lz->pos >= lz->dict_size;
    302  1.6.2.2  pgoyette 	if (lz->wrapped) {
    303  1.6.2.2  pgoyette 		lz->ppos += lz->pos;
    304  1.6.2.2  pgoyette 		lz->pos = 0;
    305  1.6.2.2  pgoyette 	}
    306  1.6.2.2  pgoyette 	lz->spos = lz->pos;
    307  1.6.2.2  pgoyette 	return 0;
    308  1.6.2.2  pgoyette }
    309  1.6.2.2  pgoyette 
    310  1.6.2.2  pgoyette static void
    311  1.6.2.2  pgoyette lz_destroy(struct lz_decoder *lz)
    312  1.6.2.2  pgoyette {
    313  1.6.2.2  pgoyette 	if (lz->fin)
    314  1.6.2.2  pgoyette 		fclose(lz->fin);
    315  1.6.2.2  pgoyette 	if (lz->fout)
    316  1.6.2.2  pgoyette 		fclose(lz->fout);
    317  1.6.2.2  pgoyette 	free(lz->obuf);
    318  1.6.2.2  pgoyette }
    319  1.6.2.2  pgoyette 
    320  1.6.2.2  pgoyette static int
    321  1.6.2.2  pgoyette lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
    322  1.6.2.2  pgoyette {
    323  1.6.2.2  pgoyette 	memset(lz, 0, sizeof(*lz));
    324  1.6.2.2  pgoyette 
    325  1.6.2.2  pgoyette 	lz->fin = fdopen(dup(fin), "r");
    326  1.6.2.2  pgoyette 	if (lz->fin == NULL)
    327  1.6.2.2  pgoyette 		goto out;
    328  1.6.2.2  pgoyette 
    329  1.6.2.2  pgoyette 	lz->fout = fdopen(dup(fdout), "w");
    330  1.6.2.2  pgoyette 	if (lz->fout == NULL)
    331  1.6.2.2  pgoyette 		goto out;
    332  1.6.2.2  pgoyette 
    333  1.6.2.2  pgoyette 	lz->pos = lz->ppos = lz->spos = 0;
    334  1.6.2.2  pgoyette 	lz->crc = ~0;
    335  1.6.2.2  pgoyette 	lz->dict_size = dict_size;
    336  1.6.2.2  pgoyette 	lz->wrapped = false;
    337  1.6.2.2  pgoyette 
    338  1.6.2.2  pgoyette 	lz->obuf = malloc(dict_size);
    339  1.6.2.2  pgoyette 	if (lz->obuf == NULL)
    340  1.6.2.2  pgoyette 		goto out;
    341  1.6.2.2  pgoyette 
    342  1.6.2.2  pgoyette 	if (lz_rd_create(&lz->rdec, lz->fin) == -1)
    343  1.6.2.2  pgoyette 		goto out;
    344  1.6.2.2  pgoyette 	return 0;
    345  1.6.2.2  pgoyette out:
    346  1.6.2.2  pgoyette 	lz_destroy(lz);
    347  1.6.2.2  pgoyette 	return -1;
    348  1.6.2.2  pgoyette }
    349  1.6.2.2  pgoyette 
    350  1.6.2.2  pgoyette static uint8_t
    351  1.6.2.2  pgoyette lz_peek(const struct lz_decoder *lz, unsigned ahead)
    352  1.6.2.2  pgoyette {
    353  1.6.2.2  pgoyette 	off_t diff = lz->pos - ahead - 1;
    354  1.6.2.2  pgoyette 
    355  1.6.2.2  pgoyette 	if (diff >= 0)
    356  1.6.2.2  pgoyette 		return lz->obuf[diff];
    357  1.6.2.2  pgoyette 
    358  1.6.2.2  pgoyette 	if (lz->wrapped)
    359  1.6.2.2  pgoyette 		return lz->obuf[lz->dict_size + diff];
    360  1.6.2.2  pgoyette 
    361  1.6.2.2  pgoyette 	return 0;
    362  1.6.2.2  pgoyette }
    363  1.6.2.2  pgoyette 
    364  1.6.2.2  pgoyette static void
    365  1.6.2.2  pgoyette lz_put(struct lz_decoder *lz, uint8_t b)
    366  1.6.2.2  pgoyette {
    367  1.6.2.2  pgoyette 	lz->obuf[lz->pos++] = b;
    368  1.6.2.2  pgoyette 	if (lz->dict_size == lz->pos)
    369  1.6.2.2  pgoyette 		lz_flush(lz);
    370  1.6.2.2  pgoyette }
    371  1.6.2.2  pgoyette 
    372  1.6.2.2  pgoyette static off_t
    373  1.6.2.2  pgoyette lz_get_data_position(const struct lz_decoder *lz)
    374  1.6.2.2  pgoyette {
    375  1.6.2.2  pgoyette 	return lz->ppos + lz->pos;
    376  1.6.2.2  pgoyette }
    377  1.6.2.2  pgoyette 
    378  1.6.2.2  pgoyette static unsigned
    379  1.6.2.2  pgoyette lz_get_crc(const struct lz_decoder *lz)
    380  1.6.2.2  pgoyette {
    381  1.6.2.2  pgoyette 	return lz->crc ^ 0xffffffffU;
    382  1.6.2.2  pgoyette }
    383  1.6.2.2  pgoyette 
    384  1.6.2.2  pgoyette static void
    385  1.6.2.2  pgoyette lz_bm_init(int *a, size_t l)
    386  1.6.2.2  pgoyette {
    387  1.6.2.2  pgoyette 	for (size_t i = 0; i < l; i++)
    388  1.6.2.2  pgoyette 		a[i] = BIT_MODEL_INIT;
    389  1.6.2.2  pgoyette }
    390  1.6.2.2  pgoyette 
    391  1.6.2.2  pgoyette #define LZ_BM_INIT(a)	lz_bm_init(a, __arraycount(a))
    392  1.6.2.2  pgoyette #define LZ_BM_INIT2(a)	do { \
    393  1.6.2.2  pgoyette 	size_t l = __arraycount(a[0]); \
    394  1.6.2.2  pgoyette 	for (size_t i = 0; i < __arraycount(a); i++) \
    395  1.6.2.2  pgoyette 		lz_bm_init(a[i], l); \
    396  1.6.2.2  pgoyette } while (/*CONSTCOND*/0)
    397  1.6.2.2  pgoyette 
    398  1.6.2.2  pgoyette #define LZ_MODEL_INIT(a) do { \
    399  1.6.2.2  pgoyette 	a.choice1 = BIT_MODEL_INIT; \
    400  1.6.2.2  pgoyette 	a.choice2 = BIT_MODEL_INIT; \
    401  1.6.2.2  pgoyette 	LZ_BM_INIT2(a.bm_low); \
    402  1.6.2.2  pgoyette 	LZ_BM_INIT2(a.bm_mid); \
    403  1.6.2.2  pgoyette 	LZ_BM_INIT(a.bm_high); \
    404  1.6.2.2  pgoyette } while (/*CONSTCOND*/0)
    405  1.6.2.2  pgoyette 
    406  1.6.2.2  pgoyette static bool
    407  1.6.2.2  pgoyette lz_decode_member(struct lz_decoder *lz)
    408  1.6.2.2  pgoyette {
    409  1.6.2.2  pgoyette 	int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
    410  1.6.2.2  pgoyette 	int bm_match[LZ_STATES][POS_STATES];
    411  1.6.2.2  pgoyette 	int bm_rep[4][LZ_STATES];
    412  1.6.2.2  pgoyette 	int bm_len[LZ_STATES][POS_STATES];
    413  1.6.2.2  pgoyette 	int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
    414  1.6.2.2  pgoyette 	int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
    415  1.6.2.2  pgoyette 	int bm_align[DIS_ALIGN_SIZE];
    416  1.6.2.2  pgoyette 
    417  1.6.2.2  pgoyette 	LZ_BM_INIT2(bm_literal);
    418  1.6.2.2  pgoyette 	LZ_BM_INIT2(bm_match);
    419  1.6.2.2  pgoyette 	LZ_BM_INIT2(bm_rep);
    420  1.6.2.2  pgoyette 	LZ_BM_INIT2(bm_len);
    421  1.6.2.2  pgoyette 	LZ_BM_INIT2(bm_dis_slot);
    422  1.6.2.2  pgoyette 	LZ_BM_INIT(bm_dis);
    423  1.6.2.2  pgoyette 	LZ_BM_INIT(bm_align);
    424  1.6.2.2  pgoyette 
    425  1.6.2.2  pgoyette 	struct lz_len_model match_len_model;
    426  1.6.2.2  pgoyette 	struct lz_len_model rep_len_model;
    427  1.6.2.2  pgoyette 
    428  1.6.2.2  pgoyette 	LZ_MODEL_INIT(match_len_model);
    429  1.6.2.2  pgoyette 	LZ_MODEL_INIT(rep_len_model);
    430  1.6.2.2  pgoyette 
    431  1.6.2.2  pgoyette 	struct lz_range_decoder *rd = &lz->rdec;
    432  1.6.2.2  pgoyette 	unsigned rep[4] = { 0 };
    433  1.6.2.2  pgoyette 
    434  1.6.2.2  pgoyette 
    435  1.6.2.2  pgoyette 	int state = 0;
    436  1.6.2.2  pgoyette 
    437  1.6.2.2  pgoyette 	while (!feof(lz->fin) && !ferror(lz->fin)) {
    438  1.6.2.2  pgoyette 		const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
    439  1.6.2.2  pgoyette 		// bit 1
    440  1.6.2.2  pgoyette 		if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
    441  1.6.2.2  pgoyette 			const uint8_t prev_byte = lz_peek(lz, 0);
    442  1.6.2.2  pgoyette 			const int literal_state =
    443  1.6.2.2  pgoyette 			    prev_byte >> (8 - LITERAL_CONTEXT_BITS);
    444  1.6.2.2  pgoyette 			int *bm = bm_literal[literal_state];
    445  1.6.2.2  pgoyette 			if (lz_st_is_char(state))
    446  1.6.2.2  pgoyette 				lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
    447  1.6.2.2  pgoyette 			else {
    448  1.6.2.2  pgoyette 				int peek = lz_peek(lz, rep[0]);
    449  1.6.2.2  pgoyette 				lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
    450  1.6.2.2  pgoyette 			}
    451  1.6.2.2  pgoyette 			state = lz_st_get_char(state);
    452  1.6.2.2  pgoyette 			continue;
    453  1.6.2.2  pgoyette 		}
    454  1.6.2.2  pgoyette 		int len;
    455  1.6.2.2  pgoyette 		// bit 2
    456  1.6.2.2  pgoyette 		if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
    457  1.6.2.2  pgoyette 			// bit 3
    458  1.6.2.2  pgoyette 			if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
    459  1.6.2.2  pgoyette 				// bit 4
    460  1.6.2.2  pgoyette 				if (lz_rd_decode_bit(rd,
    461  1.6.2.2  pgoyette 				    &bm_len[state][pos_state]) == 0)
    462  1.6.2.2  pgoyette 				{
    463  1.6.2.2  pgoyette 					state = lz_st_get_short_rep(state);
    464  1.6.2.2  pgoyette 					lz_put(lz, lz_peek(lz, rep[0]));
    465  1.6.2.2  pgoyette 					continue;
    466  1.6.2.2  pgoyette 				}
    467  1.6.2.2  pgoyette 			} else {
    468  1.6.2.2  pgoyette 				unsigned distance;
    469  1.6.2.2  pgoyette 				// bit 4
    470  1.6.2.2  pgoyette 				if (lz_rd_decode_bit(rd, &bm_rep[2][state])
    471  1.6.2.2  pgoyette 				    == 0)
    472  1.6.2.2  pgoyette 					distance = rep[1];
    473  1.6.2.2  pgoyette 				else {
    474  1.6.2.2  pgoyette 					// bit 5
    475  1.6.2.2  pgoyette 					if (lz_rd_decode_bit(rd,
    476  1.6.2.2  pgoyette 					    &bm_rep[3][state]) == 0)
    477  1.6.2.2  pgoyette 						distance = rep[2];
    478  1.6.2.2  pgoyette 					else {
    479  1.6.2.2  pgoyette 						distance = rep[3];
    480  1.6.2.2  pgoyette 						rep[3] = rep[2];
    481  1.6.2.2  pgoyette 					}
    482  1.6.2.2  pgoyette 					rep[2] = rep[1];
    483  1.6.2.2  pgoyette 				}
    484  1.6.2.2  pgoyette 				rep[1] = rep[0];
    485  1.6.2.2  pgoyette 				rep[0] = distance;
    486  1.6.2.2  pgoyette 			}
    487  1.6.2.2  pgoyette 			state = lz_st_get_rep(state);
    488  1.6.2.2  pgoyette 			len = MIN_MATCH_LEN +
    489  1.6.2.2  pgoyette 			    lz_rd_decode_len(rd, &rep_len_model, pos_state);
    490  1.6.2.2  pgoyette 		} else {
    491  1.6.2.2  pgoyette 			rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
    492  1.6.2.2  pgoyette 			len = MIN_MATCH_LEN +
    493  1.6.2.2  pgoyette 			    lz_rd_decode_len(rd, &match_len_model, pos_state);
    494  1.6.2.2  pgoyette 			const int len_state =
    495  1.6.2.2  pgoyette 			    MIN(len - MIN_MATCH_LEN, STATES - 1);
    496  1.6.2.2  pgoyette 			rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
    497  1.6.2.2  pgoyette 			    DIS_SLOT_BITS);
    498  1.6.2.2  pgoyette 			if (rep[0] >= DIS_MODEL_START) {
    499  1.6.2.2  pgoyette 				const unsigned dis_slot = rep[0];
    500  1.6.2.2  pgoyette 				const int direct_bits = (dis_slot >> 1) - 1;
    501  1.6.2.2  pgoyette 			        rep[0] = (2 | (dis_slot & 1)) << direct_bits;
    502  1.6.2.2  pgoyette 				if (dis_slot < DIS_MODEL_END)
    503  1.6.2.2  pgoyette 					rep[0] += lz_rd_decode_tree_reversed(rd,
    504  1.6.2.2  pgoyette 					    &bm_dis[rep[0] - dis_slot],
    505  1.6.2.2  pgoyette                                             direct_bits);
    506  1.6.2.2  pgoyette 				else {
    507  1.6.2.2  pgoyette 					rep[0] += lz_rd_decode(rd, direct_bits
    508  1.6.2.2  pgoyette 					    - DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
    509  1.6.2.2  pgoyette 					rep[0] += lz_rd_decode_tree_reversed(rd,
    510  1.6.2.2  pgoyette 					    bm_align, DIS_ALIGN_BITS);
    511  1.6.2.2  pgoyette 					if (rep[0] == 0xFFFFFFFFU) {
    512  1.6.2.2  pgoyette 						lz_flush(lz);
    513  1.6.2.2  pgoyette 						return len == MIN_MATCH_LEN;
    514  1.6.2.2  pgoyette 					}
    515  1.6.2.2  pgoyette 				}
    516  1.6.2.2  pgoyette 			}
    517  1.6.2.2  pgoyette 			state = lz_st_get_match(state);
    518  1.6.2.2  pgoyette 			if (rep[0] >= lz->dict_size ||
    519  1.6.2.2  pgoyette 			    (rep[0] >= lz->pos && !lz->wrapped)) {
    520  1.6.2.2  pgoyette 				lz_flush(lz);
    521  1.6.2.2  pgoyette 				return false;
    522  1.6.2.2  pgoyette 			}
    523  1.6.2.2  pgoyette 		}
    524  1.6.2.2  pgoyette 		for (int i = 0; i < len; i++)
    525  1.6.2.2  pgoyette 			lz_put(lz, lz_peek(lz, rep[0]));
    526  1.6.2.2  pgoyette     	}
    527  1.6.2.2  pgoyette 	lz_flush(lz);
    528  1.6.2.2  pgoyette 	return false;
    529  1.6.2.2  pgoyette }
    530  1.6.2.2  pgoyette 
    531  1.6.2.2  pgoyette /*
    532  1.6.2.2  pgoyette  * 0-3	CRC32 of the uncompressed data
    533  1.6.2.2  pgoyette  * 4-11 size of the uncompressed data
    534  1.6.2.2  pgoyette  * 12-19 member size including header and trailer
    535  1.6.2.2  pgoyette  */
    536  1.6.2.2  pgoyette #define TRAILER_SIZE 20
    537  1.6.2.2  pgoyette 
    538  1.6.2.2  pgoyette 
    539  1.6.2.2  pgoyette static off_t
    540  1.6.2.2  pgoyette lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
    541  1.6.2.2  pgoyette {
    542  1.6.2.2  pgoyette 	struct lz_decoder lz;
    543  1.6.2.2  pgoyette 	off_t rv = -1;
    544  1.6.2.2  pgoyette 
    545  1.6.2.2  pgoyette 	if (lz_create(&lz, fin, fdout, dict_size) == -1)
    546  1.6.2.2  pgoyette 		return -1;
    547  1.6.2.2  pgoyette 
    548  1.6.2.2  pgoyette 	if (!lz_decode_member(&lz))
    549  1.6.2.2  pgoyette 		goto out;
    550  1.6.2.2  pgoyette 
    551  1.6.2.2  pgoyette 	uint8_t trailer[TRAILER_SIZE];
    552  1.6.2.2  pgoyette 
    553  1.6.2.2  pgoyette 	for(size_t i = 0; i < __arraycount(trailer); i++)
    554  1.6.2.2  pgoyette 		trailer[i] = (uint8_t)getc(lz.fin);
    555  1.6.2.2  pgoyette 
    556  1.6.2.2  pgoyette 	unsigned crc = 0;
    557  1.6.2.2  pgoyette 	for (int i = 3; i >= 0; --i) {
    558  1.6.2.2  pgoyette 		crc <<= 8;
    559  1.6.2.2  pgoyette 		crc += trailer[i];
    560  1.6.2.2  pgoyette 	}
    561  1.6.2.2  pgoyette 
    562  1.6.2.2  pgoyette 	int64_t data_size = 0;
    563  1.6.2.2  pgoyette 	for (int i = 11; i >= 4; --i) {
    564  1.6.2.2  pgoyette 		data_size <<= 8;
    565  1.6.2.2  pgoyette 		data_size += trailer[i];
    566  1.6.2.2  pgoyette 	}
    567  1.6.2.2  pgoyette 
    568  1.6.2.2  pgoyette 	if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz))
    569  1.6.2.2  pgoyette 		goto out;
    570  1.6.2.2  pgoyette 
    571  1.6.2.2  pgoyette 	rv = 0;
    572  1.6.2.2  pgoyette 	for (int i = 19; i >= 12; --i) {
    573  1.6.2.2  pgoyette 		rv <<= 8;
    574  1.6.2.2  pgoyette 		rv += trailer[i];
    575  1.6.2.2  pgoyette 	}
    576  1.6.2.2  pgoyette 	if (insize)
    577  1.6.2.2  pgoyette 		*insize = rv;
    578  1.6.2.2  pgoyette #if 0
    579  1.6.2.2  pgoyette 	/* Does not work with pipes */
    580  1.6.2.2  pgoyette 	rv = ftello(lz.fout);
    581  1.6.2.2  pgoyette #else
    582  1.6.2.2  pgoyette 	rv = data_size;
    583  1.6.2.2  pgoyette #endif
    584  1.6.2.2  pgoyette out:
    585  1.6.2.2  pgoyette 	lz_destroy(&lz);
    586  1.6.2.2  pgoyette 	return rv;
    587  1.6.2.2  pgoyette }
    588  1.6.2.2  pgoyette 
    589  1.6.2.2  pgoyette 
    590  1.6.2.2  pgoyette /*
    591  1.6.2.2  pgoyette  * 0-3 magic
    592  1.6.2.2  pgoyette  * 4 version
    593  1.6.2.2  pgoyette  * 5 coded dict_size
    594  1.6.2.2  pgoyette  */
    595  1.6.2.2  pgoyette #define HDR_SIZE 6
    596  1.6.2.2  pgoyette #define MIN_DICTIONARY_SIZE (1 << 12)
    597  1.6.2.2  pgoyette #define MAX_DICTIONARY_SIZE (1 << 29)
    598  1.6.2.2  pgoyette 
    599  1.6.2.2  pgoyette static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };
    600  1.6.2.2  pgoyette 
    601  1.6.2.2  pgoyette static unsigned
    602  1.6.2.2  pgoyette lz_get_dict_size(unsigned char c)
    603  1.6.2.2  pgoyette {
    604  1.6.2.2  pgoyette 	unsigned dict_size = 1 << (c & 0x1f);
    605  1.6.2.2  pgoyette 	dict_size -= (dict_size >> 2) * ( (c >> 5) & 0x7);
    606  1.6.2.2  pgoyette 	if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
    607  1.6.2.2  pgoyette 		return 0;
    608  1.6.2.2  pgoyette 	return dict_size;
    609  1.6.2.2  pgoyette }
    610  1.6.2.2  pgoyette 
    611  1.6.2.2  pgoyette static off_t
    612  1.6.2.2  pgoyette unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
    613  1.6.2.2  pgoyette {
    614  1.6.2.2  pgoyette 	if (lz_crc[0] == 0)
    615  1.6.2.2  pgoyette 		lz_crc_init();
    616  1.6.2.2  pgoyette 
    617  1.6.2.2  pgoyette 	char header[HDR_SIZE];
    618  1.6.2.2  pgoyette 
    619  1.6.2.2  pgoyette 	if (prelen > sizeof(header))
    620  1.6.2.2  pgoyette 		return -1;
    621  1.6.2.2  pgoyette 	if (pre && prelen)
    622  1.6.2.2  pgoyette 		memcpy(header, pre, prelen);
    623  1.6.2.2  pgoyette 
    624  1.6.2.2  pgoyette 	ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen);
    625  1.6.2.2  pgoyette 	switch (nr) {
    626  1.6.2.2  pgoyette 	case -1:
    627  1.6.2.2  pgoyette 		return -1;
    628  1.6.2.2  pgoyette 	case 0:
    629  1.6.2.2  pgoyette 		return prelen ? -1 : 0;
    630  1.6.2.2  pgoyette 	default:
    631  1.6.2.2  pgoyette 		if ((size_t)nr != sizeof(header) - prelen)
    632  1.6.2.2  pgoyette 			return -1;
    633  1.6.2.2  pgoyette 		break;
    634  1.6.2.2  pgoyette 	}
    635  1.6.2.2  pgoyette 
    636  1.6.2.2  pgoyette 	if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
    637  1.6.2.2  pgoyette 		return -1;
    638  1.6.2.2  pgoyette 
    639  1.6.2.2  pgoyette 	unsigned dict_size = lz_get_dict_size(header[5]);
    640  1.6.2.2  pgoyette 	if (dict_size == 0)
    641  1.6.2.2  pgoyette 		return -1;
    642  1.6.2.2  pgoyette 
    643  1.6.2.2  pgoyette 	return lz_decode(fin, fout, dict_size, bytes_in);
    644  1.6.2.2  pgoyette }
    645