Home | History | Annotate | Line # | Download | only in gzip
unlz.c revision 1.6
      1  1.6  christos /*	$NetBSD: unlz.c,v 1.6 2018/11/11 01:42:36 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /*-
      4  1.1  christos  * Copyright (c) 2018 The NetBSD Foundation, Inc.
      5  1.1  christos  * All rights reserved.
      6  1.1  christos  *
      7  1.1  christos  * This code is derived from software contributed to The NetBSD Foundation
      8  1.1  christos  * by Christos Zoulas.
      9  1.1  christos  *
     10  1.1  christos  * Redistribution and use in source and binary forms, with or without
     11  1.1  christos  * modification, are permitted provided that the following conditions
     12  1.1  christos  * are met:
     13  1.1  christos  * 1. Redistributions of source code must retain the above copyright
     14  1.1  christos  *    notice, this list of conditions and the following disclaimer.
     15  1.1  christos  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.1  christos  *    notice, this list of conditions and the following disclaimer in the
     17  1.1  christos  *    documentation and/or other materials provided with the distribution.
     18  1.1  christos  *
     19  1.1  christos  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  1.1  christos  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  1.1  christos  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  1.1  christos  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  1.1  christos  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  1.1  christos  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  1.1  christos  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  1.1  christos  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  1.1  christos  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  1.1  christos  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  1.1  christos  * POSSIBILITY OF SUCH DAMAGE.
     30  1.1  christos  */
     31  1.1  christos 
     32  1.1  christos /*  Lzd - Educational decompressor for the lzip format
     33  1.1  christos     Copyright (C) 2013-2018 Antonio Diaz Diaz.
     34  1.1  christos 
     35  1.1  christos     This program is free software. Redistribution and use in source and
     36  1.1  christos     binary forms, with or without modification, are permitted provided
     37  1.1  christos     that the following conditions are met:
     38  1.1  christos 
     39  1.1  christos     1. Redistributions of source code must retain the above copyright
     40  1.1  christos     notice, this list of conditions and the following disclaimer.
     41  1.1  christos 
     42  1.1  christos     2. Redistributions in binary form must reproduce the above copyright
     43  1.1  christos     notice, this list of conditions and the following disclaimer in the
     44  1.1  christos     documentation and/or other materials provided with the distribution.
     45  1.1  christos 
     46  1.1  christos     This program is distributed in the hope that it will be useful,
     47  1.1  christos     but WITHOUT ANY WARRANTY; without even the implied warranty of
     48  1.1  christos     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
     49  1.1  christos */
     50  1.1  christos 
     51  1.1  christos #include <sys/param.h>
     52  1.1  christos #include <stdio.h>
     53  1.1  christos #include <string.h>
     54  1.1  christos #include <stdlib.h>
     55  1.1  christos #include <stdint.h>
     56  1.1  christos #include <stdbool.h>
     57  1.1  christos #include <errno.h>
     58  1.1  christos #include <unistd.h>
     59  1.1  christos 
     60  1.1  christos #define LZ_STATES		12
     61  1.1  christos 
     62  1.1  christos #define LITERAL_CONTEXT_BITS	3
     63  1.1  christos #define POS_STATE_BITS		2
     64  1.1  christos #define POS_STATES		(1 << POS_STATE_BITS)
     65  1.1  christos #define POS_STATE_MASK 		(POS_STATES - 1)
     66  1.1  christos 
     67  1.1  christos #define STATES			4
     68  1.1  christos #define DIS_SLOT_BITS		6
     69  1.1  christos 
     70  1.1  christos #define DIS_MODEL_START		4
     71  1.1  christos #define DIS_MODEL_END		14
     72  1.1  christos 
     73  1.1  christos #define MODELED_DISTANCES	(1 << (DIS_MODEL_END / 2))
     74  1.1  christos #define DIS_ALIGN_BITS		4
     75  1.1  christos #define DIS_ALIGN_SIZE		(1 << DIS_ALIGN_BITS)
     76  1.1  christos 
     77  1.1  christos #define LOW_BITS		3
     78  1.1  christos #define MID_BITS		3
     79  1.1  christos #define HIGH_BITS		8
     80  1.1  christos 
     81  1.1  christos #define LOW_SYMBOLS		(1 << LOW_BITS)
     82  1.1  christos #define MID_SYMBOLS		(1 << MID_BITS)
     83  1.1  christos #define HIGH_SYMBOLS		(1 << HIGH_BITS)
     84  1.1  christos 
     85  1.1  christos #define MAX_SYMBOLS 		(LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS)
     86  1.1  christos 
     87  1.1  christos #define MIN_MATCH_LEN		2
     88  1.1  christos 
     89  1.1  christos #define BIT_MODEL_MOVE_BITS	5
     90  1.1  christos #define BIT_MODEL_TOTAL_BITS 	11
     91  1.1  christos #define BIT_MODEL_TOTAL 	(1 << BIT_MODEL_TOTAL_BITS)
     92  1.1  christos #define BIT_MODEL_INIT		(BIT_MODEL_TOTAL / 2)
     93  1.1  christos 
     94  1.1  christos static const int lz_st_next[] = {
     95  1.1  christos 	0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5,
     96  1.1  christos };
     97  1.1  christos 
     98  1.1  christos static bool
     99  1.1  christos lz_st_is_char(int st) {
    100  1.1  christos 	return st < 7;
    101  1.1  christos }
    102  1.1  christos 
    103  1.1  christos static int
    104  1.1  christos lz_st_get_char(int st) {
    105  1.1  christos 	return lz_st_next[st];
    106  1.1  christos }
    107  1.1  christos 
    108  1.1  christos static int
    109  1.1  christos lz_st_get_match(int st) {
    110  1.1  christos 	return st < 7 ? 7 : 10;
    111  1.1  christos }
    112  1.1  christos 
    113  1.1  christos static int
    114  1.1  christos lz_st_get_rep(int st) {
    115  1.1  christos 	return st < 7 ? 8 : 11;
    116  1.1  christos }
    117  1.1  christos 
    118  1.1  christos static int
    119  1.1  christos lz_st_get_short_rep(int st) {
    120  1.1  christos 	return st < 7 ? 9 : 11;
    121  1.1  christos }
    122  1.1  christos 
    123  1.1  christos struct lz_len_model {
    124  1.1  christos 	int choice1;
    125  1.1  christos 	int choice2;
    126  1.1  christos 	int bm_low[POS_STATES][LOW_SYMBOLS];
    127  1.1  christos 	int bm_mid[POS_STATES][MID_SYMBOLS];
    128  1.1  christos 	int bm_high[HIGH_SYMBOLS];
    129  1.1  christos };
    130  1.1  christos 
    131  1.1  christos static uint32_t lz_crc[256];
    132  1.1  christos 
    133  1.1  christos static void
    134  1.1  christos lz_crc_init(void)
    135  1.1  christos {
    136  1.1  christos 	for (unsigned i = 0; i < __arraycount(lz_crc); i++) {
    137  1.1  christos 		unsigned c = i;
    138  1.1  christos 		for (unsigned j = 0; j < 8; j++) {
    139  1.1  christos 			if (c & 1)
    140  1.1  christos 				c = 0xEDB88320U ^ (c >> 1);
    141  1.1  christos 			else
    142  1.1  christos 				c >>= 1;
    143  1.1  christos 		}
    144  1.1  christos 		lz_crc[i] = c;
    145  1.1  christos       }
    146  1.1  christos }
    147  1.1  christos 
    148  1.1  christos static void
    149  1.1  christos lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len)
    150  1.1  christos {
    151  1.1  christos 	for (size_t i = 0; i < len; i++)
    152  1.1  christos 		*crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8);
    153  1.1  christos }
    154  1.1  christos 
    155  1.1  christos struct lz_range_decoder {
    156  1.1  christos 	FILE *fp;
    157  1.1  christos 	uint32_t code;
    158  1.1  christos 	uint32_t range;
    159  1.1  christos };
    160  1.1  christos 
    161  1.1  christos static int
    162  1.1  christos lz_rd_create(struct lz_range_decoder *rd, FILE *fp)
    163  1.1  christos {
    164  1.1  christos 	rd->fp = fp;
    165  1.1  christos 	rd->code = 0;
    166  1.1  christos 	rd->range = ~0;
    167  1.1  christos 	for (int i = 0; i < 5; i++)
    168  1.1  christos 		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
    169  1.1  christos 	return ferror(rd->fp) ? -1 : 0;
    170  1.1  christos }
    171  1.1  christos 
    172  1.1  christos static unsigned
    173  1.1  christos lz_rd_decode(struct lz_range_decoder *rd, int num_bits)
    174  1.1  christos {
    175  1.1  christos 	unsigned symbol = 0;
    176  1.1  christos 
    177  1.1  christos 	for (int i = num_bits; i > 0; i--) {
    178  1.1  christos 		rd->range >>= 1;
    179  1.1  christos 		symbol <<= 1;
    180  1.1  christos 		if (rd->code >= rd->range) {
    181  1.1  christos 			rd->code -= rd->range;
    182  1.1  christos 			symbol |= 1;
    183  1.1  christos 		}
    184  1.1  christos 		if (rd->range <= 0x00FFFFFFU) {
    185  1.1  christos 			rd->range <<= 8;
    186  1.1  christos 			rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
    187  1.1  christos 		}
    188  1.1  christos 	}
    189  1.1  christos 
    190  1.1  christos 	return symbol;
    191  1.1  christos }
    192  1.1  christos 
    193  1.1  christos static unsigned
    194  1.1  christos lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm)
    195  1.1  christos {
    196  1.1  christos 	unsigned symbol;
    197  1.1  christos 	const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm;
    198  1.1  christos 
    199  1.1  christos 	if(rd->code < bound) {
    200  1.1  christos 		rd->range = bound;
    201  1.1  christos 		*bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS;
    202  1.1  christos 		symbol = 0;
    203  1.1  christos 	}
    204  1.1  christos 	else {
    205  1.1  christos 		rd->range -= bound;
    206  1.1  christos 		rd->code -= bound;
    207  1.1  christos 		*bm -= *bm >> BIT_MODEL_MOVE_BITS;
    208  1.1  christos 		symbol = 1;
    209  1.1  christos 	}
    210  1.1  christos 
    211  1.1  christos 	if (rd->range <= 0x00FFFFFFU) {
    212  1.1  christos 		rd->range <<= 8;
    213  1.1  christos 		rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp);
    214  1.1  christos 	}
    215  1.1  christos 	return symbol;
    216  1.1  christos }
    217  1.1  christos 
    218  1.1  christos static unsigned
    219  1.1  christos lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits)
    220  1.1  christos {
    221  1.1  christos 	unsigned symbol = 1;
    222  1.1  christos 
    223  1.1  christos 	for (int i = 0; i < num_bits; i++)
    224  1.1  christos 		symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]);
    225  1.1  christos 
    226  1.1  christos 	return symbol - (1 << num_bits);
    227  1.1  christos }
    228  1.1  christos 
    229  1.1  christos static unsigned
    230  1.1  christos lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits)
    231  1.1  christos {
    232  1.1  christos 	unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits);
    233  1.1  christos 	unsigned reversed_symbol = 0;
    234  1.1  christos 
    235  1.1  christos 	for (int i = 0; i < num_bits; i++) {
    236  1.1  christos 		reversed_symbol = (reversed_symbol << 1) | (symbol & 1);
    237  1.1  christos 		symbol >>= 1;
    238  1.1  christos 	}
    239  1.1  christos 
    240  1.1  christos 	return reversed_symbol;
    241  1.1  christos }
    242  1.1  christos 
    243  1.1  christos static unsigned
    244  1.1  christos lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte)
    245  1.1  christos {
    246  1.1  christos 	unsigned symbol = 1;
    247  1.1  christos 
    248  1.1  christos 	for (int i = 7; i >= 0; i--) {
    249  1.1  christos 		const unsigned match_bit = (match_byte >> i) & 1;
    250  1.1  christos 		const unsigned bit = lz_rd_decode_bit(rd,
    251  1.1  christos 		    &bm[symbol + (match_bit << 8) + 0x100]);
    252  1.1  christos 		symbol = (symbol << 1) | bit;
    253  1.1  christos 		if (match_bit != bit) {
    254  1.1  christos 			while (symbol < 0x100) {
    255  1.1  christos 				symbol = (symbol << 1) |
    256  1.1  christos 				    lz_rd_decode_bit(rd, &bm[symbol]);
    257  1.1  christos 			}
    258  1.1  christos 			break;
    259  1.1  christos 		}
    260  1.1  christos 	}
    261  1.1  christos 	return symbol & 0xFF;
    262  1.1  christos }
    263  1.1  christos 
    264  1.1  christos static unsigned
    265  1.1  christos lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm,
    266  1.1  christos     int pos_state)
    267  1.1  christos {
    268  1.1  christos 	if (lz_rd_decode_bit(rd, &lm->choice1) == 0)
    269  1.1  christos 		return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS);
    270  1.1  christos 
    271  1.1  christos 	if (lz_rd_decode_bit(rd, &lm->choice2) == 0) {
    272  1.1  christos 		return LOW_SYMBOLS +
    273  1.1  christos 		    lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS);
    274  1.1  christos 	}
    275  1.1  christos 
    276  1.1  christos 	return LOW_SYMBOLS + MID_SYMBOLS +
    277  1.1  christos            lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS);
    278  1.1  christos }
    279  1.1  christos 
    280  1.1  christos struct lz_decoder {
    281  1.1  christos 	FILE *fin, *fout;
    282  1.1  christos 	off_t pos, ppos, spos, dict_size;
    283  1.1  christos 	bool wrapped;
    284  1.1  christos 	uint32_t crc;
    285  1.1  christos 	uint8_t *obuf;
    286  1.1  christos 	struct lz_range_decoder rdec;
    287  1.1  christos };
    288  1.1  christos 
    289  1.1  christos static int
    290  1.1  christos lz_flush(struct lz_decoder *lz)
    291  1.1  christos {
    292  1.1  christos 	off_t offs = lz->pos - lz->spos;
    293  1.1  christos 	if (offs <= 0)
    294  1.1  christos 		return -1;
    295  1.1  christos 
    296  1.1  christos 	size_t size = (size_t)offs;
    297  1.1  christos 	lz_crc_update(&lz->crc, lz->obuf + lz->spos, size);
    298  1.1  christos 	if (fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size)
    299  1.1  christos 		return -1;
    300  1.1  christos 
    301  1.1  christos 	lz->wrapped = lz->pos >= lz->dict_size;
    302  1.1  christos 	if (lz->wrapped) {
    303  1.1  christos 		lz->ppos += lz->pos;
    304  1.1  christos 		lz->pos = 0;
    305  1.1  christos 	}
    306  1.1  christos 	lz->spos = lz->pos;
    307  1.1  christos 	return 0;
    308  1.1  christos }
    309  1.1  christos 
    310  1.1  christos static void
    311  1.1  christos lz_destroy(struct lz_decoder *lz)
    312  1.1  christos {
    313  1.1  christos 	if (lz->fin)
    314  1.1  christos 		fclose(lz->fin);
    315  1.1  christos 	if (lz->fout)
    316  1.1  christos 		fclose(lz->fout);
    317  1.1  christos 	free(lz->obuf);
    318  1.1  christos }
    319  1.1  christos 
    320  1.1  christos static int
    321  1.1  christos lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size)
    322  1.1  christos {
    323  1.1  christos 	memset(lz, 0, sizeof(*lz));
    324  1.1  christos 
    325  1.1  christos 	lz->fin = fdopen(dup(fin), "r");
    326  1.1  christos 	if (lz->fin == NULL)
    327  1.1  christos 		goto out;
    328  1.1  christos 
    329  1.1  christos 	lz->fout = fdopen(dup(fdout), "w");
    330  1.1  christos 	if (lz->fout == NULL)
    331  1.1  christos 		goto out;
    332  1.1  christos 
    333  1.1  christos 	lz->pos = lz->ppos = lz->spos = 0;
    334  1.1  christos 	lz->crc = ~0;
    335  1.1  christos 	lz->dict_size = dict_size;
    336  1.1  christos 	lz->wrapped = false;
    337  1.1  christos 
    338  1.1  christos 	lz->obuf = malloc(dict_size);
    339  1.1  christos 	if (lz->obuf == NULL)
    340  1.1  christos 		goto out;
    341  1.1  christos 
    342  1.1  christos 	if (lz_rd_create(&lz->rdec, lz->fin) == -1)
    343  1.1  christos 		goto out;
    344  1.1  christos 	return 0;
    345  1.1  christos out:
    346  1.1  christos 	lz_destroy(lz);
    347  1.1  christos 	return -1;
    348  1.1  christos }
    349  1.1  christos 
    350  1.1  christos static uint8_t
    351  1.1  christos lz_peek(const struct lz_decoder *lz, unsigned ahead)
    352  1.1  christos {
    353  1.1  christos 	off_t diff = lz->pos - ahead - 1;
    354  1.1  christos 
    355  1.1  christos 	if (diff >= 0)
    356  1.1  christos 		return lz->obuf[diff];
    357  1.1  christos 
    358  1.1  christos 	if (lz->wrapped)
    359  1.1  christos 		return lz->obuf[lz->dict_size + diff];
    360  1.1  christos 
    361  1.1  christos 	return 0;
    362  1.1  christos }
    363  1.1  christos 
    364  1.1  christos static void
    365  1.1  christos lz_put(struct lz_decoder *lz, uint8_t b)
    366  1.1  christos {
    367  1.1  christos 	lz->obuf[lz->pos++] = b;
    368  1.1  christos 	if (lz->dict_size == lz->pos)
    369  1.1  christos 		lz_flush(lz);
    370  1.1  christos }
    371  1.1  christos 
    372  1.1  christos static off_t
    373  1.1  christos lz_get_data_position(const struct lz_decoder *lz)
    374  1.1  christos {
    375  1.1  christos 	return lz->ppos + lz->pos;
    376  1.1  christos }
    377  1.1  christos 
    378  1.1  christos static unsigned
    379  1.1  christos lz_get_crc(const struct lz_decoder *lz)
    380  1.1  christos {
    381  1.1  christos 	return lz->crc ^ 0xffffffffU;
    382  1.1  christos }
    383  1.1  christos 
    384  1.1  christos static void
    385  1.1  christos lz_bm_init(int *a, size_t l)
    386  1.1  christos {
    387  1.1  christos 	for (size_t i = 0; i < l; i++)
    388  1.1  christos 		a[i] = BIT_MODEL_INIT;
    389  1.1  christos }
    390  1.1  christos 
    391  1.1  christos #define LZ_BM_INIT(a)	lz_bm_init(a, __arraycount(a))
    392  1.1  christos #define LZ_BM_INIT2(a)	do { \
    393  1.1  christos 	size_t l = __arraycount(a[0]); \
    394  1.1  christos 	for (size_t i = 0; i < __arraycount(a); i++) \
    395  1.1  christos 		lz_bm_init(a[i], l); \
    396  1.1  christos } while (/*CONSTCOND*/0)
    397  1.1  christos 
    398  1.1  christos #define LZ_MODEL_INIT(a) do { \
    399  1.1  christos 	a.choice1 = BIT_MODEL_INIT; \
    400  1.1  christos 	a.choice2 = BIT_MODEL_INIT; \
    401  1.1  christos 	LZ_BM_INIT2(a.bm_low); \
    402  1.1  christos 	LZ_BM_INIT2(a.bm_mid); \
    403  1.1  christos 	LZ_BM_INIT(a.bm_high); \
    404  1.1  christos } while (/*CONSTCOND*/0)
    405  1.1  christos 
    406  1.1  christos static bool
    407  1.1  christos lz_decode_member(struct lz_decoder *lz)
    408  1.1  christos {
    409  1.1  christos 	int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300];
    410  1.1  christos 	int bm_match[LZ_STATES][POS_STATES];
    411  1.1  christos 	int bm_rep[4][LZ_STATES];
    412  1.1  christos 	int bm_len[LZ_STATES][POS_STATES];
    413  1.1  christos 	int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS];
    414  1.1  christos 	int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1];
    415  1.1  christos 	int bm_align[DIS_ALIGN_SIZE];
    416  1.1  christos 
    417  1.1  christos 	LZ_BM_INIT2(bm_literal);
    418  1.1  christos 	LZ_BM_INIT2(bm_match);
    419  1.1  christos 	LZ_BM_INIT2(bm_rep);
    420  1.1  christos 	LZ_BM_INIT2(bm_len);
    421  1.1  christos 	LZ_BM_INIT2(bm_dis_slot);
    422  1.1  christos 	LZ_BM_INIT(bm_dis);
    423  1.1  christos 	LZ_BM_INIT(bm_align);
    424  1.1  christos 
    425  1.1  christos 	struct lz_len_model match_len_model;
    426  1.1  christos 	struct lz_len_model rep_len_model;
    427  1.1  christos 
    428  1.1  christos 	LZ_MODEL_INIT(match_len_model);
    429  1.1  christos 	LZ_MODEL_INIT(rep_len_model);
    430  1.1  christos 
    431  1.1  christos 	struct lz_range_decoder *rd = &lz->rdec;
    432  1.1  christos 	unsigned rep[4] = { 0 };
    433  1.1  christos 
    434  1.1  christos 
    435  1.1  christos 	int state = 0;
    436  1.1  christos 
    437  1.1  christos 	while (!feof(lz->fin) && !ferror(lz->fin)) {
    438  1.1  christos 		const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK;
    439  1.1  christos 		// bit 1
    440  1.1  christos 		if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) {
    441  1.1  christos 			const uint8_t prev_byte = lz_peek(lz, 0);
    442  1.1  christos 			const int literal_state =
    443  1.1  christos 			    prev_byte >> (8 - LITERAL_CONTEXT_BITS);
    444  1.1  christos 			int *bm = bm_literal[literal_state];
    445  1.1  christos 			if (lz_st_is_char(state))
    446  1.1  christos 				lz_put(lz, lz_rd_decode_tree(rd, bm, 8));
    447  1.1  christos 			else {
    448  1.1  christos 				int peek = lz_peek(lz, rep[0]);
    449  1.1  christos 				lz_put(lz, lz_rd_decode_matched(rd, bm, peek));
    450  1.1  christos 			}
    451  1.1  christos 			state = lz_st_get_char(state);
    452  1.1  christos 			continue;
    453  1.1  christos 		}
    454  1.1  christos 		int len;
    455  1.1  christos 		// bit 2
    456  1.1  christos 		if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) {
    457  1.1  christos 			// bit 3
    458  1.1  christos 			if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) {
    459  1.1  christos 				// bit 4
    460  1.1  christos 				if (lz_rd_decode_bit(rd,
    461  1.1  christos 				    &bm_len[state][pos_state]) == 0)
    462  1.1  christos 				{
    463  1.1  christos 					state = lz_st_get_short_rep(state);
    464  1.1  christos 					lz_put(lz, lz_peek(lz, rep[0]));
    465  1.1  christos 					continue;
    466  1.1  christos 				}
    467  1.1  christos 			} else {
    468  1.1  christos 				unsigned distance;
    469  1.1  christos 				// bit 4
    470  1.1  christos 				if (lz_rd_decode_bit(rd, &bm_rep[2][state])
    471  1.1  christos 				    == 0)
    472  1.1  christos 					distance = rep[1];
    473  1.1  christos 				else {
    474  1.1  christos 					// bit 5
    475  1.1  christos 					if (lz_rd_decode_bit(rd,
    476  1.1  christos 					    &bm_rep[3][state]) == 0)
    477  1.1  christos 						distance = rep[2];
    478  1.1  christos 					else {
    479  1.1  christos 						distance = rep[3];
    480  1.1  christos 						rep[3] = rep[2];
    481  1.1  christos 					}
    482  1.1  christos 					rep[2] = rep[1];
    483  1.1  christos 				}
    484  1.1  christos 				rep[1] = rep[0];
    485  1.1  christos 				rep[0] = distance;
    486  1.1  christos 			}
    487  1.1  christos 			state = lz_st_get_rep(state);
    488  1.1  christos 			len = MIN_MATCH_LEN +
    489  1.1  christos 			    lz_rd_decode_len(rd, &rep_len_model, pos_state);
    490  1.1  christos 		} else {
    491  1.1  christos 			rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0];
    492  1.1  christos 			len = MIN_MATCH_LEN +
    493  1.1  christos 			    lz_rd_decode_len(rd, &match_len_model, pos_state);
    494  1.1  christos 			const int len_state =
    495  1.1  christos 			    MIN(len - MIN_MATCH_LEN, STATES - 1);
    496  1.1  christos 			rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state],
    497  1.1  christos 			    DIS_SLOT_BITS);
    498  1.1  christos 			if (rep[0] >= DIS_MODEL_START) {
    499  1.1  christos 				const unsigned dis_slot = rep[0];
    500  1.1  christos 				const int direct_bits = (dis_slot >> 1) - 1;
    501  1.1  christos 			        rep[0] = (2 | (dis_slot & 1)) << direct_bits;
    502  1.1  christos 				if (dis_slot < DIS_MODEL_END)
    503  1.1  christos 					rep[0] += lz_rd_decode_tree_reversed(rd,
    504  1.1  christos 					    &bm_dis[rep[0] - dis_slot],
    505  1.1  christos                                             direct_bits);
    506  1.1  christos 				else {
    507  1.1  christos 					rep[0] += lz_rd_decode(rd, direct_bits
    508  1.1  christos 					    - DIS_ALIGN_BITS) << DIS_ALIGN_BITS;
    509  1.1  christos 					rep[0] += lz_rd_decode_tree_reversed(rd,
    510  1.1  christos 					    bm_align, DIS_ALIGN_BITS);
    511  1.1  christos 					if (rep[0] == 0xFFFFFFFFU) {
    512  1.1  christos 						lz_flush(lz);
    513  1.1  christos 						return len == MIN_MATCH_LEN;
    514  1.1  christos 					}
    515  1.1  christos 				}
    516  1.1  christos 			}
    517  1.1  christos 			state = lz_st_get_match(state);
    518  1.1  christos 			if (rep[0] >= lz->dict_size ||
    519  1.1  christos 			    (rep[0] >= lz->pos && !lz->wrapped)) {
    520  1.1  christos 				lz_flush(lz);
    521  1.1  christos 				return false;
    522  1.1  christos 			}
    523  1.1  christos 		}
    524  1.1  christos 		for (int i = 0; i < len; i++)
    525  1.1  christos 			lz_put(lz, lz_peek(lz, rep[0]));
    526  1.1  christos     	}
    527  1.1  christos 	lz_flush(lz);
    528  1.1  christos 	return false;
    529  1.1  christos }
    530  1.1  christos 
    531  1.1  christos /*
    532  1.1  christos  * 0-3	CRC32 of the uncompressed data
    533  1.1  christos  * 4-11 size of the uncompressed data
    534  1.1  christos  * 12-19 member size including header and trailer
    535  1.1  christos  */
    536  1.1  christos #define TRAILER_SIZE 20
    537  1.1  christos 
    538  1.1  christos 
    539  1.1  christos static off_t
    540  1.4  christos lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize)
    541  1.1  christos {
    542  1.1  christos 	struct lz_decoder lz;
    543  1.1  christos 	off_t rv = -1;
    544  1.1  christos 
    545  1.1  christos 	if (lz_create(&lz, fin, fdout, dict_size) == -1)
    546  1.1  christos 		return -1;
    547  1.1  christos 
    548  1.1  christos 	if (!lz_decode_member(&lz))
    549  1.1  christos 		goto out;
    550  1.1  christos 
    551  1.1  christos 	uint8_t trailer[TRAILER_SIZE];
    552  1.1  christos 
    553  1.1  christos 	for(size_t i = 0; i < __arraycount(trailer); i++)
    554  1.1  christos 		trailer[i] = (uint8_t)getc(lz.fin);
    555  1.1  christos 
    556  1.1  christos 	unsigned crc = 0;
    557  1.1  christos 	for (int i = 3; i >= 0; --i) {
    558  1.1  christos 		crc <<= 8;
    559  1.1  christos 		crc += trailer[i];
    560  1.1  christos 	}
    561  1.1  christos 
    562  1.1  christos 	int64_t data_size = 0;
    563  1.1  christos 	for (int i = 11; i >= 4; --i) {
    564  1.1  christos 		data_size <<= 8;
    565  1.1  christos 		data_size += trailer[i];
    566  1.1  christos 	}
    567  1.1  christos 
    568  1.1  christos 	if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz))
    569  1.1  christos 		goto out;
    570  1.1  christos 
    571  1.1  christos 	rv = 0;
    572  1.1  christos 	for (int i = 19; i >= 12; --i) {
    573  1.1  christos 		rv <<= 8;
    574  1.1  christos 		rv += trailer[i];
    575  1.1  christos 	}
    576  1.4  christos 	if (insize)
    577  1.4  christos 		*insize = rv;
    578  1.6  christos #if 0
    579  1.6  christos 	/* Does not work with pipes */
    580  1.1  christos 	rv = ftello(lz.fout);
    581  1.6  christos #else
    582  1.6  christos 	rv = data_size;
    583  1.6  christos #endif
    584  1.1  christos out:
    585  1.1  christos 	lz_destroy(&lz);
    586  1.1  christos 	return rv;
    587  1.1  christos }
    588  1.1  christos 
    589  1.1  christos 
    590  1.1  christos /*
    591  1.1  christos  * 0-3 magic
    592  1.1  christos  * 4 version
    593  1.1  christos  * 5 coded dict_size
    594  1.1  christos  */
    595  1.1  christos #define HDR_SIZE 6
    596  1.1  christos #define MIN_DICTIONARY_SIZE (1 << 12)
    597  1.1  christos #define MAX_DICTIONARY_SIZE (1 << 29)
    598  1.1  christos 
    599  1.1  christos static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 };
    600  1.1  christos 
    601  1.1  christos static unsigned
    602  1.1  christos lz_get_dict_size(unsigned char c)
    603  1.1  christos {
    604  1.1  christos 	unsigned dict_size = 1 << (c & 0x1f);
    605  1.5  christos 	dict_size -= (dict_size >> 2) * ( (c >> 5) & 0x7);
    606  1.1  christos 	if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE)
    607  1.1  christos 		return 0;
    608  1.1  christos 	return dict_size;
    609  1.1  christos }
    610  1.1  christos 
    611  1.1  christos static off_t
    612  1.1  christos unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in)
    613  1.1  christos {
    614  1.1  christos 	if (lz_crc[0] == 0)
    615  1.1  christos 		lz_crc_init();
    616  1.1  christos 
    617  1.1  christos 	char header[HDR_SIZE];
    618  1.1  christos 
    619  1.4  christos 	if (prelen > sizeof(header))
    620  1.4  christos 		return -1;
    621  1.4  christos 	if (pre && prelen)
    622  1.4  christos 		memcpy(header, pre, prelen);
    623  1.4  christos 
    624  1.4  christos 	ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen);
    625  1.4  christos 	switch (nr) {
    626  1.4  christos 	case -1:
    627  1.4  christos 		return -1;
    628  1.1  christos 	case 0:
    629  1.4  christos 		return prelen ? -1 : 0;
    630  1.4  christos 	default:
    631  1.4  christos 		if ((size_t)nr != sizeof(header) - prelen)
    632  1.4  christos 			return -1;
    633  1.1  christos 		break;
    634  1.1  christos 	}
    635  1.1  christos 
    636  1.1  christos 	if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0)
    637  1.1  christos 		return -1;
    638  1.1  christos 
    639  1.1  christos 	unsigned dict_size = lz_get_dict_size(header[5]);
    640  1.1  christos 	if (dict_size == 0)
    641  1.1  christos 		return -1;
    642  1.1  christos 
    643  1.4  christos 	return lz_decode(fin, fout, dict_size, bytes_in);
    644  1.1  christos }
    645