1 1.10 christos /* $NetBSD: unlz.c,v 1.10 2024/05/04 13:18:06 christos Exp $ */ 2 1.1 christos 3 1.1 christos /*- 4 1.1 christos * Copyright (c) 2018 The NetBSD Foundation, Inc. 5 1.1 christos * All rights reserved. 6 1.1 christos * 7 1.1 christos * This code is derived from software contributed to The NetBSD Foundation 8 1.1 christos * by Christos Zoulas. 9 1.1 christos * 10 1.1 christos * Redistribution and use in source and binary forms, with or without 11 1.1 christos * modification, are permitted provided that the following conditions 12 1.1 christos * are met: 13 1.1 christos * 1. Redistributions of source code must retain the above copyright 14 1.1 christos * notice, this list of conditions and the following disclaimer. 15 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 christos * notice, this list of conditions and the following disclaimer in the 17 1.1 christos * documentation and/or other materials provided with the distribution. 18 1.1 christos * 19 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 1.1 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 1.1 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 1.1 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 1.1 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 1.1 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 1.1 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 1.1 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 1.1 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 1.1 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 1.1 christos * POSSIBILITY OF SUCH DAMAGE. 30 1.1 christos */ 31 1.1 christos 32 1.1 christos /* Lzd - Educational decompressor for the lzip format 33 1.1 christos Copyright (C) 2013-2018 Antonio Diaz Diaz. 34 1.1 christos 35 1.1 christos This program is free software. Redistribution and use in source and 36 1.1 christos binary forms, with or without modification, are permitted provided 37 1.1 christos that the following conditions are met: 38 1.1 christos 39 1.1 christos 1. Redistributions of source code must retain the above copyright 40 1.1 christos notice, this list of conditions and the following disclaimer. 41 1.1 christos 42 1.1 christos 2. Redistributions in binary form must reproduce the above copyright 43 1.1 christos notice, this list of conditions and the following disclaimer in the 44 1.1 christos documentation and/or other materials provided with the distribution. 45 1.1 christos 46 1.1 christos This program is distributed in the hope that it will be useful, 47 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of 48 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 49 1.1 christos */ 50 1.1 christos 51 1.1 christos #include <sys/param.h> 52 1.1 christos #include <stdio.h> 53 1.1 christos #include <string.h> 54 1.1 christos #include <stdlib.h> 55 1.1 christos #include <stdint.h> 56 1.1 christos #include <stdbool.h> 57 1.1 christos #include <errno.h> 58 1.1 christos #include <unistd.h> 59 1.1 christos 60 1.1 christos #define LZ_STATES 12 61 1.1 christos 62 1.1 christos #define LITERAL_CONTEXT_BITS 3 63 1.1 christos #define POS_STATE_BITS 2 64 1.1 christos #define POS_STATES (1 << POS_STATE_BITS) 65 1.1 christos #define POS_STATE_MASK (POS_STATES - 1) 66 1.1 christos 67 1.1 christos #define STATES 4 68 1.1 christos #define DIS_SLOT_BITS 6 69 1.1 christos 70 1.1 christos #define DIS_MODEL_START 4 71 1.1 christos #define DIS_MODEL_END 14 72 1.1 christos 73 1.1 christos #define MODELED_DISTANCES (1 << (DIS_MODEL_END / 2)) 74 1.1 christos #define DIS_ALIGN_BITS 4 75 1.1 christos #define DIS_ALIGN_SIZE (1 << DIS_ALIGN_BITS) 76 1.1 christos 77 1.1 christos #define LOW_BITS 3 78 1.1 christos #define MID_BITS 3 79 1.1 christos #define HIGH_BITS 8 80 1.1 christos 81 1.1 christos #define LOW_SYMBOLS (1 << LOW_BITS) 82 1.1 christos #define MID_SYMBOLS (1 << MID_BITS) 83 1.1 christos #define HIGH_SYMBOLS (1 << HIGH_BITS) 84 1.1 christos 85 1.1 christos #define MAX_SYMBOLS (LOW_SYMBOLS + MID_SYMBOLS + HIGH_SYMBOLS) 86 1.1 christos 87 1.1 christos #define MIN_MATCH_LEN 2 88 1.1 christos 89 1.1 christos #define BIT_MODEL_MOVE_BITS 5 90 1.1 christos #define BIT_MODEL_TOTAL_BITS 11 91 1.1 christos #define BIT_MODEL_TOTAL (1 << BIT_MODEL_TOTAL_BITS) 92 1.1 christos #define BIT_MODEL_INIT (BIT_MODEL_TOTAL / 2) 93 1.1 christos 94 1.1 christos static const int lz_st_next[] = { 95 1.1 christos 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5, 96 1.1 christos }; 97 1.1 christos 98 1.1 christos static bool 99 1.1 christos lz_st_is_char(int st) { 100 1.1 christos return st < 7; 101 1.1 christos } 102 1.1 christos 103 1.1 christos static int 104 1.1 christos lz_st_get_char(int st) { 105 1.1 christos return lz_st_next[st]; 106 1.1 christos } 107 1.1 christos 108 1.1 christos static int 109 1.1 christos lz_st_get_match(int st) { 110 1.1 christos return st < 7 ? 7 : 10; 111 1.1 christos } 112 1.1 christos 113 1.1 christos static int 114 1.1 christos lz_st_get_rep(int st) { 115 1.1 christos return st < 7 ? 8 : 11; 116 1.1 christos } 117 1.1 christos 118 1.1 christos static int 119 1.1 christos lz_st_get_short_rep(int st) { 120 1.1 christos return st < 7 ? 9 : 11; 121 1.1 christos } 122 1.1 christos 123 1.1 christos struct lz_len_model { 124 1.1 christos int choice1; 125 1.1 christos int choice2; 126 1.1 christos int bm_low[POS_STATES][LOW_SYMBOLS]; 127 1.1 christos int bm_mid[POS_STATES][MID_SYMBOLS]; 128 1.1 christos int bm_high[HIGH_SYMBOLS]; 129 1.1 christos }; 130 1.1 christos 131 1.1 christos static uint32_t lz_crc[256]; 132 1.1 christos 133 1.1 christos static void 134 1.1 christos lz_crc_init(void) 135 1.1 christos { 136 1.1 christos for (unsigned i = 0; i < __arraycount(lz_crc); i++) { 137 1.1 christos unsigned c = i; 138 1.1 christos for (unsigned j = 0; j < 8; j++) { 139 1.1 christos if (c & 1) 140 1.1 christos c = 0xEDB88320U ^ (c >> 1); 141 1.1 christos else 142 1.1 christos c >>= 1; 143 1.1 christos } 144 1.1 christos lz_crc[i] = c; 145 1.1 christos } 146 1.1 christos } 147 1.1 christos 148 1.1 christos static void 149 1.1 christos lz_crc_update(uint32_t *crc, const uint8_t *buf, size_t len) 150 1.1 christos { 151 1.1 christos for (size_t i = 0; i < len; i++) 152 1.1 christos *crc = lz_crc[(*crc ^ buf[i]) & 0xFF] ^ (*crc >> 8); 153 1.1 christos } 154 1.1 christos 155 1.1 christos struct lz_range_decoder { 156 1.1 christos FILE *fp; 157 1.1 christos uint32_t code; 158 1.1 christos uint32_t range; 159 1.1 christos }; 160 1.1 christos 161 1.1 christos static int 162 1.1 christos lz_rd_create(struct lz_range_decoder *rd, FILE *fp) 163 1.1 christos { 164 1.1 christos rd->fp = fp; 165 1.1 christos rd->code = 0; 166 1.1 christos rd->range = ~0; 167 1.1 christos for (int i = 0; i < 5; i++) 168 1.1 christos rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp); 169 1.1 christos return ferror(rd->fp) ? -1 : 0; 170 1.1 christos } 171 1.1 christos 172 1.1 christos static unsigned 173 1.1 christos lz_rd_decode(struct lz_range_decoder *rd, int num_bits) 174 1.1 christos { 175 1.1 christos unsigned symbol = 0; 176 1.1 christos 177 1.1 christos for (int i = num_bits; i > 0; i--) { 178 1.1 christos rd->range >>= 1; 179 1.1 christos symbol <<= 1; 180 1.1 christos if (rd->code >= rd->range) { 181 1.1 christos rd->code -= rd->range; 182 1.1 christos symbol |= 1; 183 1.1 christos } 184 1.1 christos if (rd->range <= 0x00FFFFFFU) { 185 1.1 christos rd->range <<= 8; 186 1.1 christos rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp); 187 1.1 christos } 188 1.1 christos } 189 1.1 christos 190 1.1 christos return symbol; 191 1.1 christos } 192 1.1 christos 193 1.1 christos static unsigned 194 1.1 christos lz_rd_decode_bit(struct lz_range_decoder *rd, int *bm) 195 1.1 christos { 196 1.1 christos unsigned symbol; 197 1.1 christos const uint32_t bound = (rd->range >> BIT_MODEL_TOTAL_BITS) * *bm; 198 1.1 christos 199 1.1 christos if(rd->code < bound) { 200 1.1 christos rd->range = bound; 201 1.1 christos *bm += (BIT_MODEL_TOTAL - *bm) >> BIT_MODEL_MOVE_BITS; 202 1.1 christos symbol = 0; 203 1.1 christos } 204 1.1 christos else { 205 1.1 christos rd->range -= bound; 206 1.1 christos rd->code -= bound; 207 1.1 christos *bm -= *bm >> BIT_MODEL_MOVE_BITS; 208 1.1 christos symbol = 1; 209 1.1 christos } 210 1.1 christos 211 1.1 christos if (rd->range <= 0x00FFFFFFU) { 212 1.1 christos rd->range <<= 8; 213 1.1 christos rd->code = (rd->code << 8) | (uint8_t)getc(rd->fp); 214 1.1 christos } 215 1.1 christos return symbol; 216 1.1 christos } 217 1.1 christos 218 1.1 christos static unsigned 219 1.1 christos lz_rd_decode_tree(struct lz_range_decoder *rd, int *bm, int num_bits) 220 1.1 christos { 221 1.1 christos unsigned symbol = 1; 222 1.1 christos 223 1.1 christos for (int i = 0; i < num_bits; i++) 224 1.1 christos symbol = (symbol << 1) | lz_rd_decode_bit(rd, &bm[symbol]); 225 1.1 christos 226 1.1 christos return symbol - (1 << num_bits); 227 1.1 christos } 228 1.1 christos 229 1.1 christos static unsigned 230 1.1 christos lz_rd_decode_tree_reversed(struct lz_range_decoder *rd, int *bm, int num_bits) 231 1.1 christos { 232 1.1 christos unsigned symbol = lz_rd_decode_tree(rd, bm, num_bits); 233 1.1 christos unsigned reversed_symbol = 0; 234 1.1 christos 235 1.1 christos for (int i = 0; i < num_bits; i++) { 236 1.1 christos reversed_symbol = (reversed_symbol << 1) | (symbol & 1); 237 1.1 christos symbol >>= 1; 238 1.1 christos } 239 1.1 christos 240 1.1 christos return reversed_symbol; 241 1.1 christos } 242 1.1 christos 243 1.1 christos static unsigned 244 1.1 christos lz_rd_decode_matched(struct lz_range_decoder *rd, int *bm, int match_byte) 245 1.1 christos { 246 1.1 christos unsigned symbol = 1; 247 1.1 christos 248 1.1 christos for (int i = 7; i >= 0; i--) { 249 1.1 christos const unsigned match_bit = (match_byte >> i) & 1; 250 1.1 christos const unsigned bit = lz_rd_decode_bit(rd, 251 1.1 christos &bm[symbol + (match_bit << 8) + 0x100]); 252 1.1 christos symbol = (symbol << 1) | bit; 253 1.1 christos if (match_bit != bit) { 254 1.1 christos while (symbol < 0x100) { 255 1.1 christos symbol = (symbol << 1) | 256 1.1 christos lz_rd_decode_bit(rd, &bm[symbol]); 257 1.1 christos } 258 1.1 christos break; 259 1.1 christos } 260 1.1 christos } 261 1.1 christos return symbol & 0xFF; 262 1.1 christos } 263 1.1 christos 264 1.1 christos static unsigned 265 1.1 christos lz_rd_decode_len(struct lz_range_decoder *rd, struct lz_len_model *lm, 266 1.1 christos int pos_state) 267 1.1 christos { 268 1.1 christos if (lz_rd_decode_bit(rd, &lm->choice1) == 0) 269 1.1 christos return lz_rd_decode_tree(rd, lm->bm_low[pos_state], LOW_BITS); 270 1.1 christos 271 1.1 christos if (lz_rd_decode_bit(rd, &lm->choice2) == 0) { 272 1.1 christos return LOW_SYMBOLS + 273 1.1 christos lz_rd_decode_tree(rd, lm->bm_mid[pos_state], MID_BITS); 274 1.1 christos } 275 1.1 christos 276 1.1 christos return LOW_SYMBOLS + MID_SYMBOLS + 277 1.1 christos lz_rd_decode_tree(rd, lm->bm_high, HIGH_BITS); 278 1.1 christos } 279 1.1 christos 280 1.1 christos struct lz_decoder { 281 1.1 christos FILE *fin, *fout; 282 1.1 christos off_t pos, ppos, spos, dict_size; 283 1.1 christos bool wrapped; 284 1.1 christos uint32_t crc; 285 1.1 christos uint8_t *obuf; 286 1.1 christos struct lz_range_decoder rdec; 287 1.1 christos }; 288 1.1 christos 289 1.1 christos static int 290 1.1 christos lz_flush(struct lz_decoder *lz) 291 1.1 christos { 292 1.1 christos off_t offs = lz->pos - lz->spos; 293 1.1 christos if (offs <= 0) 294 1.1 christos return -1; 295 1.1 christos 296 1.1 christos size_t size = (size_t)offs; 297 1.1 christos lz_crc_update(&lz->crc, lz->obuf + lz->spos, size); 298 1.9 christos if (!tflag && fwrite(lz->obuf + lz->spos, 1, size, lz->fout) != size) 299 1.1 christos return -1; 300 1.1 christos 301 1.1 christos lz->wrapped = lz->pos >= lz->dict_size; 302 1.1 christos if (lz->wrapped) { 303 1.1 christos lz->ppos += lz->pos; 304 1.1 christos lz->pos = 0; 305 1.1 christos } 306 1.1 christos lz->spos = lz->pos; 307 1.1 christos return 0; 308 1.1 christos } 309 1.1 christos 310 1.1 christos static void 311 1.1 christos lz_destroy(struct lz_decoder *lz) 312 1.1 christos { 313 1.1 christos if (lz->fin) 314 1.1 christos fclose(lz->fin); 315 1.1 christos if (lz->fout) 316 1.1 christos fclose(lz->fout); 317 1.1 christos free(lz->obuf); 318 1.1 christos } 319 1.1 christos 320 1.1 christos static int 321 1.1 christos lz_create(struct lz_decoder *lz, int fin, int fdout, int dict_size) 322 1.1 christos { 323 1.1 christos memset(lz, 0, sizeof(*lz)); 324 1.1 christos 325 1.1 christos lz->fin = fdopen(dup(fin), "r"); 326 1.1 christos if (lz->fin == NULL) 327 1.1 christos goto out; 328 1.1 christos 329 1.1 christos lz->fout = fdopen(dup(fdout), "w"); 330 1.1 christos if (lz->fout == NULL) 331 1.1 christos goto out; 332 1.1 christos 333 1.1 christos lz->pos = lz->ppos = lz->spos = 0; 334 1.1 christos lz->crc = ~0; 335 1.1 christos lz->dict_size = dict_size; 336 1.1 christos lz->wrapped = false; 337 1.1 christos 338 1.1 christos lz->obuf = malloc(dict_size); 339 1.1 christos if (lz->obuf == NULL) 340 1.1 christos goto out; 341 1.1 christos 342 1.1 christos if (lz_rd_create(&lz->rdec, lz->fin) == -1) 343 1.1 christos goto out; 344 1.1 christos return 0; 345 1.1 christos out: 346 1.1 christos lz_destroy(lz); 347 1.1 christos return -1; 348 1.1 christos } 349 1.1 christos 350 1.1 christos static uint8_t 351 1.1 christos lz_peek(const struct lz_decoder *lz, unsigned ahead) 352 1.1 christos { 353 1.1 christos off_t diff = lz->pos - ahead - 1; 354 1.1 christos 355 1.1 christos if (diff >= 0) 356 1.1 christos return lz->obuf[diff]; 357 1.1 christos 358 1.1 christos if (lz->wrapped) 359 1.1 christos return lz->obuf[lz->dict_size + diff]; 360 1.1 christos 361 1.1 christos return 0; 362 1.1 christos } 363 1.1 christos 364 1.1 christos static void 365 1.1 christos lz_put(struct lz_decoder *lz, uint8_t b) 366 1.1 christos { 367 1.1 christos lz->obuf[lz->pos++] = b; 368 1.1 christos if (lz->dict_size == lz->pos) 369 1.1 christos lz_flush(lz); 370 1.1 christos } 371 1.1 christos 372 1.1 christos static off_t 373 1.1 christos lz_get_data_position(const struct lz_decoder *lz) 374 1.1 christos { 375 1.1 christos return lz->ppos + lz->pos; 376 1.1 christos } 377 1.1 christos 378 1.1 christos static unsigned 379 1.1 christos lz_get_crc(const struct lz_decoder *lz) 380 1.1 christos { 381 1.1 christos return lz->crc ^ 0xffffffffU; 382 1.1 christos } 383 1.1 christos 384 1.1 christos static void 385 1.1 christos lz_bm_init(int *a, size_t l) 386 1.1 christos { 387 1.1 christos for (size_t i = 0; i < l; i++) 388 1.1 christos a[i] = BIT_MODEL_INIT; 389 1.1 christos } 390 1.1 christos 391 1.1 christos #define LZ_BM_INIT(a) lz_bm_init(a, __arraycount(a)) 392 1.1 christos #define LZ_BM_INIT2(a) do { \ 393 1.1 christos size_t l = __arraycount(a[0]); \ 394 1.1 christos for (size_t i = 0; i < __arraycount(a); i++) \ 395 1.1 christos lz_bm_init(a[i], l); \ 396 1.7 rillig } while (0) 397 1.1 christos 398 1.1 christos #define LZ_MODEL_INIT(a) do { \ 399 1.1 christos a.choice1 = BIT_MODEL_INIT; \ 400 1.1 christos a.choice2 = BIT_MODEL_INIT; \ 401 1.1 christos LZ_BM_INIT2(a.bm_low); \ 402 1.1 christos LZ_BM_INIT2(a.bm_mid); \ 403 1.1 christos LZ_BM_INIT(a.bm_high); \ 404 1.7 rillig } while (0) 405 1.1 christos 406 1.1 christos static bool 407 1.1 christos lz_decode_member(struct lz_decoder *lz) 408 1.1 christos { 409 1.1 christos int bm_literal[1 << LITERAL_CONTEXT_BITS][0x300]; 410 1.1 christos int bm_match[LZ_STATES][POS_STATES]; 411 1.1 christos int bm_rep[4][LZ_STATES]; 412 1.1 christos int bm_len[LZ_STATES][POS_STATES]; 413 1.1 christos int bm_dis_slot[LZ_STATES][1 << DIS_SLOT_BITS]; 414 1.1 christos int bm_dis[MODELED_DISTANCES - DIS_MODEL_END + 1]; 415 1.1 christos int bm_align[DIS_ALIGN_SIZE]; 416 1.1 christos 417 1.1 christos LZ_BM_INIT2(bm_literal); 418 1.1 christos LZ_BM_INIT2(bm_match); 419 1.1 christos LZ_BM_INIT2(bm_rep); 420 1.1 christos LZ_BM_INIT2(bm_len); 421 1.1 christos LZ_BM_INIT2(bm_dis_slot); 422 1.1 christos LZ_BM_INIT(bm_dis); 423 1.1 christos LZ_BM_INIT(bm_align); 424 1.1 christos 425 1.1 christos struct lz_len_model match_len_model; 426 1.1 christos struct lz_len_model rep_len_model; 427 1.1 christos 428 1.1 christos LZ_MODEL_INIT(match_len_model); 429 1.1 christos LZ_MODEL_INIT(rep_len_model); 430 1.1 christos 431 1.1 christos struct lz_range_decoder *rd = &lz->rdec; 432 1.1 christos unsigned rep[4] = { 0 }; 433 1.1 christos 434 1.1 christos 435 1.1 christos int state = 0; 436 1.1 christos 437 1.1 christos while (!feof(lz->fin) && !ferror(lz->fin)) { 438 1.1 christos const int pos_state = lz_get_data_position(lz) & POS_STATE_MASK; 439 1.1 christos // bit 1 440 1.1 christos if (lz_rd_decode_bit(rd, &bm_match[state][pos_state]) == 0) { 441 1.1 christos const uint8_t prev_byte = lz_peek(lz, 0); 442 1.1 christos const int literal_state = 443 1.1 christos prev_byte >> (8 - LITERAL_CONTEXT_BITS); 444 1.1 christos int *bm = bm_literal[literal_state]; 445 1.1 christos if (lz_st_is_char(state)) 446 1.1 christos lz_put(lz, lz_rd_decode_tree(rd, bm, 8)); 447 1.1 christos else { 448 1.1 christos int peek = lz_peek(lz, rep[0]); 449 1.1 christos lz_put(lz, lz_rd_decode_matched(rd, bm, peek)); 450 1.1 christos } 451 1.1 christos state = lz_st_get_char(state); 452 1.1 christos continue; 453 1.1 christos } 454 1.1 christos int len; 455 1.1 christos // bit 2 456 1.1 christos if (lz_rd_decode_bit(rd, &bm_rep[0][state]) != 0) { 457 1.1 christos // bit 3 458 1.1 christos if (lz_rd_decode_bit(rd, &bm_rep[1][state]) == 0) { 459 1.1 christos // bit 4 460 1.1 christos if (lz_rd_decode_bit(rd, 461 1.1 christos &bm_len[state][pos_state]) == 0) 462 1.1 christos { 463 1.1 christos state = lz_st_get_short_rep(state); 464 1.1 christos lz_put(lz, lz_peek(lz, rep[0])); 465 1.1 christos continue; 466 1.1 christos } 467 1.1 christos } else { 468 1.1 christos unsigned distance; 469 1.1 christos // bit 4 470 1.1 christos if (lz_rd_decode_bit(rd, &bm_rep[2][state]) 471 1.1 christos == 0) 472 1.1 christos distance = rep[1]; 473 1.1 christos else { 474 1.1 christos // bit 5 475 1.1 christos if (lz_rd_decode_bit(rd, 476 1.1 christos &bm_rep[3][state]) == 0) 477 1.1 christos distance = rep[2]; 478 1.1 christos else { 479 1.1 christos distance = rep[3]; 480 1.1 christos rep[3] = rep[2]; 481 1.1 christos } 482 1.1 christos rep[2] = rep[1]; 483 1.1 christos } 484 1.1 christos rep[1] = rep[0]; 485 1.1 christos rep[0] = distance; 486 1.1 christos } 487 1.1 christos state = lz_st_get_rep(state); 488 1.1 christos len = MIN_MATCH_LEN + 489 1.1 christos lz_rd_decode_len(rd, &rep_len_model, pos_state); 490 1.1 christos } else { 491 1.1 christos rep[3] = rep[2]; rep[2] = rep[1]; rep[1] = rep[0]; 492 1.1 christos len = MIN_MATCH_LEN + 493 1.1 christos lz_rd_decode_len(rd, &match_len_model, pos_state); 494 1.1 christos const int len_state = 495 1.1 christos MIN(len - MIN_MATCH_LEN, STATES - 1); 496 1.1 christos rep[0] = lz_rd_decode_tree(rd, bm_dis_slot[len_state], 497 1.1 christos DIS_SLOT_BITS); 498 1.1 christos if (rep[0] >= DIS_MODEL_START) { 499 1.1 christos const unsigned dis_slot = rep[0]; 500 1.1 christos const int direct_bits = (dis_slot >> 1) - 1; 501 1.1 christos rep[0] = (2 | (dis_slot & 1)) << direct_bits; 502 1.1 christos if (dis_slot < DIS_MODEL_END) 503 1.1 christos rep[0] += lz_rd_decode_tree_reversed(rd, 504 1.1 christos &bm_dis[rep[0] - dis_slot], 505 1.1 christos direct_bits); 506 1.1 christos else { 507 1.1 christos rep[0] += lz_rd_decode(rd, direct_bits 508 1.1 christos - DIS_ALIGN_BITS) << DIS_ALIGN_BITS; 509 1.1 christos rep[0] += lz_rd_decode_tree_reversed(rd, 510 1.1 christos bm_align, DIS_ALIGN_BITS); 511 1.1 christos if (rep[0] == 0xFFFFFFFFU) { 512 1.1 christos lz_flush(lz); 513 1.1 christos return len == MIN_MATCH_LEN; 514 1.1 christos } 515 1.1 christos } 516 1.1 christos } 517 1.1 christos state = lz_st_get_match(state); 518 1.1 christos if (rep[0] >= lz->dict_size || 519 1.1 christos (rep[0] >= lz->pos && !lz->wrapped)) { 520 1.1 christos lz_flush(lz); 521 1.1 christos return false; 522 1.1 christos } 523 1.1 christos } 524 1.1 christos for (int i = 0; i < len; i++) 525 1.1 christos lz_put(lz, lz_peek(lz, rep[0])); 526 1.1 christos } 527 1.1 christos lz_flush(lz); 528 1.1 christos return false; 529 1.1 christos } 530 1.1 christos 531 1.1 christos /* 532 1.1 christos * 0-3 CRC32 of the uncompressed data 533 1.1 christos * 4-11 size of the uncompressed data 534 1.1 christos * 12-19 member size including header and trailer 535 1.1 christos */ 536 1.1 christos #define TRAILER_SIZE 20 537 1.1 christos 538 1.1 christos 539 1.1 christos static off_t 540 1.4 christos lz_decode(int fin, int fdout, unsigned dict_size, off_t *insize) 541 1.1 christos { 542 1.1 christos struct lz_decoder lz; 543 1.1 christos off_t rv = -1; 544 1.1 christos 545 1.1 christos if (lz_create(&lz, fin, fdout, dict_size) == -1) 546 1.1 christos return -1; 547 1.1 christos 548 1.1 christos if (!lz_decode_member(&lz)) 549 1.1 christos goto out; 550 1.1 christos 551 1.1 christos uint8_t trailer[TRAILER_SIZE]; 552 1.1 christos 553 1.1 christos for(size_t i = 0; i < __arraycount(trailer); i++) 554 1.1 christos trailer[i] = (uint8_t)getc(lz.fin); 555 1.1 christos 556 1.1 christos unsigned crc = 0; 557 1.1 christos for (int i = 3; i >= 0; --i) { 558 1.1 christos crc <<= 8; 559 1.1 christos crc += trailer[i]; 560 1.1 christos } 561 1.1 christos 562 1.1 christos int64_t data_size = 0; 563 1.1 christos for (int i = 11; i >= 4; --i) { 564 1.1 christos data_size <<= 8; 565 1.1 christos data_size += trailer[i]; 566 1.1 christos } 567 1.1 christos 568 1.1 christos if (crc != lz_get_crc(&lz) || data_size != lz_get_data_position(&lz)) 569 1.1 christos goto out; 570 1.1 christos 571 1.1 christos rv = 0; 572 1.1 christos for (int i = 19; i >= 12; --i) { 573 1.1 christos rv <<= 8; 574 1.1 christos rv += trailer[i]; 575 1.1 christos } 576 1.4 christos if (insize) 577 1.4 christos *insize = rv; 578 1.6 christos #if 0 579 1.6 christos /* Does not work with pipes */ 580 1.1 christos rv = ftello(lz.fout); 581 1.6 christos #else 582 1.6 christos rv = data_size; 583 1.6 christos #endif 584 1.1 christos out: 585 1.1 christos lz_destroy(&lz); 586 1.1 christos return rv; 587 1.1 christos } 588 1.1 christos 589 1.1 christos 590 1.1 christos /* 591 1.1 christos * 0-3 magic 592 1.1 christos * 4 version 593 1.1 christos * 5 coded dict_size 594 1.1 christos */ 595 1.1 christos #define HDR_SIZE 6 596 1.1 christos #define MIN_DICTIONARY_SIZE (1 << 12) 597 1.1 christos #define MAX_DICTIONARY_SIZE (1 << 29) 598 1.1 christos 599 1.1 christos static const char hdrmagic[] = { 'L', 'Z', 'I', 'P', 1 }; 600 1.1 christos 601 1.1 christos static unsigned 602 1.1 christos lz_get_dict_size(unsigned char c) 603 1.1 christos { 604 1.1 christos unsigned dict_size = 1 << (c & 0x1f); 605 1.10 christos dict_size -= (dict_size >> 4) * ( (c >> 5) & 0x7); 606 1.1 christos if (dict_size < MIN_DICTIONARY_SIZE || dict_size > MAX_DICTIONARY_SIZE) 607 1.1 christos return 0; 608 1.1 christos return dict_size; 609 1.1 christos } 610 1.1 christos 611 1.1 christos static off_t 612 1.1 christos unlz(int fin, int fout, char *pre, size_t prelen, off_t *bytes_in) 613 1.1 christos { 614 1.1 christos if (lz_crc[0] == 0) 615 1.1 christos lz_crc_init(); 616 1.1 christos 617 1.1 christos char header[HDR_SIZE]; 618 1.1 christos 619 1.4 christos if (pre && prelen) 620 1.4 christos memcpy(header, pre, prelen); 621 1.4 christos 622 1.4 christos ssize_t nr = read(fin, header + prelen, sizeof(header) - prelen); 623 1.4 christos switch (nr) { 624 1.4 christos case -1: 625 1.4 christos return -1; 626 1.1 christos case 0: 627 1.4 christos return prelen ? -1 : 0; 628 1.4 christos default: 629 1.4 christos if ((size_t)nr != sizeof(header) - prelen) 630 1.4 christos return -1; 631 1.1 christos break; 632 1.1 christos } 633 1.1 christos 634 1.1 christos if (memcmp(header, hdrmagic, sizeof(hdrmagic)) != 0) 635 1.1 christos return -1; 636 1.1 christos 637 1.1 christos unsigned dict_size = lz_get_dict_size(header[5]); 638 1.1 christos if (dict_size == 0) 639 1.1 christos return -1; 640 1.1 christos 641 1.4 christos return lz_decode(fin, fout, dict_size, bytes_in); 642 1.1 christos } 643