/* $NetBSD: trees.c,v 1.7 2024/09/22 19:12:27 christos Exp $ */

/* trees.c -- output deflated data using Huffman coding
 * Copyright (C) 1995-2024 Jean-loup Gailly
 * detect_data_type() function provided freely by Cosmin Truta, 2006
 * For conditions of distribution and use, see copyright notice in zlib.h
 */

/*
 *  ALGORITHM
 *
 *      The "deflation" process uses several Huffman trees. The more
 *      common source values are represented by shorter bit sequences.
 *
 *      Each code tree is stored in a compressed form which is itself
 *      a Huffman encoding of the lengths of all the code strings (in
 *      ascending order by source values).  The actual code strings are
 *      reconstructed from the lengths in the inflate process, as described
 *      in the deflate specification.
 *
 *  REFERENCES
 *
 *      Deutsch, L.P., "'Deflate' Compressed Data Format Specification".
 *      Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
 *
 *      Storer, James A.
 *          Data Compression:  Methods and Theory, pp. 49-50.
 *          Computer Science Press, 1988.  ISBN 0-7167-8156-5.
 *
 *      Sedgewick, R.
 *          Algorithms, p. 290.
 *          Addison-Wesley, 1983. ISBN 0-201-06672-6.
33 1.1 christos */ 34 1.1 christos 35 1.6 christos /* @(#) Id */ 36 1.1 christos 37 1.1 christos /* #define GEN_TREES_H */ 38 1.1 christos 39 1.1 christos #include "deflate.h" 40 1.1 christos 41 1.2 christos #ifdef ZLIB_DEBUG 42 1.1 christos # include <ctype.h> 43 1.1 christos #endif 44 1.1 christos 45 1.1 christos /* =========================================================================== 46 1.1 christos * Constants 47 1.1 christos */ 48 1.1 christos 49 1.1 christos #define MAX_BL_BITS 7 50 1.1 christos /* Bit length codes must not exceed MAX_BL_BITS bits */ 51 1.1 christos 52 1.1 christos #define END_BLOCK 256 53 1.1 christos /* end of block literal code */ 54 1.1 christos 55 1.1 christos #define REP_3_6 16 56 1.1 christos /* repeat previous bit length 3-6 times (2 bits of repeat count) */ 57 1.1 christos 58 1.1 christos #define REPZ_3_10 17 59 1.1 christos /* repeat a zero length 3-10 times (3 bits of repeat count) */ 60 1.1 christos 61 1.1 christos #define REPZ_11_138 18 62 1.1 christos /* repeat a zero length 11-138 times (7 bits of repeat count) */ 63 1.1 christos 64 1.1 christos local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */ 65 1.1 christos = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0}; 66 1.1 christos 67 1.1 christos local const int extra_dbits[D_CODES] /* extra bits for each distance code */ 68 1.1 christos = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; 69 1.1 christos 70 1.1 christos local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */ 71 1.1 christos = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7}; 72 1.1 christos 73 1.1 christos local const uch bl_order[BL_CODES] 74 1.1 christos = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15}; 75 1.1 christos /* The lengths of the bit length codes are sent in order of decreasing 76 1.1 christos * probability, to avoid transmitting the lengths for unused bit length codes. 
77 1.1 christos */ 78 1.1 christos 79 1.1 christos /* =========================================================================== 80 1.1 christos * Local data. These are initialized only once. 81 1.1 christos */ 82 1.1 christos 83 1.1 christos #define DIST_CODE_LEN 512 /* see definition of array dist_code below */ 84 1.1 christos 85 1.1 christos #if defined(GEN_TREES_H) || !defined(STDC) 86 1.1 christos /* non ANSI compilers may not accept trees.h */ 87 1.1 christos 88 1.1 christos local ct_data static_ltree[L_CODES+2]; 89 1.1 christos /* The static literal tree. Since the bit lengths are imposed, there is no 90 1.1 christos * need for the L_CODES extra codes used during heap construction. However 91 1.1 christos * The codes 286 and 287 are needed to build a canonical tree (see _tr_init 92 1.1 christos * below). 93 1.1 christos */ 94 1.1 christos 95 1.1 christos local ct_data static_dtree[D_CODES]; 96 1.1 christos /* The static distance tree. (Actually a trivial tree since all codes use 97 1.1 christos * 5 bits.) 98 1.1 christos */ 99 1.1 christos 100 1.1 christos uch _dist_code[DIST_CODE_LEN]; 101 1.1 christos /* Distance codes. The first 256 values correspond to the distances 102 1.1 christos * 3 .. 258, the last 256 values correspond to the top 8 bits of 103 1.1 christos * the 15 bit distances. 
104 1.1 christos */ 105 1.1 christos 106 1.1 christos uch _length_code[MAX_MATCH-MIN_MATCH+1]; 107 1.1 christos /* length code for each normalized match length (0 == MIN_MATCH) */ 108 1.1 christos 109 1.1 christos local int base_length[LENGTH_CODES]; 110 1.1 christos /* First normalized length for each code (0 = MIN_MATCH) */ 111 1.1 christos 112 1.1 christos local int base_dist[D_CODES]; 113 1.1 christos /* First normalized distance for each code (0 = distance of 1) */ 114 1.1 christos 115 1.1 christos #else 116 1.1 christos # include "trees.h" 117 1.1 christos #endif /* GEN_TREES_H */ 118 1.1 christos 119 1.1 christos struct static_tree_desc_s { 120 1.1 christos const ct_data *static_tree; /* static tree or NULL */ 121 1.1 christos const intf *extra_bits; /* extra bits for each code or NULL */ 122 1.1 christos int extra_base; /* base index for extra_bits */ 123 1.1 christos int elems; /* max number of elements in the tree */ 124 1.1 christos int max_length; /* max bit length for the codes */ 125 1.1 christos }; 126 1.1 christos 127 1.7 christos #ifdef NO_INIT_GLOBAL_POINTERS 128 1.7 christos # define TCONST 129 1.7 christos #else 130 1.7 christos # define TCONST const 131 1.7 christos #endif 132 1.7 christos 133 1.7 christos local TCONST static_tree_desc static_l_desc = 134 1.1 christos {static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS}; 135 1.1 christos 136 1.7 christos local TCONST static_tree_desc static_d_desc = 137 1.1 christos {static_dtree, extra_dbits, 0, D_CODES, MAX_BITS}; 138 1.1 christos 139 1.7 christos local TCONST static_tree_desc static_bl_desc = 140 1.1 christos {(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS}; 141 1.1 christos 142 1.1 christos /* =========================================================================== 143 1.7 christos * Output a short LSB first on the stream. 144 1.7 christos * IN assertion: there is enough room in pendingBuf. 
145 1.7 christos */ 146 1.7 christos #define put_short(s, w) { \ 147 1.7 christos put_byte(s, (uch)((w) & 0xff)); \ 148 1.7 christos put_byte(s, (uch)((ush)(w) >> 8)); \ 149 1.7 christos } 150 1.7 christos 151 1.7 christos /* =========================================================================== 152 1.7 christos * Reverse the first len bits of a code, using straightforward code (a faster 153 1.7 christos * method would use a table) 154 1.7 christos * IN assertion: 1 <= len <= 15 155 1.1 christos */ 156 1.7 christos local unsigned bi_reverse(unsigned code, int len) { 157 1.7 christos register unsigned res = 0; 158 1.7 christos do { 159 1.7 christos res |= code & 1; 160 1.7 christos code >>= 1, res <<= 1; 161 1.7 christos } while (--len > 0); 162 1.7 christos return res >> 1; 163 1.7 christos } 164 1.1 christos 165 1.7 christos /* =========================================================================== 166 1.7 christos * Flush the bit buffer, keeping at most 7 bits in it. 167 1.7 christos */ 168 1.7 christos local void bi_flush(deflate_state *s) { 169 1.7 christos if (s->bi_valid == 16) { 170 1.7 christos put_short(s, s->bi_buf); 171 1.7 christos s->bi_buf = 0; 172 1.7 christos s->bi_valid = 0; 173 1.7 christos } else if (s->bi_valid >= 8) { 174 1.7 christos put_byte(s, (Byte)s->bi_buf); 175 1.7 christos s->bi_buf >>= 8; 176 1.7 christos s->bi_valid -= 8; 177 1.7 christos } 178 1.7 christos } 179 1.7 christos 180 1.7 christos /* =========================================================================== 181 1.7 christos * Flush the bit buffer and align the output on a byte boundary 182 1.7 christos */ 183 1.7 christos local void bi_windup(deflate_state *s) { 184 1.7 christos if (s->bi_valid > 8) { 185 1.7 christos put_short(s, s->bi_buf); 186 1.7 christos } else if (s->bi_valid > 0) { 187 1.7 christos put_byte(s, (Byte)s->bi_buf); 188 1.7 christos } 189 1.7 christos s->bi_buf = 0; 190 1.7 christos s->bi_valid = 0; 191 1.7 christos #ifdef ZLIB_DEBUG 192 1.7 
christos s->bits_sent = (s->bits_sent + 7) & ~7; 193 1.7 christos #endif 194 1.7 christos } 195 1.7 christos 196 1.7 christos /* =========================================================================== 197 1.7 christos * Generate the codes for a given tree and bit counts (which need not be 198 1.7 christos * optimal). 199 1.7 christos * IN assertion: the array bl_count contains the bit length statistics for 200 1.7 christos * the given tree and the field len is set for all tree elements. 201 1.7 christos * OUT assertion: the field code is set for all tree elements of non 202 1.7 christos * zero code length. 203 1.7 christos */ 204 1.7 christos local void gen_codes(ct_data *tree, int max_code, ushf *bl_count) { 205 1.7 christos ush next_code[MAX_BITS+1]; /* next code value for each bit length */ 206 1.7 christos unsigned code = 0; /* running code value */ 207 1.7 christos int bits; /* bit index */ 208 1.7 christos int n; /* code index */ 209 1.7 christos 210 1.7 christos /* The distribution counts are first used to generate the code values 211 1.7 christos * without bit reversal. 212 1.7 christos */ 213 1.7 christos for (bits = 1; bits <= MAX_BITS; bits++) { 214 1.7 christos code = (code + bl_count[bits - 1]) << 1; 215 1.7 christos next_code[bits] = (ush)code; 216 1.7 christos } 217 1.7 christos /* Check that the bit counts in bl_count are consistent. The last code 218 1.7 christos * must be all ones. 
219 1.7 christos */ 220 1.7 christos Assert (code + bl_count[MAX_BITS] - 1 == (1 << MAX_BITS) - 1, 221 1.7 christos "inconsistent bit counts"); 222 1.7 christos Tracev((stderr,"\ngen_codes: max_code %d ", max_code)); 223 1.7 christos 224 1.7 christos for (n = 0; n <= max_code; n++) { 225 1.7 christos int len = tree[n].Len; 226 1.7 christos if (len == 0) continue; 227 1.7 christos /* Now reverse the bits */ 228 1.7 christos tree[n].Code = (ush)bi_reverse(next_code[len]++, len); 229 1.7 christos 230 1.7 christos Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ", 231 1.7 christos n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len] - 1)); 232 1.7 christos } 233 1.7 christos } 234 1.1 christos 235 1.1 christos #ifdef GEN_TREES_H 236 1.7 christos local void gen_trees_header(void); 237 1.1 christos #endif 238 1.1 christos 239 1.2 christos #ifndef ZLIB_DEBUG 240 1.1 christos # define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) 241 1.1 christos /* Send a code of the given tree. c and tree must not have side effects */ 242 1.1 christos 243 1.4 christos #else /* !ZLIB_DEBUG */ 244 1.1 christos # define send_code(s, c, tree) \ 245 1.1 christos { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \ 246 1.1 christos send_bits(s, tree[c].Code, tree[c].Len); } 247 1.1 christos #endif 248 1.1 christos 249 1.1 christos /* =========================================================================== 250 1.1 christos * Send a value on a given number of bits. 251 1.1 christos * IN assertion: length <= 16 and value fits in length bits. 
252 1.1 christos */ 253 1.2 christos #ifdef ZLIB_DEBUG 254 1.7 christos local void send_bits(deflate_state *s, int value, int length) { 255 1.1 christos Tracevv((stderr," l %2d v %4x ", length, value)); 256 1.1 christos Assert(length > 0 && length <= 15, "invalid length"); 257 1.1 christos s->bits_sent += (ulg)length; 258 1.1 christos 259 1.1 christos /* If not enough room in bi_buf, use (valid) bits from bi_buf and 260 1.6 christos * (16 - bi_valid) bits from value, leaving (width - (16 - bi_valid)) 261 1.1 christos * unused bits in value. 262 1.1 christos */ 263 1.1 christos if (s->bi_valid > (int)Buf_size - length) { 264 1.4 christos s->bi_buf |= (ush)value << s->bi_valid; 265 1.1 christos put_short(s, s->bi_buf); 266 1.1 christos s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); 267 1.1 christos s->bi_valid += length - Buf_size; 268 1.1 christos } else { 269 1.4 christos s->bi_buf |= (ush)value << s->bi_valid; 270 1.1 christos s->bi_valid += length; 271 1.1 christos } 272 1.1 christos } 273 1.2 christos #else /* !ZLIB_DEBUG */ 274 1.1 christos 275 1.1 christos #define send_bits(s, value, length) \ 276 1.1 christos { int len = length;\ 277 1.1 christos if (s->bi_valid > (int)Buf_size - len) {\ 278 1.4 christos int val = (int)value;\ 279 1.4 christos s->bi_buf |= (ush)val << s->bi_valid;\ 280 1.1 christos put_short(s, s->bi_buf);\ 281 1.1 christos s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ 282 1.1 christos s->bi_valid += len - Buf_size;\ 283 1.1 christos } else {\ 284 1.4 christos s->bi_buf |= (ush)(value) << s->bi_valid;\ 285 1.1 christos s->bi_valid += len;\ 286 1.1 christos }\ 287 1.1 christos } 288 1.2 christos #endif /* ZLIB_DEBUG */ 289 1.1 christos 290 1.1 christos 291 1.1 christos /* the arguments must not have side effects */ 292 1.1 christos 293 1.1 christos /* =========================================================================== 294 1.1 christos * Initialize the various 'constant' tables. 
295 1.1 christos */ 296 1.7 christos local void tr_static_init(void) { 297 1.1 christos #if defined(GEN_TREES_H) || !defined(STDC) 298 1.1 christos static int static_init_done = 0; 299 1.1 christos int n; /* iterates over tree elements */ 300 1.1 christos int bits; /* bit counter */ 301 1.1 christos int length; /* length value */ 302 1.1 christos int code; /* code value */ 303 1.1 christos int dist; /* distance index */ 304 1.1 christos ush bl_count[MAX_BITS+1]; 305 1.1 christos /* number of codes at each bit length for an optimal tree */ 306 1.1 christos 307 1.1 christos if (static_init_done) return; 308 1.1 christos 309 1.1 christos /* For some embedded targets, global variables are not initialized: */ 310 1.4 christos #ifdef NO_INIT_GLOBAL_POINTERS 311 1.1 christos static_l_desc.static_tree = static_ltree; 312 1.1 christos static_l_desc.extra_bits = extra_lbits; 313 1.1 christos static_d_desc.static_tree = static_dtree; 314 1.1 christos static_d_desc.extra_bits = extra_dbits; 315 1.1 christos static_bl_desc.extra_bits = extra_blbits; 316 1.4 christos #endif 317 1.1 christos 318 1.1 christos /* Initialize the mapping length (0..255) -> length code (0..28) */ 319 1.1 christos length = 0; 320 1.1 christos for (code = 0; code < LENGTH_CODES-1; code++) { 321 1.1 christos base_length[code] = length; 322 1.6 christos for (n = 0; n < (1 << extra_lbits[code]); n++) { 323 1.1 christos _length_code[length++] = (uch)code; 324 1.1 christos } 325 1.1 christos } 326 1.1 christos Assert (length == 256, "tr_static_init: length != 256"); 327 1.1 christos /* Note that the length 255 (match length 258) can be represented 328 1.1 christos * in two different ways: code 284 + 5 bits or code 285, so we 329 1.1 christos * overwrite length_code[255] to use the best encoding: 330 1.1 christos */ 331 1.6 christos _length_code[length - 1] = (uch)code; 332 1.1 christos 333 1.1 christos /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ 334 1.1 christos dist = 0; 335 1.1 christos 
for (code = 0 ; code < 16; code++) { 336 1.1 christos base_dist[code] = dist; 337 1.6 christos for (n = 0; n < (1 << extra_dbits[code]); n++) { 338 1.1 christos _dist_code[dist++] = (uch)code; 339 1.1 christos } 340 1.1 christos } 341 1.1 christos Assert (dist == 256, "tr_static_init: dist != 256"); 342 1.1 christos dist >>= 7; /* from now on, all distances are divided by 128 */ 343 1.1 christos for ( ; code < D_CODES; code++) { 344 1.1 christos base_dist[code] = dist << 7; 345 1.6 christos for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) { 346 1.1 christos _dist_code[256 + dist++] = (uch)code; 347 1.1 christos } 348 1.1 christos } 349 1.6 christos Assert (dist == 256, "tr_static_init: 256 + dist != 512"); 350 1.1 christos 351 1.1 christos /* Construct the codes of the static literal tree */ 352 1.1 christos for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0; 353 1.1 christos n = 0; 354 1.1 christos while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++; 355 1.1 christos while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++; 356 1.1 christos while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++; 357 1.1 christos while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++; 358 1.1 christos /* Codes 286 and 287 do not exist, but we must include them in the 359 1.1 christos * tree construction to get a canonical Huffman tree (longest code 360 1.1 christos * all ones) 361 1.1 christos */ 362 1.1 christos gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count); 363 1.1 christos 364 1.1 christos /* The static distance tree is trivial: */ 365 1.1 christos for (n = 0; n < D_CODES; n++) { 366 1.1 christos static_dtree[n].Len = 5; 367 1.1 christos static_dtree[n].Code = bi_reverse((unsigned)n, 5); 368 1.1 christos } 369 1.1 christos static_init_done = 1; 370 1.1 christos 371 1.1 christos # ifdef GEN_TREES_H 372 1.1 christos gen_trees_header(); 373 1.1 christos # endif 374 1.1 christos #endif /* defined(GEN_TREES_H) || !defined(STDC) */ 375 1.1 christos 
} 376 1.1 christos 377 1.1 christos /* =========================================================================== 378 1.6 christos * Generate the file trees.h describing the static trees. 379 1.1 christos */ 380 1.1 christos #ifdef GEN_TREES_H 381 1.2 christos # ifndef ZLIB_DEBUG 382 1.1 christos # include <stdio.h> 383 1.1 christos # endif 384 1.1 christos 385 1.1 christos # define SEPARATOR(i, last, width) \ 386 1.1 christos ((i) == (last)? "\n};\n\n" : \ 387 1.6 christos ((i) % (width) == (width) - 1 ? ",\n" : ", ")) 388 1.1 christos 389 1.7 christos void gen_trees_header(void) { 390 1.1 christos FILE *header = fopen("trees.h", "w"); 391 1.1 christos int i; 392 1.1 christos 393 1.1 christos Assert (header != NULL, "Can't open trees.h"); 394 1.1 christos fprintf(header, 395 1.1 christos "/* header created automatically with -DGEN_TREES_H */\n\n"); 396 1.1 christos 397 1.1 christos fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n"); 398 1.1 christos for (i = 0; i < L_CODES+2; i++) { 399 1.1 christos fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code, 400 1.1 christos static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5)); 401 1.1 christos } 402 1.1 christos 403 1.1 christos fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n"); 404 1.1 christos for (i = 0; i < D_CODES; i++) { 405 1.1 christos fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code, 406 1.1 christos static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5)); 407 1.1 christos } 408 1.1 christos 409 1.4 christos fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n"); 410 1.1 christos for (i = 0; i < DIST_CODE_LEN; i++) { 411 1.1 christos fprintf(header, "%2u%s", _dist_code[i], 412 1.1 christos SEPARATOR(i, DIST_CODE_LEN-1, 20)); 413 1.1 christos } 414 1.1 christos 415 1.4 christos fprintf(header, 416 1.4 christos "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n"); 417 1.1 christos for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) { 418 1.1 christos 
fprintf(header, "%2u%s", _length_code[i], 419 1.1 christos SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20)); 420 1.1 christos } 421 1.1 christos 422 1.1 christos fprintf(header, "local const int base_length[LENGTH_CODES] = {\n"); 423 1.1 christos for (i = 0; i < LENGTH_CODES; i++) { 424 1.1 christos fprintf(header, "%1u%s", base_length[i], 425 1.1 christos SEPARATOR(i, LENGTH_CODES-1, 20)); 426 1.1 christos } 427 1.1 christos 428 1.1 christos fprintf(header, "local const int base_dist[D_CODES] = {\n"); 429 1.1 christos for (i = 0; i < D_CODES; i++) { 430 1.1 christos fprintf(header, "%5u%s", base_dist[i], 431 1.1 christos SEPARATOR(i, D_CODES-1, 10)); 432 1.1 christos } 433 1.1 christos 434 1.1 christos fclose(header); 435 1.1 christos } 436 1.1 christos #endif /* GEN_TREES_H */ 437 1.1 christos 438 1.1 christos /* =========================================================================== 439 1.7 christos * Initialize a new block. 440 1.7 christos */ 441 1.7 christos local void init_block(deflate_state *s) { 442 1.7 christos int n; /* iterates over tree elements */ 443 1.7 christos 444 1.7 christos /* Initialize the trees. */ 445 1.7 christos for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0; 446 1.7 christos for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0; 447 1.7 christos for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0; 448 1.7 christos 449 1.7 christos s->dyn_ltree[END_BLOCK].Freq = 1; 450 1.7 christos s->opt_len = s->static_len = 0L; 451 1.7 christos s->sym_next = s->matches = 0; 452 1.7 christos } 453 1.7 christos 454 1.7 christos /* =========================================================================== 455 1.1 christos * Initialize the tree data structures for a new zlib stream. 
456 1.1 christos */ 457 1.7 christos void ZLIB_INTERNAL _tr_init(deflate_state *s) { 458 1.1 christos tr_static_init(); 459 1.1 christos 460 1.1 christos s->l_desc.dyn_tree = s->dyn_ltree; 461 1.1 christos s->l_desc.stat_desc = &static_l_desc; 462 1.1 christos 463 1.1 christos s->d_desc.dyn_tree = s->dyn_dtree; 464 1.1 christos s->d_desc.stat_desc = &static_d_desc; 465 1.1 christos 466 1.1 christos s->bl_desc.dyn_tree = s->bl_tree; 467 1.1 christos s->bl_desc.stat_desc = &static_bl_desc; 468 1.1 christos 469 1.1 christos s->bi_buf = 0; 470 1.1 christos s->bi_valid = 0; 471 1.2 christos #ifdef ZLIB_DEBUG 472 1.1 christos s->compressed_len = 0L; 473 1.1 christos s->bits_sent = 0L; 474 1.1 christos #endif 475 1.1 christos 476 1.1 christos /* Initialize the first block of the first file: */ 477 1.1 christos init_block(s); 478 1.1 christos } 479 1.1 christos 480 1.1 christos #define SMALLEST 1 481 1.1 christos /* Index within the heap array of least frequent node in the Huffman tree */ 482 1.1 christos 483 1.1 christos 484 1.1 christos /* =========================================================================== 485 1.1 christos * Remove the smallest element from the heap and recreate the heap with 486 1.1 christos * one less element. Updates heap and heap_len. 487 1.1 christos */ 488 1.1 christos #define pqremove(s, tree, top) \ 489 1.1 christos {\ 490 1.1 christos top = s->heap[SMALLEST]; \ 491 1.1 christos s->heap[SMALLEST] = s->heap[s->heap_len--]; \ 492 1.1 christos pqdownheap(s, tree, SMALLEST); \ 493 1.1 christos } 494 1.1 christos 495 1.1 christos /* =========================================================================== 496 1.1 christos * Compares to subtrees, using the tree depth as tie breaker when 497 1.1 christos * the subtrees have equal frequency. This minimizes the worst case length. 
498 1.1 christos */ 499 1.1 christos #define smaller(tree, n, m, depth) \ 500 1.1 christos (tree[n].Freq < tree[m].Freq || \ 501 1.1 christos (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) 502 1.1 christos 503 1.1 christos /* =========================================================================== 504 1.1 christos * Restore the heap property by moving down the tree starting at node k, 505 1.1 christos * exchanging a node with the smallest of its two sons if necessary, stopping 506 1.1 christos * when the heap property is re-established (each father smaller than its 507 1.1 christos * two sons). 508 1.1 christos */ 509 1.7 christos local void pqdownheap(deflate_state *s, ct_data *tree, int k) { 510 1.1 christos int v = s->heap[k]; 511 1.1 christos int j = k << 1; /* left son of k */ 512 1.1 christos while (j <= s->heap_len) { 513 1.1 christos /* Set j to the smallest of the two sons: */ 514 1.1 christos if (j < s->heap_len && 515 1.6 christos smaller(tree, s->heap[j + 1], s->heap[j], s->depth)) { 516 1.1 christos j++; 517 1.1 christos } 518 1.1 christos /* Exit if v is smaller than both sons */ 519 1.1 christos if (smaller(tree, v, s->heap[j], s->depth)) break; 520 1.1 christos 521 1.1 christos /* Exchange v with the smallest son */ 522 1.1 christos s->heap[k] = s->heap[j]; k = j; 523 1.1 christos 524 1.1 christos /* And continue down the tree, setting j to the left son of k */ 525 1.1 christos j <<= 1; 526 1.1 christos } 527 1.1 christos s->heap[k] = v; 528 1.1 christos } 529 1.1 christos 530 1.1 christos /* =========================================================================== 531 1.1 christos * Compute the optimal bit lengths for a tree and update the total bit length 532 1.1 christos * for the current block. 533 1.1 christos * IN assertion: the fields freq and dad are set, heap[heap_max] and 534 1.1 christos * above are the tree nodes sorted by increasing frequency. 
535 1.1 christos * OUT assertions: the field len is set to the optimal bit length, the 536 1.1 christos * array bl_count contains the frequencies for each bit length. 537 1.1 christos * The length opt_len is updated; static_len is also updated if stree is 538 1.1 christos * not null. 539 1.1 christos */ 540 1.7 christos local void gen_bitlen(deflate_state *s, tree_desc *desc) { 541 1.1 christos ct_data *tree = desc->dyn_tree; 542 1.1 christos int max_code = desc->max_code; 543 1.1 christos const ct_data *stree = desc->stat_desc->static_tree; 544 1.1 christos const intf *extra = desc->stat_desc->extra_bits; 545 1.1 christos int base = desc->stat_desc->extra_base; 546 1.1 christos int max_length = desc->stat_desc->max_length; 547 1.1 christos int h; /* heap index */ 548 1.1 christos int n, m; /* iterate over the tree elements */ 549 1.1 christos int bits; /* bit length */ 550 1.1 christos int xbits; /* extra bits */ 551 1.1 christos ush f; /* frequency */ 552 1.1 christos int overflow = 0; /* number of elements with bit length too large */ 553 1.1 christos 554 1.1 christos for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0; 555 1.1 christos 556 1.1 christos /* In a first pass, compute the optimal bit lengths (which may 557 1.1 christos * overflow in the case of the bit length tree). 
558 1.1 christos */ 559 1.1 christos tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */ 560 1.1 christos 561 1.6 christos for (h = s->heap_max + 1; h < HEAP_SIZE; h++) { 562 1.1 christos n = s->heap[h]; 563 1.1 christos bits = tree[tree[n].Dad].Len + 1; 564 1.1 christos if (bits > max_length) bits = max_length, overflow++; 565 1.1 christos tree[n].Len = (ush)bits; 566 1.1 christos /* We overwrite tree[n].Dad which is no longer needed */ 567 1.1 christos 568 1.1 christos if (n > max_code) continue; /* not a leaf node */ 569 1.1 christos 570 1.1 christos s->bl_count[bits]++; 571 1.1 christos xbits = 0; 572 1.6 christos if (n >= base) xbits = extra[n - base]; 573 1.1 christos f = tree[n].Freq; 574 1.4 christos s->opt_len += (ulg)f * (unsigned)(bits + xbits); 575 1.4 christos if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits); 576 1.1 christos } 577 1.1 christos if (overflow == 0) return; 578 1.1 christos 579 1.4 christos Tracev((stderr,"\nbit length overflow\n")); 580 1.1 christos /* This happens for example on obj2 and pic of the Calgary corpus */ 581 1.1 christos 582 1.1 christos /* Find the first bit length which could increase: */ 583 1.1 christos do { 584 1.6 christos bits = max_length - 1; 585 1.1 christos while (s->bl_count[bits] == 0) bits--; 586 1.6 christos s->bl_count[bits]--; /* move one leaf down the tree */ 587 1.6 christos s->bl_count[bits + 1] += 2; /* move one overflow item as its brother */ 588 1.1 christos s->bl_count[max_length]--; 589 1.1 christos /* The brother of the overflow item also moves one step up, 590 1.1 christos * but this does not affect bl_count[max_length] 591 1.1 christos */ 592 1.1 christos overflow -= 2; 593 1.1 christos } while (overflow > 0); 594 1.1 christos 595 1.1 christos /* Now recompute all bit lengths, scanning in increasing frequency. 596 1.1 christos * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all 597 1.1 christos * lengths instead of fixing only the wrong ones. 
This idea is taken 598 1.1 christos * from 'ar' written by Haruhiko Okumura.) 599 1.1 christos */ 600 1.1 christos for (bits = max_length; bits != 0; bits--) { 601 1.1 christos n = s->bl_count[bits]; 602 1.1 christos while (n != 0) { 603 1.1 christos m = s->heap[--h]; 604 1.1 christos if (m > max_code) continue; 605 1.1 christos if ((unsigned) tree[m].Len != (unsigned) bits) { 606 1.4 christos Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits)); 607 1.4 christos s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq; 608 1.1 christos tree[m].Len = (ush)bits; 609 1.1 christos } 610 1.1 christos n--; 611 1.1 christos } 612 1.1 christos } 613 1.1 christos } 614 1.1 christos 615 1.7 christos #ifdef DUMP_BL_TREE 616 1.7 christos # include <stdio.h> 617 1.7 christos #endif 618 1.1 christos 619 1.1 christos /* =========================================================================== 620 1.1 christos * Construct one Huffman tree and assigns the code bit strings and lengths. 621 1.1 christos * Update the total bit length for the current block. 622 1.1 christos * IN assertion: the field freq is set for all tree elements. 623 1.1 christos * OUT assertions: the fields len and code are set to the optimal bit length 624 1.1 christos * and corresponding code. The length opt_len is updated; static_len is 625 1.1 christos * also updated if stree is not null. The field max_code is set. 626 1.1 christos */ 627 1.7 christos local void build_tree(deflate_state *s, tree_desc *desc) { 628 1.1 christos ct_data *tree = desc->dyn_tree; 629 1.1 christos const ct_data *stree = desc->stat_desc->static_tree; 630 1.1 christos int elems = desc->stat_desc->elems; 631 1.1 christos int n, m; /* iterate over heap elements */ 632 1.1 christos int max_code = -1; /* largest code with non zero frequency */ 633 1.1 christos int node; /* new node being created */ 634 1.1 christos 635 1.1 christos /* Construct the initial heap, with least frequent element in 636 1.6 christos * heap[SMALLEST]. 
The sons of heap[n] are heap[2*n] and heap[2*n + 1]. 637 1.1 christos * heap[0] is not used. 638 1.1 christos */ 639 1.1 christos s->heap_len = 0, s->heap_max = HEAP_SIZE; 640 1.1 christos 641 1.1 christos for (n = 0; n < elems; n++) { 642 1.1 christos if (tree[n].Freq != 0) { 643 1.1 christos s->heap[++(s->heap_len)] = max_code = n; 644 1.1 christos s->depth[n] = 0; 645 1.1 christos } else { 646 1.1 christos tree[n].Len = 0; 647 1.1 christos } 648 1.1 christos } 649 1.1 christos 650 1.1 christos /* The pkzip format requires that at least one distance code exists, 651 1.1 christos * and that at least one bit should be sent even if there is only one 652 1.1 christos * possible code. So to avoid special checks later on we force at least 653 1.1 christos * two codes of non zero frequency. 654 1.1 christos */ 655 1.1 christos while (s->heap_len < 2) { 656 1.1 christos node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0); 657 1.1 christos tree[node].Freq = 1; 658 1.1 christos s->depth[node] = 0; 659 1.1 christos s->opt_len--; if (stree) s->static_len -= stree[node].Len; 660 1.1 christos /* node is 0 or 1 so it does not have extra bits */ 661 1.1 christos } 662 1.1 christos desc->max_code = max_code; 663 1.1 christos 664 1.6 christos /* The elements heap[heap_len/2 + 1 .. heap_len] are leaves of the tree, 665 1.1 christos * establish sub-heaps of increasing lengths: 666 1.1 christos */ 667 1.1 christos for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n); 668 1.1 christos 669 1.1 christos /* Construct the Huffman tree by repeatedly combining the least two 670 1.1 christos * frequent nodes. 
671 1.1 christos */ 672 1.1 christos node = elems; /* next internal node of the tree */ 673 1.1 christos do { 674 1.1 christos pqremove(s, tree, n); /* n = node of least frequency */ 675 1.1 christos m = s->heap[SMALLEST]; /* m = node of next least frequency */ 676 1.1 christos 677 1.1 christos s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */ 678 1.1 christos s->heap[--(s->heap_max)] = m; 679 1.1 christos 680 1.1 christos /* Create a new node father of n and m */ 681 1.1 christos tree[node].Freq = tree[n].Freq + tree[m].Freq; 682 1.1 christos s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ? 683 1.1 christos s->depth[n] : s->depth[m]) + 1); 684 1.1 christos tree[n].Dad = tree[m].Dad = (ush)node; 685 1.1 christos #ifdef DUMP_BL_TREE 686 1.1 christos if (tree == s->bl_tree) { 687 1.1 christos fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)", 688 1.1 christos node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq); 689 1.1 christos } 690 1.1 christos #endif 691 1.1 christos /* and insert the new node in the heap */ 692 1.1 christos s->heap[SMALLEST] = node++; 693 1.1 christos pqdownheap(s, tree, SMALLEST); 694 1.1 christos 695 1.1 christos } while (s->heap_len >= 2); 696 1.1 christos 697 1.1 christos s->heap[--(s->heap_max)] = s->heap[SMALLEST]; 698 1.1 christos 699 1.1 christos /* At this point, the fields freq and dad are set. We can now 700 1.1 christos * generate the bit lengths. 701 1.1 christos */ 702 1.1 christos gen_bitlen(s, (tree_desc *)desc); 703 1.1 christos 704 1.1 christos /* The field len is now set, we can generate the bit codes */ 705 1.1 christos gen_codes ((ct_data *)tree, max_code, s->bl_count); 706 1.1 christos } 707 1.1 christos 708 1.1 christos /* =========================================================================== 709 1.1 christos * Scan a literal or distance tree to determine the frequencies of the codes 710 1.1 christos * in the bit length tree. 
711 1.1 christos */ 712 1.7 christos local void scan_tree(deflate_state *s, ct_data *tree, int max_code) { 713 1.1 christos int n; /* iterates over all tree elements */ 714 1.1 christos int prevlen = -1; /* last emitted length */ 715 1.1 christos int curlen; /* length of current code */ 716 1.1 christos int nextlen = tree[0].Len; /* length of next code */ 717 1.1 christos int count = 0; /* repeat count of the current code */ 718 1.1 christos int max_count = 7; /* max repeat count */ 719 1.1 christos int min_count = 4; /* min repeat count */ 720 1.1 christos 721 1.1 christos if (nextlen == 0) max_count = 138, min_count = 3; 722 1.6 christos tree[max_code + 1].Len = (ush)0xffff; /* guard */ 723 1.1 christos 724 1.1 christos for (n = 0; n <= max_code; n++) { 725 1.6 christos curlen = nextlen; nextlen = tree[n + 1].Len; 726 1.1 christos if (++count < max_count && curlen == nextlen) { 727 1.1 christos continue; 728 1.1 christos } else if (count < min_count) { 729 1.1 christos s->bl_tree[curlen].Freq += count; 730 1.1 christos } else if (curlen != 0) { 731 1.1 christos if (curlen != prevlen) s->bl_tree[curlen].Freq++; 732 1.1 christos s->bl_tree[REP_3_6].Freq++; 733 1.1 christos } else if (count <= 10) { 734 1.1 christos s->bl_tree[REPZ_3_10].Freq++; 735 1.1 christos } else { 736 1.1 christos s->bl_tree[REPZ_11_138].Freq++; 737 1.1 christos } 738 1.1 christos count = 0; prevlen = curlen; 739 1.1 christos if (nextlen == 0) { 740 1.1 christos max_count = 138, min_count = 3; 741 1.1 christos } else if (curlen == nextlen) { 742 1.1 christos max_count = 6, min_count = 3; 743 1.1 christos } else { 744 1.1 christos max_count = 7, min_count = 4; 745 1.1 christos } 746 1.1 christos } 747 1.1 christos } 748 1.1 christos 749 1.1 christos /* =========================================================================== 750 1.1 christos * Send a literal or distance tree in compressed form, using the codes in 751 1.1 christos * bl_tree. 
752 1.1 christos */ 753 1.7 christos local void send_tree(deflate_state *s, ct_data *tree, int max_code) { 754 1.1 christos int n; /* iterates over all tree elements */ 755 1.1 christos int prevlen = -1; /* last emitted length */ 756 1.1 christos int curlen; /* length of current code */ 757 1.1 christos int nextlen = tree[0].Len; /* length of next code */ 758 1.1 christos int count = 0; /* repeat count of the current code */ 759 1.1 christos int max_count = 7; /* max repeat count */ 760 1.1 christos int min_count = 4; /* min repeat count */ 761 1.1 christos 762 1.6 christos /* tree[max_code + 1].Len = -1; */ /* guard already set */ 763 1.1 christos if (nextlen == 0) max_count = 138, min_count = 3; 764 1.1 christos 765 1.1 christos for (n = 0; n <= max_code; n++) { 766 1.6 christos curlen = nextlen; nextlen = tree[n + 1].Len; 767 1.1 christos if (++count < max_count && curlen == nextlen) { 768 1.1 christos continue; 769 1.1 christos } else if (count < min_count) { 770 1.1 christos do { send_code(s, curlen, s->bl_tree); } while (--count != 0); 771 1.1 christos 772 1.1 christos } else if (curlen != 0) { 773 1.1 christos if (curlen != prevlen) { 774 1.1 christos send_code(s, curlen, s->bl_tree); count--; 775 1.1 christos } 776 1.1 christos Assert(count >= 3 && count <= 6, " 3_6?"); 777 1.6 christos send_code(s, REP_3_6, s->bl_tree); send_bits(s, count - 3, 2); 778 1.1 christos 779 1.1 christos } else if (count <= 10) { 780 1.6 christos send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count - 3, 3); 781 1.1 christos 782 1.1 christos } else { 783 1.6 christos send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count - 11, 7); 784 1.1 christos } 785 1.1 christos count = 0; prevlen = curlen; 786 1.1 christos if (nextlen == 0) { 787 1.1 christos max_count = 138, min_count = 3; 788 1.1 christos } else if (curlen == nextlen) { 789 1.1 christos max_count = 6, min_count = 3; 790 1.1 christos } else { 791 1.1 christos max_count = 7, min_count = 4; 792 1.1 christos } 793 1.1 
christos } 794 1.1 christos } 795 1.1 christos 796 1.1 christos /* =========================================================================== 797 1.1 christos * Construct the Huffman tree for the bit lengths and return the index in 798 1.1 christos * bl_order of the last bit length code to send. 799 1.1 christos */ 800 1.7 christos local int build_bl_tree(deflate_state *s) { 801 1.1 christos int max_blindex; /* index of last bit length code of non zero freq */ 802 1.1 christos 803 1.1 christos /* Determine the bit length frequencies for literal and distance trees */ 804 1.1 christos scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code); 805 1.1 christos scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code); 806 1.1 christos 807 1.1 christos /* Build the bit length tree: */ 808 1.1 christos build_tree(s, (tree_desc *)(&(s->bl_desc))); 809 1.6 christos /* opt_len now includes the length of the tree representations, except the 810 1.6 christos * lengths of the bit lengths codes and the 5 + 5 + 4 bits for the counts. 811 1.1 christos */ 812 1.1 christos 813 1.1 christos /* Determine the number of bit length codes to send. The pkzip format 814 1.1 christos * requires that at least 4 bit length codes be sent. (appnote.txt says 815 1.1 christos * 3 but the actual value used is 4.) 
816 1.1 christos */ 817 1.1 christos for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) { 818 1.1 christos if (s->bl_tree[bl_order[max_blindex]].Len != 0) break; 819 1.1 christos } 820 1.1 christos /* Update opt_len to include the bit length tree and counts */ 821 1.6 christos s->opt_len += 3*((ulg)max_blindex + 1) + 5 + 5 + 4; 822 1.1 christos Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", 823 1.1 christos s->opt_len, s->static_len)); 824 1.1 christos 825 1.1 christos return max_blindex; 826 1.1 christos } 827 1.1 christos 828 1.1 christos /* =========================================================================== 829 1.1 christos * Send the header for a block using dynamic Huffman trees: the counts, the 830 1.1 christos * lengths of the bit length codes, the literal tree and the distance tree. 831 1.1 christos * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. 832 1.1 christos */ 833 1.7 christos local void send_all_trees(deflate_state *s, int lcodes, int dcodes, 834 1.7 christos int blcodes) { 835 1.1 christos int rank; /* index in bl_order */ 836 1.1 christos 837 1.1 christos Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); 838 1.1 christos Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, 839 1.1 christos "too many codes"); 840 1.1 christos Tracev((stderr, "\nbl counts: ")); 841 1.6 christos send_bits(s, lcodes - 257, 5); /* not +255 as stated in appnote.txt */ 842 1.6 christos send_bits(s, dcodes - 1, 5); 843 1.6 christos send_bits(s, blcodes - 4, 4); /* not -3 as stated in appnote.txt */ 844 1.1 christos for (rank = 0; rank < blcodes; rank++) { 845 1.1 christos Tracev((stderr, "\nbl code %2d ", bl_order[rank])); 846 1.1 christos send_bits(s, s->bl_tree[bl_order[rank]].Len, 3); 847 1.1 christos } 848 1.1 christos Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent)); 849 1.1 christos 850 1.6 christos send_tree(s, (ct_data *)s->dyn_ltree, lcodes - 1); /* literal tree */ 851 1.1 christos 
Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent)); 852 1.1 christos 853 1.6 christos send_tree(s, (ct_data *)s->dyn_dtree, dcodes - 1); /* distance tree */ 854 1.1 christos Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); 855 1.1 christos } 856 1.1 christos 857 1.1 christos /* =========================================================================== 858 1.1 christos * Send a stored block 859 1.1 christos */ 860 1.7 christos void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, 861 1.7 christos ulg stored_len, int last) { 862 1.6 christos send_bits(s, (STORED_BLOCK<<1) + last, 3); /* send block type */ 863 1.4 christos bi_windup(s); /* align on byte boundary */ 864 1.4 christos put_short(s, (ush)stored_len); 865 1.4 christos put_short(s, (ush)~stored_len); 866 1.6 christos if (stored_len) 867 1.6 christos zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len); 868 1.4 christos s->pending += stored_len; 869 1.2 christos #ifdef ZLIB_DEBUG 870 1.1 christos s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; 871 1.1 christos s->compressed_len += (stored_len + 4) << 3; 872 1.4 christos s->bits_sent += 2*16; 873 1.6 christos s->bits_sent += stored_len << 3; 874 1.1 christos #endif 875 1.4 christos } 876 1.4 christos 877 1.4 christos /* =========================================================================== 878 1.4 christos * Flush the bits in the bit buffer to pending output (leaves at most 7 bits) 879 1.4 christos */ 880 1.7 christos void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s) { 881 1.4 christos bi_flush(s); 882 1.1 christos } 883 1.1 christos 884 1.1 christos /* =========================================================================== 885 1.1 christos * Send one empty static block to give enough lookahead for inflate. 886 1.1 christos * This takes 10 bits, of which 7 may remain in the bit buffer. 
887 1.1 christos */ 888 1.7 christos void ZLIB_INTERNAL _tr_align(deflate_state *s) { 889 1.1 christos send_bits(s, STATIC_TREES<<1, 3); 890 1.1 christos send_code(s, END_BLOCK, static_ltree); 891 1.2 christos #ifdef ZLIB_DEBUG 892 1.1 christos s->compressed_len += 10L; /* 3 for block type, 7 for EOB */ 893 1.1 christos #endif 894 1.1 christos bi_flush(s); 895 1.1 christos } 896 1.1 christos 897 1.1 christos /* =========================================================================== 898 1.7 christos * Send the block data compressed using the given Huffman trees 899 1.7 christos */ 900 1.7 christos local void compress_block(deflate_state *s, const ct_data *ltree, 901 1.7 christos const ct_data *dtree) { 902 1.7 christos unsigned dist; /* distance of matched string */ 903 1.7 christos int lc; /* match length or unmatched char (if dist == 0) */ 904 1.7 christos unsigned sx = 0; /* running index in symbol buffers */ 905 1.7 christos unsigned code; /* the code to send */ 906 1.7 christos int extra; /* number of extra bits to send */ 907 1.7 christos 908 1.7 christos if (s->sym_next != 0) do { 909 1.7 christos #ifdef LIT_MEM 910 1.7 christos dist = s->d_buf[sx]; 911 1.7 christos lc = s->l_buf[sx++]; 912 1.7 christos #else 913 1.7 christos dist = s->sym_buf[sx++] & 0xff; 914 1.7 christos dist += (unsigned)(s->sym_buf[sx++] & 0xff) << 8; 915 1.7 christos lc = s->sym_buf[sx++]; 916 1.7 christos #endif 917 1.7 christos if (dist == 0) { 918 1.7 christos send_code(s, lc, ltree); /* send a literal byte */ 919 1.7 christos Tracecv(isgraph(lc), (stderr," '%c' ", lc)); 920 1.7 christos } else { 921 1.7 christos /* Here, lc is the match length - MIN_MATCH */ 922 1.7 christos code = _length_code[lc]; 923 1.7 christos send_code(s, code + LITERALS + 1, ltree); /* send length code */ 924 1.7 christos extra = extra_lbits[code]; 925 1.7 christos if (extra != 0) { 926 1.7 christos lc -= base_length[code]; 927 1.7 christos send_bits(s, lc, extra); /* send the extra length bits */ 928 
1.7 christos } 929 1.7 christos dist--; /* dist is now the match distance - 1 */ 930 1.7 christos code = d_code(dist); 931 1.7 christos Assert (code < D_CODES, "bad d_code"); 932 1.7 christos 933 1.7 christos send_code(s, code, dtree); /* send the distance code */ 934 1.7 christos extra = extra_dbits[code]; 935 1.7 christos if (extra != 0) { 936 1.7 christos dist -= (unsigned)base_dist[code]; 937 1.7 christos send_bits(s, dist, extra); /* send the extra distance bits */ 938 1.7 christos } 939 1.7 christos } /* literal or match pair ? */ 940 1.7 christos 941 1.7 christos /* Check for no overlay of pending_buf on needed symbols */ 942 1.7 christos #ifdef LIT_MEM 943 1.7 christos Assert(s->pending < 2 * (s->lit_bufsize + sx), "pendingBuf overflow"); 944 1.7 christos #else 945 1.7 christos Assert(s->pending < s->lit_bufsize + sx, "pendingBuf overflow"); 946 1.7 christos #endif 947 1.7 christos 948 1.7 christos } while (sx < s->sym_next); 949 1.7 christos 950 1.7 christos send_code(s, END_BLOCK, ltree); 951 1.7 christos } 952 1.7 christos 953 1.7 christos /* =========================================================================== 954 1.7 christos * Check if the data type is TEXT or BINARY, using the following algorithm: 955 1.7 christos * - TEXT if the two conditions below are satisfied: 956 1.7 christos * a) There are no non-portable control characters belonging to the 957 1.7 christos * "block list" (0..6, 14..25, 28..31). 958 1.7 christos * b) There is at least one printable character belonging to the 959 1.7 christos * "allow list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). 960 1.7 christos * - BINARY otherwise. 961 1.7 christos * - The following partially-portable control characters form a 962 1.7 christos * "gray list" that is ignored in this detection algorithm: 963 1.7 christos * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). 964 1.7 christos * IN assertion: the fields Freq of dyn_ltree are set. 
965 1.7 christos */ 966 1.7 christos local int detect_data_type(deflate_state *s) { 967 1.7 christos /* block_mask is the bit mask of block-listed bytes 968 1.7 christos * set bits 0..6, 14..25, and 28..31 969 1.7 christos * 0xf3ffc07f = binary 11110011111111111100000001111111 970 1.7 christos */ 971 1.7 christos unsigned long block_mask = 0xf3ffc07fUL; 972 1.7 christos int n; 973 1.7 christos 974 1.7 christos /* Check for non-textual ("block-listed") bytes. */ 975 1.7 christos for (n = 0; n <= 31; n++, block_mask >>= 1) 976 1.7 christos if ((block_mask & 1) && (s->dyn_ltree[n].Freq != 0)) 977 1.7 christos return Z_BINARY; 978 1.7 christos 979 1.7 christos /* Check for textual ("allow-listed") bytes. */ 980 1.7 christos if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 981 1.7 christos || s->dyn_ltree[13].Freq != 0) 982 1.7 christos return Z_TEXT; 983 1.7 christos for (n = 32; n < LITERALS; n++) 984 1.7 christos if (s->dyn_ltree[n].Freq != 0) 985 1.7 christos return Z_TEXT; 986 1.7 christos 987 1.7 christos /* There are no "block-listed" or "allow-listed" bytes: 988 1.7 christos * this stream either is empty or has tolerated ("gray-listed") bytes only. 989 1.7 christos */ 990 1.7 christos return Z_BINARY; 991 1.7 christos } 992 1.7 christos 993 1.7 christos /* =========================================================================== 994 1.1 christos * Determine the best encoding for the current block: dynamic trees, static 995 1.6 christos * trees or store, and write out the encoded block. 
996 1.1 christos */ 997 1.7 christos void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, 998 1.7 christos ulg stored_len, int last) { 999 1.1 christos ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ 1000 1.1 christos int max_blindex = 0; /* index of last bit length code of non zero freq */ 1001 1.1 christos 1002 1.1 christos /* Build the Huffman trees unless a stored block is forced */ 1003 1.1 christos if (s->level > 0) { 1004 1.1 christos 1005 1.1 christos /* Check if the file is binary or text */ 1006 1.4 christos if (s->strm->data_type == Z_UNKNOWN) 1007 1.4 christos s->strm->data_type = detect_data_type(s); 1008 1.1 christos 1009 1.1 christos /* Construct the literal and distance trees */ 1010 1.1 christos build_tree(s, (tree_desc *)(&(s->l_desc))); 1011 1.1 christos Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, 1012 1.1 christos s->static_len)); 1013 1.1 christos 1014 1.1 christos build_tree(s, (tree_desc *)(&(s->d_desc))); 1015 1.1 christos Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, 1016 1.1 christos s->static_len)); 1017 1.1 christos /* At this point, opt_len and static_len are the total bit lengths of 1018 1.1 christos * the compressed block data, excluding the tree representations. 1019 1.1 christos */ 1020 1.1 christos 1021 1.1 christos /* Build the bit length tree for the above two trees, and get the index 1022 1.1 christos * in bl_order of the last bit length code to send. 1023 1.1 christos */ 1024 1.1 christos max_blindex = build_bl_tree(s); 1025 1.1 christos 1026 1.1 christos /* Determine the best encoding. Compute the block lengths in bytes. 
*/ 1027 1.6 christos opt_lenb = (s->opt_len + 3 + 7) >> 3; 1028 1.6 christos static_lenb = (s->static_len + 3 + 7) >> 3; 1029 1.1 christos 1030 1.1 christos Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", 1031 1.1 christos opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len, 1032 1.5 wiz s->sym_next / 3)); 1033 1.1 christos 1034 1.6 christos #ifndef FORCE_STATIC 1035 1.6 christos if (static_lenb <= opt_lenb || s->strategy == Z_FIXED) 1036 1.6 christos #endif 1037 1.6 christos opt_lenb = static_lenb; 1038 1.1 christos 1039 1.1 christos } else { 1040 1.1 christos Assert(buf != (char*)0, "lost buf"); 1041 1.1 christos opt_lenb = static_lenb = stored_len + 5; /* force a stored block */ 1042 1.1 christos } 1043 1.1 christos 1044 1.1 christos #ifdef FORCE_STORED 1045 1.1 christos if (buf != (char*)0) { /* force stored block */ 1046 1.1 christos #else 1047 1.6 christos if (stored_len + 4 <= opt_lenb && buf != (char*)0) { 1048 1.1 christos /* 4: two words for the lengths */ 1049 1.1 christos #endif 1050 1.1 christos /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. 1051 1.1 christos * Otherwise we can't have processed more than WSIZE input bytes since 1052 1.1 christos * the last block flush, because compression would have been 1053 1.1 christos * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to 1054 1.1 christos * transform a block into a stored block. 
1055 1.1 christos */ 1056 1.4 christos _tr_stored_block(s, buf, stored_len, last); 1057 1.1 christos 1058 1.6 christos } else if (static_lenb == opt_lenb) { 1059 1.6 christos send_bits(s, (STATIC_TREES<<1) + last, 3); 1060 1.4 christos compress_block(s, (const ct_data *)static_ltree, 1061 1.4 christos (const ct_data *)static_dtree); 1062 1.2 christos #ifdef ZLIB_DEBUG 1063 1.1 christos s->compressed_len += 3 + s->static_len; 1064 1.1 christos #endif 1065 1.1 christos } else { 1066 1.6 christos send_bits(s, (DYN_TREES<<1) + last, 3); 1067 1.6 christos send_all_trees(s, s->l_desc.max_code + 1, s->d_desc.max_code + 1, 1068 1.6 christos max_blindex + 1); 1069 1.4 christos compress_block(s, (const ct_data *)s->dyn_ltree, 1070 1.4 christos (const ct_data *)s->dyn_dtree); 1071 1.2 christos #ifdef ZLIB_DEBUG 1072 1.1 christos s->compressed_len += 3 + s->opt_len; 1073 1.1 christos #endif 1074 1.1 christos } 1075 1.1 christos Assert (s->compressed_len == s->bits_sent, "bad compressed size"); 1076 1.1 christos /* The above check is made mod 2^32, for files larger than 512 MB 1077 1.1 christos * and uLong implemented on 32 bits. 1078 1.1 christos */ 1079 1.1 christos init_block(s); 1080 1.1 christos 1081 1.4 christos if (last) { 1082 1.1 christos bi_windup(s); 1083 1.2 christos #ifdef ZLIB_DEBUG 1084 1.1 christos s->compressed_len += 7; /* align on byte boundary */ 1085 1.1 christos #endif 1086 1.1 christos } 1087 1.6 christos Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len >> 3, 1088 1.6 christos s->compressed_len - 7*last)); 1089 1.1 christos } 1090 1.1 christos 1091 1.1 christos /* =========================================================================== 1092 1.1 christos * Save the match info and tally the frequency counts. Return true if 1093 1.1 christos * the current block must be flushed. 
1094 1.1 christos */ 1095 1.7 christos int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc) { 1096 1.7 christos #ifdef LIT_MEM 1097 1.7 christos s->d_buf[s->sym_next] = (ush)dist; 1098 1.7 christos s->l_buf[s->sym_next++] = (uch)lc; 1099 1.7 christos #else 1100 1.6 christos s->sym_buf[s->sym_next++] = (uch)dist; 1101 1.6 christos s->sym_buf[s->sym_next++] = (uch)(dist >> 8); 1102 1.6 christos s->sym_buf[s->sym_next++] = (uch)lc; 1103 1.7 christos #endif 1104 1.1 christos if (dist == 0) { 1105 1.1 christos /* lc is the unmatched char */ 1106 1.1 christos s->dyn_ltree[lc].Freq++; 1107 1.1 christos } else { 1108 1.1 christos s->matches++; 1109 1.1 christos /* Here, lc is the match length - MIN_MATCH */ 1110 1.1 christos dist--; /* dist = match distance - 1 */ 1111 1.1 christos Assert((ush)dist < (ush)MAX_DIST(s) && 1112 1.1 christos (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) && 1113 1.1 christos (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match"); 1114 1.1 christos 1115 1.6 christos s->dyn_ltree[_length_code[lc] + LITERALS + 1].Freq++; 1116 1.1 christos s->dyn_dtree[d_code(dist)].Freq++; 1117 1.1 christos } 1118 1.5 wiz return (s->sym_next == s->sym_end); 1119 1.1 christos } 1120