1 1.6 christos /* $NetBSD: deflate.h,v 1.6 2024/09/22 19:12:27 christos Exp $ */ 2 1.1 christos 3 1.1 christos /* deflate.h -- internal compression state 4 1.6 christos * Copyright (C) 1995-2024 Jean-loup Gailly 5 1.1 christos * For conditions of distribution and use, see copyright notice in zlib.h 6 1.1 christos */ 7 1.1 christos 8 1.1 christos /* WARNING: this file should *not* be used by applications. It is 9 1.1 christos part of the implementation of the compression library and is 10 1.1 christos subject to change. Applications should only use zlib.h. 11 1.1 christos */ 12 1.1 christos 13 1.5 christos /* @(#) Id */ 14 1.1 christos 15 1.1 christos #ifndef DEFLATE_H 16 1.1 christos #define DEFLATE_H 17 1.1 christos 18 1.1 christos #include "zutil.h" 19 1.1 christos 20 1.1 christos /* define NO_GZIP when compiling if you want to disable gzip header and 21 1.1 christos trailer creation by deflate(). NO_GZIP would be used to avoid linking in 22 1.1 christos the crc code when it is not needed. For shared libraries, gzip encoding 23 1.1 christos should be left enabled. */ 24 1.1 christos #ifndef NO_GZIP 25 1.1 christos # define GZIP 26 1.1 christos #endif 27 1.1 christos 28 1.6 christos /* define LIT_MEM to slightly increase the speed of deflate (order 1% to 2%) at 29 1.6 christos the cost of a larger memory footprint */ 30 1.6 christos /* #define LIT_MEM */ 31 1.6 christos 32 1.1 christos /* =========================================================================== 33 1.1 christos * Internal compression state. 34 1.1 christos */ 35 1.1 christos 36 1.1 christos #define LENGTH_CODES 29 37 1.1 christos /* number of length codes, not counting the special END_BLOCK code */ 38 1.1 christos 39 1.1 christos #define LITERALS 256 40 1.1 christos /* number of literal bytes 0..255 */ 41 1.1 christos 42 1.1 christos #define L_CODES (LITERALS+1+LENGTH_CODES) 43 1.1 christos /* number of Literal or Length codes, including the END_BLOCK code */ 44 1.1 christos 45 1.1 christos #define D_CODES 30 46 1.1 christos /* number of distance codes */ 47 1.1 christos 48 1.1 christos #define BL_CODES 19 49 1.1 christos /* number of codes used to transfer the bit lengths */ 50 1.1 christos 51 1.1 christos #define HEAP_SIZE (2*L_CODES+1) 52 1.1 christos /* maximum heap size */ 53 1.1 christos 54 1.1 christos #define MAX_BITS 15 55 1.1 christos /* All codes must not exceed MAX_BITS bits */ 56 1.1 christos 57 1.3 christos #define Buf_size 16 58 1.3 christos /* size of bit buffer in bi_buf */ 59 1.3 christos 60 1.3 christos #define INIT_STATE 42 /* zlib header -> BUSY_STATE */ 61 1.3 christos #ifdef GZIP 62 1.3 christos # define GZIP_STATE 57 /* gzip header -> BUSY_STATE | EXTRA_STATE */ 63 1.3 christos #endif 64 1.3 christos #define EXTRA_STATE 69 /* gzip extra block -> NAME_STATE */ 65 1.3 christos #define NAME_STATE 73 /* gzip file name -> COMMENT_STATE */ 66 1.3 christos #define COMMENT_STATE 91 /* gzip comment -> HCRC_STATE */ 67 1.3 christos #define HCRC_STATE 103 /* gzip header CRC -> BUSY_STATE */ 68 1.3 christos #define BUSY_STATE 113 /* deflate -> FINISH_STATE */ 69 1.3 christos #define FINISH_STATE 666 /* stream complete */ 70 1.1 christos /* Stream status */ 71 1.1 christos 72 1.1 christos 73 1.1 christos /* Data structure describing a single value and its code string. */ 74 1.1 christos typedef struct ct_data_s { 75 1.1 christos union { 76 1.1 christos ush freq; /* frequency count */ 77 1.1 christos ush code; /* bit string */ 78 1.1 christos } fc; 79 1.1 christos union { 80 1.1 christos ush dad; /* father node in Huffman tree */ 81 1.1 christos ush len; /* length of bit string */ 82 1.1 christos } dl; 83 1.1 christos } FAR ct_data; 84 1.1 christos 85 1.1 christos #define Freq fc.freq 86 1.1 christos #define Code fc.code 87 1.1 christos #define Dad dl.dad 88 1.1 christos #define Len dl.len 89 1.1 christos 90 1.1 christos typedef struct static_tree_desc_s static_tree_desc; 91 1.1 christos 92 1.1 christos typedef struct tree_desc_s { 93 1.1 christos ct_data *dyn_tree; /* the dynamic tree */ 94 1.1 christos int max_code; /* largest code with non zero frequency */ 95 1.3 christos const static_tree_desc *stat_desc; /* the corresponding static tree */ 96 1.1 christos } FAR tree_desc; 97 1.1 christos 98 1.1 christos typedef ush Pos; 99 1.1 christos typedef Pos FAR Posf; 100 1.1 christos typedef unsigned IPos; 101 1.1 christos 102 1.1 christos /* A Pos is an index in the character window. We use short instead of int to 103 1.1 christos * save space in the various tables. IPos is used only for parameter passing. 104 1.1 christos */ 105 1.1 christos 106 1.1 christos typedef struct internal_state { 107 1.1 christos z_streamp strm; /* pointer back to this zlib stream */ 108 1.1 christos int status; /* as the name implies */ 109 1.1 christos Bytef *pending_buf; /* output still pending */ 110 1.1 christos ulg pending_buf_size; /* size of pending_buf */ 111 1.1 christos Bytef *pending_out; /* next pending byte to output to the stream */ 112 1.3 christos ulg pending; /* nb of bytes in the pending buffer */ 113 1.1 christos int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ 114 1.1 christos gz_headerp gzhead; /* gzip header information to write */ 115 1.3 christos ulg gzindex; /* where in extra, name, or comment */ 116 1.3 christos Byte method; /* can only be DEFLATED */ 117 1.1 christos int last_flush; /* value of flush param for previous deflate call */ 118 1.1 christos 119 1.1 christos /* used by deflate.c: */ 120 1.1 christos 121 1.1 christos uInt w_size; /* LZ77 window size (32K by default) */ 122 1.1 christos uInt w_bits; /* log2(w_size) (8..16) */ 123 1.1 christos uInt w_mask; /* w_size - 1 */ 124 1.1 christos 125 1.1 christos Bytef *window; 126 1.1 christos /* Sliding window. Input bytes are read into the second half of the window, 127 1.1 christos * and move to the first half later to keep a dictionary of at least wSize 128 1.1 christos * bytes. With this organization, matches are limited to a distance of 129 1.1 christos * wSize-MAX_MATCH bytes, but this ensures that IO is always 130 1.1 christos * performed with a length multiple of the block size. Also, it limits 131 1.1 christos * the window size to 64K, which is quite useful on MSDOS. 132 1.1 christos * To do: use the user input buffer as sliding window. 133 1.1 christos */ 134 1.1 christos 135 1.1 christos ulg window_size; 136 1.1 christos /* Actual size of window: 2*wSize, except when the user input buffer 137 1.1 christos * is directly used as sliding window. 138 1.1 christos */ 139 1.1 christos 140 1.1 christos Posf *prev; 141 1.1 christos /* Link to older string with same hash index. To limit the size of this 142 1.1 christos * array to 64K, this link is maintained only for the last 32K strings. 143 1.1 christos * An index in this array is thus a window index modulo 32K. 144 1.1 christos */ 145 1.1 christos 146 1.1 christos Posf *head; /* Heads of the hash chains or NIL. */ 147 1.1 christos 148 1.1 christos uInt ins_h; /* hash index of string to be inserted */ 149 1.1 christos uInt hash_size; /* number of elements in hash table */ 150 1.1 christos uInt hash_bits; /* log2(hash_size) */ 151 1.1 christos uInt hash_mask; /* hash_size-1 */ 152 1.1 christos 153 1.1 christos uInt hash_shift; 154 1.1 christos /* Number of bits by which ins_h must be shifted at each input 155 1.1 christos * step. It must be such that after MIN_MATCH steps, the oldest 156 1.1 christos * byte no longer takes part in the hash key, that is: 157 1.1 christos * hash_shift * MIN_MATCH >= hash_bits 158 1.1 christos */ 159 1.1 christos 160 1.1 christos long block_start; 161 1.1 christos /* Window position at the beginning of the current output block. Gets 162 1.1 christos * negative when the window is moved backwards. 163 1.1 christos */ 164 1.1 christos 165 1.1 christos uInt match_length; /* length of best match */ 166 1.1 christos IPos prev_match; /* previous match */ 167 1.1 christos int match_available; /* set if previous match exists */ 168 1.1 christos uInt strstart; /* start of string to insert */ 169 1.1 christos uInt match_start; /* start of matching string */ 170 1.1 christos uInt lookahead; /* number of valid bytes ahead in window */ 171 1.1 christos 172 1.1 christos uInt prev_length; 173 1.1 christos /* Length of the best match at previous step. Matches not greater than this 174 1.1 christos * are discarded. This is used in the lazy match evaluation. 175 1.1 christos */ 176 1.1 christos 177 1.1 christos uInt max_chain_length; 178 1.1 christos /* To speed up deflation, hash chains are never searched beyond this 179 1.1 christos * length. A higher limit improves compression ratio but degrades the 180 1.1 christos * speed. 181 1.1 christos */ 182 1.1 christos 183 1.1 christos uInt max_lazy_match; 184 1.1 christos /* Attempt to find a better match only when the current match is strictly 185 1.1 christos * smaller than this value. This mechanism is used only for compression 186 1.1 christos * levels >= 4. 187 1.1 christos */ 188 1.1 christos # define max_insert_length max_lazy_match 189 1.1 christos /* Insert new strings in the hash table only if the match length is not 190 1.1 christos * greater than this length. This saves time but degrades compression. 191 1.1 christos * max_insert_length is used only for compression levels <= 3. 192 1.1 christos */ 193 1.1 christos 194 1.1 christos int level; /* compression level (1..9) */ 195 1.1 christos int strategy; /* favor or force Huffman coding*/ 196 1.1 christos 197 1.1 christos uInt good_match; 198 1.1 christos /* Use a faster search when the previous match is longer than this */ 199 1.1 christos 200 1.1 christos int nice_match; /* Stop searching when current match exceeds this */ 201 1.1 christos 202 1.1 christos /* used by trees.c: */ 203 1.3 christos /* Didn't use ct_data typedef below to suppress compiler warning */ 204 1.1 christos struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ 205 1.1 christos struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ 206 1.1 christos struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ 207 1.1 christos 208 1.1 christos struct tree_desc_s l_desc; /* desc. for literal tree */ 209 1.1 christos struct tree_desc_s d_desc; /* desc. for distance tree */ 210 1.1 christos struct tree_desc_s bl_desc; /* desc. for bit length tree */ 211 1.1 christos 212 1.1 christos ush bl_count[MAX_BITS+1]; 213 1.1 christos /* number of codes at each bit length for an optimal tree */ 214 1.1 christos 215 1.1 christos int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ 216 1.1 christos int heap_len; /* number of elements in the heap */ 217 1.1 christos int heap_max; /* element of largest frequency */ 218 1.1 christos /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. 219 1.1 christos * The same heap array is used to build all trees. 220 1.1 christos */ 221 1.1 christos 222 1.1 christos uch depth[2*L_CODES+1]; 223 1.1 christos /* Depth of each subtree used as tie breaker for trees of equal frequency 224 1.1 christos */ 225 1.1 christos 226 1.6 christos #ifdef LIT_MEM 227 1.6 christos # define LIT_BUFS 5 228 1.6 christos ushf *d_buf; /* buffer for distances */ 229 1.6 christos uchf *l_buf; /* buffer for literals/lengths */ 230 1.6 christos #else 231 1.6 christos # define LIT_BUFS 4 232 1.4 wiz uchf *sym_buf; /* buffer for distances and literals/lengths */ 233 1.6 christos #endif 234 1.1 christos 235 1.1 christos uInt lit_bufsize; 236 1.1 christos /* Size of match buffer for literals/lengths. There are 4 reasons for 237 1.1 christos * limiting lit_bufsize to 64K: 238 1.1 christos * - frequencies can be kept in 16 bit counters 239 1.1 christos * - if compression is not successful for the first block, all input 240 1.1 christos * data is still in the window so we can still emit a stored block even 241 1.1 christos * when input comes from standard input. (This can also be done for 242 1.1 christos * all blocks if lit_bufsize is not greater than 32K.) 243 1.1 christos * - if compression is not successful for a file smaller than 64K, we can 244 1.1 christos * even emit a stored file instead of a stored block (saving 5 bytes). 245 1.1 christos * This is applicable only for zip (not gzip or zlib). 246 1.1 christos * - creating new Huffman trees less frequently may not provide fast 247 1.1 christos * adaptation to changes in the input data statistics. (Take for 248 1.1 christos * example a binary file with poorly compressible code followed by 249 1.1 christos * a highly compressible string table.) Smaller buffer sizes give 250 1.1 christos * fast adaptation but have of course the overhead of transmitting 251 1.1 christos * trees more frequently. 252 1.1 christos * - I can't count above 4 253 1.1 christos */ 254 1.1 christos 255 1.6 christos uInt sym_next; /* running index in symbol buffer */ 256 1.4 wiz uInt sym_end; /* symbol table full when sym_next reaches this */ 257 1.1 christos 258 1.1 christos ulg opt_len; /* bit length of current block with optimal trees */ 259 1.1 christos ulg static_len; /* bit length of current block with static trees */ 260 1.1 christos uInt matches; /* number of string matches in current block */ 261 1.3 christos uInt insert; /* bytes at end of window left to insert */ 262 1.1 christos 263 1.2 christos #ifdef ZLIB_DEBUG 264 1.1 christos ulg compressed_len; /* total bit length of compressed file mod 2^32 */ 265 1.1 christos ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ 266 1.1 christos #endif 267 1.1 christos 268 1.1 christos ush bi_buf; 269 1.1 christos /* Output buffer. bits are inserted starting at the bottom (least 270 1.1 christos * significant bits). 271 1.1 christos */ 272 1.1 christos int bi_valid; 273 1.1 christos /* Number of valid bits in bi_buf. All bits above the last valid bit 274 1.1 christos * are always zero. 275 1.1 christos */ 276 1.1 christos 277 1.3 christos ulg high_water; 278 1.3 christos /* High water mark offset in window for initialized bytes -- bytes above 279 1.3 christos * this are set to zero in order to avoid memory check warnings when 280 1.3 christos * longest match routines access bytes past the input. This is then 281 1.3 christos * updated to the new high water mark. 282 1.3 christos */ 283 1.3 christos 284 1.1 christos } FAR deflate_state; 285 1.1 christos 286 1.1 christos /* Output a byte on the stream. 287 1.1 christos * IN assertion: there is enough room in pending_buf. 288 1.1 christos */ 289 1.3 christos #define put_byte(s, c) {s->pending_buf[s->pending++] = (Bytef)(c);} 290 1.1 christos 291 1.1 christos 292 1.1 christos #define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) 293 1.1 christos /* Minimum amount of lookahead, except at the end of the input file. 294 1.1 christos * See deflate.c for comments about the MIN_MATCH+1. 295 1.1 christos */ 296 1.1 christos 297 1.1 christos #define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) 298 1.1 christos /* In order to simplify the code, particularly on 16 bit machines, match 299 1.1 christos * distances are limited to MAX_DIST instead of WSIZE. 300 1.1 christos */ 301 1.1 christos 302 1.3 christos #define WIN_INIT MAX_MATCH 303 1.3 christos /* Number of bytes after end of data in window to initialize in order to avoid 304 1.3 christos memory checker errors from longest match routines */ 305 1.3 christos 306 1.1 christos /* in trees.c */ 307 1.6 christos void ZLIB_INTERNAL _tr_init(deflate_state *s); 308 1.6 christos int ZLIB_INTERNAL _tr_tally(deflate_state *s, unsigned dist, unsigned lc); 309 1.6 christos void ZLIB_INTERNAL _tr_flush_block(deflate_state *s, charf *buf, 310 1.6 christos ulg stored_len, int last); 311 1.6 christos void ZLIB_INTERNAL _tr_flush_bits(deflate_state *s); 312 1.6 christos void ZLIB_INTERNAL _tr_align(deflate_state *s); 313 1.6 christos void ZLIB_INTERNAL _tr_stored_block(deflate_state *s, charf *buf, 314 1.6 christos ulg stored_len, int last); 315 1.1 christos 316 1.1 christos #define d_code(dist) \ 317 1.1 christos ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) 318 1.1 christos /* Mapping from a distance to a distance code. dist is the distance - 1 and 319 1.1 christos * must not have side effects. _dist_code[256] and _dist_code[257] are never 320 1.1 christos * used. 321 1.1 christos */ 322 1.1 christos 323 1.2 christos #ifndef ZLIB_DEBUG 324 1.1 christos /* Inline versions of _tr_tally for speed: */ 325 1.1 christos 326 1.1 christos #if defined(GEN_TREES_H) || !defined(STDC) 327 1.3 christos extern uch ZLIB_INTERNAL _length_code[]; 328 1.3 christos extern uch ZLIB_INTERNAL _dist_code[]; 329 1.1 christos #else 330 1.3 christos extern const uch ZLIB_INTERNAL _length_code[]; 331 1.3 christos extern const uch ZLIB_INTERNAL _dist_code[]; 332 1.1 christos #endif 333 1.1 christos 334 1.6 christos #ifdef LIT_MEM 335 1.6 christos # define _tr_tally_lit(s, c, flush) \ 336 1.6 christos { uch cc = (c); \ 337 1.6 christos s->d_buf[s->sym_next] = 0; \ 338 1.6 christos s->l_buf[s->sym_next++] = cc; \ 339 1.6 christos s->dyn_ltree[cc].Freq++; \ 340 1.6 christos flush = (s->sym_next == s->sym_end); \ 341 1.6 christos } 342 1.6 christos # define _tr_tally_dist(s, distance, length, flush) \ 343 1.6 christos { uch len = (uch)(length); \ 344 1.6 christos ush dist = (ush)(distance); \ 345 1.6 christos s->d_buf[s->sym_next] = dist; \ 346 1.6 christos s->l_buf[s->sym_next++] = len; \ 347 1.6 christos dist--; \ 348 1.6 christos s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ 349 1.6 christos s->dyn_dtree[d_code(dist)].Freq++; \ 350 1.6 christos flush = (s->sym_next == s->sym_end); \ 351 1.6 christos } 352 1.6 christos #else 353 1.1 christos # define _tr_tally_lit(s, c, flush) \ 354 1.1 christos { uch cc = (c); \ 355 1.4 wiz s->sym_buf[s->sym_next++] = 0; \ 356 1.4 wiz s->sym_buf[s->sym_next++] = 0; \ 357 1.4 wiz s->sym_buf[s->sym_next++] = cc; \ 358 1.1 christos s->dyn_ltree[cc].Freq++; \ 359 1.4 wiz flush = (s->sym_next == s->sym_end); \ 360 1.1 christos } 361 1.1 christos # define _tr_tally_dist(s, distance, length, flush) \ 362 1.3 christos { uch len = (uch)(length); \ 363 1.3 christos ush dist = (ush)(distance); \ 364 1.5 christos s->sym_buf[s->sym_next++] = (uch)dist; \ 365 1.5 christos s->sym_buf[s->sym_next++] = (uch)(dist >> 8); \ 366 1.4 wiz s->sym_buf[s->sym_next++] = len; \ 367 1.1 christos dist--; \ 368 1.1 christos s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ 369 1.1 christos s->dyn_dtree[d_code(dist)].Freq++; \ 370 1.4 wiz flush = (s->sym_next == s->sym_end); \ 371 1.1 christos } 372 1.6 christos #endif 373 1.1 christos #else 374 1.1 christos # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) 375 1.1 christos # define _tr_tally_dist(s, distance, length, flush) \ 376 1.1 christos flush = _tr_tally(s, distance, length) 377 1.1 christos #endif 378 1.1 christos 379 1.1 christos #endif /* DEFLATE_H */ 380