/*	$NetBSD: deflate.h,v 1.2 2006/01/16 17:02:29 christos Exp $	*/
2 1.1 christos
/* deflate.h -- internal compression state
 * Copyright (C) 1995-2004 Jean-loup Gailly
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
7 1.1 christos
/* WARNING: this file should *not* be used by applications. It is
   part of the implementation of the compression library and is
   subject to change. Applications should only use zlib.h.
 */
12 1.1 christos
/* @(#) Id */
14 1.1 christos
15 1.1 christos #ifndef DEFLATE_H
16 1.1 christos #define DEFLATE_H
17 1.1 christos
18 1.1 christos #include "zutil.h"
19 1.1 christos
/* Define NO_GZIP when compiling if you want to disable gzip header and
   trailer creation by deflate().  NO_GZIP would be used to avoid linking in
   the crc code when it is not needed.  For shared libraries, gzip encoding
   should be left enabled.

   GZIP is therefore defined unless the build explicitly opts out. */
#ifndef NO_GZIP
#  define GZIP
#endif
27 1.1 christos
/* ===========================================================================
 * Internal compression state.
 */

/* number of length codes, not counting the special END_BLOCK code */
#define LENGTH_CODES 29

/* number of literal bytes 0..255 */
#define LITERALS  256

/* number of Literal or Length codes, including the END_BLOCK code */
#define L_CODES (LITERALS+1+LENGTH_CODES)

/* number of distance codes */
#define D_CODES   30

/* number of codes used to transfer the bit lengths */
#define BL_CODES  19

/* maximum heap size */
#define HEAP_SIZE (2*L_CODES+1)

/* All codes must not exceed MAX_BITS bits */
#define MAX_BITS 15
52 1.1 christos
/* Stream status: the values stored in the `status` field of the
 * internal deflate state.
 */
#define INIT_STATE    42
#define EXTRA_STATE   69
#define NAME_STATE    73
#define COMMENT_STATE 91
#define HCRC_STATE   103
#define BUSY_STATE   113
#define FINISH_STATE 666
61 1.1 christos
62 1.1 christos
63 1.1 christos /* Data structure describing a single value and its code string. */
64 1.1 christos typedef struct ct_data_s {
65 1.1 christos union {
66 1.1 christos ush freq; /* frequency count */
67 1.1 christos ush code; /* bit string */
68 1.1 christos } fc;
69 1.1 christos union {
70 1.1 christos ush dad; /* father node in Huffman tree */
71 1.1 christos ush len; /* length of bit string */
72 1.1 christos } dl;
73 1.1 christos } FAR ct_data;
74 1.1 christos
/* Shorthand member names for ct_data: `x.Freq` is `x.fc.freq`, and so on.
 * Freq/Code share one union, Dad/Len share the other.
 */
#define Freq fc.freq
#define Code fc.code
#define Dad  dl.dad
#define Len  dl.len
79 1.1 christos
80 1.1 christos typedef struct static_tree_desc_s static_tree_desc;
81 1.1 christos
82 1.1 christos typedef struct tree_desc_s {
83 1.1 christos ct_data *dyn_tree; /* the dynamic tree */
84 1.1 christos int max_code; /* largest code with non zero frequency */
85 1.1 christos static_tree_desc *stat_desc; /* the corresponding static tree */
86 1.1 christos } FAR tree_desc;
87 1.1 christos
88 1.1 christos typedef ush Pos;
89 1.1 christos typedef Pos FAR Posf;
90 1.1 christos typedef unsigned IPos;
91 1.1 christos
92 1.1 christos /* A Pos is an index in the character window. We use short instead of int to
93 1.1 christos * save space in the various tables. IPos is used only for parameter passing.
94 1.1 christos */
95 1.1 christos
96 1.1 christos typedef struct internal_state {
97 1.1 christos z_streamp strm; /* pointer back to this zlib stream */
98 1.1 christos int status; /* as the name implies */
99 1.1 christos Bytef *pending_buf; /* output still pending */
100 1.1 christos ulg pending_buf_size; /* size of pending_buf */
101 1.1 christos Bytef *pending_out; /* next pending byte to output to the stream */
102 1.1 christos uInt pending; /* nb of bytes in the pending buffer */
103 1.1 christos int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
104 1.1 christos gz_headerp gzhead; /* gzip header information to write */
105 1.1 christos uInt gzindex; /* where in extra, name, or comment */
106 1.1 christos Byte method; /* STORED (for zip only) or DEFLATED */
107 1.1 christos int last_flush; /* value of flush param for previous deflate call */
108 1.1 christos
109 1.1 christos /* used by deflate.c: */
110 1.1 christos
111 1.1 christos uInt w_size; /* LZ77 window size (32K by default) */
112 1.1 christos uInt w_bits; /* log2(w_size) (8..16) */
113 1.1 christos uInt w_mask; /* w_size - 1 */
114 1.1 christos
115 1.1 christos Bytef *window;
116 1.1 christos /* Sliding window. Input bytes are read into the second half of the window,
117 1.1 christos * and move to the first half later to keep a dictionary of at least wSize
118 1.1 christos * bytes. With this organization, matches are limited to a distance of
119 1.1 christos * wSize-MAX_MATCH bytes, but this ensures that IO is always
120 1.1 christos * performed with a length multiple of the block size. Also, it limits
121 1.1 christos * the window size to 64K, which is quite useful on MSDOS.
122 1.1 christos * To do: use the user input buffer as sliding window.
123 1.1 christos */
124 1.1 christos
125 1.1 christos ulg window_size;
126 1.1 christos /* Actual size of window: 2*wSize, except when the user input buffer
127 1.1 christos * is directly used as sliding window.
128 1.1 christos */
129 1.1 christos
130 1.1 christos Posf *prev;
131 1.1 christos /* Link to older string with same hash index. To limit the size of this
132 1.1 christos * array to 64K, this link is maintained only for the last 32K strings.
133 1.1 christos * An index in this array is thus a window index modulo 32K.
134 1.1 christos */
135 1.1 christos
136 1.1 christos Posf *head; /* Heads of the hash chains or NIL. */
137 1.1 christos
138 1.1 christos uInt ins_h; /* hash index of string to be inserted */
139 1.1 christos uInt hash_size; /* number of elements in hash table */
140 1.1 christos uInt hash_bits; /* log2(hash_size) */
141 1.1 christos uInt hash_mask; /* hash_size-1 */
142 1.1 christos
143 1.1 christos uInt hash_shift;
144 1.1 christos /* Number of bits by which ins_h must be shifted at each input
145 1.1 christos * step. It must be such that after MIN_MATCH steps, the oldest
146 1.1 christos * byte no longer takes part in the hash key, that is:
147 1.1 christos * hash_shift * MIN_MATCH >= hash_bits
148 1.1 christos */
149 1.1 christos
150 1.1 christos long block_start;
151 1.1 christos /* Window position at the beginning of the current output block. Gets
152 1.1 christos * negative when the window is moved backwards.
153 1.1 christos */
154 1.1 christos
155 1.1 christos uInt match_length; /* length of best match */
156 1.1 christos IPos prev_match; /* previous match */
157 1.1 christos int match_available; /* set if previous match exists */
158 1.1 christos uInt strstart; /* start of string to insert */
159 1.1 christos uInt match_start; /* start of matching string */
160 1.1 christos uInt lookahead; /* number of valid bytes ahead in window */
161 1.1 christos
162 1.1 christos uInt prev_length;
163 1.1 christos /* Length of the best match at previous step. Matches not greater than this
164 1.1 christos * are discarded. This is used in the lazy match evaluation.
165 1.1 christos */
166 1.1 christos
167 1.1 christos uInt max_chain_length;
168 1.1 christos /* To speed up deflation, hash chains are never searched beyond this
169 1.1 christos * length. A higher limit improves compression ratio but degrades the
170 1.1 christos * speed.
171 1.1 christos */
172 1.1 christos
173 1.1 christos uInt max_lazy_match;
174 1.1 christos /* Attempt to find a better match only when the current match is strictly
175 1.1 christos * smaller than this value. This mechanism is used only for compression
176 1.1 christos * levels >= 4.
177 1.1 christos */
178 1.1 christos # define max_insert_length max_lazy_match
179 1.1 christos /* Insert new strings in the hash table only if the match length is not
180 1.1 christos * greater than this length. This saves time but degrades compression.
181 1.1 christos * max_insert_length is used only for compression levels <= 3.
182 1.1 christos */
183 1.1 christos
184 1.1 christos int level; /* compression level (1..9) */
185 1.1 christos int strategy; /* favor or force Huffman coding*/
186 1.1 christos
187 1.1 christos uInt good_match;
188 1.1 christos /* Use a faster search when the previous match is longer than this */
189 1.1 christos
190 1.1 christos int nice_match; /* Stop searching when current match exceeds this */
191 1.1 christos
192 1.1 christos /* used by trees.c: */
193 1.1 christos /* Didn't use ct_data typedef below to supress compiler warning */
194 1.1 christos struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
195 1.1 christos struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
196 1.1 christos struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
197 1.1 christos
198 1.1 christos struct tree_desc_s l_desc; /* desc. for literal tree */
199 1.1 christos struct tree_desc_s d_desc; /* desc. for distance tree */
200 1.1 christos struct tree_desc_s bl_desc; /* desc. for bit length tree */
201 1.1 christos
202 1.1 christos ush bl_count[MAX_BITS+1];
203 1.1 christos /* number of codes at each bit length for an optimal tree */
204 1.1 christos
205 1.1 christos int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
206 1.1 christos int heap_len; /* number of elements in the heap */
207 1.1 christos int heap_max; /* element of largest frequency */
208 1.1 christos /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
209 1.1 christos * The same heap array is used to build all trees.
210 1.1 christos */
211 1.1 christos
212 1.1 christos uch depth[2*L_CODES+1];
213 1.1 christos /* Depth of each subtree used as tie breaker for trees of equal frequency
214 1.1 christos */
215 1.1 christos
216 1.1 christos uchf *l_buf; /* buffer for literals or lengths */
217 1.1 christos
218 1.1 christos uInt lit_bufsize;
219 1.1 christos /* Size of match buffer for literals/lengths. There are 4 reasons for
220 1.1 christos * limiting lit_bufsize to 64K:
221 1.1 christos * - frequencies can be kept in 16 bit counters
222 1.1 christos * - if compression is not successful for the first block, all input
223 1.1 christos * data is still in the window so we can still emit a stored block even
224 1.1 christos * when input comes from standard input. (This can also be done for
225 1.1 christos * all blocks if lit_bufsize is not greater than 32K.)
226 1.1 christos * - if compression is not successful for a file smaller than 64K, we can
227 1.1 christos * even emit a stored file instead of a stored block (saving 5 bytes).
228 1.1 christos * This is applicable only for zip (not gzip or zlib).
229 1.1 christos * - creating new Huffman trees less frequently may not provide fast
230 1.1 christos * adaptation to changes in the input data statistics. (Take for
231 1.1 christos * example a binary file with poorly compressible code followed by
232 1.1 christos * a highly compressible string table.) Smaller buffer sizes give
233 1.1 christos * fast adaptation but have of course the overhead of transmitting
234 1.1 christos * trees more frequently.
235 1.1 christos * - I can't count above 4
236 1.1 christos */
237 1.1 christos
238 1.1 christos uInt last_lit; /* running index in l_buf */
239 1.1 christos
240 1.1 christos ushf *d_buf;
241 1.1 christos /* Buffer for distances. To simplify the code, d_buf and l_buf have
242 1.1 christos * the same number of elements. To use different lengths, an extra flag
243 1.1 christos * array would be necessary.
244 1.1 christos */
245 1.1 christos
246 1.1 christos ulg opt_len; /* bit length of current block with optimal trees */
247 1.1 christos ulg static_len; /* bit length of current block with static trees */
248 1.1 christos uInt matches; /* number of string matches in current block */
249 1.1 christos int last_eob_len; /* bit length of EOB code for last block */
250 1.1 christos
251 1.2 christos #ifdef ZLIB_DEBUG
252 1.1 christos ulg compressed_len; /* total bit length of compressed file mod 2^32 */
253 1.1 christos ulg bits_sent; /* bit length of compressed data sent mod 2^32 */
254 1.1 christos #endif
255 1.1 christos
256 1.1 christos ush bi_buf;
257 1.1 christos /* Output buffer. bits are inserted starting at the bottom (least
258 1.1 christos * significant bits).
259 1.1 christos */
260 1.1 christos int bi_valid;
261 1.1 christos /* Number of valid bits in bi_buf. All bits above the last valid bit
262 1.1 christos * are always zero.
263 1.1 christos */
264 1.1 christos
265 1.1 christos } FAR deflate_state;
266 1.1 christos
/* Output a byte on the stream.
 * IN assertion: there is enough room in pending_buf.
 *
 * Stores c at pending_buf[pending] and then increments pending.
 * s is evaluated twice, so it must not have side effects; it is
 * parenthesized so that any pointer expression (e.g. &state) works.
 * The expansion is a braced block, as before.
 */
#define put_byte(s, c) {(s)->pending_buf[(s)->pending++] = (c);}
271 1.1 christos
272 1.1 christos
/* Minimum amount of lookahead, except at the end of the input file.
 * See deflate.c for comments about the MIN_MATCH+1.
 * MAX_MATCH and MIN_MATCH are not defined in this header; they are expected
 * to be in scope wherever this macro is expanded.
 */
#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1)

/* In order to simplify the code, particularly on 16 bit machines, match
 * distances are limited to MAX_DIST instead of WSIZE.
 * s is a pointer to the deflate state; only its w_size field is read.
 */
#define MAX_DIST(s)  ((s)->w_size-MIN_LOOKAHEAD)
282 1.1 christos
283 1.1 christos /* in trees.c */
284 1.1 christos void _tr_init OF((deflate_state *s));
285 1.1 christos int _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc));
286 1.1 christos void _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len,
287 1.1 christos int eof));
288 1.1 christos void _tr_align OF((deflate_state *s));
289 1.1 christos void _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len,
290 1.1 christos int eof));
291 1.1 christos
/* Mapping from a distance to a distance code. dist is the distance - 1 and
 * must not have side effects (it is evaluated more than once).
 * Values below 256 index _dist_code directly; larger values use the upper
 * half of the table, indexed by 256 + (dist >> 7).
 * _dist_code[256] and _dist_code[257] are never used.
 */
#define d_code(dist) \
   ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)])
298 1.1 christos
299 1.2 christos #ifndef ZLIB_DEBUG
300 1.1 christos /* Inline versions of _tr_tally for speed: */
301 1.1 christos
302 1.1 christos #if defined(GEN_TREES_H) || !defined(STDC)
303 1.1 christos extern uch _length_code[];
304 1.1 christos extern uch _dist_code[];
305 1.1 christos #else
306 1.1 christos extern const uch _length_code[];
307 1.1 christos extern const uch _dist_code[];
308 1.1 christos #endif
309 1.1 christos
310 1.1 christos # define _tr_tally_lit(s, c, flush) \
311 1.1 christos { uch cc = (c); \
312 1.1 christos s->d_buf[s->last_lit] = 0; \
313 1.1 christos s->l_buf[s->last_lit++] = cc; \
314 1.1 christos s->dyn_ltree[cc].Freq++; \
315 1.1 christos flush = (s->last_lit == s->lit_bufsize-1); \
316 1.1 christos }
317 1.1 christos # define _tr_tally_dist(s, distance, length, flush) \
318 1.1 christos { uch len = (length); \
319 1.1 christos ush dist = (distance); \
320 1.1 christos s->d_buf[s->last_lit] = dist; \
321 1.1 christos s->l_buf[s->last_lit++] = len; \
322 1.1 christos dist--; \
323 1.1 christos s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \
324 1.1 christos s->dyn_dtree[d_code(dist)].Freq++; \
325 1.1 christos flush = (s->last_lit == s->lit_bufsize-1); \
326 1.1 christos }
327 1.1 christos #else
328 1.1 christos # define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c)
329 1.1 christos # define _tr_tally_dist(s, distance, length, flush) \
330 1.1 christos flush = _tr_tally(s, distance, length)
331 1.1 christos #endif
332 1.1 christos
333 1.1 christos #endif /* DEFLATE_H */
334