Home | History | Annotate | Line # | Download | only in zlib
inffast.c revision 1.3
      1 /*	$NetBSD: inffast.c,v 1.3 2006/01/27 00:45:27 christos Exp $	*/
      2 
      3 /* inffast.c -- fast decoding
      4  * Copyright (C) 1995-2004 Mark Adler
      5  * For conditions of distribution and use, see copyright notice in zlib.h
      6  */
      7 
      8 #include "zutil.h"
      9 #include "inftrees.h"
     10 #include "inflate.h"
     11 #include "inffast.h"
     12 
     13 #ifndef ASMINF
     14 
     15 /* Allow machine dependent optimization for post-increment or pre-increment.
     16    Based on testing to date,
     17    Pre-increment preferred for:
     18    - PowerPC G3 (Adler)
     19    - MIPS R5000 (Randers-Pehrson)
     20    Post-increment preferred for:
     21    - none
     22    No measurable difference:
     23    - Pentium III (Anderson)
     24    - M68060 (Nikl)
     25  */
     26 #ifdef POSTINC
     27 #  define OFF 0
     28 #  define PUP(a) *(a)++
     29 #else
     30 #  define OFF 1
     31 #  define PUP(a) *++(a)
     32 #endif
     33 
     34 /*
     35    Decode literal, length, and distance codes and write out the resulting
     36    literal and match bytes until either not enough input or output is
     37    available, an end-of-block is encountered, or a data error is encountered.
     38    When large enough input and output buffers are supplied to inflate(), for
     39    example, a 16K input buffer and a 64K output buffer, more than 95% of the
     40    inflate execution time is spent in this routine.
     41 
     42    Entry assumptions:
     43 
     44         state->mode == LEN
     45         strm->avail_in >= 6
     46         strm->avail_out >= 258
     47         start >= strm->avail_out
     48         state->bits < 8
     49 
     50    On return, state->mode is one of:
     51 
     52         LEN -- ran out of enough output space or enough available input
     53         TYPE -- reached end of block code, inflate() to interpret next block
     54         BAD -- error in block data
     55 
     56    Notes:
     57 
     58     - The maximum input bits used by a length/distance pair is 15 bits for the
     59       length code, 5 bits for the length extra, 15 bits for the distance code,
     60       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
     61       Therefore if strm->avail_in >= 6, then there is enough input to avoid
     62       checking for available input while decoding.
     63 
     64     - The maximum bytes that a single length/distance pair can output is 258
     65       bytes, which is the maximum length that can be coded.  inflate_fast()
     66       requires strm->avail_out >= 258 for each loop to avoid checking for
     67       output space.
     68  */
     69 void inflate_fast(strm, start)
     70 z_streamp strm;
     71 unsigned start;         /* inflate()'s starting value for strm->avail_out */
     72 {
     73     struct inflate_state FAR *state;
     74     unsigned char FAR *in;      /* local strm->next_in */
     75     unsigned char FAR *last;    /* while in < last, enough input available */
     76     unsigned char FAR *out;     /* local strm->next_out */
     77     unsigned char FAR *beg;     /* inflate()'s initial strm->next_out */
     78     unsigned char FAR *end;     /* while out < end, enough space available */
     79 #ifdef INFLATE_STRICT
     80     unsigned dmax;              /* maximum distance from zlib header */
     81 #endif
     82     unsigned wsize;             /* window size or zero if not using window */
     83     unsigned whave;             /* valid bytes in the window */
     84     unsigned wwrite;            /* window write index */
     85     unsigned char FAR *window;  /* allocated sliding window, if wsize != 0 */
     86     unsigned long hold;         /* local strm->hold */
     87     unsigned bits;              /* local strm->bits */
     88     code const FAR *lcode;      /* local strm->lencode */
     89     code const FAR *dcode;      /* local strm->distcode */
     90     unsigned lmask;             /* mask for first level of length codes */
     91     unsigned dmask;             /* mask for first level of distance codes */
     92     code this;                  /* retrieved table entry */
     93     unsigned op;                /* code bits, operation, extra bits, or */
     94                                 /*  window position, window bytes to copy */
     95     unsigned len;               /* match length, unused bytes */
     96     unsigned dist;              /* match distance */
     97     unsigned char FAR *from;    /* where to copy match from */
     98 
     99     /* copy state to local variables */
    100     state = (struct inflate_state FAR *)strm->state;
    101     in = strm->next_in - OFF;
    102     last = in + (strm->avail_in - 5);
    103     out = strm->next_out - OFF;
    104     beg = out - (start - strm->avail_out);
    105     end = out + (strm->avail_out - 257);
    106 #ifdef INFLATE_STRICT
    107     dmax = state->dmax;
    108 #endif
    109     wsize = state->wsize;
    110     whave = state->whave;
    111     wwrite = state->write;
    112     window = state->window;
    113     hold = state->hold;
    114     bits = state->bits;
    115     lcode = state->lencode;
    116     dcode = state->distcode;
    117     lmask = (1U << state->lenbits) - 1;
    118     dmask = (1U << state->distbits) - 1;
    119 
    120     /* decode literals and length/distances until end-of-block or not enough
    121        input data or output space */
    122     do {
    123         if (bits < 15) {
    124             hold += (unsigned long)(PUP(in)) << bits;
    125             bits += 8;
    126             hold += (unsigned long)(PUP(in)) << bits;
    127             bits += 8;
    128         }
    129         this = lcode[hold & lmask];
    130       dolen:
    131         op = (unsigned)(this.bits);
    132         hold >>= op;
    133         bits -= op;
    134         op = (unsigned)(this.op);
    135         if (op == 0) {                          /* literal */
    136             Tracevv((stderr, this.val >= 0x20 && this.val < 0x7f ?
    137                     "inflate:         literal '%c'\n" :
    138                     "inflate:         literal 0x%02x\n", this.val));
    139             PUP(out) = (unsigned char)(this.val);
    140         }
    141         else if (op & 16) {                     /* length base */
    142             len = (unsigned)(this.val);
    143             op &= 15;                           /* number of extra bits */
    144             if (op) {
    145                 if (bits < op) {
    146                     hold += (unsigned long)(PUP(in)) << bits;
    147                     bits += 8;
    148                 }
    149                 len += (unsigned)hold & ((1U << op) - 1);
    150                 hold >>= op;
    151                 bits -= op;
    152             }
    153             Tracevv((stderr, "inflate:         length %u\n", len));
    154             if (bits < 15) {
    155                 hold += (unsigned long)(PUP(in)) << bits;
    156                 bits += 8;
    157                 hold += (unsigned long)(PUP(in)) << bits;
    158                 bits += 8;
    159             }
    160             this = dcode[hold & dmask];
    161           dodist:
    162             op = (unsigned)(this.bits);
    163             hold >>= op;
    164             bits -= op;
    165             op = (unsigned)(this.op);
    166             if (op & 16) {                      /* distance base */
    167                 dist = (unsigned)(this.val);
    168                 op &= 15;                       /* number of extra bits */
    169                 if (bits < op) {
    170                     hold += (unsigned long)(PUP(in)) << bits;
    171                     bits += 8;
    172                     if (bits < op) {
    173                         hold += (unsigned long)(PUP(in)) << bits;
    174                         bits += 8;
    175                     }
    176                 }
    177                 dist += (unsigned)hold & ((1U << op) - 1);
    178 #ifdef INFLATE_STRICT
    179                 if (dist > dmax) {
    180                     strm->msg = (char *)"invalid distance too far back";
    181                     state->mode = BAD;
    182                     break;
    183                 }
    184 #endif
    185                 hold >>= op;
    186                 bits -= op;
    187                 Tracevv((stderr, "inflate:         distance %u\n", dist));
    188                 op = (unsigned)(out - beg);     /* max distance in output */
    189                 if (dist > op) {                /* see if copy from window */
    190                     op = dist - op;             /* distance back in window */
    191                     if (op > whave) {
    192                         strm->msg = __UNCONST("invalid distance too far back");
    193                         state->mode = BAD;
    194                         break;
    195                     }
    196                     from = window - OFF;
    197                     if (wwrite == 0) {           /* very common case */
    198                         from += wsize - op;
    199                         if (op < len) {         /* some from window */
    200                             len -= op;
    201                             do {
    202                                 PUP(out) = PUP(from);
    203                             } while (--op);
    204                             from = out - dist;  /* rest from output */
    205                         }
    206                     }
    207                     else if (wwrite < op) {      /* wrap around window */
    208                         from += wsize + wwrite - op;
    209                         op -= wwrite;
    210                         if (op < len) {         /* some from end of window */
    211                             len -= op;
    212                             do {
    213                                 PUP(out) = PUP(from);
    214                             } while (--op);
    215                             from = window - OFF;
    216                             if (wwrite < len) {  /* some from start of window */
    217                                 op = wwrite;
    218                                 len -= op;
    219                                 do {
    220                                     PUP(out) = PUP(from);
    221                                 } while (--op);
    222                                 from = out - dist;      /* rest from output */
    223                             }
    224                         }
    225                     }
    226                     else {                      /* contiguous in window */
    227                         from += wwrite - op;
    228                         if (op < len) {         /* some from window */
    229                             len -= op;
    230                             do {
    231                                 PUP(out) = PUP(from);
    232                             } while (--op);
    233                             from = out - dist;  /* rest from output */
    234                         }
    235                     }
    236                     while (len > 2) {
    237                         PUP(out) = PUP(from);
    238                         PUP(out) = PUP(from);
    239                         PUP(out) = PUP(from);
    240                         len -= 3;
    241                     }
    242                     if (len) {
    243                         PUP(out) = PUP(from);
    244                         if (len > 1)
    245                             PUP(out) = PUP(from);
    246                     }
    247                 }
    248                 else {
    249                     from = out - dist;          /* copy direct from output */
    250                     do {                        /* minimum length is three */
    251                         PUP(out) = PUP(from);
    252                         PUP(out) = PUP(from);
    253                         PUP(out) = PUP(from);
    254                         len -= 3;
    255                     } while (len > 2);
    256                     if (len) {
    257                         PUP(out) = PUP(from);
    258                         if (len > 1)
    259                             PUP(out) = PUP(from);
    260                     }
    261                 }
    262             }
    263             else if ((op & 64) == 0) {          /* 2nd level distance code */
    264                 this = dcode[this.val + (hold & ((1U << op) - 1))];
    265                 goto dodist;
    266             }
    267             else {
    268                 strm->msg = __UNCONST("invalid distance code");
    269                 state->mode = BAD;
    270                 break;
    271             }
    272         }
    273         else if ((op & 64) == 0) {              /* 2nd level length code */
    274             this = lcode[this.val + (hold & ((1U << op) - 1))];
    275             goto dolen;
    276         }
    277         else if (op & 32) {                     /* end-of-block */
    278             Tracevv((stderr, "inflate:         end of block\n"));
    279             state->mode = TYPE;
    280             break;
    281         }
    282         else {
    283             strm->msg = __UNCONST("invalid literal/length code");
    284             state->mode = BAD;
    285             break;
    286         }
    287     } while (in < last && out < end);
    288 
    289     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
    290     len = bits >> 3;
    291     in -= len;
    292     bits -= len << 3;
    293     hold &= (1U << bits) - 1;
    294 
    295     /* update state and return */
    296     strm->next_in = in + OFF;
    297     strm->next_out = out + OFF;
    298     strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
    299     strm->avail_out = (unsigned)(out < end ?
    300                                  257 + (end - out) : 257 - (out - end));
    301     state->hold = hold;
    302     state->bits = bits;
    303     return;
    304 }
    305 
    306 /*
    307    inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
    308    - Using bit fields for code structure
    309    - Different op definition to avoid & for extra bits (do & for table bits)
    310    - Three separate decoding do-loops for direct, window, and write == 0
    311    - Special case for distance > 1 copies to do overlapped load and store copy
    312    - Explicit branch predictions (based on measured branch probabilities)
    313    - Deferring match copy and interspersed it with decoding subsequent codes
    314    - Swapping literal/length else
    315    - Swapping window/direct else
    316    - Larger unrolled copy loops (three is about right)
    317    - Moving len -= 3 statement into middle of loop
    318  */
    319 
    320 #endif /* !ASMINF */
    321