Home | History | Annotate | Line # | Download | only in masmx64
      1  1.1  christos /* inffas8664.c is a hand tuned assembler version of inffast.c - fast decoding
      2  1.1  christos  * version for AMD64 on Windows using Microsoft C compiler
      3  1.1  christos  *
      4  1.1  christos  * Copyright (C) 1995-2003 Mark Adler
      5  1.1  christos  * For conditions of distribution and use, see copyright notice in zlib.h
      6  1.1  christos  *
      7  1.1  christos  * Copyright (C) 2003 Chris Anderson <christop (at) charm.net>
      8  1.1  christos  * Please use the copyright conditions above.
      9  1.1  christos  *
     10  1.1  christos  * 2005 - Adaptation to Microsoft C Compiler for AMD64 by Gilles Vollant
     11  1.1  christos  *
     12  1.1  christos  * inffas8664.c calls the function inffas8664fnc in inffasx64.asm
     13  1.1  christos  *  inffasx64.asm is automatically converted from the AMD64 portion of inffas86.c
     14  1.1  christos  *
     15  1.1  christos  * Dec-29-2003 -- I added AMD64 inflate asm support.  This version is also
     16  1.1  christos  * slightly quicker on x86 systems because, instead of using rep movsb to copy
     17  1.1  christos  * data, it uses rep movsw, which moves data in 2-byte chunks instead of single
     18  1.1  christos  * bytes.  I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates
     19  1.1  christos  * from http://fedora.linux.duke.edu/fc1_x86_64
     20  1.1  christos  * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with
     21  1.1  christos  * 1GB ram.  The 64-bit version is about 4% faster than the 32-bit version,
     22  1.1  christos  * when decompressing mozilla-source-1.3.tar.gz.
     23  1.1  christos  *
     24  1.1  christos  * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
     25  1.1  christos  * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
     26  1.1  christos  * the moment.  I have successfully compiled and tested this code with gcc2.96,
     27  1.1  christos  * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
     28  1.1  christos  * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
     29  1.1  christos  * enabled.  I will attempt to merge the MMX code into this version.  Newer
     30  1.1  christos  * versions of this and inffast.S can be found at
     31  1.1  christos  * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
     32  1.1  christos  *
     33  1.1  christos  */
     34  1.1  christos 
     35  1.1  christos #include <stdio.h>
     36  1.1  christos #include "zutil.h"
     37  1.1  christos #include "inftrees.h"
     38  1.1  christos #include "inflate.h"
     39  1.1  christos #include "inffast.h"
     40  1.1  christos 
     41  1.1  christos /* Mark Adler's comments from inffast.c: */
     42  1.1  christos 
     43  1.1  christos /*
     44  1.1  christos    Decode literal, length, and distance codes and write out the resulting
     45  1.1  christos    literal and match bytes until either not enough input or output is
     46  1.1  christos    available, an end-of-block is encountered, or a data error is encountered.
     47  1.1  christos    When large enough input and output buffers are supplied to inflate(), for
     48  1.1  christos    example, a 16K input buffer and a 64K output buffer, more than 95% of the
     49  1.1  christos    inflate execution time is spent in this routine.
     50  1.1  christos 
     51  1.1  christos    Entry assumptions:
     52  1.1  christos 
     53  1.1  christos         state->mode == LEN
     54  1.1  christos         strm->avail_in >= 6
     55  1.1  christos         strm->avail_out >= 258
     56  1.1  christos         start >= strm->avail_out
     57  1.1  christos         state->bits < 8
     58  1.1  christos 
     59  1.1  christos    On return, state->mode is one of:
     60  1.1  christos 
     61  1.1  christos         LEN -- ran out of enough output space or enough available input
     62  1.1  christos         TYPE -- reached end of block code, inflate() to interpret next block
     63  1.1  christos         BAD -- error in block data
     64  1.1  christos 
     65  1.1  christos    Notes:
     66  1.1  christos 
     67  1.1  christos     - The maximum input bits used by a length/distance pair is 15 bits for the
     68  1.1  christos       length code, 5 bits for the length extra, 15 bits for the distance code,
     69  1.1  christos       and 13 bits for the distance extra.  This totals 48 bits, or six bytes.
     70  1.1  christos       Therefore if strm->avail_in >= 6, then there is enough input to avoid
     71  1.1  christos       checking for available input while decoding.
     72  1.1  christos 
     73  1.1  christos     - The maximum bytes that a single length/distance pair can output is 258
     74  1.1  christos       bytes, which is the maximum length that can be coded.  inflate_fast()
     75  1.1  christos       requires strm->avail_out >= 258 for each loop to avoid checking for
     76  1.1  christos       output space.
     77  1.1  christos  */
     78  1.1  christos 
     79  1.1  christos 
     80  1.1  christos 
     81  1.1  christos     typedef struct inffast_ar {
                       /* Argument record that inflate_fast() fills in and passes by address to
                          inffas8664fnc().  The first two comment columns give each field's byte
                          offset in a 64-bit and a 32-bit build; the trailing comment names the
                          register column (x86, x86_64) and where the value comes from.
                          NOTE(review): presumably the assembly addresses fields by these
                          offsets, so do not reorder or resize members -- confirm against
                          inffasx64.asm. */
     82  1.1  christos /* 64   32                               x86  x86_64 */
     83  1.1  christos /* ar offset                              register */
     84  1.1  christos /*  0    0 */ void *esp;                /* esp save; never written by the C wrapper */
     85  1.1  christos /*  8    4 */ void *ebp;                /* ebp save; never written by the C wrapper */
     86  1.1  christos /* 16    8 */ unsigned char FAR *in;    /* esi rsi  local strm->next_in */
     87  1.1  christos /* 24   12 */ unsigned char FAR *last;  /*     r9   in + avail_in - PAD_AVAIL_IN; while in < last */
     88  1.1  christos /* 32   16 */ unsigned char FAR *out;   /* edi rdi  local strm->next_out */
     89  1.1  christos /* 40   20 */ unsigned char FAR *beg;   /*          inflate()'s init next_out */
     90  1.1  christos /* 48   24 */ unsigned char FAR *end;   /*     r10  out + avail_out - PAD_AVAIL_OUT; while out < end */
     91  1.1  christos /* 56   28 */ unsigned char FAR *window;/*          copy of state->window (a pointer, not a size) */
     92  1.1  christos /* 64   32 */ code const FAR *lcode;    /* ebp rbp  local state->lencode */
     93  1.1  christos /* 72   36 */ code const FAR *dcode;    /*     r11  local state->distcode */
     94  1.1  christos /* 80   40 */ size_t /*unsigned long */hold;       /* edx rdx  local state->hold (bit accumulator) */
     95  1.1  christos /* 88   44 */ unsigned bits;            /* ebx rbx  local state->bits */
     96  1.1  christos /* 92   48 */ unsigned wsize;           /*          window size, from state->wsize */
     97  1.1  christos /* 96   52 */ unsigned write;           /*          window write index, from state->wnext */
     98  1.1  christos /*100   56 */ unsigned lmask;           /*     r12  mask for lcode: (1 << state->lenbits) - 1 */
     99  1.1  christos /*104   60 */ unsigned dmask;           /*     r13  mask for dcode: (1 << state->distbits) - 1 */
    100  1.1  christos /*108   64 */ unsigned len;             /*     r14  match length; reused by the C wrapper as scratch */
    101  1.1  christos /*112   68 */ unsigned dist;            /*     r15  match distance */
    102  1.1  christos /*116   72 */ unsigned status;          /*          0: mode unchanged, 1: mode = TYPE, >1: mode = BAD */
    103  1.1  christos     } type_ar;
    104  1.1  christos #ifdef ASMINF
    105  1.1  christos 
    106  1.1  christos void inflate_fast(strm, start)
    107  1.1  christos z_streamp strm;
    108  1.1  christos unsigned start;         /* inflate()'s starting value for strm->avail_out */
    109  1.1  christos {
    110  1.1  christos     struct inflate_state FAR *state;
    111  1.1  christos     type_ar ar;
    112  1.1  christos     void inffas8664fnc(struct inffast_ar * par);
    113  1.1  christos 
    114  1.1  christos 
    115  1.1  christos 
    116  1.1  christos #if (defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 )) || (defined(_MSC_VER) && defined(_M_AMD64))
    117  1.1  christos #define PAD_AVAIL_IN 6
    118  1.1  christos #define PAD_AVAIL_OUT 258
    119  1.1  christos #else
    120  1.1  christos #define PAD_AVAIL_IN 5
    121  1.1  christos #define PAD_AVAIL_OUT 257
    122  1.1  christos #endif
    123  1.1  christos 
    124  1.1  christos     /* copy state to local variables */
    125  1.1  christos     state = (struct inflate_state FAR *)strm->state;
    126  1.1  christos 
    127  1.1  christos     ar.in = strm->next_in;
    128  1.1  christos     ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN);
    129  1.1  christos     ar.out = strm->next_out;
    130  1.1  christos     ar.beg = ar.out - (start - strm->avail_out);
    131  1.1  christos     ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT);
    132  1.1  christos     ar.wsize = state->wsize;
    133  1.1  christos     ar.write = state->wnext;
    134  1.1  christos     ar.window = state->window;
    135  1.1  christos     ar.hold = state->hold;
    136  1.1  christos     ar.bits = state->bits;
    137  1.1  christos     ar.lcode = state->lencode;
    138  1.1  christos     ar.dcode = state->distcode;
    139  1.1  christos     ar.lmask = (1U << state->lenbits) - 1;
    140  1.1  christos     ar.dmask = (1U << state->distbits) - 1;
    141  1.1  christos 
    142  1.1  christos     /* decode literals and length/distances until end-of-block or not enough
    143  1.1  christos        input data or output space */
    144  1.1  christos 
    145  1.1  christos     /* align in on 1/2 hold size boundary */
    146  1.1  christos     while (((size_t)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) {
    147  1.1  christos         ar.hold += (unsigned long)*ar.in++ << ar.bits;
    148  1.1  christos         ar.bits += 8;
    149  1.1  christos     }
    150  1.1  christos 
    151  1.1  christos     inffas8664fnc(&ar);
    152  1.1  christos 
    153  1.1  christos     if (ar.status > 1) {
    154  1.1  christos         if (ar.status == 2)
    155  1.1  christos             strm->msg = "invalid literal/length code";
    156  1.1  christos         else if (ar.status == 3)
    157  1.1  christos             strm->msg = "invalid distance code";
    158  1.1  christos         else
    159  1.1  christos             strm->msg = "invalid distance too far back";
    160  1.1  christos         state->mode = BAD;
    161  1.1  christos     }
    162  1.1  christos     else if ( ar.status == 1 ) {
    163  1.1  christos         state->mode = TYPE;
    164  1.1  christos     }
    165  1.1  christos 
    166  1.1  christos     /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
    167  1.1  christos     ar.len = ar.bits >> 3;
    168  1.1  christos     ar.in -= ar.len;
    169  1.1  christos     ar.bits -= ar.len << 3;
    170  1.1  christos     ar.hold &= (1U << ar.bits) - 1;
    171  1.1  christos 
    172  1.1  christos     /* update state and return */
    173  1.1  christos     strm->next_in = ar.in;
    174  1.1  christos     strm->next_out = ar.out;
    175  1.1  christos     strm->avail_in = (unsigned)(ar.in < ar.last ?
    176  1.1  christos                                 PAD_AVAIL_IN + (ar.last - ar.in) :
    177  1.1  christos                                 PAD_AVAIL_IN - (ar.in - ar.last));
    178  1.1  christos     strm->avail_out = (unsigned)(ar.out < ar.end ?
    179  1.1  christos                                  PAD_AVAIL_OUT + (ar.end - ar.out) :
    180  1.1  christos                                  PAD_AVAIL_OUT - (ar.out - ar.end));
    181  1.1  christos     state->hold = (unsigned long)ar.hold;
    182  1.1  christos     state->bits = ar.bits;
    183  1.1  christos     return;
    184  1.1  christos }
    185  1.1  christos 
    186  1.1  christos #endif
    187