Home | History | Annotate | Line # | Download | only in masmx86
match686.asm revision 1.1.1.1.2.2
      1  1.1.1.1.2.2  pgoyette ; match686.asm -- Asm portion of the optimized longest_match for 32 bits x86
      2  1.1.1.1.2.2  pgoyette ; Copyright (C) 1995-1996 Jean-loup Gailly, Brian Raiter and Gilles Vollant.
      3  1.1.1.1.2.2  pgoyette ; File written by Gilles Vollant, by converting match686.S from Brian Raiter
      4  1.1.1.1.2.2  pgoyette ; for MASM. This is as assembly version of longest_match
      5  1.1.1.1.2.2  pgoyette ;  from Jean-loup Gailly in deflate.c
      6  1.1.1.1.2.2  pgoyette ;
      7  1.1.1.1.2.2  pgoyette ;         http://www.zlib.net
      8  1.1.1.1.2.2  pgoyette ;         http://www.winimage.com/zLibDll
      9  1.1.1.1.2.2  pgoyette ;         http://www.muppetlabs.com/~breadbox/software/assembly.html
     10  1.1.1.1.2.2  pgoyette ;
     11  1.1.1.1.2.2  pgoyette ; For Visual C++ 4.x and higher and ML 6.x and higher
     12  1.1.1.1.2.2  pgoyette ;   ml.exe is distributed in
     13  1.1.1.1.2.2  pgoyette ;  http://www.microsoft.com/downloads/details.aspx?FamilyID=7a1c9da0-0510-44a2-b042-7ef370530c64
     14  1.1.1.1.2.2  pgoyette ;
     15  1.1.1.1.2.2  pgoyette ; this file contain two implementation of longest_match
     16  1.1.1.1.2.2  pgoyette ;
     17  1.1.1.1.2.2  pgoyette ;  this longest_match was written by Brian raiter (1998), optimized for Pentium Pro
     18  1.1.1.1.2.2  pgoyette ;   (and the faster known version of match_init on modern Core 2 Duo and AMD Phenom)
     19  1.1.1.1.2.2  pgoyette ;
     20  1.1.1.1.2.2  pgoyette ;  for using an assembly version of longest_match, you need define ASMV in project
     21  1.1.1.1.2.2  pgoyette ;
     22  1.1.1.1.2.2  pgoyette ;    compile the asm file running
     23  1.1.1.1.2.2  pgoyette ;           ml /coff /Zi /c /Flmatch686.lst match686.asm
     24  1.1.1.1.2.2  pgoyette ;    and do not include match686.obj in your project
     25  1.1.1.1.2.2  pgoyette ;
     26  1.1.1.1.2.2  pgoyette ; note: contrib of zLib 1.2.3 and earlier contained both a deprecated version for
     27  1.1.1.1.2.2  pgoyette ;  Pentium (prior Pentium Pro) and this version for Pentium Pro and modern processor
     28  1.1.1.1.2.2  pgoyette ;  with autoselect (with cpu detection code)
     29  1.1.1.1.2.2  pgoyette ;  if you want support the old pentium optimization, you can still use these version
     30  1.1.1.1.2.2  pgoyette ;
     31  1.1.1.1.2.2  pgoyette ; this file is not optimized for old pentium, but it compatible with all x86 32 bits
     32  1.1.1.1.2.2  pgoyette ; processor (starting 80386)
     33  1.1.1.1.2.2  pgoyette ;
     34  1.1.1.1.2.2  pgoyette ;
     35  1.1.1.1.2.2  pgoyette ; see below : zlib1222add must be adjuster if you use a zlib version < 1.2.2.2
     36  1.1.1.1.2.2  pgoyette 
     37  1.1.1.1.2.2  pgoyette ;uInt longest_match(s, cur_match)
     38  1.1.1.1.2.2  pgoyette ;    deflate_state *s;
     39  1.1.1.1.2.2  pgoyette ;    IPos cur_match;                             /* current match */
     40  1.1.1.1.2.2  pgoyette 
     41  1.1.1.1.2.2  pgoyette     NbStack         equ     76
     42  1.1.1.1.2.2  pgoyette     cur_match       equ     dword ptr[esp+NbStack-0]
     43  1.1.1.1.2.2  pgoyette     str_s           equ     dword ptr[esp+NbStack-4]
     44  1.1.1.1.2.2  pgoyette ; 5 dword on top (ret,ebp,esi,edi,ebx)
     45  1.1.1.1.2.2  pgoyette     adrret          equ     dword ptr[esp+NbStack-8]
     46  1.1.1.1.2.2  pgoyette     pushebp         equ     dword ptr[esp+NbStack-12]
     47  1.1.1.1.2.2  pgoyette     pushedi         equ     dword ptr[esp+NbStack-16]
     48  1.1.1.1.2.2  pgoyette     pushesi         equ     dword ptr[esp+NbStack-20]
     49  1.1.1.1.2.2  pgoyette     pushebx         equ     dword ptr[esp+NbStack-24]
     50  1.1.1.1.2.2  pgoyette 
     51  1.1.1.1.2.2  pgoyette     chain_length    equ     dword ptr [esp+NbStack-28]
     52  1.1.1.1.2.2  pgoyette     limit           equ     dword ptr [esp+NbStack-32]
     53  1.1.1.1.2.2  pgoyette     best_len        equ     dword ptr [esp+NbStack-36]
     54  1.1.1.1.2.2  pgoyette     window          equ     dword ptr [esp+NbStack-40]
     55  1.1.1.1.2.2  pgoyette     prev            equ     dword ptr [esp+NbStack-44]
     56  1.1.1.1.2.2  pgoyette     scan_start      equ      word ptr [esp+NbStack-48]
     57  1.1.1.1.2.2  pgoyette     wmask           equ     dword ptr [esp+NbStack-52]
     58  1.1.1.1.2.2  pgoyette     match_start_ptr equ     dword ptr [esp+NbStack-56]
     59  1.1.1.1.2.2  pgoyette     nice_match      equ     dword ptr [esp+NbStack-60]
     60  1.1.1.1.2.2  pgoyette     scan            equ     dword ptr [esp+NbStack-64]
     61  1.1.1.1.2.2  pgoyette 
     62  1.1.1.1.2.2  pgoyette     windowlen       equ     dword ptr [esp+NbStack-68]
     63  1.1.1.1.2.2  pgoyette     match_start     equ     dword ptr [esp+NbStack-72]
     64  1.1.1.1.2.2  pgoyette     strend          equ     dword ptr [esp+NbStack-76]
     65  1.1.1.1.2.2  pgoyette     NbStackAdd      equ     (NbStack-24)
     66  1.1.1.1.2.2  pgoyette 
     67  1.1.1.1.2.2  pgoyette     .386p
     68  1.1.1.1.2.2  pgoyette 
     69  1.1.1.1.2.2  pgoyette     name    gvmatch
     70  1.1.1.1.2.2  pgoyette     .MODEL  FLAT
     71  1.1.1.1.2.2  pgoyette 
     72  1.1.1.1.2.2  pgoyette 
     73  1.1.1.1.2.2  pgoyette 
     74  1.1.1.1.2.2  pgoyette ;  all the +zlib1222add offsets are due to the addition of fields
     75  1.1.1.1.2.2  pgoyette ;  in zlib in the deflate_state structure since the asm code was first written
     76  1.1.1.1.2.2  pgoyette ;  (if you compile with zlib 1.0.4 or older, use "zlib1222add equ (-4)").
     77  1.1.1.1.2.2  pgoyette ;  (if you compile with zlib between 1.0.5 and 1.2.2.1, use "zlib1222add equ 0").
     78  1.1.1.1.2.2  pgoyette ;  if you compile with zlib 1.2.2.2 or later , use "zlib1222add equ 8").
     79  1.1.1.1.2.2  pgoyette 
     80  1.1.1.1.2.2  pgoyette     zlib1222add         equ     8
     81  1.1.1.1.2.2  pgoyette 
     82  1.1.1.1.2.2  pgoyette ;  Note : these value are good with a 8 bytes boundary pack structure
     83  1.1.1.1.2.2  pgoyette     dep_chain_length    equ     74h+zlib1222add
     84  1.1.1.1.2.2  pgoyette     dep_window          equ     30h+zlib1222add
     85  1.1.1.1.2.2  pgoyette     dep_strstart        equ     64h+zlib1222add
     86  1.1.1.1.2.2  pgoyette     dep_prev_length     equ     70h+zlib1222add
     87  1.1.1.1.2.2  pgoyette     dep_nice_match      equ     88h+zlib1222add
     88  1.1.1.1.2.2  pgoyette     dep_w_size          equ     24h+zlib1222add
     89  1.1.1.1.2.2  pgoyette     dep_prev            equ     38h+zlib1222add
     90  1.1.1.1.2.2  pgoyette     dep_w_mask          equ     2ch+zlib1222add
     91  1.1.1.1.2.2  pgoyette     dep_good_match      equ     84h+zlib1222add
     92  1.1.1.1.2.2  pgoyette     dep_match_start     equ     68h+zlib1222add
     93  1.1.1.1.2.2  pgoyette     dep_lookahead       equ     6ch+zlib1222add
     94  1.1.1.1.2.2  pgoyette 
     95  1.1.1.1.2.2  pgoyette 
     96  1.1.1.1.2.2  pgoyette _TEXT                   segment
     97  1.1.1.1.2.2  pgoyette 
     98  1.1.1.1.2.2  pgoyette IFDEF NOUNDERLINE
     99  1.1.1.1.2.2  pgoyette             public  longest_match
    100  1.1.1.1.2.2  pgoyette             public  match_init
    101  1.1.1.1.2.2  pgoyette ELSE
    102  1.1.1.1.2.2  pgoyette             public  _longest_match
    103  1.1.1.1.2.2  pgoyette             public  _match_init
    104  1.1.1.1.2.2  pgoyette ENDIF
    105  1.1.1.1.2.2  pgoyette 
    106  1.1.1.1.2.2  pgoyette     MAX_MATCH           equ     258
    107  1.1.1.1.2.2  pgoyette     MIN_MATCH           equ     3
    108  1.1.1.1.2.2  pgoyette     MIN_LOOKAHEAD       equ     (MAX_MATCH+MIN_MATCH+1)
    109  1.1.1.1.2.2  pgoyette 
    110  1.1.1.1.2.2  pgoyette 
    111  1.1.1.1.2.2  pgoyette 
    112  1.1.1.1.2.2  pgoyette MAX_MATCH       equ     258
    113  1.1.1.1.2.2  pgoyette MIN_MATCH       equ     3
    114  1.1.1.1.2.2  pgoyette MIN_LOOKAHEAD   equ     (MAX_MATCH + MIN_MATCH + 1)
    115  1.1.1.1.2.2  pgoyette MAX_MATCH_8_     equ     ((MAX_MATCH + 7) AND 0FFF0h)
    116  1.1.1.1.2.2  pgoyette 
    117  1.1.1.1.2.2  pgoyette 
    118  1.1.1.1.2.2  pgoyette ;;; stack frame offsets
    119  1.1.1.1.2.2  pgoyette 
    120  1.1.1.1.2.2  pgoyette chainlenwmask   equ  esp + 0    ; high word: current chain len
    121  1.1.1.1.2.2  pgoyette                     ; low word: s->wmask
    122  1.1.1.1.2.2  pgoyette window      equ  esp + 4    ; local copy of s->window
    123  1.1.1.1.2.2  pgoyette windowbestlen   equ  esp + 8    ; s->window + bestlen
    124  1.1.1.1.2.2  pgoyette scanstart   equ  esp + 16   ; first two bytes of string
    125  1.1.1.1.2.2  pgoyette scanend     equ  esp + 12   ; last two bytes of string
    126  1.1.1.1.2.2  pgoyette scanalign   equ  esp + 20   ; dword-misalignment of string
    127  1.1.1.1.2.2  pgoyette nicematch   equ  esp + 24   ; a good enough match size
    128  1.1.1.1.2.2  pgoyette bestlen     equ  esp + 28   ; size of best match so far
    129  1.1.1.1.2.2  pgoyette scan        equ  esp + 32   ; ptr to string wanting match
    130  1.1.1.1.2.2  pgoyette 
    131  1.1.1.1.2.2  pgoyette LocalVarsSize   equ 36
    132  1.1.1.1.2.2  pgoyette ;   saved ebx   byte esp + 36
    133  1.1.1.1.2.2  pgoyette ;   saved edi   byte esp + 40
    134  1.1.1.1.2.2  pgoyette ;   saved esi   byte esp + 44
    135  1.1.1.1.2.2  pgoyette ;   saved ebp   byte esp + 48
    136  1.1.1.1.2.2  pgoyette ;   return address  byte esp + 52
    137  1.1.1.1.2.2  pgoyette deflatestate    equ  esp + 56   ; the function arguments
    138  1.1.1.1.2.2  pgoyette curmatch    equ  esp + 60
    139  1.1.1.1.2.2  pgoyette 
    140  1.1.1.1.2.2  pgoyette ;;; Offsets for fields in the deflate_state structure. These numbers
    141  1.1.1.1.2.2  pgoyette ;;; are calculated from the definition of deflate_state, with the
    142  1.1.1.1.2.2  pgoyette ;;; assumption that the compiler will dword-align the fields. (Thus,
    143  1.1.1.1.2.2  pgoyette ;;; changing the definition of deflate_state could easily cause this
    144  1.1.1.1.2.2  pgoyette ;;; program to crash horribly, without so much as a warning at
    145  1.1.1.1.2.2  pgoyette ;;; compile time. Sigh.)
    146  1.1.1.1.2.2  pgoyette 
    147  1.1.1.1.2.2  pgoyette dsWSize     equ 36+zlib1222add
    148  1.1.1.1.2.2  pgoyette dsWMask     equ 44+zlib1222add
    149  1.1.1.1.2.2  pgoyette dsWindow    equ 48+zlib1222add
    150  1.1.1.1.2.2  pgoyette dsPrev      equ 56+zlib1222add
    151  1.1.1.1.2.2  pgoyette dsMatchLen  equ 88+zlib1222add
    152  1.1.1.1.2.2  pgoyette dsPrevMatch equ 92+zlib1222add
    153  1.1.1.1.2.2  pgoyette dsStrStart  equ 100+zlib1222add
    154  1.1.1.1.2.2  pgoyette dsMatchStart    equ 104+zlib1222add
    155  1.1.1.1.2.2  pgoyette dsLookahead equ 108+zlib1222add
    156  1.1.1.1.2.2  pgoyette dsPrevLen   equ 112+zlib1222add
    157  1.1.1.1.2.2  pgoyette dsMaxChainLen   equ 116+zlib1222add
    158  1.1.1.1.2.2  pgoyette dsGoodMatch equ 132+zlib1222add
    159  1.1.1.1.2.2  pgoyette dsNiceMatch equ 136+zlib1222add
    160  1.1.1.1.2.2  pgoyette 
    161  1.1.1.1.2.2  pgoyette 
    162  1.1.1.1.2.2  pgoyette ;;; match686.asm -- Pentium-Pro-optimized version of longest_match()
    163  1.1.1.1.2.2  pgoyette ;;; Written for zlib 1.1.2
    164  1.1.1.1.2.2  pgoyette ;;; Copyright (C) 1998 Brian Raiter <breadbox (a] muppetlabs.com>
    165  1.1.1.1.2.2  pgoyette ;;; You can look at http://www.muppetlabs.com/~breadbox/software/assembly.html
    166  1.1.1.1.2.2  pgoyette ;;;
    167  1.1.1.1.2.2  pgoyette ;;
    168  1.1.1.1.2.2  pgoyette ;;  This software is provided 'as-is', without any express or implied
    169  1.1.1.1.2.2  pgoyette ;;  warranty.  In no event will the authors be held liable for any damages
    170  1.1.1.1.2.2  pgoyette ;;  arising from the use of this software.
    171  1.1.1.1.2.2  pgoyette ;;
    172  1.1.1.1.2.2  pgoyette ;;  Permission is granted to anyone to use this software for any purpose,
    173  1.1.1.1.2.2  pgoyette ;;  including commercial applications, and to alter it and redistribute it
    174  1.1.1.1.2.2  pgoyette ;;  freely, subject to the following restrictions:
    175  1.1.1.1.2.2  pgoyette ;;
    176  1.1.1.1.2.2  pgoyette ;;  1. The origin of this software must not be misrepresented; you must not
    177  1.1.1.1.2.2  pgoyette ;;     claim that you wrote the original software. If you use this software
    178  1.1.1.1.2.2  pgoyette ;;     in a product, an acknowledgment in the product documentation would be
    179  1.1.1.1.2.2  pgoyette ;;     appreciated but is not required.
    180  1.1.1.1.2.2  pgoyette ;;  2. Altered source versions must be plainly marked as such, and must not be
    181  1.1.1.1.2.2  pgoyette ;;     misrepresented as being the original software
    182  1.1.1.1.2.2  pgoyette ;;  3. This notice may not be removed or altered from any source distribution.
    183  1.1.1.1.2.2  pgoyette ;;
    184  1.1.1.1.2.2  pgoyette 
    185  1.1.1.1.2.2  pgoyette ;GLOBAL _longest_match, _match_init
    186  1.1.1.1.2.2  pgoyette 
    187  1.1.1.1.2.2  pgoyette 
    188  1.1.1.1.2.2  pgoyette ;SECTION    .text
    189  1.1.1.1.2.2  pgoyette 
    190  1.1.1.1.2.2  pgoyette ;;; uInt longest_match(deflate_state *deflatestate, IPos curmatch)
    191  1.1.1.1.2.2  pgoyette 
    192  1.1.1.1.2.2  pgoyette ;_longest_match:
    193  1.1.1.1.2.2  pgoyette     IFDEF NOUNDERLINE
    194  1.1.1.1.2.2  pgoyette     longest_match       proc near
    195  1.1.1.1.2.2  pgoyette     ELSE
    196  1.1.1.1.2.2  pgoyette     _longest_match      proc near
    197  1.1.1.1.2.2  pgoyette     ENDIF
    198  1.1.1.1.2.2  pgoyette .FPO (9, 4, 0, 0, 1, 0)
    199  1.1.1.1.2.2  pgoyette 
    200  1.1.1.1.2.2  pgoyette ;;; Save registers that the compiler may be using, and adjust esp to
    201  1.1.1.1.2.2  pgoyette ;;; make room for our stack frame.
    202  1.1.1.1.2.2  pgoyette 
    203  1.1.1.1.2.2  pgoyette         push    ebp
    204  1.1.1.1.2.2  pgoyette         push    edi
    205  1.1.1.1.2.2  pgoyette         push    esi
    206  1.1.1.1.2.2  pgoyette         push    ebx
    207  1.1.1.1.2.2  pgoyette         sub esp, LocalVarsSize
    208  1.1.1.1.2.2  pgoyette 
    209  1.1.1.1.2.2  pgoyette ;;; Retrieve the function arguments. ecx will hold cur_match
    210  1.1.1.1.2.2  pgoyette ;;; throughout the entire function. edx will hold the pointer to the
    211  1.1.1.1.2.2  pgoyette ;;; deflate_state structure during the function's setup (before
    212  1.1.1.1.2.2  pgoyette ;;; entering the main loop.
    213  1.1.1.1.2.2  pgoyette 
    214  1.1.1.1.2.2  pgoyette         mov edx, [deflatestate]
    215  1.1.1.1.2.2  pgoyette         mov ecx, [curmatch]
    216  1.1.1.1.2.2  pgoyette 
    217  1.1.1.1.2.2  pgoyette ;;; uInt wmask = s->w_mask;
    218  1.1.1.1.2.2  pgoyette ;;; unsigned chain_length = s->max_chain_length;
    219  1.1.1.1.2.2  pgoyette ;;; if (s->prev_length >= s->good_match) {
    220  1.1.1.1.2.2  pgoyette ;;;     chain_length >>= 2;
    221  1.1.1.1.2.2  pgoyette ;;; }
    222  1.1.1.1.2.2  pgoyette 
    223  1.1.1.1.2.2  pgoyette         mov eax, [edx + dsPrevLen]
    224  1.1.1.1.2.2  pgoyette         mov ebx, [edx + dsGoodMatch]
    225  1.1.1.1.2.2  pgoyette         cmp eax, ebx
    226  1.1.1.1.2.2  pgoyette         mov eax, [edx + dsWMask]
    227  1.1.1.1.2.2  pgoyette         mov ebx, [edx + dsMaxChainLen]
    228  1.1.1.1.2.2  pgoyette         jl  LastMatchGood
    229  1.1.1.1.2.2  pgoyette         shr ebx, 2
    230  1.1.1.1.2.2  pgoyette LastMatchGood:
    231  1.1.1.1.2.2  pgoyette 
    232  1.1.1.1.2.2  pgoyette ;;; chainlen is decremented once beforehand so that the function can
    233  1.1.1.1.2.2  pgoyette ;;; use the sign flag instead of the zero flag for the exit test.
    234  1.1.1.1.2.2  pgoyette ;;; It is then shifted into the high word, to make room for the wmask
    235  1.1.1.1.2.2  pgoyette ;;; value, which it will always accompany.
    236  1.1.1.1.2.2  pgoyette 
    237  1.1.1.1.2.2  pgoyette         dec ebx
    238  1.1.1.1.2.2  pgoyette         shl ebx, 16
    239  1.1.1.1.2.2  pgoyette         or  ebx, eax
    240  1.1.1.1.2.2  pgoyette         mov [chainlenwmask], ebx
    241  1.1.1.1.2.2  pgoyette 
    242  1.1.1.1.2.2  pgoyette ;;; if ((uInt)nice_match > s->lookahead) nice_match = s->lookahead;
    243  1.1.1.1.2.2  pgoyette 
    244  1.1.1.1.2.2  pgoyette         mov eax, [edx + dsNiceMatch]
    245  1.1.1.1.2.2  pgoyette         mov ebx, [edx + dsLookahead]
    246  1.1.1.1.2.2  pgoyette         cmp ebx, eax
    247  1.1.1.1.2.2  pgoyette         jl  LookaheadLess
    248  1.1.1.1.2.2  pgoyette         mov ebx, eax
    249  1.1.1.1.2.2  pgoyette LookaheadLess:  mov [nicematch], ebx
    250  1.1.1.1.2.2  pgoyette 
    251  1.1.1.1.2.2  pgoyette ;;; register Bytef *scan = s->window + s->strstart;
    252  1.1.1.1.2.2  pgoyette 
    253  1.1.1.1.2.2  pgoyette         mov esi, [edx + dsWindow]
    254  1.1.1.1.2.2  pgoyette         mov [window], esi
    255  1.1.1.1.2.2  pgoyette         mov ebp, [edx + dsStrStart]
    256  1.1.1.1.2.2  pgoyette         lea edi, [esi + ebp]
    257  1.1.1.1.2.2  pgoyette         mov [scan], edi
    258  1.1.1.1.2.2  pgoyette 
    259  1.1.1.1.2.2  pgoyette ;;; Determine how many bytes the scan ptr is off from being
    260  1.1.1.1.2.2  pgoyette ;;; dword-aligned.
    261  1.1.1.1.2.2  pgoyette 
    262  1.1.1.1.2.2  pgoyette         mov eax, edi
    263  1.1.1.1.2.2  pgoyette         neg eax
    264  1.1.1.1.2.2  pgoyette         and eax, 3
    265  1.1.1.1.2.2  pgoyette         mov [scanalign], eax
    266  1.1.1.1.2.2  pgoyette 
    267  1.1.1.1.2.2  pgoyette ;;; IPos limit = s->strstart > (IPos)MAX_DIST(s) ?
    268  1.1.1.1.2.2  pgoyette ;;;     s->strstart - (IPos)MAX_DIST(s) : NIL;
    269  1.1.1.1.2.2  pgoyette 
    270  1.1.1.1.2.2  pgoyette         mov eax, [edx + dsWSize]
    271  1.1.1.1.2.2  pgoyette         sub eax, MIN_LOOKAHEAD
    272  1.1.1.1.2.2  pgoyette         sub ebp, eax
    273  1.1.1.1.2.2  pgoyette         jg  LimitPositive
    274  1.1.1.1.2.2  pgoyette         xor ebp, ebp
    275  1.1.1.1.2.2  pgoyette LimitPositive:
    276  1.1.1.1.2.2  pgoyette 
    277  1.1.1.1.2.2  pgoyette ;;; int best_len = s->prev_length;
    278  1.1.1.1.2.2  pgoyette 
    279  1.1.1.1.2.2  pgoyette         mov eax, [edx + dsPrevLen]
    280  1.1.1.1.2.2  pgoyette         mov [bestlen], eax
    281  1.1.1.1.2.2  pgoyette 
    282  1.1.1.1.2.2  pgoyette ;;; Store the sum of s->window + best_len in esi locally, and in esi.
    283  1.1.1.1.2.2  pgoyette 
    284  1.1.1.1.2.2  pgoyette         add esi, eax
    285  1.1.1.1.2.2  pgoyette         mov [windowbestlen], esi
    286  1.1.1.1.2.2  pgoyette 
    287  1.1.1.1.2.2  pgoyette ;;; register ush scan_start = *(ushf*)scan;
    288  1.1.1.1.2.2  pgoyette ;;; register ush scan_end   = *(ushf*)(scan+best_len-1);
    289  1.1.1.1.2.2  pgoyette ;;; Posf *prev = s->prev;
    290  1.1.1.1.2.2  pgoyette 
    291  1.1.1.1.2.2  pgoyette         movzx   ebx, word ptr [edi]
    292  1.1.1.1.2.2  pgoyette         mov [scanstart], ebx
    293  1.1.1.1.2.2  pgoyette         movzx   ebx, word ptr [edi + eax - 1]
    294  1.1.1.1.2.2  pgoyette         mov [scanend], ebx
    295  1.1.1.1.2.2  pgoyette         mov edi, [edx + dsPrev]
    296  1.1.1.1.2.2  pgoyette 
    297  1.1.1.1.2.2  pgoyette ;;; Jump into the main loop.
    298  1.1.1.1.2.2  pgoyette 
    299  1.1.1.1.2.2  pgoyette         mov edx, [chainlenwmask]
    300  1.1.1.1.2.2  pgoyette         jmp short LoopEntry
    301  1.1.1.1.2.2  pgoyette 
    302  1.1.1.1.2.2  pgoyette align 4
    303  1.1.1.1.2.2  pgoyette 
    304  1.1.1.1.2.2  pgoyette ;;; do {
    305  1.1.1.1.2.2  pgoyette ;;;     match = s->window + cur_match;
    306  1.1.1.1.2.2  pgoyette ;;;     if (*(ushf*)(match+best_len-1) != scan_end ||
    307  1.1.1.1.2.2  pgoyette ;;;         *(ushf*)match != scan_start) continue;
    308  1.1.1.1.2.2  pgoyette ;;;     [...]
    309  1.1.1.1.2.2  pgoyette ;;; } while ((cur_match = prev[cur_match & wmask]) > limit
    310  1.1.1.1.2.2  pgoyette ;;;          && --chain_length != 0);
    311  1.1.1.1.2.2  pgoyette ;;;
    312  1.1.1.1.2.2  pgoyette ;;; Here is the inner loop of the function. The function will spend the
    313  1.1.1.1.2.2  pgoyette ;;; majority of its time in this loop, and majority of that time will
    314  1.1.1.1.2.2  pgoyette ;;; be spent in the first ten instructions.
    315  1.1.1.1.2.2  pgoyette ;;;
    316  1.1.1.1.2.2  pgoyette ;;; Within this loop:
    317  1.1.1.1.2.2  pgoyette ;;; ebx = scanend
    318  1.1.1.1.2.2  pgoyette ;;; ecx = curmatch
    319  1.1.1.1.2.2  pgoyette ;;; edx = chainlenwmask - i.e., ((chainlen << 16) | wmask)
    320  1.1.1.1.2.2  pgoyette ;;; esi = windowbestlen - i.e., (window + bestlen)
    321  1.1.1.1.2.2  pgoyette ;;; edi = prev
    322  1.1.1.1.2.2  pgoyette ;;; ebp = limit
    323  1.1.1.1.2.2  pgoyette 
    324  1.1.1.1.2.2  pgoyette LookupLoop:
    325  1.1.1.1.2.2  pgoyette         and ecx, edx
    326  1.1.1.1.2.2  pgoyette         movzx   ecx, word ptr [edi + ecx*2]
    327  1.1.1.1.2.2  pgoyette         cmp ecx, ebp
    328  1.1.1.1.2.2  pgoyette         jbe LeaveNow
    329  1.1.1.1.2.2  pgoyette         sub edx, 00010000h
    330  1.1.1.1.2.2  pgoyette         js  LeaveNow
    331  1.1.1.1.2.2  pgoyette LoopEntry:  movzx   eax, word ptr [esi + ecx - 1]
    332  1.1.1.1.2.2  pgoyette         cmp eax, ebx
    333  1.1.1.1.2.2  pgoyette         jnz LookupLoop
    334  1.1.1.1.2.2  pgoyette         mov eax, [window]
    335  1.1.1.1.2.2  pgoyette         movzx   eax, word ptr [eax + ecx]
    336  1.1.1.1.2.2  pgoyette         cmp eax, [scanstart]
    337  1.1.1.1.2.2  pgoyette         jnz LookupLoop
    338  1.1.1.1.2.2  pgoyette 
    339  1.1.1.1.2.2  pgoyette ;;; Store the current value of chainlen.
    340  1.1.1.1.2.2  pgoyette 
    341  1.1.1.1.2.2  pgoyette         mov [chainlenwmask], edx
    342  1.1.1.1.2.2  pgoyette 
    343  1.1.1.1.2.2  pgoyette ;;; Point edi to the string under scrutiny, and esi to the string we
    344  1.1.1.1.2.2  pgoyette ;;; are hoping to match it up with. In actuality, esi and edi are
    345  1.1.1.1.2.2  pgoyette ;;; both pointed (MAX_MATCH_8 - scanalign) bytes ahead, and edx is
    346  1.1.1.1.2.2  pgoyette ;;; initialized to -(MAX_MATCH_8 - scanalign).
    347  1.1.1.1.2.2  pgoyette 
    348  1.1.1.1.2.2  pgoyette         mov esi, [window]
    349  1.1.1.1.2.2  pgoyette         mov edi, [scan]
    350  1.1.1.1.2.2  pgoyette         add esi, ecx
    351  1.1.1.1.2.2  pgoyette         mov eax, [scanalign]
    352  1.1.1.1.2.2  pgoyette         mov edx, 0fffffef8h; -(MAX_MATCH_8)
    353  1.1.1.1.2.2  pgoyette         lea edi, [edi + eax + 0108h] ;MAX_MATCH_8]
    354  1.1.1.1.2.2  pgoyette         lea esi, [esi + eax + 0108h] ;MAX_MATCH_8]
    355  1.1.1.1.2.2  pgoyette 
    356  1.1.1.1.2.2  pgoyette ;;; Test the strings for equality, 8 bytes at a time. At the end,
    357  1.1.1.1.2.2  pgoyette ;;; adjust edx so that it is offset to the exact byte that mismatched.
    358  1.1.1.1.2.2  pgoyette ;;;
    359  1.1.1.1.2.2  pgoyette ;;; We already know at this point that the first three bytes of the
    360  1.1.1.1.2.2  pgoyette ;;; strings match each other, and they can be safely passed over before
    361  1.1.1.1.2.2  pgoyette ;;; starting the compare loop. So what this code does is skip over 0-3
    362  1.1.1.1.2.2  pgoyette ;;; bytes, as much as necessary in order to dword-align the edi
    363  1.1.1.1.2.2  pgoyette ;;; pointer. (esi will still be misaligned three times out of four.)
    364  1.1.1.1.2.2  pgoyette ;;;
    365  1.1.1.1.2.2  pgoyette ;;; It should be confessed that this loop usually does not represent
    366  1.1.1.1.2.2  pgoyette ;;; much of the total running time. Replacing it with a more
    367  1.1.1.1.2.2  pgoyette ;;; straightforward "rep cmpsb" would not drastically degrade
    368  1.1.1.1.2.2  pgoyette ;;; performance.
    369  1.1.1.1.2.2  pgoyette 
    370  1.1.1.1.2.2  pgoyette LoopCmps:
    371  1.1.1.1.2.2  pgoyette         mov eax, [esi + edx]
    372  1.1.1.1.2.2  pgoyette         xor eax, [edi + edx]
    373  1.1.1.1.2.2  pgoyette         jnz LeaveLoopCmps
    374  1.1.1.1.2.2  pgoyette         mov eax, [esi + edx + 4]
    375  1.1.1.1.2.2  pgoyette         xor eax, [edi + edx + 4]
    376  1.1.1.1.2.2  pgoyette         jnz LeaveLoopCmps4
    377  1.1.1.1.2.2  pgoyette         add edx, 8
    378  1.1.1.1.2.2  pgoyette         jnz LoopCmps
    379  1.1.1.1.2.2  pgoyette         jmp short LenMaximum
    380  1.1.1.1.2.2  pgoyette LeaveLoopCmps4: add edx, 4
    381  1.1.1.1.2.2  pgoyette LeaveLoopCmps:  test    eax, 0000FFFFh
    382  1.1.1.1.2.2  pgoyette         jnz LenLower
    383  1.1.1.1.2.2  pgoyette         add edx,  2
    384  1.1.1.1.2.2  pgoyette         shr eax, 16
    385  1.1.1.1.2.2  pgoyette LenLower:   sub al, 1
    386  1.1.1.1.2.2  pgoyette         adc edx, 0
    387  1.1.1.1.2.2  pgoyette 
    388  1.1.1.1.2.2  pgoyette ;;; Calculate the length of the match. If it is longer than MAX_MATCH,
    389  1.1.1.1.2.2  pgoyette ;;; then automatically accept it as the best possible match and leave.
    390  1.1.1.1.2.2  pgoyette 
    391  1.1.1.1.2.2  pgoyette         lea eax, [edi + edx]
    392  1.1.1.1.2.2  pgoyette         mov edi, [scan]
    393  1.1.1.1.2.2  pgoyette         sub eax, edi
    394  1.1.1.1.2.2  pgoyette         cmp eax, MAX_MATCH
    395  1.1.1.1.2.2  pgoyette         jge LenMaximum
    396  1.1.1.1.2.2  pgoyette 
    397  1.1.1.1.2.2  pgoyette ;;; If the length of the match is not longer than the best match we
    398  1.1.1.1.2.2  pgoyette ;;; have so far, then forget it and return to the lookup loop.
    399  1.1.1.1.2.2  pgoyette 
    400  1.1.1.1.2.2  pgoyette         mov edx, [deflatestate]
    401  1.1.1.1.2.2  pgoyette         mov ebx, [bestlen]
    402  1.1.1.1.2.2  pgoyette         cmp eax, ebx
    403  1.1.1.1.2.2  pgoyette         jg  LongerMatch
    404  1.1.1.1.2.2  pgoyette         mov esi, [windowbestlen]
    405  1.1.1.1.2.2  pgoyette         mov edi, [edx + dsPrev]
    406  1.1.1.1.2.2  pgoyette         mov ebx, [scanend]
    407  1.1.1.1.2.2  pgoyette         mov edx, [chainlenwmask]
    408  1.1.1.1.2.2  pgoyette         jmp LookupLoop
    409  1.1.1.1.2.2  pgoyette 
    410  1.1.1.1.2.2  pgoyette ;;;         s->match_start = cur_match;
    411  1.1.1.1.2.2  pgoyette ;;;         best_len = len;
    412  1.1.1.1.2.2  pgoyette ;;;         if (len >= nice_match) break;
    413  1.1.1.1.2.2  pgoyette ;;;         scan_end = *(ushf*)(scan+best_len-1);
    414  1.1.1.1.2.2  pgoyette 
    415  1.1.1.1.2.2  pgoyette LongerMatch:    mov ebx, [nicematch]
    416  1.1.1.1.2.2  pgoyette         mov [bestlen], eax
    417  1.1.1.1.2.2  pgoyette         mov [edx + dsMatchStart], ecx
    418  1.1.1.1.2.2  pgoyette         cmp eax, ebx
    419  1.1.1.1.2.2  pgoyette         jge LeaveNow
    420  1.1.1.1.2.2  pgoyette         mov esi, [window]
    421  1.1.1.1.2.2  pgoyette         add esi, eax
    422  1.1.1.1.2.2  pgoyette         mov [windowbestlen], esi
    423  1.1.1.1.2.2  pgoyette         movzx   ebx, word ptr [edi + eax - 1]
    424  1.1.1.1.2.2  pgoyette         mov edi, [edx + dsPrev]
    425  1.1.1.1.2.2  pgoyette         mov [scanend], ebx
    426  1.1.1.1.2.2  pgoyette         mov edx, [chainlenwmask]
    427  1.1.1.1.2.2  pgoyette         jmp LookupLoop
    428  1.1.1.1.2.2  pgoyette 
    429  1.1.1.1.2.2  pgoyette ;;; Accept the current string, with the maximum possible length.
    430  1.1.1.1.2.2  pgoyette 
    431  1.1.1.1.2.2  pgoyette LenMaximum: mov edx, [deflatestate]
    432  1.1.1.1.2.2  pgoyette         mov dword ptr [bestlen], MAX_MATCH
    433  1.1.1.1.2.2  pgoyette         mov [edx + dsMatchStart], ecx
    434  1.1.1.1.2.2  pgoyette 
    435  1.1.1.1.2.2  pgoyette ;;; if ((uInt)best_len <= s->lookahead) return (uInt)best_len;
    436  1.1.1.1.2.2  pgoyette ;;; return s->lookahead;
    437  1.1.1.1.2.2  pgoyette 
    438  1.1.1.1.2.2  pgoyette LeaveNow:
    439  1.1.1.1.2.2  pgoyette         mov edx, [deflatestate]
    440  1.1.1.1.2.2  pgoyette         mov ebx, [bestlen]
    441  1.1.1.1.2.2  pgoyette         mov eax, [edx + dsLookahead]
    442  1.1.1.1.2.2  pgoyette         cmp ebx, eax
    443  1.1.1.1.2.2  pgoyette         jg  LookaheadRet
    444  1.1.1.1.2.2  pgoyette         mov eax, ebx
    445  1.1.1.1.2.2  pgoyette LookaheadRet:
    446  1.1.1.1.2.2  pgoyette 
    447  1.1.1.1.2.2  pgoyette ;;; Restore the stack and return from whence we came.
    448  1.1.1.1.2.2  pgoyette 
    449  1.1.1.1.2.2  pgoyette         add esp, LocalVarsSize
    450  1.1.1.1.2.2  pgoyette         pop ebx
    451  1.1.1.1.2.2  pgoyette         pop esi
    452  1.1.1.1.2.2  pgoyette         pop edi
    453  1.1.1.1.2.2  pgoyette         pop ebp
    454  1.1.1.1.2.2  pgoyette 
    455  1.1.1.1.2.2  pgoyette         ret
    456  1.1.1.1.2.2  pgoyette ; please don't remove this string !
    457  1.1.1.1.2.2  pgoyette ; Your can freely use match686 in any free or commercial app if you don't remove the string in the binary!
    458  1.1.1.1.2.2  pgoyette     db     0dh,0ah,"asm686 with masm, optimised assembly code from Brian Raiter, written 1998",0dh,0ah
    459  1.1.1.1.2.2  pgoyette 
    460  1.1.1.1.2.2  pgoyette 
    461  1.1.1.1.2.2  pgoyette     IFDEF NOUNDERLINE
    462  1.1.1.1.2.2  pgoyette     longest_match       endp
    463  1.1.1.1.2.2  pgoyette     ELSE
    464  1.1.1.1.2.2  pgoyette     _longest_match      endp
    465  1.1.1.1.2.2  pgoyette     ENDIF
    466  1.1.1.1.2.2  pgoyette 
    467  1.1.1.1.2.2  pgoyette     IFDEF NOUNDERLINE
    468  1.1.1.1.2.2  pgoyette     match_init      proc near
    469  1.1.1.1.2.2  pgoyette                     ret
    470  1.1.1.1.2.2  pgoyette     match_init      endp
    471  1.1.1.1.2.2  pgoyette     ELSE
    472  1.1.1.1.2.2  pgoyette     _match_init     proc near
    473  1.1.1.1.2.2  pgoyette                     ret
    474  1.1.1.1.2.2  pgoyette     _match_init     endp
    475  1.1.1.1.2.2  pgoyette     ENDIF
    476  1.1.1.1.2.2  pgoyette 
    477  1.1.1.1.2.2  pgoyette 
    478  1.1.1.1.2.2  pgoyette _TEXT   ends
    479  1.1.1.1.2.2  pgoyette end
    480