1 1.1 christos /* inffas86.c is a hand tuned assembler version of 2 1.1 christos * 3 1.1 christos * inffast.c -- fast decoding 4 1.1 christos * Copyright (C) 1995-2003 Mark Adler 5 1.1 christos * For conditions of distribution and use, see copyright notice in zlib.h 6 1.1 christos * 7 1.1 christos * Copyright (C) 2003 Chris Anderson <christop@charm.net> 8 1.1 christos * Please use the copyright conditions above. 9 1.1 christos * 10 1.1 christos * Dec-29-2003 -- I added AMD64 inflate asm support. This version is also 11 1.1 christos * slightly quicker on x86 systems because, instead of using rep movsb to copy 12 1.1 christos * data, it uses rep movsw, which moves data in 2-byte chunks instead of single 13 1.1 christos * bytes. I've tested the AMD64 code on a Fedora Core 1 + the x86_64 updates 14 1.1 christos * from http://fedora.linux.duke.edu/fc1_x86_64 15 1.1 christos * which is running on an Athlon 64 3000+ / Gigabyte GA-K8VT800M system with 16 1.1 christos * 1GB ram. The 64-bit version is about 4% faster than the 32-bit version, 17 1.1 christos * when decompressing mozilla-source-1.3.tar.gz. 18 1.1 christos * 19 1.1 christos * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from 20 1.1 christos * the gcc -S output of zlib-1.2.0/inffast.c. Zlib-1.2.0 is in beta release at 21 1.1 christos * the moment. I have successfully compiled and tested this code with gcc2.96, 22 1.1 christos * gcc3.2, icc5.0, msvc6.0. It is very close to the speed of inffast.S 23 1.1 christos * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX 24 1.1 christos * enabled. I will attempt to merge the MMX code into this version. 
Newer 25 1.1 christos * versions of this and inffast.S can be found at 26 1.1 christos * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/ 27 1.1 christos */ 28 1.1 christos 29 1.1 christos #include "zutil.h" 30 1.1 christos #include "inftrees.h" 31 1.1 christos #include "inflate.h" 32 1.1 christos #include "inffast.h" 33 1.1 christos 34 1.1 christos /* Mark Adler's comments from inffast.c: */ 35 1.1 christos 36 1.1 christos /* 37 1.1 christos Decode literal, length, and distance codes and write out the resulting 38 1.1 christos literal and match bytes until either not enough input or output is 39 1.1 christos available, an end-of-block is encountered, or a data error is encountered. 40 1.1 christos When large enough input and output buffers are supplied to inflate(), for 41 1.1 christos example, a 16K input buffer and a 64K output buffer, more than 95% of the 42 1.1 christos inflate execution time is spent in this routine. 43 1.1 christos 44 1.1 christos Entry assumptions: 45 1.1 christos 46 1.1 christos state->mode == LEN 47 1.1 christos strm->avail_in >= 6 48 1.1 christos strm->avail_out >= 258 49 1.1 christos start >= strm->avail_out 50 1.1 christos state->bits < 8 51 1.1 christos 52 1.1 christos On return, state->mode is one of: 53 1.1 christos 54 1.1 christos LEN -- ran out of enough output space or enough available input 55 1.1 christos TYPE -- reached end of block code, inflate() to interpret next block 56 1.1 christos BAD -- error in block data 57 1.1 christos 58 1.1 christos Notes: 59 1.1 christos 60 1.1 christos - The maximum input bits used by a length/distance pair is 15 bits for the 61 1.1 christos length code, 5 bits for the length extra, 15 bits for the distance code, 62 1.1 christos and 13 bits for the distance extra. This totals 48 bits, or six bytes. 63 1.1 christos Therefore if strm->avail_in >= 6, then there is enough input to avoid 64 1.1 christos checking for available input while decoding. 
65 1.1 christos 66 1.1 christos - The maximum bytes that a single length/distance pair can output is 258 67 1.1 christos bytes, which is the maximum length that can be coded. inflate_fast() 68 1.1 christos requires strm->avail_out >= 258 for each loop to avoid checking for 69 1.1 christos output space. 70 1.1 christos */ 71 1.1 christos void inflate_fast(strm, start) 72 1.1 christos z_streamp strm; 73 1.1 christos unsigned start; /* inflate()'s starting value for strm->avail_out */ 74 1.1 christos { 75 1.1 christos struct inflate_state FAR *state; 76 1.1 christos struct inffast_ar { 77 1.1 christos /* 64 32 x86 x86_64 */ 78 1.1 christos /* ar offset register */ 79 1.1 christos /* 0 0 */ void *esp; /* esp save */ 80 1.1 christos /* 8 4 */ void *ebp; /* ebp save */ 81 1.1 christos /* 16 8 */ unsigned char FAR *in; /* esi rsi local strm->next_in */ 82 1.1 christos /* 24 12 */ unsigned char FAR *last; /* r9 while in < last */ 83 1.1 christos /* 32 16 */ unsigned char FAR *out; /* edi rdi local strm->next_out */ 84 1.1 christos /* 40 20 */ unsigned char FAR *beg; /* inflate()'s init next_out */ 85 1.1 christos /* 48 24 */ unsigned char FAR *end; /* r10 while out < end */ 86 1.1 christos /* 56 28 */ unsigned char FAR *window;/* size of window, wsize!=0 */ 87 1.1 christos /* 64 32 */ code const FAR *lcode; /* ebp rbp local strm->lencode */ 88 1.1 christos /* 72 36 */ code const FAR *dcode; /* r11 local strm->distcode */ 89 1.1 christos /* 80 40 */ unsigned long hold; /* edx rdx local strm->hold */ 90 1.1 christos /* 88 44 */ unsigned bits; /* ebx rbx local strm->bits */ 91 1.1 christos /* 92 48 */ unsigned wsize; /* window size */ 92 1.1 christos /* 96 52 */ unsigned write; /* window write index */ 93 1.1 christos /*100 56 */ unsigned lmask; /* r12 mask for lcode */ 94 1.1 christos /*104 60 */ unsigned dmask; /* r13 mask for dcode */ 95 1.1 christos /*108 64 */ unsigned len; /* r14 match length */ 96 1.1 christos /*112 68 */ unsigned dist; /* r15 match distance */ 97 1.1 
christos /*116 72 */ unsigned status; /* set when state chng*/ 98 1.1 christos } ar; 99 1.1 christos 100 1.1 christos #if defined( __GNUC__ ) && defined( __amd64__ ) && ! defined( __i386 ) 101 1.1 christos #define PAD_AVAIL_IN 6 102 1.1 christos #define PAD_AVAIL_OUT 258 103 1.1 christos #else 104 1.1 christos #define PAD_AVAIL_IN 5 105 1.1 christos #define PAD_AVAIL_OUT 257 106 1.1 christos #endif 107 1.1 christos 108 1.1 christos /* copy state to local variables */ 109 1.1 christos state = (struct inflate_state FAR *)strm->state; 110 1.1 christos ar.in = strm->next_in; 111 1.1 christos ar.last = ar.in + (strm->avail_in - PAD_AVAIL_IN); 112 1.1 christos ar.out = strm->next_out; 113 1.1 christos ar.beg = ar.out - (start - strm->avail_out); 114 1.1 christos ar.end = ar.out + (strm->avail_out - PAD_AVAIL_OUT); 115 1.1 christos ar.wsize = state->wsize; 116 1.1 christos ar.write = state->wnext; 117 1.1 christos ar.window = state->window; 118 1.1 christos ar.hold = state->hold; 119 1.1 christos ar.bits = state->bits; 120 1.1 christos ar.lcode = state->lencode; 121 1.1 christos ar.dcode = state->distcode; 122 1.1 christos ar.lmask = (1U << state->lenbits) - 1; 123 1.1 christos ar.dmask = (1U << state->distbits) - 1; 124 1.1 christos 125 1.1 christos /* decode literals and length/distances until end-of-block or not enough 126 1.1 christos input data or output space */ 127 1.1 christos 128 1.1 christos /* align in on 1/2 hold size boundary */ 129 1.1 christos while (((unsigned long)(void *)ar.in & (sizeof(ar.hold) / 2 - 1)) != 0) { 130 1.1 christos ar.hold += (unsigned long)*ar.in++ << ar.bits; 131 1.1 christos ar.bits += 8; 132 1.1 christos } 133 1.1 christos 134 1.1 christos #if defined( __GNUC__ ) && defined( __amd64__ ) && ! 
defined( __i386 ) 135 1.1 christos __asm__ __volatile__ ( 136 1.1 christos " leaq %0, %%rax\n" 137 1.1 christos " movq %%rbp, 8(%%rax)\n" /* save regs rbp and rsp */ 138 1.1 christos " movq %%rsp, (%%rax)\n" 139 1.1 christos " movq %%rax, %%rsp\n" /* make rsp point to &ar */ 140 1.1 christos " movq 16(%%rsp), %%rsi\n" /* rsi = in */ 141 1.1 christos " movq 32(%%rsp), %%rdi\n" /* rdi = out */ 142 1.1 christos " movq 24(%%rsp), %%r9\n" /* r9 = last */ 143 1.1 christos " movq 48(%%rsp), %%r10\n" /* r10 = end */ 144 1.1 christos " movq 64(%%rsp), %%rbp\n" /* rbp = lcode */ 145 1.1 christos " movq 72(%%rsp), %%r11\n" /* r11 = dcode */ 146 1.1 christos " movq 80(%%rsp), %%rdx\n" /* rdx = hold */ 147 1.1 christos " movl 88(%%rsp), %%ebx\n" /* ebx = bits */ 148 1.1 christos " movl 100(%%rsp), %%r12d\n" /* r12d = lmask */ 149 1.1 christos " movl 104(%%rsp), %%r13d\n" /* r13d = dmask */ 150 1.1 christos /* r14d = len */ 151 1.1 christos /* r15d = dist */ 152 1.1 christos " cld\n" 153 1.1 christos " cmpq %%rdi, %%r10\n" 154 1.1 christos " je .L_one_time\n" /* if only one decode left */ 155 1.1 christos " cmpq %%rsi, %%r9\n" 156 1.1 christos " je .L_one_time\n" 157 1.1 christos " jmp .L_do_loop\n" 158 1.1 christos 159 1.1 christos ".L_one_time:\n" 160 1.1 christos " movq %%r12, %%r8\n" /* r8 = lmask */ 161 1.1 christos " cmpb $32, %%bl\n" 162 1.1 christos " ja .L_get_length_code_one_time\n" 163 1.1 christos 164 1.1 christos " lodsl\n" /* eax = *(uint *)in++ */ 165 1.1 christos " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 166 1.1 christos " addb $32, %%bl\n" /* bits += 32 */ 167 1.1 christos " shlq %%cl, %%rax\n" 168 1.1 christos " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ 169 1.1 christos " jmp .L_get_length_code_one_time\n" 170 1.1 christos 171 1.1 christos ".align 32,0x90\n" 172 1.1 christos ".L_while_test:\n" 173 1.1 christos " cmpq %%rdi, %%r10\n" 174 1.1 christos " jbe .L_break_loop\n" 175 1.1 christos " cmpq %%rsi, %%r9\n" 176 1.1 christos 
" jbe .L_break_loop\n" 177 1.1 christos 178 1.1 christos ".L_do_loop:\n" 179 1.1 christos " movq %%r12, %%r8\n" /* r8 = lmask */ 180 1.1 christos " cmpb $32, %%bl\n" 181 1.1 christos " ja .L_get_length_code\n" /* if (32 < bits) */ 182 1.1 christos 183 1.1 christos " lodsl\n" /* eax = *(uint *)in++ */ 184 1.1 christos " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 185 1.1 christos " addb $32, %%bl\n" /* bits += 32 */ 186 1.1 christos " shlq %%cl, %%rax\n" 187 1.1 christos " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ 188 1.1 christos 189 1.1 christos ".L_get_length_code:\n" 190 1.1 christos " andq %%rdx, %%r8\n" /* r8 &= hold */ 191 1.1 christos " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */ 192 1.1 christos 193 1.1 christos " movb %%ah, %%cl\n" /* cl = this.bits */ 194 1.1 christos " subb %%ah, %%bl\n" /* bits -= this.bits */ 195 1.1 christos " shrq %%cl, %%rdx\n" /* hold >>= this.bits */ 196 1.1 christos 197 1.1 christos " testb %%al, %%al\n" 198 1.1 christos " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ 199 1.1 christos 200 1.1 christos " movq %%r12, %%r8\n" /* r8 = lmask */ 201 1.1 christos " shrl $16, %%eax\n" /* output this.val char */ 202 1.1 christos " stosb\n" 203 1.1 christos 204 1.1 christos ".L_get_length_code_one_time:\n" 205 1.1 christos " andq %%rdx, %%r8\n" /* r8 &= hold */ 206 1.1 christos " movl (%%rbp,%%r8,4), %%eax\n" /* eax = lcode[hold & lmask] */ 207 1.1 christos 208 1.1 christos ".L_dolen:\n" 209 1.1 christos " movb %%ah, %%cl\n" /* cl = this.bits */ 210 1.1 christos " subb %%ah, %%bl\n" /* bits -= this.bits */ 211 1.1 christos " shrq %%cl, %%rdx\n" /* hold >>= this.bits */ 212 1.1 christos 213 1.1 christos " testb %%al, %%al\n" 214 1.1 christos " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ 215 1.1 christos 216 1.1 christos " shrl $16, %%eax\n" /* output this.val char */ 217 1.1 christos " stosb\n" 218 1.1 christos " jmp .L_while_test\n" 219 1.1 christos 220 1.1 christos 
".align 32,0x90\n" 221 1.1 christos ".L_test_for_length_base:\n" 222 1.1 christos " movl %%eax, %%r14d\n" /* len = this */ 223 1.1 christos " shrl $16, %%r14d\n" /* len = this.val */ 224 1.1 christos " movb %%al, %%cl\n" 225 1.1 christos 226 1.1 christos " testb $16, %%al\n" 227 1.1 christos " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */ 228 1.1 christos " andb $15, %%cl\n" /* op &= 15 */ 229 1.1 christos " jz .L_decode_distance\n" /* if (!op) */ 230 1.1 christos 231 1.1 christos ".L_add_bits_to_len:\n" 232 1.1 christos " subb %%cl, %%bl\n" 233 1.1 christos " xorl %%eax, %%eax\n" 234 1.1 christos " incl %%eax\n" 235 1.1 christos " shll %%cl, %%eax\n" 236 1.1 christos " decl %%eax\n" 237 1.1 christos " andl %%edx, %%eax\n" /* eax &= hold */ 238 1.1 christos " shrq %%cl, %%rdx\n" 239 1.1 christos " addl %%eax, %%r14d\n" /* len += hold & mask[op] */ 240 1.1 christos 241 1.1 christos ".L_decode_distance:\n" 242 1.1 christos " movq %%r13, %%r8\n" /* r8 = dmask */ 243 1.1 christos " cmpb $32, %%bl\n" 244 1.1 christos " ja .L_get_distance_code\n" /* if (32 < bits) */ 245 1.1 christos 246 1.1 christos " lodsl\n" /* eax = *(uint *)in++ */ 247 1.1 christos " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 248 1.1 christos " addb $32, %%bl\n" /* bits += 32 */ 249 1.1 christos " shlq %%cl, %%rax\n" 250 1.1 christos " orq %%rax, %%rdx\n" /* hold |= *((uint *)in)++ << bits */ 251 1.1 christos 252 1.1 christos ".L_get_distance_code:\n" 253 1.1 christos " andq %%rdx, %%r8\n" /* r8 &= hold */ 254 1.1 christos " movl (%%r11,%%r8,4), %%eax\n" /* eax = dcode[hold & dmask] */ 255 1.1 christos 256 1.1 christos ".L_dodist:\n" 257 1.1 christos " movl %%eax, %%r15d\n" /* dist = this */ 258 1.1 christos " shrl $16, %%r15d\n" /* dist = this.val */ 259 1.1 christos " movb %%ah, %%cl\n" 260 1.1 christos " subb %%ah, %%bl\n" /* bits -= this.bits */ 261 1.1 christos " shrq %%cl, %%rdx\n" /* hold >>= this.bits */ 262 1.1 christos " movb %%al, %%cl\n" /* cl = 
this.op */ 263 1.1 christos 264 1.1 christos " testb $16, %%al\n" /* if ((op & 16) == 0) */ 265 1.1 christos " jz .L_test_for_second_level_dist\n" 266 1.1 christos " andb $15, %%cl\n" /* op &= 15 */ 267 1.1 christos " jz .L_check_dist_one\n" 268 1.1 christos 269 1.1 christos ".L_add_bits_to_dist:\n" 270 1.1 christos " subb %%cl, %%bl\n" 271 1.1 christos " xorl %%eax, %%eax\n" 272 1.1 christos " incl %%eax\n" 273 1.1 christos " shll %%cl, %%eax\n" 274 1.1 christos " decl %%eax\n" /* (1 << op) - 1 */ 275 1.1 christos " andl %%edx, %%eax\n" /* eax &= hold */ 276 1.1 christos " shrq %%cl, %%rdx\n" 277 1.1 christos " addl %%eax, %%r15d\n" /* dist += hold & ((1 << op) - 1) */ 278 1.1 christos 279 1.1 christos ".L_check_window:\n" 280 1.1 christos " movq %%rsi, %%r8\n" /* save in so from can use it's reg */ 281 1.1 christos " movq %%rdi, %%rax\n" 282 1.1 christos " subq 40(%%rsp), %%rax\n" /* nbytes = out - beg */ 283 1.1 christos 284 1.1 christos " cmpl %%r15d, %%eax\n" 285 1.1 christos " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */ 286 1.1 christos 287 1.1 christos " movl %%r14d, %%ecx\n" /* ecx = len */ 288 1.1 christos " movq %%rdi, %%rsi\n" 289 1.1 christos " subq %%r15, %%rsi\n" /* from = out - dist */ 290 1.1 christos 291 1.1 christos " sarl %%ecx\n" 292 1.1 christos " jnc .L_copy_two\n" /* if len % 2 == 0 */ 293 1.1 christos 294 1.1 christos " rep movsw\n" 295 1.1 christos " movb (%%rsi), %%al\n" 296 1.1 christos " movb %%al, (%%rdi)\n" 297 1.1 christos " incq %%rdi\n" 298 1.1 christos 299 1.1 christos " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */ 300 1.1 christos " jmp .L_while_test\n" 301 1.1 christos 302 1.1 christos ".L_copy_two:\n" 303 1.1 christos " rep movsw\n" 304 1.1 christos " movq %%r8, %%rsi\n" /* move in back to %rsi, toss from */ 305 1.1 christos " jmp .L_while_test\n" 306 1.1 christos 307 1.1 christos ".align 32,0x90\n" 308 1.1 christos ".L_check_dist_one:\n" 309 1.1 christos " cmpl $1, %%r15d\n" /* if dist 1, is a memset */ 310 
1.1 christos " jne .L_check_window\n" 311 1.1 christos " cmpq %%rdi, 40(%%rsp)\n" /* if out == beg, outside window */ 312 1.1 christos " je .L_check_window\n" 313 1.1 christos 314 1.1 christos " movl %%r14d, %%ecx\n" /* ecx = len */ 315 1.1 christos " movb -1(%%rdi), %%al\n" 316 1.1 christos " movb %%al, %%ah\n" 317 1.1 christos 318 1.1 christos " sarl %%ecx\n" 319 1.1 christos " jnc .L_set_two\n" 320 1.1 christos " movb %%al, (%%rdi)\n" 321 1.1 christos " incq %%rdi\n" 322 1.1 christos 323 1.1 christos ".L_set_two:\n" 324 1.1 christos " rep stosw\n" 325 1.1 christos " jmp .L_while_test\n" 326 1.1 christos 327 1.1 christos ".align 32,0x90\n" 328 1.1 christos ".L_test_for_second_level_length:\n" 329 1.1 christos " testb $64, %%al\n" 330 1.1 christos " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */ 331 1.1 christos 332 1.1 christos " xorl %%eax, %%eax\n" 333 1.1 christos " incl %%eax\n" 334 1.1 christos " shll %%cl, %%eax\n" 335 1.1 christos " decl %%eax\n" 336 1.1 christos " andl %%edx, %%eax\n" /* eax &= hold */ 337 1.1 christos " addl %%r14d, %%eax\n" /* eax += len */ 338 1.1 christos " movl (%%rbp,%%rax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/ 339 1.1 christos " jmp .L_dolen\n" 340 1.1 christos 341 1.1 christos ".align 32,0x90\n" 342 1.1 christos ".L_test_for_second_level_dist:\n" 343 1.1 christos " testb $64, %%al\n" 344 1.1 christos " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */ 345 1.1 christos 346 1.1 christos " xorl %%eax, %%eax\n" 347 1.1 christos " incl %%eax\n" 348 1.1 christos " shll %%cl, %%eax\n" 349 1.1 christos " decl %%eax\n" 350 1.1 christos " andl %%edx, %%eax\n" /* eax &= hold */ 351 1.1 christos " addl %%r15d, %%eax\n" /* eax += dist */ 352 1.1 christos " movl (%%r11,%%rax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/ 353 1.1 christos " jmp .L_dodist\n" 354 1.1 christos 355 1.1 christos ".align 32,0x90\n" 356 1.1 christos ".L_clip_window:\n" 357 1.1 christos " movl %%eax, %%ecx\n" /* ecx = nbytes */ 358 1.1 
christos " movl 92(%%rsp), %%eax\n" /* eax = wsize, prepare for dist cmp */ 359 1.1 christos " negl %%ecx\n" /* nbytes = -nbytes */ 360 1.1 christos 361 1.1 christos " cmpl %%r15d, %%eax\n" 362 1.1 christos " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */ 363 1.1 christos 364 1.1 christos " addl %%r15d, %%ecx\n" /* nbytes = dist - nbytes */ 365 1.1 christos " cmpl $0, 96(%%rsp)\n" 366 1.1 christos " jne .L_wrap_around_window\n" /* if (write != 0) */ 367 1.1 christos 368 1.1 christos " movq 56(%%rsp), %%rsi\n" /* from = window */ 369 1.1 christos " subl %%ecx, %%eax\n" /* eax -= nbytes */ 370 1.1 christos " addq %%rax, %%rsi\n" /* from += wsize - nbytes */ 371 1.1 christos 372 1.1 christos " movl %%r14d, %%eax\n" /* eax = len */ 373 1.1 christos " cmpl %%ecx, %%r14d\n" 374 1.1 christos " jbe .L_do_copy\n" /* if (nbytes >= len) */ 375 1.1 christos 376 1.1 christos " subl %%ecx, %%eax\n" /* eax -= nbytes */ 377 1.1 christos " rep movsb\n" 378 1.1 christos " movq %%rdi, %%rsi\n" 379 1.1 christos " subq %%r15, %%rsi\n" /* from = &out[ -dist ] */ 380 1.1 christos " jmp .L_do_copy\n" 381 1.1 christos 382 1.1 christos ".align 32,0x90\n" 383 1.1 christos ".L_wrap_around_window:\n" 384 1.1 christos " movl 96(%%rsp), %%eax\n" /* eax = write */ 385 1.1 christos " cmpl %%eax, %%ecx\n" 386 1.1 christos " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */ 387 1.1 christos 388 1.1 christos " movl 92(%%rsp), %%esi\n" /* from = wsize */ 389 1.1 christos " addq 56(%%rsp), %%rsi\n" /* from += window */ 390 1.1 christos " addq %%rax, %%rsi\n" /* from += write */ 391 1.1 christos " subq %%rcx, %%rsi\n" /* from -= nbytes */ 392 1.1 christos " subl %%eax, %%ecx\n" /* nbytes -= write */ 393 1.1 christos 394 1.1 christos " movl %%r14d, %%eax\n" /* eax = len */ 395 1.1 christos " cmpl %%ecx, %%eax\n" 396 1.1 christos " jbe .L_do_copy\n" /* if (nbytes >= len) */ 397 1.1 christos 398 1.1 christos " subl %%ecx, %%eax\n" /* len -= nbytes */ 399 1.1 christos " rep movsb\n" 400 
1.1 christos " movq 56(%%rsp), %%rsi\n" /* from = window */ 401 1.1 christos " movl 96(%%rsp), %%ecx\n" /* nbytes = write */ 402 1.1 christos " cmpl %%ecx, %%eax\n" 403 1.1 christos " jbe .L_do_copy\n" /* if (nbytes >= len) */ 404 1.1 christos 405 1.1 christos " subl %%ecx, %%eax\n" /* len -= nbytes */ 406 1.1 christos " rep movsb\n" 407 1.1 christos " movq %%rdi, %%rsi\n" 408 1.1 christos " subq %%r15, %%rsi\n" /* from = out - dist */ 409 1.1 christos " jmp .L_do_copy\n" 410 1.1 christos 411 1.1 christos ".align 32,0x90\n" 412 1.1 christos ".L_contiguous_in_window:\n" 413 1.1 christos " movq 56(%%rsp), %%rsi\n" /* rsi = window */ 414 1.1 christos " addq %%rax, %%rsi\n" 415 1.1 christos " subq %%rcx, %%rsi\n" /* from += write - nbytes */ 416 1.1 christos 417 1.1 christos " movl %%r14d, %%eax\n" /* eax = len */ 418 1.1 christos " cmpl %%ecx, %%eax\n" 419 1.1 christos " jbe .L_do_copy\n" /* if (nbytes >= len) */ 420 1.1 christos 421 1.1 christos " subl %%ecx, %%eax\n" /* len -= nbytes */ 422 1.1 christos " rep movsb\n" 423 1.1 christos " movq %%rdi, %%rsi\n" 424 1.1 christos " subq %%r15, %%rsi\n" /* from = out - dist */ 425 1.1 christos " jmp .L_do_copy\n" /* if (nbytes >= len) */ 426 1.1 christos 427 1.1 christos ".align 32,0x90\n" 428 1.1 christos ".L_do_copy:\n" 429 1.1 christos " movl %%eax, %%ecx\n" /* ecx = len */ 430 1.1 christos " rep movsb\n" 431 1.1 christos 432 1.1 christos " movq %%r8, %%rsi\n" /* move in back to %esi, toss from */ 433 1.1 christos " jmp .L_while_test\n" 434 1.1 christos 435 1.1 christos ".L_test_for_end_of_block:\n" 436 1.1 christos " testb $32, %%al\n" 437 1.1 christos " jz .L_invalid_literal_length_code\n" 438 1.1 christos " movl $1, 116(%%rsp)\n" 439 1.1 christos " jmp .L_break_loop_with_status\n" 440 1.1 christos 441 1.1 christos ".L_invalid_literal_length_code:\n" 442 1.1 christos " movl $2, 116(%%rsp)\n" 443 1.1 christos " jmp .L_break_loop_with_status\n" 444 1.1 christos 445 1.1 christos ".L_invalid_distance_code:\n" 446 1.1 
christos " movl $3, 116(%%rsp)\n" 447 1.1 christos " jmp .L_break_loop_with_status\n" 448 1.1 christos 449 1.1 christos ".L_invalid_distance_too_far:\n" 450 1.1 christos " movl $4, 116(%%rsp)\n" 451 1.1 christos " jmp .L_break_loop_with_status\n" 452 1.1 christos 453 1.1 christos ".L_break_loop:\n" 454 1.1 christos " movl $0, 116(%%rsp)\n" 455 1.1 christos 456 1.1 christos ".L_break_loop_with_status:\n" 457 1.1 christos /* put in, out, bits, and hold back into ar and pop esp */ 458 1.1 christos " movq %%rsi, 16(%%rsp)\n" /* in */ 459 1.1 christos " movq %%rdi, 32(%%rsp)\n" /* out */ 460 1.1 christos " movl %%ebx, 88(%%rsp)\n" /* bits */ 461 1.1 christos " movq %%rdx, 80(%%rsp)\n" /* hold */ 462 1.1 christos " movq (%%rsp), %%rax\n" /* restore rbp and rsp */ 463 1.1 christos " movq 8(%%rsp), %%rbp\n" 464 1.1 christos " movq %%rax, %%rsp\n" 465 1.1 christos : 466 1.1 christos : "m" (ar) 467 1.1 christos : "memory", "%rax", "%rbx", "%rcx", "%rdx", "%rsi", "%rdi", 468 1.1 christos "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15" 469 1.1 christos ); 470 1.1 christos #elif ( defined( __GNUC__ ) || defined( __ICC ) ) && defined( __i386 ) 471 1.1 christos __asm__ __volatile__ ( 472 1.1 christos " leal %0, %%eax\n" 473 1.1 christos " movl %%esp, (%%eax)\n" /* save esp, ebp */ 474 1.1 christos " movl %%ebp, 4(%%eax)\n" 475 1.1 christos " movl %%eax, %%esp\n" 476 1.1 christos " movl 8(%%esp), %%esi\n" /* esi = in */ 477 1.1 christos " movl 16(%%esp), %%edi\n" /* edi = out */ 478 1.1 christos " movl 40(%%esp), %%edx\n" /* edx = hold */ 479 1.1 christos " movl 44(%%esp), %%ebx\n" /* ebx = bits */ 480 1.1 christos " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ 481 1.1 christos 482 1.1 christos " cld\n" 483 1.1 christos " jmp .L_do_loop\n" 484 1.1 christos 485 1.1 christos ".align 32,0x90\n" 486 1.1 christos ".L_while_test:\n" 487 1.1 christos " cmpl %%edi, 24(%%esp)\n" /* out < end */ 488 1.1 christos " jbe .L_break_loop\n" 489 1.1 christos " cmpl %%esi, 12(%%esp)\n" 
/* in < last */ 490 1.1 christos " jbe .L_break_loop\n" 491 1.1 christos 492 1.1 christos ".L_do_loop:\n" 493 1.1 christos " cmpb $15, %%bl\n" 494 1.1 christos " ja .L_get_length_code\n" /* if (15 < bits) */ 495 1.1 christos 496 1.1 christos " xorl %%eax, %%eax\n" 497 1.1 christos " lodsw\n" /* al = *(ushort *)in++ */ 498 1.1 christos " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 499 1.1 christos " addb $16, %%bl\n" /* bits += 16 */ 500 1.1 christos " shll %%cl, %%eax\n" 501 1.1 christos " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ 502 1.1 christos 503 1.1 christos ".L_get_length_code:\n" 504 1.1 christos " movl 56(%%esp), %%eax\n" /* eax = lmask */ 505 1.1 christos " andl %%edx, %%eax\n" /* eax &= hold */ 506 1.1 christos " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[hold & lmask] */ 507 1.1 christos 508 1.1 christos ".L_dolen:\n" 509 1.1 christos " movb %%ah, %%cl\n" /* cl = this.bits */ 510 1.1 christos " subb %%ah, %%bl\n" /* bits -= this.bits */ 511 1.1 christos " shrl %%cl, %%edx\n" /* hold >>= this.bits */ 512 1.1 christos 513 1.1 christos " testb %%al, %%al\n" 514 1.1 christos " jnz .L_test_for_length_base\n" /* if (op != 0) 45.7% */ 515 1.1 christos 516 1.1 christos " shrl $16, %%eax\n" /* output this.val char */ 517 1.1 christos " stosb\n" 518 1.1 christos " jmp .L_while_test\n" 519 1.1 christos 520 1.1 christos ".align 32,0x90\n" 521 1.1 christos ".L_test_for_length_base:\n" 522 1.1 christos " movl %%eax, %%ecx\n" /* len = this */ 523 1.1 christos " shrl $16, %%ecx\n" /* len = this.val */ 524 1.1 christos " movl %%ecx, 64(%%esp)\n" /* save len */ 525 1.1 christos " movb %%al, %%cl\n" 526 1.1 christos 527 1.1 christos " testb $16, %%al\n" 528 1.1 christos " jz .L_test_for_second_level_length\n" /* if ((op & 16) == 0) 8% */ 529 1.1 christos " andb $15, %%cl\n" /* op &= 15 */ 530 1.1 christos " jz .L_decode_distance\n" /* if (!op) */ 531 1.1 christos " cmpb %%cl, %%bl\n" 532 1.1 christos " jae .L_add_bits_to_len\n" /* if (op 
<= bits) */ 533 1.1 christos 534 1.1 christos " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */ 535 1.1 christos " xorl %%eax, %%eax\n" 536 1.1 christos " lodsw\n" /* al = *(ushort *)in++ */ 537 1.1 christos " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 538 1.1 christos " addb $16, %%bl\n" /* bits += 16 */ 539 1.1 christos " shll %%cl, %%eax\n" 540 1.1 christos " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ 541 1.1 christos " movb %%ch, %%cl\n" /* move op back to ecx */ 542 1.1 christos 543 1.1 christos ".L_add_bits_to_len:\n" 544 1.1 christos " subb %%cl, %%bl\n" 545 1.1 christos " xorl %%eax, %%eax\n" 546 1.1 christos " incl %%eax\n" 547 1.1 christos " shll %%cl, %%eax\n" 548 1.1 christos " decl %%eax\n" 549 1.1 christos " andl %%edx, %%eax\n" /* eax &= hold */ 550 1.1 christos " shrl %%cl, %%edx\n" 551 1.1 christos " addl %%eax, 64(%%esp)\n" /* len += hold & mask[op] */ 552 1.1 christos 553 1.1 christos ".L_decode_distance:\n" 554 1.1 christos " cmpb $15, %%bl\n" 555 1.1 christos " ja .L_get_distance_code\n" /* if (15 < bits) */ 556 1.1 christos 557 1.1 christos " xorl %%eax, %%eax\n" 558 1.1 christos " lodsw\n" /* al = *(ushort *)in++ */ 559 1.1 christos " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 560 1.1 christos " addb $16, %%bl\n" /* bits += 16 */ 561 1.1 christos " shll %%cl, %%eax\n" 562 1.1 christos " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ 563 1.1 christos 564 1.1 christos ".L_get_distance_code:\n" 565 1.1 christos " movl 60(%%esp), %%eax\n" /* eax = dmask */ 566 1.1 christos " movl 36(%%esp), %%ecx\n" /* ecx = dcode */ 567 1.1 christos " andl %%edx, %%eax\n" /* eax &= hold */ 568 1.1 christos " movl (%%ecx,%%eax,4), %%eax\n"/* eax = dcode[hold & dmask] */ 569 1.1 christos 570 1.1 christos ".L_dodist:\n" 571 1.1 christos " movl %%eax, %%ebp\n" /* dist = this */ 572 1.1 christos " shrl $16, %%ebp\n" /* dist = this.val */ 573 1.1 christos " movb %%ah, %%cl\n" 574 1.1 christos " subb %%ah, 
%%bl\n" /* bits -= this.bits */ 575 1.1 christos " shrl %%cl, %%edx\n" /* hold >>= this.bits */ 576 1.1 christos " movb %%al, %%cl\n" /* cl = this.op */ 577 1.1 christos 578 1.1 christos " testb $16, %%al\n" /* if ((op & 16) == 0) */ 579 1.1 christos " jz .L_test_for_second_level_dist\n" 580 1.1 christos " andb $15, %%cl\n" /* op &= 15 */ 581 1.1 christos " jz .L_check_dist_one\n" 582 1.1 christos " cmpb %%cl, %%bl\n" 583 1.1 christos " jae .L_add_bits_to_dist\n" /* if (op <= bits) 97.6% */ 584 1.1 christos 585 1.1 christos " movb %%cl, %%ch\n" /* stash op in ch, freeing cl */ 586 1.1 christos " xorl %%eax, %%eax\n" 587 1.1 christos " lodsw\n" /* al = *(ushort *)in++ */ 588 1.1 christos " movb %%bl, %%cl\n" /* cl = bits, needs it for shifting */ 589 1.1 christos " addb $16, %%bl\n" /* bits += 16 */ 590 1.1 christos " shll %%cl, %%eax\n" 591 1.1 christos " orl %%eax, %%edx\n" /* hold |= *((ushort *)in)++ << bits */ 592 1.1 christos " movb %%ch, %%cl\n" /* move op back to ecx */ 593 1.1 christos 594 1.1 christos ".L_add_bits_to_dist:\n" 595 1.1 christos " subb %%cl, %%bl\n" 596 1.1 christos " xorl %%eax, %%eax\n" 597 1.1 christos " incl %%eax\n" 598 1.1 christos " shll %%cl, %%eax\n" 599 1.1 christos " decl %%eax\n" /* (1 << op) - 1 */ 600 1.1 christos " andl %%edx, %%eax\n" /* eax &= hold */ 601 1.1 christos " shrl %%cl, %%edx\n" 602 1.1 christos " addl %%eax, %%ebp\n" /* dist += hold & ((1 << op) - 1) */ 603 1.1 christos 604 1.1 christos ".L_check_window:\n" 605 1.1 christos " movl %%esi, 8(%%esp)\n" /* save in so from can use it's reg */ 606 1.1 christos " movl %%edi, %%eax\n" 607 1.1 christos " subl 20(%%esp), %%eax\n" /* nbytes = out - beg */ 608 1.1 christos 609 1.1 christos " cmpl %%ebp, %%eax\n" 610 1.1 christos " jb .L_clip_window\n" /* if (dist > nbytes) 4.2% */ 611 1.1 christos 612 1.1 christos " movl 64(%%esp), %%ecx\n" /* ecx = len */ 613 1.1 christos " movl %%edi, %%esi\n" 614 1.1 christos " subl %%ebp, %%esi\n" /* from = out - dist */ 615 1.1 christos 
616 1.1 christos " sarl %%ecx\n" 617 1.1 christos " jnc .L_copy_two\n" /* if len % 2 == 0 */ 618 1.1 christos 619 1.1 christos " rep movsw\n" 620 1.1 christos " movb (%%esi), %%al\n" 621 1.1 christos " movb %%al, (%%edi)\n" 622 1.1 christos " incl %%edi\n" 623 1.1 christos 624 1.1 christos " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ 625 1.1 christos " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ 626 1.1 christos " jmp .L_while_test\n" 627 1.1 christos 628 1.1 christos ".L_copy_two:\n" 629 1.1 christos " rep movsw\n" 630 1.1 christos " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ 631 1.1 christos " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ 632 1.1 christos " jmp .L_while_test\n" 633 1.1 christos 634 1.1 christos ".align 32,0x90\n" 635 1.1 christos ".L_check_dist_one:\n" 636 1.1 christos " cmpl $1, %%ebp\n" /* if dist 1, is a memset */ 637 1.1 christos " jne .L_check_window\n" 638 1.1 christos " cmpl %%edi, 20(%%esp)\n" 639 1.1 christos " je .L_check_window\n" /* out == beg, if outside window */ 640 1.1 christos 641 1.1 christos " movl 64(%%esp), %%ecx\n" /* ecx = len */ 642 1.1 christos " movb -1(%%edi), %%al\n" 643 1.1 christos " movb %%al, %%ah\n" 644 1.1 christos 645 1.1 christos " sarl %%ecx\n" 646 1.1 christos " jnc .L_set_two\n" 647 1.1 christos " movb %%al, (%%edi)\n" 648 1.1 christos " incl %%edi\n" 649 1.1 christos 650 1.1 christos ".L_set_two:\n" 651 1.1 christos " rep stosw\n" 652 1.1 christos " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ 653 1.1 christos " jmp .L_while_test\n" 654 1.1 christos 655 1.1 christos ".align 32,0x90\n" 656 1.1 christos ".L_test_for_second_level_length:\n" 657 1.1 christos " testb $64, %%al\n" 658 1.1 christos " jnz .L_test_for_end_of_block\n" /* if ((op & 64) != 0) */ 659 1.1 christos 660 1.1 christos " xorl %%eax, %%eax\n" 661 1.1 christos " incl %%eax\n" 662 1.1 christos " shll %%cl, %%eax\n" 663 1.1 christos " decl %%eax\n" 664 1.1 christos " andl %%edx, %%eax\n" /* eax &= hold */ 665 1.1 
christos " addl 64(%%esp), %%eax\n" /* eax += len */ 666 1.1 christos " movl (%%ebp,%%eax,4), %%eax\n" /* eax = lcode[val+(hold&mask[op])]*/ 667 1.1 christos " jmp .L_dolen\n" 668 1.1 christos 669 1.1 christos ".align 32,0x90\n" 670 1.1 christos ".L_test_for_second_level_dist:\n" 671 1.1 christos " testb $64, %%al\n" 672 1.1 christos " jnz .L_invalid_distance_code\n" /* if ((op & 64) != 0) */ 673 1.1 christos 674 1.1 christos " xorl %%eax, %%eax\n" 675 1.1 christos " incl %%eax\n" 676 1.1 christos " shll %%cl, %%eax\n" 677 1.1 christos " decl %%eax\n" 678 1.1 christos " andl %%edx, %%eax\n" /* eax &= hold */ 679 1.1 christos " addl %%ebp, %%eax\n" /* eax += dist */ 680 1.1 christos " movl 36(%%esp), %%ecx\n" /* ecx = dcode */ 681 1.1 christos " movl (%%ecx,%%eax,4), %%eax\n" /* eax = dcode[val+(hold&mask[op])]*/ 682 1.1 christos " jmp .L_dodist\n" 683 1.1 christos 684 1.1 christos ".align 32,0x90\n" 685 1.1 christos ".L_clip_window:\n" 686 1.1 christos " movl %%eax, %%ecx\n" 687 1.1 christos " movl 48(%%esp), %%eax\n" /* eax = wsize */ 688 1.1 christos " negl %%ecx\n" /* nbytes = -nbytes */ 689 1.1 christos " movl 28(%%esp), %%esi\n" /* from = window */ 690 1.1 christos 691 1.1 christos " cmpl %%ebp, %%eax\n" 692 1.1 christos " jb .L_invalid_distance_too_far\n" /* if (dist > wsize) */ 693 1.1 christos 694 1.1 christos " addl %%ebp, %%ecx\n" /* nbytes = dist - nbytes */ 695 1.1 christos " cmpl $0, 52(%%esp)\n" 696 1.1 christos " jne .L_wrap_around_window\n" /* if (write != 0) */ 697 1.1 christos 698 1.1 christos " subl %%ecx, %%eax\n" 699 1.1 christos " addl %%eax, %%esi\n" /* from += wsize - nbytes */ 700 1.1 christos 701 1.1 christos " movl 64(%%esp), %%eax\n" /* eax = len */ 702 1.1 christos " cmpl %%ecx, %%eax\n" 703 1.1 christos " jbe .L_do_copy\n" /* if (nbytes >= len) */ 704 1.1 christos 705 1.1 christos " subl %%ecx, %%eax\n" /* len -= nbytes */ 706 1.1 christos " rep movsb\n" 707 1.1 christos " movl %%edi, %%esi\n" 708 1.1 christos " subl %%ebp, %%esi\n" /* 
from = out - dist */ 709 1.1 christos " jmp .L_do_copy\n" 710 1.1 christos 711 1.1 christos ".align 32,0x90\n" 712 1.1 christos ".L_wrap_around_window:\n" 713 1.1 christos " movl 52(%%esp), %%eax\n" /* eax = write */ 714 1.1 christos " cmpl %%eax, %%ecx\n" 715 1.1 christos " jbe .L_contiguous_in_window\n" /* if (write >= nbytes) */ 716 1.1 christos 717 1.1 christos " addl 48(%%esp), %%esi\n" /* from += wsize */ 718 1.1 christos " addl %%eax, %%esi\n" /* from += write */ 719 1.1 christos " subl %%ecx, %%esi\n" /* from -= nbytes */ 720 1.1 christos " subl %%eax, %%ecx\n" /* nbytes -= write */ 721 1.1 christos 722 1.1 christos " movl 64(%%esp), %%eax\n" /* eax = len */ 723 1.1 christos " cmpl %%ecx, %%eax\n" 724 1.1 christos " jbe .L_do_copy\n" /* if (nbytes >= len) */ 725 1.1 christos 726 1.1 christos " subl %%ecx, %%eax\n" /* len -= nbytes */ 727 1.1 christos " rep movsb\n" 728 1.1 christos " movl 28(%%esp), %%esi\n" /* from = window */ 729 1.1 christos " movl 52(%%esp), %%ecx\n" /* nbytes = write */ 730 1.1 christos " cmpl %%ecx, %%eax\n" 731 1.1 christos " jbe .L_do_copy\n" /* if (nbytes >= len) */ 732 1.1 christos 733 1.1 christos " subl %%ecx, %%eax\n" /* len -= nbytes */ 734 1.1 christos " rep movsb\n" 735 1.1 christos " movl %%edi, %%esi\n" 736 1.1 christos " subl %%ebp, %%esi\n" /* from = out - dist */ 737 1.1 christos " jmp .L_do_copy\n" 738 1.1 christos 739 1.1 christos ".align 32,0x90\n" 740 1.1 christos ".L_contiguous_in_window:\n" 741 1.1 christos " addl %%eax, %%esi\n" 742 1.1 christos " subl %%ecx, %%esi\n" /* from += write - nbytes */ 743 1.1 christos 744 1.1 christos " movl 64(%%esp), %%eax\n" /* eax = len */ 745 1.1 christos " cmpl %%ecx, %%eax\n" 746 1.1 christos " jbe .L_do_copy\n" /* if (nbytes >= len) */ 747 1.1 christos 748 1.1 christos " subl %%ecx, %%eax\n" /* len -= nbytes */ 749 1.1 christos " rep movsb\n" 750 1.1 christos " movl %%edi, %%esi\n" 751 1.1 christos " subl %%ebp, %%esi\n" /* from = out - dist */ 752 1.1 christos " jmp 
.L_do_copy\n" /* if (nbytes >= len) */ 753 1.1 christos 754 1.1 christos ".align 32,0x90\n" 755 1.1 christos ".L_do_copy:\n" 756 1.1 christos " movl %%eax, %%ecx\n" 757 1.1 christos " rep movsb\n" 758 1.1 christos 759 1.1 christos " movl 8(%%esp), %%esi\n" /* move in back to %esi, toss from */ 760 1.1 christos " movl 32(%%esp), %%ebp\n" /* ebp = lcode */ 761 1.1 christos " jmp .L_while_test\n" 762 1.1 christos 763 1.1 christos ".L_test_for_end_of_block:\n" 764 1.1 christos " testb $32, %%al\n" 765 1.1 christos " jz .L_invalid_literal_length_code\n" 766 1.1 christos " movl $1, 72(%%esp)\n" 767 1.1 christos " jmp .L_break_loop_with_status\n" 768 1.1 christos 769 1.1 christos ".L_invalid_literal_length_code:\n" 770 1.1 christos " movl $2, 72(%%esp)\n" 771 1.1 christos " jmp .L_break_loop_with_status\n" 772 1.1 christos 773 1.1 christos ".L_invalid_distance_code:\n" 774 1.1 christos " movl $3, 72(%%esp)\n" 775 1.1 christos " jmp .L_break_loop_with_status\n" 776 1.1 christos 777 1.1 christos ".L_invalid_distance_too_far:\n" 778 1.1 christos " movl 8(%%esp), %%esi\n" 779 1.1 christos " movl $4, 72(%%esp)\n" 780 1.1 christos " jmp .L_break_loop_with_status\n" 781 1.1 christos 782 1.1 christos ".L_break_loop:\n" 783 1.1 christos " movl $0, 72(%%esp)\n" 784 1.1 christos 785 1.1 christos ".L_break_loop_with_status:\n" 786 1.1 christos /* put in, out, bits, and hold back into ar and pop esp */ 787 1.1 christos " movl %%esi, 8(%%esp)\n" /* save in */ 788 1.1 christos " movl %%edi, 16(%%esp)\n" /* save out */ 789 1.1 christos " movl %%ebx, 44(%%esp)\n" /* save bits */ 790 1.1 christos " movl %%edx, 40(%%esp)\n" /* save hold */ 791 1.1 christos " movl 4(%%esp), %%ebp\n" /* restore esp, ebp */ 792 1.1 christos " movl (%%esp), %%esp\n" 793 1.1 christos : 794 1.1 christos : "m" (ar) 795 1.1 christos : "memory", "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" 796 1.1 christos ); 797 1.1 christos #elif defined( _MSC_VER ) && ! 
defined( _M_AMD64 ) 798 1.1 christos __asm { 799 1.1 christos lea eax, ar 800 1.1 christos mov [eax], esp /* save esp, ebp */ 801 1.1 christos mov [eax+4], ebp 802 1.1 christos mov esp, eax 803 1.1 christos mov esi, [esp+8] /* esi = in */ 804 1.1 christos mov edi, [esp+16] /* edi = out */ 805 1.1 christos mov edx, [esp+40] /* edx = hold */ 806 1.1 christos mov ebx, [esp+44] /* ebx = bits */ 807 1.1 christos mov ebp, [esp+32] /* ebp = lcode */ 808 1.1 christos 809 1.1 christos cld 810 1.1 christos jmp L_do_loop 811 1.1 christos 812 1.1 christos ALIGN 4 813 1.1 christos L_while_test: 814 1.1 christos cmp [esp+24], edi 815 1.1 christos jbe L_break_loop 816 1.1 christos cmp [esp+12], esi 817 1.1 christos jbe L_break_loop 818 1.1 christos 819 1.1 christos L_do_loop: 820 1.1 christos cmp bl, 15 821 1.1 christos ja L_get_length_code /* if (15 < bits) */ 822 1.1 christos 823 1.1 christos xor eax, eax 824 1.1 christos lodsw /* al = *(ushort *)in++ */ 825 1.1 christos mov cl, bl /* cl = bits, needs it for shifting */ 826 1.1 christos add bl, 16 /* bits += 16 */ 827 1.1 christos shl eax, cl 828 1.1 christos or edx, eax /* hold |= *((ushort *)in)++ << bits */ 829 1.1 christos 830 1.1 christos L_get_length_code: 831 1.1 christos mov eax, [esp+56] /* eax = lmask */ 832 1.1 christos and eax, edx /* eax &= hold */ 833 1.1 christos mov eax, [ebp+eax*4] /* eax = lcode[hold & lmask] */ 834 1.1 christos 835 1.1 christos L_dolen: 836 1.1 christos mov cl, ah /* cl = this.bits */ 837 1.1 christos sub bl, ah /* bits -= this.bits */ 838 1.1 christos shr edx, cl /* hold >>= this.bits */ 839 1.1 christos 840 1.1 christos test al, al 841 1.1 christos jnz L_test_for_length_base /* if (op != 0) 45.7% */ 842 1.1 christos 843 1.1 christos shr eax, 16 /* output this.val char */ 844 1.1 christos stosb 845 1.1 christos jmp L_while_test 846 1.1 christos 847 1.1 christos ALIGN 4 848 1.1 christos L_test_for_length_base: 849 1.1 christos mov ecx, eax /* len = this */ 850 1.1 christos shr ecx, 16 /* len 
= this.val */ 851 1.1 christos mov [esp+64], ecx /* save len */ 852 1.1 christos mov cl, al 853 1.1 christos 854 1.1 christos test al, 16 855 1.1 christos jz L_test_for_second_level_length /* if ((op & 16) == 0) 8% */ 856 1.1 christos and cl, 15 /* op &= 15 */ 857 1.1 christos jz L_decode_distance /* if (!op) */ 858 1.1 christos cmp bl, cl 859 1.1 christos jae L_add_bits_to_len /* if (op <= bits) */ 860 1.1 christos 861 1.1 christos mov ch, cl /* stash op in ch, freeing cl */ 862 1.1 christos xor eax, eax 863 1.1 christos lodsw /* al = *(ushort *)in++ */ 864 1.1 christos mov cl, bl /* cl = bits, needs it for shifting */ 865 1.1 christos add bl, 16 /* bits += 16 */ 866 1.1 christos shl eax, cl 867 1.1 christos or edx, eax /* hold |= *((ushort *)in)++ << bits */ 868 1.1 christos mov cl, ch /* move op back to ecx */ 869 1.1 christos 870 1.1 christos L_add_bits_to_len: 871 1.1 christos sub bl, cl 872 1.1 christos xor eax, eax 873 1.1 christos inc eax 874 1.1 christos shl eax, cl 875 1.1 christos dec eax 876 1.1 christos and eax, edx /* eax &= hold */ 877 1.1 christos shr edx, cl 878 1.1 christos add [esp+64], eax /* len += hold & mask[op] */ 879 1.1 christos 880 1.1 christos L_decode_distance: 881 1.1 christos cmp bl, 15 882 1.1 christos ja L_get_distance_code /* if (15 < bits) */ 883 1.1 christos 884 1.1 christos xor eax, eax 885 1.1 christos lodsw /* al = *(ushort *)in++ */ 886 1.1 christos mov cl, bl /* cl = bits, needs it for shifting */ 887 1.1 christos add bl, 16 /* bits += 16 */ 888 1.1 christos shl eax, cl 889 1.1 christos or edx, eax /* hold |= *((ushort *)in)++ << bits */ 890 1.1 christos 891 1.1 christos L_get_distance_code: 892 1.1 christos mov eax, [esp+60] /* eax = dmask */ 893 1.1 christos mov ecx, [esp+36] /* ecx = dcode */ 894 1.1 christos and eax, edx /* eax &= hold */ 895 1.1 christos mov eax, [ecx+eax*4]/* eax = dcode[hold & dmask] */ 896 1.1 christos 897 1.1 christos L_dodist: 898 1.1 christos mov ebp, eax /* dist = this */ 899 1.1 christos shr 
ebp, 16 /* dist = this.val */ 900 1.1 christos mov cl, ah 901 1.1 christos sub bl, ah /* bits -= this.bits */ 902 1.1 christos shr edx, cl /* hold >>= this.bits */ 903 1.1 christos mov cl, al /* cl = this.op */ 904 1.1 christos 905 1.1 christos test al, 16 /* if ((op & 16) == 0) */ 906 1.1 christos jz L_test_for_second_level_dist 907 1.1 christos and cl, 15 /* op &= 15 */ 908 1.1 christos jz L_check_dist_one 909 1.1 christos cmp bl, cl 910 1.1 christos jae L_add_bits_to_dist /* if (op <= bits) 97.6% */ 911 1.1 christos 912 1.1 christos mov ch, cl /* stash op in ch, freeing cl */ 913 1.1 christos xor eax, eax 914 1.1 christos lodsw /* al = *(ushort *)in++ */ 915 1.1 christos mov cl, bl /* cl = bits, needs it for shifting */ 916 1.1 christos add bl, 16 /* bits += 16 */ 917 1.1 christos shl eax, cl 918 1.1 christos or edx, eax /* hold |= *((ushort *)in)++ << bits */ 919 1.1 christos mov cl, ch /* move op back to ecx */ 920 1.1 christos 921 1.1 christos L_add_bits_to_dist: 922 1.1 christos sub bl, cl 923 1.1 christos xor eax, eax 924 1.1 christos inc eax 925 1.1 christos shl eax, cl 926 1.1 christos dec eax /* (1 << op) - 1 */ 927 1.1 christos and eax, edx /* eax &= hold */ 928 1.1 christos shr edx, cl 929 1.1 christos add ebp, eax /* dist += hold & ((1 << op) - 1) */ 930 1.1 christos 931 1.1 christos L_check_window: 932 1.1 christos mov [esp+8], esi /* save in so from can use it's reg */ 933 1.1 christos mov eax, edi 934 1.1 christos sub eax, [esp+20] /* nbytes = out - beg */ 935 1.1 christos 936 1.1 christos cmp eax, ebp 937 1.1 christos jb L_clip_window /* if (dist > nbytes) 4.2% */ 938 1.1 christos 939 1.1 christos mov ecx, [esp+64] /* ecx = len */ 940 1.1 christos mov esi, edi 941 1.1 christos sub esi, ebp /* from = out - dist */ 942 1.1 christos 943 1.1 christos sar ecx, 1 944 1.1 christos jnc L_copy_two 945 1.1 christos 946 1.1 christos rep movsw 947 1.1 christos mov al, [esi] 948 1.1 christos mov [edi], al 949 1.1 christos inc edi 950 1.1 christos 951 1.1 
christos mov esi, [esp+8] /* move in back to %esi, toss from */ 952 1.1 christos mov ebp, [esp+32] /* ebp = lcode */ 953 1.1 christos jmp L_while_test 954 1.1 christos 955 1.1 christos L_copy_two: 956 1.1 christos rep movsw 957 1.1 christos mov esi, [esp+8] /* move in back to %esi, toss from */ 958 1.1 christos mov ebp, [esp+32] /* ebp = lcode */ 959 1.1 christos jmp L_while_test 960 1.1 christos 961 1.1 christos ALIGN 4 962 1.1 christos L_check_dist_one: 963 1.1 christos cmp ebp, 1 /* if dist 1, is a memset */ 964 1.1 christos jne L_check_window 965 1.1 christos cmp [esp+20], edi 966 1.1 christos je L_check_window /* out == beg, if outside window */ 967 1.1 christos 968 1.1 christos mov ecx, [esp+64] /* ecx = len */ 969 1.1 christos mov al, [edi-1] 970 1.1 christos mov ah, al 971 1.1 christos 972 1.1 christos sar ecx, 1 973 1.1 christos jnc L_set_two 974 1.1 christos mov [edi], al /* memset out with from[-1] */ 975 1.1 christos inc edi 976 1.1 christos 977 1.1 christos L_set_two: 978 1.1 christos rep stosw 979 1.1 christos mov ebp, [esp+32] /* ebp = lcode */ 980 1.1 christos jmp L_while_test 981 1.1 christos 982 1.1 christos ALIGN 4 983 1.1 christos L_test_for_second_level_length: 984 1.1 christos test al, 64 985 1.1 christos jnz L_test_for_end_of_block /* if ((op & 64) != 0) */ 986 1.1 christos 987 1.1 christos xor eax, eax 988 1.1 christos inc eax 989 1.1 christos shl eax, cl 990 1.1 christos dec eax 991 1.1 christos and eax, edx /* eax &= hold */ 992 1.1 christos add eax, [esp+64] /* eax += len */ 993 1.1 christos mov eax, [ebp+eax*4] /* eax = lcode[val+(hold&mask[op])]*/ 994 1.1 christos jmp L_dolen 995 1.1 christos 996 1.1 christos ALIGN 4 997 1.1 christos L_test_for_second_level_dist: 998 1.1 christos test al, 64 999 1.1 christos jnz L_invalid_distance_code /* if ((op & 64) != 0) */ 1000 1.1 christos 1001 1.1 christos xor eax, eax 1002 1.1 christos inc eax 1003 1.1 christos shl eax, cl 1004 1.1 christos dec eax 1005 1.1 christos and eax, edx /* eax &= hold 
*/ 1006 1.1 christos add eax, ebp /* eax += dist */ 1007 1.1 christos mov ecx, [esp+36] /* ecx = dcode */ 1008 1.1 christos mov eax, [ecx+eax*4] /* eax = dcode[val+(hold&mask[op])]*/ 1009 1.1 christos jmp L_dodist 1010 1.1 christos 1011 1.1 christos ALIGN 4 1012 1.1 christos L_clip_window: 1013 1.1 christos mov ecx, eax 1014 1.1 christos mov eax, [esp+48] /* eax = wsize */ 1015 1.1 christos neg ecx /* nbytes = -nbytes */ 1016 1.1 christos mov esi, [esp+28] /* from = window */ 1017 1.1 christos 1018 1.1 christos cmp eax, ebp 1019 1.1 christos jb L_invalid_distance_too_far /* if (dist > wsize) */ 1020 1.1 christos 1021 1.1 christos add ecx, ebp /* nbytes = dist - nbytes */ 1022 1.1 christos cmp dword ptr [esp+52], 0 1023 1.1 christos jne L_wrap_around_window /* if (write != 0) */ 1024 1.1 christos 1025 1.1 christos sub eax, ecx 1026 1.1 christos add esi, eax /* from += wsize - nbytes */ 1027 1.1 christos 1028 1.1 christos mov eax, [esp+64] /* eax = len */ 1029 1.1 christos cmp eax, ecx 1030 1.1 christos jbe L_do_copy /* if (nbytes >= len) */ 1031 1.1 christos 1032 1.1 christos sub eax, ecx /* len -= nbytes */ 1033 1.1 christos rep movsb 1034 1.1 christos mov esi, edi 1035 1.1 christos sub esi, ebp /* from = out - dist */ 1036 1.1 christos jmp L_do_copy 1037 1.1 christos 1038 1.1 christos ALIGN 4 1039 1.1 christos L_wrap_around_window: 1040 1.1 christos mov eax, [esp+52] /* eax = write */ 1041 1.1 christos cmp ecx, eax 1042 1.1 christos jbe L_contiguous_in_window /* if (write >= nbytes) */ 1043 1.1 christos 1044 1.1 christos add esi, [esp+48] /* from += wsize */ 1045 1.1 christos add esi, eax /* from += write */ 1046 1.1 christos sub esi, ecx /* from -= nbytes */ 1047 1.1 christos sub ecx, eax /* nbytes -= write */ 1048 1.1 christos 1049 1.1 christos mov eax, [esp+64] /* eax = len */ 1050 1.1 christos cmp eax, ecx 1051 1.1 christos jbe L_do_copy /* if (nbytes >= len) */ 1052 1.1 christos 1053 1.1 christos sub eax, ecx /* len -= nbytes */ 1054 1.1 christos rep movsb 
1055 1.1 christos mov esi, [esp+28] /* from = window */ 1056 1.1 christos mov ecx, [esp+52] /* nbytes = write */ 1057 1.1 christos cmp eax, ecx 1058 1.1 christos jbe L_do_copy /* if (nbytes >= len) */ 1059 1.1 christos 1060 1.1 christos sub eax, ecx /* len -= nbytes */ 1061 1.1 christos rep movsb 1062 1.1 christos mov esi, edi 1063 1.1 christos sub esi, ebp /* from = out - dist */ 1064 1.1 christos jmp L_do_copy 1065 1.1 christos 1066 1.1 christos ALIGN 4 1067 1.1 christos L_contiguous_in_window: 1068 1.1 christos add esi, eax 1069 1.1 christos sub esi, ecx /* from += write - nbytes */ 1070 1.1 christos 1071 1.1 christos mov eax, [esp+64] /* eax = len */ 1072 1.1 christos cmp eax, ecx 1073 1.1 christos jbe L_do_copy /* if (nbytes >= len) */ 1074 1.1 christos 1075 1.1 christos sub eax, ecx /* len -= nbytes */ 1076 1.1 christos rep movsb 1077 1.1 christos mov esi, edi 1078 1.1 christos sub esi, ebp /* from = out - dist */ 1079 1.1 christos jmp L_do_copy 1080 1.1 christos 1081 1.1 christos ALIGN 4 1082 1.1 christos L_do_copy: 1083 1.1 christos mov ecx, eax 1084 1.1 christos rep movsb 1085 1.1 christos 1086 1.1 christos mov esi, [esp+8] /* move in back to %esi, toss from */ 1087 1.1 christos mov ebp, [esp+32] /* ebp = lcode */ 1088 1.1 christos jmp L_while_test 1089 1.1 christos 1090 1.1 christos L_test_for_end_of_block: 1091 1.1 christos test al, 32 1092 1.1 christos jz L_invalid_literal_length_code 1093 1.1 christos mov dword ptr [esp+72], 1 1094 1.1 christos jmp L_break_loop_with_status 1095 1.1 christos 1096 1.1 christos L_invalid_literal_length_code: 1097 1.1 christos mov dword ptr [esp+72], 2 1098 1.1 christos jmp L_break_loop_with_status 1099 1.1 christos 1100 1.1 christos L_invalid_distance_code: 1101 1.1 christos mov dword ptr [esp+72], 3 1102 1.1 christos jmp L_break_loop_with_status 1103 1.1 christos 1104 1.1 christos L_invalid_distance_too_far: 1105 1.1 christos mov esi, [esp+8] /* restore in (esi was reused as from); [esp+4] holds the saved ebp, not in */ 1106 1.1 christos mov dword ptr [esp+72], 4 1107 1.1 christos jmp
L_break_loop_with_status 1108 1.1 christos 1109 1.1 christos L_break_loop: 1110 1.1 christos mov dword ptr [esp+72], 0 1111 1.1 christos 1112 1.1 christos L_break_loop_with_status: 1113 1.1 christos /* put in, out, bits, and hold back into ar and pop esp */ 1114 1.1 christos mov [esp+8], esi /* save in */ 1115 1.1 christos mov [esp+16], edi /* save out */ 1116 1.1 christos mov [esp+44], ebx /* save bits */ 1117 1.1 christos mov [esp+40], edx /* save hold */ 1118 1.1 christos mov ebp, [esp+4] /* restore esp, ebp */ 1119 1.1 christos mov esp, [esp] 1120 1.1 christos } 1121 1.1 christos #else 1122 1.1 christos #error "x86 architecture not defined" 1123 1.1 christos #endif 1124 1.1 christos 1125 1.1 christos if (ar.status > 1) { 1126 1.1 christos if (ar.status == 2) 1127 1.1 christos strm->msg = "invalid literal/length code"; 1128 1.1 christos else if (ar.status == 3) 1129 1.1 christos strm->msg = "invalid distance code"; 1130 1.1 christos else 1131 1.1 christos strm->msg = "invalid distance too far back"; 1132 1.1 christos state->mode = BAD; 1133 1.1 christos } 1134 1.1 christos else if ( ar.status == 1 ) { 1135 1.1 christos state->mode = TYPE; 1136 1.1 christos } 1137 1.1 christos 1138 1.1 christos /* return unused bytes (on entry, bits < 8, so in won't go too far back) */ 1139 1.1 christos ar.len = ar.bits >> 3; 1140 1.1 christos ar.in -= ar.len; 1141 1.1 christos ar.bits -= ar.len << 3; 1142 1.1 christos ar.hold &= (1U << ar.bits) - 1; 1143 1.1 christos 1144 1.1 christos /* update state and return */ 1145 1.1 christos strm->next_in = ar.in; 1146 1.1 christos strm->next_out = ar.out; 1147 1.1 christos strm->avail_in = (unsigned)(ar.in < ar.last ? 1148 1.1 christos PAD_AVAIL_IN + (ar.last - ar.in) : 1149 1.1 christos PAD_AVAIL_IN - (ar.in - ar.last)); 1150 1.1 christos strm->avail_out = (unsigned)(ar.out < ar.end ? 
1151 1.1 christos PAD_AVAIL_OUT + (ar.end - ar.out) : 1152 1.1 christos PAD_AVAIL_OUT - (ar.out - ar.end)); 1153 1.1 christos state->hold = ar.hold; 1154 1.1 christos state->bits = ar.bits; 1155 1.1 christos return; 1156 1.1 christos } 1157 1.1 christos 1158