/*
 * Salsa20 keystream generation and keystream-XOR for x86-64, SSE2 (xmm0-15).
 *
 * Syntax: GNU as, AT&T operand order; the file is run through the C
 * preprocessor (#ifdef / ASM_HIDE_SYMBOL).
 * ABI:    arguments are taken from rdi, rsi, rdx, rcx, r8, r9 in that order,
 *         i.e. the System V AMD64 integer-argument sequence.
 *
 * Entry points (each is exported with and without a leading underscore).
 * The C prototypes below are inferred from how the registers are used;
 * confirm them against the C caller:
 *
 *   int stream_salsa20_xmm6(unsigned char *c,            rdi
 *                           unsigned long long clen,     rsi
 *                           const unsigned char *n,      rdx  (8 bytes read)
 *                           const unsigned char *k);     rcx  (32 bytes read)
 *       Zero-fills c[0..clen) and then falls into the XOR path with the
 *       block counter set to 0, so c receives the raw keystream.
 *
 *   int stream_salsa20_xmm6_xor_ic(unsigned char *c,        rdi
 *                                  const unsigned char *m,  rsi
 *                                  unsigned long long mlen, rdx
 *                                  const unsigned char *n,  rcx (8 bytes read)
 *                                  unsigned long long ic,   r8  (64-bit counter)
 *                                  const unsigned char *k); r9  (32 bytes read)
 *       c[i] = m[i] ^ keystream[i], keystream starting at block counter ic.
 *
 * Both return 0 in rax.  A length of 0 returns immediately.
 *
 * Register roles after the prologue (shared by both entry points):
 *   rdi = output cursor     rsi = input cursor      r9  = bytes remaining
 *   r10 = key pointer       rdx = nonce pointer (until ._start has read it)
 *   rax, rcx, rdx, r8, r9, r11 are used as scratch afterwards.
 *
 * Stack frame: rsp is lowered by 512 + (rsp & 31), which makes it 32-byte
 * aligned, so every movdqa to the frame is aligned.
 *     0.. 63   bounce buffer for a final block shorter than 64 bytes
 *    64..127   the 16 input words as four 16-byte rows (64, 80, 96, 112),
 *              stored in the diagonal order the one-block loop works on
 *   128..351   14 input words, each replicated across four 32-bit lanes
 *   352, 368   low / high counter word for four consecutive blocks
 *   384, 400   spill slots used inside ._mainloop1
 *   416        frame size (needed to undo the variable rsp adjustment)
 *   424..464   saved r12, r13, r14, r15, rbx, rbp
 *   472        64-bit block counter
 *   480        bytes remaining (parked while r9 is used as scratch)
 *
 * The instruction sequence below is deliberately left exactly as it was;
 * only layout, comments and the ELF .size annotations were added.
 */
#ifdef HAVE_AMD64_ASM

        .text
        .p2align 5

#ifdef ASM_HIDE_SYMBOL
ASM_HIDE_SYMBOL stream_salsa20_xmm6
ASM_HIDE_SYMBOL _stream_salsa20_xmm6
#endif
        .globl  stream_salsa20_xmm6
        .globl  _stream_salsa20_xmm6
#ifdef __ELF__
        .type   stream_salsa20_xmm6, @function
        .type   _stream_salsa20_xmm6, @function
#endif
stream_salsa20_xmm6:
_stream_salsa20_xmm6:
        /* Carve out the aligned frame and remember its size in 416(%rsp). */
        mov     %rsp,%r11
        and     $31,%r11
        add     $512,%r11
        sub     %r11,%rsp
        movq    %r11,416(%rsp)
        movq    %r12,424(%rsp)
        movq    %r13,432(%rsp)
        movq    %r14,440(%rsp)
        movq    %r15,448(%rsp)
        movq    %rbx,456(%rsp)
        movq    %rbp,464(%rsp)
        /* Shuffle arguments into the shared register roles. */
        mov     %rsi,%r9                /* r9  = clen */
        mov     %rdi,%rdi
        mov     %rdi,%rsi               /* input cursor = output buffer */
        mov     %rdx,%rdx
        mov     %rcx,%r10               /* r10 = key */
        cmp     $0,%r9
        jbe     ._done                  /* nothing to do for clen == 0 */
        /* Zero the output so that XOR-ing the keystream into it yields the
           keystream itself; rep stosb advances rdi, so rewind it after. */
        mov     $0,%rax
        mov     %r9,%rcx
        rep     stosb
        sub     %r9,%rdi
        movq    $0,472(%rsp)            /* block counter starts at 0 */
        jmp     ._start
#ifdef __ELF__
        .size   stream_salsa20_xmm6, .-stream_salsa20_xmm6
        .size   _stream_salsa20_xmm6, .-_stream_salsa20_xmm6
#endif

        .text
        .p2align 5

#ifdef ASM_HIDE_SYMBOL
ASM_HIDE_SYMBOL stream_salsa20_xmm6_xor_ic
ASM_HIDE_SYMBOL _stream_salsa20_xmm6_xor_ic
#endif
        .globl  stream_salsa20_xmm6_xor_ic
        .globl  _stream_salsa20_xmm6_xor_ic
#ifdef __ELF__
        .type   stream_salsa20_xmm6_xor_ic, @function
        .type   _stream_salsa20_xmm6_xor_ic, @function
#endif
stream_salsa20_xmm6_xor_ic:
_stream_salsa20_xmm6_xor_ic:

        /* Same frame set-up as stream_salsa20_xmm6. */
        mov     %rsp,%r11
        and     $31,%r11
        add     $512,%r11
        sub     %r11,%rsp
        movq    %r11,416(%rsp)
        movq    %r12,424(%rsp)
        movq    %r13,432(%rsp)
        movq    %r14,440(%rsp)
        movq    %r15,448(%rsp)
        movq    %rbx,456(%rsp)
        movq    %rbp,464(%rsp)
        mov     %rdi,%rdi
        mov     %rsi,%rsi
        mov     %r9,%r10                /* r10 = key */
        movq    %r8,472(%rsp)           /* block counter = ic */
        mov     %rdx,%r9                /* r9  = mlen */
        mov     %rcx,%rdx               /* rdx = nonce */
        cmp     $0,%r9
        jbe     ._done

._start:
        /* Build the four 16-byte rows of the input block at 64..127(%rsp).
           Key words come from r10, nonce words from rdx, the counter from
           472(%rsp) (low half -> 80(%rsp), high half -> 4+96(%rsp)). */
        movl    20(%r10),%ecx
        movl    0(%r10),%r8d
        movl    0(%rdx),%eax
        movl    16(%r10),%r11d
        movl    %ecx,64(%rsp)
        movl    %r8d,4+64(%rsp)
        movl    %eax,8+64(%rsp)
        movl    %r11d,12+64(%rsp)
        movl    24(%r10),%r8d
        movl    4(%r10),%eax
        movl    4(%rdx),%edx
        movq    472(%rsp),%rcx
        movl    %ecx,80(%rsp)
        movl    %r8d,4+80(%rsp)
        movl    %eax,8+80(%rsp)
        movl    %edx,12+80(%rsp)
        movl    12(%r10),%edx
        shr     $32,%rcx
        movl    28(%r10),%r8d
        movl    8(%r10),%eax
        movl    %edx,96(%rsp)
        movl    %ecx,4+96(%rsp)
        movl    %r8d,8+96(%rsp)
        movl    %eax,12+96(%rsp)
        /* The four constant words: 0x61707865, 0x3320646e, 0x79622d32,
           0x6b206574 (ASCII "expand 32-byte k", little-endian). */
        mov     $1634760805,%rdx
        mov     $857760878,%rcx
        mov     $2036477234,%r8
        mov     $1797285236,%rax
        movl    %edx,112(%rsp)
        movl    %ecx,4+112(%rsp)
        movl    %r8d,8+112(%rsp)
        movl    %eax,12+112(%rsp)
        cmp     $256,%r9
        jb      ._bytesbetween1and255

        /* At least 256 bytes: prepare the four-blocks-at-once path.  Each
           pshufd broadcasts one 32-bit word of a row into all four lanes.
           Slot -> output word index (derived from the store offsets used
           after ._mainloop1):
             176:w0  240:w1  288:w2  336:w3  304:w4  128:w5  192:w6  256:w7
             144:w10 208:w11 224:w12 272:w13 320:w14 160:w15
           Words 8 and 9 (the counter) differ per block and are built at
           352/368 inside the loop below. */
        movdqa  112(%rsp),%xmm0
        pshufd  $0x55,%xmm0,%xmm1
        pshufd  $0xaa,%xmm0,%xmm2
        pshufd  $0xff,%xmm0,%xmm3
        pshufd  $0x00,%xmm0,%xmm0
        movdqa  %xmm1,128(%rsp)
        movdqa  %xmm2,144(%rsp)
        movdqa  %xmm3,160(%rsp)
        movdqa  %xmm0,176(%rsp)
        movdqa  64(%rsp),%xmm0
        pshufd  $0xaa,%xmm0,%xmm1
        pshufd  $0xff,%xmm0,%xmm2
        pshufd  $0x00,%xmm0,%xmm3
        pshufd  $0x55,%xmm0,%xmm0
        movdqa  %xmm1,192(%rsp)
        movdqa  %xmm2,208(%rsp)
        movdqa  %xmm3,224(%rsp)
        movdqa  %xmm0,240(%rsp)
        movdqa  80(%rsp),%xmm0
        pshufd  $0xff,%xmm0,%xmm1
        pshufd  $0x55,%xmm0,%xmm2
        pshufd  $0xaa,%xmm0,%xmm0
        movdqa  %xmm1,256(%rsp)
        movdqa  %xmm2,272(%rsp)
        movdqa  %xmm0,288(%rsp)
        movdqa  96(%rsp),%xmm0
        pshufd  $0x00,%xmm0,%xmm1
        pshufd  $0xaa,%xmm0,%xmm2
        pshufd  $0xff,%xmm0,%xmm0
        movdqa  %xmm1,304(%rsp)
        movdqa  %xmm2,320(%rsp)
        movdqa  %xmm0,336(%rsp)

        .p2align 4
._bytesatleast256:
        /* Lane i of 352/368 gets the low/high half of counter + i; the
           counter advanced by 4 is written back to 472(%rsp) and to the
           single-block rows (80 and 4+96) so the tail path continues from
           the right block. */
        movq    472(%rsp),%rdx
        mov     %rdx,%rcx
        shr     $32,%rcx
        movl    %edx,352(%rsp)
        movl    %ecx,368(%rsp)
        add     $1,%rdx
        mov     %rdx,%rcx
        shr     $32,%rcx
        movl    %edx,4+352(%rsp)
        movl    %ecx,4+368(%rsp)
        add     $1,%rdx
        mov     %rdx,%rcx
        shr     $32,%rcx
        movl    %edx,8+352(%rsp)
        movl    %ecx,8+368(%rsp)
        add     $1,%rdx
        mov     %rdx,%rcx
        shr     $32,%rcx
        movl    %edx,12+352(%rsp)
        movl    %ecx,12+368(%rsp)
        add     $1,%rdx
        mov     %rdx,%rcx
        shr     $32,%rcx
        movl    %edx,80(%rsp)
        movl    %ecx,4+96(%rsp)
        movq    %rdx,472(%rsp)
        movq    %r9,480(%rsp)           /* park length; r9 is scratch below */
        mov     $20,%rdx                /* round counter, stepped by 2 */
        movdqa  128(%rsp),%xmm0
        movdqa  144(%rsp),%xmm1
        movdqa  160(%rsp),%xmm2
        movdqa  320(%rsp),%xmm3
        movdqa  336(%rsp),%xmm4
        movdqa  192(%rsp),%xmm5
        movdqa  208(%rsp),%xmm6
        movdqa  240(%rsp),%xmm7
        movdqa  256(%rsp),%xmm8
        movdqa  272(%rsp),%xmm9
        movdqa  288(%rsp),%xmm10
        movdqa  368(%rsp),%xmm11
        movdqa  176(%rsp),%xmm12
        movdqa  224(%rsp),%xmm13
        movdqa  304(%rsp),%xmm14
        movdqa  352(%rsp),%xmm15

        .p2align 4
._mainloop1:
        /* Ten iterations (rdx = 20, 18, ... 2).  All sixteen xmm registers
           hold state, so two of them are parked at 384/400(%rsp) at a time
           to free temporaries.  Every 7-instruction group is one step
               t = a + b;  c ^= t << r;  c ^= t >> (32 - r)
           with r in {7, 9, 13, 18}.  Do not reorder: the spill/reload
           points depend on the exact sequence. */
        movdqa  %xmm1,384(%rsp)
        movdqa  %xmm2,400(%rsp)
        movdqa  %xmm13,%xmm1
        paddd   %xmm12,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $7,%xmm1
        pxor    %xmm1,%xmm14
        psrld   $25,%xmm2
        pxor    %xmm2,%xmm14
        movdqa  %xmm7,%xmm1
        paddd   %xmm0,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $7,%xmm1
        pxor    %xmm1,%xmm11
        psrld   $25,%xmm2
        pxor    %xmm2,%xmm11
        movdqa  %xmm12,%xmm1
        paddd   %xmm14,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $9,%xmm1
        pxor    %xmm1,%xmm15
        psrld   $23,%xmm2
        pxor    %xmm2,%xmm15
        movdqa  %xmm0,%xmm1
        paddd   %xmm11,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $9,%xmm1
        pxor    %xmm1,%xmm9
        psrld   $23,%xmm2
        pxor    %xmm2,%xmm9
        movdqa  %xmm14,%xmm1
        paddd   %xmm15,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $13,%xmm1
        pxor    %xmm1,%xmm13
        psrld   $19,%xmm2
        pxor    %xmm2,%xmm13
        movdqa  %xmm11,%xmm1
        paddd   %xmm9,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $13,%xmm1
        pxor    %xmm1,%xmm7
        psrld   $19,%xmm2
        pxor    %xmm2,%xmm7
        movdqa  %xmm15,%xmm1
        paddd   %xmm13,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $18,%xmm1
        pxor    %xmm1,%xmm12
        psrld   $14,%xmm2
        pxor    %xmm2,%xmm12
        movdqa  384(%rsp),%xmm1
        movdqa  %xmm12,384(%rsp)
        movdqa  %xmm9,%xmm2
        paddd   %xmm7,%xmm2
        movdqa  %xmm2,%xmm12
        pslld   $18,%xmm2
        pxor    %xmm2,%xmm0
        psrld   $14,%xmm12
        pxor    %xmm12,%xmm0
        movdqa  %xmm5,%xmm2
        paddd   %xmm1,%xmm2
        movdqa  %xmm2,%xmm12
        pslld   $7,%xmm2
        pxor    %xmm2,%xmm3
        psrld   $25,%xmm12
        pxor    %xmm12,%xmm3
        movdqa  400(%rsp),%xmm2
        movdqa  %xmm0,400(%rsp)
        movdqa  %xmm6,%xmm0
        paddd   %xmm2,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $7,%xmm0
        pxor    %xmm0,%xmm4
        psrld   $25,%xmm12
        pxor    %xmm12,%xmm4
        movdqa  %xmm1,%xmm0
        paddd   %xmm3,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $9,%xmm0
        pxor    %xmm0,%xmm10
        psrld   $23,%xmm12
        pxor    %xmm12,%xmm10
        movdqa  %xmm2,%xmm0
        paddd   %xmm4,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $9,%xmm0
        pxor    %xmm0,%xmm8
        psrld   $23,%xmm12
        pxor    %xmm12,%xmm8
        movdqa  %xmm3,%xmm0
        paddd   %xmm10,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $13,%xmm0
        pxor    %xmm0,%xmm5
        psrld   $19,%xmm12
        pxor    %xmm12,%xmm5
        movdqa  %xmm4,%xmm0
        paddd   %xmm8,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $13,%xmm0
        pxor    %xmm0,%xmm6
        psrld   $19,%xmm12
        pxor    %xmm12,%xmm6
        movdqa  %xmm10,%xmm0
        paddd   %xmm5,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $18,%xmm0
        pxor    %xmm0,%xmm1
        psrld   $14,%xmm12
        pxor    %xmm12,%xmm1
        movdqa  384(%rsp),%xmm0
        movdqa  %xmm1,384(%rsp)
        movdqa  %xmm4,%xmm1
        paddd   %xmm0,%xmm1
        movdqa  %xmm1,%xmm12
        pslld   $7,%xmm1
        pxor    %xmm1,%xmm7
        psrld   $25,%xmm12
        pxor    %xmm12,%xmm7
        movdqa  %xmm8,%xmm1
        paddd   %xmm6,%xmm1
        movdqa  %xmm1,%xmm12
        pslld   $18,%xmm1
        pxor    %xmm1,%xmm2
        psrld   $14,%xmm12
        pxor    %xmm12,%xmm2
        movdqa  400(%rsp),%xmm12
        movdqa  %xmm2,400(%rsp)
        movdqa  %xmm14,%xmm1
        paddd   %xmm12,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $7,%xmm1
        pxor    %xmm1,%xmm5
        psrld   $25,%xmm2
        pxor    %xmm2,%xmm5
        movdqa  %xmm0,%xmm1
        paddd   %xmm7,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $9,%xmm1
        pxor    %xmm1,%xmm10
        psrld   $23,%xmm2
        pxor    %xmm2,%xmm10
        movdqa  %xmm12,%xmm1
        paddd   %xmm5,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $9,%xmm1
        pxor    %xmm1,%xmm8
        psrld   $23,%xmm2
        pxor    %xmm2,%xmm8
        movdqa  %xmm7,%xmm1
        paddd   %xmm10,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $13,%xmm1
        pxor    %xmm1,%xmm4
        psrld   $19,%xmm2
        pxor    %xmm2,%xmm4
        movdqa  %xmm5,%xmm1
        paddd   %xmm8,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $13,%xmm1
        pxor    %xmm1,%xmm14
        psrld   $19,%xmm2
        pxor    %xmm2,%xmm14
        movdqa  %xmm10,%xmm1
        paddd   %xmm4,%xmm1
        movdqa  %xmm1,%xmm2
        pslld   $18,%xmm1
        pxor    %xmm1,%xmm0
        psrld   $14,%xmm2
        pxor    %xmm2,%xmm0
        movdqa  384(%rsp),%xmm1
        movdqa  %xmm0,384(%rsp)
        movdqa  %xmm8,%xmm0
        paddd   %xmm14,%xmm0
        movdqa  %xmm0,%xmm2
        pslld   $18,%xmm0
        pxor    %xmm0,%xmm12
        psrld   $14,%xmm2
        pxor    %xmm2,%xmm12
        movdqa  %xmm11,%xmm0
        paddd   %xmm1,%xmm0
        movdqa  %xmm0,%xmm2
        pslld   $7,%xmm0
        pxor    %xmm0,%xmm6
        psrld   $25,%xmm2
        pxor    %xmm2,%xmm6
        movdqa  400(%rsp),%xmm2
        movdqa  %xmm12,400(%rsp)
        movdqa  %xmm3,%xmm0
        paddd   %xmm2,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $7,%xmm0
        pxor    %xmm0,%xmm13
        psrld   $25,%xmm12
        pxor    %xmm12,%xmm13
        movdqa  %xmm1,%xmm0
        paddd   %xmm6,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $9,%xmm0
        pxor    %xmm0,%xmm15
        psrld   $23,%xmm12
        pxor    %xmm12,%xmm15
        movdqa  %xmm2,%xmm0
        paddd   %xmm13,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $9,%xmm0
        pxor    %xmm0,%xmm9
        psrld   $23,%xmm12
        pxor    %xmm12,%xmm9
        movdqa  %xmm6,%xmm0
        paddd   %xmm15,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $13,%xmm0
        pxor    %xmm0,%xmm11
        psrld   $19,%xmm12
        pxor    %xmm12,%xmm11
        movdqa  %xmm13,%xmm0
        paddd   %xmm9,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $13,%xmm0
        pxor    %xmm0,%xmm3
        psrld   $19,%xmm12
        pxor    %xmm12,%xmm3
        movdqa  %xmm15,%xmm0
        paddd   %xmm11,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $18,%xmm0
        pxor    %xmm0,%xmm1
        psrld   $14,%xmm12
        pxor    %xmm12,%xmm1
        movdqa  %xmm9,%xmm0
        paddd   %xmm3,%xmm0
        movdqa  %xmm0,%xmm12
        pslld   $18,%xmm0
        pxor    %xmm0,%xmm2
        psrld   $14,%xmm12
        pxor    %xmm12,%xmm2
        movdqa  384(%rsp),%xmm12
        movdqa  400(%rsp),%xmm0
        sub     $2,%rdx
        ja      ._mainloop1

        /* Feed-forward and output.  Each vector gets its saved input words
           added back, then its four lanes are written to the four blocks
           (64-byte stride).  movd copies the low lane to a GPR (only the
           low 32 bits are used); pshufd $0x39 rotates the next lane into
           lane 0.  r9 is scratch here; the length is in 480(%rsp). */

        /* output words 0..3 */
        paddd   176(%rsp),%xmm12
        paddd   240(%rsp),%xmm7
        paddd   288(%rsp),%xmm10
        paddd   336(%rsp),%xmm4
        movd    %xmm12,%rdx
        movd    %xmm7,%rcx
        movd    %xmm10,%r8
        movd    %xmm4,%r9
        pshufd  $0x39,%xmm12,%xmm12
        pshufd  $0x39,%xmm7,%xmm7
        pshufd  $0x39,%xmm10,%xmm10
        pshufd  $0x39,%xmm4,%xmm4
        xorl    0(%rsi),%edx
        xorl    4(%rsi),%ecx
        xorl    8(%rsi),%r8d
        xorl    12(%rsi),%r9d
        movl    %edx,0(%rdi)
        movl    %ecx,4(%rdi)
        movl    %r8d,8(%rdi)
        movl    %r9d,12(%rdi)
        movd    %xmm12,%rdx
        movd    %xmm7,%rcx
        movd    %xmm10,%r8
        movd    %xmm4,%r9
        pshufd  $0x39,%xmm12,%xmm12
        pshufd  $0x39,%xmm7,%xmm7
        pshufd  $0x39,%xmm10,%xmm10
        pshufd  $0x39,%xmm4,%xmm4
        xorl    64(%rsi),%edx
        xorl    68(%rsi),%ecx
        xorl    72(%rsi),%r8d
        xorl    76(%rsi),%r9d
        movl    %edx,64(%rdi)
        movl    %ecx,68(%rdi)
        movl    %r8d,72(%rdi)
        movl    %r9d,76(%rdi)
        movd    %xmm12,%rdx
        movd    %xmm7,%rcx
        movd    %xmm10,%r8
        movd    %xmm4,%r9
        pshufd  $0x39,%xmm12,%xmm12
        pshufd  $0x39,%xmm7,%xmm7
        pshufd  $0x39,%xmm10,%xmm10
        pshufd  $0x39,%xmm4,%xmm4
        xorl    128(%rsi),%edx
        xorl    132(%rsi),%ecx
        xorl    136(%rsi),%r8d
        xorl    140(%rsi),%r9d
        movl    %edx,128(%rdi)
        movl    %ecx,132(%rdi)
        movl    %r8d,136(%rdi)
        movl    %r9d,140(%rdi)
        movd    %xmm12,%rdx
        movd    %xmm7,%rcx
        movd    %xmm10,%r8
        movd    %xmm4,%r9
        xorl    192(%rsi),%edx
        xorl    196(%rsi),%ecx
        xorl    200(%rsi),%r8d
        xorl    204(%rsi),%r9d
        movl    %edx,192(%rdi)
        movl    %ecx,196(%rdi)
        movl    %r8d,200(%rdi)
        movl    %r9d,204(%rdi)

        /* output words 4..7 */
        paddd   304(%rsp),%xmm14
        paddd   128(%rsp),%xmm0
        paddd   192(%rsp),%xmm5
        paddd   256(%rsp),%xmm8
        movd    %xmm14,%rdx
        movd    %xmm0,%rcx
        movd    %xmm5,%r8
        movd    %xmm8,%r9
        pshufd  $0x39,%xmm14,%xmm14
        pshufd  $0x39,%xmm0,%xmm0
        pshufd  $0x39,%xmm5,%xmm5
        pshufd  $0x39,%xmm8,%xmm8
        xorl    16(%rsi),%edx
        xorl    20(%rsi),%ecx
        xorl    24(%rsi),%r8d
        xorl    28(%rsi),%r9d
        movl    %edx,16(%rdi)
        movl    %ecx,20(%rdi)
        movl    %r8d,24(%rdi)
        movl    %r9d,28(%rdi)
        movd    %xmm14,%rdx
        movd    %xmm0,%rcx
        movd    %xmm5,%r8
        movd    %xmm8,%r9
        pshufd  $0x39,%xmm14,%xmm14
        pshufd  $0x39,%xmm0,%xmm0
        pshufd  $0x39,%xmm5,%xmm5
        pshufd  $0x39,%xmm8,%xmm8
        xorl    80(%rsi),%edx
        xorl    84(%rsi),%ecx
        xorl    88(%rsi),%r8d
        xorl    92(%rsi),%r9d
        movl    %edx,80(%rdi)
        movl    %ecx,84(%rdi)
        movl    %r8d,88(%rdi)
        movl    %r9d,92(%rdi)
        movd    %xmm14,%rdx
        movd    %xmm0,%rcx
        movd    %xmm5,%r8
        movd    %xmm8,%r9
        pshufd  $0x39,%xmm14,%xmm14
        pshufd  $0x39,%xmm0,%xmm0
        pshufd  $0x39,%xmm5,%xmm5
        pshufd  $0x39,%xmm8,%xmm8
        xorl    144(%rsi),%edx
        xorl    148(%rsi),%ecx
        xorl    152(%rsi),%r8d
        xorl    156(%rsi),%r9d
        movl    %edx,144(%rdi)
        movl    %ecx,148(%rdi)
        movl    %r8d,152(%rdi)
        movl    %r9d,156(%rdi)
        movd    %xmm14,%rdx
        movd    %xmm0,%rcx
        movd    %xmm5,%r8
        movd    %xmm8,%r9
        xorl    208(%rsi),%edx
        xorl    212(%rsi),%ecx
        xorl    216(%rsi),%r8d
        xorl    220(%rsi),%r9d
        movl    %edx,208(%rdi)
        movl    %ecx,212(%rdi)
        movl    %r8d,216(%rdi)
        movl    %r9d,220(%rdi)

        /* output words 8..11 */
        paddd   352(%rsp),%xmm15
        paddd   368(%rsp),%xmm11
        paddd   144(%rsp),%xmm1
        paddd   208(%rsp),%xmm6
        movd    %xmm15,%rdx
        movd    %xmm11,%rcx
        movd    %xmm1,%r8
        movd    %xmm6,%r9
        pshufd  $0x39,%xmm15,%xmm15
        pshufd  $0x39,%xmm11,%xmm11
        pshufd  $0x39,%xmm1,%xmm1
        pshufd  $0x39,%xmm6,%xmm6
        xorl    32(%rsi),%edx
        xorl    36(%rsi),%ecx
        xorl    40(%rsi),%r8d
        xorl    44(%rsi),%r9d
        movl    %edx,32(%rdi)
        movl    %ecx,36(%rdi)
        movl    %r8d,40(%rdi)
        movl    %r9d,44(%rdi)
        movd    %xmm15,%rdx
        movd    %xmm11,%rcx
        movd    %xmm1,%r8
        movd    %xmm6,%r9
        pshufd  $0x39,%xmm15,%xmm15
        pshufd  $0x39,%xmm11,%xmm11
        pshufd  $0x39,%xmm1,%xmm1
        pshufd  $0x39,%xmm6,%xmm6
        xorl    96(%rsi),%edx
        xorl    100(%rsi),%ecx
        xorl    104(%rsi),%r8d
        xorl    108(%rsi),%r9d
        movl    %edx,96(%rdi)
        movl    %ecx,100(%rdi)
        movl    %r8d,104(%rdi)
        movl    %r9d,108(%rdi)
        movd    %xmm15,%rdx
        movd    %xmm11,%rcx
        movd    %xmm1,%r8
        movd    %xmm6,%r9
        pshufd  $0x39,%xmm15,%xmm15
        pshufd  $0x39,%xmm11,%xmm11
        pshufd  $0x39,%xmm1,%xmm1
        pshufd  $0x39,%xmm6,%xmm6
        xorl    160(%rsi),%edx
        xorl    164(%rsi),%ecx
        xorl    168(%rsi),%r8d
        xorl    172(%rsi),%r9d
        movl    %edx,160(%rdi)
        movl    %ecx,164(%rdi)
        movl    %r8d,168(%rdi)
        movl    %r9d,172(%rdi)
        movd    %xmm15,%rdx
        movd    %xmm11,%rcx
        movd    %xmm1,%r8
        movd    %xmm6,%r9
        xorl    224(%rsi),%edx
        xorl    228(%rsi),%ecx
        xorl    232(%rsi),%r8d
        xorl    236(%rsi),%r9d
        movl    %edx,224(%rdi)
        movl    %ecx,228(%rdi)
        movl    %r8d,232(%rdi)
        movl    %r9d,236(%rdi)

        /* output words 12..15 */
        paddd   224(%rsp),%xmm13
        paddd   272(%rsp),%xmm9
        paddd   320(%rsp),%xmm3
        paddd   160(%rsp),%xmm2
        movd    %xmm13,%rdx
        movd    %xmm9,%rcx
        movd    %xmm3,%r8
        movd    %xmm2,%r9
        pshufd  $0x39,%xmm13,%xmm13
        pshufd  $0x39,%xmm9,%xmm9
        pshufd  $0x39,%xmm3,%xmm3
        pshufd  $0x39,%xmm2,%xmm2
        xorl    48(%rsi),%edx
        xorl    52(%rsi),%ecx
        xorl    56(%rsi),%r8d
        xorl    60(%rsi),%r9d
        movl    %edx,48(%rdi)
        movl    %ecx,52(%rdi)
        movl    %r8d,56(%rdi)
        movl    %r9d,60(%rdi)
        movd    %xmm13,%rdx
        movd    %xmm9,%rcx
        movd    %xmm3,%r8
        movd    %xmm2,%r9
        pshufd  $0x39,%xmm13,%xmm13
        pshufd  $0x39,%xmm9,%xmm9
        pshufd  $0x39,%xmm3,%xmm3
        pshufd  $0x39,%xmm2,%xmm2
        xorl    112(%rsi),%edx
        xorl    116(%rsi),%ecx
        xorl    120(%rsi),%r8d
        xorl    124(%rsi),%r9d
        movl    %edx,112(%rdi)
        movl    %ecx,116(%rdi)
        movl    %r8d,120(%rdi)
        movl    %r9d,124(%rdi)
        movd    %xmm13,%rdx
        movd    %xmm9,%rcx
        movd    %xmm3,%r8
        movd    %xmm2,%r9
        pshufd  $0x39,%xmm13,%xmm13
        pshufd  $0x39,%xmm9,%xmm9
        pshufd  $0x39,%xmm3,%xmm3
        pshufd  $0x39,%xmm2,%xmm2
        xorl    176(%rsi),%edx
        xorl    180(%rsi),%ecx
        xorl    184(%rsi),%r8d
        xorl    188(%rsi),%r9d
        movl    %edx,176(%rdi)
        movl    %ecx,180(%rdi)
        movl    %r8d,184(%rdi)
        movl    %r9d,188(%rdi)
        movd    %xmm13,%rdx
        movd    %xmm9,%rcx
        movd    %xmm3,%r8
        movd    %xmm2,%r9
        xorl    240(%rsi),%edx
        xorl    244(%rsi),%ecx
        xorl    248(%rsi),%r8d
        xorl    252(%rsi),%r9d
        movl    %edx,240(%rdi)
        movl    %ecx,244(%rdi)
        movl    %r8d,248(%rdi)
        movl    %r9d,252(%rdi)

        /* Advance by four blocks and loop while >= 256 bytes remain. */
        movq    480(%rsp),%r9
        sub     $256,%r9
        add     $256,%rsi
        add     $256,%rdi
        cmp     $256,%r9
        jae     ._bytesatleast256

        cmp     $0,%r9
        jbe     ._done

._bytesbetween1and255:
        /* One 64-byte block per pass.  If fewer than 64 bytes remain, copy
           the input into the bounce buffer at 0(%rsp), remember the real
           destination in rdx, and run the block in place on the stack. */
        cmp     $64,%r9
        jae     ._nocopy

        mov     %rdi,%rdx
        leaq    0(%rsp),%rdi
        mov     %r9,%rcx
        rep     movsb
        leaq    0(%rsp),%rdi
        leaq    0(%rsp),%rsi

._nocopy:
        movq    %r9,480(%rsp)
        movdqa  112(%rsp),%xmm0
        movdqa  64(%rsp),%xmm1
        movdqa  80(%rsp),%xmm2
        movdqa  96(%rsp),%xmm3
        movdqa  %xmm1,%xmm4
        mov     $20,%rcx                /* round counter, stepped by 4 */

        .p2align 4
._mainloop2:
        /* Five iterations (rcx = 20, 16, ... 4).  The state lives in
           xmm0..xmm3; xmm4..xmm6 are temporaries.  The pshufd
           $0x93/$0x4e/$0x39 instructions rotate lanes between steps.
           `sub $4,%rcx` sets the flags consumed by the closing `ja`; the
           SSE instructions in between do not touch EFLAGS. */
        paddd   %xmm0,%xmm4
        movdqa  %xmm0,%xmm5
        movdqa  %xmm4,%xmm6
        pslld   $7,%xmm4
        psrld   $25,%xmm6
        pxor    %xmm4,%xmm3
        pxor    %xmm6,%xmm3
        paddd   %xmm3,%xmm5
        movdqa  %xmm3,%xmm4
        movdqa  %xmm5,%xmm6
        pslld   $9,%xmm5
        psrld   $23,%xmm6
        pxor    %xmm5,%xmm2
        pshufd  $0x93,%xmm3,%xmm3
        pxor    %xmm6,%xmm2
        paddd   %xmm2,%xmm4
        movdqa  %xmm2,%xmm5
        movdqa  %xmm4,%xmm6
        pslld   $13,%xmm4
        psrld   $19,%xmm6
        pxor    %xmm4,%xmm1
        pshufd  $0x4e,%xmm2,%xmm2
        pxor    %xmm6,%xmm1
        paddd   %xmm1,%xmm5
        movdqa  %xmm3,%xmm4
        movdqa  %xmm5,%xmm6
        pslld   $18,%xmm5
        psrld   $14,%xmm6
        pxor    %xmm5,%xmm0
        pshufd  $0x39,%xmm1,%xmm1
        pxor    %xmm6,%xmm0
        paddd   %xmm0,%xmm4
        movdqa  %xmm0,%xmm5
        movdqa  %xmm4,%xmm6
        pslld   $7,%xmm4
        psrld   $25,%xmm6
        pxor    %xmm4,%xmm1
        pxor    %xmm6,%xmm1
        paddd   %xmm1,%xmm5
        movdqa  %xmm1,%xmm4
        movdqa  %xmm5,%xmm6
        pslld   $9,%xmm5
        psrld   $23,%xmm6
        pxor    %xmm5,%xmm2
        pshufd  $0x93,%xmm1,%xmm1
        pxor    %xmm6,%xmm2
        paddd   %xmm2,%xmm4
        movdqa  %xmm2,%xmm5
        movdqa  %xmm4,%xmm6
        pslld   $13,%xmm4
        psrld   $19,%xmm6
        pxor    %xmm4,%xmm3
        pshufd  $0x4e,%xmm2,%xmm2
        pxor    %xmm6,%xmm3
        paddd   %xmm3,%xmm5
        movdqa  %xmm1,%xmm4
        movdqa  %xmm5,%xmm6
        pslld   $18,%xmm5
        psrld   $14,%xmm6
        pxor    %xmm5,%xmm0
        pshufd  $0x39,%xmm3,%xmm3
        pxor    %xmm6,%xmm0
        paddd   %xmm0,%xmm4
        movdqa  %xmm0,%xmm5
        movdqa  %xmm4,%xmm6
        pslld   $7,%xmm4
        psrld   $25,%xmm6
        pxor    %xmm4,%xmm3
        pxor    %xmm6,%xmm3
        paddd   %xmm3,%xmm5
        movdqa  %xmm3,%xmm4
        movdqa  %xmm5,%xmm6
        pslld   $9,%xmm5
        psrld   $23,%xmm6
        pxor    %xmm5,%xmm2
        pshufd  $0x93,%xmm3,%xmm3
        pxor    %xmm6,%xmm2
        paddd   %xmm2,%xmm4
        movdqa  %xmm2,%xmm5
        movdqa  %xmm4,%xmm6
        pslld   $13,%xmm4
        psrld   $19,%xmm6
        pxor    %xmm4,%xmm1
        pshufd  $0x4e,%xmm2,%xmm2
        pxor    %xmm6,%xmm1
        paddd   %xmm1,%xmm5
        movdqa  %xmm3,%xmm4
        movdqa  %xmm5,%xmm6
        pslld   $18,%xmm5
        psrld   $14,%xmm6
        pxor    %xmm5,%xmm0
        pshufd  $0x39,%xmm1,%xmm1
        pxor    %xmm6,%xmm0
        paddd   %xmm0,%xmm4
        movdqa  %xmm0,%xmm5
        movdqa  %xmm4,%xmm6
        pslld   $7,%xmm4
        psrld   $25,%xmm6
        pxor    %xmm4,%xmm1
        pxor    %xmm6,%xmm1
        paddd   %xmm1,%xmm5
        movdqa  %xmm1,%xmm4
        movdqa  %xmm5,%xmm6
        pslld   $9,%xmm5
        psrld   $23,%xmm6
        pxor    %xmm5,%xmm2
        pshufd  $0x93,%xmm1,%xmm1
        pxor    %xmm6,%xmm2
        paddd   %xmm2,%xmm4
        movdqa  %xmm2,%xmm5
        movdqa  %xmm4,%xmm6
        pslld   $13,%xmm4
        psrld   $19,%xmm6
        pxor    %xmm4,%xmm3
        pshufd  $0x4e,%xmm2,%xmm2
        pxor    %xmm6,%xmm3
        sub     $4,%rcx
        paddd   %xmm3,%xmm5
        movdqa  %xmm1,%xmm4
        movdqa  %xmm5,%xmm6
        pslld   $18,%xmm5
        pxor    %xmm7,%xmm7
        psrld   $14,%xmm6
        pxor    %xmm5,%xmm0
        pshufd  $0x39,%xmm3,%xmm3
        pxor    %xmm6,%xmm0
        ja      ._mainloop2

        /* Feed-forward with the saved rows, then XOR/store word by word.
           The rows are in diagonal order, so lane k of each register maps
           to a different output offset (see the displacements below). */
        paddd   112(%rsp),%xmm0
        paddd   64(%rsp),%xmm1
        paddd   80(%rsp),%xmm2
        paddd   96(%rsp),%xmm3
        movd    %xmm0,%rcx
        movd    %xmm1,%r8
        movd    %xmm2,%r9
        movd    %xmm3,%rax
        pshufd  $0x39,%xmm0,%xmm0
        pshufd  $0x39,%xmm1,%xmm1
        pshufd  $0x39,%xmm2,%xmm2
        pshufd  $0x39,%xmm3,%xmm3
        xorl    0(%rsi),%ecx
        xorl    48(%rsi),%r8d
        xorl    32(%rsi),%r9d
        xorl    16(%rsi),%eax
        movl    %ecx,0(%rdi)
        movl    %r8d,48(%rdi)
        movl    %r9d,32(%rdi)
        movl    %eax,16(%rdi)
        movd    %xmm0,%rcx
        movd    %xmm1,%r8
        movd    %xmm2,%r9
        movd    %xmm3,%rax
        pshufd  $0x39,%xmm0,%xmm0
        pshufd  $0x39,%xmm1,%xmm1
        pshufd  $0x39,%xmm2,%xmm2
        pshufd  $0x39,%xmm3,%xmm3
        xorl    20(%rsi),%ecx
        xorl    4(%rsi),%r8d
        xorl    52(%rsi),%r9d
        xorl    36(%rsi),%eax
        movl    %ecx,20(%rdi)
        movl    %r8d,4(%rdi)
        movl    %r9d,52(%rdi)
        movl    %eax,36(%rdi)
        movd    %xmm0,%rcx
        movd    %xmm1,%r8
        movd    %xmm2,%r9
        movd    %xmm3,%rax
        pshufd  $0x39,%xmm0,%xmm0
        pshufd  $0x39,%xmm1,%xmm1
        pshufd  $0x39,%xmm2,%xmm2
        pshufd  $0x39,%xmm3,%xmm3
        xorl    40(%rsi),%ecx
        xorl    24(%rsi),%r8d
        xorl    8(%rsi),%r9d
        xorl    56(%rsi),%eax
        movl    %ecx,40(%rdi)
        movl    %r8d,24(%rdi)
        movl    %r9d,8(%rdi)
        movl    %eax,56(%rdi)
        movd    %xmm0,%rcx
        movd    %xmm1,%r8
        movd    %xmm2,%r9
        movd    %xmm3,%rax
        xorl    60(%rsi),%ecx
        xorl    44(%rsi),%r8d
        xorl    28(%rsi),%r9d
        xorl    12(%rsi),%eax
        movl    %ecx,60(%rdi)
        movl    %r8d,44(%rdi)
        movl    %r9d,28(%rdi)
        movl    %eax,12(%rdi)

        /* Restore the length, bump the 64-bit block counter by one and
           refresh its two halves in the saved rows. */
        movq    480(%rsp),%r9
        movq    472(%rsp),%rcx
        add     $1,%rcx
        mov     %rcx,%r8
        shr     $32,%r8
        movl    %ecx,80(%rsp)
        movl    %r8d,4+96(%rsp)
        movq    %rcx,472(%rsp)
        cmp     $64,%r9
        ja      ._bytesatleast65        /* more than one block was left   */
        jae     ._bytesatleast64        /* exactly one block: finished    */

        /* Short final block: copy r9 bytes from the bounce buffer (rdi) to
           the real destination saved in rdx. */
        mov     %rdi,%rsi
        mov     %rdx,%rdi
        mov     %r9,%rcx
        rep     movsb

._bytesatleast64:
._done:
        /* Common exit: reload the saved registers, release the frame using
           the size stored at 416(%rsp), and return 0. */
        movq    416(%rsp),%r11
        movq    424(%rsp),%r12
        movq    432(%rsp),%r13
        movq    440(%rsp),%r14
        movq    448(%rsp),%r15
        movq    456(%rsp),%rbx
        movq    464(%rsp),%rbp
        add     %r11,%rsp
        xor     %rax,%rax
        mov     %rsi,%rdx               /* nothing in this file reads rdx afterwards */
        ret

._bytesatleast65:
        sub     $64,%r9
        add     $64,%rdi
        add     $64,%rsi
        jmp     ._bytesbetween1and255
#ifdef __ELF__
        .size   stream_salsa20_xmm6_xor_ic, .-stream_salsa20_xmm6_xor_ic
        .size   _stream_salsa20_xmm6_xor_ic, .-_stream_salsa20_xmm6_xor_ic
#endif

#endif

/* Emit an empty .note.GNU-stack section (no flags set) on Linux/ELF. */
#if defined(__linux__) && defined(__ELF__)
        .section .note.GNU-stack,"",%progbits
#endif