/* via_memcpy.c revision 7e6fb56f */
/*
 * Copyright (C) 2004 Thomas Hellström, All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
22 */ 23 24#ifdef HAVE_CONFIG_H 25#include "config.h" 26#endif 27 28#include "via.h" 29#include "via_driver.h" 30#include "via_memcpy.h" 31#include "compiler.h" 32 33 34#define BSIZ 2048 /* size of /proc/cpuinfo buffer */ 35#define BSIZW 720 /* typical copy width (YUV420) */ 36#define BSIZA 736 /* multiple of 32 bytes */ 37#define BSIZH 576 /* typical copy height */ 38 39#define SSE_PREFETCH " prefetchnta " 40#define FENCE __asm__ __volatile__ ("sfence":::"memory"); 41#define FENCEMMS __asm__ __volatile__ ("\t" \ 42 "sfence\n\t" \ 43 "emms\n\t" \ 44 :::"memory"); 45#define FEMMS __asm__ __volatile__("femms":::"memory"); 46#define EMMS __asm__ __volatile__("emms":::"memory"); 47 48#define NOW_PREFETCH " prefetch " 49 50 51#define PREFETCH1(arch_prefetch,from) \ 52 __asm__ __volatile__ ( \ 53 "1: " arch_prefetch "(%0)\n" \ 54 arch_prefetch "32(%0)\n" \ 55 arch_prefetch "64(%0)\n" \ 56 arch_prefetch "96(%0)\n" \ 57 arch_prefetch "128(%0)\n" \ 58 arch_prefetch "160(%0)\n" \ 59 arch_prefetch "192(%0)\n" \ 60 arch_prefetch "256(%0)\n" \ 61 arch_prefetch "288(%0)\n" \ 62 "2:\n" \ 63 : : "r" (from) ); 64 65#define PREFETCH2(arch_prefetch,from) \ 66 __asm__ __volatile__ ( \ 67 arch_prefetch "320(%0)\n" \ 68 : : "r" (from) ); 69#define PREFETCH3(arch_prefetch,from) \ 70 __asm__ __volatile__ ( \ 71 arch_prefetch "288(%0)\n" \ 72 : : "r" (from) ); 73 74 75#define small_memcpy(to, from, n) \ 76 { \ 77 __asm__ __volatile__( \ 78 "movl %2,%%ecx\n\t" \ 79 "sarl $2,%%ecx\n\t" \ 80 "rep ; movsl\n\t" \ 81 "testb $2,%b2\n\t" \ 82 "je 1f\n\t" \ 83 "movsw\n" \ 84 "1:\ttestb $1,%b2\n\t" \ 85 "je 2f\n\t" \ 86 "movsb\n" \ 87 "2:" \ 88 :"=&D" (to), "=&S" (from) \ 89 :"q" (n),"0" ((long) to),"1" ((long) from) \ 90 : "%ecx","memory"); \ 91 } 92 93 94#define SSE_CPY(prefetch, from, to, dummy, lcnt) \ 95 if ((unsigned long) from & 15) { \ 96 __asm__ __volatile__ ( \ 97 "1:\n" \ 98 prefetch "320(%1)\n" \ 99 " movups (%1), %%xmm0\n" \ 100 " movups 16(%1), %%xmm1\n" \ 101 " movntps %%xmm0, (%0)\n" 
\ 102 " movntps %%xmm1, 16(%0)\n" \ 103 prefetch "352(%1)\n" \ 104 " movups 32(%1), %%xmm2\n" \ 105 " movups 48(%1), %%xmm3\n" \ 106 " movntps %%xmm2, 32(%0)\n" \ 107 " movntps %%xmm3, 48(%0)\n" \ 108 " addl $64,%0\n" \ 109 " addl $64,%1\n" \ 110 " decl %2\n" \ 111 " jne 1b\n" \ 112 :"=&D"(to), "=&S"(from), "=&r"(dummy) \ 113 :"0" (to), "1" (from), "2" (lcnt): "memory"); \ 114 } else { \ 115 __asm__ __volatile__ ( \ 116 "2:\n" \ 117 prefetch "320(%1)\n" \ 118 " movaps (%1), %%xmm0\n" \ 119 " movaps 16(%1), %%xmm1\n" \ 120 " movntps %%xmm0, (%0)\n" \ 121 " movntps %%xmm1, 16(%0)\n" \ 122 prefetch "352(%1)\n" \ 123 " movaps 32(%1), %%xmm2\n" \ 124 " movaps 48(%1), %%xmm3\n" \ 125 " movntps %%xmm2, 32(%0)\n" \ 126 " movntps %%xmm3, 48(%0)\n" \ 127 " addl $64,%0\n" \ 128 " addl $64,%1\n" \ 129 " decl %2\n" \ 130 " jne 2b\n" \ 131 :"=&D"(to), "=&S"(from), "=&r"(dummy) \ 132 :"0" (to), "1" (from), "2" (lcnt): "memory"); \ 133 } 134 135#define MMX_CPY(prefetch, from, to, dummy, lcnt) \ 136 __asm__ __volatile__ ( \ 137 "1:\n" \ 138 prefetch "320(%1)\n" \ 139 "2: movq (%1), %%mm0\n" \ 140 " movq 8(%1), %%mm1\n" \ 141 " movq 16(%1), %%mm2\n" \ 142 " movq 24(%1), %%mm3\n" \ 143 " movq %%mm0, (%0)\n" \ 144 " movq %%mm1, 8(%0)\n" \ 145 " movq %%mm2, 16(%0)\n" \ 146 " movq %%mm3, 24(%0)\n" \ 147 prefetch "352(%1)\n" \ 148 " movq 32(%1), %%mm0\n" \ 149 " movq 40(%1), %%mm1\n" \ 150 " movq 48(%1), %%mm2\n" \ 151 " movq 56(%1), %%mm3\n" \ 152 " movq %%mm0, 32(%0)\n" \ 153 " movq %%mm1, 40(%0)\n" \ 154 " movq %%mm2, 48(%0)\n" \ 155 " movq %%mm3, 56(%0)\n" \ 156 " addl $64,%0\n" \ 157 " addl $64,%1\n" \ 158 " decl %2\n" \ 159 " jne 1b\n" \ 160 :"=&D"(to), "=&S"(from), "=&r"(dummy) \ 161 :"0" (to), "1" (from), "2" (lcnt) : "memory"); 162 163#define MMXEXT_CPY(prefetch, from, to, dummy, lcnt) \ 164 __asm__ __volatile__ ( \ 165 ".p2align 4,,7\n" \ 166 "1:\n" \ 167 prefetch "320(%1)\n" \ 168 " movq (%1), %%mm0\n" \ 169 " movq 8(%1), %%mm1\n" \ 170 " movq 16(%1), %%mm2\n" \ 171 " movq 
24(%1), %%mm3\n" \ 172 " movntq %%mm0, (%0)\n" \ 173 " movntq %%mm1, 8(%0)\n" \ 174 " movntq %%mm2, 16(%0)\n" \ 175 " movntq %%mm3, 24(%0)\n" \ 176 prefetch "352(%1)\n" \ 177 " movq 32(%1), %%mm0\n" \ 178 " movq 40(%1), %%mm1\n" \ 179 " movq 48(%1), %%mm2\n" \ 180 " movq 56(%1), %%mm3\n" \ 181 " movntq %%mm0, 32(%0)\n" \ 182 " movntq %%mm1, 40(%0)\n" \ 183 " movntq %%mm2, 48(%0)\n" \ 184 " movntq %%mm3, 56(%0)\n" \ 185 " addl $64,%0\n" \ 186 " addl $64,%1\n" \ 187 " decl %2\n" \ 188 " jne 1b\n" \ 189 :"=&D"(to), "=&S"(from), "=&r"(dummy) \ 190 :"0" (to), "1" (from), "2" (lcnt) : "memory"); 191 192 193#define PREFETCH_FUNC(prefix, itype, ptype, begin, fence) \ 194 \ 195 static void prefix##_YUV42X(unsigned char *to, \ 196 const unsigned char *from, \ 197 int dstPitch, \ 198 int w, \ 199 int h, \ 200 int yuv422) \ 201 { \ 202 int dadd, rest, count, hc, lcnt; \ 203 register int dummy; \ 204 PREFETCH1(ptype##_PREFETCH, from); \ 205 begin; \ 206 count = 2; \ 207 \ 208 /* If destination pitch equals width, do it all in one go. 
*/ \ 209 \ 210 if (yuv422) { \ 211 w <<= 1; \ 212 if (w == dstPitch) { \ 213 w *= h; \ 214 h = 1; \ 215 dstPitch = w; \ 216 count = 0; \ 217 } else { \ 218 h -= 1; \ 219 count = 1; \ 220 } \ 221 } else if (w == dstPitch) { \ 222 w = h*(w + (w >> 1)); \ 223 count = 0; \ 224 h = 1; \ 225 dstPitch = w; \ 226 } \ 227 \ 228 lcnt = w >> 6; \ 229 rest = w & 63; \ 230 while (count--) { \ 231 hc = h; \ 232 lcnt = w >> 6; \ 233 rest = w & 63; \ 234 dadd = dstPitch - w; \ 235 while (hc--) { \ 236 if (lcnt) { \ 237 itype##_CPY(ptype##_PREFETCH, from, to, dummy, lcnt); \ 238 } \ 239 if (rest) { \ 240 PREFETCH2(ptype##_PREFETCH, from); \ 241 small_memcpy(to, from, rest); \ 242 PREFETCH3(ptype##_PREFETCH, from); \ 243 } \ 244 to += dadd; \ 245 } \ 246 w >>= 1; \ 247 dstPitch >>= 1; \ 248 h -= 1; \ 249 } \ 250 if (lcnt > 5) { \ 251 lcnt -= 5; \ 252 itype##_CPY(ptype##_PREFETCH, from, to, dummy, lcnt); \ 253 lcnt = 5; \ 254 } \ 255 if (lcnt) { \ 256 itype##_CPY("#", from, to, dummy, lcnt); \ 257 } \ 258 if (rest) small_memcpy(to, from, rest); \ 259 fence; \ 260 } 261 262#define NOPREFETCH_FUNC(prefix, itype, begin, fence) \ 263 static void prefix##_YUV42X(unsigned char *to, \ 264 const unsigned char *from, \ 265 int dstPitch, \ 266 int w, \ 267 int h, \ 268 int yuv422) \ 269 \ 270 { \ 271 int dadd, rest, count, hc, lcnt; \ 272 register int dummy; \ 273 begin; \ 274 count = 2; \ 275 \ 276 /* If destination pitch equals width, do it all in one go. 
*/ \ 277 \ 278 if (yuv422) { \ 279 w <<= 1; \ 280 count = 1; \ 281 if (w == dstPitch) { \ 282 w *= h; \ 283 h = 1; \ 284 dstPitch = w; \ 285 } \ 286 } else if (w == dstPitch) { \ 287 w = h*(w + (w >> 1)); \ 288 count = 1; \ 289 h = 1; \ 290 dstPitch = w; \ 291 } \ 292 \ 293 lcnt = w >> 6; \ 294 rest = w & 63; \ 295 while (count--) { \ 296 hc = h; \ 297 dadd = dstPitch - w; \ 298 lcnt = w >> 6; \ 299 rest = w & 63; \ 300 while (hc--) { \ 301 if (lcnt) { \ 302 itype##_CPY("#", from, to, dummy, lcnt); \ 303 } \ 304 if (rest) small_memcpy(to, from, rest); \ 305 to += dadd; \ 306 } \ 307 w >>= 1; \ 308 dstPitch >>= 1; \ 309 } \ 310 fence; \ 311 } 312 313 314#if !defined(__i386__) || (defined(linux) && defined(__i386__)) 315 316static void 317libc_YUV42X(unsigned char *dst, const unsigned char *src, 318 int dstPitch, int w, int h, int yuv422) 319{ 320 if (yuv422) 321 w <<= 1; 322 if (dstPitch == w) { 323 int size = h * ((yuv422) ? w : (w + (w >> 1))); 324 325 memcpy(dst, src, size); 326 return; 327 } else { 328 int count; 329 330 /* Copy Y component to video memory. */ 331 count = h; 332 while (count--) { 333 memcpy(dst, src, w); 334 src += w; 335 dst += dstPitch; 336 } 337 338 /* UV component is 1/2 of Y. */ 339 if (!yuv422) { 340 w >>= 1; 341 dstPitch >>= 1; 342 343 /* Copy V(Cr),U(Cb) components to video memory. */ 344 count = h; 345 while (count--) { 346 memcpy(dst, src, w); 347 src += w; 348 dst += dstPitch; 349 } 350 } 351 } 352} 353#endif 354 355 356#ifdef __i386__ 357 358/* Linux kernel __memcpy. 
*/ 359static __inline void * 360__memcpy(void *to, const void *from, size_t n) 361{ 362 int d1, d2, d3; 363 364 __asm__ __volatile__( 365 "rep ; movsl\n\t" 366 "testb $2,%b4\n\t" 367 "je 1f\n\t" 368 "movsw\n" 369 "1:\ttestb $1,%b4\n\t" 370 "je 2f\n\t" 371 "movsb\n" 372 "2:" 373 :"=&c"(d1), "=&D"(d2), "=&S"(d3) 374 :"0"(n >> 2), "q"(n), "1"((long)to), "2"((long)from) 375 :"memory"); 376 377 return (to); 378} 379 380 381static void 382kernel_YUV42X(unsigned char *dst, const unsigned char *src, 383 int dstPitch, int w, int h, int yuv422) 384{ 385 if (yuv422) 386 w <<= 1; 387 if (dstPitch == w) { 388 int size = h * ((yuv422) ? w : (w + (w >> 1))); 389 390 __memcpy(dst, src, size); 391 return; 392 } else { 393 int count; 394 395 /* Copy Y component to video memory. */ 396 count = h; 397 while (count--) { 398 __memcpy(dst, src, w); 399 src += w; 400 dst += dstPitch; 401 } 402 403 /* UV component is 1/2 of Y. */ 404 if (!yuv422) { 405 406 w >>= 1; 407 dstPitch >>= 1; 408 409 /* Copy V(Cr),U(Cb) components to video memory. */ 410 count = h; 411 while (count--) { 412 __memcpy(dst, src, w); 413 src += w; 414 dst += dstPitch; 415 } 416 } 417 } 418} 419 420#ifdef linux 421PREFETCH_FUNC(sse, SSE, SSE,, FENCE) 422PREFETCH_FUNC(mmxext, MMXEXT, SSE, EMMS, FENCEMMS) 423PREFETCH_FUNC(now, MMX, NOW, FEMMS, FEMMS) 424NOPREFETCH_FUNC(mmx, MMX, EMMS, EMMS) 425 426static void 427*kernel_memcpy(void *to, const void *from, size_t len) 428{ 429 return __memcpy(to, from, len); 430} 431 432static unsigned 433fastrdtsc(void) 434{ 435 unsigned eax; 436 437 __asm__ volatile ("\t" 438 "pushl %%ebx\n\t" 439 "cpuid\n\t" 440 ".byte 0x0f, 0x31\n\t" 441 "popl %%ebx\n" 442 :"=a" (eax) 443 :"0"(0) 444 :"ecx", "edx", "cc"); 445 446 return eax; 447} 448 449 450static unsigned 451time_function(vidCopyFunc mf, unsigned char *buf1, unsigned char *buf2) 452{ 453 unsigned t, t2; 454 455 t = fastrdtsc(); 456 457 (*mf) (buf1, buf2, BSIZA, BSIZW, BSIZH, 0); 458 459 t2 = fastrdtsc(); 460 return ((t < t2) ? 
t2 - t : 0xFFFFFFFFU - (t - t2 - 1)); 461} 462 463enum 464{ libc = 0, kernel, sse, mmx, now, mmxext, totNum }; 465 466typedef struct 467{ 468 vidCopyFunc mFunc; 469 char *mName, **cpuFlag; 470} McFuncData; 471 472static char *libc_cpuflags[] = { " ", 0 }; 473static char *kernel_cpuflags[] = { " ", 0 }; 474static char *sse_cpuflags[] = { " sse ", 0 }; 475static char *mmx_cpuflags[] = { " mmx ", 0 }; 476static char *now_cpuflags[] = { " 3dnow ", 0 }; 477static char *mmx2_cpuflags[] = { " mmxext ", " sse ", 0 }; 478 479static McFuncData mcFunctions[totNum] = { 480{libc_YUV42X, "libc", libc_cpuflags}, 481{kernel_YUV42X, "kernel", kernel_cpuflags}, 482{sse_YUV42X, "SSE", sse_cpuflags}, 483{mmx_YUV42X, "MMX", mmx_cpuflags}, 484{now_YUV42X, "3DNow!", now_cpuflags}, 485{mmxext_YUV42X, "MMX2", mmx2_cpuflags} 486}; 487 488 489static int 490flagValid(const char *cpuinfo, char *flag) 491{ 492 const char *flagLoc, *nextProc; 493 int located = 0; 494 495 while ((cpuinfo = strstr(cpuinfo, "processor\t:"))) { 496 located = 1; 497 cpuinfo += 11; 498 if ((flagLoc = strstr(cpuinfo, flag))) { 499 if ((nextProc = strstr(cpuinfo, "processor\t:"))) { 500 if (nextProc < flagLoc) 501 return 0; 502 } 503 } else { 504 return 0; 505 } 506 } 507 return located; 508} 509 510 511static int 512cpuValid(const char *cpuinfo, char **flags) 513{ 514 for (; *flags != 0; flags++) { 515 if (flagValid(cpuinfo, *flags)) 516 return 1; 517 } 518 return 0; 519} 520#endif /* linux */ 521 522/* 523 * Benchmark the video copy routines and choose the fastest. 
524 */ 525vidCopyFunc 526viaVidCopyInit(char *copyType, ScreenPtr pScreen) 527{ 528 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 529 530#ifdef linux 531 char buf[BSIZ]; 532 unsigned char *buf1, *buf2, *buf3; 533 char *tmpBuf, *endBuf; 534 int count, j, bestSoFar; 535 unsigned best, tmp, testSize, alignSize, tmp2; 536 VIAMem tmpFbBuffer; 537 McFuncData *curData; 538 FILE *cpuInfoFile; 539 double cpuFreq; 540 VIAPtr pVia = VIAPTR(pScrn); 541 542 if (NULL == (cpuInfoFile = fopen("/proc/cpuinfo", "r"))) { 543 return libc_YUV42X; 544 } 545 count = fread(buf, 1, BSIZ, cpuInfoFile); 546 if (ferror(cpuInfoFile)) { 547 fclose(cpuInfoFile); 548 return libc_YUV42X; 549 } 550 fclose(cpuInfoFile); 551 if (BSIZ == count) { 552 xf86DrvMsg(pScrn->scrnIndex, X_WARNING, 553 "\"/proc/cpuinfo\" file too long. " 554 "Using Linux kernel memcpy.\n"); 555 return libc_YUV42X; 556 } 557 buf[count] = 0; 558 559 while (count--) 560 if ('\n' == buf[count]) 561 buf[count] = ' '; 562 563 /* Extract the CPU frequency. */ 564 cpuFreq = 0.; 565 if (NULL != (tmpBuf = strstr(buf, "cpu MHz"))) { 566 if (NULL != (tmpBuf = strstr(tmpBuf, ":") + 1)) { 567 cpuFreq = strtod(tmpBuf, &endBuf); 568 if (endBuf == tmpBuf) 569 tmpBuf = NULL; 570 } 571 } 572 573 alignSize = BSIZH * (BSIZA + (BSIZA >> 1)); 574 testSize = BSIZH * (BSIZW + (BSIZW >> 1)); 575 tmpFbBuffer.pool = 0; 576 577 /* 578 * Allocate an area of offscreen FB memory, (buf1), a simulated video 579 * player buffer (buf2) and a pool of uninitialized "video" data (buf3). 
580 */ 581 582 if (VIAAllocLinear(&tmpFbBuffer, pScrn, alignSize + 31)) 583 return libc_YUV42X; 584 if (NULL == (buf2 = (unsigned char *)xalloc(testSize))) { 585 VIAFreeLinear(&tmpFbBuffer); 586 return libc_YUV42X; 587 } 588 if (NULL == (buf3 = (unsigned char *)xalloc(testSize))) { 589 xfree(buf2); 590 VIAFreeLinear(&tmpFbBuffer); 591 return libc_YUV42X; 592 } 593 buf1 = (unsigned char *)pVia->FBBase + tmpFbBuffer.base; 594 595 /* Align the frame buffer destination memory to a 32 byte boundary. */ 596 if ((unsigned long)buf1 & 31) 597 buf1 += (32 - ((unsigned long)buf1 & 31)); 598 599 bestSoFar = 0; 600 best = 0xFFFFFFFFU; 601 602 /* Make probable that buf1 and buf2 are in memory by referencing them. */ 603 libc_YUV42X(buf1, buf2, BSIZA, BSIZW, BSIZH, 0); 604 605 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 606 "Benchmarking %s copy. Less time is better.\n", copyType); 607 for (j = 0; j < totNum; ++j) { 608 curData = mcFunctions + j; 609 610 if (cpuValid(buf, curData->cpuFlag)) { 611 612 /* Simulate setup of the video buffer. */ 613 kernel_memcpy(buf2, buf3, testSize); 614 615 /* Copy the video buffer to frame-buffer memory. */ 616 tmp = time_function(curData->mFunc, buf1, buf2); 617 618 /* Do it again to avoid context-switch effects. */ 619 kernel_memcpy(buf2, buf3, testSize); 620 tmp2 = time_function(curData->mFunc, buf1, buf2); 621 tmp = (tmp2 < tmp) ? tmp2 : tmp; 622 623 if (NULL == tmpBuf) { 624 xf86DrvMsg(pScrn->scrnIndex, X_PROBED, 625 "Timed %6s YUV420 copy... %u.\n", 626 curData->mName, tmp); 627 } else { 628 xf86DrvMsg(pScrn->scrnIndex, X_PROBED, 629 "Timed %6s YUV420 copy... %u. " 630 "Throughput: %.1f MiB/s.\n", 631 curData->mName, tmp, 632 cpuFreq * 1.e6 * (double)testSize / 633 ((double)(tmp) * (double)(0x100000))); 634 } 635 if (tmp < best) { 636 best = tmp; 637 bestSoFar = j; 638 } 639 } else { 640 xf86DrvMsg(pScrn->scrnIndex, X_PROBED, 641 "Ditching %6s YUV420 copy. 
Not supported by CPU.\n", 642 curData->mName); 643 } 644 } 645 xfree(buf3); 646 xfree(buf2); 647 VIAFreeLinear(&tmpFbBuffer); 648 xf86DrvMsg(pScrn->scrnIndex, X_PROBED, 649 "Using %s YUV42X copy for %s.\n", 650 mcFunctions[bestSoFar].mName, copyType); 651 return mcFunctions[bestSoFar].mFunc; 652#else 653 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 654 "Using Linux kernel memcpy for video.\n"); 655 return kernel_YUV42X; 656#endif /* linux */ 657} 658 659#else 660 661vidCopyFunc 662viaVidCopyInit(char *copyType, ScreenPtr pScreen) 663{ 664 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 665 666 xf86DrvMsg(pScrn->scrnIndex, X_INFO, 667 "Using default xfree86 memcpy for video.\n"); 668 return libc_YUV42X; 669} 670 671#endif /* __i386__ */ 672