tseng_accel.c revision 962c3257
1 2#ifdef HAVE_CONFIG_H 3#include "config.h" 4#endif 5 6/* 7 * if NO_OPTIMIZE is set, some optimizations are disabled. 8 * 9 * What it basically tries to do is minimize the amounts of writes to 10 * accelerator registers, since these are the ones that slow down small 11 * operations a lot. 12 */ 13/* #define NO_OPTIMIZE */ 14 15/* 16 * if ET6K_TRANSPARENCY is set, ScreentoScreenCopy operations (and pattern 17 * fills) will support transparency. But then the planemask support has to 18 * be dropped. The default here is to support planemasks, because all Tseng 19 * chips can do this. Only the ET6000 supports a transparency compare. The 20 * code could be easily changed to support transparency on the ET6000 and 21 * planemasks on the others, but that's only useful when transparency is 22 * more important than planemasks. 23 */ 24#undef ET6K_TRANSPARENCY 25 26#include "tseng.h" 27#include "tseng_accel.h" 28 29#include "miline.h" 30 31/* 32 * conversion from X ROPs to Microsoft ROPs. 33 */ 34 35static int W32OpTable[] = 36{ 37 0x00, /* Xclear 0 */ 38 0x88, /* Xand src AND dst */ 39 0x44, /* XandReverse src AND NOT dst */ 40 0xcc, /* Xcopy src */ 41 0x22, /* XandInverted NOT src AND dst */ 42 0xaa, /* Xnoop dst */ 43 0x66, /* Xxor src XOR dst */ 44 0xee, /* Xor src OR dst */ 45 0x11, /* Xnor NOT src AND NOT dst */ 46 0x99, /* Xequiv NOT src XOR dst */ 47 0x55, /* Xinvert NOT dst */ 48 0xdd, /* XorReverse src OR NOT dst */ 49 0x33, /* XcopyInverted NOT src */ 50 0xbb, /* XorInverted NOT src OR dst */ 51 0x77, /* Xnand NOT src OR NOT dst */ 52 0xff /* Xset 1 */ 53}; 54 55static int W32OpTable_planemask[] = 56{ 57 0x0a, /* Xclear 0 */ 58 0x8a, /* Xand src AND dst */ 59 0x4a, /* XandReverse src AND NOT dst */ 60 0xca, /* Xcopy src */ 61 0x2a, /* XandInverted NOT src AND dst */ 62 0xaa, /* Xnoop dst */ 63 0x6a, /* Xxor src XOR dst */ 64 0xea, /* Xor src OR dst */ 65 0x1a, /* Xnor NOT src AND NOT dst */ 66 0x9a, /* Xequiv NOT src XOR dst */ 67 0x5a, /* Xinvert NOT dst */ 68 0xda, /* XorReverse src OR NOT dst */ 69 0x3a, /* XcopyInverted NOT src */ 70 0xba, /* XorInverted NOT src OR dst */ 71 0x7a, /* Xnand NOT src OR NOT dst */ 72 0xfa /* Xset 1 */ 73}; 74 75static int W32PatternOpTable[] = 76{ 77 0x00, /* Xclear 0 */ 78 0xa0, /* Xand pat AND dst */ 79 0x50, /* XandReverse pat AND NOT dst */ 80 0xf0, /* Xcopy pat */ 81 0x0a, /* XandInverted NOT pat AND dst */ 82 0xaa, /* Xnoop dst */ 83 0x5a, /* Xxor pat XOR dst */ 84 0xfa, /* Xor pat OR dst */ 85 0x05, /* Xnor NOT pat AND NOT dst */ 86 0xa5, /* Xequiv NOT pat XOR dst */ 87 0x55, /* Xinvert NOT dst */ 88 0xf5, /* XorReverse pat OR NOT dst */ 89 0x0f, /* XcopyInverted NOT pat */ 90 0xaf, /* XorInverted NOT pat OR dst */ 91 0x5f, /* Xnand NOT pat OR NOT dst */ 92 0xff /* Xset 1 */ 93}; 94 95 96 97/**********************************************************************/ 98 99static void 100tseng_terminate_acl(TsengPtr pTseng) 101{ 102 /* only terminate when needed */ 103/* if (*(volatile unsigned char *)ACL_ACCELERATOR_STATUS & 0x06) */ 104 { 105 ACL_SUSPEND_TERMINATE(0x00); 106 /* suspend any running operation */ 107 ACL_SUSPEND_TERMINATE(0x01); 108 WAIT_ACL; 109 ACL_SUSPEND_TERMINATE(0x00); 110 /* ... and now terminate it */ 111 ACL_SUSPEND_TERMINATE(0x10); 112 WAIT_ACL; 113 ACL_SUSPEND_TERMINATE(0x00); 114 } 115} 116 117static void 118tseng_recover_timeout(TsengPtr pTseng) 119{ 120 if (pTseng->ChipType == ET4000) { 121 ErrorF("trying to unlock......................................\n"); 122 MMIO_OUT32(pTseng->tsengCPU2ACLBase,0,0L); /* try unlocking the bus when CPU-to-accel gets stuck */ 123 124 /* flush the accelerator pipeline */ 125 ACL_SUSPEND_TERMINATE(0x00); 126 ACL_SUSPEND_TERMINATE(0x02); 127 ACL_SUSPEND_TERMINATE(0x00); 128 } 129} 130 131void 132tseng_init_acl(ScrnInfoPtr pScrn) 133{ 134 TsengPtr pTseng = TsengPTR(pScrn); 135 136 PDEBUG(" tseng_init_acl\n"); 137 /* 138 * prepare some shortcuts for faster access to memory mapped registers 139 */ 140 141 pTseng->scratchMemBase = pTseng->FbBase + pTseng->AccelColorBufferOffset; 142 /* 143 * we won't be using tsengCPU2ACLBase in linear memory mode anyway, since 144 * using the MMU apertures restricts the amount of useable video memory 145 * to only 2MB, supposing we ONLY redirect MMU aperture 2 to the CPU. 146 * (see data book W32p, page 207) 147 */ 148 pTseng->tsengCPU2ACLBase = pTseng->FbBase + 0x200000; /* MMU aperture 2 */ 149 150#ifdef DEBUG 151 ErrorF("MMioBase = 0x%x, scratchMemBase = 0x%x\n", pTseng->MMioBase, pTseng->scratchMemBase); 152#endif 153 154 /* 155 * prepare the accelerator for some real work 156 */ 157 158 tseng_terminate_acl(pTseng); 159 160 ACL_INTERRUPT_STATUS(0xe); /* clear interrupts */ 161 ACL_INTERRUPT_MASK(0x04); /* disable interrupts, but enable deadlock exit */ 162 ACL_INTERRUPT_STATUS(0x0); 163 ACL_ACCELERATOR_STATUS_SET(0x0); 164 165 if (pTseng->ChipType == ET6000) { 166 ACL_STEPPING_INHIBIT(0x0); /* Undefined at power-on, let all maps (Src, Dst, Mix, Pat) step */ 167 ACL_6K_CONFIG(0x00); /* maximum performance -- what did you think? */ 168 ACL_POWER_CONTROL(0x01); /* conserve power when ACL is idle */ 169 ACL_MIX_CONTROL(0x33); 170 ACL_TRANSFER_DISABLE(0x00); /* Undefined at power-on, enable all transfers */ 171 } else { /* W32i/W32p */ 172 ACL_RELOAD_CONTROL(0x0); 173 ACL_SYNC_ENABLE(0x1); /* | 0x2 = 0WS ACL read. Yields up to 10% faster operation for small blits */ 174 ACL_ROUTING_CONTROL(0x00); 175 } 176 177 /* Enable the W32p startup bit and set use an eight-bit pixel depth */ 178 ACL_NQ_X_POSITION(0); 179 ACL_NQ_Y_POSITION(0); 180 ACL_PIXEL_DEPTH((pScrn->bitsPerPixel - 8) << 1); 181 /* writing destination address will start ACL */ 182 ACL_OPERATION_STATE(0x10); 183 184 ACL_DESTINATION_Y_OFFSET(pScrn->displayWidth * pTseng->Bytesperpixel - 1); 185 ACL_XY_DIRECTION(0); 186 187 MMU_CONTROL(0x74); 188 189 if (pTseng->ChipType == ET4000) { 190 /* 191 * Since the w32p revs C and D don't have any memory mapped when the 192 * accelerator registers are used it is necessary to use the MMUs to 193 * provide a semblance of linear memory. Fortunately on these chips 194 * the MMU appertures are 1 megabyte each. So as long as we are 195 * willing to only use 3 megs of video memory we can have some 196 * acceleration. If we ever get the CPU-to-screen-color-expansion 197 * stuff working then we will NOT need to sacrifice the extra 1MB 198 * provided by MBP2, because we could do dynamic switching of the APT 199 * bit in the MMU control register. 200 * 201 * On W32p rev c and d MBP2 is hardwired to 0x200000 when linear 202 * memory mode is enabled. (On rev a it is programmable). 203 * 204 * W32p rev a and b have their first 2M mapped in the normal (non-MMU) 205 * way, and MMU0 and MMU1, each 512 kb wide, can be used to access 206 * another 1MB of memory. This totals to 3MB of mem. available in 207 * linear memory when the accelerator is enabled. 208 */ 209 if ((pTseng->ChipRev == REV_A) || (pTseng->ChipRev == REV_B)) { 210 MMIO_OUT32(pTseng->MMioBase, 0x00<<0, 0x200000L); 211 MMIO_OUT32(pTseng->MMioBase, 0x04<<0, 0x280000L); 212 } else { /* rev C & D */ 213 MMIO_OUT32(pTseng->MMioBase, 0x00<<0, 0x0L); 214 MMIO_OUT32 (pTseng->MMioBase, 0x04<<0, 0x100000L); 215 } 216 } 217} 218 219/* 220 * ET4/6K acceleration interface -- color expansion primitives. 221 * 222 * Uses Harm Hanemaayer's generic acceleration interface (XAA). 223 * 224 * Author: Koen Gadeyne 225 * 226 * Much of the acceleration code is based on the XF86_W32 server code from 227 * Glenn Lai. 228 * 229 * 230 * Color expansion capabilities of the Tseng chip families: 231 * 232 * Chip screen-to-screen CPU-to-screen Supported depths 233 * 234 * ET4000W32/W32i No Yes 8bpp only 235 * ET4000W32p Yes Yes 8bpp only 236 * ET6000 Yes No 8/16/24/32 bpp 237 */ 238#define SET_FUNCTION_COLOREXPAND \ 239 if (pTseng->ChipType == ET6000) \ 240 ACL_MIX_CONTROL(0x32); \ 241 else \ 242 ACL_ROUTING_CONTROL(0x08); 243 244#define SET_FUNCTION_COLOREXPAND_CPU \ 245 ACL_ROUTING_CONTROL(0x02); 246 247 248static void 249TsengSubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn, 250 int x, int y, int w, int h, int skipleft) 251{ 252 TsengPtr pTseng = TsengPTR(pScrn); 253 254 if (pTseng->ChipType == ET4000) { 255 /* the accelerator needs DWORD padding, and "w" is in PIXELS... */ 256 pTseng->acl_colexp_width_dwords = (MULBPP(pTseng, w) + 31) >> 5; 257 pTseng->acl_colexp_width_bytes = (MULBPP(pTseng, w) + 7) >> 3; 258 } 259 260 pTseng->acl_ColorExpandDst = FBADDR(pTseng, x, y); 261 pTseng->acl_skipleft = skipleft; 262 263 wait_acl_queue(pTseng); 264 265#if 0 266 ACL_MIX_Y_OFFSET(w - 1); 267 268 ErrorF(" W=%d", w); 269#endif 270 SET_XY(pTseng, w, 1); 271} 272 273static void 274TsengSubsequentColorExpandScanline(ScrnInfoPtr pScrn, 275 int bufno) 276{ 277 TsengPtr pTseng = TsengPTR(pScrn); 278 279 wait_acl_queue(pTseng); 280 281 ACL_MIX_ADDRESS((pTseng->AccelColorExpandBufferOffsets[bufno] << 3) + pTseng->acl_skipleft); 282 START_ACL(pTseng, pTseng->acl_ColorExpandDst); 283 284 /* move to next scanline */ 285 pTseng->acl_ColorExpandDst += pTseng->line_width; 286 287 /* 288 * If not using triple-buffering, we need to wait for the queued 289 * register set to be transferred to the working register set here, 290 * because otherwise an e.g. double-buffering mechanism could overwrite 291 * the buffer that's currently being worked with with new data too soon. 292 * 293 * WAIT_QUEUE; // not needed with triple-buffering 294 */ 295} 296 297 298 299/* 300 * We use this intermediate CPU-to-Screen color expansion because the one 301 * provided by XAA seems to lock up the accelerator engine. 302 * 303 * One of the main differences between the XAA approach and this one is that 304 * transfers are done per byte. I'm not sure if that is needed though. 305 */ 306static void 307TsengSubsequentColorExpandScanline_8bpp(ScrnInfoPtr pScrn, int bufno) 308{ 309 TsengPtr pTseng = TsengPTR(pScrn); 310 pointer dest = pTseng->tsengCPU2ACLBase; 311 int i,j; 312 CARD8 *bufptr; 313 314 i = pTseng->acl_colexp_width_bytes; 315 bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 316 317 wait_acl_queue(pTseng); 318 START_ACL (pTseng, pTseng->acl_ColorExpandDst); 319 320/* *((LongP) (MMioBase + 0x08)) = (CARD32) pTseng->acl_ColorExpandDst;*/ 321/* MMIO_OUT32(tsengCPU2ACLBase,0, (CARD32)pTseng->acl_ColorExpandDst); */ 322 j = 0; 323 /* Copy scanline data to accelerator MMU aperture byte by byte */ 324 while (i--) { /* FIXME: we need to take care of PCI bursting and MMU overflow here! */ 325 MMIO_OUT8(dest,j++, *bufptr++); 326 } 327 328 /* move to next scanline */ 329 pTseng->acl_ColorExpandDst += pTseng->line_width; 330} 331 332/* 333 * This function does direct memory-to-CPU bit doubling for color-expansion 334 * at 16bpp on W32 chips. They can only do 8bpp color expansion, so we have 335 * to expand the incoming data to 2bpp first. 336 */ 337static void 338TsengSubsequentColorExpandScanline_16bpp(ScrnInfoPtr pScrn, int bufno) 339{ 340 TsengPtr pTseng = TsengPTR(pScrn); 341 pointer dest = pTseng->tsengCPU2ACLBase; 342 int i,j; 343 CARD8 *bufptr; 344 register CARD32 bits16; 345 346 i = pTseng->acl_colexp_width_dwords * 2; 347 bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 348 349 wait_acl_queue(pTseng); 350 START_ACL(pTseng, pTseng->acl_ColorExpandDst); 351 352 j = 0; 353 while (i--) { 354 bits16 = pTseng->ColExpLUT[*bufptr++]; 355 MMIO_OUT8(dest,j++,bits16 & 0xFF); 356 MMIO_OUT8(dest,j++,(bits16 >> 8) & 0xFF); 357 } 358 359 /* move to next scanline */ 360 pTseng->acl_ColorExpandDst += pTseng->line_width; 361} 362 363/* 364 * This function does direct memory-to-CPU bit doubling for color-expansion 365 * at 24bpp on W32 chips. They can only do 8bpp color expansion, so we have 366 * to expand the incoming data to 3bpp first. 367 */ 368static void 369TsengSubsequentColorExpandScanline_24bpp(ScrnInfoPtr pScrn, int bufno) 370{ 371 TsengPtr pTseng = TsengPTR(pScrn); 372 pointer dest = pTseng->tsengCPU2ACLBase; 373 int i, k, j = -1; 374 CARD8 *bufptr; 375 register CARD32 bits24; 376 377 i = pTseng->acl_colexp_width_dwords * 4; 378 bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 379 380 wait_acl_queue(pTseng); 381 START_ACL(pTseng, pTseng->acl_ColorExpandDst); 382 383 /* take 8 input bits, expand to 3 output bytes */ 384 bits24 = pTseng->ColExpLUT[*bufptr++]; 385 k = 0; 386 while (i--) { 387 if ((j++) == 2) { /* "i % 3" operation is much to expensive */ 388 j = 0; 389 bits24 = pTseng->ColExpLUT[*bufptr++]; 390 } 391 MMIO_OUT8(dest,k++,bits24 & 0xFF); 392 bits24 >>= 8; 393 } 394 395 /* move to next scanline */ 396 pTseng->acl_ColorExpandDst += pTseng->line_width; 397} 398 399/* 400 * This function does direct memory-to-CPU bit doubling for color-expansion 401 * at 32bpp on W32 chips. They can only do 8bpp color expansion, so we have 402 * to expand the incoming data to 4bpp first. 403 */ 404static void 405TsengSubsequentColorExpandScanline_32bpp(ScrnInfoPtr pScrn, int bufno) 406{ 407 TsengPtr pTseng = TsengPTR(pScrn); 408 pointer dest = pTseng->tsengCPU2ACLBase; 409 int i,j; 410 CARD8 *bufptr; 411 register CARD32 bits32; 412 413 i = pTseng->acl_colexp_width_dwords; 414 /* amount of blocks of 8 bits to expand to 32 bits (=1 DWORD) */ 415 bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 416 417 wait_acl_queue(pTseng); 418 START_ACL(pTseng, pTseng->acl_ColorExpandDst); 419 420 j = 0; 421 while (i--) { 422 bits32 = pTseng->ColExpLUT[*bufptr++]; 423 MMIO_OUT8(dest,j++,bits32 & 0xFF); 424 MMIO_OUT8(dest,j++,(bits32 >> 8) & 0xFF); 425 MMIO_OUT8(dest,j++,(bits32 >> 16) & 0xFF); 426 MMIO_OUT8(dest,j++,(bits32 >> 24) & 0xFF); 427 } 428 429 /* move to next scanline */ 430 pTseng->acl_ColorExpandDst += pTseng->line_width; 431} 432 433/* 434 * CPU-to-Screen color expansion. 435 * This is for ET4000 only (The ET6000 cannot do this) 436 */ 437static void 438TsengSetupForCPUToScreenColorExpandFill(ScrnInfoPtr pScrn, 439 int fg, int bg, int rop, unsigned int planemask) 440{ 441 TsengPtr pTseng = TsengPTR(pScrn); 442 443/* ErrorF("X"); */ 444 445 PINGPONG(pTseng); 446 447 wait_acl_queue(pTseng); 448 449 SET_FG_ROP(rop); 450 SET_BG_ROP_TR(rop, bg); 451 452 SET_XYDIR(0); 453 454 SET_FG_BG_COLOR(pTseng, fg, bg); 455 456 SET_FUNCTION_COLOREXPAND_CPU; 457 458 /* assure correct alignment of MIX address (ACL needs same alignment here as in MMU aperture) */ 459 ACL_MIX_ADDRESS(0); 460} 461 462#ifdef TSENG_CPU_TO_SCREEN_COLOREXPAND 463/* 464 * TsengSubsequentCPUToScreenColorExpand() is potentially dangerous: 465 * Not writing enough data to the MMU aperture for CPU-to-screen color 466 * expansion will eventually cause a system deadlock! 467 * 468 * Note that CPUToScreenColorExpand operations _always_ require a 469 * WAIT_INTERFACE before starting a new operation (this is empyrical, 470 * though) 471 */ 472static void 473TsengSubsequentCPUToScreenColorExpandFill(ScrnInfoPtr pScrn, 474 int x, int y, int w, int h, int skipleft) 475{ 476 TsengPtr pTseng = TsengPTR(pScrn); 477 int destaddr = FBADDR(pTseng, x, y); 478 479 /* ErrorF(" %dx%d|%d ",w,h,skipleft); */ 480 if (skipleft) 481 ErrorF("Can't do: Skipleft = %d\n", skipleft); 482 483/* wait_acl_queue(); */ 484 ErrorF("=========WAIT FIXME!\n"); 485 WAIT_INTERFACE; 486 487 ACL_MIX_Y_OFFSET(w - 1); 488 SET_XY(pTseng, w, h); 489 START_ACL(pTseng, destaddr); 490} 491#endif 492 493static void 494TsengSetupForScreenToScreenColorExpandFill(ScrnInfoPtr pScrn, 495 int fg, int bg, int rop, unsigned int planemask) 496{ 497 TsengPtr pTseng = TsengPTR(pScrn); 498 499/* ErrorF("SSC "); */ 500 501 PINGPONG(pTseng); 502 503 wait_acl_queue(pTseng); 504 505 SET_FG_ROP(rop); 506 SET_BG_ROP_TR(rop, bg); 507 508 SET_FG_BG_COLOR(pTseng, fg, bg); 509 510 SET_FUNCTION_COLOREXPAND; 511 512 SET_XYDIR(0); 513} 514 515static void 516TsengSubsequentScreenToScreenColorExpandFill(ScrnInfoPtr pScrn, 517 int x, int y, int w, int h, int srcx, int srcy, int skipleft) 518{ 519 TsengPtr pTseng = TsengPTR(pScrn); 520 int destaddr = FBADDR(pTseng, x, y); 521 522/* int srcaddr = FBADDR(pTseng, srcx, srcy); */ 523 524 wait_acl_queue(pTseng); 525 526 SET_XY(pTseng, w, h); 527 ACL_MIX_ADDRESS( /* MIX address is in BITS */ 528 (((srcy * pScrn->displayWidth) + srcx) * pScrn->bitsPerPixel) + skipleft); 529 530 ACL_MIX_Y_OFFSET(pTseng->line_width << 3); 531 532 START_ACL(pTseng, destaddr); 533} 534 535/* 536 * 537 */ 538static Bool 539TsengXAAInit_Colexp(ScrnInfoPtr pScrn) 540{ 541 int i, j, r; 542 TsengPtr pTseng = TsengPTR(pScrn); 543 XAAInfoRecPtr pXAAInfo = pTseng->AccelInfoRec; 544 545 PDEBUG(" TsengXAAInit_Colexp\n"); 546 547#ifdef TODO 548 if (OFLG_ISSET(OPTION_XAA_NO_COL_EXP, &vga256InfoRec.options)) 549 return; 550#endif 551 552 /* FIXME! disable accelerated color expansion for W32/W32i until it's fixed */ 553/* if (Is_W32 || Is_W32i) return; */ 554 555 /* 556 * Screen-to-screen color expansion. 557 * 558 * Scanline-screen-to-screen color expansion is slower than 559 * CPU-to-screen color expansion. 560 */ 561 562 pXAAInfo->ScreenToScreenColorExpandFillFlags = 563 BIT_ORDER_IN_BYTE_LSBFIRST | 564 SCANLINE_PAD_DWORD | 565 LEFT_EDGE_CLIPPING | 566 NO_PLANEMASK; 567 568#if 1 569 if ((pTseng->ChipType == ET6000) || (pScrn->bitsPerPixel == 8)) { 570 pXAAInfo->SetupForScreenToScreenColorExpandFill = 571 TsengSetupForScreenToScreenColorExpandFill; 572 pXAAInfo->SubsequentScreenToScreenColorExpandFill = 573 TsengSubsequentScreenToScreenColorExpandFill; 574 } 575#endif 576 577 /* 578 * Scanline CPU to screen color expansion for all W32 engines. 579 * 580 * real CPU-to-screen color expansion is extremely tricky, and only 581 * works for 8bpp anyway. 582 * 583 * This also allows us to do 16, 24 and 32 bpp color expansion by first 584 * doubling the bitmap pattern before color-expanding it, because W32s 585 * can only do 8bpp color expansion. 586 */ 587 588 pXAAInfo->ScanlineCPUToScreenColorExpandFillFlags = 589 BIT_ORDER_IN_BYTE_LSBFIRST | 590 SCANLINE_PAD_DWORD | 591 NO_PLANEMASK; 592 593 if (pTseng->ChipType == ET4000) { 594 pTseng->XAAScanlineColorExpandBuffers[0] = 595 xnfalloc(((pScrn->virtualX + 31)/32) * 4 * pTseng->Bytesperpixel); 596 if (pTseng->XAAScanlineColorExpandBuffers[0] == NULL) { 597 xf86Msg(X_ERROR, "Could not malloc color expansion scanline buffer.\n"); 598 return FALSE; 599 } 600 pXAAInfo->NumScanlineColorExpandBuffers = 1; 601 pXAAInfo->ScanlineColorExpandBuffers = pTseng->XAAScanlineColorExpandBuffers; 602 603 pXAAInfo->SetupForScanlineCPUToScreenColorExpandFill = 604 TsengSetupForCPUToScreenColorExpandFill; 605 606 pXAAInfo->SubsequentScanlineCPUToScreenColorExpandFill = 607 TsengSubsequentScanlineCPUToScreenColorExpandFill; 608 609 switch (pScrn->bitsPerPixel) { 610 case 8: 611 pXAAInfo->SubsequentColorExpandScanline = 612 TsengSubsequentColorExpandScanline_8bpp; 613 break; 614 case 15: 615 case 16: 616 pXAAInfo->SubsequentColorExpandScanline = 617 TsengSubsequentColorExpandScanline_16bpp; 618 break; 619 case 24: 620 pXAAInfo->SubsequentColorExpandScanline = 621 TsengSubsequentColorExpandScanline_24bpp; 622 break; 623 case 32: 624 pXAAInfo->SubsequentColorExpandScanline = 625 TsengSubsequentColorExpandScanline_32bpp; 626 break; 627 } 628 /* create color expansion LUT (used for >8bpp only) */ 629 pTseng->ColExpLUT = xnfalloc(sizeof(CARD32)*256); 630 if (pTseng->ColExpLUT == NULL) { 631 xf86Msg(X_ERROR, "Could not malloc color expansion tables.\n"); 632 return FALSE; 633 } 634 for (i = 0; i < 256; i++) { 635 r = 0; 636 for (j = 7; j >= 0; j--) { 637 r <<= pTseng->Bytesperpixel; 638 if ((i >> j) & 1) 639 r |= (1 << pTseng->Bytesperpixel) - 1; 640 } 641 pTseng->ColExpLUT[i] = r; 642 /* ErrorF("0x%08X, ",r ); if ((i%8)==7) ErrorF("\n"); */ 643 } 644 } else { 645 /* 646 * Triple-buffering is needed to account for double-buffering of Tseng 647 * acceleration registers. 648 */ 649 pXAAInfo->NumScanlineColorExpandBuffers = 3; 650 pXAAInfo->ScanlineColorExpandBuffers = 651 pTseng->XAAColorExpandBuffers; 652 pXAAInfo->SetupForScanlineCPUToScreenColorExpandFill = 653 TsengSetupForScreenToScreenColorExpandFill; 654 pXAAInfo->SubsequentScanlineCPUToScreenColorExpandFill = 655 TsengSubsequentScanlineCPUToScreenColorExpandFill; 656 pXAAInfo->SubsequentColorExpandScanline = 657 TsengSubsequentColorExpandScanline; 658 659 /* calculate memory addresses from video memory offsets */ 660 for (i = 0; i < pXAAInfo->NumScanlineColorExpandBuffers; i++) { 661 pTseng->XAAColorExpandBuffers[i] = 662 pTseng->FbBase + pTseng->AccelColorExpandBufferOffsets[i]; 663 } 664 665 pXAAInfo->ScanlineColorExpandBuffers = pTseng->XAAColorExpandBuffers; 666 } 667 668#ifdef TSENG_CPU_TO_SCREEN_COLOREXPAND 669 /* 670 * CPU-to-screen color expansion doesn't seem to be reliable yet. The 671 * W32 needs the correct amount of data sent to it in this mode, or it 672 * hangs the machine until is does (?). Currently, the init code in this 673 * file or the XAA code that uses this does something wrong, so that 674 * occasionally we get accelerator timeouts, and after a few, complete 675 * system hangs. 676 * 677 * The W32 engine requires SCANLINE_NO_PAD, but that doesn't seem to 678 * work very well (accelerator hangs). 679 * 680 * What works is this: tell XAA that we have SCANLINE_PAD_DWORD, and then 681 * add the following code in TsengSubsequentCPUToScreenColorExpand(): 682 * w = (w + 31) & ~31; this code rounds the width up to the nearest 683 * multiple of 32, and together with SCANLINE_PAD_DWORD, this makes 684 * CPU-to-screen color expansion work. Of course, the display isn't 685 * correct (4 chars are "blanked out" when only one is written, for 686 * example). But this shows that the principle works. But the code 687 * doesn't... 688 * 689 * The same thing goes for PAD_BYTE: this also works (with the same 690 * problems as SCANLINE_PAD_DWORD, although less prominent) 691 */ 692 693 pXAAInfo->CPUToScreenColorExpandFillFlags = 694 BIT_ORDER_IN_BYTE_LSBFIRST | 695 SCANLINE_PAD_DWORD | /* no other choice */ 696 CPU_TRANSFER_PAD_DWORD | 697 NO_PLANEMASK; 698 699 if (Is_W32_any && (pScrn->bitsPerPixel == 8)) { 700 pXAAInfo->SetupForCPUToScreenColorExpandFill = 701 TsengSetupForCPUToScreenColorExpandFill; 702 pXAAInfo->SubsequentCPUToScreenColorExpandFill = 703 TsengSubsequentCPUToScreenColorExpandFill; 704 705 /* we'll be using MMU aperture 2 */ 706 pXAAInfo->ColorExpandBase = (CARD8 *)pTseng->tsengCPU2ACLBase; 707 /* ErrorF("tsengCPU2ACLBase = 0x%x\n", pTseng->tsengCPU2ACLBase); */ 708 /* aperture size is 8kb in banked mode. Larger in linear mode, but 8kb is enough */ 709 pXAAInfo->ColorExpandRange = 8192; 710 } 711#endif 712 return TRUE; 713} 714 715/* 716 * ET4/6K acceleration interface. 717 * 718 * Uses Harm Hanemaayer's generic acceleration interface (XAA). 719 * 720 * Author: Koen Gadeyne 721 * 722 * Much of the acceleration code is based on the XF86_W32 server code from 723 * Glenn Lai. 724 * 725 */ 726 727/* 728 * This is the implementation of the Sync() function. 729 * 730 * To avoid pipeline/cache/buffer flushing in the PCI subsystem and the VGA 731 * controller, we might replace this read-intensive code with a dummy 732 * accelerator operation that causes a hardware-blocking (wait-states) until 733 * the running operation is done. 734 */ 735static void 736TsengSync(ScrnInfoPtr pScrn) 737{ 738 TsengPtr pTseng = TsengPTR(pScrn); 739 740 WAIT_ACL; 741} 742 743/* 744 * This is the implementation of the SetupForSolidFill function 745 * that sets up the coprocessor for a subsequent batch for solid 746 * rectangle fills. 747 */ 748static void 749TsengSetupForSolidFill(ScrnInfoPtr pScrn, 750 int color, int rop, unsigned int planemask) 751{ 752 TsengPtr pTseng = TsengPTR(pScrn); 753 754 /* 755 * all registers are queued in the Tseng chips, except of course for the 756 * stuff we want to store in off-screen memory. So we have to use a 757 * ping-pong method for those if we want to avoid having to wait for the 758 * accelerator when we want to write to these. 759 */ 760 761/* ErrorF("S"); */ 762 763 PINGPONG(pTseng); 764 765 wait_acl_queue(pTseng); 766 767 /* 768 * planemask emulation uses a modified "standard" FG ROP (see ET6000 769 * data book p 66 or W32p databook p 37: "Bit masking"). We only enable 770 * the planemask emulation when the planemask is not a no-op, because 771 * blitting speed would suffer. 772 */ 773 774 if ((planemask & pTseng->planemask_mask) != pTseng->planemask_mask) { 775 SET_FG_ROP_PLANEMASK(rop); 776 SET_BG_COLOR(pTseng, planemask); 777 } else { 778 SET_FG_ROP(rop); 779 } 780 SET_FG_COLOR(pTseng, color); 781 782 SET_FUNCTION_BLT; 783} 784 785/* 786 * This is the implementation of the SubsequentForSolidFillRect function 787 * that sends commands to the coprocessor to fill a solid rectangle of 788 * the specified location and size, with the parameters from the SetUp 789 * call. 790 * 791 * Splitting it up between ET4000 and ET6000 avoids lots of chipset type 792 * comparisons. 793 */ 794static void 795TsengW32pSubsequentSolidFillRect(ScrnInfoPtr pScrn, 796 int x, int y, int w, int h) 797{ 798 TsengPtr pTseng = TsengPTR(pScrn); 799 int destaddr = FBADDR(pTseng, x, y); 800 801 wait_acl_queue(pTseng); 802 803 /* 804 * Restoring the ACL_SOURCE_ADDRESS here is needed as long as Bresenham 805 * lines are enabled for >8bpp. Or until XAA allows us to render 806 * horizontal lines using the same Bresenham code instead of re-routing 807 * them to FillRectSolid. For XDECREASING lines, the SubsequentBresenham 808 * code adjusts the ACL_SOURCE_ADDRESS to make sure XDECREASING lines 809 * are drawn with the correct colors. But if a batch of subsequent 810 * operations also holds a few horizontal lines, they will be routed to 811 * here without calling the SetupFor... code again, and the 812 * ACL_SOURCE_ADDRESS will be wrong. 813 */ 814 ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg); 815 816 SET_XYDIR(0); /* FIXME: not needed with separate setupforsolidline */ 817 818 SET_XY_4(pTseng, w, h); 819 START_ACL(pTseng, destaddr); 820} 821 822static void 823Tseng6KSubsequentSolidFillRect(ScrnInfoPtr pScrn, 824 int x, int y, int w, int h) 825{ 826 TsengPtr pTseng = TsengPTR(pScrn); 827 int destaddr = FBADDR(pTseng, x, y); 828 829 wait_acl_queue(pTseng); 830 831 /* see comment in TsengW32pSubsequentFillRectSolid */ 832 ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg); 833 834 /* if XYDIR is not reset here, drawing a hardware line in between 835 * blitting, with the same ROP, color, etc will not cause a call to 836 * SetupFor... (because linedrawing uses SetupForSolidFill() as its 837 * Setup() function), and thus the direction register will have been 838 * changed by the last LineDraw operation. 839 */ 840 SET_XYDIR(0); 841 842 SET_XY_6(pTseng, w, h); 843 START_ACL_6(destaddr); 844} 845 846/* 847 * This is the implementation of the SetupForScreenToScreenCopy function 848 * that sets up the coprocessor for a subsequent batch of 849 * screen-to-screen copies. 850 */ 851 852static __inline__ void 853Tseng_setup_screencopy(TsengPtr pTseng, 854 int rop, unsigned int planemask, 855 int trans_color, int blit_dir) 856{ 857 wait_acl_queue(pTseng); 858 859#ifdef ET6K_TRANSPARENCY 860 if ((pTseng->ChipType == ET6000) && (trans_color != -1)) { 861 SET_BG_COLOR(trans_color); 862 SET_FUNCTION_BLT_TR; 863 } else 864 SET_FUNCTION_BLT; 865 866 SET_FG_ROP(rop); 867#else 868 if ((planemask & pTseng->planemask_mask) != pTseng->planemask_mask) { 869 SET_FG_ROP_PLANEMASK(rop); 870 SET_BG_COLOR(pTseng, planemask); 871 } else { 872 SET_FG_ROP(rop); 873 } 874 SET_FUNCTION_BLT; 875#endif 876 SET_XYDIR(blit_dir); 877} 878 879static void 880TsengSetupForScreenToScreenCopy(ScrnInfoPtr pScrn, 881 int xdir, int ydir, int rop, 882 unsigned int planemask, int trans_color) 883{ 884 /* 885 * xdir can be either 1 (left-to-right) or -1 (right-to-left). 886 * ydir can be either 1 (top-to-bottom) or -1 (bottom-to-top). 887 */ 888 889 TsengPtr pTseng = TsengPTR(pScrn); 890 int blit_dir = 0; 891 892/* ErrorF("C%d ", trans_color); */ 893 894 pTseng->acl_blitxdir = xdir; 895 pTseng->acl_blitydir = ydir; 896 897 if (xdir == -1) 898 blit_dir |= 0x1; 899 if (ydir == -1) 900 blit_dir |= 0x2; 901 902 Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, blit_dir); 903 904 ACL_SOURCE_WRAP(0x77); /* no wrap */ 905 ACL_SOURCE_Y_OFFSET(pTseng->line_width - 1); 906} 907 908/* 909 * This is the implementation of the SubsequentForScreenToScreenCopy 910 * that sends commands to the coprocessor to perform a screen-to-screen 911 * copy of the specified areas, with the parameters from the SetUp call. 912 * In this sample implementation, the direction must be taken into 913 * account when calculating the addresses (with coordinates, it might be 914 * a little easier). 915 * 916 * Splitting up the SubsequentScreenToScreenCopy between ET4000 and ET6000 917 * doesn't seem to improve speed for small blits (as it did with 918 * SolidFillRect). 919 */ 920static void 921TsengSubsequentScreenToScreenCopy(ScrnInfoPtr pScrn, 922 int x1, int y1, int x2, int y2, 923 int w, int h) 924{ 925 TsengPtr pTseng = TsengPTR(pScrn); 926 int srcaddr, destaddr; 927 928 /* 929 * Optimizing note: the pre-calc code below (i.e. until the first 930 * register write) doesn't significantly affect performance. Removing it 931 * all boosts small blits from 24.22 to 25.47 MB/sec. Don't waste time 932 * on that. One less PCI bus write would boost us to 30.00 MB/sec, up 933 * from 24.22. Waste time on _that_... 934 */ 935 936 /* tseng chips want x-sizes in bytes, not pixels */ 937 x1 = MULBPP(pTseng, x1); 938 x2 = MULBPP(pTseng, x2); 939 940 /* 941 * If the direction is "decreasing", the chip wants the addresses 942 * to be at the other end, so we must be aware of that in our 943 * calculations. 944 */ 945 if (pTseng->acl_blitydir == -1) { 946 srcaddr = (y1 + h - 1) * pTseng->line_width; 947 destaddr = (y2 + h - 1) * pTseng->line_width; 948 } else { 949 srcaddr = y1 * pTseng->line_width; 950 destaddr = y2 * pTseng->line_width; 951 } 952 if (pTseng->acl_blitxdir == -1) { 953 /* Accelerator start address must point to first byte to be processed. 954 * Depending on the direction, this is the first or the last byte 955 * in the multi-byte pixel. 956 */ 957 int eol = MULBPP(pTseng, w); 958 959 srcaddr += x1 + eol - 1; 960 destaddr += x2 + eol - 1; 961 } else { 962 srcaddr += x1; 963 destaddr += x2; 964 } 965 966 wait_acl_queue(pTseng); 967 968 SET_XY(pTseng, w, h); 969 ACL_SOURCE_ADDRESS(srcaddr); 970 START_ACL(pTseng, destaddr); 971} 972 973#if 0 974static int pat_src_addr; 975 976static void 977TsengSetupForColor8x8PatternFill(ScrnInfoPtr pScrn, 978 int patx, int paty, int rop, unsigned int planemask, int trans_color) 979{ 980 TsengPtr pTseng = TsengPTR(pScrn); 981 982 pat_src_addr = FBADDR(pTseng, patx, paty); 983 984 ErrorF("P"); 985 986 Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, 0); 987 988 switch (pTseng->Bytesperpixel) { 989 case 1: 990 ACL_SOURCE_WRAP(0x33); /* 8x8 wrap */ 991 ACL_SOURCE_Y_OFFSET(8 - 1); 992 break; 993 case 2: 994 ACL_SOURCE_WRAP(0x34); /* 16x8 wrap */ 995 ACL_SOURCE_Y_OFFSET(16 - 1); 996 break; 997 case 3: 998 ACL_SOURCE_WRAP(0x3D); /* 24x8 wrap --- only for ET6000 !!! */ 999 ACL_SOURCE_Y_OFFSET(32 - 1); /* this is no error -- see databook */ 1000 break; 1001 case 4: 1002 ACL_SOURCE_WRAP(0x35); /* 32x8 wrap */ 1003 ACL_SOURCE_Y_OFFSET(32 - 1); 1004 } 1005} 1006 1007static void 1008TsengSubsequentColor8x8PatternFillRect(ScrnInfoPtr pScrn, 1009 int patx, int paty, int x, int y, int w, int h) 1010{ 1011 TsengPtr pTseng = TsengPTR(pScrn); 1012 int destaddr = FBADDR(pTseng, x, y); 1013 int srcaddr = pat_src_addr + MULBPP(pTseng, paty * 8 + patx); 1014 1015 wait_acl_queue(pTseng); 1016 1017 ACL_SOURCE_ADDRESS(srcaddr); 1018 1019 SET_XY(pTseng, w, h); 1020 START_ACL(pTseng, destaddr); 1021} 1022#endif 1023 1024#if 0 1025/* 1026 * ImageWrite is nothing more than a per-scanline screencopy. 1027 */ 1028 1029static void 1030TsengSetupForScanlineImageWrite(ScrnInfoPtr pScrn, 1031 int rop, unsigned int planemask, int trans_color, int bpp, int depth) 1032{ 1033 TsengPtr pTseng = TsengPTR(pScrn); 1034 1035/* ErrorF("IW"); */ 1036 1037 Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, 0); 1038 1039 ACL_SOURCE_WRAP(0x77); /* no wrap */ 1040 ACL_SOURCE_Y_OFFSET(pTseng->line_width - 1); 1041} 1042 1043static void 1044TsengSubsequentScanlineImageWriteRect(ScrnInfoPtr pScrn, 1045 int x, int y, int w, int h, int skipleft) 1046{ 1047 TsengPtr pTseng = TsengPTR(pScrn); 1048 1049/* ErrorF("r%d",h); */ 1050 1051 pTseng->acl_iw_dest = y * pTseng->line_width + MULBPP(pTseng, x); 1052 pTseng->acl_skipleft = MULBPP(pTseng, skipleft); 1053 1054 wait_acl_queue(pTseng); 1055 SET_XY(pTseng, w, 1); 1056} 1057 1058static void 1059TsengSubsequentImageWriteScanline(ScrnInfoPtr pScrn, 1060 int bufno) 1061{ 1062 TsengPtr pTseng = TsengPTR(pScrn); 1063 1064/* ErrorF("%d", bufno); */ 1065 1066 wait_acl_queue(pTseng); 1067 1068 ACL_SOURCE_ADDRESS(pTseng->AccelImageWriteBufferOffsets[bufno] 1069 + pTseng->acl_skipleft); 1070 START_ACL(pTseng, pTseng->acl_iw_dest); 1071 pTseng->acl_iw_dest += pTseng->line_width; 1072} 1073#endif 1074 1075#if 0 1076/* 1077 * W32p/ET6000 hardware linedraw code. 1078 * 1079 * TsengSetupForSolidFill() is used as a setup function. 1080 * 1081 * Three major problems that needed to be solved here: 1082 * 1083 * 1. The "bias" value must be translated into the "line draw algorithm" 1084 * parameter in the Tseng accelerators. This parameter, although not 1085 * documented as such, needs to be set to the _inverse_ of the 1086 * appropriate bias bit (i.e. for the appropriate octant). 1087 * 1088 * 2. In >8bpp modes, the accelerator will render BYTES in the same order as 1089 * it is drawing the line. This means it will render the colors in the 1090 * same order as well, reversing the byte-order in pixels that are drawn 1091 * right-to-left. This causes wrong colors to be rendered. 1092 * 1093 * 3. The Tseng data book says that the ACL Y count register needs to be 1094 * programmed with "dy-1". A similar thing is said about ACL X count. But 1095 * this assumes (x2,y2) is NOT drawn (although that is not mentionned in 1096 * the data book). X assumes the endpoint _is_ drawn. If "dy-1" is used, 1097 * this sometimes results in a negative value (if dx==dy==0), 1098 * causing a complete accelerator hang. 1099 */ 1100 1101static void 1102TsengSubsequentSolidBresenhamLine(ScrnInfoPtr pScrn, 1103 int x, int y, int major, int minor, int err, int len, int octant) 1104{ 1105 TsengPtr pTseng = TsengPTR(pScrn); 1106 int destaddr = FBADDR(pTseng, x, y); 1107 int xydir = pTseng->BresenhamTable[octant]; 1108 1109 /* Tseng wants the real dx/dy in major/minor. Bresenham uses 2*dx and 2*dy */ 1110 minor >>= 1; 1111 major >>= 1; 1112 1113 wait_acl_queue(pTseng); 1114 1115 if (!(octant & YMAJOR)) { 1116 SET_X_YRAW(pTseng, len, 0xFFF); 1117 } else { 1118 SET_XY_RAW(pTseng,0xFFF, len - 1); 1119 } 1120 1121 SET_DELTA(minor, major); 1122 ACL_ERROR_TERM(-err); /* error term from XAA is NEGATIVE */ 1123 1124 /* make sure colors are rendered correctly if >8bpp */ 1125 if (octant & XDECREASING) { 1126 destaddr += pTseng->Bytesperpixel - 1; 1127 ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset 1128 + pTseng->tsengFg + pTseng->neg_x_pixel_offset); 1129 } else 1130 ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg); 1131 1132 SET_XYDIR(xydir); 1133 1134 START_ACL(pTseng, destaddr); 1135} 1136#endif 1137 1138#ifdef TODO 1139/* 1140 * Trapezoid filling code. 1141 * 1142 * TsengSetupForSolidFill() is used as a setup function 1143 */ 1144 1145#undef DEBUG_TRAP 1146 1147#ifdef TSENG_TRAPEZOIDS 1148static void 1149TsengSubsequentFillTrapezoidSolid(ytop, height, left, dxL, dyL, eL, right, dxR, dyR, eR) 1150 int ytop; 1151 int height; 1152 int left; 1153 int dxL, dyL; 1154 int eL; 1155 int right; 1156 int dxR, dyR; 1157 int eR; 1158{ 1159 unsigned int tseng_bias_compensate = 0xd8; 1160 int destaddr, algrthm; 1161 int xcount = right - left + 1; /* both edges included */ 1162 int dir_reg = 0x60; /* trapezoid drawing; use error term for primary edge */ 1163 int sec_dir_reg = 0x20; /* use error term for secondary edge */ 1164 int octant = 0; 1165 1166 /* ErrorF("#"); */ 1167 1168 int destaddr, algrthm; 1169 int xcount = right - left + 1; 1170 1171#ifdef USE_ERROR_TERM 1172 int dir_reg = 0x60; 1173 int sec_dir_reg = 0x20; 1174 1175#else 1176 int dir_reg = 0x40; 1177 int sec_dir_reg = 0x00; 1178 1179#endif 1180 int octant = 0; 1181 int bias = 0x00; /* FIXME !!! */ 1182 1183/* ErrorF("#"); */ 1184 1185#ifdef DEBUG_TRAP 1186 ErrorF("ytop=%d, height=%d, left=%d, dxL=%d, dyL=%d, eL=%d, right=%d, dxR=%d, dyR=%d, eR=%d ", 1187 ytop, height, left, dxL, dyL, eL, right, dxR, dyR, eR); 1188#endif 1189 1190 if ((dyL < 0) || (dyR < 0)) 1191 ErrorF("Tseng Trapezoids: Wrong assumption: dyL/R < 0\n"); 1192 1193 destaddr = FBADDR(pTseng, left, ytop); 1194 1195 /* left edge */ 1196 if (dxL < 0) { 1197 dir_reg |= 1; 1198 octant |= XDECREASING; 1199 dxL = -dxL; 1200 } 1201 /* Y direction is always positive (top-to-bottom drawing) */ 1202 1203 wait_acl_queue(pTseng); 1204 1205 /* left edge */ 1206 /* compute axial direction and load registers */ 1207 if (dxL >= dyL) { /* X is major axis */ 1208 dir_reg |= 4; 1209 SET_DELTA(dyL, dxL); 1210 if (dir_reg & 1) { /* edge coherency: draw left edge */ 1211 destaddr += pTseng->Bytesperpixel; 1212 sec_dir_reg |= 0x80; 1213 xcount--; 1214 } 1215 } else { /* Y is major axis */ 1216 SetYMajorOctant(octant); 1217 SET_DELTA(dxL, dyL); 1218 } 1219 ACL_ERROR_TERM(eL); 1220 1221 /* select "linedraw algorithm" (=bias) and load direction register */ 1222 /* ErrorF(" o=%d ", octant); */ 1223 algrthm = ((tseng_bias_compensate >> octant) & 1) ^ 1; 1224 dir_reg |= algrthm << 4; 1225 SET_XYDIR(dir_reg); 1226 1227 /* right edge */ 1228 if (dxR < 0) { 1229 sec_dir_reg |= 1; 1230 dxR = -dxR; 1231 } 1232 /* compute axial direction and load registers */ 1233 if (dxR >= dyR) { /* X is major axis */ 1234 sec_dir_reg |= 4; 1235 SET_SECONDARY_DELTA(dyR, dxR); 1236 if (dir_reg & 1) { /* edge coherency: do not draw right edge */ 1237 sec_dir_reg |= 0x40; 1238 xcount++; 1239 } 1240 } else { /* Y is major axis */ 1241 SET_SECONDARY_DELTA(dxR, dyR); 1242 } 1243 ACL_SECONDARY_ERROR_TERM(eR); 1244 1245 /* ErrorF("%02x", sec_dir_reg); */ 1246 SET_SECONDARY_XYDIR(sec_dir_reg); 1247 1248 SET_XY_6(pTseng, xcount, height); 1249 1250#ifdef DEBUG_TRAP 1251 ErrorF("-> %d,%d\n", xcount, height); 1252#endif 1253 1254 START_ACL_6(destaddr); 1255} 1256#endif 1257 1258#endif 1259 1260 1261/* 1262 * The following function sets up the supported acceleration. Call it from 1263 * the FbInit() function in the SVGA driver. Do NOT initialize any hardware 1264 * in here. That belongs in tseng_init_acl(). 1265 */ 1266Bool 1267TsengXAAInit(ScreenPtr pScreen) 1268{ 1269 ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; 1270 TsengPtr pTseng = TsengPTR(pScrn); 1271 XAAInfoRecPtr pXAAinfo; 1272 BoxRec AvailFBArea; 1273 1274 PDEBUG(" TsengXAAInit\n"); 1275 pTseng->AccelInfoRec = pXAAinfo = XAACreateInfoRec(); 1276 if (!pXAAinfo) 1277 return FALSE; 1278 1279 /* 1280 * Set up the main acceleration flags. 1281 */ 1282 pXAAinfo->Flags = PIXMAP_CACHE; 1283 1284 /* 1285 * The following line installs a "Sync" function, that waits for 1286 * all coprocessor operations to complete. 1287 */ 1288 pXAAinfo->Sync = TsengSync; 1289 1290 /* W32 and W32i must wait for ACL before changing registers */ 1291 if (pTseng->ChipType == ET4000) 1292 pTseng->need_wait_acl = TRUE; 1293 else 1294 pTseng->need_wait_acl = FALSE; 1295 1296 pTseng->line_width = pScrn->displayWidth * pTseng->Bytesperpixel; 1297 1298#if 1 1299 /* 1300 * SolidFillRect. 1301 * 1302 * The W32 and W32i chips don't have a register to set the amount of 1303 * bytes per pixel, and hence they don't skip 1 byte in each 4-byte word 1304 * at 24bpp. Therefor, the FG or BG colors would have to be concatenated 1305 * in video memory (R-G-B-R-G-B-... instead of R-G-B-X-R-G-B-X-..., with 1306 * X = dont' care), plus a wrap value that is a multiple of 3 would have 1307 * to be set. There is no such wrap combination available. 1308 */ 1309#ifdef OBSOLETE 1310 pXAAinfo->SolidFillFlags |= NO_PLANEMASK; 1311#endif 1312 1313 pXAAinfo->SetupForSolidFill = TsengSetupForSolidFill; 1314 if (pTseng->ChipType == ET6000) 1315 pXAAinfo->SubsequentSolidFillRect = Tseng6KSubsequentSolidFillRect; 1316 else 1317 pXAAinfo->SubsequentSolidFillRect = TsengW32pSubsequentSolidFillRect; 1318 1319#ifdef TSENG_TRAPEZOIDS 1320 if (pTseng->ChipType == ET6000) 1321 /* disabled for now: not fully compliant yet */ 1322 pXAAinfo->SubsequentFillTrapezoidSolid = TsengSubsequentFillTrapezoidSolid; 1323#endif 1324#endif 1325 1326#if 1 1327 /* 1328 * SceenToScreenCopy (BitBLT). 1329 * 1330 * Restrictions: On ET6000, we support EITHER a planemask OR 1331 * TRANSPARENCY, but not both (they use the same Pattern map). 1332 * All other chips can't do TRANSPARENCY at all. 1333 */ 1334#ifdef ET6K_TRANSPARENCY 1335 pXAAinfo->CopyAreaFlags = NO_PLANEMASK; 1336 if (pTseng->ChipType == ET4000) 1337 pXAAinfo->CopyAreaFlags |= NO_TRANSPARENCY; 1338 1339#else 1340 pXAAinfo->CopyAreaFlags = NO_TRANSPARENCY; 1341#endif 1342 1343 pXAAinfo->SetupForScreenToScreenCopy = 1344 TsengSetupForScreenToScreenCopy; 1345 pXAAinfo->SubsequentScreenToScreenCopy = 1346 TsengSubsequentScreenToScreenCopy; 1347#endif 1348 1349#if 0 1350 /* 1351 * ImageWrite. 1352 * 1353 * SInce this uses off-screen scanline buffers, it is only of use when 1354 * complex ROPs are used. But since the current XAA pixmap cache code 1355 * only works when an ImageWrite is provided, the NO_GXCOPY flag is 1356 * temporarily disabled. 1357 */ 1358 1359 if (pTseng->AccelImageWriteBufferOffsets[0]) { 1360 pXAAinfo->ScanlineImageWriteFlags = 1361 pXAAinfo->CopyAreaFlags | LEFT_EDGE_CLIPPING /* | NO_GXCOPY */ ; 1362 pXAAinfo->NumScanlineImageWriteBuffers = 2; 1363 pXAAinfo->SetupForScanlineImageWrite = 1364 TsengSetupForScanlineImageWrite; 1365 pXAAinfo->SubsequentScanlineImageWriteRect = 1366 TsengSubsequentScanlineImageWriteRect; 1367 pXAAinfo->SubsequentImageWriteScanline = 1368 TsengSubsequentImageWriteScanline; 1369 1370 /* calculate memory addresses from video memory offsets */ 1371 for (i = 0; i < pXAAinfo->NumScanlineImageWriteBuffers; i++) { 1372 pTseng->XAAScanlineImageWriteBuffers[i] = 1373 pTseng->FbBase + pTseng->AccelImageWriteBufferOffsets[i]; 1374 } 1375 1376 pXAAinfo->ScanlineImageWriteBuffers = pTseng->XAAScanlineImageWriteBuffers; 1377 } 1378#endif 1379 /* 1380 * 8x8 pattern tiling not possible on W32/i/p chips in 24bpp mode. 1381 * Currently, 24bpp pattern tiling doesn't work at all on those. 1382 * 1383 * FIXME: On W32 cards, pattern tiling doesn't work as expected. 1384 */ 1385 pXAAinfo->Color8x8PatternFillFlags = HARDWARE_PATTERN_PROGRAMMED_ORIGIN; 1386 1387 pXAAinfo->CachePixelGranularity = 8 * 8; 1388 1389#ifdef ET6K_TRANSPARENCY 1390 pXAAinfo->PatternFlags |= HARDWARE_PATTERN_NO_PLANEMASK; 1391 if (pTseng->ChipType == ET6000) 1392 pXAAinfo->PatternFlags |= HARDWARE_PATTERN_TRANSPARENCY; 1393#endif 1394 1395#if 0 1396 /* FIXME! This needs to be fixed for W32 and W32i (it "should work") */ 1397 if (pScrn->bitsPerPixel != 24) { 1398 pXAAinfo->SetupForColor8x8PatternFill = 1399 TsengSetupForColor8x8PatternFill; 1400 pXAAinfo->SubsequentColor8x8PatternFillRect = 1401 TsengSubsequentColor8x8PatternFillRect; 1402 } 1403#endif 1404 1405#if 0 /*1*/ 1406 /* 1407 * SolidLine. 1408 * 1409 * We use Bresenham by preference, because it supports hardware clipping 1410 * (using the error term). TwoPointLines() is implemented, but not used, 1411 * because clipped lines are not accelerated (hardware clipping support 1412 * is lacking)... 1413 */ 1414 1415 /* 1416 * Fill in the hardware linedraw ACL_XY_DIRECTION table 1417 * 1418 * W32BresTable[] converts XAA interface Bresenham octants to direct 1419 * ACL direction register contents. This includes the correct bias 1420 * setting etc. 1421 * 1422 * According to miline.h (but with base 0 instead of base 1 as in 1423 * miline.h), the octants are numbered as follows: 1424 * 1425 * \ | / 1426 * \ 2 | 1 / 1427 * \ | / 1428 * 3 \ | / 0 1429 * \|/ 1430 * ----------- 1431 * /| \ 1432 * 4 / | \ 7 1433 * / | \ 1434 * / 5 | 6 \ 1435 * / | \ 1436 * 1437 * In ACL_XY_DIRECTION, bits 2:0 are defined as follows: 1438 * 0: '1' if XDECREASING 1439 * 1: '1' if YDECREASING 1440 * 2: '1' if XMAJOR (== not YMAJOR) 1441 * 1442 * Bit 4 defines the bias. It should be set to '1' for all octants 1443 * NOT passed to miSetZeroLineBias(). i.e. the inverse of the X bias. 1444 * 1445 * (For MS compatible bias, the data book says to set to the same as 1446 * YDIR, i.e. bit 1 of the same register, = '1' if YDECREASING. MS 1447 * bias is towards octants 0..3 (i.e. Y decreasing), hence this 1448 * definition of bit 4) 1449 * 1450 */ 1451 pTseng->BresenhamTable = xnfalloc(8); 1452 if (pTseng->BresenhamTable == NULL) { 1453 xf86Msg(X_ERROR, "Could not malloc Bresenham Table.\n"); 1454 return FALSE; 1455 } 1456 for (i=0; i<8; i++) { 1457 unsigned char zerolinebias = miGetZeroLineBias(pScreen); 1458 pTseng->BresenhamTable[i] = 0xA0; /* command=linedraw, use error term */ 1459 if (i & XDECREASING) pTseng->BresenhamTable[i] |= 0x01; 1460 if (i & YDECREASING) pTseng->BresenhamTable[i] |= 0x02; 1461 if (!(i & YMAJOR)) pTseng->BresenhamTable[i] |= 0x04; 1462 if ((1 << i) & zerolinebias) pTseng->BresenhamTable[i] |= 0x10; 1463 /* ErrorF("BresenhamTable[%d]=0x%x\n", i, pTseng->BresenhamTable[i]); */ 1464 } 1465 1466 pXAAinfo->SolidLineFlags = 0; 1467 pXAAinfo->SetupForSolidLine = TsengSetupForSolidFill; 1468 pXAAinfo->SubsequentSolidBresenhamLine = 1469 TsengSubsequentSolidBresenhamLine; 1470 /* 1471 * ErrorTermBits is used to limit minor, major and error term, so it 1472 * must be min(errorterm_size, delta_major_size, delta_minor_size) 1473 * But the calculation for major and minor is done on the DOUBLED 1474 * values (as per the Bresenham algorithm), so they can also have 13 1475 * bits (inside XAA). They are divided by 2 in this driver, so they 1476 * are then again limited to 12 bits. 1477 */ 1478 pXAAinfo->SolidBresenhamLineErrorTermBits = 13; 1479 1480#endif 1481 1482#if 1 1483 /* set up color expansion acceleration */ 1484 if (!TsengXAAInit_Colexp(pScrn)) 1485 return FALSE; 1486#endif 1487 1488 1489 /* 1490 * For Tseng, we set up some often-used values 1491 */ 1492 1493 switch (pTseng->Bytesperpixel) { /* for MULBPP optimization */ 1494 case 1: 1495 pTseng->powerPerPixel = 0; 1496 pTseng->planemask_mask = 0x000000FF; 1497 pTseng->neg_x_pixel_offset = 0; 1498 break; 1499 case 2: 1500 pTseng->powerPerPixel = 1; 1501 pTseng->planemask_mask = 0x0000FFFF; 1502 pTseng->neg_x_pixel_offset = 1; 1503 break; 1504 case 3: 1505 pTseng->powerPerPixel = 1; 1506 pTseng->planemask_mask = 0x00FFFFFF; 1507 pTseng->neg_x_pixel_offset = 2; /* is this correct ??? */ 1508 break; 1509 case 4: 1510 pTseng->powerPerPixel = 2; 1511 pTseng->planemask_mask = 0xFFFFFFFF; 1512 pTseng->neg_x_pixel_offset = 3; 1513 break; 1514 } 1515 1516 /* 1517 * Init ping-pong registers. 1518 * This might be obsoleted by the BACKGROUND_OPERATIONS flag. 1519 */ 1520 pTseng->tsengFg = 0; 1521 pTseng->tsengBg = 16; 1522 pTseng->tsengPat = 32; 1523 1524 /* for register write optimisation */ 1525 pTseng->tseng_old_dir = -1; 1526 pTseng->old_x = 0; 1527 pTseng->old_y = 0; 1528 1529 /* 1530 * Finally, we set up the video memory space available to the pixmap 1531 * cache. In this case, all memory from the end of the virtual screen to 1532 * the end of video memory minus 1K (which we already reserved), can be 1533 * used. 1534 */ 1535 1536 AvailFBArea.x1 = 0; 1537 AvailFBArea.y1 = 0; 1538 AvailFBArea.x2 = pScrn->displayWidth; 1539 AvailFBArea.y2 = (pScrn->videoRam * 1024) / 1540 (pScrn->displayWidth * pTseng->Bytesperpixel); 1541 1542 xf86InitFBManager(pScreen, &AvailFBArea); 1543 1544 return (XAAInit(pScreen, pXAAinfo)); 1545 1546} 1547