1 2#ifdef HAVE_CONFIG_H 3#include "config.h" 4#endif 5 6/* 7 * if NO_OPTIMIZE is set, some optimizations are disabled. 8 * 9 * What it basically tries to do is minimize the amounts of writes to 10 * accelerator registers, since these are the ones that slow down small 11 * operations a lot. 12 */ 13/* #define NO_OPTIMIZE */ 14 15/* 16 * if ET6K_TRANSPARENCY is set, ScreentoScreenCopy operations (and pattern 17 * fills) will support transparency. But then the planemask support has to 18 * be dropped. The default here is to support planemasks, because all Tseng 19 * chips can do this. Only the ET6000 supports a transparency compare. The 20 * code could be easily changed to support transparency on the ET6000 and 21 * planemasks on the others, but that's only useful when transparency is 22 * more important than planemasks. 23 */ 24#undef ET6K_TRANSPARENCY 25 26#include "tseng.h" 27#include "tseng_accel.h" 28 29#ifdef HAVE_XAA_H 30#include "miline.h" 31 32/* 33 * conversion from X ROPs to Microsoft ROPs. 34 */ 35 36static int W32OpTable[] = 37{ 38 0x00, /* Xclear 0 */ 39 0x88, /* Xand src AND dst */ 40 0x44, /* XandReverse src AND NOT dst */ 41 0xcc, /* Xcopy src */ 42 0x22, /* XandInverted NOT src AND dst */ 43 0xaa, /* Xnoop dst */ 44 0x66, /* Xxor src XOR dst */ 45 0xee, /* Xor src OR dst */ 46 0x11, /* Xnor NOT src AND NOT dst */ 47 0x99, /* Xequiv NOT src XOR dst */ 48 0x55, /* Xinvert NOT dst */ 49 0xdd, /* XorReverse src OR NOT dst */ 50 0x33, /* XcopyInverted NOT src */ 51 0xbb, /* XorInverted NOT src OR dst */ 52 0x77, /* Xnand NOT src OR NOT dst */ 53 0xff /* Xset 1 */ 54}; 55 56static int W32OpTable_planemask[] = 57{ 58 0x0a, /* Xclear 0 */ 59 0x8a, /* Xand src AND dst */ 60 0x4a, /* XandReverse src AND NOT dst */ 61 0xca, /* Xcopy src */ 62 0x2a, /* XandInverted NOT src AND dst */ 63 0xaa, /* Xnoop dst */ 64 0x6a, /* Xxor src XOR dst */ 65 0xea, /* Xor src OR dst */ 66 0x1a, /* Xnor NOT src AND NOT dst */ 67 0x9a, /* Xequiv NOT src XOR dst */ 68 0x5a, /* Xinvert NOT dst */ 69 0xda, /* XorReverse src OR NOT dst */ 70 0x3a, /* XcopyInverted NOT src */ 71 0xba, /* XorInverted NOT src OR dst */ 72 0x7a, /* Xnand NOT src OR NOT dst */ 73 0xfa /* Xset 1 */ 74}; 75 76static int W32PatternOpTable[] = 77{ 78 0x00, /* Xclear 0 */ 79 0xa0, /* Xand pat AND dst */ 80 0x50, /* XandReverse pat AND NOT dst */ 81 0xf0, /* Xcopy pat */ 82 0x0a, /* XandInverted NOT pat AND dst */ 83 0xaa, /* Xnoop dst */ 84 0x5a, /* Xxor pat XOR dst */ 85 0xfa, /* Xor pat OR dst */ 86 0x05, /* Xnor NOT pat AND NOT dst */ 87 0xa5, /* Xequiv NOT pat XOR dst */ 88 0x55, /* Xinvert NOT dst */ 89 0xf5, /* XorReverse pat OR NOT dst */ 90 0x0f, /* XcopyInverted NOT pat */ 91 0xaf, /* XorInverted NOT pat OR dst */ 92 0x5f, /* Xnand NOT pat OR NOT dst */ 93 0xff /* Xset 1 */ 94}; 95 96 97 98/**********************************************************************/ 99 100static void 101tseng_terminate_acl(TsengPtr pTseng) 102{ 103 /* only terminate when needed */ 104/* if (*(volatile unsigned char *)ACL_ACCELERATOR_STATUS & 0x06) */ 105 { 106 ACL_SUSPEND_TERMINATE(0x00); 107 /* suspend any running operation */ 108 ACL_SUSPEND_TERMINATE(0x01); 109 WAIT_ACL; 110 ACL_SUSPEND_TERMINATE(0x00); 111 /* ... and now terminate it */ 112 ACL_SUSPEND_TERMINATE(0x10); 113 WAIT_ACL; 114 ACL_SUSPEND_TERMINATE(0x00); 115 } 116} 117 118void 119tseng_recover_timeout(TsengPtr pTseng) 120{ 121 if (pTseng->ChipType == ET4000) { 122 ErrorF("trying to unlock......................................\n"); 123 MMIO_OUT32(pTseng->tsengCPU2ACLBase,0,0L); /* try unlocking the bus when CPU-to-accel gets stuck */ 124 125 /* flush the accelerator pipeline */ 126 ACL_SUSPEND_TERMINATE(0x00); 127 ACL_SUSPEND_TERMINATE(0x02); 128 ACL_SUSPEND_TERMINATE(0x00); 129 } 130} 131 132void 133tseng_init_acl(ScrnInfoPtr pScrn) 134{ 135 TsengPtr pTseng = TsengPTR(pScrn); 136 137 PDEBUG(" tseng_init_acl\n"); 138 /* 139 * prepare some shortcuts for faster access to memory mapped registers 140 */ 141 142 pTseng->scratchMemBase = pTseng->FbBase + pTseng->AccelColorBufferOffset; 143 /* 144 * we won't be using tsengCPU2ACLBase in linear memory mode anyway, since 145 * using the MMU apertures restricts the amount of useable video memory 146 * to only 2MB, supposing we ONLY redirect MMU aperture 2 to the CPU. 147 * (see data book W32p, page 207) 148 */ 149 pTseng->tsengCPU2ACLBase = pTseng->FbBase + 0x200000; /* MMU aperture 2 */ 150 151#ifdef DEBUG 152 ErrorF("MMioBase = 0x%x, scratchMemBase = 0x%x\n", pTseng->MMioBase, pTseng->scratchMemBase); 153#endif 154 155 /* 156 * prepare the accelerator for some real work 157 */ 158 159 tseng_terminate_acl(pTseng); 160 161 ACL_INTERRUPT_STATUS(0xe); /* clear interrupts */ 162 ACL_INTERRUPT_MASK(0x04); /* disable interrupts, but enable deadlock exit */ 163 ACL_INTERRUPT_STATUS(0x0); 164 ACL_ACCELERATOR_STATUS_SET(0x0); 165 166 if (pTseng->ChipType == ET6000) { 167 ACL_STEPPING_INHIBIT(0x0); /* Undefined at power-on, let all maps (Src, Dst, Mix, Pat) step */ 168 ACL_6K_CONFIG(0x00); /* maximum performance -- what did you think? */ 169 ACL_POWER_CONTROL(0x01); /* conserve power when ACL is idle */ 170 ACL_MIX_CONTROL(0x33); 171 ACL_TRANSFER_DISABLE(0x00); /* Undefined at power-on, enable all transfers */ 172 } else { /* W32i/W32p */ 173 ACL_RELOAD_CONTROL(0x0); 174 ACL_SYNC_ENABLE(0x1); /* | 0x2 = 0WS ACL read. Yields up to 10% faster operation for small blits */ 175 ACL_ROUTING_CONTROL(0x00); 176 } 177 178 /* Enable the W32p startup bit and set use an eight-bit pixel depth */ 179 ACL_NQ_X_POSITION(0); 180 ACL_NQ_Y_POSITION(0); 181 ACL_PIXEL_DEPTH((pScrn->bitsPerPixel - 8) << 1); 182 /* writing destination address will start ACL */ 183 ACL_OPERATION_STATE(0x10); 184 185 ACL_DESTINATION_Y_OFFSET(pScrn->displayWidth * pTseng->Bytesperpixel - 1); 186 ACL_XY_DIRECTION(0); 187 188 MMU_CONTROL(0x74); 189 190 if (pTseng->ChipType == ET4000) { 191 /* 192 * Since the w32p revs C and D don't have any memory mapped when the 193 * accelerator registers are used it is necessary to use the MMUs to 194 * provide a semblance of linear memory. Fortunately on these chips 195 * the MMU appertures are 1 megabyte each. So as long as we are 196 * willing to only use 3 megs of video memory we can have some 197 * acceleration. If we ever get the CPU-to-screen-color-expansion 198 * stuff working then we will NOT need to sacrifice the extra 1MB 199 * provided by MBP2, because we could do dynamic switching of the APT 200 * bit in the MMU control register. 201 * 202 * On W32p rev c and d MBP2 is hardwired to 0x200000 when linear 203 * memory mode is enabled. (On rev a it is programmable). 204 * 205 * W32p rev a and b have their first 2M mapped in the normal (non-MMU) 206 * way, and MMU0 and MMU1, each 512 kb wide, can be used to access 207 * another 1MB of memory. This totals to 3MB of mem. available in 208 * linear memory when the accelerator is enabled. 209 */ 210 if ((pTseng->ChipRev == REV_A) || (pTseng->ChipRev == REV_B)) { 211 MMIO_OUT32(pTseng->MMioBase, 0x00<<0, 0x200000L); 212 MMIO_OUT32(pTseng->MMioBase, 0x04<<0, 0x280000L); 213 } else { /* rev C & D */ 214 MMIO_OUT32(pTseng->MMioBase, 0x00<<0, 0x0L); 215 MMIO_OUT32 (pTseng->MMioBase, 0x04<<0, 0x100000L); 216 } 217 } 218} 219 220/* 221 * ET4/6K acceleration interface -- color expansion primitives. 222 * 223 * Uses Harm Hanemaayer's generic acceleration interface (XAA). 224 * 225 * Author: Koen Gadeyne 226 * 227 * Much of the acceleration code is based on the XF86_W32 server code from 228 * Glenn Lai. 229 * 230 * 231 * Color expansion capabilities of the Tseng chip families: 232 * 233 * Chip screen-to-screen CPU-to-screen Supported depths 234 * 235 * ET4000W32/W32i No Yes 8bpp only 236 * ET4000W32p Yes Yes 8bpp only 237 * ET6000 Yes No 8/16/24/32 bpp 238 */ 239#define SET_FUNCTION_COLOREXPAND \ 240 if (pTseng->ChipType == ET6000) \ 241 ACL_MIX_CONTROL(0x32); \ 242 else \ 243 ACL_ROUTING_CONTROL(0x08); 244 245#define SET_FUNCTION_COLOREXPAND_CPU \ 246 ACL_ROUTING_CONTROL(0x02); 247 248 249static void 250TsengSubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn, 251 int x, int y, int w, int h, int skipleft) 252{ 253 TsengPtr pTseng = TsengPTR(pScrn); 254 255 if (pTseng->ChipType == ET4000) { 256 /* the accelerator needs DWORD padding, and "w" is in PIXELS... */ 257 pTseng->acl_colexp_width_dwords = (MULBPP(pTseng, w) + 31) >> 5; 258 pTseng->acl_colexp_width_bytes = (MULBPP(pTseng, w) + 7) >> 3; 259 } 260 261 pTseng->acl_ColorExpandDst = FBADDR(pTseng, x, y); 262 pTseng->acl_skipleft = skipleft; 263 264 wait_acl_queue(pTseng); 265 266#if 0 267 ACL_MIX_Y_OFFSET(w - 1); 268 269 ErrorF(" W=%d", w); 270#endif 271 SET_XY(pTseng, w, 1); 272} 273 274static void 275TsengSubsequentColorExpandScanline(ScrnInfoPtr pScrn, 276 int bufno) 277{ 278 TsengPtr pTseng = TsengPTR(pScrn); 279 280 wait_acl_queue(pTseng); 281 282 ACL_MIX_ADDRESS((pTseng->AccelColorExpandBufferOffsets[bufno] << 3) + pTseng->acl_skipleft); 283 START_ACL(pTseng, pTseng->acl_ColorExpandDst); 284 285 /* move to next scanline */ 286 pTseng->acl_ColorExpandDst += pTseng->line_width; 287 288 /* 289 * If not using triple-buffering, we need to wait for the queued 290 * register set to be transferred to the working register set here, 291 * because otherwise an e.g. double-buffering mechanism could overwrite 292 * the buffer that's currently being worked with with new data too soon. 293 * 294 * WAIT_QUEUE; // not needed with triple-buffering 295 */ 296} 297 298 299 300/* 301 * We use this intermediate CPU-to-Screen color expansion because the one 302 * provided by XAA seems to lock up the accelerator engine. 303 * 304 * One of the main differences between the XAA approach and this one is that 305 * transfers are done per byte. I'm not sure if that is needed though. 306 */ 307static void 308TsengSubsequentColorExpandScanline_8bpp(ScrnInfoPtr pScrn, int bufno) 309{ 310 TsengPtr pTseng = TsengPTR(pScrn); 311 pointer dest = pTseng->tsengCPU2ACLBase; 312 int i,j; 313 CARD8 *bufptr; 314 315 i = pTseng->acl_colexp_width_bytes; 316 bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 317 318 wait_acl_queue(pTseng); 319 START_ACL (pTseng, pTseng->acl_ColorExpandDst); 320 321/* *((LongP) (MMioBase + 0x08)) = (CARD32) pTseng->acl_ColorExpandDst;*/ 322/* MMIO_OUT32(tsengCPU2ACLBase,0, (CARD32)pTseng->acl_ColorExpandDst); */ 323 j = 0; 324 /* Copy scanline data to accelerator MMU aperture byte by byte */ 325 while (i--) { /* FIXME: we need to take care of PCI bursting and MMU overflow here! */ 326 MMIO_OUT8(dest,j++, *bufptr++); 327 } 328 329 /* move to next scanline */ 330 pTseng->acl_ColorExpandDst += pTseng->line_width; 331} 332 333/* 334 * This function does direct memory-to-CPU bit doubling for color-expansion 335 * at 16bpp on W32 chips. They can only do 8bpp color expansion, so we have 336 * to expand the incoming data to 2bpp first. 337 */ 338static void 339TsengSubsequentColorExpandScanline_16bpp(ScrnInfoPtr pScrn, int bufno) 340{ 341 TsengPtr pTseng = TsengPTR(pScrn); 342 pointer dest = pTseng->tsengCPU2ACLBase; 343 int i,j; 344 CARD8 *bufptr; 345 register CARD32 bits16; 346 347 i = pTseng->acl_colexp_width_dwords * 2; 348 bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 349 350 wait_acl_queue(pTseng); 351 START_ACL(pTseng, pTseng->acl_ColorExpandDst); 352 353 j = 0; 354 while (i--) { 355 bits16 = pTseng->ColExpLUT[*bufptr++]; 356 MMIO_OUT8(dest,j++,bits16 & 0xFF); 357 MMIO_OUT8(dest,j++,(bits16 >> 8) & 0xFF); 358 } 359 360 /* move to next scanline */ 361 pTseng->acl_ColorExpandDst += pTseng->line_width; 362} 363 364/* 365 * This function does direct memory-to-CPU bit doubling for color-expansion 366 * at 24bpp on W32 chips. They can only do 8bpp color expansion, so we have 367 * to expand the incoming data to 3bpp first. 368 */ 369static void 370TsengSubsequentColorExpandScanline_24bpp(ScrnInfoPtr pScrn, int bufno) 371{ 372 TsengPtr pTseng = TsengPTR(pScrn); 373 pointer dest = pTseng->tsengCPU2ACLBase; 374 int i, k, j = -1; 375 CARD8 *bufptr; 376 register CARD32 bits24; 377 378 i = pTseng->acl_colexp_width_dwords * 4; 379 bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 380 381 wait_acl_queue(pTseng); 382 START_ACL(pTseng, pTseng->acl_ColorExpandDst); 383 384 /* take 8 input bits, expand to 3 output bytes */ 385 bits24 = pTseng->ColExpLUT[*bufptr++]; 386 k = 0; 387 while (i--) { 388 if ((j++) == 2) { /* "i % 3" operation is much to expensive */ 389 j = 0; 390 bits24 = pTseng->ColExpLUT[*bufptr++]; 391 } 392 MMIO_OUT8(dest,k++,bits24 & 0xFF); 393 bits24 >>= 8; 394 } 395 396 /* move to next scanline */ 397 pTseng->acl_ColorExpandDst += pTseng->line_width; 398} 399 400/* 401 * This function does direct memory-to-CPU bit doubling for color-expansion 402 * at 32bpp on W32 chips. They can only do 8bpp color expansion, so we have 403 * to expand the incoming data to 4bpp first. 404 */ 405static void 406TsengSubsequentColorExpandScanline_32bpp(ScrnInfoPtr pScrn, int bufno) 407{ 408 TsengPtr pTseng = TsengPTR(pScrn); 409 pointer dest = pTseng->tsengCPU2ACLBase; 410 int i,j; 411 CARD8 *bufptr; 412 register CARD32 bits32; 413 414 i = pTseng->acl_colexp_width_dwords; 415 /* amount of blocks of 8 bits to expand to 32 bits (=1 DWORD) */ 416 bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 417 418 wait_acl_queue(pTseng); 419 START_ACL(pTseng, pTseng->acl_ColorExpandDst); 420 421 j = 0; 422 while (i--) { 423 bits32 = pTseng->ColExpLUT[*bufptr++]; 424 MMIO_OUT8(dest,j++,bits32 & 0xFF); 425 MMIO_OUT8(dest,j++,(bits32 >> 8) & 0xFF); 426 MMIO_OUT8(dest,j++,(bits32 >> 16) & 0xFF); 427 MMIO_OUT8(dest,j++,(bits32 >> 24) & 0xFF); 428 } 429 430 /* move to next scanline */ 431 pTseng->acl_ColorExpandDst += pTseng->line_width; 432} 433 434/* 435 * CPU-to-Screen color expansion. 436 * This is for ET4000 only (The ET6000 cannot do this) 437 */ 438static void 439TsengSetupForCPUToScreenColorExpandFill(ScrnInfoPtr pScrn, 440 int fg, int bg, int rop, unsigned int planemask) 441{ 442 TsengPtr pTseng = TsengPTR(pScrn); 443 444/* ErrorF("X"); */ 445 446 PINGPONG(pTseng); 447 448 wait_acl_queue(pTseng); 449 450 SET_FG_ROP(rop); 451 SET_BG_ROP_TR(rop, bg); 452 453 SET_XYDIR(0); 454 455 SET_FG_BG_COLOR(pTseng, fg, bg); 456 457 SET_FUNCTION_COLOREXPAND_CPU; 458 459 /* assure correct alignment of MIX address (ACL needs same alignment here as in MMU aperture) */ 460 ACL_MIX_ADDRESS(0); 461} 462 463#ifdef TSENG_CPU_TO_SCREEN_COLOREXPAND 464/* 465 * TsengSubsequentCPUToScreenColorExpand() is potentially dangerous: 466 * Not writing enough data to the MMU aperture for CPU-to-screen color 467 * expansion will eventually cause a system deadlock! 468 * 469 * Note that CPUToScreenColorExpand operations _always_ require a 470 * WAIT_INTERFACE before starting a new operation (this is empyrical, 471 * though) 472 */ 473static void 474TsengSubsequentCPUToScreenColorExpandFill(ScrnInfoPtr pScrn, 475 int x, int y, int w, int h, int skipleft) 476{ 477 TsengPtr pTseng = TsengPTR(pScrn); 478 int destaddr = FBADDR(pTseng, x, y); 479 480 /* ErrorF(" %dx%d|%d ",w,h,skipleft); */ 481 if (skipleft) 482 ErrorF("Can't do: Skipleft = %d\n", skipleft); 483 484/* wait_acl_queue(); */ 485 ErrorF("=========WAIT FIXME!\n"); 486 WAIT_INTERFACE; 487 488 ACL_MIX_Y_OFFSET(w - 1); 489 SET_XY(pTseng, w, h); 490 START_ACL(pTseng, destaddr); 491} 492#endif 493 494static void 495TsengSetupForScreenToScreenColorExpandFill(ScrnInfoPtr pScrn, 496 int fg, int bg, int rop, unsigned int planemask) 497{ 498 TsengPtr pTseng = TsengPTR(pScrn); 499 500/* ErrorF("SSC "); */ 501 502 PINGPONG(pTseng); 503 504 wait_acl_queue(pTseng); 505 506 SET_FG_ROP(rop); 507 SET_BG_ROP_TR(rop, bg); 508 509 SET_FG_BG_COLOR(pTseng, fg, bg); 510 511 SET_FUNCTION_COLOREXPAND; 512 513 SET_XYDIR(0); 514} 515 516static void 517TsengSubsequentScreenToScreenColorExpandFill(ScrnInfoPtr pScrn, 518 int x, int y, int w, int h, int srcx, int srcy, int skipleft) 519{ 520 TsengPtr pTseng = TsengPTR(pScrn); 521 int destaddr = FBADDR(pTseng, x, y); 522 523/* int srcaddr = FBADDR(pTseng, srcx, srcy); */ 524 525 wait_acl_queue(pTseng); 526 527 SET_XY(pTseng, w, h); 528 ACL_MIX_ADDRESS( /* MIX address is in BITS */ 529 (((srcy * pScrn->displayWidth) + srcx) * pScrn->bitsPerPixel) + skipleft); 530 531 ACL_MIX_Y_OFFSET(pTseng->line_width << 3); 532 533 START_ACL(pTseng, destaddr); 534} 535 536/* 537 * 538 */ 539static Bool 540TsengXAAInit_Colexp(ScrnInfoPtr pScrn) 541{ 542 int i, j, r; 543 TsengPtr pTseng = TsengPTR(pScrn); 544 XAAInfoRecPtr pXAAInfo = pTseng->AccelInfoRec; 545 546 PDEBUG(" TsengXAAInit_Colexp\n"); 547 548#ifdef TODO 549 if (OFLG_ISSET(OPTION_XAA_NO_COL_EXP, &vga256InfoRec.options)) 550 return; 551#endif 552 553 /* FIXME! disable accelerated color expansion for W32/W32i until it's fixed */ 554/* if (Is_W32 || Is_W32i) return; */ 555 556 /* 557 * Screen-to-screen color expansion. 558 * 559 * Scanline-screen-to-screen color expansion is slower than 560 * CPU-to-screen color expansion. 561 */ 562 563 pXAAInfo->ScreenToScreenColorExpandFillFlags = 564 BIT_ORDER_IN_BYTE_LSBFIRST | 565 SCANLINE_PAD_DWORD | 566 LEFT_EDGE_CLIPPING | 567 NO_PLANEMASK; 568 569#if 1 570 if ((pTseng->ChipType == ET6000) || (pScrn->bitsPerPixel == 8)) { 571 pXAAInfo->SetupForScreenToScreenColorExpandFill = 572 TsengSetupForScreenToScreenColorExpandFill; 573 pXAAInfo->SubsequentScreenToScreenColorExpandFill = 574 TsengSubsequentScreenToScreenColorExpandFill; 575 } 576#endif 577 578 /* 579 * Scanline CPU to screen color expansion for all W32 engines. 580 * 581 * real CPU-to-screen color expansion is extremely tricky, and only 582 * works for 8bpp anyway. 583 * 584 * This also allows us to do 16, 24 and 32 bpp color expansion by first 585 * doubling the bitmap pattern before color-expanding it, because W32s 586 * can only do 8bpp color expansion. 587 */ 588 589 pXAAInfo->ScanlineCPUToScreenColorExpandFillFlags = 590 BIT_ORDER_IN_BYTE_LSBFIRST | 591 SCANLINE_PAD_DWORD | 592 NO_PLANEMASK; 593 594 if (pTseng->ChipType == ET4000) { 595 pTseng->XAAScanlineColorExpandBuffers[0] = 596 xnfalloc(((pScrn->virtualX + 31)/32) * 4 * pTseng->Bytesperpixel); 597 if (pTseng->XAAScanlineColorExpandBuffers[0] == NULL) { 598 xf86Msg(X_ERROR, "Could not malloc color expansion scanline buffer.\n"); 599 return FALSE; 600 } 601 pXAAInfo->NumScanlineColorExpandBuffers = 1; 602 pXAAInfo->ScanlineColorExpandBuffers = pTseng->XAAScanlineColorExpandBuffers; 603 604 pXAAInfo->SetupForScanlineCPUToScreenColorExpandFill = 605 TsengSetupForCPUToScreenColorExpandFill; 606 607 pXAAInfo->SubsequentScanlineCPUToScreenColorExpandFill = 608 TsengSubsequentScanlineCPUToScreenColorExpandFill; 609 610 switch (pScrn->bitsPerPixel) { 611 case 8: 612 pXAAInfo->SubsequentColorExpandScanline = 613 TsengSubsequentColorExpandScanline_8bpp; 614 break; 615 case 15: 616 case 16: 617 pXAAInfo->SubsequentColorExpandScanline = 618 TsengSubsequentColorExpandScanline_16bpp; 619 break; 620 case 24: 621 pXAAInfo->SubsequentColorExpandScanline = 622 TsengSubsequentColorExpandScanline_24bpp; 623 break; 624 case 32: 625 pXAAInfo->SubsequentColorExpandScanline = 626 TsengSubsequentColorExpandScanline_32bpp; 627 break; 628 } 629 /* create color expansion LUT (used for >8bpp only) */ 630 pTseng->ColExpLUT = xnfalloc(sizeof(CARD32)*256); 631 if (pTseng->ColExpLUT == NULL) { 632 xf86Msg(X_ERROR, "Could not malloc color expansion tables.\n"); 633 return FALSE; 634 } 635 for (i = 0; i < 256; i++) { 636 r = 0; 637 for (j = 7; j >= 0; j--) { 638 r <<= pTseng->Bytesperpixel; 639 if ((i >> j) & 1) 640 r |= (1 << pTseng->Bytesperpixel) - 1; 641 } 642 pTseng->ColExpLUT[i] = r; 643 /* ErrorF("0x%08X, ",r ); if ((i%8)==7) ErrorF("\n"); */ 644 } 645 } else { 646 /* 647 * Triple-buffering is needed to account for double-buffering of Tseng 648 * acceleration registers. 649 */ 650 pXAAInfo->NumScanlineColorExpandBuffers = 3; 651 pXAAInfo->ScanlineColorExpandBuffers = 652 pTseng->XAAColorExpandBuffers; 653 pXAAInfo->SetupForScanlineCPUToScreenColorExpandFill = 654 TsengSetupForScreenToScreenColorExpandFill; 655 pXAAInfo->SubsequentScanlineCPUToScreenColorExpandFill = 656 TsengSubsequentScanlineCPUToScreenColorExpandFill; 657 pXAAInfo->SubsequentColorExpandScanline = 658 TsengSubsequentColorExpandScanline; 659 660 /* calculate memory addresses from video memory offsets */ 661 for (i = 0; i < pXAAInfo->NumScanlineColorExpandBuffers; i++) { 662 pTseng->XAAColorExpandBuffers[i] = 663 pTseng->FbBase + pTseng->AccelColorExpandBufferOffsets[i]; 664 } 665 666 pXAAInfo->ScanlineColorExpandBuffers = pTseng->XAAColorExpandBuffers; 667 } 668 669#ifdef TSENG_CPU_TO_SCREEN_COLOREXPAND 670 /* 671 * CPU-to-screen color expansion doesn't seem to be reliable yet. The 672 * W32 needs the correct amount of data sent to it in this mode, or it 673 * hangs the machine until is does (?). Currently, the init code in this 674 * file or the XAA code that uses this does something wrong, so that 675 * occasionally we get accelerator timeouts, and after a few, complete 676 * system hangs. 677 * 678 * The W32 engine requires SCANLINE_NO_PAD, but that doesn't seem to 679 * work very well (accelerator hangs). 680 * 681 * What works is this: tell XAA that we have SCANLINE_PAD_DWORD, and then 682 * add the following code in TsengSubsequentCPUToScreenColorExpand(): 683 * w = (w + 31) & ~31; this code rounds the width up to the nearest 684 * multiple of 32, and together with SCANLINE_PAD_DWORD, this makes 685 * CPU-to-screen color expansion work. Of course, the display isn't 686 * correct (4 chars are "blanked out" when only one is written, for 687 * example). But this shows that the principle works. But the code 688 * doesn't... 689 * 690 * The same thing goes for PAD_BYTE: this also works (with the same 691 * problems as SCANLINE_PAD_DWORD, although less prominent) 692 */ 693 694 pXAAInfo->CPUToScreenColorExpandFillFlags = 695 BIT_ORDER_IN_BYTE_LSBFIRST | 696 SCANLINE_PAD_DWORD | /* no other choice */ 697 CPU_TRANSFER_PAD_DWORD | 698 NO_PLANEMASK; 699 700 if (Is_W32_any && (pScrn->bitsPerPixel == 8)) { 701 pXAAInfo->SetupForCPUToScreenColorExpandFill = 702 TsengSetupForCPUToScreenColorExpandFill; 703 pXAAInfo->SubsequentCPUToScreenColorExpandFill = 704 TsengSubsequentCPUToScreenColorExpandFill; 705 706 /* we'll be using MMU aperture 2 */ 707 pXAAInfo->ColorExpandBase = (CARD8 *)pTseng->tsengCPU2ACLBase; 708 /* ErrorF("tsengCPU2ACLBase = 0x%x\n", pTseng->tsengCPU2ACLBase); */ 709 /* aperture size is 8kb in banked mode. Larger in linear mode, but 8kb is enough */ 710 pXAAInfo->ColorExpandRange = 8192; 711 } 712#endif 713 return TRUE; 714} 715 716/* 717 * ET4/6K acceleration interface. 718 * 719 * Uses Harm Hanemaayer's generic acceleration interface (XAA). 720 * 721 * Author: Koen Gadeyne 722 * 723 * Much of the acceleration code is based on the XF86_W32 server code from 724 * Glenn Lai. 725 * 726 */ 727 728/* 729 * This is the implementation of the Sync() function. 730 * 731 * To avoid pipeline/cache/buffer flushing in the PCI subsystem and the VGA 732 * controller, we might replace this read-intensive code with a dummy 733 * accelerator operation that causes a hardware-blocking (wait-states) until 734 * the running operation is done. 735 */ 736static void 737TsengSync(ScrnInfoPtr pScrn) 738{ 739 TsengPtr pTseng = TsengPTR(pScrn); 740 741 WAIT_ACL; 742} 743 744/* 745 * This is the implementation of the SetupForSolidFill function 746 * that sets up the coprocessor for a subsequent batch for solid 747 * rectangle fills. 748 */ 749static void 750TsengSetupForSolidFill(ScrnInfoPtr pScrn, 751 int color, int rop, unsigned int planemask) 752{ 753 TsengPtr pTseng = TsengPTR(pScrn); 754 755 /* 756 * all registers are queued in the Tseng chips, except of course for the 757 * stuff we want to store in off-screen memory. So we have to use a 758 * ping-pong method for those if we want to avoid having to wait for the 759 * accelerator when we want to write to these. 760 */ 761 762/* ErrorF("S"); */ 763 764 PINGPONG(pTseng); 765 766 wait_acl_queue(pTseng); 767 768 /* 769 * planemask emulation uses a modified "standard" FG ROP (see ET6000 770 * data book p 66 or W32p databook p 37: "Bit masking"). We only enable 771 * the planemask emulation when the planemask is not a no-op, because 772 * blitting speed would suffer. 773 */ 774 775 if ((planemask & pTseng->planemask_mask) != pTseng->planemask_mask) { 776 SET_FG_ROP_PLANEMASK(rop); 777 SET_BG_COLOR(pTseng, planemask); 778 } else { 779 SET_FG_ROP(rop); 780 } 781 SET_FG_COLOR(pTseng, color); 782 783 SET_FUNCTION_BLT; 784} 785 786/* 787 * This is the implementation of the SubsequentForSolidFillRect function 788 * that sends commands to the coprocessor to fill a solid rectangle of 789 * the specified location and size, with the parameters from the SetUp 790 * call. 791 * 792 * Splitting it up between ET4000 and ET6000 avoids lots of chipset type 793 * comparisons. 794 */ 795static void 796TsengW32pSubsequentSolidFillRect(ScrnInfoPtr pScrn, 797 int x, int y, int w, int h) 798{ 799 TsengPtr pTseng = TsengPTR(pScrn); 800 int destaddr = FBADDR(pTseng, x, y); 801 802 wait_acl_queue(pTseng); 803 804 /* 805 * Restoring the ACL_SOURCE_ADDRESS here is needed as long as Bresenham 806 * lines are enabled for >8bpp. Or until XAA allows us to render 807 * horizontal lines using the same Bresenham code instead of re-routing 808 * them to FillRectSolid. For XDECREASING lines, the SubsequentBresenham 809 * code adjusts the ACL_SOURCE_ADDRESS to make sure XDECREASING lines 810 * are drawn with the correct colors. But if a batch of subsequent 811 * operations also holds a few horizontal lines, they will be routed to 812 * here without calling the SetupFor... code again, and the 813 * ACL_SOURCE_ADDRESS will be wrong. 814 */ 815 ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg); 816 817 SET_XYDIR(0); /* FIXME: not needed with separate setupforsolidline */ 818 819 SET_XY_4(pTseng, w, h); 820 START_ACL(pTseng, destaddr); 821} 822 823static void 824Tseng6KSubsequentSolidFillRect(ScrnInfoPtr pScrn, 825 int x, int y, int w, int h) 826{ 827 TsengPtr pTseng = TsengPTR(pScrn); 828 int destaddr = FBADDR(pTseng, x, y); 829 830 wait_acl_queue(pTseng); 831 832 /* see comment in TsengW32pSubsequentFillRectSolid */ 833 ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg); 834 835 /* if XYDIR is not reset here, drawing a hardware line in between 836 * blitting, with the same ROP, color, etc will not cause a call to 837 * SetupFor... (because linedrawing uses SetupForSolidFill() as its 838 * Setup() function), and thus the direction register will have been 839 * changed by the last LineDraw operation. 840 */ 841 SET_XYDIR(0); 842 843 SET_XY_6(pTseng, w, h); 844 START_ACL_6(destaddr); 845} 846 847/* 848 * This is the implementation of the SetupForScreenToScreenCopy function 849 * that sets up the coprocessor for a subsequent batch of 850 * screen-to-screen copies. 851 */ 852 853static __inline__ void 854Tseng_setup_screencopy(TsengPtr pTseng, 855 int rop, unsigned int planemask, 856 int trans_color, int blit_dir) 857{ 858 wait_acl_queue(pTseng); 859 860#ifdef ET6K_TRANSPARENCY 861 if ((pTseng->ChipType == ET6000) && (trans_color != -1)) { 862 SET_BG_COLOR(trans_color); 863 SET_FUNCTION_BLT_TR; 864 } else 865 SET_FUNCTION_BLT; 866 867 SET_FG_ROP(rop); 868#else 869 if ((planemask & pTseng->planemask_mask) != pTseng->planemask_mask) { 870 SET_FG_ROP_PLANEMASK(rop); 871 SET_BG_COLOR(pTseng, planemask); 872 } else { 873 SET_FG_ROP(rop); 874 } 875 SET_FUNCTION_BLT; 876#endif 877 SET_XYDIR(blit_dir); 878} 879 880static void 881TsengSetupForScreenToScreenCopy(ScrnInfoPtr pScrn, 882 int xdir, int ydir, int rop, 883 unsigned int planemask, int trans_color) 884{ 885 /* 886 * xdir can be either 1 (left-to-right) or -1 (right-to-left). 887 * ydir can be either 1 (top-to-bottom) or -1 (bottom-to-top). 888 */ 889 890 TsengPtr pTseng = TsengPTR(pScrn); 891 int blit_dir = 0; 892 893/* ErrorF("C%d ", trans_color); */ 894 895 pTseng->acl_blitxdir = xdir; 896 pTseng->acl_blitydir = ydir; 897 898 if (xdir == -1) 899 blit_dir |= 0x1; 900 if (ydir == -1) 901 blit_dir |= 0x2; 902 903 Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, blit_dir); 904 905 ACL_SOURCE_WRAP(0x77); /* no wrap */ 906 ACL_SOURCE_Y_OFFSET(pTseng->line_width - 1); 907} 908 909/* 910 * This is the implementation of the SubsequentForScreenToScreenCopy 911 * that sends commands to the coprocessor to perform a screen-to-screen 912 * copy of the specified areas, with the parameters from the SetUp call. 913 * In this sample implementation, the direction must be taken into 914 * account when calculating the addresses (with coordinates, it might be 915 * a little easier). 916 * 917 * Splitting up the SubsequentScreenToScreenCopy between ET4000 and ET6000 918 * doesn't seem to improve speed for small blits (as it did with 919 * SolidFillRect). 920 */ 921static void 922TsengSubsequentScreenToScreenCopy(ScrnInfoPtr pScrn, 923 int x1, int y1, int x2, int y2, 924 int w, int h) 925{ 926 TsengPtr pTseng = TsengPTR(pScrn); 927 int srcaddr, destaddr; 928 929 /* 930 * Optimizing note: the pre-calc code below (i.e. until the first 931 * register write) doesn't significantly affect performance. Removing it 932 * all boosts small blits from 24.22 to 25.47 MB/sec. Don't waste time 933 * on that. One less PCI bus write would boost us to 30.00 MB/sec, up 934 * from 24.22. Waste time on _that_... 935 */ 936 937 /* tseng chips want x-sizes in bytes, not pixels */ 938 x1 = MULBPP(pTseng, x1); 939 x2 = MULBPP(pTseng, x2); 940 941 /* 942 * If the direction is "decreasing", the chip wants the addresses 943 * to be at the other end, so we must be aware of that in our 944 * calculations. 945 */ 946 if (pTseng->acl_blitydir == -1) { 947 srcaddr = (y1 + h - 1) * pTseng->line_width; 948 destaddr = (y2 + h - 1) * pTseng->line_width; 949 } else { 950 srcaddr = y1 * pTseng->line_width; 951 destaddr = y2 * pTseng->line_width; 952 } 953 if (pTseng->acl_blitxdir == -1) { 954 /* Accelerator start address must point to first byte to be processed. 955 * Depending on the direction, this is the first or the last byte 956 * in the multi-byte pixel. 957 */ 958 int eol = MULBPP(pTseng, w); 959 960 srcaddr += x1 + eol - 1; 961 destaddr += x2 + eol - 1; 962 } else { 963 srcaddr += x1; 964 destaddr += x2; 965 } 966 967 wait_acl_queue(pTseng); 968 969 SET_XY(pTseng, w, h); 970 ACL_SOURCE_ADDRESS(srcaddr); 971 START_ACL(pTseng, destaddr); 972} 973 974#if 0 975static int pat_src_addr; 976 977static void 978TsengSetupForColor8x8PatternFill(ScrnInfoPtr pScrn, 979 int patx, int paty, int rop, unsigned int planemask, int trans_color) 980{ 981 TsengPtr pTseng = TsengPTR(pScrn); 982 983 pat_src_addr = FBADDR(pTseng, patx, paty); 984 985 ErrorF("P"); 986 987 Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, 0); 988 989 switch (pTseng->Bytesperpixel) { 990 case 1: 991 ACL_SOURCE_WRAP(0x33); /* 8x8 wrap */ 992 ACL_SOURCE_Y_OFFSET(8 - 1); 993 break; 994 case 2: 995 ACL_SOURCE_WRAP(0x34); /* 16x8 wrap */ 996 ACL_SOURCE_Y_OFFSET(16 - 1); 997 break; 998 case 3: 999 ACL_SOURCE_WRAP(0x3D); /* 24x8 wrap --- only for ET6000 !!! */ 1000 ACL_SOURCE_Y_OFFSET(32 - 1); /* this is no error -- see databook */ 1001 break; 1002 case 4: 1003 ACL_SOURCE_WRAP(0x35); /* 32x8 wrap */ 1004 ACL_SOURCE_Y_OFFSET(32 - 1); 1005 } 1006} 1007 1008static void 1009TsengSubsequentColor8x8PatternFillRect(ScrnInfoPtr pScrn, 1010 int patx, int paty, int x, int y, int w, int h) 1011{ 1012 TsengPtr pTseng = TsengPTR(pScrn); 1013 int destaddr = FBADDR(pTseng, x, y); 1014 int srcaddr = pat_src_addr + MULBPP(pTseng, paty * 8 + patx); 1015 1016 wait_acl_queue(pTseng); 1017 1018 ACL_SOURCE_ADDRESS(srcaddr); 1019 1020 SET_XY(pTseng, w, h); 1021 START_ACL(pTseng, destaddr); 1022} 1023#endif 1024 1025#if 0 1026/* 1027 * ImageWrite is nothing more than a per-scanline screencopy. 1028 */ 1029 1030static void 1031TsengSetupForScanlineImageWrite(ScrnInfoPtr pScrn, 1032 int rop, unsigned int planemask, int trans_color, int bpp, int depth) 1033{ 1034 TsengPtr pTseng = TsengPTR(pScrn); 1035 1036/* ErrorF("IW"); */ 1037 1038 Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, 0); 1039 1040 ACL_SOURCE_WRAP(0x77); /* no wrap */ 1041 ACL_SOURCE_Y_OFFSET(pTseng->line_width - 1); 1042} 1043 1044static void 1045TsengSubsequentScanlineImageWriteRect(ScrnInfoPtr pScrn, 1046 int x, int y, int w, int h, int skipleft) 1047{ 1048 TsengPtr pTseng = TsengPTR(pScrn); 1049 1050/* ErrorF("r%d",h); */ 1051 1052 pTseng->acl_iw_dest = y * pTseng->line_width + MULBPP(pTseng, x); 1053 pTseng->acl_skipleft = MULBPP(pTseng, skipleft); 1054 1055 wait_acl_queue(pTseng); 1056 SET_XY(pTseng, w, 1); 1057} 1058 1059static void 1060TsengSubsequentImageWriteScanline(ScrnInfoPtr pScrn, 1061 int bufno) 1062{ 1063 TsengPtr pTseng = TsengPTR(pScrn); 1064 1065/* ErrorF("%d", bufno); */ 1066 1067 wait_acl_queue(pTseng); 1068 1069 ACL_SOURCE_ADDRESS(pTseng->AccelImageWriteBufferOffsets[bufno] 1070 + pTseng->acl_skipleft); 1071 START_ACL(pTseng, pTseng->acl_iw_dest); 1072 pTseng->acl_iw_dest += pTseng->line_width; 1073} 1074#endif 1075 1076#if 0 1077/* 1078 * W32p/ET6000 hardware linedraw code. 1079 * 1080 * TsengSetupForSolidFill() is used as a setup function. 1081 * 1082 * Three major problems that needed to be solved here: 1083 * 1084 * 1. The "bias" value must be translated into the "line draw algorithm" 1085 * parameter in the Tseng accelerators. This parameter, although not 1086 * documented as such, needs to be set to the _inverse_ of the 1087 * appropriate bias bit (i.e. for the appropriate octant). 1088 * 1089 * 2. In >8bpp modes, the accelerator will render BYTES in the same order as 1090 * it is drawing the line. This means it will render the colors in the 1091 * same order as well, reversing the byte-order in pixels that are drawn 1092 * right-to-left. This causes wrong colors to be rendered. 1093 * 1094 * 3. The Tseng data book says that the ACL Y count register needs to be 1095 * programmed with "dy-1". A similar thing is said about ACL X count. But 1096 * this assumes (x2,y2) is NOT drawn (although that is not mentionned in 1097 * the data book). X assumes the endpoint _is_ drawn. If "dy-1" is used, 1098 * this sometimes results in a negative value (if dx==dy==0), 1099 * causing a complete accelerator hang. 1100 */ 1101 1102static void 1103TsengSubsequentSolidBresenhamLine(ScrnInfoPtr pScrn, 1104 int x, int y, int major, int minor, int err, int len, int octant) 1105{ 1106 TsengPtr pTseng = TsengPTR(pScrn); 1107 int destaddr = FBADDR(pTseng, x, y); 1108 int xydir = pTseng->BresenhamTable[octant]; 1109 1110 /* Tseng wants the real dx/dy in major/minor. Bresenham uses 2*dx and 2*dy */ 1111 minor >>= 1; 1112 major >>= 1; 1113 1114 wait_acl_queue(pTseng); 1115 1116 if (!(octant & YMAJOR)) { 1117 SET_X_YRAW(pTseng, len, 0xFFF); 1118 } else { 1119 SET_XY_RAW(pTseng,0xFFF, len - 1); 1120 } 1121 1122 SET_DELTA(minor, major); 1123 ACL_ERROR_TERM(-err); /* error term from XAA is NEGATIVE */ 1124 1125 /* make sure colors are rendered correctly if >8bpp */ 1126 if (octant & XDECREASING) { 1127 destaddr += pTseng->Bytesperpixel - 1; 1128 ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset 1129 + pTseng->tsengFg + pTseng->neg_x_pixel_offset); 1130 } else 1131 ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg); 1132 1133 SET_XYDIR(xydir); 1134 1135 START_ACL(pTseng, destaddr); 1136} 1137#endif 1138 1139#ifdef TODO 1140/* 1141 * Trapezoid filling code. 1142 * 1143 * TsengSetupForSolidFill() is used as a setup function 1144 */ 1145 1146#undef DEBUG_TRAP 1147 1148#ifdef TSENG_TRAPEZOIDS 1149static void 1150TsengSubsequentFillTrapezoidSolid(ytop, height, left, dxL, dyL, eL, right, dxR, dyR, eR) 1151 int ytop; 1152 int height; 1153 int left; 1154 int dxL, dyL; 1155 int eL; 1156 int right; 1157 int dxR, dyR; 1158 int eR; 1159{ 1160 unsigned int tseng_bias_compensate = 0xd8; 1161 int destaddr, algrthm; 1162 int xcount = right - left + 1; /* both edges included */ 1163 int dir_reg = 0x60; /* trapezoid drawing; use error term for primary edge */ 1164 int sec_dir_reg = 0x20; /* use error term for secondary edge */ 1165 int octant = 0; 1166 1167 /* ErrorF("#"); */ 1168 1169 int destaddr, algrthm; 1170 int xcount = right - left + 1; 1171 1172#ifdef USE_ERROR_TERM 1173 int dir_reg = 0x60; 1174 int sec_dir_reg = 0x20; 1175 1176#else 1177 int dir_reg = 0x40; 1178 int sec_dir_reg = 0x00; 1179 1180#endif 1181 int octant = 0; 1182 int bias = 0x00; /* FIXME !!! */ 1183 1184/* ErrorF("#"); */ 1185 1186#ifdef DEBUG_TRAP 1187 ErrorF("ytop=%d, height=%d, left=%d, dxL=%d, dyL=%d, eL=%d, right=%d, dxR=%d, dyR=%d, eR=%d ", 1188 ytop, height, left, dxL, dyL, eL, right, dxR, dyR, eR); 1189#endif 1190 1191 if ((dyL < 0) || (dyR < 0)) 1192 ErrorF("Tseng Trapezoids: Wrong assumption: dyL/R < 0\n"); 1193 1194 destaddr = FBADDR(pTseng, left, ytop); 1195 1196 /* left edge */ 1197 if (dxL < 0) { 1198 dir_reg |= 1; 1199 octant |= XDECREASING; 1200 dxL = -dxL; 1201 } 1202 /* Y direction is always positive (top-to-bottom drawing) */ 1203 1204 wait_acl_queue(pTseng); 1205 1206 /* left edge */ 1207 /* compute axial direction and load registers */ 1208 if (dxL >= dyL) { /* X is major axis */ 1209 dir_reg |= 4; 1210 SET_DELTA(dyL, dxL); 1211 if (dir_reg & 1) { /* edge coherency: draw left edge */ 1212 destaddr += pTseng->Bytesperpixel; 1213 sec_dir_reg |= 0x80; 1214 xcount--; 1215 } 1216 } else { /* Y is major axis */ 1217 SetYMajorOctant(octant); 1218 SET_DELTA(dxL, dyL); 1219 } 1220 ACL_ERROR_TERM(eL); 1221 1222 /* select "linedraw algorithm" (=bias) and load direction register */ 1223 /* ErrorF(" o=%d ", octant); */ 1224 algrthm = ((tseng_bias_compensate >> octant) & 1) ^ 1; 1225 dir_reg |= algrthm << 4; 1226 SET_XYDIR(dir_reg); 1227 1228 /* right edge */ 1229 if (dxR < 0) { 1230 sec_dir_reg |= 1; 1231 dxR = -dxR; 1232 } 1233 /* compute axial direction and load registers */ 1234 if (dxR >= dyR) { /* X is major axis */ 1235 sec_dir_reg |= 4; 1236 SET_SECONDARY_DELTA(dyR, dxR); 1237 if (dir_reg & 1) { /* edge coherency: do not draw right edge */ 1238 sec_dir_reg |= 0x40; 1239 xcount++; 1240 } 1241 } else { /* Y is major axis */ 1242 SET_SECONDARY_DELTA(dxR, dyR); 1243 } 1244 ACL_SECONDARY_ERROR_TERM(eR); 1245 1246 /* ErrorF("%02x", sec_dir_reg); */ 1247 SET_SECONDARY_XYDIR(sec_dir_reg); 1248 1249 SET_XY_6(pTseng, xcount, height); 1250 1251#ifdef DEBUG_TRAP 1252 ErrorF("-> %d,%d\n", xcount, height); 1253#endif 1254 1255 START_ACL_6(destaddr); 1256} 1257#endif 1258 1259#endif 1260 1261#endif 1262 1263/* 1264 * The following function sets up the supported acceleration. Call it from 1265 * the FbInit() function in the SVGA driver. Do NOT initialize any hardware 1266 * in here. That belongs in tseng_init_acl(). 1267 */ 1268Bool 1269TsengXAAInit(ScreenPtr pScreen) 1270{ 1271#ifdef HAVE_XAA_H 1272 ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1273 TsengPtr pTseng = TsengPTR(pScrn); 1274 XAAInfoRecPtr pXAAinfo; 1275 BoxRec AvailFBArea; 1276 1277 PDEBUG(" TsengXAAInit\n"); 1278 pTseng->AccelInfoRec = pXAAinfo = XAACreateInfoRec(); 1279 if (!pXAAinfo) 1280 return FALSE; 1281 1282 /* 1283 * Set up the main acceleration flags. 1284 */ 1285 pXAAinfo->Flags = PIXMAP_CACHE; 1286 1287 /* 1288 * The following line installs a "Sync" function, that waits for 1289 * all coprocessor operations to complete. 1290 */ 1291 pXAAinfo->Sync = TsengSync; 1292 1293 /* W32 and W32i must wait for ACL before changing registers */ 1294 if (pTseng->ChipType == ET4000) 1295 pTseng->need_wait_acl = TRUE; 1296 else 1297 pTseng->need_wait_acl = FALSE; 1298 1299 pTseng->line_width = pScrn->displayWidth * pTseng->Bytesperpixel; 1300 1301#if 1 1302 /* 1303 * SolidFillRect. 1304 * 1305 * The W32 and W32i chips don't have a register to set the amount of 1306 * bytes per pixel, and hence they don't skip 1 byte in each 4-byte word 1307 * at 24bpp. Therefor, the FG or BG colors would have to be concatenated 1308 * in video memory (R-G-B-R-G-B-... instead of R-G-B-X-R-G-B-X-..., with 1309 * X = dont' care), plus a wrap value that is a multiple of 3 would have 1310 * to be set. There is no such wrap combination available. 1311 */ 1312#ifdef OBSOLETE 1313 pXAAinfo->SolidFillFlags |= NO_PLANEMASK; 1314#endif 1315 1316 pXAAinfo->SetupForSolidFill = TsengSetupForSolidFill; 1317 if (pTseng->ChipType == ET6000) 1318 pXAAinfo->SubsequentSolidFillRect = Tseng6KSubsequentSolidFillRect; 1319 else 1320 pXAAinfo->SubsequentSolidFillRect = TsengW32pSubsequentSolidFillRect; 1321 1322#ifdef TSENG_TRAPEZOIDS 1323 if (pTseng->ChipType == ET6000) 1324 /* disabled for now: not fully compliant yet */ 1325 pXAAinfo->SubsequentFillTrapezoidSolid = TsengSubsequentFillTrapezoidSolid; 1326#endif 1327#endif 1328 1329#if 1 1330 /* 1331 * SceenToScreenCopy (BitBLT). 1332 * 1333 * Restrictions: On ET6000, we support EITHER a planemask OR 1334 * TRANSPARENCY, but not both (they use the same Pattern map). 1335 * All other chips can't do TRANSPARENCY at all. 1336 */ 1337#ifdef ET6K_TRANSPARENCY 1338 pXAAinfo->CopyAreaFlags = NO_PLANEMASK; 1339 if (pTseng->ChipType == ET4000) 1340 pXAAinfo->CopyAreaFlags |= NO_TRANSPARENCY; 1341 1342#else 1343 pXAAinfo->CopyAreaFlags = NO_TRANSPARENCY; 1344#endif 1345 1346 pXAAinfo->SetupForScreenToScreenCopy = 1347 TsengSetupForScreenToScreenCopy; 1348 pXAAinfo->SubsequentScreenToScreenCopy = 1349 TsengSubsequentScreenToScreenCopy; 1350#endif 1351 1352#if 0 1353 /* 1354 * ImageWrite. 1355 * 1356 * SInce this uses off-screen scanline buffers, it is only of use when 1357 * complex ROPs are used. But since the current XAA pixmap cache code 1358 * only works when an ImageWrite is provided, the NO_GXCOPY flag is 1359 * temporarily disabled. 1360 */ 1361 1362 if (pTseng->AccelImageWriteBufferOffsets[0]) { 1363 pXAAinfo->ScanlineImageWriteFlags = 1364 pXAAinfo->CopyAreaFlags | LEFT_EDGE_CLIPPING /* | NO_GXCOPY */ ; 1365 pXAAinfo->NumScanlineImageWriteBuffers = 2; 1366 pXAAinfo->SetupForScanlineImageWrite = 1367 TsengSetupForScanlineImageWrite; 1368 pXAAinfo->SubsequentScanlineImageWriteRect = 1369 TsengSubsequentScanlineImageWriteRect; 1370 pXAAinfo->SubsequentImageWriteScanline = 1371 TsengSubsequentImageWriteScanline; 1372 1373 /* calculate memory addresses from video memory offsets */ 1374 for (i = 0; i < pXAAinfo->NumScanlineImageWriteBuffers; i++) { 1375 pTseng->XAAScanlineImageWriteBuffers[i] = 1376 pTseng->FbBase + pTseng->AccelImageWriteBufferOffsets[i]; 1377 } 1378 1379 pXAAinfo->ScanlineImageWriteBuffers = pTseng->XAAScanlineImageWriteBuffers; 1380 } 1381#endif 1382 /* 1383 * 8x8 pattern tiling not possible on W32/i/p chips in 24bpp mode. 1384 * Currently, 24bpp pattern tiling doesn't work at all on those. 1385 * 1386 * FIXME: On W32 cards, pattern tiling doesn't work as expected. 1387 */ 1388 pXAAinfo->Color8x8PatternFillFlags = HARDWARE_PATTERN_PROGRAMMED_ORIGIN; 1389 1390 pXAAinfo->CachePixelGranularity = 8 * 8; 1391 1392#ifdef ET6K_TRANSPARENCY 1393 pXAAinfo->PatternFlags |= HARDWARE_PATTERN_NO_PLANEMASK; 1394 if (pTseng->ChipType == ET6000) 1395 pXAAinfo->PatternFlags |= HARDWARE_PATTERN_TRANSPARENCY; 1396#endif 1397 1398#if 0 1399 /* FIXME! This needs to be fixed for W32 and W32i (it "should work") */ 1400 if (pScrn->bitsPerPixel != 24) { 1401 pXAAinfo->SetupForColor8x8PatternFill = 1402 TsengSetupForColor8x8PatternFill; 1403 pXAAinfo->SubsequentColor8x8PatternFillRect = 1404 TsengSubsequentColor8x8PatternFillRect; 1405 } 1406#endif 1407 1408#if 0 /*1*/ 1409 /* 1410 * SolidLine. 1411 * 1412 * We use Bresenham by preference, because it supports hardware clipping 1413 * (using the error term). TwoPointLines() is implemented, but not used, 1414 * because clipped lines are not accelerated (hardware clipping support 1415 * is lacking)... 1416 */ 1417 1418 /* 1419 * Fill in the hardware linedraw ACL_XY_DIRECTION table 1420 * 1421 * W32BresTable[] converts XAA interface Bresenham octants to direct 1422 * ACL direction register contents. This includes the correct bias 1423 * setting etc. 1424 * 1425 * According to miline.h (but with base 0 instead of base 1 as in 1426 * miline.h), the octants are numbered as follows: 1427 * 1428 * \ | / 1429 * \ 2 | 1 / 1430 * \ | / 1431 * 3 \ | / 0 1432 * \|/ 1433 * ----------- 1434 * /| \ 1435 * 4 / | \ 7 1436 * / | \ 1437 * / 5 | 6 \ 1438 * / | \ 1439 * 1440 * In ACL_XY_DIRECTION, bits 2:0 are defined as follows: 1441 * 0: '1' if XDECREASING 1442 * 1: '1' if YDECREASING 1443 * 2: '1' if XMAJOR (== not YMAJOR) 1444 * 1445 * Bit 4 defines the bias. It should be set to '1' for all octants 1446 * NOT passed to miSetZeroLineBias(). i.e. the inverse of the X bias. 1447 * 1448 * (For MS compatible bias, the data book says to set to the same as 1449 * YDIR, i.e. bit 1 of the same register, = '1' if YDECREASING. MS 1450 * bias is towards octants 0..3 (i.e. Y decreasing), hence this 1451 * definition of bit 4) 1452 * 1453 */ 1454 pTseng->BresenhamTable = xnfalloc(8); 1455 if (pTseng->BresenhamTable == NULL) { 1456 xf86Msg(X_ERROR, "Could not malloc Bresenham Table.\n"); 1457 return FALSE; 1458 } 1459 for (i=0; i<8; i++) { 1460 unsigned char zerolinebias = miGetZeroLineBias(pScreen); 1461 pTseng->BresenhamTable[i] = 0xA0; /* command=linedraw, use error term */ 1462 if (i & XDECREASING) pTseng->BresenhamTable[i] |= 0x01; 1463 if (i & YDECREASING) pTseng->BresenhamTable[i] |= 0x02; 1464 if (!(i & YMAJOR)) pTseng->BresenhamTable[i] |= 0x04; 1465 if ((1 << i) & zerolinebias) pTseng->BresenhamTable[i] |= 0x10; 1466 /* ErrorF("BresenhamTable[%d]=0x%x\n", i, pTseng->BresenhamTable[i]); */ 1467 } 1468 1469 pXAAinfo->SolidLineFlags = 0; 1470 pXAAinfo->SetupForSolidLine = TsengSetupForSolidFill; 1471 pXAAinfo->SubsequentSolidBresenhamLine = 1472 TsengSubsequentSolidBresenhamLine; 1473 /* 1474 * ErrorTermBits is used to limit minor, major and error term, so it 1475 * must be min(errorterm_size, delta_major_size, delta_minor_size) 1476 * But the calculation for major and minor is done on the DOUBLED 1477 * values (as per the Bresenham algorithm), so they can also have 13 1478 * bits (inside XAA). They are divided by 2 in this driver, so they 1479 * are then again limited to 12 bits. 1480 */ 1481 pXAAinfo->SolidBresenhamLineErrorTermBits = 13; 1482 1483#endif 1484 1485#if 1 1486 /* set up color expansion acceleration */ 1487 if (!TsengXAAInit_Colexp(pScrn)) 1488 return FALSE; 1489#endif 1490 1491 1492 /* 1493 * For Tseng, we set up some often-used values 1494 */ 1495 1496 switch (pTseng->Bytesperpixel) { /* for MULBPP optimization */ 1497 case 1: 1498 pTseng->powerPerPixel = 0; 1499 pTseng->planemask_mask = 0x000000FF; 1500 pTseng->neg_x_pixel_offset = 0; 1501 break; 1502 case 2: 1503 pTseng->powerPerPixel = 1; 1504 pTseng->planemask_mask = 0x0000FFFF; 1505 pTseng->neg_x_pixel_offset = 1; 1506 break; 1507 case 3: 1508 pTseng->powerPerPixel = 1; 1509 pTseng->planemask_mask = 0x00FFFFFF; 1510 pTseng->neg_x_pixel_offset = 2; /* is this correct ??? */ 1511 break; 1512 case 4: 1513 pTseng->powerPerPixel = 2; 1514 pTseng->planemask_mask = 0xFFFFFFFF; 1515 pTseng->neg_x_pixel_offset = 3; 1516 break; 1517 } 1518 1519 /* 1520 * Init ping-pong registers. 1521 * This might be obsoleted by the BACKGROUND_OPERATIONS flag. 1522 */ 1523 pTseng->tsengFg = 0; 1524 pTseng->tsengBg = 16; 1525 pTseng->tsengPat = 32; 1526 1527 /* for register write optimisation */ 1528 pTseng->tseng_old_dir = -1; 1529 pTseng->old_x = 0; 1530 pTseng->old_y = 0; 1531 1532 /* 1533 * Finally, we set up the video memory space available to the pixmap 1534 * cache. In this case, all memory from the end of the virtual screen to 1535 * the end of video memory minus 1K (which we already reserved), can be 1536 * used. 1537 */ 1538 1539 AvailFBArea.x1 = 0; 1540 AvailFBArea.y1 = 0; 1541 AvailFBArea.x2 = pScrn->displayWidth; 1542 AvailFBArea.y2 = (pScrn->videoRam * 1024) / 1543 (pScrn->displayWidth * pTseng->Bytesperpixel); 1544 1545 xf86InitFBManager(pScreen, &AvailFBArea); 1546 1547 return (XAAInit(pScreen, pXAAinfo)); 1548#else 1549 return FALSE; 1550#endif 1551} 1552