Home | History | Annotate | Line # | Download | only in src
      1 
      2 #ifdef HAVE_CONFIG_H
      3 #include "config.h"
      4 #endif
      5 
      6 /*
      7  * if NO_OPTIMIZE is set, some optimizations are disabled.
      8  *
      9  * What it basically tries to do is minimize the number of writes to
     10  * accelerator registers, since these are the ones that slow down small
     11  * operations a lot.
     12  */
     13 /* #define NO_OPTIMIZE */
     14 
     15 /*
     16  * if ET6K_TRANSPARENCY is set, ScreentoScreenCopy operations (and pattern
     17  * fills) will support transparency. But then the planemask support has to
     18  * be dropped. The default here is to support planemasks, because all Tseng
     19  * chips can do this. Only the ET6000 supports a transparency compare. The
     20  * code could be easily changed to support transparency on the ET6000 and
     21  * planemasks on the others, but that's only useful when transparency is
     22  * more important than planemasks.
     23  */
     24 #undef ET6K_TRANSPARENCY
     25 
     26 #include "tseng.h"
     27 #include "tseng_accel.h"
     28 
     29 #ifdef HAVE_XAA_H
     30 #include "miline.h"
     31 
     32 /*
     33  * conversion from X ROPs to Microsoft ROPs.
     34  */
     35 
     36 static int W32OpTable[] =
     37 {
            /*
             * One entry per X raster op, listed in X opcode order (Xclear
             * first, Xset last).  Each value is the 8-bit ROP byte for the
             * source/destination operation named in the trailing comment;
             * e.g. 0xcc selects "src" and 0xaa selects "dst".
             */
     38     0x00,			       /* Xclear             0 */
     39     0x88,			       /* Xand               src AND dst */
     40     0x44,			       /* XandReverse        src AND NOT dst */
     41     0xcc,			       /* Xcopy              src */
     42     0x22,			       /* XandInverted       NOT src AND dst */
     43     0xaa,			       /* Xnoop              dst */
     44     0x66,			       /* Xxor               src XOR dst */
     45     0xee,			       /* Xor                src OR dst */
     46     0x11,			       /* Xnor               NOT src AND NOT dst */
     47     0x99,			       /* Xequiv             NOT src XOR dst */
     48     0x55,			       /* Xinvert            NOT dst */
     49     0xdd,			       /* XorReverse         src OR NOT dst */
     50     0x33,			       /* XcopyInverted      NOT src */
     51     0xbb,			       /* XorInverted        NOT src OR dst */
     52     0x77,			       /* Xnand              NOT src OR NOT dst */
     53     0xff			       /* Xset               1 */
     54 };
     55 
     56 static int W32OpTable_planemask[] =
     57 {
            /*
             * Same operations, in the same order, as W32OpTable, but every
             * entry has its low nibble forced to 0xa (the nibble that means
             * "dst" in Xnoop = 0xaa) while the high nibble keeps the requested
             * ROP.  Presumably the third ROP input carries the planemask, so
             * that masked-off bits keep the destination -- TODO confirm
             * against the definition of SET_FG_ROP_PLANEMASK.
             */
     58     0x0a,			       /* Xclear             0 */
     59     0x8a,			       /* Xand               src AND dst */
     60     0x4a,			       /* XandReverse        src AND NOT dst */
     61     0xca,			       /* Xcopy              src */
     62     0x2a,			       /* XandInverted       NOT src AND dst */
     63     0xaa,			       /* Xnoop              dst */
     64     0x6a,			       /* Xxor               src XOR dst */
     65     0xea,			       /* Xor                src OR dst */
     66     0x1a,			       /* Xnor               NOT src AND NOT dst */
     67     0x9a,			       /* Xequiv             NOT src XOR dst */
     68     0x5a,			       /* Xinvert            NOT dst */
     69     0xda,			       /* XorReverse         src OR NOT dst */
     70     0x3a,			       /* XcopyInverted      NOT src */
     71     0xba,			       /* XorInverted        NOT src OR dst */
     72     0x7a,			       /* Xnand              NOT src OR NOT dst */
     73     0xfa			       /* Xset               1 */
     74 };
     75 
     76 static int W32PatternOpTable[] =
     77 {
            /*
             * Same sixteen X raster ops, in the same order, as W32OpTable, but
             * with the pattern taking the place of the source operand: 0xf0
             * selects "pat", 0xaa still selects "dst".
             */
     78     0x00,			       /* Xclear             0 */
     79     0xa0,			       /* Xand               pat AND dst */
     80     0x50,			       /* XandReverse        pat AND NOT dst */
     81     0xf0,			       /* Xcopy              pat */
     82     0x0a,			       /* XandInverted       NOT pat AND dst */
     83     0xaa,			       /* Xnoop              dst */
     84     0x5a,			       /* Xxor               pat XOR dst */
     85     0xfa,			       /* Xor                pat OR dst */
     86     0x05,			       /* Xnor               NOT pat AND NOT dst */
     87     0xa5,			       /* Xequiv             NOT pat XOR dst */
     88     0x55,			       /* Xinvert            NOT dst */
     89     0xf5,			       /* XorReverse         pat OR NOT dst */
     90     0x0f,			       /* XcopyInverted      NOT pat */
     91     0xaf,			       /* XorInverted        NOT pat OR dst */
     92     0x5f,			       /* Xnand              NOT pat OR NOT dst */
     93     0xff			       /* Xset               1 */
     94 };
     95 
     96 
     97 
     98 /**********************************************************************/
     99 
    100 static void
    101 tseng_terminate_acl(TsengPtr pTseng)
    102 {
            /*
             * Bring the accelerator to a stop: first request a suspend, wait
             * for the ACL, then request a terminate and wait again.  The
             * suspend/terminate register is written back to 0x00 before and
             * after each request.
             *
             * NOTE: the status-register test below is commented out, so the
             * whole sequence runs unconditionally on every call.
             */
    103     /* only terminate when needed */
    104 /*  if (*(volatile unsigned char *)ACL_ACCELERATOR_STATUS & 0x06) */
    105     {
    106 	ACL_SUSPEND_TERMINATE(0x00);
    107 	/* suspend any running operation */
    108 	ACL_SUSPEND_TERMINATE(0x01);
    109 	WAIT_ACL;
    110 	ACL_SUSPEND_TERMINATE(0x00);
    111 	/* ... and now terminate it */
    112 	ACL_SUSPEND_TERMINATE(0x10);
    113 	WAIT_ACL;
    114 	ACL_SUSPEND_TERMINATE(0x00);
    115     }
    116 }
    117 
    118 void
    119 tseng_recover_timeout(TsengPtr pTseng)
    120 {
            /*
             * Best-effort recovery after an accelerator timeout.  Acts only
             * when ChipType is ET4000; for any other chip type this function
             * does nothing.
             */
    121     if (pTseng->ChipType == ET4000) {
    122 	ErrorF("trying to unlock......................................\n");
            	/* write one zero dword at offset 0 of the CPU-to-ACL aperture */
    123 	MMIO_OUT32(pTseng->tsengCPU2ACLBase,0,0L); /* try unlocking the bus when CPU-to-accel gets stuck */
    124 
    125         /* flush the accelerator pipeline */
    126 	ACL_SUSPEND_TERMINATE(0x00);
    127 	ACL_SUSPEND_TERMINATE(0x02);
    128 	ACL_SUSPEND_TERMINATE(0x00);
    129     }
    130 }
    131 
    132 void
    133 tseng_init_acl(ScrnInfoPtr pScrn)
    134 {
    135     TsengPtr pTseng = TsengPTR(pScrn);
    136 
    137     PDEBUG("	tseng_init_acl\n");
    138     /*
    139      * prepare some shortcuts for faster access to memory mapped registers
    140      */
    141 
    142     pTseng->scratchMemBase = pTseng->FbBase + pTseng->AccelColorBufferOffset;
    143     /*
    144      * we won't be using tsengCPU2ACLBase in linear memory mode anyway, since
    145      * using the MMU apertures restricts the amount of useable video memory
    146      * to only 2MB, supposing we ONLY redirect MMU aperture 2 to the CPU.
    147      * (see data book W32p, page 207)
    148      */
    149     pTseng->tsengCPU2ACLBase = pTseng->FbBase + 0x200000;	/* MMU aperture 2 */
    150 
    151 #ifdef DEBUG
    152     ErrorF("MMioBase = 0x%x, scratchMemBase = 0x%x\n", pTseng->MMioBase, pTseng->scratchMemBase);
    153 #endif
    154 
    155     /*
    156      * prepare the accelerator for some real work
    157      */
    158 
    159     tseng_terminate_acl(pTseng);
    160 
    161     ACL_INTERRUPT_STATUS(0xe);       /* clear interrupts */
    162     ACL_INTERRUPT_MASK(0x04);	       /* disable interrupts, but enable deadlock exit */
    163     ACL_INTERRUPT_STATUS(0x0);
    164     ACL_ACCELERATOR_STATUS_SET(0x0);
    165 
    166     if (pTseng->ChipType == ET6000) {
    167 	ACL_STEPPING_INHIBIT(0x0);   /* Undefined at power-on, let all maps (Src, Dst, Mix, Pat) step */
    168 	ACL_6K_CONFIG(0x00);	       /* maximum performance -- what did you think? */
    169 	ACL_POWER_CONTROL(0x01);     /* conserve power when ACL is idle */
    170 	ACL_MIX_CONTROL(0x33);
    171 	ACL_TRANSFER_DISABLE(0x00);  /* Undefined at power-on, enable all transfers */
    172     } else {			       /* W32i/W32p */
    173   	ACL_RELOAD_CONTROL(0x0);
    174 	ACL_SYNC_ENABLE(0x1);	       /* | 0x2 = 0WS ACL read. Yields up to 10% faster operation for small blits */
    175 	ACL_ROUTING_CONTROL(0x00);
    176     }
    177 
    178     /* Enable the W32p startup bit and set use an eight-bit pixel depth */
    179     ACL_NQ_X_POSITION(0);
    180     ACL_NQ_Y_POSITION(0);
    181     ACL_PIXEL_DEPTH((pScrn->bitsPerPixel - 8) << 1);
    182     /* writing destination address will start ACL */
    183     ACL_OPERATION_STATE(0x10);
    184 
    185     ACL_DESTINATION_Y_OFFSET(pScrn->displayWidth * pTseng->Bytesperpixel - 1);
    186     ACL_XY_DIRECTION(0);
    187 
    188     MMU_CONTROL(0x74);
    189 
    190     if (pTseng->ChipType == ET4000) {
    191 	/*
    192 	 * Since the w32p revs C and D don't have any memory mapped when the
    193 	 * accelerator registers are used it is necessary to use the MMUs to
    194 	 * provide a semblance of linear memory. Fortunately on these chips
    195 	 * the MMU appertures are 1 megabyte each. So as long as we are
    196 	 * willing to only use 3 megs of video memory we can have some
    197 	 * acceleration. If we ever get the CPU-to-screen-color-expansion
    198 	 * stuff working then we will NOT need to sacrifice the extra 1MB
    199 	 * provided by MBP2, because we could do dynamic switching of the APT
    200 	 * bit in the MMU control register.
    201 	 *
    202 	 * On W32p rev c and d MBP2 is hardwired to 0x200000 when linear
    203 	 * memory mode is enabled. (On rev a it is programmable).
    204 	 *
    205 	 * W32p rev a and b have their first 2M mapped in the normal (non-MMU)
    206 	 * way, and MMU0 and MMU1, each 512 kb wide, can be used to access
    207 	 * another 1MB of memory. This totals to 3MB of mem. available in
    208 	 * linear memory when the accelerator is enabled.
    209 	 */
    210 	if ((pTseng->ChipRev == REV_A) || (pTseng->ChipRev == REV_B)) {
    211 	    MMIO_OUT32(pTseng->MMioBase, 0x00<<0, 0x200000L);
    212 	    MMIO_OUT32(pTseng->MMioBase, 0x04<<0, 0x280000L);
    213 	} else {		       /* rev C & D */
    214 	    MMIO_OUT32(pTseng->MMioBase, 0x00<<0, 0x0L);
    215 	    MMIO_OUT32 (pTseng->MMioBase, 0x04<<0, 0x100000L);
    216 	}
    217     }
    218 }
    219 
    220 /*
    221  * ET4/6K acceleration interface -- color expansion primitives.
    222  *
    223  * Uses Harm Hanemaayer's generic acceleration interface (XAA).
    224  *
    225  * Author: Koen Gadeyne
    226  *
    227  * Much of the acceleration code is based on the XF86_W32 server code from
    228  * Glenn Lai.
    229  *
    230  *
    231  *     Color expansion capabilities of the Tseng chip families:
    232  *
    233  *     Chip     screen-to-screen   CPU-to-screen   Supported depths
    234  *
    235  *   ET4000W32/W32i   No               Yes             8bpp only
    236  *   ET4000W32p       Yes              Yes             8bpp only
    237  *   ET6000           Yes              No              8/16/24/32 bpp
    238  */
    239 #define SET_FUNCTION_COLOREXPAND \
    240     if (pTseng->ChipType == ET6000) \
    241       ACL_MIX_CONTROL(0x32); \
    242     else \
    243       ACL_ROUTING_CONTROL(0x08);
    244 
        /*
         * NOTE(review): SET_FUNCTION_COLOREXPAND above expands to a bare
         * if/else that already ends in ';' and reads a variable named pTseng
         * from the expansion site.  Use it only as a complete statement inside
         * a function that has a local pTseng, never as the unbraced body of
         * another if/else.  SET_FUNCTION_COLOREXPAND_CPU below likewise carries
         * its own trailing ';'.
         */
    245 #define SET_FUNCTION_COLOREXPAND_CPU \
    246     ACL_ROUTING_CONTROL(0x02);
    247 
    248 
    249 static void
    250 TsengSubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
    251     int x, int y, int w, int h, int skipleft)
    252 {
            /*
             * Prepares a run of scanline color expansions for a rectangle that
             * starts at (x, y) and is w pixels wide.  Nothing is started here:
             * the destination address and skipleft are stored in pTseng for
             * the per-scanline callbacks, and the X/Y count is programmed as
             * w x 1.  "h" is not used in this function.
             */
    253     TsengPtr pTseng = TsengPTR(pScrn);
    254 
    255     if (pTseng->ChipType == ET4000) {
    256 	/* the accelerator needs DWORD padding, and "w" is in PIXELS... */
            	/* MULBPP(w) rounded up to whole 32-unit and 8-unit groups */
    257 	pTseng->acl_colexp_width_dwords = (MULBPP(pTseng, w) + 31) >> 5;
    258 	pTseng->acl_colexp_width_bytes = (MULBPP(pTseng, w) + 7) >> 3;
    259     }
    260 
    261     pTseng->acl_ColorExpandDst = FBADDR(pTseng, x, y);
    262     pTseng->acl_skipleft = skipleft;
    263 
    264     wait_acl_queue(pTseng);
    265 
    266 #if 0
    267     ACL_MIX_Y_OFFSET(w - 1);
    268 
    269     ErrorF(" W=%d", w);
    270 #endif
            /* one scanline at a time: height is fixed at 1 */
    271     SET_XY(pTseng, w, 1);
    272 }
    273 
    274 static void
    275 TsengSubsequentColorExpandScanline(ScrnInfoPtr pScrn,
    276     int bufno)
    277 {
            /*
             * Expands one scanline whose mix data sits in video memory at
             * AccelColorExpandBufferOffsets[bufno], then advances the stored
             * destination address by one line.
             */
    278     TsengPtr pTseng = TsengPTR(pScrn);
    279 
    280     wait_acl_queue(pTseng);
    281 
            /* "<< 3" scales the buffer offset by 8 before skipleft is added */
    282     ACL_MIX_ADDRESS((pTseng->AccelColorExpandBufferOffsets[bufno] << 3) + pTseng->acl_skipleft);
    283     START_ACL(pTseng, pTseng->acl_ColorExpandDst);
    284 
    285     /* move to next scanline */
    286     pTseng->acl_ColorExpandDst += pTseng->line_width;
    287 
    288     /*
    289      * If not using triple-buffering, we need to wait for the queued
    290      * register set to be transferred to the working register set here,
    291      * because otherwise an e.g. double-buffering mechanism could overwrite
    292      * the buffer that's currently being worked with with new data too soon.
    293      *
    294      * WAIT_QUEUE; // not needed with triple-buffering
    295      */
    296 }
    297 
    298 
    299 
    300 /*
    301  * We use this intermediate CPU-to-Screen color expansion because the one
    302  * provided by XAA seems to lock up the accelerator engine.
    303  *
    304  * One of the main differences between the XAA approach and this one is that
    305  * transfers are done per byte. I'm not sure if that is needed though.
    306  */
    307 static void
    308 TsengSubsequentColorExpandScanline_8bpp(ScrnInfoPtr pScrn, int bufno)
    309 {
            /*
             * Starts the accelerator at the stored destination address and
             * then copies acl_colexp_width_bytes bytes from
             * XAAScanlineColorExpandBuffers[bufno] to consecutive byte offsets
             * (starting at 0) of the CPU-to-ACL aperture.  Finally advances
             * the stored destination by one line.
             */
    310     TsengPtr pTseng = TsengPTR(pScrn);
    311     pointer dest = pTseng->tsengCPU2ACLBase;
    312     int i,j;
    313     CARD8 *bufptr;
    314 
    315     i = pTseng->acl_colexp_width_bytes;
    316     bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]);
    317 
    318     wait_acl_queue(pTseng);
    319     START_ACL (pTseng, pTseng->acl_ColorExpandDst);
    320 
    321 /*  *((LongP) (MMioBase + 0x08)) = (CARD32) pTseng->acl_ColorExpandDst;*/
    322 /*  MMIO_OUT32(tsengCPU2ACLBase,0, (CARD32)pTseng->acl_ColorExpandDst); */
    323     j = 0;
    324     /* Copy scanline data to accelerator MMU aperture byte by byte */
    325     while (i--) {		       /* FIXME: we need to take care of PCI bursting and MMU overflow here! */
    326 	MMIO_OUT8(dest,j++, *bufptr++);
    327     }
    328 
    329     /* move to next scanline */
    330     pTseng->acl_ColorExpandDst += pTseng->line_width;
    331 }
    332 
    333 /*
    334  * This function does direct memory-to-CPU bit doubling for color-expansion
    335  * at 16bpp on W32 chips. They can only do 8bpp color expansion, so we have
    336  * to expand the incoming data to 2 bits per pixel (each source bit doubled) first.
    337  */
    338 static void
    339 TsengSubsequentColorExpandScanline_16bpp(ScrnInfoPtr pScrn, int bufno)
    340 {
            /*
             * Like the 8bpp variant, but every input byte is first widened
             * through ColExpLUT and written to the aperture as two bytes, low
             * byte first.
             */
    341     TsengPtr pTseng = TsengPTR(pScrn);
    342     pointer dest = pTseng->tsengCPU2ACLBase;
    343     int i,j;
    344     CARD8 *bufptr;
    345     register CARD32 bits16;
    346 
            /* input bytes to consume: two per output dword (2 output bytes each) */
    347     i = pTseng->acl_colexp_width_dwords * 2;
    348     bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]);
    349 
    350     wait_acl_queue(pTseng);
    351     START_ACL(pTseng, pTseng->acl_ColorExpandDst);
    352 
    353     j = 0;
    354     while (i--) {
    355 	bits16 = pTseng->ColExpLUT[*bufptr++];
    356 	MMIO_OUT8(dest,j++,bits16 & 0xFF);
    357 	MMIO_OUT8(dest,j++,(bits16 >> 8) & 0xFF);
    358     }
    359 
    360     /* move to next scanline */
    361     pTseng->acl_ColorExpandDst += pTseng->line_width;
    362 }
    363 
    364 /*
    365  * This function does direct memory-to-CPU bit tripling for color-expansion
    366  * at 24bpp on W32 chips. They can only do 8bpp color expansion, so we have
    367  * to expand the incoming data to 3 bits per pixel (each source bit tripled) first.
    368  */
    369 static void
    370 TsengSubsequentColorExpandScanline_24bpp(ScrnInfoPtr pScrn, int bufno)
    371 {
            /*
             * Like the 8bpp variant, but every input byte is widened through
             * ColExpLUT into three output bytes.  Here "i" counts OUTPUT bytes
             * (width in dwords * 4); a fresh input byte is fetched after every
             * third output byte.
             */
    372     TsengPtr pTseng = TsengPTR(pScrn);
    373     pointer dest = pTseng->tsengCPU2ACLBase;
            /* j starts at -1 because the first LUT entry is loaded before the loop */
    374     int i, k, j = -1;
    375     CARD8 *bufptr;
    376     register CARD32 bits24;
    377 
    378     i = pTseng->acl_colexp_width_dwords * 4;
    379     bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]);
    380 
    381     wait_acl_queue(pTseng);
    382     START_ACL(pTseng, pTseng->acl_ColorExpandDst);
    383 
    384     /* take 8 input bits, expand to 3 output bytes */
    385     bits24 = pTseng->ColExpLUT[*bufptr++];
    386     k = 0;
    387     while (i--) {
    388 	if ((j++) == 2) {	       /* "i % 3" operation is much too expensive */
    389 	    j = 0;
    390 	    bits24 = pTseng->ColExpLUT[*bufptr++];
    391 	}
            	/* emit the low byte, then shift the next one into place */
    392 	MMIO_OUT8(dest,k++,bits24 & 0xFF);
    393 	bits24 >>= 8;
    394     }
    395 
    396     /* move to next scanline */
    397     pTseng->acl_ColorExpandDst += pTseng->line_width;
    398 }
    399 
    400 /*
    401  * This function does direct memory-to-CPU bit quadrupling for color-expansion
    402  * at 32bpp on W32 chips. They can only do 8bpp color expansion, so we have
    403  * to expand the incoming data to 4 bits per pixel (each source bit quadrupled) first.
    404  */
    405 static void
    406 TsengSubsequentColorExpandScanline_32bpp(ScrnInfoPtr pScrn, int bufno)
    407 {
            /*
             * Like the 8bpp variant, but every input byte is widened through
             * ColExpLUT into one dword, written to the aperture as four bytes,
             * lowest byte first.
             */
    408     TsengPtr pTseng = TsengPTR(pScrn);
    409     pointer dest = pTseng->tsengCPU2ACLBase;
    410     int i,j;
    411     CARD8 *bufptr;
    412     register CARD32 bits32;
    413 
    414     i = pTseng->acl_colexp_width_dwords;
    415    /* amount of blocks of 8 bits to expand to 32 bits (=1 DWORD) */
    416     bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]);
    417 
    418     wait_acl_queue(pTseng);
    419     START_ACL(pTseng, pTseng->acl_ColorExpandDst);
    420 
    421     j = 0;
    422     while (i--) {
    423 	bits32 = pTseng->ColExpLUT[*bufptr++];
    424 	MMIO_OUT8(dest,j++,bits32 & 0xFF);
    425 	MMIO_OUT8(dest,j++,(bits32 >> 8) & 0xFF);
    426 	MMIO_OUT8(dest,j++,(bits32 >> 16) & 0xFF);
    427 	MMIO_OUT8(dest,j++,(bits32 >> 24) & 0xFF);
    428     }
    429 
    430     /* move to next scanline */
    431     pTseng->acl_ColorExpandDst += pTseng->line_width;
    432 }
    433 
    434 /*
    435  * CPU-to-Screen color expansion.
    436  *   This is for ET4000 only (The ET6000 cannot do this)
    437  */
    438 static void
    439 TsengSetupForCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
    440     int fg, int bg, int rop, unsigned int planemask)
    441 {
            /*
             * Programs ROPs, direction, foreground/background colors and the
             * CPU color-expansion routing for a following batch of
             * CPU-to-screen expansions, and resets the mix address to 0.
             * "planemask" is not referenced directly in this body.
             */
    442     TsengPtr pTseng = TsengPTR(pScrn);
    443 
    444 /*  ErrorF("X"); */
    445 
    446     PINGPONG(pTseng);
    447 
    448     wait_acl_queue(pTseng);
    449 
    450     SET_FG_ROP(rop);
    451     SET_BG_ROP_TR(rop, bg);
    452 
    453     SET_XYDIR(0);
    454 
    455     SET_FG_BG_COLOR(pTseng, fg, bg);
    456 
    457     SET_FUNCTION_COLOREXPAND_CPU;
    458 
    459     /* assure correct alignment of MIX address (ACL needs same alignment here as in MMU aperture) */
    460     ACL_MIX_ADDRESS(0);
    461 }
    462 
    463 #ifdef TSENG_CPU_TO_SCREEN_COLOREXPAND
    464 /*
    465  * TsengSubsequentCPUToScreenColorExpand() is potentially dangerous:
    466  *   Not writing enough data to the MMU aperture for CPU-to-screen color
    467  *   expansion will eventually cause a system deadlock!
    468  *
    469  * Note that CPUToScreenColorExpand operations _always_ require a
    470  * WAIT_INTERFACE before starting a new operation (this is empirical,
    471  * though)
    472  */
    473 static void
    474 TsengSubsequentCPUToScreenColorExpandFill(ScrnInfoPtr pScrn,
    475     int x, int y, int w, int h, int skipleft)
    476 {
            /*
             * Starts a w x h CPU-to-screen color expansion at (x, y).  Only
             * compiled when TSENG_CPU_TO_SCREEN_COLOREXPAND is defined.  A
             * non-zero skipleft is only reported through ErrorF; it is not
             * applied.  The unconditional "WAIT FIXME" message below is
             * printed on every call.
             */
    477     TsengPtr pTseng = TsengPTR(pScrn);
    478     int destaddr = FBADDR(pTseng, x, y);
    479 
    480     /* ErrorF(" %dx%d|%d ",w,h,skipleft); */
    481     if (skipleft)
    482 	ErrorF("Can't do: Skipleft = %d\n", skipleft);
    483 
    484 /*  wait_acl_queue(); */
    485     ErrorF("=========WAIT     FIXME!\n");
    486     WAIT_INTERFACE;
    487 
    488     ACL_MIX_Y_OFFSET(w - 1);
    489     SET_XY(pTseng, w, h);
    490     START_ACL(pTseng, destaddr);
    491 }
    492 #endif
    493 
    494 static void
    495 TsengSetupForScreenToScreenColorExpandFill(ScrnInfoPtr pScrn,
    496     int fg, int bg, int rop, unsigned int planemask)
    497 {
            /*
             * Programs ROPs, foreground/background colors, the chip-specific
             * color-expansion function and direction 0 for a following batch
             * of screen-to-screen expansions.  "planemask" is not referenced
             * directly in this body.
             */
    498     TsengPtr pTseng = TsengPTR(pScrn);
    499 
    500 /*  ErrorF("SSC "); */
    501 
    502     PINGPONG(pTseng);
    503 
    504     wait_acl_queue(pTseng);
    505 
    506     SET_FG_ROP(rop);
    507     SET_BG_ROP_TR(rop, bg);
    508 
    509     SET_FG_BG_COLOR(pTseng, fg, bg);
    510 
    511     SET_FUNCTION_COLOREXPAND;
    512 
    513     SET_XYDIR(0);
    514 }
    515 
    516 static void
    517 TsengSubsequentScreenToScreenColorExpandFill(ScrnInfoPtr pScrn,
    518     int x, int y, int w, int h, int srcx, int srcy, int skipleft)
    519 {
            /*
             * Expands the monochrome data stored on screen at (srcx, srcy)
             * into a w x h rectangle at (x, y).
             */
    520     TsengPtr pTseng = TsengPTR(pScrn);
    521     int destaddr = FBADDR(pTseng, x, y);
    522 
    523 /*    int srcaddr = FBADDR(pTseng, srcx, srcy); */
    524 
    525     wait_acl_queue(pTseng);
    526 
    527     SET_XY(pTseng, w, h);
    528     ACL_MIX_ADDRESS(		       /* MIX address is in BITS */
    529 	(((srcy * pScrn->displayWidth) + srcx) * pScrn->bitsPerPixel) + skipleft);
    530 
            /* line_width scaled by 8, matching the bit units of the mix address */
    531     ACL_MIX_Y_OFFSET(pTseng->line_width << 3);
    532 
    533     START_ACL(pTseng, destaddr);
    534 }
    535 
    536 /*
    537  *
    538  */
    539 static Bool
    540 TsengXAAInit_Colexp(ScrnInfoPtr pScrn)
    541 {
    542     int i, j, r;
    543     TsengPtr pTseng = TsengPTR(pScrn);
    544     XAAInfoRecPtr pXAAInfo = pTseng->AccelInfoRec;
    545 
    546     PDEBUG("	TsengXAAInit_Colexp\n");
    547 
    548 #ifdef TODO
    549     if (OFLG_ISSET(OPTION_XAA_NO_COL_EXP, &vga256InfoRec.options))
    550 	return;
    551 #endif
    552 
    553     /* FIXME! disable accelerated color expansion for W32/W32i until it's fixed */
    554 /*  if (Is_W32 || Is_W32i) return; */
    555 
    556     /*
    557      * Screen-to-screen color expansion.
    558      *
    559      * Scanline-screen-to-screen color expansion is slower than
    560      * CPU-to-screen color expansion.
    561      */
    562 
    563     pXAAInfo->ScreenToScreenColorExpandFillFlags =
    564 	BIT_ORDER_IN_BYTE_LSBFIRST |
    565 	SCANLINE_PAD_DWORD |
    566 	LEFT_EDGE_CLIPPING |
    567 	NO_PLANEMASK;
    568 
    569 #if 1
    570     if ((pTseng->ChipType == ET6000) || (pScrn->bitsPerPixel == 8)) {
    571 	pXAAInfo->SetupForScreenToScreenColorExpandFill =
    572 	    TsengSetupForScreenToScreenColorExpandFill;
    573 	pXAAInfo->SubsequentScreenToScreenColorExpandFill =
    574 	    TsengSubsequentScreenToScreenColorExpandFill;
    575     }
    576 #endif
    577 
    578     /*
    579      * Scanline CPU to screen color expansion for all W32 engines.
    580      *
    581      * real CPU-to-screen color expansion is extremely tricky, and only
    582      * works for 8bpp anyway.
    583      *
    584      * This also allows us to do 16, 24 and 32 bpp color expansion by first
    585      * doubling the bitmap pattern before color-expanding it, because W32s
    586      * can only do 8bpp color expansion.
    587      */
    588 
    589     pXAAInfo->ScanlineCPUToScreenColorExpandFillFlags =
    590 	BIT_ORDER_IN_BYTE_LSBFIRST |
    591 	SCANLINE_PAD_DWORD |
    592 	NO_PLANEMASK;
    593 
    594     if (pTseng->ChipType == ET4000) {
    595 	pTseng->XAAScanlineColorExpandBuffers[0] =
    596 	    xnfalloc(((pScrn->virtualX + 31)/32) * 4 * pTseng->Bytesperpixel);
    597 	if (pTseng->XAAScanlineColorExpandBuffers[0] == NULL) {
    598 	    xf86Msg(X_ERROR, "Could not malloc color expansion scanline buffer.\n");
    599 	    return FALSE;
    600 	}
    601 	pXAAInfo->NumScanlineColorExpandBuffers = 1;
    602 	pXAAInfo->ScanlineColorExpandBuffers = pTseng->XAAScanlineColorExpandBuffers;
    603 
    604 	pXAAInfo->SetupForScanlineCPUToScreenColorExpandFill =
    605 	    TsengSetupForCPUToScreenColorExpandFill;
    606 
    607 	pXAAInfo->SubsequentScanlineCPUToScreenColorExpandFill =
    608 	    TsengSubsequentScanlineCPUToScreenColorExpandFill;
    609 
    610 	switch (pScrn->bitsPerPixel) {
    611 	case 8:
    612 	    pXAAInfo->SubsequentColorExpandScanline =
    613 		TsengSubsequentColorExpandScanline_8bpp;
    614 	    break;
    615 	case 15:
    616 	case 16:
    617 	    pXAAInfo->SubsequentColorExpandScanline =
    618 		TsengSubsequentColorExpandScanline_16bpp;
    619 	    break;
    620 	case 24:
    621 	    pXAAInfo->SubsequentColorExpandScanline =
    622 		TsengSubsequentColorExpandScanline_24bpp;
    623 	    break;
    624 	case 32:
    625 	    pXAAInfo->SubsequentColorExpandScanline =
    626 		TsengSubsequentColorExpandScanline_32bpp;
    627 	    break;
    628 	}
    629 	/* create color expansion LUT (used for >8bpp only) */
    630 	pTseng->ColExpLUT = xnfalloc(sizeof(CARD32)*256);
    631 	if (pTseng->ColExpLUT == NULL) {
    632 	    xf86Msg(X_ERROR, "Could not malloc color expansion tables.\n");
    633 	    return FALSE;
    634 	}
    635 	for (i = 0; i < 256; i++) {
    636 	    r = 0;
    637 	    for (j = 7; j >= 0; j--) {
    638 		r <<= pTseng->Bytesperpixel;
    639 		if ((i >> j) & 1)
    640 		    r |= (1 << pTseng->Bytesperpixel) - 1;
    641 	    }
    642 	    pTseng->ColExpLUT[i] = r;
    643 	    /* ErrorF("0x%08X, ",r ); if ((i%8)==7) ErrorF("\n"); */
    644 	}
    645     } else {
    646 	/*
    647 	 * Triple-buffering is needed to account for double-buffering of Tseng
    648 	 * acceleration registers.
    649 	 */
    650 	pXAAInfo->NumScanlineColorExpandBuffers = 3;
    651 	pXAAInfo->ScanlineColorExpandBuffers =
    652 	    pTseng->XAAColorExpandBuffers;
    653 	pXAAInfo->SetupForScanlineCPUToScreenColorExpandFill =
    654 	    TsengSetupForScreenToScreenColorExpandFill;
    655 	pXAAInfo->SubsequentScanlineCPUToScreenColorExpandFill =
    656 	    TsengSubsequentScanlineCPUToScreenColorExpandFill;
    657 	pXAAInfo->SubsequentColorExpandScanline =
    658 	    TsengSubsequentColorExpandScanline;
    659 
    660 	/* calculate memory addresses from video memory offsets */
    661 	for (i = 0; i < pXAAInfo->NumScanlineColorExpandBuffers; i++) {
    662 	    pTseng->XAAColorExpandBuffers[i] =
    663 		pTseng->FbBase + pTseng->AccelColorExpandBufferOffsets[i];
    664 	}
    665 
    666 	pXAAInfo->ScanlineColorExpandBuffers = pTseng->XAAColorExpandBuffers;
    667     }
    668 
    669 #ifdef TSENG_CPU_TO_SCREEN_COLOREXPAND
    670     /*
    671      * CPU-to-screen color expansion doesn't seem to be reliable yet. The
    672      * W32 needs the correct amount of data sent to it in this mode, or it
    673      * hangs the machine until is does (?). Currently, the init code in this
    674      * file or the XAA code that uses this does something wrong, so that
    675      * occasionally we get accelerator timeouts, and after a few, complete
    676      * system hangs.
    677      *
    678      * The W32 engine requires SCANLINE_NO_PAD, but that doesn't seem to
    679      * work very well (accelerator hangs).
    680      *
    681      * What works is this: tell XAA that we have SCANLINE_PAD_DWORD, and then
    682      * add the following code in TsengSubsequentCPUToScreenColorExpand():
    683      *     w = (w + 31) & ~31; this code rounds the width up to the nearest
    684      * multiple of 32, and together with SCANLINE_PAD_DWORD, this makes
    685      * CPU-to-screen color expansion work. Of course, the display isn't
    686      * correct (4 chars are "blanked out" when only one is written, for
    687      * example). But this shows that the principle works. But the code
    688      * doesn't...
    689      *
    690      * The same thing goes for PAD_BYTE: this also works (with the same
    691      * problems as SCANLINE_PAD_DWORD, although less prominent)
    692      */
    693 
    694     pXAAInfo->CPUToScreenColorExpandFillFlags =
    695 	BIT_ORDER_IN_BYTE_LSBFIRST |
    696 	SCANLINE_PAD_DWORD |   /* no other choice */
    697 	CPU_TRANSFER_PAD_DWORD |
    698 	NO_PLANEMASK;
    699 
    700     if (Is_W32_any && (pScrn->bitsPerPixel == 8)) {
    701 	pXAAInfo->SetupForCPUToScreenColorExpandFill =
    702 	    TsengSetupForCPUToScreenColorExpandFill;
    703 	pXAAInfo->SubsequentCPUToScreenColorExpandFill =
    704 	    TsengSubsequentCPUToScreenColorExpandFill;
    705 
    706 	/* we'll be using MMU aperture 2 */
    707 	pXAAInfo->ColorExpandBase = (CARD8 *)pTseng->tsengCPU2ACLBase;
    708 	/* ErrorF("tsengCPU2ACLBase = 0x%x\n", pTseng->tsengCPU2ACLBase); */
    709 	/* aperture size is 8kb in banked mode. Larger in linear mode, but 8kb is enough */
    710 	pXAAInfo->ColorExpandRange = 8192;
    711     }
    712 #endif
    713     return TRUE;
    714 }
    715 
    716 /*
    717  * ET4/6K acceleration interface.
    718  *
    719  * Uses Harm Hanemaayer's generic acceleration interface (XAA).
    720  *
    721  * Author: Koen Gadeyne
    722  *
    723  * Much of the acceleration code is based on the XF86_W32 server code from
    724  * Glenn Lai.
    725  *
    726  */
    727 
    728 /*
    729  * This is the implementation of the Sync() function.
    730  *
    731  * To avoid pipeline/cache/buffer flushing in the PCI subsystem and the VGA
    732  * controller, we might replace this read-intensive code with a dummy
    733  * accelerator operation that causes a hardware-blocking (wait-states) until
    734  * the running operation is done.
    735  */
    736 static void
    737 TsengSync(ScrnInfoPtr pScrn)
    738 {
            /* Blocks until WAIT_ACL completes; pTseng is fetched for that macro's use. */
    739     TsengPtr pTseng = TsengPTR(pScrn);
    740 
    741     WAIT_ACL;
    742 }
    743 
    744 /*
    745  * This is the implementation of the SetupForSolidFill function
    746  * that sets up the coprocessor for a subsequent batch for solid
    747  * rectangle fills.
    748  */
    749 static void
    750 TsengSetupForSolidFill(ScrnInfoPtr pScrn,
    751     int color, int rop, unsigned int planemask)
    752 {
            /*
             * Programs the foreground ROP and color for a following batch of
             * solid fills.  When planemask does not cover every bit of
             * pTseng->planemask_mask, the planemask variant of the ROP is used
             * and the planemask itself is loaded as the background color.
             */
    753     TsengPtr pTseng = TsengPTR(pScrn);
    754 
    755     /*
    756      * all registers are queued in the Tseng chips, except of course for the
    757      * stuff we want to store in off-screen memory. So we have to use a
    758      * ping-pong method for those if we want to avoid having to wait for the
    759      * accelerator when we want to write to these.
    760      */
    761 
    762 /*    ErrorF("S"); */
    763 
    764     PINGPONG(pTseng);
    765 
    766     wait_acl_queue(pTseng);
    767 
    768     /*
    769      * planemask emulation uses a modified "standard" FG ROP (see ET6000
    770      * data book p 66 or W32p databook p 37: "Bit masking"). We only enable
    771      * the planemask emulation when the planemask is not a no-op, because
    772      * blitting speed would suffer.
    773      */
    774 
    775     if ((planemask & pTseng->planemask_mask) != pTseng->planemask_mask) {
    776 	SET_FG_ROP_PLANEMASK(rop);
    777 	SET_BG_COLOR(pTseng, planemask);
    778     } else {
    779 	SET_FG_ROP(rop);
    780     }
    781     SET_FG_COLOR(pTseng, color);
    782 
    783     SET_FUNCTION_BLT;
    784 }
    785 
    786 /*
    787  * This is the implementation of the SubsequentForSolidFillRect function
    788  * that sends commands to the coprocessor to fill a solid rectangle of
    789  * the specified location and size, with the parameters from the SetUp
    790  * call.
    791  *
    792  * Splitting it up between ET4000 and ET6000 avoids lots of chipset type
    793  * comparisons.
    794  */
    795 static void
    796 TsengW32pSubsequentSolidFillRect(ScrnInfoPtr pScrn,
    797     int x, int y, int w, int h)
    798 {
            /*
             * Fills the w x h rectangle at (x, y) using the state programmed by
             * the preceding setup call.  This variant uses SET_XY_4 and
             * START_ACL; the ET6000 counterpart uses SET_XY_6 and START_ACL_6.
             */
    799     TsengPtr pTseng = TsengPTR(pScrn);
    800     int destaddr = FBADDR(pTseng, x, y);
    801 
    802     wait_acl_queue(pTseng);
    803 
    804     /*
    805      * Restoring the ACL_SOURCE_ADDRESS here is needed as long as Bresenham
    806      * lines are enabled for >8bpp. Or until XAA allows us to render
    807      * horizontal lines using the same Bresenham code instead of re-routing
    808      * them to FillRectSolid. For XDECREASING lines, the SubsequentBresenham
    809      * code adjusts the ACL_SOURCE_ADDRESS to make sure XDECREASING lines
    810      * are drawn with the correct colors. But if a batch of subsequent
    811      * operations also holds a few horizontal lines, they will be routed to
    812      * here without calling the SetupFor... code again, and the
    813      * ACL_SOURCE_ADDRESS will be wrong.
    814      */
    815     ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg);
    816 
    817     SET_XYDIR(0);   /* FIXME: not needed with separate setupforsolidline */
    818 
    819     SET_XY_4(pTseng, w, h);
    820     START_ACL(pTseng, destaddr);
    821 }
    822 
    823 static void
    824 Tseng6KSubsequentSolidFillRect(ScrnInfoPtr pScrn,
    825     int x, int y, int w, int h)
    826 {
    827     TsengPtr pTseng = TsengPTR(pScrn);
    828     int destaddr = FBADDR(pTseng, x, y);
    829 
    830     wait_acl_queue(pTseng);
    831 
    832     /* see comment in TsengW32pSubsequentFillRectSolid */
    833     ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg);
    834 
    835     /* if XYDIR is not reset here, drawing a hardware line in between
    836      * blitting, with the same ROP, color, etc will not cause a call to
    837      * SetupFor... (because linedrawing uses SetupForSolidFill() as its
    838      * Setup() function), and thus the direction register will have been
    839      * changed by the last LineDraw operation.
    840      */
    841     SET_XYDIR(0);
    842 
    843     SET_XY_6(pTseng, w, h);
    844     START_ACL_6(destaddr);
    845 }
    846 
    847 /*
    848  * This is the implementation of the SetupForScreenToScreenCopy function
    849  * that sets up the coprocessor for a subsequent batch of
    850  * screen-to-screen copies.
    851  */
    852 
    853 static __inline__ void
    854 Tseng_setup_screencopy(TsengPtr pTseng,
    855     int rop, unsigned int planemask,
    856     int trans_color, int blit_dir)
    857 {
    858     wait_acl_queue(pTseng);
    859 
    860 #ifdef ET6K_TRANSPARENCY
    861     if ((pTseng->ChipType == ET6000) && (trans_color != -1)) {
    862 	SET_BG_COLOR(trans_color);
    863 	SET_FUNCTION_BLT_TR;
    864     } else
    865 	SET_FUNCTION_BLT;
    866 
    867     SET_FG_ROP(rop);
    868 #else
    869     if ((planemask & pTseng->planemask_mask) != pTseng->planemask_mask) {
    870 	SET_FG_ROP_PLANEMASK(rop);
    871 	SET_BG_COLOR(pTseng, planemask);
    872     } else {
    873 	SET_FG_ROP(rop);
    874     }
    875     SET_FUNCTION_BLT;
    876 #endif
    877     SET_XYDIR(blit_dir);
    878 }
    879 
    880 static void
    881 TsengSetupForScreenToScreenCopy(ScrnInfoPtr pScrn,
    882     int xdir, int ydir, int rop,
    883     unsigned int planemask, int trans_color)
    884 {
    885     /*
    886      * xdir can be either 1 (left-to-right) or -1 (right-to-left).
    887      * ydir can be either 1 (top-to-bottom) or -1 (bottom-to-top).
    888      */
    889 
    890     TsengPtr pTseng = TsengPTR(pScrn);
    891     int blit_dir = 0;
    892 
    893 /*    ErrorF("C%d ", trans_color); */
    894 
    895     pTseng->acl_blitxdir = xdir;
    896     pTseng->acl_blitydir = ydir;
    897 
    898     if (xdir == -1)
    899 	blit_dir |= 0x1;
    900     if (ydir == -1)
    901 	blit_dir |= 0x2;
    902 
    903     Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, blit_dir);
    904 
    905     ACL_SOURCE_WRAP(0x77);	       /* no wrap */
    906     ACL_SOURCE_Y_OFFSET(pTseng->line_width - 1);
    907 }
    908 
    909 /*
    910  * This is the implementation of the SubsequentForScreenToScreenCopy
    911  * that sends commands to the coprocessor to perform a screen-to-screen
    912  * copy of the specified areas, with the parameters from the SetUp call.
    913  * In this sample implementation, the direction must be taken into
    914  * account when calculating the addresses (with coordinates, it might be
    915  * a little easier).
    916  *
    917  * Splitting up the SubsequentScreenToScreenCopy between ET4000 and ET6000
    918  * doesn't seem to improve speed for small blits (as it did with
    919  * SolidFillRect).
    920  */
    921 static void
    922 TsengSubsequentScreenToScreenCopy(ScrnInfoPtr pScrn,
    923     int x1, int y1, int x2, int y2,
    924     int w, int h)
    925 {
    926     TsengPtr pTseng = TsengPTR(pScrn);
    927     int srcaddr, destaddr;
    928 
    929     /*
    930      * Optimizing note: the pre-calc code below (i.e. until the first
    931      * register write) doesn't significantly affect performance. Removing it
    932      * all boosts small blits from 24.22 to 25.47 MB/sec. Don't waste time
    933      * on that. One less PCI bus write would boost us to 30.00 MB/sec, up
    934      * from 24.22. Waste time on _that_...
    935      */
    936 
    937     /* tseng chips want x-sizes in bytes, not pixels */
    938     x1 = MULBPP(pTseng, x1);
    939     x2 = MULBPP(pTseng, x2);
    940 
    941     /*
    942      * If the direction is "decreasing", the chip wants the addresses
    943      * to be at the other end, so we must be aware of that in our
    944      * calculations.
    945      */
    946     if (pTseng->acl_blitydir == -1) {
    947 	srcaddr = (y1 + h - 1) * pTseng->line_width;
    948 	destaddr = (y2 + h - 1) * pTseng->line_width;
    949     } else {
    950 	srcaddr = y1 * pTseng->line_width;
    951 	destaddr = y2 * pTseng->line_width;
    952     }
    953     if (pTseng->acl_blitxdir == -1) {
    954 	/* Accelerator start address must point to first byte to be processed.
    955 	 * Depending on the direction, this is the first or the last byte
    956 	 * in the multi-byte pixel.
    957 	 */
    958 	int eol = MULBPP(pTseng, w);
    959 
    960 	srcaddr += x1 + eol - 1;
    961 	destaddr += x2 + eol - 1;
    962     } else {
    963 	srcaddr += x1;
    964 	destaddr += x2;
    965     }
    966 
    967     wait_acl_queue(pTseng);
    968 
    969     SET_XY(pTseng, w, h);
    970     ACL_SOURCE_ADDRESS(srcaddr);
    971     START_ACL(pTseng, destaddr);
    972 }
    973 
    974 #if 0
    975 static int pat_src_addr;
    976 
    977 static void
    978 TsengSetupForColor8x8PatternFill(ScrnInfoPtr pScrn,
    979     int patx, int paty, int rop, unsigned int planemask, int trans_color)
    980 {
    981     TsengPtr pTseng = TsengPTR(pScrn);
    982 
    983     pat_src_addr = FBADDR(pTseng, patx, paty);
    984 
    985     ErrorF("P");
    986 
    987     Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, 0);
    988 
    989     switch (pTseng->Bytesperpixel) {
    990     case 1:
    991 	ACL_SOURCE_WRAP(0x33);       /* 8x8 wrap */
    992 	ACL_SOURCE_Y_OFFSET(8 - 1);
    993 	break;
    994     case 2:
    995 	ACL_SOURCE_WRAP(0x34);       /* 16x8 wrap */
    996 	ACL_SOURCE_Y_OFFSET(16 - 1);
    997 	break;
    998     case 3:
    999 	ACL_SOURCE_WRAP(0x3D);       /* 24x8 wrap --- only for ET6000 !!! */
   1000 	ACL_SOURCE_Y_OFFSET(32 - 1); /* this is no error -- see databook */
   1001 	break;
   1002     case 4:
   1003 	ACL_SOURCE_WRAP(0x35);       /* 32x8 wrap */
   1004 	ACL_SOURCE_Y_OFFSET(32 - 1);
   1005     }
   1006 }
   1007 
   1008 static void
   1009 TsengSubsequentColor8x8PatternFillRect(ScrnInfoPtr pScrn,
   1010     int patx, int paty, int x, int y, int w, int h)
   1011 {
   1012     TsengPtr pTseng = TsengPTR(pScrn);
   1013     int destaddr = FBADDR(pTseng, x, y);
   1014     int srcaddr = pat_src_addr + MULBPP(pTseng, paty * 8 + patx);
   1015 
   1016     wait_acl_queue(pTseng);
   1017 
   1018     ACL_SOURCE_ADDRESS(srcaddr);
   1019 
   1020     SET_XY(pTseng, w, h);
   1021     START_ACL(pTseng, destaddr);
   1022 }
   1023 #endif
   1024 
   1025 #if 0
   1026 /*
   1027  * ImageWrite is nothing more than a per-scanline screencopy.
   1028  */
   1029 
   1030 static void
   1031 TsengSetupForScanlineImageWrite(ScrnInfoPtr pScrn,
   1032     int rop, unsigned int planemask, int trans_color, int bpp, int depth)
   1033 {
   1034     TsengPtr pTseng = TsengPTR(pScrn);
   1035 
   1036 /*    ErrorF("IW"); */
   1037 
   1038     Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, 0);
   1039 
   1040     ACL_SOURCE_WRAP(0x77);	       /* no wrap */
   1041     ACL_SOURCE_Y_OFFSET(pTseng->line_width - 1);
   1042 }
   1043 
   1044 static void
   1045 TsengSubsequentScanlineImageWriteRect(ScrnInfoPtr pScrn,
   1046     int x, int y, int w, int h, int skipleft)
   1047 {
   1048     TsengPtr pTseng = TsengPTR(pScrn);
   1049 
   1050 /*    ErrorF("r%d",h); */
   1051 
   1052     pTseng->acl_iw_dest = y * pTseng->line_width + MULBPP(pTseng, x);
   1053     pTseng->acl_skipleft = MULBPP(pTseng, skipleft);
   1054 
   1055     wait_acl_queue(pTseng);
   1056     SET_XY(pTseng, w, 1);
   1057 }
   1058 
   1059 static void
   1060 TsengSubsequentImageWriteScanline(ScrnInfoPtr pScrn,
   1061     int bufno)
   1062 {
   1063     TsengPtr pTseng = TsengPTR(pScrn);
   1064 
   1065 /*    ErrorF("%d", bufno); */
   1066 
   1067     wait_acl_queue(pTseng);
   1068 
   1069     ACL_SOURCE_ADDRESS(pTseng->AccelImageWriteBufferOffsets[bufno]
   1070 		       + pTseng->acl_skipleft);
   1071     START_ACL(pTseng, pTseng->acl_iw_dest);
   1072     pTseng->acl_iw_dest += pTseng->line_width;
   1073 }
   1074 #endif
   1075 
   1076 #if 0
   1077 /*
   1078  * W32p/ET6000 hardware linedraw code.
   1079  *
   1080  * TsengSetupForSolidFill() is used as a setup function.
   1081  *
   1082  * Three major problems that needed to be solved here:
   1083  *
   1084  * 1. The "bias" value must be translated into the "line draw algorithm"
   1085  *    parameter in the Tseng accelerators. This parameter, although not
   1086  *    documented as such, needs to be set to the _inverse_ of the
   1087  *    appropriate bias bit (i.e. for the appropriate octant).
   1088  *
   1089  * 2. In >8bpp modes, the accelerator will render BYTES in the same order as
   1090  *    it is drawing the line. This means it will render the colors in the
   1091  *    same order as well, reversing the byte-order in pixels that are drawn
   1092  *    right-to-left. This causes wrong colors to be rendered.
   1093  *
   1094  * 3. The Tseng data book says that the ACL Y count register needs to be
   1095  *    programmed with "dy-1". A similar thing is said about ACL X count. But
   1096  *    this assumes (x2,y2) is NOT drawn (although that is not mentionned in
   1097  *    the data book). X assumes the endpoint _is_ drawn. If "dy-1" is used,
   1098  *    this sometimes results in a negative value (if dx==dy==0),
   1099  *    causing a complete accelerator hang.
   1100  */
   1101 
   1102 static void
   1103 TsengSubsequentSolidBresenhamLine(ScrnInfoPtr pScrn,
   1104     int x, int y, int major, int minor, int err, int len, int octant)
   1105 {
   1106     TsengPtr pTseng = TsengPTR(pScrn);
   1107     int destaddr = FBADDR(pTseng, x, y);
   1108     int xydir = pTseng->BresenhamTable[octant];
   1109 
   1110     /* Tseng wants the real dx/dy in major/minor. Bresenham uses 2*dx and 2*dy */
   1111     minor >>= 1;
   1112     major >>= 1;
   1113 
   1114     wait_acl_queue(pTseng);
   1115 
   1116     if (!(octant & YMAJOR)) {
   1117 	SET_X_YRAW(pTseng, len, 0xFFF);
   1118     } else {
   1119 	SET_XY_RAW(pTseng,0xFFF, len - 1);
   1120     }
   1121 
   1122     SET_DELTA(minor, major);
   1123     ACL_ERROR_TERM(-err);  /* error term from XAA is NEGATIVE */
   1124 
   1125     /* make sure colors are rendered correctly if >8bpp */
   1126     if (octant & XDECREASING) {
   1127 	destaddr += pTseng->Bytesperpixel - 1;
   1128 	ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset
   1129 			   + pTseng->tsengFg + pTseng->neg_x_pixel_offset);
   1130     } else
   1131 	ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg);
   1132 
   1133     SET_XYDIR(xydir);
   1134 
   1135     START_ACL(pTseng, destaddr);
   1136 }
   1137 #endif
   1138 
   1139 #ifdef TODO
   1140 /*
   1141  * Trapezoid filling code.
   1142  *
   1143  * TsengSetupForSolidFill() is used as a setup function
   1144  */
   1145 
   1146 #undef DEBUG_TRAP
   1147 
   1148 #ifdef TSENG_TRAPEZOIDS
        /*
         * Solid trapezoid fill (unfinished: only compiled when both TODO and
         * TSENG_TRAPEZOIDS are defined).
         *
         * Programs the primary (left) and secondary (right) edge of a
         * trapezoid whose first scanline is ytop and which is `height`
         * scanlines tall, then starts the operation with the ET6000-style
         * SET_XY_6 / START_ACL_6 macros.
         *
         *   left, right   - X of the two edges on the first scanline; the
         *                   initial X count is right - left + 1
         *   dxL, dyL, eL  - deltas and error term loaded for the left edge
         *   dxR, dyR, eR  - deltas and error term loaded for the right edge
         *
         * NOTE(review): as written this block cannot compile -- see the
         * notes on the duplicated declarations and on pTseng below.
         */
   1149 static void
   1150 TsengSubsequentFillTrapezoidSolid(ytop, height, left, dxL, dyL, eL, right, dxR, dyR, eR)
   1151     int ytop;
   1152     int height;
   1153     int left;
   1154     int dxL, dyL;
   1155     int eL;
   1156     int right;
   1157     int dxR, dyR;
   1158     int eR;
   1159 {
   1160     unsigned int tseng_bias_compensate = 0xd8;
   1161     int destaddr, algrthm;
   1162     int xcount = right - left + 1;     /* both edges included */
   1163     int dir_reg = 0x60;		       /* trapezoid drawing; use error term for primary edge */
   1164     int sec_dir_reg = 0x20;	       /* use error term for secondary edge */
   1165     int octant = 0;
   1166 
   1167     /*    ErrorF("#"); */
   1168 
        /*
         * NOTE(review): destaddr, algrthm, xcount, dir_reg, sec_dir_reg and
         * octant are declared a second time below, in the same scope as the
         * declarations above.  The two sets also disagree on the initial
         * dir_reg/sec_dir_reg values when USE_ERROR_TERM is not defined
         * (0x40/0x00 here versus 0x60/0x20 above).  Decide which set is
         * intended and drop the other before enabling this code.
         */
   1169     int destaddr, algrthm;
   1170     int xcount = right - left + 1;
   1171 
   1172 #ifdef USE_ERROR_TERM
   1173     int dir_reg = 0x60;
   1174     int sec_dir_reg = 0x20;
   1175 
   1176 #else
   1177     int dir_reg = 0x40;
   1178     int sec_dir_reg = 0x00;
   1179 
   1180 #endif
   1181     int octant = 0;
        /* bias is never read anywhere in this function */
   1182     int bias = 0x00;		       /* FIXME !!! */
   1183 
   1184 /*    ErrorF("#"); */
   1185 
   1186 #ifdef DEBUG_TRAP
   1187     ErrorF("ytop=%d, height=%d, left=%d, dxL=%d, dyL=%d, eL=%d, right=%d, dxR=%d, dyR=%d, eR=%d ",
   1188 	ytop, height, left, dxL, dyL, eL, right, dxR, dyR, eR);
   1189 #endif
   1190 
        /* only reports the violated assumption; drawing continues regardless */
   1191     if ((dyL < 0) || (dyR < 0))
   1192 	ErrorF("Tseng Trapezoids: Wrong assumption: dyL/R < 0\n");
   1193 
        /*
         * NOTE(review): pTseng is used here and further down, but neither the
         * parameter list nor the locals of this function declare it (there is
         * no ScrnInfoPtr parameter to derive it from) -- confirm how it is
         * meant to be obtained.
         */
   1194     destaddr = FBADDR(pTseng, left, ytop);
   1195 
   1196     /* left edge */
        /* a leftward edge sets direction bit 0; dxL is made non-negative */
   1197     if (dxL < 0) {
   1198 	dir_reg |= 1;
   1199 	octant |= XDECREASING;
   1200 	dxL = -dxL;
   1201     }
   1202     /* Y direction is always positive (top-to-bottom drawing) */
   1203 
   1204     wait_acl_queue(pTseng);
   1205 
   1206     /* left edge */
   1207     /* compute axial direction and load registers */
   1208     if (dxL >= dyL) {		       /* X is major axis */
   1209 	dir_reg |= 4;
   1210 	SET_DELTA(dyL, dxL);
   1211 	if (dir_reg & 1) {	       /* edge coherency: draw left edge */
   1212 	    destaddr += pTseng->Bytesperpixel;
   1213 	    sec_dir_reg |= 0x80;
   1214 	    xcount--;
   1215 	}
   1216     } else {			       /* Y is major axis */
   1217 	SetYMajorOctant(octant);
   1218 	SET_DELTA(dxL, dyL);
   1219     }
   1220     ACL_ERROR_TERM(eL);
   1221 
   1222     /* select "linedraw algorithm" (=bias) and load direction register */
   1223     /* ErrorF(" o=%d ", octant); */
        /* bit `octant` of tseng_bias_compensate, inverted, becomes bit 4 of dir_reg */
   1224     algrthm = ((tseng_bias_compensate >> octant) & 1) ^ 1;
   1225     dir_reg |= algrthm << 4;
   1226     SET_XYDIR(dir_reg);
   1227 
   1228     /* right edge */
   1229     if (dxR < 0) {
   1230 	sec_dir_reg |= 1;
   1231 	dxR = -dxR;
   1232     }
   1233     /* compute axial direction and load registers */
   1234     if (dxR >= dyR) {		       /* X is major axis */
   1235 	sec_dir_reg |= 4;
   1236 	SET_SECONDARY_DELTA(dyR, dxR);
        /*
         * NOTE(review): this tests bit 0 of dir_reg (the left edge's
         * direction), not of sec_dir_reg -- confirm that is intended.
         */
   1237 	if (dir_reg & 1) {	       /* edge coherency: do not draw right edge */
   1238 	    sec_dir_reg |= 0x40;
   1239 	    xcount++;
   1240 	}
   1241     } else {			       /* Y is major axis */
   1242 	SET_SECONDARY_DELTA(dxR, dyR);
   1243     }
   1244     ACL_SECONDARY_ERROR_TERM(eR);
   1245 
   1246     /* ErrorF("%02x", sec_dir_reg); */
   1247     SET_SECONDARY_XYDIR(sec_dir_reg);
   1248 
   1249     SET_XY_6(pTseng, xcount, height);
   1250 
   1251 #ifdef DEBUG_TRAP
   1252     ErrorF("-> %d,%d\n", xcount, height);
   1253 #endif
   1254 
        /* writing the destination address starts the operation */
   1255     START_ACL_6(destaddr);
   1256 }
   1257 #endif
   1258 
   1259 #endif
   1260 
   1261 #endif
   1262 
   1263 /*
   1264  * The following function sets up the supported acceleration. Call it from
   1265  * the FbInit() function in the SVGA driver. Do NOT initialize any hardware
   1266  * in here. That belongs in tseng_init_acl().
   1267  */
   1268 Bool
   1269 TsengXAAInit(ScreenPtr pScreen)
   1270 {
   1271 #ifdef HAVE_XAA_H
   1272     ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen);
   1273     TsengPtr pTseng = TsengPTR(pScrn);
   1274     XAAInfoRecPtr pXAAinfo;
   1275     BoxRec AvailFBArea;
   1276 
   1277     PDEBUG("	TsengXAAInit\n");
   1278     pTseng->AccelInfoRec = pXAAinfo = XAACreateInfoRec();
   1279     if (!pXAAinfo)
   1280 	return FALSE;
   1281 
   1282     /*
   1283      * Set up the main acceleration flags.
   1284      */
   1285     pXAAinfo->Flags = PIXMAP_CACHE;
   1286 
   1287     /*
   1288      * The following line installs a "Sync" function, that waits for
   1289      * all coprocessor operations to complete.
   1290      */
   1291     pXAAinfo->Sync = TsengSync;
   1292 
   1293     /* W32 and W32i must wait for ACL before changing registers */
   1294     if (pTseng->ChipType == ET4000)
   1295         pTseng->need_wait_acl = TRUE;
   1296     else
   1297         pTseng->need_wait_acl = FALSE;
   1298 
   1299     pTseng->line_width = pScrn->displayWidth * pTseng->Bytesperpixel;
   1300 
   1301 #if 1
   1302     /*
   1303      * SolidFillRect.
   1304      *
   1305      * The W32 and W32i chips don't have a register to set the amount of
   1306      * bytes per pixel, and hence they don't skip 1 byte in each 4-byte word
   1307      * at 24bpp. Therefor, the FG or BG colors would have to be concatenated
   1308      * in video memory (R-G-B-R-G-B-... instead of R-G-B-X-R-G-B-X-..., with
   1309      * X = dont' care), plus a wrap value that is a multiple of 3 would have
   1310      * to be set. There is no such wrap combination available.
   1311      */
   1312 #ifdef OBSOLETE
   1313     pXAAinfo->SolidFillFlags |= NO_PLANEMASK;
   1314 #endif
   1315 
   1316     pXAAinfo->SetupForSolidFill = TsengSetupForSolidFill;
   1317     if (pTseng->ChipType == ET6000)
   1318         pXAAinfo->SubsequentSolidFillRect = Tseng6KSubsequentSolidFillRect;
   1319     else
   1320         pXAAinfo->SubsequentSolidFillRect = TsengW32pSubsequentSolidFillRect;
   1321 
   1322 #ifdef TSENG_TRAPEZOIDS
   1323     if (pTseng->ChipType == ET6000)
   1324 	/* disabled for now: not fully compliant yet */
   1325 	pXAAinfo->SubsequentFillTrapezoidSolid = TsengSubsequentFillTrapezoidSolid;
   1326 #endif
   1327 #endif
   1328 
   1329 #if 1
   1330     /*
   1331      * SceenToScreenCopy (BitBLT).
   1332      *
   1333      * Restrictions: On ET6000, we support EITHER a planemask OR
   1334      * TRANSPARENCY, but not both (they use the same Pattern map).
   1335      * All other chips can't do TRANSPARENCY at all.
   1336      */
   1337 #ifdef ET6K_TRANSPARENCY
   1338     pXAAinfo->CopyAreaFlags = NO_PLANEMASK;
   1339     if (pTseng->ChipType == ET4000)
   1340 	pXAAinfo->CopyAreaFlags |= NO_TRANSPARENCY;
   1341 
   1342 #else
   1343     pXAAinfo->CopyAreaFlags = NO_TRANSPARENCY;
   1344 #endif
   1345 
   1346     pXAAinfo->SetupForScreenToScreenCopy =
   1347 	TsengSetupForScreenToScreenCopy;
   1348     pXAAinfo->SubsequentScreenToScreenCopy =
   1349 	TsengSubsequentScreenToScreenCopy;
   1350 #endif
   1351 
   1352 #if 0
   1353     /*
   1354      * ImageWrite.
   1355      *
   1356      * SInce this uses off-screen scanline buffers, it is only of use when
   1357      * complex ROPs are used. But since the current XAA pixmap cache code
   1358      * only works when an ImageWrite is provided, the NO_GXCOPY flag is
   1359      * temporarily disabled.
   1360      */
   1361 
   1362     if (pTseng->AccelImageWriteBufferOffsets[0]) {
   1363 	pXAAinfo->ScanlineImageWriteFlags =
   1364 	    pXAAinfo->CopyAreaFlags | LEFT_EDGE_CLIPPING /* | NO_GXCOPY */ ;
   1365 	pXAAinfo->NumScanlineImageWriteBuffers = 2;
   1366 	pXAAinfo->SetupForScanlineImageWrite =
   1367 	    TsengSetupForScanlineImageWrite;
   1368 	pXAAinfo->SubsequentScanlineImageWriteRect =
   1369 	    TsengSubsequentScanlineImageWriteRect;
   1370 	pXAAinfo->SubsequentImageWriteScanline =
   1371 	    TsengSubsequentImageWriteScanline;
   1372 
   1373 	/* calculate memory addresses from video memory offsets */
   1374 	for (i = 0; i < pXAAinfo->NumScanlineImageWriteBuffers; i++) {
   1375 	    pTseng->XAAScanlineImageWriteBuffers[i] =
   1376 		pTseng->FbBase + pTseng->AccelImageWriteBufferOffsets[i];
   1377 	}
   1378 
   1379 	pXAAinfo->ScanlineImageWriteBuffers = pTseng->XAAScanlineImageWriteBuffers;
   1380     }
   1381 #endif
   1382     /*
   1383      * 8x8 pattern tiling not possible on W32/i/p chips in 24bpp mode.
   1384      * Currently, 24bpp pattern tiling doesn't work at all on those.
   1385      *
   1386      * FIXME: On W32 cards, pattern tiling doesn't work as expected.
   1387      */
   1388     pXAAinfo->Color8x8PatternFillFlags = HARDWARE_PATTERN_PROGRAMMED_ORIGIN;
   1389 
   1390     pXAAinfo->CachePixelGranularity = 8 * 8;
   1391 
   1392 #ifdef ET6K_TRANSPARENCY
   1393     pXAAinfo->PatternFlags |= HARDWARE_PATTERN_NO_PLANEMASK;
   1394     if (pTseng->ChipType == ET6000)
   1395 	pXAAinfo->PatternFlags |= HARDWARE_PATTERN_TRANSPARENCY;
   1396 #endif
   1397 
   1398 #if 0
   1399     /* FIXME! This needs to be fixed for W32 and W32i (it "should work") */
   1400     if (pScrn->bitsPerPixel != 24) {
   1401 	pXAAinfo->SetupForColor8x8PatternFill =
   1402 	    TsengSetupForColor8x8PatternFill;
   1403 	pXAAinfo->SubsequentColor8x8PatternFillRect =
   1404 	    TsengSubsequentColor8x8PatternFillRect;
   1405     }
   1406 #endif
   1407 
   1408 #if 0 /*1*/
   1409     /*
   1410      * SolidLine.
   1411      *
   1412      * We use Bresenham by preference, because it supports hardware clipping
   1413      * (using the error term). TwoPointLines() is implemented, but not used,
   1414      * because clipped lines are not accelerated (hardware clipping support
   1415      * is lacking)...
   1416      */
   1417 
   1418     /*
   1419      * Fill in the hardware linedraw ACL_XY_DIRECTION table
   1420      *
   1421      * W32BresTable[] converts XAA interface Bresenham octants to direct
   1422      * ACL direction register contents. This includes the correct bias
   1423      * setting etc.
   1424      *
   1425      * According to miline.h (but with base 0 instead of base 1 as in
   1426      * miline.h), the octants are numbered as follows:
   1427      *
   1428      *   \    |    /
   1429      *    \ 2 | 1 /
   1430      *     \  |  /
   1431      *    3 \ | / 0
   1432      *       \|/
   1433      *   -----------
   1434      *       /|                                 \
   1435      *    4 / | \ 7
   1436      *     /  |       \
   1437      *    / 5 | 6      \
   1438      *   /    |        \
   1439      *
   1440      * In ACL_XY_DIRECTION, bits 2:0 are defined as follows:
   1441      *	0: '1' if XDECREASING
   1442      *	1: '1' if YDECREASING
   1443      *	2: '1' if XMAJOR (== not YMAJOR)
   1444      *
   1445      * Bit 4 defines the bias.  It should be set to '1' for all octants
   1446      * NOT passed to miSetZeroLineBias(). i.e. the inverse of the X bias.
   1447      *
   1448      * (For MS compatible bias, the data book says to set to the same as
   1449      * YDIR, i.e. bit 1 of the same register, = '1' if YDECREASING. MS
   1450      * bias is towards octants 0..3 (i.e. Y decreasing), hence this
   1451      * definition of bit 4)
   1452      *
   1453      */
   1454     pTseng->BresenhamTable = xnfalloc(8);
   1455     if (pTseng->BresenhamTable == NULL) {
   1456         xf86Msg(X_ERROR, "Could not malloc Bresenham Table.\n");
   1457         return FALSE;
   1458     }
   1459     for (i=0; i<8; i++) {
   1460         unsigned char zerolinebias = miGetZeroLineBias(pScreen);
   1461         pTseng->BresenhamTable[i] = 0xA0; /* command=linedraw, use error term */
   1462         if (i & XDECREASING) pTseng->BresenhamTable[i] |= 0x01;
   1463         if (i & YDECREASING) pTseng->BresenhamTable[i] |= 0x02;
   1464         if (!(i & YMAJOR))   pTseng->BresenhamTable[i] |= 0x04;
   1465         if ((1 << i) & zerolinebias) pTseng->BresenhamTable[i] |= 0x10;
   1466         /* ErrorF("BresenhamTable[%d]=0x%x\n", i, pTseng->BresenhamTable[i]); */
   1467     }
   1468 
   1469     pXAAinfo->SolidLineFlags = 0;
   1470     pXAAinfo->SetupForSolidLine = TsengSetupForSolidFill;
   1471     pXAAinfo->SubsequentSolidBresenhamLine =
   1472         TsengSubsequentSolidBresenhamLine;
   1473     /*
   1474      * ErrorTermBits is used to limit minor, major and error term, so it
   1475      * must be min(errorterm_size, delta_major_size, delta_minor_size)
   1476      * But the calculation for major and minor is done on the DOUBLED
   1477      * values (as per the Bresenham algorithm), so they can also have 13
   1478      * bits (inside XAA). They are divided by 2 in this driver, so they
   1479      * are then again limited to 12 bits.
   1480      */
   1481     pXAAinfo->SolidBresenhamLineErrorTermBits = 13;
   1482 
   1483 #endif
   1484 
   1485 #if 1
   1486     /* set up color expansion acceleration */
   1487     if (!TsengXAAInit_Colexp(pScrn))
   1488 	return FALSE;
   1489 #endif
   1490 
   1491 
   1492     /*
   1493      * For Tseng, we set up some often-used values
   1494      */
   1495 
   1496     switch (pTseng->Bytesperpixel) {   /* for MULBPP optimization */
   1497     case 1:
   1498 	pTseng->powerPerPixel = 0;
   1499 	pTseng->planemask_mask = 0x000000FF;
   1500 	pTseng->neg_x_pixel_offset = 0;
   1501 	break;
   1502     case 2:
   1503 	pTseng->powerPerPixel = 1;
   1504 	pTseng->planemask_mask = 0x0000FFFF;
   1505 	pTseng->neg_x_pixel_offset = 1;
   1506 	break;
   1507     case 3:
   1508 	pTseng->powerPerPixel = 1;
   1509 	pTseng->planemask_mask = 0x00FFFFFF;
   1510 	pTseng->neg_x_pixel_offset = 2;		/* is this correct ??? */
   1511 	break;
   1512     case 4:
   1513 	pTseng->powerPerPixel = 2;
   1514 	pTseng->planemask_mask = 0xFFFFFFFF;
   1515 	pTseng->neg_x_pixel_offset = 3;
   1516 	break;
   1517     }
   1518 
   1519     /*
   1520      * Init ping-pong registers.
   1521      * This might be obsoleted by the BACKGROUND_OPERATIONS flag.
   1522      */
   1523     pTseng->tsengFg = 0;
   1524     pTseng->tsengBg = 16;
   1525     pTseng->tsengPat = 32;
   1526 
   1527     /* for register write optimisation */
   1528     pTseng->tseng_old_dir = -1;
   1529     pTseng->old_x = 0;
   1530     pTseng->old_y = 0;
   1531 
   1532     /*
   1533      * Finally, we set up the video memory space available to the pixmap
   1534      * cache. In this case, all memory from the end of the virtual screen to
   1535      * the end of video memory minus 1K (which we already reserved), can be
   1536      * used.
   1537      */
   1538 
   1539     AvailFBArea.x1 = 0;
   1540     AvailFBArea.y1 = 0;
   1541     AvailFBArea.x2 = pScrn->displayWidth;
   1542     AvailFBArea.y2 = (pScrn->videoRam * 1024) /
   1543 	(pScrn->displayWidth * pTseng->Bytesperpixel);
   1544 
   1545     xf86InitFBManager(pScreen, &AvailFBArea);
   1546 
   1547     return (XAAInit(pScreen, pXAAinfo));
   1548 #else
   1549     return FALSE;
   1550 #endif
   1551 }
   1552