1d983712dSmrg 2d983712dSmrg#ifdef HAVE_CONFIG_H 3d983712dSmrg#include "config.h" 4d983712dSmrg#endif 5d983712dSmrg 6d983712dSmrg/* 7d983712dSmrg * if NO_OPTIMIZE is set, some optimizations are disabled. 8d983712dSmrg * 9d983712dSmrg * What it basically tries to do is minimize the amounts of writes to 10d983712dSmrg * accelerator registers, since these are the ones that slow down small 11d983712dSmrg * operations a lot. 12d983712dSmrg */ 13d983712dSmrg/* #define NO_OPTIMIZE */ 14d983712dSmrg 15d983712dSmrg/* 16d983712dSmrg * if ET6K_TRANSPARENCY is set, ScreentoScreenCopy operations (and pattern 17d983712dSmrg * fills) will support transparency. But then the planemask support has to 18d983712dSmrg * be dropped. The default here is to support planemasks, because all Tseng 19d983712dSmrg * chips can do this. Only the ET6000 supports a transparency compare. The 20d983712dSmrg * code could be easily changed to support transparency on the ET6000 and 21d983712dSmrg * planemasks on the others, but that's only useful when transparency is 22d983712dSmrg * more important than planemasks. 23d983712dSmrg */ 24d983712dSmrg#undef ET6K_TRANSPARENCY 25d983712dSmrg 26d983712dSmrg#include "tseng.h" 27d983712dSmrg#include "tseng_accel.h" 28d983712dSmrg 294b9470b1Smrg#ifdef HAVE_XAA_H 30d983712dSmrg#include "miline.h" 31d983712dSmrg 32d983712dSmrg/* 33d983712dSmrg * conversion from X ROPs to Microsoft ROPs. 34d983712dSmrg */ 35d983712dSmrg 36d983712dSmrgstatic int W32OpTable[] = 37d983712dSmrg{ 38d983712dSmrg 0x00, /* Xclear 0 */ 39d983712dSmrg 0x88, /* Xand src AND dst */ 40d983712dSmrg 0x44, /* XandReverse src AND NOT dst */ 41d983712dSmrg 0xcc, /* Xcopy src */ 42d983712dSmrg 0x22, /* XandInverted NOT src AND dst */ 43d983712dSmrg 0xaa, /* Xnoop dst */ 44d983712dSmrg 0x66, /* Xxor src XOR dst */ 45d983712dSmrg 0xee, /* Xor src OR dst */ 46d983712dSmrg 0x11, /* Xnor NOT src AND NOT dst */ 47d983712dSmrg 0x99, /* Xequiv NOT src XOR dst */ 48d983712dSmrg 0x55, /* Xinvert NOT dst */ 49d983712dSmrg 0xdd, /* XorReverse src OR NOT dst */ 50d983712dSmrg 0x33, /* XcopyInverted NOT src */ 51d983712dSmrg 0xbb, /* XorInverted NOT src OR dst */ 52d983712dSmrg 0x77, /* Xnand NOT src OR NOT dst */ 53d983712dSmrg 0xff /* Xset 1 */ 54d983712dSmrg}; 55d983712dSmrg 56d983712dSmrgstatic int W32OpTable_planemask[] = 57d983712dSmrg{ 58d983712dSmrg 0x0a, /* Xclear 0 */ 59d983712dSmrg 0x8a, /* Xand src AND dst */ 60d983712dSmrg 0x4a, /* XandReverse src AND NOT dst */ 61d983712dSmrg 0xca, /* Xcopy src */ 62d983712dSmrg 0x2a, /* XandInverted NOT src AND dst */ 63d983712dSmrg 0xaa, /* Xnoop dst */ 64d983712dSmrg 0x6a, /* Xxor src XOR dst */ 65d983712dSmrg 0xea, /* Xor src OR dst */ 66d983712dSmrg 0x1a, /* Xnor NOT src AND NOT dst */ 67d983712dSmrg 0x9a, /* Xequiv NOT src XOR dst */ 68d983712dSmrg 0x5a, /* Xinvert NOT dst */ 69d983712dSmrg 0xda, /* XorReverse src OR NOT dst */ 70d983712dSmrg 0x3a, /* XcopyInverted NOT src */ 71d983712dSmrg 0xba, /* XorInverted NOT src OR dst */ 72d983712dSmrg 0x7a, /* Xnand NOT src OR NOT dst */ 73d983712dSmrg 0xfa /* Xset 1 */ 74d983712dSmrg}; 75d983712dSmrg 76d983712dSmrgstatic int W32PatternOpTable[] = 77d983712dSmrg{ 78d983712dSmrg 0x00, /* Xclear 0 */ 79d983712dSmrg 0xa0, /* Xand pat AND dst */ 80d983712dSmrg 0x50, /* XandReverse pat AND NOT dst */ 81d983712dSmrg 0xf0, /* Xcopy pat */ 82d983712dSmrg 0x0a, /* XandInverted NOT pat AND dst */ 83d983712dSmrg 0xaa, /* Xnoop dst */ 84d983712dSmrg 0x5a, /* Xxor pat XOR dst */ 85d983712dSmrg 0xfa, /* Xor pat OR dst */ 86d983712dSmrg 0x05, /* Xnor NOT pat AND NOT dst */ 87d983712dSmrg 0xa5, /* Xequiv NOT pat XOR dst */ 88d983712dSmrg 0x55, /* Xinvert NOT dst */ 89d983712dSmrg 0xf5, /* XorReverse pat OR NOT dst */ 90d983712dSmrg 0x0f, /* XcopyInverted NOT pat */ 91d983712dSmrg 0xaf, /* XorInverted NOT pat OR dst */ 92d983712dSmrg 0x5f, /* Xnand NOT pat OR NOT dst */ 93d983712dSmrg 0xff /* Xset 1 */ 94d983712dSmrg}; 95d983712dSmrg 96d983712dSmrg 97d983712dSmrg 98d983712dSmrg/**********************************************************************/ 99d983712dSmrg 100d983712dSmrgstatic void 101d983712dSmrgtseng_terminate_acl(TsengPtr pTseng) 102d983712dSmrg{ 103d983712dSmrg /* only terminate when needed */ 104d983712dSmrg/* if (*(volatile unsigned char *)ACL_ACCELERATOR_STATUS & 0x06) */ 105d983712dSmrg { 106d983712dSmrg ACL_SUSPEND_TERMINATE(0x00); 107d983712dSmrg /* suspend any running operation */ 108d983712dSmrg ACL_SUSPEND_TERMINATE(0x01); 109d983712dSmrg WAIT_ACL; 110d983712dSmrg ACL_SUSPEND_TERMINATE(0x00); 111d983712dSmrg /* ... and now terminate it */ 112d983712dSmrg ACL_SUSPEND_TERMINATE(0x10); 113d983712dSmrg WAIT_ACL; 114d983712dSmrg ACL_SUSPEND_TERMINATE(0x00); 115d983712dSmrg } 116d983712dSmrg} 117d983712dSmrg 1183cb82e98Smrgvoid 119d983712dSmrgtseng_recover_timeout(TsengPtr pTseng) 120d983712dSmrg{ 121d983712dSmrg if (pTseng->ChipType == ET4000) { 122d983712dSmrg ErrorF("trying to unlock......................................\n"); 123d983712dSmrg MMIO_OUT32(pTseng->tsengCPU2ACLBase,0,0L); /* try unlocking the bus when CPU-to-accel gets stuck */ 124d983712dSmrg 125d983712dSmrg /* flush the accelerator pipeline */ 126d983712dSmrg ACL_SUSPEND_TERMINATE(0x00); 127d983712dSmrg ACL_SUSPEND_TERMINATE(0x02); 128d983712dSmrg ACL_SUSPEND_TERMINATE(0x00); 129d983712dSmrg } 130d983712dSmrg} 131d983712dSmrg 132d983712dSmrgvoid 133d983712dSmrgtseng_init_acl(ScrnInfoPtr pScrn) 134d983712dSmrg{ 135d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 136d983712dSmrg 137d983712dSmrg PDEBUG(" tseng_init_acl\n"); 138d983712dSmrg /* 139d983712dSmrg * prepare some shortcuts for faster access to memory mapped registers 140d983712dSmrg */ 141d983712dSmrg 142d983712dSmrg pTseng->scratchMemBase = pTseng->FbBase + pTseng->AccelColorBufferOffset; 143d983712dSmrg /* 144d983712dSmrg * we won't be using tsengCPU2ACLBase in linear memory mode anyway, since 145d983712dSmrg * using the MMU apertures restricts the amount of useable video memory 146d983712dSmrg * to only 2MB, supposing we ONLY redirect MMU aperture 2 to the CPU. 147d983712dSmrg * (see data book W32p, page 207) 148d983712dSmrg */ 149d983712dSmrg pTseng->tsengCPU2ACLBase = pTseng->FbBase + 0x200000; /* MMU aperture 2 */ 150d983712dSmrg 151d983712dSmrg#ifdef DEBUG 152d983712dSmrg ErrorF("MMioBase = 0x%x, scratchMemBase = 0x%x\n", pTseng->MMioBase, pTseng->scratchMemBase); 153d983712dSmrg#endif 154d983712dSmrg 155d983712dSmrg /* 156d983712dSmrg * prepare the accelerator for some real work 157d983712dSmrg */ 158d983712dSmrg 159d983712dSmrg tseng_terminate_acl(pTseng); 160d983712dSmrg 161d983712dSmrg ACL_INTERRUPT_STATUS(0xe); /* clear interrupts */ 162d983712dSmrg ACL_INTERRUPT_MASK(0x04); /* disable interrupts, but enable deadlock exit */ 163d983712dSmrg ACL_INTERRUPT_STATUS(0x0); 164d983712dSmrg ACL_ACCELERATOR_STATUS_SET(0x0); 165d983712dSmrg 166d983712dSmrg if (pTseng->ChipType == ET6000) { 167d983712dSmrg ACL_STEPPING_INHIBIT(0x0); /* Undefined at power-on, let all maps (Src, Dst, Mix, Pat) step */ 168d983712dSmrg ACL_6K_CONFIG(0x00); /* maximum performance -- what did you think? */ 169d983712dSmrg ACL_POWER_CONTROL(0x01); /* conserve power when ACL is idle */ 170d983712dSmrg ACL_MIX_CONTROL(0x33); 171d983712dSmrg ACL_TRANSFER_DISABLE(0x00); /* Undefined at power-on, enable all transfers */ 172d983712dSmrg } else { /* W32i/W32p */ 173d983712dSmrg ACL_RELOAD_CONTROL(0x0); 174d983712dSmrg ACL_SYNC_ENABLE(0x1); /* | 0x2 = 0WS ACL read. Yields up to 10% faster operation for small blits */ 175d983712dSmrg ACL_ROUTING_CONTROL(0x00); 176d983712dSmrg } 177d983712dSmrg 178d983712dSmrg /* Enable the W32p startup bit and set use an eight-bit pixel depth */ 179d983712dSmrg ACL_NQ_X_POSITION(0); 180d983712dSmrg ACL_NQ_Y_POSITION(0); 181d983712dSmrg ACL_PIXEL_DEPTH((pScrn->bitsPerPixel - 8) << 1); 182d983712dSmrg /* writing destination address will start ACL */ 183d983712dSmrg ACL_OPERATION_STATE(0x10); 184d983712dSmrg 185d983712dSmrg ACL_DESTINATION_Y_OFFSET(pScrn->displayWidth * pTseng->Bytesperpixel - 1); 186d983712dSmrg ACL_XY_DIRECTION(0); 187d983712dSmrg 188d983712dSmrg MMU_CONTROL(0x74); 189d983712dSmrg 190d983712dSmrg if (pTseng->ChipType == ET4000) { 191d983712dSmrg /* 192d983712dSmrg * Since the w32p revs C and D don't have any memory mapped when the 193d983712dSmrg * accelerator registers are used it is necessary to use the MMUs to 194d983712dSmrg * provide a semblance of linear memory. Fortunately on these chips 195d983712dSmrg * the MMU appertures are 1 megabyte each. So as long as we are 196d983712dSmrg * willing to only use 3 megs of video memory we can have some 197d983712dSmrg * acceleration. If we ever get the CPU-to-screen-color-expansion 198d983712dSmrg * stuff working then we will NOT need to sacrifice the extra 1MB 199d983712dSmrg * provided by MBP2, because we could do dynamic switching of the APT 200d983712dSmrg * bit in the MMU control register. 201d983712dSmrg * 202d983712dSmrg * On W32p rev c and d MBP2 is hardwired to 0x200000 when linear 203d983712dSmrg * memory mode is enabled. (On rev a it is programmable). 204d983712dSmrg * 205d983712dSmrg * W32p rev a and b have their first 2M mapped in the normal (non-MMU) 206d983712dSmrg * way, and MMU0 and MMU1, each 512 kb wide, can be used to access 207d983712dSmrg * another 1MB of memory. This totals to 3MB of mem. available in 208d983712dSmrg * linear memory when the accelerator is enabled. 209d983712dSmrg */ 210d983712dSmrg if ((pTseng->ChipRev == REV_A) || (pTseng->ChipRev == REV_B)) { 211d983712dSmrg MMIO_OUT32(pTseng->MMioBase, 0x00<<0, 0x200000L); 212d983712dSmrg MMIO_OUT32(pTseng->MMioBase, 0x04<<0, 0x280000L); 213d983712dSmrg } else { /* rev C & D */ 214d983712dSmrg MMIO_OUT32(pTseng->MMioBase, 0x00<<0, 0x0L); 215d983712dSmrg MMIO_OUT32 (pTseng->MMioBase, 0x04<<0, 0x100000L); 216d983712dSmrg } 217d983712dSmrg } 218d983712dSmrg} 219d983712dSmrg 220d983712dSmrg/* 221d983712dSmrg * ET4/6K acceleration interface -- color expansion primitives. 222d983712dSmrg * 223d983712dSmrg * Uses Harm Hanemaayer's generic acceleration interface (XAA). 224d983712dSmrg * 225d983712dSmrg * Author: Koen Gadeyne 226d983712dSmrg * 227d983712dSmrg * Much of the acceleration code is based on the XF86_W32 server code from 228d983712dSmrg * Glenn Lai. 229d983712dSmrg * 230d983712dSmrg * 231d983712dSmrg * Color expansion capabilities of the Tseng chip families: 232d983712dSmrg * 233d983712dSmrg * Chip screen-to-screen CPU-to-screen Supported depths 234d983712dSmrg * 235d983712dSmrg * ET4000W32/W32i No Yes 8bpp only 236d983712dSmrg * ET4000W32p Yes Yes 8bpp only 237d983712dSmrg * ET6000 Yes No 8/16/24/32 bpp 238d983712dSmrg */ 239d983712dSmrg#define SET_FUNCTION_COLOREXPAND \ 240d983712dSmrg if (pTseng->ChipType == ET6000) \ 241d983712dSmrg ACL_MIX_CONTROL(0x32); \ 242d983712dSmrg else \ 243d983712dSmrg ACL_ROUTING_CONTROL(0x08); 244d983712dSmrg 245d983712dSmrg#define SET_FUNCTION_COLOREXPAND_CPU \ 246d983712dSmrg ACL_ROUTING_CONTROL(0x02); 247d983712dSmrg 248d983712dSmrg 249d983712dSmrgstatic void 250d983712dSmrgTsengSubsequentScanlineCPUToScreenColorExpandFill(ScrnInfoPtr pScrn, 251d983712dSmrg int x, int y, int w, int h, int skipleft) 252d983712dSmrg{ 253d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 254d983712dSmrg 255d983712dSmrg if (pTseng->ChipType == ET4000) { 256d983712dSmrg /* the accelerator needs DWORD padding, and "w" is in PIXELS... */ 257d983712dSmrg pTseng->acl_colexp_width_dwords = (MULBPP(pTseng, w) + 31) >> 5; 258d983712dSmrg pTseng->acl_colexp_width_bytes = (MULBPP(pTseng, w) + 7) >> 3; 259d983712dSmrg } 260d983712dSmrg 261d983712dSmrg pTseng->acl_ColorExpandDst = FBADDR(pTseng, x, y); 262d983712dSmrg pTseng->acl_skipleft = skipleft; 263d983712dSmrg 264d983712dSmrg wait_acl_queue(pTseng); 265d983712dSmrg 266d983712dSmrg#if 0 267d983712dSmrg ACL_MIX_Y_OFFSET(w - 1); 268d983712dSmrg 269d983712dSmrg ErrorF(" W=%d", w); 270d983712dSmrg#endif 271d983712dSmrg SET_XY(pTseng, w, 1); 272d983712dSmrg} 273d983712dSmrg 274d983712dSmrgstatic void 275d983712dSmrgTsengSubsequentColorExpandScanline(ScrnInfoPtr pScrn, 276d983712dSmrg int bufno) 277d983712dSmrg{ 278d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 279d983712dSmrg 280d983712dSmrg wait_acl_queue(pTseng); 281d983712dSmrg 282d983712dSmrg ACL_MIX_ADDRESS((pTseng->AccelColorExpandBufferOffsets[bufno] << 3) + pTseng->acl_skipleft); 283d983712dSmrg START_ACL(pTseng, pTseng->acl_ColorExpandDst); 284d983712dSmrg 285d983712dSmrg /* move to next scanline */ 286d983712dSmrg pTseng->acl_ColorExpandDst += pTseng->line_width; 287d983712dSmrg 288d983712dSmrg /* 289d983712dSmrg * If not using triple-buffering, we need to wait for the queued 290d983712dSmrg * register set to be transferred to the working register set here, 291d983712dSmrg * because otherwise an e.g. double-buffering mechanism could overwrite 292d983712dSmrg * the buffer that's currently being worked with with new data too soon. 293d983712dSmrg * 294d983712dSmrg * WAIT_QUEUE; // not needed with triple-buffering 295d983712dSmrg */ 296d983712dSmrg} 297d983712dSmrg 298d983712dSmrg 299d983712dSmrg 300d983712dSmrg/* 301d983712dSmrg * We use this intermediate CPU-to-Screen color expansion because the one 302d983712dSmrg * provided by XAA seems to lock up the accelerator engine. 303d983712dSmrg * 304d983712dSmrg * One of the main differences between the XAA approach and this one is that 305d983712dSmrg * transfers are done per byte. I'm not sure if that is needed though. 306d983712dSmrg */ 307d983712dSmrgstatic void 308d983712dSmrgTsengSubsequentColorExpandScanline_8bpp(ScrnInfoPtr pScrn, int bufno) 309d983712dSmrg{ 310d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 311d983712dSmrg pointer dest = pTseng->tsengCPU2ACLBase; 312d983712dSmrg int i,j; 313d983712dSmrg CARD8 *bufptr; 314d983712dSmrg 315d983712dSmrg i = pTseng->acl_colexp_width_bytes; 316d983712dSmrg bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 317d983712dSmrg 318d983712dSmrg wait_acl_queue(pTseng); 319d983712dSmrg START_ACL (pTseng, pTseng->acl_ColorExpandDst); 320d983712dSmrg 321d983712dSmrg/* *((LongP) (MMioBase + 0x08)) = (CARD32) pTseng->acl_ColorExpandDst;*/ 322d983712dSmrg/* MMIO_OUT32(tsengCPU2ACLBase,0, (CARD32)pTseng->acl_ColorExpandDst); */ 323d983712dSmrg j = 0; 324d983712dSmrg /* Copy scanline data to accelerator MMU aperture byte by byte */ 325d983712dSmrg while (i--) { /* FIXME: we need to take care of PCI bursting and MMU overflow here! */ 326d983712dSmrg MMIO_OUT8(dest,j++, *bufptr++); 327d983712dSmrg } 328d983712dSmrg 329d983712dSmrg /* move to next scanline */ 330d983712dSmrg pTseng->acl_ColorExpandDst += pTseng->line_width; 331d983712dSmrg} 332d983712dSmrg 333d983712dSmrg/* 334d983712dSmrg * This function does direct memory-to-CPU bit doubling for color-expansion 335d983712dSmrg * at 16bpp on W32 chips. They can only do 8bpp color expansion, so we have 336d983712dSmrg * to expand the incoming data to 2bpp first. 337d983712dSmrg */ 338d983712dSmrgstatic void 339d983712dSmrgTsengSubsequentColorExpandScanline_16bpp(ScrnInfoPtr pScrn, int bufno) 340d983712dSmrg{ 341d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 342d983712dSmrg pointer dest = pTseng->tsengCPU2ACLBase; 343d983712dSmrg int i,j; 344d983712dSmrg CARD8 *bufptr; 345d983712dSmrg register CARD32 bits16; 346d983712dSmrg 347d983712dSmrg i = pTseng->acl_colexp_width_dwords * 2; 348d983712dSmrg bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 349d983712dSmrg 350d983712dSmrg wait_acl_queue(pTseng); 351d983712dSmrg START_ACL(pTseng, pTseng->acl_ColorExpandDst); 352d983712dSmrg 353d983712dSmrg j = 0; 354d983712dSmrg while (i--) { 355d983712dSmrg bits16 = pTseng->ColExpLUT[*bufptr++]; 356d983712dSmrg MMIO_OUT8(dest,j++,bits16 & 0xFF); 357d983712dSmrg MMIO_OUT8(dest,j++,(bits16 >> 8) & 0xFF); 358d983712dSmrg } 359d983712dSmrg 360d983712dSmrg /* move to next scanline */ 361d983712dSmrg pTseng->acl_ColorExpandDst += pTseng->line_width; 362d983712dSmrg} 363d983712dSmrg 364d983712dSmrg/* 365d983712dSmrg * This function does direct memory-to-CPU bit doubling for color-expansion 366d983712dSmrg * at 24bpp on W32 chips. They can only do 8bpp color expansion, so we have 367d983712dSmrg * to expand the incoming data to 3bpp first. 368d983712dSmrg */ 369d983712dSmrgstatic void 370d983712dSmrgTsengSubsequentColorExpandScanline_24bpp(ScrnInfoPtr pScrn, int bufno) 371d983712dSmrg{ 372d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 373d983712dSmrg pointer dest = pTseng->tsengCPU2ACLBase; 374d983712dSmrg int i, k, j = -1; 375d983712dSmrg CARD8 *bufptr; 376d983712dSmrg register CARD32 bits24; 377d983712dSmrg 378d983712dSmrg i = pTseng->acl_colexp_width_dwords * 4; 379d983712dSmrg bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 380d983712dSmrg 381d983712dSmrg wait_acl_queue(pTseng); 382d983712dSmrg START_ACL(pTseng, pTseng->acl_ColorExpandDst); 383d983712dSmrg 384d983712dSmrg /* take 8 input bits, expand to 3 output bytes */ 385d983712dSmrg bits24 = pTseng->ColExpLUT[*bufptr++]; 386d983712dSmrg k = 0; 387d983712dSmrg while (i--) { 388d983712dSmrg if ((j++) == 2) { /* "i % 3" operation is much to expensive */ 389d983712dSmrg j = 0; 390d983712dSmrg bits24 = pTseng->ColExpLUT[*bufptr++]; 391d983712dSmrg } 392d983712dSmrg MMIO_OUT8(dest,k++,bits24 & 0xFF); 393d983712dSmrg bits24 >>= 8; 394d983712dSmrg } 395d983712dSmrg 396d983712dSmrg /* move to next scanline */ 397d983712dSmrg pTseng->acl_ColorExpandDst += pTseng->line_width; 398d983712dSmrg} 399d983712dSmrg 400d983712dSmrg/* 401d983712dSmrg * This function does direct memory-to-CPU bit doubling for color-expansion 402d983712dSmrg * at 32bpp on W32 chips. They can only do 8bpp color expansion, so we have 403d983712dSmrg * to expand the incoming data to 4bpp first. 404d983712dSmrg */ 405d983712dSmrgstatic void 406d983712dSmrgTsengSubsequentColorExpandScanline_32bpp(ScrnInfoPtr pScrn, int bufno) 407d983712dSmrg{ 408d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 409d983712dSmrg pointer dest = pTseng->tsengCPU2ACLBase; 410d983712dSmrg int i,j; 411d983712dSmrg CARD8 *bufptr; 412d983712dSmrg register CARD32 bits32; 413d983712dSmrg 414d983712dSmrg i = pTseng->acl_colexp_width_dwords; 415d983712dSmrg /* amount of blocks of 8 bits to expand to 32 bits (=1 DWORD) */ 416d983712dSmrg bufptr = (CARD8 *) (pTseng->XAAScanlineColorExpandBuffers[bufno]); 417d983712dSmrg 418d983712dSmrg wait_acl_queue(pTseng); 419d983712dSmrg START_ACL(pTseng, pTseng->acl_ColorExpandDst); 420d983712dSmrg 421d983712dSmrg j = 0; 422d983712dSmrg while (i--) { 423d983712dSmrg bits32 = pTseng->ColExpLUT[*bufptr++]; 424d983712dSmrg MMIO_OUT8(dest,j++,bits32 & 0xFF); 425d983712dSmrg MMIO_OUT8(dest,j++,(bits32 >> 8) & 0xFF); 426d983712dSmrg MMIO_OUT8(dest,j++,(bits32 >> 16) & 0xFF); 427d983712dSmrg MMIO_OUT8(dest,j++,(bits32 >> 24) & 0xFF); 428d983712dSmrg } 429d983712dSmrg 430d983712dSmrg /* move to next scanline */ 431d983712dSmrg pTseng->acl_ColorExpandDst += pTseng->line_width; 432d983712dSmrg} 433d983712dSmrg 434d983712dSmrg/* 435d983712dSmrg * CPU-to-Screen color expansion. 436d983712dSmrg * This is for ET4000 only (The ET6000 cannot do this) 437d983712dSmrg */ 438d983712dSmrgstatic void 439d983712dSmrgTsengSetupForCPUToScreenColorExpandFill(ScrnInfoPtr pScrn, 440d983712dSmrg int fg, int bg, int rop, unsigned int planemask) 441d983712dSmrg{ 442d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 443d983712dSmrg 444d983712dSmrg/* ErrorF("X"); */ 445d983712dSmrg 446d983712dSmrg PINGPONG(pTseng); 447d983712dSmrg 448d983712dSmrg wait_acl_queue(pTseng); 449d983712dSmrg 450d983712dSmrg SET_FG_ROP(rop); 451d983712dSmrg SET_BG_ROP_TR(rop, bg); 452d983712dSmrg 453d983712dSmrg SET_XYDIR(0); 454d983712dSmrg 455d983712dSmrg SET_FG_BG_COLOR(pTseng, fg, bg); 456d983712dSmrg 457d983712dSmrg SET_FUNCTION_COLOREXPAND_CPU; 458d983712dSmrg 459d983712dSmrg /* assure correct alignment of MIX address (ACL needs same alignment here as in MMU aperture) */ 460d983712dSmrg ACL_MIX_ADDRESS(0); 461d983712dSmrg} 462d983712dSmrg 463d983712dSmrg#ifdef TSENG_CPU_TO_SCREEN_COLOREXPAND 464d983712dSmrg/* 465d983712dSmrg * TsengSubsequentCPUToScreenColorExpand() is potentially dangerous: 466d983712dSmrg * Not writing enough data to the MMU aperture for CPU-to-screen color 467d983712dSmrg * expansion will eventually cause a system deadlock! 468d983712dSmrg * 469d983712dSmrg * Note that CPUToScreenColorExpand operations _always_ require a 470d983712dSmrg * WAIT_INTERFACE before starting a new operation (this is empyrical, 471d983712dSmrg * though) 472d983712dSmrg */ 473d983712dSmrgstatic void 474d983712dSmrgTsengSubsequentCPUToScreenColorExpandFill(ScrnInfoPtr pScrn, 475d983712dSmrg int x, int y, int w, int h, int skipleft) 476d983712dSmrg{ 477d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 478d983712dSmrg int destaddr = FBADDR(pTseng, x, y); 479d983712dSmrg 480d983712dSmrg /* ErrorF(" %dx%d|%d ",w,h,skipleft); */ 481d983712dSmrg if (skipleft) 482d983712dSmrg ErrorF("Can't do: Skipleft = %d\n", skipleft); 483d983712dSmrg 484d983712dSmrg/* wait_acl_queue(); */ 485d983712dSmrg ErrorF("=========WAIT FIXME!\n"); 486d983712dSmrg WAIT_INTERFACE; 487d983712dSmrg 488d983712dSmrg ACL_MIX_Y_OFFSET(w - 1); 489d983712dSmrg SET_XY(pTseng, w, h); 490d983712dSmrg START_ACL(pTseng, destaddr); 491d983712dSmrg} 492d983712dSmrg#endif 493d983712dSmrg 494d983712dSmrgstatic void 495d983712dSmrgTsengSetupForScreenToScreenColorExpandFill(ScrnInfoPtr pScrn, 496d983712dSmrg int fg, int bg, int rop, unsigned int planemask) 497d983712dSmrg{ 498d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 499d983712dSmrg 500d983712dSmrg/* ErrorF("SSC "); */ 501d983712dSmrg 502d983712dSmrg PINGPONG(pTseng); 503d983712dSmrg 504d983712dSmrg wait_acl_queue(pTseng); 505d983712dSmrg 506d983712dSmrg SET_FG_ROP(rop); 507d983712dSmrg SET_BG_ROP_TR(rop, bg); 508d983712dSmrg 509d983712dSmrg SET_FG_BG_COLOR(pTseng, fg, bg); 510d983712dSmrg 511d983712dSmrg SET_FUNCTION_COLOREXPAND; 512d983712dSmrg 513d983712dSmrg SET_XYDIR(0); 514d983712dSmrg} 515d983712dSmrg 516d983712dSmrgstatic void 517d983712dSmrgTsengSubsequentScreenToScreenColorExpandFill(ScrnInfoPtr pScrn, 518d983712dSmrg int x, int y, int w, int h, int srcx, int srcy, int skipleft) 519d983712dSmrg{ 520d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 521d983712dSmrg int destaddr = FBADDR(pTseng, x, y); 522d983712dSmrg 523d983712dSmrg/* int srcaddr = FBADDR(pTseng, srcx, srcy); */ 524d983712dSmrg 525d983712dSmrg wait_acl_queue(pTseng); 526d983712dSmrg 527d983712dSmrg SET_XY(pTseng, w, h); 528d983712dSmrg ACL_MIX_ADDRESS( /* MIX address is in BITS */ 529d983712dSmrg (((srcy * pScrn->displayWidth) + srcx) * pScrn->bitsPerPixel) + skipleft); 530d983712dSmrg 531d983712dSmrg ACL_MIX_Y_OFFSET(pTseng->line_width << 3); 532d983712dSmrg 533d983712dSmrg START_ACL(pTseng, destaddr); 534d983712dSmrg} 535d983712dSmrg 536d983712dSmrg/* 537d983712dSmrg * 538d983712dSmrg */ 539d983712dSmrgstatic Bool 540d983712dSmrgTsengXAAInit_Colexp(ScrnInfoPtr pScrn) 541d983712dSmrg{ 542d983712dSmrg int i, j, r; 543d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 544d983712dSmrg XAAInfoRecPtr pXAAInfo = pTseng->AccelInfoRec; 545d983712dSmrg 546d983712dSmrg PDEBUG(" TsengXAAInit_Colexp\n"); 547d983712dSmrg 548d983712dSmrg#ifdef TODO 549d983712dSmrg if (OFLG_ISSET(OPTION_XAA_NO_COL_EXP, &vga256InfoRec.options)) 550d983712dSmrg return; 551d983712dSmrg#endif 552d983712dSmrg 553d983712dSmrg /* FIXME! disable accelerated color expansion for W32/W32i until it's fixed */ 554d983712dSmrg/* if (Is_W32 || Is_W32i) return; */ 555d983712dSmrg 556d983712dSmrg /* 557d983712dSmrg * Screen-to-screen color expansion. 558d983712dSmrg * 559d983712dSmrg * Scanline-screen-to-screen color expansion is slower than 560d983712dSmrg * CPU-to-screen color expansion. 561d983712dSmrg */ 562d983712dSmrg 563d983712dSmrg pXAAInfo->ScreenToScreenColorExpandFillFlags = 564d983712dSmrg BIT_ORDER_IN_BYTE_LSBFIRST | 565d983712dSmrg SCANLINE_PAD_DWORD | 566d983712dSmrg LEFT_EDGE_CLIPPING | 567d983712dSmrg NO_PLANEMASK; 568d983712dSmrg 569d983712dSmrg#if 1 570d983712dSmrg if ((pTseng->ChipType == ET6000) || (pScrn->bitsPerPixel == 8)) { 571d983712dSmrg pXAAInfo->SetupForScreenToScreenColorExpandFill = 572d983712dSmrg TsengSetupForScreenToScreenColorExpandFill; 573d983712dSmrg pXAAInfo->SubsequentScreenToScreenColorExpandFill = 574d983712dSmrg TsengSubsequentScreenToScreenColorExpandFill; 575d983712dSmrg } 576d983712dSmrg#endif 577d983712dSmrg 578d983712dSmrg /* 579d983712dSmrg * Scanline CPU to screen color expansion for all W32 engines. 580d983712dSmrg * 581d983712dSmrg * real CPU-to-screen color expansion is extremely tricky, and only 582d983712dSmrg * works for 8bpp anyway. 583d983712dSmrg * 584d983712dSmrg * This also allows us to do 16, 24 and 32 bpp color expansion by first 585d983712dSmrg * doubling the bitmap pattern before color-expanding it, because W32s 586d983712dSmrg * can only do 8bpp color expansion. 587d983712dSmrg */ 588d983712dSmrg 589d983712dSmrg pXAAInfo->ScanlineCPUToScreenColorExpandFillFlags = 590d983712dSmrg BIT_ORDER_IN_BYTE_LSBFIRST | 591d983712dSmrg SCANLINE_PAD_DWORD | 592d983712dSmrg NO_PLANEMASK; 593d983712dSmrg 594d983712dSmrg if (pTseng->ChipType == ET4000) { 595d983712dSmrg pTseng->XAAScanlineColorExpandBuffers[0] = 596d983712dSmrg xnfalloc(((pScrn->virtualX + 31)/32) * 4 * pTseng->Bytesperpixel); 597d983712dSmrg if (pTseng->XAAScanlineColorExpandBuffers[0] == NULL) { 598d983712dSmrg xf86Msg(X_ERROR, "Could not malloc color expansion scanline buffer.\n"); 599d983712dSmrg return FALSE; 600d983712dSmrg } 601d983712dSmrg pXAAInfo->NumScanlineColorExpandBuffers = 1; 602d983712dSmrg pXAAInfo->ScanlineColorExpandBuffers = pTseng->XAAScanlineColorExpandBuffers; 603d983712dSmrg 604d983712dSmrg pXAAInfo->SetupForScanlineCPUToScreenColorExpandFill = 605d983712dSmrg TsengSetupForCPUToScreenColorExpandFill; 606d983712dSmrg 607d983712dSmrg pXAAInfo->SubsequentScanlineCPUToScreenColorExpandFill = 608d983712dSmrg TsengSubsequentScanlineCPUToScreenColorExpandFill; 609d983712dSmrg 610d983712dSmrg switch (pScrn->bitsPerPixel) { 611d983712dSmrg case 8: 612d983712dSmrg pXAAInfo->SubsequentColorExpandScanline = 613d983712dSmrg TsengSubsequentColorExpandScanline_8bpp; 614d983712dSmrg break; 615d983712dSmrg case 15: 616d983712dSmrg case 16: 617d983712dSmrg pXAAInfo->SubsequentColorExpandScanline = 618d983712dSmrg TsengSubsequentColorExpandScanline_16bpp; 619d983712dSmrg break; 620d983712dSmrg case 24: 621d983712dSmrg pXAAInfo->SubsequentColorExpandScanline = 622d983712dSmrg TsengSubsequentColorExpandScanline_24bpp; 623d983712dSmrg break; 624d983712dSmrg case 32: 625d983712dSmrg pXAAInfo->SubsequentColorExpandScanline = 626d983712dSmrg TsengSubsequentColorExpandScanline_32bpp; 627d983712dSmrg break; 628d983712dSmrg } 629d983712dSmrg /* create color expansion LUT (used for >8bpp only) */ 630d983712dSmrg pTseng->ColExpLUT = xnfalloc(sizeof(CARD32)*256); 631d983712dSmrg if (pTseng->ColExpLUT == NULL) { 632d983712dSmrg xf86Msg(X_ERROR, "Could not malloc color expansion tables.\n"); 633d983712dSmrg return FALSE; 634d983712dSmrg } 635d983712dSmrg for (i = 0; i < 256; i++) { 636d983712dSmrg r = 0; 637d983712dSmrg for (j = 7; j >= 0; j--) { 638d983712dSmrg r <<= pTseng->Bytesperpixel; 639d983712dSmrg if ((i >> j) & 1) 640d983712dSmrg r |= (1 << pTseng->Bytesperpixel) - 1; 641d983712dSmrg } 642d983712dSmrg pTseng->ColExpLUT[i] = r; 643d983712dSmrg /* ErrorF("0x%08X, ",r ); if ((i%8)==7) ErrorF("\n"); */ 644d983712dSmrg } 645d983712dSmrg } else { 646d983712dSmrg /* 647d983712dSmrg * Triple-buffering is needed to account for double-buffering of Tseng 648d983712dSmrg * acceleration registers. 649d983712dSmrg */ 650d983712dSmrg pXAAInfo->NumScanlineColorExpandBuffers = 3; 651d983712dSmrg pXAAInfo->ScanlineColorExpandBuffers = 652d983712dSmrg pTseng->XAAColorExpandBuffers; 653d983712dSmrg pXAAInfo->SetupForScanlineCPUToScreenColorExpandFill = 654d983712dSmrg TsengSetupForScreenToScreenColorExpandFill; 655d983712dSmrg pXAAInfo->SubsequentScanlineCPUToScreenColorExpandFill = 656d983712dSmrg TsengSubsequentScanlineCPUToScreenColorExpandFill; 657d983712dSmrg pXAAInfo->SubsequentColorExpandScanline = 658d983712dSmrg TsengSubsequentColorExpandScanline; 659d983712dSmrg 660d983712dSmrg /* calculate memory addresses from video memory offsets */ 661d983712dSmrg for (i = 0; i < pXAAInfo->NumScanlineColorExpandBuffers; i++) { 662d983712dSmrg pTseng->XAAColorExpandBuffers[i] = 663d983712dSmrg pTseng->FbBase + pTseng->AccelColorExpandBufferOffsets[i]; 664d983712dSmrg } 665d983712dSmrg 666d983712dSmrg pXAAInfo->ScanlineColorExpandBuffers = pTseng->XAAColorExpandBuffers; 667d983712dSmrg } 668d983712dSmrg 669d983712dSmrg#ifdef TSENG_CPU_TO_SCREEN_COLOREXPAND 670d983712dSmrg /* 671d983712dSmrg * CPU-to-screen color expansion doesn't seem to be reliable yet. The 672d983712dSmrg * W32 needs the correct amount of data sent to it in this mode, or it 673d983712dSmrg * hangs the machine until is does (?). Currently, the init code in this 674d983712dSmrg * file or the XAA code that uses this does something wrong, so that 675d983712dSmrg * occasionally we get accelerator timeouts, and after a few, complete 676d983712dSmrg * system hangs. 677d983712dSmrg * 678d983712dSmrg * The W32 engine requires SCANLINE_NO_PAD, but that doesn't seem to 679d983712dSmrg * work very well (accelerator hangs). 680d983712dSmrg * 681d983712dSmrg * What works is this: tell XAA that we have SCANLINE_PAD_DWORD, and then 682d983712dSmrg * add the following code in TsengSubsequentCPUToScreenColorExpand(): 683d983712dSmrg * w = (w + 31) & ~31; this code rounds the width up to the nearest 684d983712dSmrg * multiple of 32, and together with SCANLINE_PAD_DWORD, this makes 685d983712dSmrg * CPU-to-screen color expansion work. Of course, the display isn't 686d983712dSmrg * correct (4 chars are "blanked out" when only one is written, for 687d983712dSmrg * example). But this shows that the principle works. But the code 688d983712dSmrg * doesn't... 689d983712dSmrg * 690d983712dSmrg * The same thing goes for PAD_BYTE: this also works (with the same 691d983712dSmrg * problems as SCANLINE_PAD_DWORD, although less prominent) 692d983712dSmrg */ 693d983712dSmrg 694d983712dSmrg pXAAInfo->CPUToScreenColorExpandFillFlags = 695d983712dSmrg BIT_ORDER_IN_BYTE_LSBFIRST | 696d983712dSmrg SCANLINE_PAD_DWORD | /* no other choice */ 697d983712dSmrg CPU_TRANSFER_PAD_DWORD | 698d983712dSmrg NO_PLANEMASK; 699d983712dSmrg 700d983712dSmrg if (Is_W32_any && (pScrn->bitsPerPixel == 8)) { 701d983712dSmrg pXAAInfo->SetupForCPUToScreenColorExpandFill = 702d983712dSmrg TsengSetupForCPUToScreenColorExpandFill; 703d983712dSmrg pXAAInfo->SubsequentCPUToScreenColorExpandFill = 704d983712dSmrg TsengSubsequentCPUToScreenColorExpandFill; 705d983712dSmrg 706d983712dSmrg /* we'll be using MMU aperture 2 */ 707d983712dSmrg pXAAInfo->ColorExpandBase = (CARD8 *)pTseng->tsengCPU2ACLBase; 708d983712dSmrg /* ErrorF("tsengCPU2ACLBase = 0x%x\n", pTseng->tsengCPU2ACLBase); */ 709d983712dSmrg /* aperture size is 8kb in banked mode. Larger in linear mode, but 8kb is enough */ 710d983712dSmrg pXAAInfo->ColorExpandRange = 8192; 711d983712dSmrg } 712d983712dSmrg#endif 713d983712dSmrg return TRUE; 714d983712dSmrg} 715d983712dSmrg 716d983712dSmrg/* 717d983712dSmrg * ET4/6K acceleration interface. 718d983712dSmrg * 719d983712dSmrg * Uses Harm Hanemaayer's generic acceleration interface (XAA). 720d983712dSmrg * 721d983712dSmrg * Author: Koen Gadeyne 722d983712dSmrg * 723d983712dSmrg * Much of the acceleration code is based on the XF86_W32 server code from 724d983712dSmrg * Glenn Lai. 725d983712dSmrg * 726d983712dSmrg */ 727d983712dSmrg 728d983712dSmrg/* 729d983712dSmrg * This is the implementation of the Sync() function. 730d983712dSmrg * 731d983712dSmrg * To avoid pipeline/cache/buffer flushing in the PCI subsystem and the VGA 732d983712dSmrg * controller, we might replace this read-intensive code with a dummy 733d983712dSmrg * accelerator operation that causes a hardware-blocking (wait-states) until 734d983712dSmrg * the running operation is done. 735d983712dSmrg */ 736d983712dSmrgstatic void 737d983712dSmrgTsengSync(ScrnInfoPtr pScrn) 738d983712dSmrg{ 739d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 740d983712dSmrg 741d983712dSmrg WAIT_ACL; 742d983712dSmrg} 743d983712dSmrg 744d983712dSmrg/* 745d983712dSmrg * This is the implementation of the SetupForSolidFill function 746d983712dSmrg * that sets up the coprocessor for a subsequent batch for solid 747d983712dSmrg * rectangle fills. 748d983712dSmrg */ 749d983712dSmrgstatic void 750d983712dSmrgTsengSetupForSolidFill(ScrnInfoPtr pScrn, 751d983712dSmrg int color, int rop, unsigned int planemask) 752d983712dSmrg{ 753d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 754d983712dSmrg 755d983712dSmrg /* 756d983712dSmrg * all registers are queued in the Tseng chips, except of course for the 757d983712dSmrg * stuff we want to store in off-screen memory. So we have to use a 758d983712dSmrg * ping-pong method for those if we want to avoid having to wait for the 759d983712dSmrg * accelerator when we want to write to these. 760d983712dSmrg */ 761d983712dSmrg 762d983712dSmrg/* ErrorF("S"); */ 763d983712dSmrg 764d983712dSmrg PINGPONG(pTseng); 765d983712dSmrg 766d983712dSmrg wait_acl_queue(pTseng); 767d983712dSmrg 768d983712dSmrg /* 769d983712dSmrg * planemask emulation uses a modified "standard" FG ROP (see ET6000 770d983712dSmrg * data book p 66 or W32p databook p 37: "Bit masking"). We only enable 771d983712dSmrg * the planemask emulation when the planemask is not a no-op, because 772d983712dSmrg * blitting speed would suffer. 773d983712dSmrg */ 774d983712dSmrg 775d983712dSmrg if ((planemask & pTseng->planemask_mask) != pTseng->planemask_mask) { 776d983712dSmrg SET_FG_ROP_PLANEMASK(rop); 777d983712dSmrg SET_BG_COLOR(pTseng, planemask); 778d983712dSmrg } else { 779d983712dSmrg SET_FG_ROP(rop); 780d983712dSmrg } 781d983712dSmrg SET_FG_COLOR(pTseng, color); 782d983712dSmrg 783d983712dSmrg SET_FUNCTION_BLT; 784d983712dSmrg} 785d983712dSmrg 786d983712dSmrg/* 787d983712dSmrg * This is the implementation of the SubsequentForSolidFillRect function 788d983712dSmrg * that sends commands to the coprocessor to fill a solid rectangle of 789d983712dSmrg * the specified location and size, with the parameters from the SetUp 790d983712dSmrg * call. 791d983712dSmrg * 792d983712dSmrg * Splitting it up between ET4000 and ET6000 avoids lots of chipset type 793d983712dSmrg * comparisons. 794d983712dSmrg */ 795d983712dSmrgstatic void 796d983712dSmrgTsengW32pSubsequentSolidFillRect(ScrnInfoPtr pScrn, 797d983712dSmrg int x, int y, int w, int h) 798d983712dSmrg{ 799d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 800d983712dSmrg int destaddr = FBADDR(pTseng, x, y); 801d983712dSmrg 802d983712dSmrg wait_acl_queue(pTseng); 803d983712dSmrg 804d983712dSmrg /* 805d983712dSmrg * Restoring the ACL_SOURCE_ADDRESS here is needed as long as Bresenham 806d983712dSmrg * lines are enabled for >8bpp. Or until XAA allows us to render 807d983712dSmrg * horizontal lines using the same Bresenham code instead of re-routing 808d983712dSmrg * them to FillRectSolid. For XDECREASING lines, the SubsequentBresenham 809d983712dSmrg * code adjusts the ACL_SOURCE_ADDRESS to make sure XDECREASING lines 810d983712dSmrg * are drawn with the correct colors. But if a batch of subsequent 811d983712dSmrg * operations also holds a few horizontal lines, they will be routed to 812d983712dSmrg * here without calling the SetupFor... code again, and the 813d983712dSmrg * ACL_SOURCE_ADDRESS will be wrong. 814d983712dSmrg */ 815d983712dSmrg ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg); 816d983712dSmrg 817d983712dSmrg SET_XYDIR(0); /* FIXME: not needed with separate setupforsolidline */ 818d983712dSmrg 819d983712dSmrg SET_XY_4(pTseng, w, h); 820d983712dSmrg START_ACL(pTseng, destaddr); 821d983712dSmrg} 822d983712dSmrg 823d983712dSmrgstatic void 824d983712dSmrgTseng6KSubsequentSolidFillRect(ScrnInfoPtr pScrn, 825d983712dSmrg int x, int y, int w, int h) 826d983712dSmrg{ 827d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 828d983712dSmrg int destaddr = FBADDR(pTseng, x, y); 829d983712dSmrg 830d983712dSmrg wait_acl_queue(pTseng); 831d983712dSmrg 832d983712dSmrg /* see comment in TsengW32pSubsequentFillRectSolid */ 833d983712dSmrg ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg); 834d983712dSmrg 835d983712dSmrg /* if XYDIR is not reset here, drawing a hardware line in between 836d983712dSmrg * blitting, with the same ROP, color, etc will not cause a call to 837d983712dSmrg * SetupFor... (because linedrawing uses SetupForSolidFill() as its 838d983712dSmrg * Setup() function), and thus the direction register will have been 839d983712dSmrg * changed by the last LineDraw operation. 840d983712dSmrg */ 841d983712dSmrg SET_XYDIR(0); 842d983712dSmrg 843d983712dSmrg SET_XY_6(pTseng, w, h); 844d983712dSmrg START_ACL_6(destaddr); 845d983712dSmrg} 846d983712dSmrg 847d983712dSmrg/* 848d983712dSmrg * This is the implementation of the SetupForScreenToScreenCopy function 849d983712dSmrg * that sets up the coprocessor for a subsequent batch of 850d983712dSmrg * screen-to-screen copies. 851d983712dSmrg */ 852d983712dSmrg 853d983712dSmrgstatic __inline__ void 854d983712dSmrgTseng_setup_screencopy(TsengPtr pTseng, 855d983712dSmrg int rop, unsigned int planemask, 856d983712dSmrg int trans_color, int blit_dir) 857d983712dSmrg{ 858d983712dSmrg wait_acl_queue(pTseng); 859d983712dSmrg 860d983712dSmrg#ifdef ET6K_TRANSPARENCY 861d983712dSmrg if ((pTseng->ChipType == ET6000) && (trans_color != -1)) { 862d983712dSmrg SET_BG_COLOR(trans_color); 863d983712dSmrg SET_FUNCTION_BLT_TR; 864d983712dSmrg } else 865d983712dSmrg SET_FUNCTION_BLT; 866d983712dSmrg 867d983712dSmrg SET_FG_ROP(rop); 868d983712dSmrg#else 869d983712dSmrg if ((planemask & pTseng->planemask_mask) != pTseng->planemask_mask) { 870d983712dSmrg SET_FG_ROP_PLANEMASK(rop); 871d983712dSmrg SET_BG_COLOR(pTseng, planemask); 872d983712dSmrg } else { 873d983712dSmrg SET_FG_ROP(rop); 874d983712dSmrg } 875d983712dSmrg SET_FUNCTION_BLT; 876d983712dSmrg#endif 877d983712dSmrg SET_XYDIR(blit_dir); 878d983712dSmrg} 879d983712dSmrg 880d983712dSmrgstatic void 881d983712dSmrgTsengSetupForScreenToScreenCopy(ScrnInfoPtr pScrn, 882d983712dSmrg int xdir, int ydir, int rop, 883d983712dSmrg unsigned int planemask, int trans_color) 884d983712dSmrg{ 885d983712dSmrg /* 886d983712dSmrg * xdir can be either 1 (left-to-right) or -1 (right-to-left). 887d983712dSmrg * ydir can be either 1 (top-to-bottom) or -1 (bottom-to-top). 888d983712dSmrg */ 889d983712dSmrg 890d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 891d983712dSmrg int blit_dir = 0; 892d983712dSmrg 893d983712dSmrg/* ErrorF("C%d ", trans_color); */ 894d983712dSmrg 895d983712dSmrg pTseng->acl_blitxdir = xdir; 896d983712dSmrg pTseng->acl_blitydir = ydir; 897d983712dSmrg 898d983712dSmrg if (xdir == -1) 899d983712dSmrg blit_dir |= 0x1; 900d983712dSmrg if (ydir == -1) 901d983712dSmrg blit_dir |= 0x2; 902d983712dSmrg 903d983712dSmrg Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, blit_dir); 904d983712dSmrg 905d983712dSmrg ACL_SOURCE_WRAP(0x77); /* no wrap */ 906d983712dSmrg ACL_SOURCE_Y_OFFSET(pTseng->line_width - 1); 907d983712dSmrg} 908d983712dSmrg 909d983712dSmrg/* 910d983712dSmrg * This is the implementation of the SubsequentForScreenToScreenCopy 911d983712dSmrg * that sends commands to the coprocessor to perform a screen-to-screen 912d983712dSmrg * copy of the specified areas, with the parameters from the SetUp call. 913d983712dSmrg * In this sample implementation, the direction must be taken into 914d983712dSmrg * account when calculating the addresses (with coordinates, it might be 915d983712dSmrg * a little easier). 916d983712dSmrg * 917d983712dSmrg * Splitting up the SubsequentScreenToScreenCopy between ET4000 and ET6000 918d983712dSmrg * doesn't seem to improve speed for small blits (as it did with 919d983712dSmrg * SolidFillRect). 920d983712dSmrg */ 921d983712dSmrgstatic void 922d983712dSmrgTsengSubsequentScreenToScreenCopy(ScrnInfoPtr pScrn, 923d983712dSmrg int x1, int y1, int x2, int y2, 924d983712dSmrg int w, int h) 925d983712dSmrg{ 926d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 927d983712dSmrg int srcaddr, destaddr; 928d983712dSmrg 929d983712dSmrg /* 930d983712dSmrg * Optimizing note: the pre-calc code below (i.e. until the first 931d983712dSmrg * register write) doesn't significantly affect performance. Removing it 932d983712dSmrg * all boosts small blits from 24.22 to 25.47 MB/sec. Don't waste time 933d983712dSmrg * on that. One less PCI bus write would boost us to 30.00 MB/sec, up 934d983712dSmrg * from 24.22. Waste time on _that_... 935d983712dSmrg */ 936d983712dSmrg 937d983712dSmrg /* tseng chips want x-sizes in bytes, not pixels */ 938d983712dSmrg x1 = MULBPP(pTseng, x1); 939d983712dSmrg x2 = MULBPP(pTseng, x2); 940d983712dSmrg 941d983712dSmrg /* 942d983712dSmrg * If the direction is "decreasing", the chip wants the addresses 943d983712dSmrg * to be at the other end, so we must be aware of that in our 944d983712dSmrg * calculations. 945d983712dSmrg */ 946d983712dSmrg if (pTseng->acl_blitydir == -1) { 947d983712dSmrg srcaddr = (y1 + h - 1) * pTseng->line_width; 948d983712dSmrg destaddr = (y2 + h - 1) * pTseng->line_width; 949d983712dSmrg } else { 950d983712dSmrg srcaddr = y1 * pTseng->line_width; 951d983712dSmrg destaddr = y2 * pTseng->line_width; 952d983712dSmrg } 953d983712dSmrg if (pTseng->acl_blitxdir == -1) { 954d983712dSmrg /* Accelerator start address must point to first byte to be processed. 955d983712dSmrg * Depending on the direction, this is the first or the last byte 956d983712dSmrg * in the multi-byte pixel. 957d983712dSmrg */ 958d983712dSmrg int eol = MULBPP(pTseng, w); 959d983712dSmrg 960d983712dSmrg srcaddr += x1 + eol - 1; 961d983712dSmrg destaddr += x2 + eol - 1; 962d983712dSmrg } else { 963d983712dSmrg srcaddr += x1; 964d983712dSmrg destaddr += x2; 965d983712dSmrg } 966d983712dSmrg 967d983712dSmrg wait_acl_queue(pTseng); 968d983712dSmrg 969d983712dSmrg SET_XY(pTseng, w, h); 970d983712dSmrg ACL_SOURCE_ADDRESS(srcaddr); 971d983712dSmrg START_ACL(pTseng, destaddr); 972d983712dSmrg} 973d983712dSmrg 974d983712dSmrg#if 0 975d983712dSmrgstatic int pat_src_addr; 976d983712dSmrg 977d983712dSmrgstatic void 978d983712dSmrgTsengSetupForColor8x8PatternFill(ScrnInfoPtr pScrn, 979d983712dSmrg int patx, int paty, int rop, unsigned int planemask, int trans_color) 980d983712dSmrg{ 981d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 982d983712dSmrg 983d983712dSmrg pat_src_addr = FBADDR(pTseng, patx, paty); 984d983712dSmrg 985d983712dSmrg ErrorF("P"); 986d983712dSmrg 987d983712dSmrg Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, 0); 988d983712dSmrg 989d983712dSmrg switch (pTseng->Bytesperpixel) { 990d983712dSmrg case 1: 991d983712dSmrg ACL_SOURCE_WRAP(0x33); /* 8x8 wrap */ 992d983712dSmrg ACL_SOURCE_Y_OFFSET(8 - 1); 993d983712dSmrg break; 994d983712dSmrg case 2: 995d983712dSmrg ACL_SOURCE_WRAP(0x34); /* 16x8 wrap */ 996d983712dSmrg ACL_SOURCE_Y_OFFSET(16 - 1); 997d983712dSmrg break; 998d983712dSmrg case 3: 999d983712dSmrg ACL_SOURCE_WRAP(0x3D); /* 24x8 wrap --- only for ET6000 !!! */ 1000d983712dSmrg ACL_SOURCE_Y_OFFSET(32 - 1); /* this is no error -- see databook */ 1001d983712dSmrg break; 1002d983712dSmrg case 4: 1003d983712dSmrg ACL_SOURCE_WRAP(0x35); /* 32x8 wrap */ 1004d983712dSmrg ACL_SOURCE_Y_OFFSET(32 - 1); 1005d983712dSmrg } 1006d983712dSmrg} 1007d983712dSmrg 1008d983712dSmrgstatic void 1009d983712dSmrgTsengSubsequentColor8x8PatternFillRect(ScrnInfoPtr pScrn, 1010d983712dSmrg int patx, int paty, int x, int y, int w, int h) 1011d983712dSmrg{ 1012d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 1013d983712dSmrg int destaddr = FBADDR(pTseng, x, y); 1014d983712dSmrg int srcaddr = pat_src_addr + MULBPP(pTseng, paty * 8 + patx); 1015d983712dSmrg 1016d983712dSmrg wait_acl_queue(pTseng); 1017d983712dSmrg 1018d983712dSmrg ACL_SOURCE_ADDRESS(srcaddr); 1019d983712dSmrg 1020d983712dSmrg SET_XY(pTseng, w, h); 1021d983712dSmrg START_ACL(pTseng, destaddr); 1022d983712dSmrg} 1023d983712dSmrg#endif 1024d983712dSmrg 1025d983712dSmrg#if 0 1026d983712dSmrg/* 1027d983712dSmrg * ImageWrite is nothing more than a per-scanline screencopy. 1028d983712dSmrg */ 1029d983712dSmrg 1030d983712dSmrgstatic void 1031d983712dSmrgTsengSetupForScanlineImageWrite(ScrnInfoPtr pScrn, 1032d983712dSmrg int rop, unsigned int planemask, int trans_color, int bpp, int depth) 1033d983712dSmrg{ 1034d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 1035d983712dSmrg 1036d983712dSmrg/* ErrorF("IW"); */ 1037d983712dSmrg 1038d983712dSmrg Tseng_setup_screencopy(pTseng, rop, planemask, trans_color, 0); 1039d983712dSmrg 1040d983712dSmrg ACL_SOURCE_WRAP(0x77); /* no wrap */ 1041d983712dSmrg ACL_SOURCE_Y_OFFSET(pTseng->line_width - 1); 1042d983712dSmrg} 1043d983712dSmrg 1044d983712dSmrgstatic void 1045d983712dSmrgTsengSubsequentScanlineImageWriteRect(ScrnInfoPtr pScrn, 1046d983712dSmrg int x, int y, int w, int h, int skipleft) 1047d983712dSmrg{ 1048d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 1049d983712dSmrg 1050d983712dSmrg/* ErrorF("r%d",h); */ 1051d983712dSmrg 1052d983712dSmrg pTseng->acl_iw_dest = y * pTseng->line_width + MULBPP(pTseng, x); 1053d983712dSmrg pTseng->acl_skipleft = MULBPP(pTseng, skipleft); 1054d983712dSmrg 1055d983712dSmrg wait_acl_queue(pTseng); 1056d983712dSmrg SET_XY(pTseng, w, 1); 1057d983712dSmrg} 1058d983712dSmrg 1059d983712dSmrgstatic void 1060d983712dSmrgTsengSubsequentImageWriteScanline(ScrnInfoPtr pScrn, 1061d983712dSmrg int bufno) 1062d983712dSmrg{ 1063d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 1064d983712dSmrg 1065d983712dSmrg/* ErrorF("%d", bufno); */ 1066d983712dSmrg 1067d983712dSmrg wait_acl_queue(pTseng); 1068d983712dSmrg 1069d983712dSmrg ACL_SOURCE_ADDRESS(pTseng->AccelImageWriteBufferOffsets[bufno] 1070d983712dSmrg + pTseng->acl_skipleft); 1071d983712dSmrg START_ACL(pTseng, pTseng->acl_iw_dest); 1072d983712dSmrg pTseng->acl_iw_dest += pTseng->line_width; 1073d983712dSmrg} 1074d983712dSmrg#endif 1075d983712dSmrg 1076d983712dSmrg#if 0 1077d983712dSmrg/* 1078d983712dSmrg * W32p/ET6000 hardware linedraw code. 1079d983712dSmrg * 1080d983712dSmrg * TsengSetupForSolidFill() is used as a setup function. 1081d983712dSmrg * 1082d983712dSmrg * Three major problems that needed to be solved here: 1083d983712dSmrg * 1084d983712dSmrg * 1. The "bias" value must be translated into the "line draw algorithm" 1085d983712dSmrg * parameter in the Tseng accelerators. This parameter, although not 1086d983712dSmrg * documented as such, needs to be set to the _inverse_ of the 1087d983712dSmrg * appropriate bias bit (i.e. for the appropriate octant). 1088d983712dSmrg * 1089d983712dSmrg * 2. In >8bpp modes, the accelerator will render BYTES in the same order as 1090d983712dSmrg * it is drawing the line. This means it will render the colors in the 1091d983712dSmrg * same order as well, reversing the byte-order in pixels that are drawn 1092d983712dSmrg * right-to-left. This causes wrong colors to be rendered. 1093d983712dSmrg * 1094d983712dSmrg * 3. The Tseng data book says that the ACL Y count register needs to be 1095d983712dSmrg * programmed with "dy-1". A similar thing is said about ACL X count. But 1096d983712dSmrg * this assumes (x2,y2) is NOT drawn (although that is not mentionned in 1097d983712dSmrg * the data book). X assumes the endpoint _is_ drawn. If "dy-1" is used, 1098d983712dSmrg * this sometimes results in a negative value (if dx==dy==0), 1099d983712dSmrg * causing a complete accelerator hang. 1100d983712dSmrg */ 1101d983712dSmrg 1102d983712dSmrgstatic void 1103d983712dSmrgTsengSubsequentSolidBresenhamLine(ScrnInfoPtr pScrn, 1104d983712dSmrg int x, int y, int major, int minor, int err, int len, int octant) 1105d983712dSmrg{ 1106d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 1107d983712dSmrg int destaddr = FBADDR(pTseng, x, y); 1108d983712dSmrg int xydir = pTseng->BresenhamTable[octant]; 1109d983712dSmrg 1110d983712dSmrg /* Tseng wants the real dx/dy in major/minor. Bresenham uses 2*dx and 2*dy */ 1111d983712dSmrg minor >>= 1; 1112d983712dSmrg major >>= 1; 1113d983712dSmrg 1114d983712dSmrg wait_acl_queue(pTseng); 1115d983712dSmrg 1116d983712dSmrg if (!(octant & YMAJOR)) { 1117d983712dSmrg SET_X_YRAW(pTseng, len, 0xFFF); 1118d983712dSmrg } else { 1119d983712dSmrg SET_XY_RAW(pTseng,0xFFF, len - 1); 1120d983712dSmrg } 1121d983712dSmrg 1122d983712dSmrg SET_DELTA(minor, major); 1123d983712dSmrg ACL_ERROR_TERM(-err); /* error term from XAA is NEGATIVE */ 1124d983712dSmrg 1125d983712dSmrg /* make sure colors are rendered correctly if >8bpp */ 1126d983712dSmrg if (octant & XDECREASING) { 1127d983712dSmrg destaddr += pTseng->Bytesperpixel - 1; 1128d983712dSmrg ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset 1129d983712dSmrg + pTseng->tsengFg + pTseng->neg_x_pixel_offset); 1130d983712dSmrg } else 1131d983712dSmrg ACL_SOURCE_ADDRESS(pTseng->AccelColorBufferOffset + pTseng->tsengFg); 1132d983712dSmrg 1133d983712dSmrg SET_XYDIR(xydir); 1134d983712dSmrg 1135d983712dSmrg START_ACL(pTseng, destaddr); 1136d983712dSmrg} 1137d983712dSmrg#endif 1138d983712dSmrg 1139d983712dSmrg#ifdef TODO 1140d983712dSmrg/* 1141d983712dSmrg * Trapezoid filling code. 1142d983712dSmrg * 1143d983712dSmrg * TsengSetupForSolidFill() is used as a setup function 1144d983712dSmrg */ 1145d983712dSmrg 1146d983712dSmrg#undef DEBUG_TRAP 1147d983712dSmrg 1148d983712dSmrg#ifdef TSENG_TRAPEZOIDS 1149d983712dSmrgstatic void 1150d983712dSmrgTsengSubsequentFillTrapezoidSolid(ytop, height, left, dxL, dyL, eL, right, dxR, dyR, eR) 1151d983712dSmrg int ytop; 1152d983712dSmrg int height; 1153d983712dSmrg int left; 1154d983712dSmrg int dxL, dyL; 1155d983712dSmrg int eL; 1156d983712dSmrg int right; 1157d983712dSmrg int dxR, dyR; 1158d983712dSmrg int eR; 1159d983712dSmrg{ 1160d983712dSmrg unsigned int tseng_bias_compensate = 0xd8; 1161d983712dSmrg int destaddr, algrthm; 1162d983712dSmrg int xcount = right - left + 1; /* both edges included */ 1163d983712dSmrg int dir_reg = 0x60; /* trapezoid drawing; use error term for primary edge */ 1164d983712dSmrg int sec_dir_reg = 0x20; /* use error term for secondary edge */ 1165d983712dSmrg int octant = 0; 1166d983712dSmrg 1167d983712dSmrg /* ErrorF("#"); */ 1168d983712dSmrg 1169d983712dSmrg int destaddr, algrthm; 1170d983712dSmrg int xcount = right - left + 1; 1171d983712dSmrg 1172d983712dSmrg#ifdef USE_ERROR_TERM 1173d983712dSmrg int dir_reg = 0x60; 1174d983712dSmrg int sec_dir_reg = 0x20; 1175d983712dSmrg 1176d983712dSmrg#else 1177d983712dSmrg int dir_reg = 0x40; 1178d983712dSmrg int sec_dir_reg = 0x00; 1179d983712dSmrg 1180d983712dSmrg#endif 1181d983712dSmrg int octant = 0; 1182d983712dSmrg int bias = 0x00; /* FIXME !!! */ 1183d983712dSmrg 1184d983712dSmrg/* ErrorF("#"); */ 1185d983712dSmrg 1186d983712dSmrg#ifdef DEBUG_TRAP 1187d983712dSmrg ErrorF("ytop=%d, height=%d, left=%d, dxL=%d, dyL=%d, eL=%d, right=%d, dxR=%d, dyR=%d, eR=%d ", 1188d983712dSmrg ytop, height, left, dxL, dyL, eL, right, dxR, dyR, eR); 1189d983712dSmrg#endif 1190d983712dSmrg 1191d983712dSmrg if ((dyL < 0) || (dyR < 0)) 1192d983712dSmrg ErrorF("Tseng Trapezoids: Wrong assumption: dyL/R < 0\n"); 1193d983712dSmrg 1194d983712dSmrg destaddr = FBADDR(pTseng, left, ytop); 1195d983712dSmrg 1196d983712dSmrg /* left edge */ 1197d983712dSmrg if (dxL < 0) { 1198d983712dSmrg dir_reg |= 1; 1199d983712dSmrg octant |= XDECREASING; 1200d983712dSmrg dxL = -dxL; 1201d983712dSmrg } 1202d983712dSmrg /* Y direction is always positive (top-to-bottom drawing) */ 1203d983712dSmrg 1204d983712dSmrg wait_acl_queue(pTseng); 1205d983712dSmrg 1206d983712dSmrg /* left edge */ 1207d983712dSmrg /* compute axial direction and load registers */ 1208d983712dSmrg if (dxL >= dyL) { /* X is major axis */ 1209d983712dSmrg dir_reg |= 4; 1210d983712dSmrg SET_DELTA(dyL, dxL); 1211d983712dSmrg if (dir_reg & 1) { /* edge coherency: draw left edge */ 1212d983712dSmrg destaddr += pTseng->Bytesperpixel; 1213d983712dSmrg sec_dir_reg |= 0x80; 1214d983712dSmrg xcount--; 1215d983712dSmrg } 1216d983712dSmrg } else { /* Y is major axis */ 1217d983712dSmrg SetYMajorOctant(octant); 1218d983712dSmrg SET_DELTA(dxL, dyL); 1219d983712dSmrg } 1220d983712dSmrg ACL_ERROR_TERM(eL); 1221d983712dSmrg 1222d983712dSmrg /* select "linedraw algorithm" (=bias) and load direction register */ 1223d983712dSmrg /* ErrorF(" o=%d ", octant); */ 1224d983712dSmrg algrthm = ((tseng_bias_compensate >> octant) & 1) ^ 1; 1225d983712dSmrg dir_reg |= algrthm << 4; 1226d983712dSmrg SET_XYDIR(dir_reg); 1227d983712dSmrg 1228d983712dSmrg /* right edge */ 1229d983712dSmrg if (dxR < 0) { 1230d983712dSmrg sec_dir_reg |= 1; 1231d983712dSmrg dxR = -dxR; 1232d983712dSmrg } 1233d983712dSmrg /* compute axial direction and load registers */ 1234d983712dSmrg if (dxR >= dyR) { /* X is major axis */ 1235d983712dSmrg sec_dir_reg |= 4; 1236d983712dSmrg SET_SECONDARY_DELTA(dyR, dxR); 1237d983712dSmrg if (dir_reg & 1) { /* edge coherency: do not draw right edge */ 1238d983712dSmrg sec_dir_reg |= 0x40; 1239d983712dSmrg xcount++; 1240d983712dSmrg } 1241d983712dSmrg } else { /* Y is major axis */ 1242d983712dSmrg SET_SECONDARY_DELTA(dxR, dyR); 1243d983712dSmrg } 1244d983712dSmrg ACL_SECONDARY_ERROR_TERM(eR); 1245d983712dSmrg 1246d983712dSmrg /* ErrorF("%02x", sec_dir_reg); */ 1247d983712dSmrg SET_SECONDARY_XYDIR(sec_dir_reg); 1248d983712dSmrg 1249d983712dSmrg SET_XY_6(pTseng, xcount, height); 1250d983712dSmrg 1251d983712dSmrg#ifdef DEBUG_TRAP 1252d983712dSmrg ErrorF("-> %d,%d\n", xcount, height); 1253d983712dSmrg#endif 1254d983712dSmrg 1255d983712dSmrg START_ACL_6(destaddr); 1256d983712dSmrg} 1257d983712dSmrg#endif 1258d983712dSmrg 1259d983712dSmrg#endif 1260d983712dSmrg 12614b9470b1Smrg#endif 1262d983712dSmrg 1263d983712dSmrg/* 1264d983712dSmrg * The following function sets up the supported acceleration. Call it from 1265d983712dSmrg * the FbInit() function in the SVGA driver. Do NOT initialize any hardware 1266d983712dSmrg * in here. That belongs in tseng_init_acl(). 1267d983712dSmrg */ 1268d983712dSmrgBool 1269d983712dSmrgTsengXAAInit(ScreenPtr pScreen) 1270d983712dSmrg{ 12714b9470b1Smrg#ifdef HAVE_XAA_H 12724b9470b1Smrg ScrnInfoPtr pScrn = xf86ScreenToScrn(pScreen); 1273d983712dSmrg TsengPtr pTseng = TsengPTR(pScrn); 1274d983712dSmrg XAAInfoRecPtr pXAAinfo; 1275d983712dSmrg BoxRec AvailFBArea; 1276d983712dSmrg 1277d983712dSmrg PDEBUG(" TsengXAAInit\n"); 1278d983712dSmrg pTseng->AccelInfoRec = pXAAinfo = XAACreateInfoRec(); 1279d983712dSmrg if (!pXAAinfo) 1280d983712dSmrg return FALSE; 1281d983712dSmrg 1282d983712dSmrg /* 1283d983712dSmrg * Set up the main acceleration flags. 1284d983712dSmrg */ 1285d983712dSmrg pXAAinfo->Flags = PIXMAP_CACHE; 1286d983712dSmrg 1287d983712dSmrg /* 1288d983712dSmrg * The following line installs a "Sync" function, that waits for 1289d983712dSmrg * all coprocessor operations to complete. 1290d983712dSmrg */ 1291d983712dSmrg pXAAinfo->Sync = TsengSync; 1292d983712dSmrg 1293d983712dSmrg /* W32 and W32i must wait for ACL before changing registers */ 1294d983712dSmrg if (pTseng->ChipType == ET4000) 1295d983712dSmrg pTseng->need_wait_acl = TRUE; 1296d983712dSmrg else 1297d983712dSmrg pTseng->need_wait_acl = FALSE; 1298d983712dSmrg 1299d983712dSmrg pTseng->line_width = pScrn->displayWidth * pTseng->Bytesperpixel; 1300d983712dSmrg 1301d983712dSmrg#if 1 1302d983712dSmrg /* 1303d983712dSmrg * SolidFillRect. 1304d983712dSmrg * 1305d983712dSmrg * The W32 and W32i chips don't have a register to set the amount of 1306d983712dSmrg * bytes per pixel, and hence they don't skip 1 byte in each 4-byte word 1307d983712dSmrg * at 24bpp. Therefor, the FG or BG colors would have to be concatenated 1308d983712dSmrg * in video memory (R-G-B-R-G-B-... instead of R-G-B-X-R-G-B-X-..., with 1309d983712dSmrg * X = dont' care), plus a wrap value that is a multiple of 3 would have 1310d983712dSmrg * to be set. There is no such wrap combination available. 1311d983712dSmrg */ 1312d983712dSmrg#ifdef OBSOLETE 1313d983712dSmrg pXAAinfo->SolidFillFlags |= NO_PLANEMASK; 1314d983712dSmrg#endif 1315d983712dSmrg 1316d983712dSmrg pXAAinfo->SetupForSolidFill = TsengSetupForSolidFill; 1317d983712dSmrg if (pTseng->ChipType == ET6000) 1318d983712dSmrg pXAAinfo->SubsequentSolidFillRect = Tseng6KSubsequentSolidFillRect; 1319d983712dSmrg else 1320d983712dSmrg pXAAinfo->SubsequentSolidFillRect = TsengW32pSubsequentSolidFillRect; 1321d983712dSmrg 1322d983712dSmrg#ifdef TSENG_TRAPEZOIDS 1323d983712dSmrg if (pTseng->ChipType == ET6000) 1324d983712dSmrg /* disabled for now: not fully compliant yet */ 1325d983712dSmrg pXAAinfo->SubsequentFillTrapezoidSolid = TsengSubsequentFillTrapezoidSolid; 1326d983712dSmrg#endif 1327d983712dSmrg#endif 1328d983712dSmrg 1329d983712dSmrg#if 1 1330d983712dSmrg /* 1331d983712dSmrg * SceenToScreenCopy (BitBLT). 1332d983712dSmrg * 1333d983712dSmrg * Restrictions: On ET6000, we support EITHER a planemask OR 1334d983712dSmrg * TRANSPARENCY, but not both (they use the same Pattern map). 1335d983712dSmrg * All other chips can't do TRANSPARENCY at all. 1336d983712dSmrg */ 1337d983712dSmrg#ifdef ET6K_TRANSPARENCY 1338d983712dSmrg pXAAinfo->CopyAreaFlags = NO_PLANEMASK; 1339d983712dSmrg if (pTseng->ChipType == ET4000) 1340d983712dSmrg pXAAinfo->CopyAreaFlags |= NO_TRANSPARENCY; 1341d983712dSmrg 1342d983712dSmrg#else 1343d983712dSmrg pXAAinfo->CopyAreaFlags = NO_TRANSPARENCY; 1344d983712dSmrg#endif 1345d983712dSmrg 1346d983712dSmrg pXAAinfo->SetupForScreenToScreenCopy = 1347d983712dSmrg TsengSetupForScreenToScreenCopy; 1348d983712dSmrg pXAAinfo->SubsequentScreenToScreenCopy = 1349d983712dSmrg TsengSubsequentScreenToScreenCopy; 1350d983712dSmrg#endif 1351d983712dSmrg 1352d983712dSmrg#if 0 1353d983712dSmrg /* 1354d983712dSmrg * ImageWrite. 1355d983712dSmrg * 1356d983712dSmrg * SInce this uses off-screen scanline buffers, it is only of use when 1357d983712dSmrg * complex ROPs are used. But since the current XAA pixmap cache code 1358d983712dSmrg * only works when an ImageWrite is provided, the NO_GXCOPY flag is 1359d983712dSmrg * temporarily disabled. 1360d983712dSmrg */ 1361d983712dSmrg 1362d983712dSmrg if (pTseng->AccelImageWriteBufferOffsets[0]) { 1363d983712dSmrg pXAAinfo->ScanlineImageWriteFlags = 1364d983712dSmrg pXAAinfo->CopyAreaFlags | LEFT_EDGE_CLIPPING /* | NO_GXCOPY */ ; 1365d983712dSmrg pXAAinfo->NumScanlineImageWriteBuffers = 2; 1366d983712dSmrg pXAAinfo->SetupForScanlineImageWrite = 1367d983712dSmrg TsengSetupForScanlineImageWrite; 1368d983712dSmrg pXAAinfo->SubsequentScanlineImageWriteRect = 1369d983712dSmrg TsengSubsequentScanlineImageWriteRect; 1370d983712dSmrg pXAAinfo->SubsequentImageWriteScanline = 1371d983712dSmrg TsengSubsequentImageWriteScanline; 1372d983712dSmrg 1373d983712dSmrg /* calculate memory addresses from video memory offsets */ 1374d983712dSmrg for (i = 0; i < pXAAinfo->NumScanlineImageWriteBuffers; i++) { 1375d983712dSmrg pTseng->XAAScanlineImageWriteBuffers[i] = 1376d983712dSmrg pTseng->FbBase + pTseng->AccelImageWriteBufferOffsets[i]; 1377d983712dSmrg } 1378d983712dSmrg 1379d983712dSmrg pXAAinfo->ScanlineImageWriteBuffers = pTseng->XAAScanlineImageWriteBuffers; 1380d983712dSmrg } 1381d983712dSmrg#endif 1382d983712dSmrg /* 1383d983712dSmrg * 8x8 pattern tiling not possible on W32/i/p chips in 24bpp mode. 1384d983712dSmrg * Currently, 24bpp pattern tiling doesn't work at all on those. 1385d983712dSmrg * 1386d983712dSmrg * FIXME: On W32 cards, pattern tiling doesn't work as expected. 1387d983712dSmrg */ 1388d983712dSmrg pXAAinfo->Color8x8PatternFillFlags = HARDWARE_PATTERN_PROGRAMMED_ORIGIN; 1389d983712dSmrg 1390d983712dSmrg pXAAinfo->CachePixelGranularity = 8 * 8; 1391d983712dSmrg 1392d983712dSmrg#ifdef ET6K_TRANSPARENCY 1393d983712dSmrg pXAAinfo->PatternFlags |= HARDWARE_PATTERN_NO_PLANEMASK; 1394d983712dSmrg if (pTseng->ChipType == ET6000) 1395d983712dSmrg pXAAinfo->PatternFlags |= HARDWARE_PATTERN_TRANSPARENCY; 1396d983712dSmrg#endif 1397d983712dSmrg 1398d983712dSmrg#if 0 1399d983712dSmrg /* FIXME! This needs to be fixed for W32 and W32i (it "should work") */ 1400d983712dSmrg if (pScrn->bitsPerPixel != 24) { 1401d983712dSmrg pXAAinfo->SetupForColor8x8PatternFill = 1402d983712dSmrg TsengSetupForColor8x8PatternFill; 1403d983712dSmrg pXAAinfo->SubsequentColor8x8PatternFillRect = 1404d983712dSmrg TsengSubsequentColor8x8PatternFillRect; 1405d983712dSmrg } 1406d983712dSmrg#endif 1407d983712dSmrg 1408d983712dSmrg#if 0 /*1*/ 1409d983712dSmrg /* 1410d983712dSmrg * SolidLine. 1411d983712dSmrg * 1412d983712dSmrg * We use Bresenham by preference, because it supports hardware clipping 1413d983712dSmrg * (using the error term). TwoPointLines() is implemented, but not used, 1414d983712dSmrg * because clipped lines are not accelerated (hardware clipping support 1415d983712dSmrg * is lacking)... 1416d983712dSmrg */ 1417d983712dSmrg 1418d983712dSmrg /* 1419d983712dSmrg * Fill in the hardware linedraw ACL_XY_DIRECTION table 1420d983712dSmrg * 1421d983712dSmrg * W32BresTable[] converts XAA interface Bresenham octants to direct 1422d983712dSmrg * ACL direction register contents. This includes the correct bias 1423d983712dSmrg * setting etc. 1424d983712dSmrg * 1425d983712dSmrg * According to miline.h (but with base 0 instead of base 1 as in 1426d983712dSmrg * miline.h), the octants are numbered as follows: 1427d983712dSmrg * 1428d983712dSmrg * \ | / 1429d983712dSmrg * \ 2 | 1 / 1430d983712dSmrg * \ | / 1431d983712dSmrg * 3 \ | / 0 1432d983712dSmrg * \|/ 1433d983712dSmrg * ----------- 1434d983712dSmrg * /| \ 1435d983712dSmrg * 4 / | \ 7 1436d983712dSmrg * / | \ 1437d983712dSmrg * / 5 | 6 \ 1438d983712dSmrg * / | \ 1439d983712dSmrg * 1440d983712dSmrg * In ACL_XY_DIRECTION, bits 2:0 are defined as follows: 1441d983712dSmrg * 0: '1' if XDECREASING 1442d983712dSmrg * 1: '1' if YDECREASING 1443d983712dSmrg * 2: '1' if XMAJOR (== not YMAJOR) 1444d983712dSmrg * 1445d983712dSmrg * Bit 4 defines the bias. It should be set to '1' for all octants 1446d983712dSmrg * NOT passed to miSetZeroLineBias(). i.e. the inverse of the X bias. 1447d983712dSmrg * 1448d983712dSmrg * (For MS compatible bias, the data book says to set to the same as 1449d983712dSmrg * YDIR, i.e. bit 1 of the same register, = '1' if YDECREASING. MS 1450d983712dSmrg * bias is towards octants 0..3 (i.e. Y decreasing), hence this 1451d983712dSmrg * definition of bit 4) 1452d983712dSmrg * 1453d983712dSmrg */ 1454d983712dSmrg pTseng->BresenhamTable = xnfalloc(8); 1455d983712dSmrg if (pTseng->BresenhamTable == NULL) { 1456d983712dSmrg xf86Msg(X_ERROR, "Could not malloc Bresenham Table.\n"); 1457d983712dSmrg return FALSE; 1458d983712dSmrg } 1459d983712dSmrg for (i=0; i<8; i++) { 1460d983712dSmrg unsigned char zerolinebias = miGetZeroLineBias(pScreen); 1461d983712dSmrg pTseng->BresenhamTable[i] = 0xA0; /* command=linedraw, use error term */ 1462d983712dSmrg if (i & XDECREASING) pTseng->BresenhamTable[i] |= 0x01; 1463d983712dSmrg if (i & YDECREASING) pTseng->BresenhamTable[i] |= 0x02; 1464d983712dSmrg if (!(i & YMAJOR)) pTseng->BresenhamTable[i] |= 0x04; 1465d983712dSmrg if ((1 << i) & zerolinebias) pTseng->BresenhamTable[i] |= 0x10; 1466d983712dSmrg /* ErrorF("BresenhamTable[%d]=0x%x\n", i, pTseng->BresenhamTable[i]); */ 1467d983712dSmrg } 1468d983712dSmrg 1469d983712dSmrg pXAAinfo->SolidLineFlags = 0; 1470d983712dSmrg pXAAinfo->SetupForSolidLine = TsengSetupForSolidFill; 1471d983712dSmrg pXAAinfo->SubsequentSolidBresenhamLine = 1472d983712dSmrg TsengSubsequentSolidBresenhamLine; 1473d983712dSmrg /* 1474d983712dSmrg * ErrorTermBits is used to limit minor, major and error term, so it 1475d983712dSmrg * must be min(errorterm_size, delta_major_size, delta_minor_size) 1476d983712dSmrg * But the calculation for major and minor is done on the DOUBLED 1477d983712dSmrg * values (as per the Bresenham algorithm), so they can also have 13 1478d983712dSmrg * bits (inside XAA). They are divided by 2 in this driver, so they 1479d983712dSmrg * are then again limited to 12 bits. 1480d983712dSmrg */ 1481d983712dSmrg pXAAinfo->SolidBresenhamLineErrorTermBits = 13; 1482d983712dSmrg 1483d983712dSmrg#endif 1484d983712dSmrg 1485d983712dSmrg#if 1 1486d983712dSmrg /* set up color expansion acceleration */ 1487d983712dSmrg if (!TsengXAAInit_Colexp(pScrn)) 1488d983712dSmrg return FALSE; 1489d983712dSmrg#endif 1490d983712dSmrg 1491d983712dSmrg 1492d983712dSmrg /* 1493d983712dSmrg * For Tseng, we set up some often-used values 1494d983712dSmrg */ 1495d983712dSmrg 1496d983712dSmrg switch (pTseng->Bytesperpixel) { /* for MULBPP optimization */ 1497d983712dSmrg case 1: 1498d983712dSmrg pTseng->powerPerPixel = 0; 1499d983712dSmrg pTseng->planemask_mask = 0x000000FF; 1500d983712dSmrg pTseng->neg_x_pixel_offset = 0; 1501d983712dSmrg break; 1502d983712dSmrg case 2: 1503d983712dSmrg pTseng->powerPerPixel = 1; 1504d983712dSmrg pTseng->planemask_mask = 0x0000FFFF; 1505d983712dSmrg pTseng->neg_x_pixel_offset = 1; 1506d983712dSmrg break; 1507d983712dSmrg case 3: 1508d983712dSmrg pTseng->powerPerPixel = 1; 1509d983712dSmrg pTseng->planemask_mask = 0x00FFFFFF; 1510d983712dSmrg pTseng->neg_x_pixel_offset = 2; /* is this correct ??? */ 1511d983712dSmrg break; 1512d983712dSmrg case 4: 1513d983712dSmrg pTseng->powerPerPixel = 2; 1514d983712dSmrg pTseng->planemask_mask = 0xFFFFFFFF; 1515d983712dSmrg pTseng->neg_x_pixel_offset = 3; 1516d983712dSmrg break; 1517d983712dSmrg } 1518d983712dSmrg 1519d983712dSmrg /* 1520d983712dSmrg * Init ping-pong registers. 1521d983712dSmrg * This might be obsoleted by the BACKGROUND_OPERATIONS flag. 1522d983712dSmrg */ 1523d983712dSmrg pTseng->tsengFg = 0; 1524d983712dSmrg pTseng->tsengBg = 16; 1525d983712dSmrg pTseng->tsengPat = 32; 1526d983712dSmrg 1527d983712dSmrg /* for register write optimisation */ 1528d983712dSmrg pTseng->tseng_old_dir = -1; 1529d983712dSmrg pTseng->old_x = 0; 1530d983712dSmrg pTseng->old_y = 0; 1531d983712dSmrg 1532d983712dSmrg /* 1533d983712dSmrg * Finally, we set up the video memory space available to the pixmap 1534d983712dSmrg * cache. In this case, all memory from the end of the virtual screen to 1535d983712dSmrg * the end of video memory minus 1K (which we already reserved), can be 1536d983712dSmrg * used. 1537d983712dSmrg */ 1538d983712dSmrg 1539d983712dSmrg AvailFBArea.x1 = 0; 1540d983712dSmrg AvailFBArea.y1 = 0; 1541d983712dSmrg AvailFBArea.x2 = pScrn->displayWidth; 1542d983712dSmrg AvailFBArea.y2 = (pScrn->videoRam * 1024) / 1543d983712dSmrg (pScrn->displayWidth * pTseng->Bytesperpixel); 1544d983712dSmrg 1545d983712dSmrg xf86InitFBManager(pScreen, &AvailFBArea); 1546d983712dSmrg 1547d983712dSmrg return (XAAInit(pScreen, pXAAinfo)); 15484b9470b1Smrg#else 15494b9470b1Smrg return FALSE; 15504b9470b1Smrg#endif 1551d983712dSmrg} 1552